xref: /aosp_15_r20/external/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h (revision 67e74705e28f6214e480b399dd47ea732279e315)
//===----- CGOpenMPRuntimeNVPTX.h - Interface to OpenMP NVPTX Runtimes ----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation specialized to NVPTX
// targets.
//
//===----------------------------------------------------------------------===//
14*67e74705SXin Li 
15*67e74705SXin Li #ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H
16*67e74705SXin Li #define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H
17*67e74705SXin Li 
18*67e74705SXin Li #include "CGOpenMPRuntime.h"
19*67e74705SXin Li #include "CodeGenFunction.h"
20*67e74705SXin Li #include "clang/AST/StmtOpenMP.h"
21*67e74705SXin Li #include "llvm/IR/CallSite.h"
22*67e74705SXin Li 
23*67e74705SXin Li namespace clang {
24*67e74705SXin Li namespace CodeGen {
25*67e74705SXin Li 
26*67e74705SXin Li class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime {
27*67e74705SXin Li public:
28*67e74705SXin Li   class EntryFunctionState {
29*67e74705SXin Li   public:
30*67e74705SXin Li     llvm::BasicBlock *ExitBB;
31*67e74705SXin Li 
EntryFunctionState()32*67e74705SXin Li     EntryFunctionState() : ExitBB(nullptr){};
33*67e74705SXin Li   };
34*67e74705SXin Li 
35*67e74705SXin Li   class WorkerFunctionState {
36*67e74705SXin Li   public:
37*67e74705SXin Li     llvm::Function *WorkerFn;
38*67e74705SXin Li     const CGFunctionInfo *CGFI;
39*67e74705SXin Li 
40*67e74705SXin Li     WorkerFunctionState(CodeGenModule &CGM);
41*67e74705SXin Li 
42*67e74705SXin Li   private:
43*67e74705SXin Li     void createWorkerFunction(CodeGenModule &CGM);
44*67e74705SXin Li   };
45*67e74705SXin Li 
46*67e74705SXin Li   /// \brief Helper for target entry function. Guide the master and worker
47*67e74705SXin Li   /// threads to their respective locations.
48*67e74705SXin Li   void emitEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST,
49*67e74705SXin Li                        WorkerFunctionState &WST);
50*67e74705SXin Li 
51*67e74705SXin Li   /// \brief Signal termination of OMP execution.
52*67e74705SXin Li   void emitEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
53*67e74705SXin Li 
54*67e74705SXin Li private:
55*67e74705SXin Li   //
56*67e74705SXin Li   // NVPTX calls.
57*67e74705SXin Li   //
58*67e74705SXin Li 
59*67e74705SXin Li   /// \brief Get the GPU warp size.
60*67e74705SXin Li   llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF);
61*67e74705SXin Li 
62*67e74705SXin Li   /// \brief Get the id of the current thread on the GPU.
63*67e74705SXin Li   llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF);
64*67e74705SXin Li 
65*67e74705SXin Li   // \brief Get the maximum number of threads in a block of the GPU.
66*67e74705SXin Li   llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF);
67*67e74705SXin Li 
68*67e74705SXin Li   /// \brief Get barrier to synchronize all threads in a block.
69*67e74705SXin Li   void getNVPTXCTABarrier(CodeGenFunction &CGF);
70*67e74705SXin Li 
71*67e74705SXin Li   // \brief Synchronize all GPU threads in a block.
72*67e74705SXin Li   void syncCTAThreads(CodeGenFunction &CGF);
73*67e74705SXin Li 
74*67e74705SXin Li   //
75*67e74705SXin Li   // OMP calls.
76*67e74705SXin Li   //
77*67e74705SXin Li 
78*67e74705SXin Li   /// \brief Get the thread id of the OMP master thread.
79*67e74705SXin Li   /// The master thread id is the first thread (lane) of the last warp in the
80*67e74705SXin Li   /// GPU block.  Warp size is assumed to be some power of 2.
81*67e74705SXin Li   /// Thread id is 0 indexed.
82*67e74705SXin Li   /// E.g: If NumThreads is 33, master id is 32.
83*67e74705SXin Li   ///      If NumThreads is 64, master id is 32.
84*67e74705SXin Li   ///      If NumThreads is 1024, master id is 992.
85*67e74705SXin Li   llvm::Value *getMasterThreadID(CodeGenFunction &CGF);
86*67e74705SXin Li 
87*67e74705SXin Li   //
88*67e74705SXin Li   // Private state and methods.
89*67e74705SXin Li   //
90*67e74705SXin Li 
91*67e74705SXin Li   // Master-worker control state.
92*67e74705SXin Li   // Number of requested OMP threads in parallel region.
93*67e74705SXin Li   llvm::GlobalVariable *ActiveWorkers;
94*67e74705SXin Li   // Outlined function for the workers to execute.
95*67e74705SXin Li   llvm::GlobalVariable *WorkID;
96*67e74705SXin Li 
97*67e74705SXin Li   /// \brief Initialize master-worker control state.
98*67e74705SXin Li   void initializeEnvironment();
99*67e74705SXin Li 
100*67e74705SXin Li   /// \brief Emit the worker function for the current target region.
101*67e74705SXin Li   void emitWorkerFunction(WorkerFunctionState &WST);
102*67e74705SXin Li 
103*67e74705SXin Li   /// \brief Helper for worker function. Emit body of worker loop.
104*67e74705SXin Li   void emitWorkerLoop(CodeGenFunction &CGF, WorkerFunctionState &WST);
105*67e74705SXin Li 
106*67e74705SXin Li   /// \brief Returns specified OpenMP runtime function for the current OpenMP
107*67e74705SXin Li   /// implementation.  Specialized for the NVPTX device.
108*67e74705SXin Li   /// \param Function OpenMP runtime function.
109*67e74705SXin Li   /// \return Specified function.
110*67e74705SXin Li   llvm::Constant *createNVPTXRuntimeFunction(unsigned Function);
111*67e74705SXin Li 
112*67e74705SXin Li   //
113*67e74705SXin Li   // Base class overrides.
114*67e74705SXin Li   //
115*67e74705SXin Li 
116*67e74705SXin Li   /// \brief Creates offloading entry for the provided entry ID \a ID,
117*67e74705SXin Li   /// address \a Addr and size \a Size.
118*67e74705SXin Li   void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr,
119*67e74705SXin Li                           uint64_t Size) override;
120*67e74705SXin Li 
121*67e74705SXin Li   /// \brief Emit outlined function for 'target' directive on the NVPTX
122*67e74705SXin Li   /// device.
123*67e74705SXin Li   /// \param D Directive to emit.
124*67e74705SXin Li   /// \param ParentName Name of the function that encloses the target region.
125*67e74705SXin Li   /// \param OutlinedFn Outlined function value to be defined by this call.
126*67e74705SXin Li   /// \param OutlinedFnID Outlined function ID value to be defined by this call.
127*67e74705SXin Li   /// \param IsOffloadEntry True if the outlined function is an offload entry.
128*67e74705SXin Li   /// An outlined function may not be an entry if, e.g. the if clause always
129*67e74705SXin Li   /// evaluates to false.
130*67e74705SXin Li   void emitTargetOutlinedFunction(const OMPExecutableDirective &D,
131*67e74705SXin Li                                   StringRef ParentName,
132*67e74705SXin Li                                   llvm::Function *&OutlinedFn,
133*67e74705SXin Li                                   llvm::Constant *&OutlinedFnID,
134*67e74705SXin Li                                   bool IsOffloadEntry,
135*67e74705SXin Li                                   const RegionCodeGenTy &CodeGen) override;
136*67e74705SXin Li 
137*67e74705SXin Li public:
138*67e74705SXin Li   explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM);
139*67e74705SXin Li 
140*67e74705SXin Li   /// \brief This function ought to emit, in the general case, a call to
141*67e74705SXin Li   // the openmp runtime kmpc_push_num_teams. In NVPTX backend it is not needed
142*67e74705SXin Li   // as these numbers are obtained through the PTX grid and block configuration.
143*67e74705SXin Li   /// \param NumTeams An integer expression of teams.
144*67e74705SXin Li   /// \param ThreadLimit An integer expression of threads.
145*67e74705SXin Li   void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams,
146*67e74705SXin Li                           const Expr *ThreadLimit, SourceLocation Loc) override;
147*67e74705SXin Li 
148*67e74705SXin Li   /// \brief Emits inlined function for the specified OpenMP parallel
149*67e74705SXin Li   //  directive but an inlined function for teams.
150*67e74705SXin Li   /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
151*67e74705SXin Li   /// kmp_int32 BoundID, struct context_vars*).
152*67e74705SXin Li   /// \param D OpenMP directive.
153*67e74705SXin Li   /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
154*67e74705SXin Li   /// \param InnermostKind Kind of innermost directive (for simple directives it
155*67e74705SXin Li   /// is a directive itself, for combined - its innermost directive).
156*67e74705SXin Li   /// \param CodeGen Code generation sequence for the \a D directive.
157*67e74705SXin Li   llvm::Value *
158*67e74705SXin Li   emitParallelOrTeamsOutlinedFunction(const OMPExecutableDirective &D,
159*67e74705SXin Li                                       const VarDecl *ThreadIDVar,
160*67e74705SXin Li                                       OpenMPDirectiveKind InnermostKind,
161*67e74705SXin Li                                       const RegionCodeGenTy &CodeGen) override;
162*67e74705SXin Li 
163*67e74705SXin Li   /// \brief Emits code for teams call of the \a OutlinedFn with
164*67e74705SXin Li   /// variables captured in a record which address is stored in \a
165*67e74705SXin Li   /// CapturedStruct.
166*67e74705SXin Li   /// \param OutlinedFn Outlined function to be run by team masters. Type of
167*67e74705SXin Li   /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
168*67e74705SXin Li   /// \param CapturedVars A pointer to the record with the references to
169*67e74705SXin Li   /// variables used in \a OutlinedFn function.
170*67e74705SXin Li   ///
171*67e74705SXin Li   void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D,
172*67e74705SXin Li                      SourceLocation Loc, llvm::Value *OutlinedFn,
173*67e74705SXin Li                      ArrayRef<llvm::Value *> CapturedVars) override;
174*67e74705SXin Li };
175*67e74705SXin Li 
176*67e74705SXin Li } // CodeGen namespace.
177*67e74705SXin Li } // clang namespace.
178*67e74705SXin Li 
179*67e74705SXin Li #endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H
180