1*67e74705SXin Li //===----- CGOpenMPRuntimeNVPTX.h - Interface to OpenMP NVPTX Runtimes ----===// 2*67e74705SXin Li // 3*67e74705SXin Li // The LLVM Compiler Infrastructure 4*67e74705SXin Li // 5*67e74705SXin Li // This file is distributed under the University of Illinois Open Source 6*67e74705SXin Li // License. See LICENSE.TXT for details. 7*67e74705SXin Li // 8*67e74705SXin Li //===----------------------------------------------------------------------===// 9*67e74705SXin Li // 10*67e74705SXin Li // This provides a class for OpenMP runtime code generation specialized to NVPTX 11*67e74705SXin Li // targets. 12*67e74705SXin Li // 13*67e74705SXin Li //===----------------------------------------------------------------------===// 14*67e74705SXin Li 15*67e74705SXin Li #ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H 16*67e74705SXin Li #define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H 17*67e74705SXin Li 18*67e74705SXin Li #include "CGOpenMPRuntime.h" 19*67e74705SXin Li #include "CodeGenFunction.h" 20*67e74705SXin Li #include "clang/AST/StmtOpenMP.h" 21*67e74705SXin Li #include "llvm/IR/CallSite.h" 22*67e74705SXin Li 23*67e74705SXin Li namespace clang { 24*67e74705SXin Li namespace CodeGen { 25*67e74705SXin Li 26*67e74705SXin Li class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime { 27*67e74705SXin Li public: 28*67e74705SXin Li class EntryFunctionState { 29*67e74705SXin Li public: 30*67e74705SXin Li llvm::BasicBlock *ExitBB; 31*67e74705SXin Li EntryFunctionState()32*67e74705SXin Li EntryFunctionState() : ExitBB(nullptr){}; 33*67e74705SXin Li }; 34*67e74705SXin Li 35*67e74705SXin Li class WorkerFunctionState { 36*67e74705SXin Li public: 37*67e74705SXin Li llvm::Function *WorkerFn; 38*67e74705SXin Li const CGFunctionInfo *CGFI; 39*67e74705SXin Li 40*67e74705SXin Li WorkerFunctionState(CodeGenModule &CGM); 41*67e74705SXin Li 42*67e74705SXin Li private: 43*67e74705SXin Li void createWorkerFunction(CodeGenModule &CGM); 44*67e74705SXin Li }; 45*67e74705SXin Li 46*67e74705SXin Li /// \brief Helper for target entry function. Guide the master and worker 47*67e74705SXin Li /// threads to their respective locations. 48*67e74705SXin Li void emitEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST, 49*67e74705SXin Li WorkerFunctionState &WST); 50*67e74705SXin Li 51*67e74705SXin Li /// \brief Signal termination of OMP execution. 52*67e74705SXin Li void emitEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST); 53*67e74705SXin Li 54*67e74705SXin Li private: 55*67e74705SXin Li // 56*67e74705SXin Li // NVPTX calls. 57*67e74705SXin Li // 58*67e74705SXin Li 59*67e74705SXin Li /// \brief Get the GPU warp size. 60*67e74705SXin Li llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF); 61*67e74705SXin Li 62*67e74705SXin Li /// \brief Get the id of the current thread on the GPU. 63*67e74705SXin Li llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF); 64*67e74705SXin Li 65*67e74705SXin Li // \brief Get the maximum number of threads in a block of the GPU. 66*67e74705SXin Li llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF); 67*67e74705SXin Li 68*67e74705SXin Li /// \brief Get barrier to synchronize all threads in a block. 69*67e74705SXin Li void getNVPTXCTABarrier(CodeGenFunction &CGF); 70*67e74705SXin Li 71*67e74705SXin Li // \brief Synchronize all GPU threads in a block. 72*67e74705SXin Li void syncCTAThreads(CodeGenFunction &CGF); 73*67e74705SXin Li 74*67e74705SXin Li // 75*67e74705SXin Li // OMP calls. 76*67e74705SXin Li // 77*67e74705SXin Li 78*67e74705SXin Li /// \brief Get the thread id of the OMP master thread. 79*67e74705SXin Li /// The master thread id is the first thread (lane) of the last warp in the 80*67e74705SXin Li /// GPU block. Warp size is assumed to be some power of 2. 81*67e74705SXin Li /// Thread id is 0 indexed. 82*67e74705SXin Li /// E.g: If NumThreads is 33, master id is 32. 83*67e74705SXin Li /// If NumThreads is 64, master id is 32. 84*67e74705SXin Li /// If NumThreads is 1024, master id is 992. 85*67e74705SXin Li llvm::Value *getMasterThreadID(CodeGenFunction &CGF); 86*67e74705SXin Li 87*67e74705SXin Li // 88*67e74705SXin Li // Private state and methods. 89*67e74705SXin Li // 90*67e74705SXin Li 91*67e74705SXin Li // Master-worker control state. 92*67e74705SXin Li // Number of requested OMP threads in parallel region. 93*67e74705SXin Li llvm::GlobalVariable *ActiveWorkers; 94*67e74705SXin Li // Outlined function for the workers to execute. 95*67e74705SXin Li llvm::GlobalVariable *WorkID; 96*67e74705SXin Li 97*67e74705SXin Li /// \brief Initialize master-worker control state. 98*67e74705SXin Li void initializeEnvironment(); 99*67e74705SXin Li 100*67e74705SXin Li /// \brief Emit the worker function for the current target region. 101*67e74705SXin Li void emitWorkerFunction(WorkerFunctionState &WST); 102*67e74705SXin Li 103*67e74705SXin Li /// \brief Helper for worker function. Emit body of worker loop. 104*67e74705SXin Li void emitWorkerLoop(CodeGenFunction &CGF, WorkerFunctionState &WST); 105*67e74705SXin Li 106*67e74705SXin Li /// \brief Returns specified OpenMP runtime function for the current OpenMP 107*67e74705SXin Li /// implementation. Specialized for the NVPTX device. 108*67e74705SXin Li /// \param Function OpenMP runtime function. 109*67e74705SXin Li /// \return Specified function. 110*67e74705SXin Li llvm::Constant *createNVPTXRuntimeFunction(unsigned Function); 111*67e74705SXin Li 112*67e74705SXin Li // 113*67e74705SXin Li // Base class overrides. 114*67e74705SXin Li // 115*67e74705SXin Li 116*67e74705SXin Li /// \brief Creates offloading entry for the provided entry ID \a ID, 117*67e74705SXin Li /// address \a Addr and size \a Size. 118*67e74705SXin Li void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr, 119*67e74705SXin Li uint64_t Size) override; 120*67e74705SXin Li 121*67e74705SXin Li /// \brief Emit outlined function for 'target' directive on the NVPTX 122*67e74705SXin Li /// device. 123*67e74705SXin Li /// \param D Directive to emit. 124*67e74705SXin Li /// \param ParentName Name of the function that encloses the target region. 125*67e74705SXin Li /// \param OutlinedFn Outlined function value to be defined by this call. 126*67e74705SXin Li /// \param OutlinedFnID Outlined function ID value to be defined by this call. 127*67e74705SXin Li /// \param IsOffloadEntry True if the outlined function is an offload entry. 128*67e74705SXin Li /// An outlined function may not be an entry if, e.g. the if clause always 129*67e74705SXin Li /// evaluates to false. 130*67e74705SXin Li void emitTargetOutlinedFunction(const OMPExecutableDirective &D, 131*67e74705SXin Li StringRef ParentName, 132*67e74705SXin Li llvm::Function *&OutlinedFn, 133*67e74705SXin Li llvm::Constant *&OutlinedFnID, 134*67e74705SXin Li bool IsOffloadEntry, 135*67e74705SXin Li const RegionCodeGenTy &CodeGen) override; 136*67e74705SXin Li 137*67e74705SXin Li public: 138*67e74705SXin Li explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM); 139*67e74705SXin Li 140*67e74705SXin Li /// \brief This function ought to emit, in the general case, a call to 141*67e74705SXin Li // the openmp runtime kmpc_push_num_teams. In NVPTX backend it is not needed 142*67e74705SXin Li // as these numbers are obtained through the PTX grid and block configuration. 143*67e74705SXin Li /// \param NumTeams An integer expression of teams. 144*67e74705SXin Li /// \param ThreadLimit An integer expression of threads. 145*67e74705SXin Li void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, 146*67e74705SXin Li const Expr *ThreadLimit, SourceLocation Loc) override; 147*67e74705SXin Li 148*67e74705SXin Li /// \brief Emits inlined function for the specified OpenMP parallel 149*67e74705SXin Li // directive but an inlined function for teams. 150*67e74705SXin Li /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID, 151*67e74705SXin Li /// kmp_int32 BoundID, struct context_vars*). 152*67e74705SXin Li /// \param D OpenMP directive. 153*67e74705SXin Li /// \param ThreadIDVar Variable for thread id in the current OpenMP region. 154*67e74705SXin Li /// \param InnermostKind Kind of innermost directive (for simple directives it 155*67e74705SXin Li /// is a directive itself, for combined - its innermost directive). 156*67e74705SXin Li /// \param CodeGen Code generation sequence for the \a D directive. 157*67e74705SXin Li llvm::Value * 158*67e74705SXin Li emitParallelOrTeamsOutlinedFunction(const OMPExecutableDirective &D, 159*67e74705SXin Li const VarDecl *ThreadIDVar, 160*67e74705SXin Li OpenMPDirectiveKind InnermostKind, 161*67e74705SXin Li const RegionCodeGenTy &CodeGen) override; 162*67e74705SXin Li 163*67e74705SXin Li /// \brief Emits code for teams call of the \a OutlinedFn with 164*67e74705SXin Li /// variables captured in a record which address is stored in \a 165*67e74705SXin Li /// CapturedStruct. 166*67e74705SXin Li /// \param OutlinedFn Outlined function to be run by team masters. Type of 167*67e74705SXin Li /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*). 168*67e74705SXin Li /// \param CapturedVars A pointer to the record with the references to 169*67e74705SXin Li /// variables used in \a OutlinedFn function. 170*67e74705SXin Li /// 171*67e74705SXin Li void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, 172*67e74705SXin Li SourceLocation Loc, llvm::Value *OutlinedFn, 173*67e74705SXin Li ArrayRef<llvm::Value *> CapturedVars) override; 174*67e74705SXin Li }; 175*67e74705SXin Li 176*67e74705SXin Li } // CodeGen namespace. 177*67e74705SXin Li } // clang namespace. 178*67e74705SXin Li 179*67e74705SXin Li #endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H 180