21 #include "llvm/ADT/SmallPtrSet.h" 23 using namespace clang;
24 using namespace CodeGen;
30 OMPRTL_NVPTX__kmpc_kernel_init,
32 OMPRTL_NVPTX__kmpc_kernel_deinit,
35 OMPRTL_NVPTX__kmpc_spmd_kernel_init,
37 OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2,
41 OMPRTL_NVPTX__kmpc_kernel_prepare_parallel,
44 OMPRTL_NVPTX__kmpc_kernel_parallel,
46 OMPRTL_NVPTX__kmpc_kernel_end_parallel,
49 OMPRTL_NVPTX__kmpc_serialized_parallel,
52 OMPRTL_NVPTX__kmpc_end_serialized_parallel,
55 OMPRTL_NVPTX__kmpc_shuffle_int32,
58 OMPRTL_NVPTX__kmpc_shuffle_int64,
64 OMPRTL_NVPTX__kmpc_parallel_reduce_nowait_v2,
67 OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_simple,
70 OMPRTL_NVPTX__kmpc_nvptx_teams_end_reduce_nowait_simple,
72 OMPRTL_NVPTX__kmpc_end_reduce_nowait,
74 OMPRTL_NVPTX__kmpc_data_sharing_init_stack,
76 OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd,
79 OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack,
81 OMPRTL_NVPTX__kmpc_data_sharing_pop_stack,
84 OMPRTL_NVPTX__kmpc_begin_sharing_variables,
86 OMPRTL_NVPTX__kmpc_end_sharing_variables,
88 OMPRTL_NVPTX__kmpc_get_shared_variables,
91 OMPRTL_NVPTX__kmpc_parallel_level,
93 OMPRTL_NVPTX__kmpc_is_spmd_exec_mode,
96 OMPRTL_NVPTX__kmpc_get_team_static_memory,
99 OMPRTL_NVPTX__kmpc_restore_team_static_memory,
104 OMPRTL__kmpc_barrier_simple_spmd,
114 llvm::BasicBlock *ContBlock =
nullptr;
119 bool Conditional =
false)
120 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
129 CGF.
Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
147 class ExecutionRuntimeModesRAII {
152 bool SavedRuntimeMode =
false;
153 bool *RuntimeMode =
nullptr;
158 : ExecMode(ExecMode) {
159 SavedExecMode = ExecMode;
164 bool &RuntimeMode,
bool FullRuntimeMode)
165 : ExecMode(ExecMode), RuntimeMode(&RuntimeMode) {
166 SavedExecMode = ExecMode;
167 SavedRuntimeMode = RuntimeMode;
169 RuntimeMode = FullRuntimeMode;
171 ~ExecutionRuntimeModesRAII() {
172 ExecMode = SavedExecMode;
174 *RuntimeMode = SavedRuntimeMode;
187 LaneIDMask = WarpSize - 1,
190 GlobalMemoryAlignment = 128,
193 SharedMemorySize = 128,
198 if (
const auto *ASE = dyn_cast<ArraySubscriptExpr>(RefExpr)) {
199 const Expr *
Base = ASE->getBase()->IgnoreParenImpCasts();
200 while (
const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
203 }
else if (
auto *OASE = dyn_cast<OMPArraySectionExpr>(RefExpr)) {
204 const Expr *
Base = OASE->getBase()->IgnoreParenImpCasts();
205 while (
const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
207 while (
const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
212 if (
const auto *DE = dyn_cast<DeclRefExpr>(RefExpr))
213 return cast<ValueDecl>(DE->getDecl()->getCanonicalDecl());
214 const auto *ME = cast<MemberExpr>(RefExpr);
215 return cast<ValueDecl>(ME->getMemberDecl()->getCanonicalDecl());
220 return P1.first > P2.first;
223 static RecordDecl *buildRecordForGlobalizedVars(
226 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
227 &MappedDeclsFields) {
228 if (EscapedDecls.empty() && EscapedDeclsForTeams.empty())
232 GlobalizedVars.emplace_back(
237 for (
const ValueDecl *D : EscapedDeclsForTeams)
239 std::stable_sort(GlobalizedVars.begin(), GlobalizedVars.end(),
248 llvm::SmallPtrSet<const ValueDecl *, 16> SingleEscaped(
249 EscapedDeclsForTeams.begin(), EscapedDeclsForTeams.end());
250 for (
const auto &Pair : GlobalizedVars) {
259 if (SingleEscaped.count(VD)) {
273 llvm::APInt ArraySize(32, WarpSize);
283 GlobalMemoryAlignment)));
284 Field->
addAttr(AlignedAttr::CreateImplicit(
285 C, AlignedAttr::GNU_aligned,
true,
290 GlobalizedRD->addDecl(Field);
291 MappedDeclsFields.try_emplace(VD, Field);
293 GlobalizedRD->completeDefinition();
298 class CheckVarsEscapingDeclContext final
301 llvm::SetVector<const ValueDecl *> EscapedDecls;
302 llvm::SetVector<const ValueDecl *> EscapedVariableLengthDecls;
303 llvm::SmallPtrSet<const Decl *, 4> EscapedParameters;
305 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
306 bool AllEscaped =
false;
307 bool IsForCombinedParallelRegion =
false;
309 void markAsEscaped(
const ValueDecl *VD) {
311 if (!isa<VarDecl>(VD) ||
312 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))
316 if (
auto *CSI = CGF.CapturedStmtInfo) {
317 if (
const FieldDecl *FD = CSI->lookup(cast<VarDecl>(VD))) {
320 if (!IsForCombinedParallelRegion) {
323 const auto *
Attr = FD->getAttr<OMPCaptureKindAttr>();
326 if (((
Attr->getCaptureKind() != OMPC_map) &&
328 static_cast<OpenMPClauseKind>(
Attr->getCaptureKind()))) ||
329 ((
Attr->getCaptureKind() == OMPC_map) &&
330 !FD->getType()->isAnyPointerType()))
333 if (!FD->getType()->isReferenceType()) {
335 "Parameter captured by value with variably modified type");
336 EscapedParameters.insert(VD);
337 }
else if (!IsForCombinedParallelRegion) {
342 if ((!CGF.CapturedStmtInfo ||
343 (IsForCombinedParallelRegion && CGF.CapturedStmtInfo)) &&
348 EscapedVariableLengthDecls.insert(VD);
350 EscapedDecls.insert(VD);
353 void VisitValueDecl(
const ValueDecl *VD) {
356 if (
const auto *VarD = dyn_cast<VarDecl>(VD)) {
357 if (!isa<ParmVarDecl>(VarD) && VarD->hasInit()) {
358 const bool SavedAllEscaped = AllEscaped;
360 Visit(VarD->getInit());
361 AllEscaped = SavedAllEscaped;
367 bool IsCombinedParallelRegion) {
371 if (C.capturesVariable() && !C.capturesVariableByCopy()) {
372 const ValueDecl *VD = C.getCapturedVar();
373 bool SavedIsForCombinedParallelRegion = IsForCombinedParallelRegion;
374 if (IsCombinedParallelRegion) {
378 IsForCombinedParallelRegion =
false;
381 C->getClauseKind() == OMPC_reduction ||
382 C->getClauseKind() == OMPC_linear ||
383 C->getClauseKind() == OMPC_private)
386 if (
const auto *PC = dyn_cast<OMPFirstprivateClause>(C))
387 Vars = PC->getVarRefs();
388 else if (
const auto *PC = dyn_cast<OMPLastprivateClause>(C))
389 Vars = PC->getVarRefs();
391 llvm_unreachable(
"Unexpected clause.");
392 for (
const auto *E : Vars) {
396 IsForCombinedParallelRegion =
true;
400 if (IsForCombinedParallelRegion)
405 if (isa<OMPCapturedExprDecl>(VD))
407 IsForCombinedParallelRegion = SavedIsForCombinedParallelRegion;
412 void buildRecordForGlobalizedVars(
bool IsInTTDRegion) {
413 assert(!GlobalizedRD &&
414 "Record for globalized variables is built already.");
417 EscapedDeclsForTeams = EscapedDecls.getArrayRef();
419 EscapedDeclsForParallel = EscapedDecls.getArrayRef();
420 GlobalizedRD = ::buildRecordForGlobalizedVars(
421 CGF.getContext(), EscapedDeclsForParallel, EscapedDeclsForTeams,
428 : CGF(CGF), EscapedDecls(TeamsReductions.begin(), TeamsReductions.end()) {
430 virtual ~CheckVarsEscapingDeclContext() =
default;
431 void VisitDeclStmt(
const DeclStmt *S) {
435 if (
const auto *VD = dyn_cast_or_null<ValueDecl>(D))
449 if (CaptureRegions.size() == 1 && CaptureRegions.back() ==
OMPD_unknown) {
450 VisitStmt(S->getCapturedStmt());
453 VisitOpenMPCapturedStmt(
455 CaptureRegions.back() == OMPD_parallel &&
463 if (C.capturesVariable() && !C.capturesVariableByCopy()) {
464 const ValueDecl *VD = C.getCapturedVar();
466 if (isa<OMPCapturedExprDecl>(VD))
475 if (C.capturesVariable()) {
477 const ValueDecl *VD = C.getCapturedVar();
485 void VisitBlockExpr(
const BlockExpr *E) {
490 const VarDecl *VD = C.getVariable();
497 void VisitCallExpr(
const CallExpr *E) {
503 if (Arg->isLValue()) {
504 const bool SavedAllEscaped = AllEscaped;
507 AllEscaped = SavedAllEscaped;
520 if (isa<OMPCapturedExprDecl>(VD))
522 else if (
const auto *VarD = dyn_cast<VarDecl>(VD))
523 if (VarD->isInitCapture())
530 const bool SavedAllEscaped = AllEscaped;
533 AllEscaped = SavedAllEscaped;
542 const bool SavedAllEscaped = AllEscaped;
545 AllEscaped = SavedAllEscaped;
550 void VisitExpr(
const Expr *E) {
553 bool SavedAllEscaped = AllEscaped;
559 AllEscaped = SavedAllEscaped;
561 void VisitStmt(
const Stmt *S) {
571 const RecordDecl *getGlobalizedRecord(
bool IsInTTDRegion) {
573 buildRecordForGlobalizedVars(IsInTTDRegion);
579 assert(GlobalizedRD &&
580 "Record for globalized variables must be generated already.");
581 auto I = MappedDeclsFields.find(VD);
582 if (I == MappedDeclsFields.end())
584 return I->getSecond();
589 return EscapedDecls.getArrayRef();
594 const llvm::SmallPtrSetImpl<const Decl *> &getEscapedParameters()
const {
595 return EscapedParameters;
601 return EscapedVariableLengthDecls.getArrayRef();
609 llvm::Intrinsic::getDeclaration(
610 &CGF.
CGM.
getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize),
617 llvm::Intrinsic::getDeclaration(
618 &CGF.
CGM.
getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x),
642 llvm::Intrinsic::getDeclaration(
643 &CGF.
CGM.
getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x),
644 "nvptx_num_threads");
653 bool IsInSPMDExecutionMode =
false) {
655 return IsInSPMDExecutionMode
675 return Bld.CreateAnd(Bld.CreateNUWSub(NumThreads, Bld.getInt32(1)),
676 Bld.CreateNot(Mask),
"master_tid");
679 CGOpenMPRuntimeNVPTX::WorkerFunctionState::WorkerFunctionState(
683 createWorkerFunction(CGM);
686 void CGOpenMPRuntimeNVPTX::WorkerFunctionState::createWorkerFunction(
694 WorkerFn->setDoesNotRecurse();
698 CGOpenMPRuntimeNVPTX::getExecutionMode()
const {
699 return CurrentExecutionMode;
721 if (
const auto *C = dyn_cast<CompoundStmt>(Body)) {
722 const Stmt *Child =
nullptr;
723 for (
const Stmt *S : C->body()) {
724 if (
const auto *E = dyn_cast<Expr>(S)) {
729 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
730 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
733 if (
const auto *DS = dyn_cast<DeclStmt>(S)) {
734 if (llvm::all_of(DS->decls(), [&Ctx](
const Decl *D) {
735 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
736 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
737 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
738 isa<UsingDirectiveDecl>(D) ||
739 isa<OMPDeclareReductionDecl>(D) ||
740 isa<OMPThreadPrivateDecl>(D))
742 const auto *VD = dyn_cast<
VarDecl>(D);
746 ((VD->getType().isTrivialType(Ctx) ||
747 VD->getType()->isReferenceType()) &&
748 (!VD->hasInit() ||
isTrivial(Ctx, VD->getInit())));
772 if (NameModifier != OMPD_parallel && NameModifier !=
OMPD_unknown)
774 const Expr *Cond = C->getCondition();
790 if (
const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
797 if (DKind == OMPD_teams) {
798 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
803 if (
const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
804 DKind = NND->getDirectiveKind();
811 case OMPD_target_teams:
814 case OMPD_target_simd:
815 case OMPD_target_parallel:
816 case OMPD_target_parallel_for:
817 case OMPD_target_parallel_for_simd:
818 case OMPD_target_teams_distribute:
819 case OMPD_target_teams_distribute_simd:
820 case OMPD_target_teams_distribute_parallel_for:
821 case OMPD_target_teams_distribute_parallel_for_simd:
824 case OMPD_parallel_for:
825 case OMPD_parallel_sections:
827 case OMPD_parallel_for_simd:
829 case OMPD_cancellation_point:
831 case OMPD_threadprivate:
846 case OMPD_target_data:
847 case OMPD_target_exit_data:
848 case OMPD_target_enter_data:
849 case OMPD_distribute:
850 case OMPD_distribute_simd:
851 case OMPD_distribute_parallel_for:
852 case OMPD_distribute_parallel_for_simd:
853 case OMPD_teams_distribute:
854 case OMPD_teams_distribute_simd:
855 case OMPD_teams_distribute_parallel_for:
856 case OMPD_teams_distribute_parallel_for_simd:
857 case OMPD_target_update:
858 case OMPD_declare_simd:
859 case OMPD_declare_target:
860 case OMPD_end_declare_target:
861 case OMPD_declare_reduction:
863 case OMPD_taskloop_simd:
866 llvm_unreachable(
"Unexpected directive.");
876 switch (DirectiveKind) {
878 case OMPD_target_teams:
880 case OMPD_target_parallel:
881 case OMPD_target_parallel_for:
882 case OMPD_target_parallel_for_simd:
883 case OMPD_target_teams_distribute_parallel_for:
884 case OMPD_target_teams_distribute_parallel_for_simd:
886 case OMPD_target_simd:
887 case OMPD_target_teams_distribute:
888 case OMPD_target_teams_distribute_simd:
892 case OMPD_parallel_for:
893 case OMPD_parallel_sections:
895 case OMPD_parallel_for_simd:
897 case OMPD_cancellation_point:
899 case OMPD_threadprivate:
914 case OMPD_target_data:
915 case OMPD_target_exit_data:
916 case OMPD_target_enter_data:
917 case OMPD_distribute:
918 case OMPD_distribute_simd:
919 case OMPD_distribute_parallel_for:
920 case OMPD_distribute_parallel_for_simd:
921 case OMPD_teams_distribute:
922 case OMPD_teams_distribute_simd:
923 case OMPD_teams_distribute_parallel_for:
924 case OMPD_teams_distribute_parallel_for_simd:
925 case OMPD_target_update:
926 case OMPD_declare_simd:
927 case OMPD_declare_target:
928 case OMPD_end_declare_target:
929 case OMPD_declare_reduction:
931 case OMPD_taskloop_simd:
937 "Unknown programming model for OpenMP directive on NVPTX target.");
945 "Expected loop-based directive.");
950 return C->getScheduleKind() == OMPC_SCHEDULE_static;
963 if (
const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
971 if (DKind == OMPD_parallel) {
972 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
977 if (
const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
978 DKind = NND->getDirectiveKind();
983 }
else if (DKind == OMPD_teams) {
984 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
989 if (
const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
990 DKind = NND->getDirectiveKind();
995 if (DKind == OMPD_parallel) {
996 Body = NND->getInnermostCapturedStmt()->IgnoreContainers(
1001 if (
const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
1002 DKind = NND->getDirectiveKind();
1011 case OMPD_target_teams:
1016 if (DKind == OMPD_parallel) {
1017 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
1022 if (
const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
1023 DKind = NND->getDirectiveKind();
1030 case OMPD_target_parallel:
1033 case OMPD_target_teams_distribute:
1034 case OMPD_target_simd:
1035 case OMPD_target_parallel_for:
1036 case OMPD_target_parallel_for_simd:
1037 case OMPD_target_teams_distribute_simd:
1038 case OMPD_target_teams_distribute_parallel_for:
1039 case OMPD_target_teams_distribute_parallel_for_simd:
1042 case OMPD_parallel_for:
1043 case OMPD_parallel_sections:
1045 case OMPD_parallel_for_simd:
1047 case OMPD_cancellation_point:
1049 case OMPD_threadprivate:
1057 case OMPD_taskyield:
1060 case OMPD_taskgroup:
1064 case OMPD_target_data:
1065 case OMPD_target_exit_data:
1066 case OMPD_target_enter_data:
1067 case OMPD_distribute:
1068 case OMPD_distribute_simd:
1069 case OMPD_distribute_parallel_for:
1070 case OMPD_distribute_parallel_for_simd:
1071 case OMPD_teams_distribute:
1072 case OMPD_teams_distribute_simd:
1073 case OMPD_teams_distribute_parallel_for:
1074 case OMPD_teams_distribute_parallel_for_simd:
1075 case OMPD_target_update:
1076 case OMPD_declare_simd:
1077 case OMPD_declare_target:
1078 case OMPD_end_declare_target:
1079 case OMPD_declare_reduction:
1081 case OMPD_taskloop_simd:
1084 llvm_unreachable(
"Unexpected directive.");
1098 switch (DirectiveKind) {
1100 case OMPD_target_teams:
1101 case OMPD_target_parallel:
1103 case OMPD_target_parallel_for:
1104 case OMPD_target_parallel_for_simd:
1105 case OMPD_target_teams_distribute_parallel_for:
1106 case OMPD_target_teams_distribute_parallel_for_simd:
1109 case OMPD_target_simd:
1110 case OMPD_target_teams_distribute:
1111 case OMPD_target_teams_distribute_simd:
1115 case OMPD_parallel_for:
1116 case OMPD_parallel_sections:
1118 case OMPD_parallel_for_simd:
1120 case OMPD_cancellation_point:
1122 case OMPD_threadprivate:
1130 case OMPD_taskyield:
1133 case OMPD_taskgroup:
1137 case OMPD_target_data:
1138 case OMPD_target_exit_data:
1139 case OMPD_target_enter_data:
1140 case OMPD_distribute:
1141 case OMPD_distribute_simd:
1142 case OMPD_distribute_parallel_for:
1143 case OMPD_distribute_parallel_for_simd:
1144 case OMPD_teams_distribute:
1145 case OMPD_teams_distribute_simd:
1146 case OMPD_teams_distribute_parallel_for:
1147 case OMPD_teams_distribute_parallel_for_simd:
1148 case OMPD_target_update:
1149 case OMPD_declare_simd:
1150 case OMPD_declare_target:
1151 case OMPD_end_declare_target:
1152 case OMPD_declare_reduction:
1154 case OMPD_taskloop_simd:
1160 "Unknown programming model for OpenMP directive on NVPTX target.");
1164 StringRef ParentName,
1165 llvm::Function *&OutlinedFn,
1166 llvm::Constant *&OutlinedFnID,
1167 bool IsOffloadEntry,
1169 ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode);
1170 EntryFunctionState EST;
1173 WrapperFunctionsMap.clear();
1177 CGOpenMPRuntimeNVPTX::EntryFunctionState &EST;
1178 CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST;
1181 NVPTXPrePostActionTy(CGOpenMPRuntimeNVPTX::EntryFunctionState &EST,
1182 CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST)
1183 : EST(EST), WST(WST) {}
1187 RT.emitNonSPMDEntryHeader(CGF, EST, WST);
1189 RT.setLocThreadIdInsertPt(CGF,
true);
1195 RT.emitNonSPMDEntryFooter(CGF, EST);
1199 IsInTTDRegion =
true;
1201 GlobalizedRecords.emplace_back();
1202 if (!KernelStaticGlobalized) {
1203 KernelStaticGlobalized =
new llvm::GlobalVariable(
1206 llvm::ConstantPointerNull::get(CGM.
VoidPtrTy),
1207 "_openmp_kernel_static_glob_rd$ptr",
nullptr,
1208 llvm::GlobalValue::NotThreadLocal,
1211 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
1212 IsOffloadEntry, CodeGen);
1213 IsInTTDRegion =
false;
1217 WST.WorkerFn->setName(Twine(OutlinedFn->getName(),
"_worker"));
1220 emitWorkerFunction(WST);
1224 void CGOpenMPRuntimeNVPTX::emitNonSPMDEntryHeader(
CodeGenFunction &CGF,
1225 EntryFunctionState &EST,
1226 WorkerFunctionState &WST) {
1236 Bld.CreateCondBr(IsWorker, WorkerBB, MasterCheckBB);
1239 emitCall(CGF, WST.Loc, WST.WorkerFn);
1245 Bld.CreateCondBr(IsMaster, MasterBB, EST.ExitBB);
1248 IsInTargetMasterThreadRegion =
true;
1256 createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init), Args);
1260 createNVPTXRuntimeFunction(
1261 OMPRTL_NVPTX__kmpc_data_sharing_init_stack));
1263 emitGenericVarsProlog(CGF, WST.Loc);
1266 void CGOpenMPRuntimeNVPTX::emitNonSPMDEntryFooter(
CodeGenFunction &CGF,
1267 EntryFunctionState &EST) {
1268 IsInTargetMasterThreadRegion =
false;
1272 emitGenericVarsEpilog(CGF);
1277 llvm::BasicBlock *TerminateBB = CGF.
createBasicBlock(
".termination.notifier");
1285 createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_deinit), Args);
1287 syncCTAThreads(CGF);
1292 EST.ExitBB =
nullptr;
1296 StringRef ParentName,
1297 llvm::Function *&OutlinedFn,
1298 llvm::Constant *&OutlinedFnID,
1299 bool IsOffloadEntry,
1301 ExecutionRuntimeModesRAII ModeRAII(
1302 CurrentExecutionMode, RequiresFullRuntime,
1305 EntryFunctionState EST;
1310 CGOpenMPRuntimeNVPTX::EntryFunctionState &EST;
1315 CGOpenMPRuntimeNVPTX::EntryFunctionState &EST,
1317 : RT(RT), EST(EST), D(D) {}
1319 RT.emitSPMDEntryHeader(CGF, EST, D);
1325 RT.emitSPMDEntryFooter(CGF, EST);
1327 } Action(*
this, EST, D);
1329 IsInTTDRegion =
true;
1331 GlobalizedRecords.emplace_back();
1332 if (!KernelStaticGlobalized) {
1333 KernelStaticGlobalized =
new llvm::GlobalVariable(
1336 llvm::ConstantPointerNull::get(CGM.
VoidPtrTy),
1337 "_openmp_kernel_static_glob_rd$ptr",
nullptr,
1338 llvm::GlobalValue::NotThreadLocal,
1341 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
1342 IsOffloadEntry, CodeGen);
1343 IsInTTDRegion =
false;
1346 void CGOpenMPRuntimeNVPTX::emitSPMDEntryHeader(
1357 Bld.getInt16(RequiresFullRuntime ? 1 : 0),
1360 createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args);
1362 if (RequiresFullRuntime) {
1365 OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd));
1372 IsInTargetMasterThreadRegion =
true;
1376 EntryFunctionState &EST) {
1377 IsInTargetMasterThreadRegion =
false;
1390 CGF.
Builder.getInt16(RequiresFullRuntime ? 1 : 0)};
1392 createNVPTXRuntimeFunction(
1393 OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2), Args);
1397 EST.ExitBB =
nullptr;
1410 llvm::GlobalValue::WeakAnyLinkage,
1411 llvm::ConstantInt::get(CGM.
Int8Ty, Mode ? 0 : 1),
1412 Twine(Name,
"_exec_mode"));
1416 void CGOpenMPRuntimeNVPTX::emitWorkerFunction(WorkerFunctionState &WST) {
1422 emitWorkerLoop(CGF, WST);
1427 WorkerFunctionState &WST) {
1440 llvm::BasicBlock *SelectWorkersBB = CGF.
createBasicBlock(
".select.workers");
1442 llvm::BasicBlock *TerminateBB = CGF.
createBasicBlock(
".terminate.parallel");
1451 syncCTAThreads(CGF);
1464 createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_parallel), Args);
1469 llvm::Value *ShouldTerminate = Bld.CreateIsNull(WorkID,
"should_terminate");
1470 Bld.CreateCondBr(ShouldTerminate, ExitBB, SelectWorkersBB);
1475 Bld.CreateIsNotNull(Bld.
CreateLoad(ExecStatus),
"is_active");
1476 Bld.CreateCondBr(IsActive, ExecuteBB, BarrierBB);
1481 setLocThreadIdInsertPt(CGF,
true);
1484 for (llvm::Function *W : Work) {
1489 Bld.CreateICmpEQ(Bld.
CreateLoad(WorkFn),
ID,
"work_match");
1493 Bld.CreateCondBr(WorkFnMatch, ExecuteFNBB, CheckNextBB);
1502 emitCall(CGF, WST.Loc, W,
1503 {Bld.getInt16(0), getThreadID(CGF, WST.Loc)});
1513 auto *ParallelFnTy =
1514 llvm::FunctionType::get(CGM.
VoidTy, {CGM.Int16Ty, CGM.Int32Ty},
1522 emitCall(CGF, WST.Loc, WorkFnCast,
1523 {Bld.getInt16(0), getThreadID(CGF, WST.Loc)});
1530 createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_end_parallel),
1537 syncCTAThreads(CGF);
1543 clearLocThreadIdInsertPt(CGF);
1552 llvm::Constant *RTLFn =
nullptr;
1553 switch (static_cast<OpenMPRTLFunctionNVPTX>(Function)) {
1554 case OMPRTL_NVPTX__kmpc_kernel_init: {
1559 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1563 case OMPRTL_NVPTX__kmpc_kernel_deinit: {
1567 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1571 case OMPRTL_NVPTX__kmpc_spmd_kernel_init: {
1576 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1580 case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2: {
1584 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1588 case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: {
1593 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1597 case OMPRTL_NVPTX__kmpc_kernel_parallel: {
1603 llvm::FunctionType::get(RetTy, TypeParams,
false);
1607 case OMPRTL_NVPTX__kmpc_kernel_end_parallel: {
1614 case OMPRTL_NVPTX__kmpc_serialized_parallel: {
1619 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1623 case OMPRTL_NVPTX__kmpc_end_serialized_parallel: {
1628 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1632 case OMPRTL_NVPTX__kmpc_shuffle_int32: {
1637 llvm::FunctionType::get(CGM.
Int32Ty, TypeParams,
false);
1641 case OMPRTL_NVPTX__kmpc_shuffle_int64: {
1646 llvm::FunctionType::get(CGM.
Int64Ty, TypeParams,
false);
1650 case OMPRTL_NVPTX__kmpc_parallel_reduce_nowait_v2: {
1658 auto *ShuffleReduceFnTy =
1659 llvm::FunctionType::get(CGM.
VoidTy, ShuffleReduceTypeParams,
1662 auto *InterWarpCopyFnTy =
1663 llvm::FunctionType::get(CGM.
VoidTy, InterWarpCopyTypeParams,
1665 llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
1670 ShuffleReduceFnTy->getPointerTo(),
1671 InterWarpCopyFnTy->getPointerTo()};
1673 llvm::FunctionType::get(CGM.
Int32Ty, TypeParams,
false);
1675 FnTy,
"__kmpc_nvptx_parallel_reduce_nowait_v2");
1678 case OMPRTL_NVPTX__kmpc_end_reduce_nowait: {
1682 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1684 FnTy,
"__kmpc_nvptx_end_reduce_nowait");
1687 case OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_simple: {
1691 getIdentTyPointerTy(), CGM.
Int32Ty,
1692 llvm::PointerType::getUnqual(getKmpCriticalNameTy())};
1694 llvm::FunctionType::get(CGM.
Int32Ty, TypeParams,
false);
1696 FnTy,
"__kmpc_nvptx_teams_reduce_nowait_simple");
1699 case OMPRTL_NVPTX__kmpc_nvptx_teams_end_reduce_nowait_simple: {
1703 getIdentTyPointerTy(), CGM.
Int32Ty,
1704 llvm::PointerType::getUnqual(getKmpCriticalNameTy())};
1706 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1708 FnTy,
"__kmpc_nvptx_teams_end_reduce_nowait_simple");
1711 case OMPRTL_NVPTX__kmpc_data_sharing_init_stack: {
1718 case OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd: {
1726 case OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack: {
1731 llvm::FunctionType::get(CGM.
VoidPtrTy, TypeParams,
false);
1733 FnTy,
"__kmpc_data_sharing_coalesced_push_stack");
1736 case OMPRTL_NVPTX__kmpc_data_sharing_pop_stack: {
1740 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1742 "__kmpc_data_sharing_pop_stack");
1745 case OMPRTL_NVPTX__kmpc_begin_sharing_variables: {
1750 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1754 case OMPRTL_NVPTX__kmpc_end_sharing_variables: {
1761 case OMPRTL_NVPTX__kmpc_get_shared_variables: {
1765 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1769 case OMPRTL_NVPTX__kmpc_parallel_level: {
1773 llvm::FunctionType::get(CGM.
Int16Ty, TypeParams,
false);
1777 case OMPRTL_NVPTX__kmpc_is_spmd_exec_mode: {
1779 auto *FnTy = llvm::FunctionType::get(CGM.
Int8Ty,
false);
1783 case OMPRTL_NVPTX__kmpc_get_team_static_memory: {
1789 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1793 case OMPRTL_NVPTX__kmpc_restore_team_static_memory: {
1798 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1807 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1809 cast<llvm::Function>(RTLFn)->addFnAttr(llvm::Attribute::Convergent);
1812 case OMPRTL__kmpc_barrier_simple_spmd: {
1817 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1820 cast<llvm::Function>(RTLFn)->addFnAttr(llvm::Attribute::Convergent);
1827 void CGOpenMPRuntimeNVPTX::createOffloadEntry(llvm::Constant *
ID,
1828 llvm::Constant *Addr,
1829 uint64_t Size, int32_t,
1830 llvm::GlobalValue::LinkageTypes) {
1833 if (!isa<llvm::Function>(Addr))
1839 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata(
"nvvm.annotations");
1841 llvm::Metadata *MDVals[] = {
1842 llvm::ConstantAsMetadata::get(Addr), llvm::MDString::get(Ctx,
"kernel"),
1843 llvm::ConstantAsMetadata::get(
1844 llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
1846 MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
1849 void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction(
1851 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
1853 if (!IsOffloadEntry)
1856 assert(!ParentName.empty() &&
"Invalid target region parent name!");
1860 emitSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
1863 emitNonSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
1874 KMP_IDENT_SPMD_MODE = 0x01,
1876 KMP_IDENT_SIMPLE_RT_MODE = 0x02,
1877 LLVM_MARK_AS_BITMASK_ENUM(KMP_IDENT_SIMPLE_RT_MODE)
1882 (~KMP_IDENT_SPMD_MODE) & KMP_IDENT_SIMPLE_RT_MODE;
1886 switch (getExecutionMode()) {
1888 if (requiresFullRuntime())
1889 return KMP_IDENT_SPMD_MODE & (~KMP_IDENT_SIMPLE_RT_MODE);
1890 return KMP_IDENT_SPMD_MODE | KMP_IDENT_SIMPLE_RT_MODE;
1892 assert(requiresFullRuntime() &&
"Expected full runtime.");
1893 return (~KMP_IDENT_SPMD_MODE) & (~KMP_IDENT_SIMPLE_RT_MODE);
1895 return UndefinedMode;
1897 llvm_unreachable(
"Unknown flags are requested.");
1903 llvm_unreachable(
"OpenMP NVPTX can only handle device code.");
1927 const Expr *NumTeams,
1928 const Expr *ThreadLimit,
1936 bool &IsInParallelRegion;
1937 bool PrevIsInParallelRegion;
1940 NVPTXPrePostActionTy(
bool &IsInParallelRegion)
1941 : IsInParallelRegion(IsInParallelRegion) {}
1943 PrevIsInParallelRegion = IsInParallelRegion;
1944 IsInParallelRegion =
true;
1947 IsInParallelRegion = PrevIsInParallelRegion;
1949 } Action(IsInParallelRegion);
1951 bool PrevIsInTTDRegion = IsInTTDRegion;
1952 IsInTTDRegion =
false;
1953 bool PrevIsInTargetMasterThreadRegion = IsInTargetMasterThreadRegion;
1954 IsInTargetMasterThreadRegion =
false;
1957 D, ThreadIDVar, InnermostKind, CodeGen));
1958 IsInTargetMasterThreadRegion = PrevIsInTargetMasterThreadRegion;
1959 IsInTTDRegion = PrevIsInTTDRegion;
1961 !IsInParallelRegion) {
1962 llvm::Function *WrapperFun =
1963 createParallelDataSharingWrapper(OutlinedFun, D);
1964 WrapperFunctionsMap[OutlinedFun] = WrapperFun;
1976 "expected teams directive.");
1991 for (
const Expr *E : C->getVarRefs())
2001 "expected teams directive.");
2003 for (
const Expr *E : C->privates())
2015 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
2020 if (!LastPrivatesReductions.empty()) {
2021 GlobalizedRD = ::buildRecordForGlobalizedVars(
2025 }
else if (!LastPrivatesReductions.empty()) {
2026 assert(!TeamAndReductions.first &&
2027 "Previous team declaration is not expected.");
2029 std::swap(TeamAndReductions.second, LastPrivatesReductions);
2036 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
2040 NVPTXPrePostActionTy(
2042 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
2044 : Loc(Loc), GlobalizedRD(GlobalizedRD),
2045 MappedDeclsFields(MappedDeclsFields) {}
2050 auto I = Rt.FunctionGlobalizedDecls.try_emplace(CGF.
CurFn).first;
2051 I->getSecond().GlobalRecord = GlobalizedRD;
2052 I->getSecond().MappedParams =
2053 llvm::make_unique<CodeGenFunction::OMPMapVars>();
2054 DeclToAddrMapTy &Data = I->getSecond().LocalVarData;
2055 for (
const auto &Pair : MappedDeclsFields) {
2056 assert(Pair.getFirst()->isCanonicalDecl() &&
2057 "Expected canonical declaration");
2058 Data.insert(std::make_pair(Pair.getFirst(),
2059 MappedVarData(Pair.getSecond(),
2063 Rt.emitGenericVarsProlog(CGF, Loc);
2067 .emitGenericVarsEpilog(CGF);
2069 } Action(Loc, GlobalizedRD, MappedDeclsFields);
2072 D, ThreadIDVar, InnermostKind, CodeGen);
2073 llvm::Function *OutlinedFun = cast<llvm::Function>(OutlinedFunVal);
2074 OutlinedFun->removeFnAttr(llvm::Attribute::NoInline);
2075 OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone);
2076 OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);
2081 void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(
CodeGenFunction &CGF,
2083 bool WithSPMDCheck) {
2090 const auto I = FunctionGlobalizedDecls.find(CGF.
CurFn);
2091 if (I == FunctionGlobalizedDecls.end())
2093 if (
const RecordDecl *GlobalizedVarsRecord = I->getSecond().GlobalRecord) {
2101 unsigned Alignment =
2103 unsigned GlobalRecordSize =
2105 GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment);
2107 llvm::PointerType *GlobalRecPtrTy =
2111 if (!IsInTTDRegion &&
2117 if (I->getSecond().SecondaryGlobalRecord.hasValue()) {
2123 IsTTD = Bld.CreateIsNull(PL);
2127 Bld.CreateCondBr(IsSPMD, SPMDBB, NonSPMDBB);
2131 Address RecPtr =
Address(llvm::ConstantPointerNull::get(GlobalRecPtrTy),
2138 if (
const RecordDecl *SecGlobalizedVarsRecord =
2139 I->getSecond().SecondaryGlobalRecord.getValueOr(
nullptr)) {
2147 unsigned Alignment =
2149 unsigned GlobalRecordSize =
2151 GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment);
2152 Size = Bld.CreateSelect(
2153 IsTTD, llvm::ConstantInt::get(CGM.
SizeTy, GlobalRecordSize), Size);
2158 Size, CGF.
Builder.getInt16(0)};
2161 OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
2162 GlobalRecordSizeArg);
2164 GlobalRecValue, GlobalRecPtrTy);
2166 auto *Phi = Bld.CreatePHI(GlobalRecPtrTy,
2167 2,
"_select_stack");
2168 Phi->addIncoming(RecPtr.
getPointer(), SPMDBB);
2169 Phi->addIncoming(GlobalRecCastAddr, NonSPMDBB);
2170 GlobalRecCastAddr = Phi;
2171 I->getSecond().GlobalRecordAddr = Phi;
2172 I->getSecond().IsInSPMDModeFlag = IsSPMD;
2173 }
else if (IsInTTDRegion) {
2174 assert(GlobalizedRecords.back().Records.size() < 2 &&
2175 "Expected less than 2 globalized records: one for target and one " 2178 for (
const RecordDecl *RD : GlobalizedRecords.back().Records) {
2180 unsigned Alignment =
2184 llvm::alignTo(llvm::alignTo(Offset, Alignment) + Size, Alignment);
2186 unsigned Alignment =
2188 Offset = llvm::alignTo(Offset, Alignment);
2189 GlobalizedRecords.back().Records.push_back(GlobalizedVarsRecord);
2190 ++GlobalizedRecords.back().RegionCounter;
2191 if (GlobalizedRecords.back().Records.size() == 1) {
2192 assert(KernelStaticGlobalized &&
2193 "Kernel static pointer must be initialized already.");
2194 auto *UseSharedMemory =
new llvm::GlobalVariable(
2197 "_openmp_static_kernel$is_shared");
2198 UseSharedMemory->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
2204 false, Int16Ty, Loc);
2205 auto *StaticGlobalized =
new llvm::GlobalVariable(
2207 llvm::GlobalValue::CommonLinkage,
nullptr);
2208 auto *RecSize =
new llvm::GlobalVariable(
2211 "_openmp_static_kernel$size");
2212 RecSize->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
2219 llvm::ConstantInt::get(
2222 StaticGlobalized, Ld, IsInSharedMemory, ResAddr};
2224 OMPRTL_NVPTX__kmpc_get_team_static_memory),
2225 GlobalRecordSizeArg);
2226 GlobalizedRecords.back().Buffer = StaticGlobalized;
2227 GlobalizedRecords.back().RecSize = RecSize;
2228 GlobalizedRecords.back().UseSharedMemory = UseSharedMemory;
2229 GlobalizedRecords.back().Loc = Loc;
2231 assert(KernelStaticGlobalized &&
"Global address must be set already.");
2236 .castAs<PointerType>());
2240 I->getSecond().GlobalRecordAddr = GlobalRecValue;
2241 I->getSecond().IsInSPMDModeFlag =
nullptr;
2248 llvm::ConstantInt::get(CGM.
SizeTy, GlobalRecordSize),
2252 OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
2253 GlobalRecordSizeArg);
2255 GlobalRecValue, GlobalRecPtrTy);
2256 I->getSecond().GlobalRecordAddr = GlobalRecValue;
2257 I->getSecond().IsInSPMDModeFlag =
nullptr;
2265 decltype(I->getSecond().LocalVarData)::const_iterator SecIt;
2267 SecIt = I->getSecond().SecondaryLocalVarData->begin();
2268 llvm::PointerType *SecGlobalRecPtrTy =
2272 I->getSecond().GlobalRecordAddr, SecGlobalRecPtrTy),
2275 for (
auto &Rec : I->getSecond().LocalVarData) {
2276 bool EscapedParam = I->getSecond().EscapedParameters.count(Rec.first);
2279 const auto *VD = cast<VarDecl>(Rec.first);
2287 if (Rec.second.IsOnePerTeam) {
2288 VarTy = Rec.second.FD->getType();
2299 Rec.second.PrivateAddr = VarAddr.
getAddress();
2300 if (!IsInTTDRegion &&
2303 assert(I->getSecond().IsInSPMDModeFlag &&
2304 "Expected unknown execution mode or required SPMD check.");
2306 assert(SecIt->second.IsOnePerTeam &&
2307 "Secondary glob data must be one per team.");
2313 Rec.second.PrivateAddr = VarAddr.
getAddress();
2315 Address GlobalPtr = Rec.second.PrivateAddr;
2317 Rec.second.PrivateAddr =
Address(
2318 Bld.CreateSelect(I->getSecond().IsInSPMDModeFlag,
2323 const auto *VD = cast<VarDecl>(Rec.first);
2325 I->getSecond().MappedParams->setVarAddr(CGF, VD, VarAddr.
getAddress());
2331 for (
const ValueDecl *VD : I->getSecond().EscapedVariableLengthDecls) {
2339 Size = Bld.CreateNUWAdd(
2343 Size = Bld.CreateUDiv(Size, AlignVal);
2344 Size = Bld.CreateNUWMul(Size, AlignVal);
2348 Size, CGF.
Builder.getInt16(0)};
2351 OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
2352 GlobalRecordSizeArg);
2358 I->getSecond().MappedParams->setVarAddr(CGF, cast<VarDecl>(VD),
2360 I->getSecond().EscapedVariableLengthDeclsAddrs.emplace_back(GlobalRecValue);
2362 I->getSecond().MappedParams->apply(CGF);
2365 void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(
CodeGenFunction &CGF,
2366 bool WithSPMDCheck) {
2371 const auto I = FunctionGlobalizedDecls.find(CGF.
CurFn);
2372 if (I != FunctionGlobalizedDecls.end()) {
2373 I->getSecond().MappedParams->restore(CGF);
2377 llvm::reverse(I->getSecond().EscapedVariableLengthDeclsAddrs)) {
2382 if (I->getSecond().GlobalRecordAddr) {
2383 if (!IsInTTDRegion &&
2389 Bld.CreateCondBr(I->getSecond().IsInSPMDModeFlag, ExitBB, NonSPMDBB);
2395 OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
2398 }
else if (IsInTTDRegion) {
2399 assert(GlobalizedRecords.back().RegionCounter > 0 &&
2400 "region counter must be > 0.");
2401 --GlobalizedRecords.back().RegionCounter;
2403 if (GlobalizedRecords.back().RegionCounter == 0) {
2407 Address(GlobalizedRecords.back().UseSharedMemory,
2409 false, Int16Ty, GlobalizedRecords.back().Loc);
2411 llvm::ConstantInt::get(
2417 OMPRTL_NVPTX__kmpc_restore_team_static_memory),
2422 OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
2423 I->getSecond().GlobalRecordAddr);
2443 OutlinedFnArgs.push_back(ZeroAddr.
getPointer());
2444 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2455 emitSPMDParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond);
2457 emitNonSPMDParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond);
2460 void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall(
2463 llvm::Function *Fn = cast<llvm::Function>(OutlinedFn);
2473 Address ThreadIDAddr = ZeroAddr;
2474 auto &&CodeGen = [
this, Fn, CapturedVars, Loc, ZeroAddr, &ThreadIDAddr](
2479 OutlinedFnArgs.push_back(ThreadIDAddr.
getPointer());
2480 OutlinedFnArgs.push_back(ZeroAddr.getPointer());
2481 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2492 NVPTXActionTy Action(
2504 llvm::Function *WFn = WrapperFunctionsMap[Fn];
2505 assert(WFn &&
"Wrapper function does not exist!");
2510 CGF.EmitRuntimeCall(
2519 if (!CapturedVars.empty()) {
2522 CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy,
"shared_arg_refs");
2527 llvm::ConstantInt::get(CGM.
SizeTy, CapturedVars.size())};
2529 OMPRTL_NVPTX__kmpc_begin_sharing_variables),
2535 Address SharedArgListAddress = CGF.EmitLoadOfPointer(
2537 .castAs<PointerType>());
2540 CGF.getPointerSize());
2542 if (V->getType()->isIntegerTy())
2543 PtrV = Bld.CreateIntToPtr(V, CGF.VoidPtrTy);
2546 CGF.EmitStoreOfScalar(PtrV, Dst,
false,
2554 syncCTAThreads(CGF);
2562 syncCTAThreads(CGF);
2564 if (!CapturedVars.empty())
2565 CGF.EmitRuntimeCall(
2569 Work.emplace_back(WFn);
2572 auto &&LNParallelGen = [
this, Loc, &SeqGen, &L0ParallelGen](
2574 if (IsInParallelRegion) {
2575 SeqGen(CGF, Action);
2576 }
else if (IsInTargetMasterThreadRegion) {
2577 L0ParallelGen(CGF, Action);
2586 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(
".exit");
2587 llvm::BasicBlock *SeqBB = CGF.createBasicBlock(
".sequential");
2588 llvm::BasicBlock *ParallelCheckBB = CGF.createBasicBlock(
".parcheck");
2589 llvm::BasicBlock *MasterBB = CGF.createBasicBlock(
".master");
2590 llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall(
2592 Bld.CreateCondBr(IsSPMD, SeqBB, ParallelCheckBB);
2595 CGF.EmitBlock(ParallelCheckBB);
2602 Bld.CreateCondBr(Res, SeqBB, MasterBB);
2603 CGF.EmitBlock(SeqBB);
2604 SeqGen(CGF, Action);
2605 CGF.EmitBranch(ExitBB);
2608 CGF.EmitBlock(MasterBB);
2609 L0ParallelGen(CGF, Action);
2610 CGF.EmitBranch(ExitBB);
2614 CGF.EmitBlock(ExitBB,
true);
2627 void CGOpenMPRuntimeNVPTX::emitSPMDParallelCall(
2640 Address ThreadIDAddr = ZeroAddr;
2641 auto &&CodeGen = [
this, OutlinedFn, CapturedVars, Loc, ZeroAddr,
2647 OutlinedFnArgs.push_back(ThreadIDAddr.
getPointer());
2648 OutlinedFnArgs.push_back(ZeroAddr.getPointer());
2649 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2660 NVPTXActionTy Action(
2669 if (IsInTargetMasterThreadRegion) {
2690 llvm::ConstantPointerNull::get(
2692 llvm::ConstantInt::get(CGF.
Int32Ty, 0,
true)};
2739 CGF.
Builder.CreateCondBr(CmpLoopBound, TestBB, ExitBB);
2746 CGF.
Builder.CreateICmpEQ(ThreadID, CounterVal);
2747 CGF.
Builder.CreateCondBr(CmpThreadToCounter, BodyBB, SyncBB);
2778 "Cast type must sized.");
2780 "Val type must sized.");
2782 if (ValTy == CastTy)
2788 return CGF.
Builder.CreateIntCast(Val, LLVMCastTy,
2811 "Unsupported bitwidth in shuffle instruction.");
2814 ? OMPRTL_NVPTX__kmpc_shuffle_int32
2815 : OMPRTL_NVPTX__kmpc_shuffle_int64;
2850 for (
int IntSize = 8; IntSize >= 1; IntSize /= 2) {
2864 llvm::BasicBlock *CurrentBB = Bld.GetInsertBlock();
2866 llvm::PHINode *PhiSrc =
2867 Bld.CreatePHI(Ptr.
getType(), 2);
2868 PhiSrc->addIncoming(Ptr.
getPointer(), CurrentBB);
2869 llvm::PHINode *PhiDest =
2870 Bld.CreatePHI(ElemPtr.
getType(), 2);
2871 PhiDest->addIncoming(ElemPtr.
getPointer(), CurrentBB);
2877 Bld.CreateCondBr(Bld.CreateICmpSGT(PtrDiff, Bld.getInt64(IntSize - 1)),
2888 PhiSrc->addIncoming(LocalPtr.
getPointer(), ThenBB);
2889 PhiDest->addIncoming(LocalElemPtr.
getPointer(), ThenBB);
2901 Size = Size % IntSize;
2937 llvm::Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
2938 llvm::Value *ScratchpadIndex = CopyOptions.ScratchpadIndex;
2939 llvm::Value *ScratchpadWidth = CopyOptions.ScratchpadWidth;
2944 unsigned Size = Privates.size();
2945 for (
const Expr *Private : Privates) {
2950 bool ShuffleInElement =
false;
2953 bool UpdateDestListPtr =
false;
2956 bool IncrScratchpadSrc =
false;
2957 bool IncrScratchpadDest =
false;
2960 case RemoteLaneToThread: {
2970 DestElementPtrAddr =
2973 CGF.
CreateMemTemp(Private->getType(),
".omp.reduction.element");
2974 ShuffleInElement =
true;
2975 UpdateDestListPtr =
true;
2988 DestElementPtrAddr =
2995 case ThreadToScratchpad: {
3007 Bld.CreateNUWMul(ElementSizeInChars, ScratchpadIndex);
3009 Bld.CreateNUWAdd(DestBase.
getPointer(), CurrentOffset);
3010 ScratchPadElemAbsolutePtrVal =
3011 Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.
VoidPtrTy);
3012 DestElementAddr =
Address(ScratchPadElemAbsolutePtrVal,
3014 IncrScratchpadDest =
true;
3017 case ScratchpadToThread: {
3022 Bld.CreateNUWMul(ElementSizeInChars, ScratchpadIndex);
3024 Bld.CreateNUWAdd(SrcBase.
getPointer(), CurrentOffset);
3025 ScratchPadElemAbsolutePtrVal =
3026 Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.
VoidPtrTy);
3027 SrcElementAddr =
Address(ScratchPadElemAbsolutePtrVal,
3029 IncrScratchpadSrc =
true;
3033 DestElementPtrAddr =
3036 CGF.
CreateMemTemp(Private->getType(),
".omp.reduction.element");
3037 UpdateDestListPtr =
true;
3051 if (ShuffleInElement) {
3053 RemoteLaneOffset, Private->getExprLoc());
3055 if (Private->getType()->isScalarType()) {
3058 Private->
getType(), Private->getExprLoc());
3075 if (UpdateDestListPtr) {
3078 DestElementPtrAddr,
false,
3085 if ((IncrScratchpadDest || IncrScratchpadSrc) && (Idx + 1 < Size)) {
3089 ScratchpadBasePtr = Bld.CreateNUWAdd(
3091 Bld.CreateNUWMul(ScratchpadWidth, ElementSizeInChars));
3094 ScratchpadBasePtr = Bld.CreateNUWSub(
3095 ScratchpadBasePtr, llvm::ConstantInt::get(CGM.
SizeTy, 1));
3096 ScratchpadBasePtr = Bld.CreateUDiv(
3098 llvm::ConstantInt::get(CGM.
SizeTy, GlobalMemoryAlignment));
3099 ScratchpadBasePtr = Bld.CreateNUWAdd(
3100 ScratchpadBasePtr, llvm::ConstantInt::get(CGM.
SizeTy, 1));
3101 ScratchpadBasePtr = Bld.CreateNUWMul(
3103 llvm::ConstantInt::get(CGM.
SizeTy, GlobalMemoryAlignment));
3105 if (IncrScratchpadDest)
3145 Args.push_back(&ReduceListArg);
3146 Args.push_back(&NumWarpsArg);
3152 "_omp_reduction_inter_warp_copy_func", &CGM.
getModule());
3154 Fn->setDoesNotRecurse();
3167 StringRef TransferMediumName =
3168 "__openmp_nvptx_data_transfer_temporary_storage";
3169 llvm::GlobalVariable *TransferMedium =
3170 M.getGlobalVariable(TransferMediumName);
3171 if (!TransferMedium) {
3172 auto *Ty = llvm::ArrayType::get(CGM.
Int32Ty, WarpSize);
3174 TransferMedium =
new llvm::GlobalVariable(
3175 M, Ty,
false, llvm::GlobalVariable::CommonLinkage,
3176 llvm::Constant::getNullValue(Ty), TransferMediumName,
3177 nullptr, llvm::GlobalVariable::NotThreadLocal,
3178 SharedAddressSpace);
3198 for (
const Expr *Private : Privates) {
3203 unsigned RealTySize =
3207 for (
unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /=2) {
3208 unsigned NumIters = RealTySize / TySize;
3217 llvm::BasicBlock *PrecondBB =
nullptr;
3218 llvm::BasicBlock *ExitBB =
nullptr;
3231 Bld.CreateICmpULT(Cnt, llvm::ConstantInt::get(CGM.
IntTy, NumIters));
3232 Bld.CreateCondBr(Cmp, BodyBB, ExitBB);
3244 llvm::Value *IsWarpMaster = Bld.CreateIsNull(LaneID,
"warp_master");
3245 Bld.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
3263 llvm::Value *MediumPtrVal = Bld.CreateInBoundsGEP(
3264 TransferMedium, {llvm::Constant::getNullValue(CGM.
Int64Ty), WarpID});
3265 Address MediumPtr(MediumPtrVal, Align);
3277 Bld.CreateBr(MergeBB);
3280 Bld.CreateBr(MergeBB);
3298 AddrNumWarpsArg,
false, C.
IntTy, Loc);
3302 Bld.CreateICmpULT(ThreadID, NumWarpsVal,
"is_active_thread");
3303 Bld.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
3308 llvm::Value *SrcMediumPtrVal = Bld.CreateInBoundsGEP(
3310 {llvm::Constant::getNullValue(CGM.
Int64Ty), ThreadID});
3311 Address SrcMediumPtr(SrcMediumPtrVal, Align);
3319 TargetElemPtrPtr,
false, C.
VoidPtrTy, Loc);
3332 Bld.CreateBr(W0MergeBB);
3335 Bld.CreateBr(W0MergeBB);
3340 Cnt = Bld.CreateNSWAdd(Cnt, llvm::ConstantInt::get(CGM.
IntTy, 1));
3346 RealTySize %= TySize;
3439 Args.push_back(&ReduceListArg);
3440 Args.push_back(&LaneIDArg);
3441 Args.push_back(&RemoteLaneOffsetArg);
3442 Args.push_back(&AlgoVerArg);
3448 "_omp_reduction_shuffle_and_reduce_func", &CGM.
getModule());
3450 Fn->setDoesNotRecurse();
3479 CGF.
CreateMemTemp(ReductionArrayTy,
".omp.reduction.remote_reduce_list");
3485 LocalReduceList, RemoteReduceList,
3486 {RemoteLaneOffsetArgVal,
3511 llvm::Value *CondAlgo0 = Bld.CreateIsNull(AlgoVerArgVal);
3513 llvm::Value *Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1));
3515 Algo1, Bld.CreateICmpULT(LaneIDArgVal, RemoteLaneOffsetArgVal));
3517 llvm::Value *Algo2 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(2));
3519 Algo2, Bld.CreateIsNull(Bld.CreateAnd(LaneIDArgVal, Bld.getInt16(1))));
3520 CondAlgo2 = Bld.CreateAnd(
3521 CondAlgo2, Bld.CreateICmpSGT(RemoteLaneOffsetArgVal, Bld.getInt16(0)));
3523 llvm::Value *CondReduce = Bld.CreateOr(CondAlgo0, CondAlgo1);
3524 CondReduce = Bld.CreateOr(CondReduce, CondAlgo2);
3529 Bld.CreateCondBr(CondReduce, ThenBB, ElseBB);
3534 LocalReduceList.getPointer(), CGF.
VoidPtrTy);
3538 CGF, Loc, ReduceFn, {LocalReduceListPtr, RemoteReduceListPtr});
3539 Bld.CreateBr(MergeBB);
3542 Bld.CreateBr(MergeBB);
3548 Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1));
3550 Algo1, Bld.CreateICmpUGE(LaneIDArgVal, RemoteLaneOffsetArgVal));
3555 Bld.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB);
3559 RemoteReduceList, LocalReduceList);
3560 Bld.CreateBr(CpyMergeBB);
3563 Bld.CreateBr(CpyMergeBB);
3826 assert(!TeamsReduction && !ParallelReduction &&
3827 "Invalid reduction selection in emitReduction.");
3829 ReductionOps, Options);
3833 assert((TeamsReduction || ParallelReduction) &&
3834 "Invalid reduction selection in emitReduction.");
3844 if (ParallelReduction) {
3848 auto Size = RHSExprs.size();
3849 for (
const Expr *E : Privates) {
3850 if (E->getType()->isVariablyModifiedType())
3854 llvm::APInt ArraySize(32, Size);
3859 CGF.
CreateMemTemp(ReductionArrayTy,
".omp.reduction.red_list");
3860 auto IPriv = Privates.begin();
3862 for (
unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
3869 if ((*IPriv)->getType()->isVariablyModifiedType()) {
3889 Privates, LHSExprs, RHSExprs, ReductionOps);
3891 CGM, Privates, ReductionArrayTy, ReductionFn, Loc);
3897 CGF.
Builder.getInt32(RHSExprs.size()),
3898 ReductionArrayTySize,
3904 OMPRTL_NVPTX__kmpc_parallel_reduce_nowait_v2),
3907 assert(TeamsReduction &&
"expected teams reduction.");
3908 std::string Name =
getName({
"reduction"});
3913 OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_simple),
3921 Res, llvm::ConstantInt::get(CGM.
Int32Ty, 1));
3922 CGF.
Builder.CreateCondBr(Cond, ThenBB, ExitBB);
3931 auto &&CodeGen = [
Privates, LHSExprs, RHSExprs, ReductionOps,
3933 auto IPriv = Privates.begin();
3934 auto ILHS = LHSExprs.begin();
3935 auto IRHS = RHSExprs.begin();
3936 for (
const Expr *E : ReductionOps) {
3938 cast<DeclRefExpr>(*IRHS));
3944 if (ParallelReduction) {
3947 NVPTXActionTy Action(
3954 assert(TeamsReduction &&
"expected teams reduction.");
3956 std::string Name =
getName({
"reduction"});
3960 NVPTXActionTy Action(
3963 OMPRTL_NVPTX__kmpc_nvptx_teams_end_reduce_nowait_simple),
3970 CGF.EmitBlock(ExitBB,
true);
3975 const VarDecl *NativeParam)
const {
3980 const Type *NonQualTy = QC.
strip(ArgType);
3981 QualType PointeeTy = cast<ReferenceType>(NonQualTy)->getPointeeType();
3982 if (
const auto *
Attr = FD->
getAttr<OMPCaptureKindAttr>()) {
3983 if (
Attr->getCaptureKind() == OMPC_map) {
3990 enum { NVPTX_local_addr = 5 };
3993 if (isa<ImplicitParamDecl>(NativeParam))
4008 const VarDecl *TargetParam)
const {
4009 assert(NativeParam != TargetParam &&
4011 "Native arg must not be the same as target arg.");
4015 const Type *NonQualTy = QC.
strip(NativeParamType);
4016 QualType NativePointeeTy = cast<ReferenceType>(NonQualTy)->getPointeeType();
4017 unsigned NativePointeeAddrSpace =
4024 TargetAddr, TargetAddr->getType()->getPointerElementType()->getPointerTo(
4028 TargetAddr, TargetAddr->getType()->getPointerElementType()->getPointerTo(
4029 NativePointeeAddrSpace));
4033 return NativeParamAddr;
4040 TargetArgs.reserve(Args.size());
4042 cast<llvm::FunctionType>(OutlinedFn->getType()->getPointerElementType());
4043 for (
unsigned I = 0, E = Args.size(); I < E; ++I) {
4044 if (FnType->isVarArg() && FnType->getNumParams() <= I) {
4045 TargetArgs.append(std::next(Args.begin(), I), Args.end());
4048 llvm::Type *TargetType = FnType->getParamType(I);
4050 if (!TargetType->isPointerTy()) {
4051 TargetArgs.emplace_back(NativeArg);
4056 NativeArg->getType()->getPointerElementType()->getPointerTo());
4057 TargetArgs.emplace_back(
4067 llvm::Function *CGOpenMPRuntimeNVPTX::createParallelDataSharingWrapper(
4084 WrapperArgs.emplace_back(&ParallelLevelArg);
4085 WrapperArgs.emplace_back(&WrapperArg);
4092 Twine(OutlinedParallelFn->getName(),
"_wrapper"), &CGM.
getModule());
4095 Fn->setDoesNotRecurse();
4101 const auto *RD = CS.getCapturedRecordDecl();
4102 auto CurField = RD->field_begin();
4115 auto CI = CS.capture_begin();
4130 if (CS.capture_size() > 0 ||
4143 Src, CGF.
SizeTy->getPointerTo());
4148 cast<OMPLoopDirective>(D).getLowerBoundVariable()->getExprLoc());
4149 Args.emplace_back(LB);
4154 Src, CGF.
SizeTy->getPointerTo());
4159 cast<OMPLoopDirective>(D).getUpperBoundVariable()->getExprLoc());
4160 Args.emplace_back(UB);
4163 if (CS.capture_size() > 0) {
4165 for (
unsigned I = 0, E = CS.capture_size(); I < E; ++I, ++CI, ++CurField) {
4166 QualType ElemTy = CurField->getType();
4175 if (CI->capturesVariableByCopy() &&
4176 !CI->getCapturedVar()->getType()->isAnyPointerType()) {
4180 Args.emplace_back(Arg);
4194 assert(D &&
"Expected function or captured|block decl.");
4195 assert(FunctionGlobalizedDecls.count(CGF.
CurFn) == 0 &&
4196 "Function is registered already.");
4197 assert((!TeamAndReductions.first || TeamAndReductions.first == D) &&
4198 "Team is set but not processed.");
4199 const Stmt *Body =
nullptr;
4200 bool NeedToDelayGlobalization =
false;
4201 if (
const auto *FD = dyn_cast<FunctionDecl>(D)) {
4202 Body = FD->getBody();
4203 }
else if (
const auto *BD = dyn_cast<BlockDecl>(D)) {
4204 Body = BD->getBody();
4205 }
else if (
const auto *CD = dyn_cast<CapturedDecl>(D)) {
4206 Body = CD->getBody();
4208 if (NeedToDelayGlobalization &&
4214 CheckVarsEscapingDeclContext VarChecker(CGF, TeamAndReductions.second);
4215 VarChecker.Visit(Body);
4217 VarChecker.getGlobalizedRecord(IsInTTDRegion);
4218 TeamAndReductions.first =
nullptr;
4219 TeamAndReductions.second.clear();
4221 VarChecker.getEscapedVariableLengthDecls();
4222 if (!GlobalizedVarsRecord && EscapedVariableLengthDecls.empty())
4224 auto I = FunctionGlobalizedDecls.try_emplace(CGF.
CurFn).first;
4225 I->getSecond().MappedParams =
4226 llvm::make_unique<CodeGenFunction::OMPMapVars>();
4227 I->getSecond().GlobalRecord = GlobalizedVarsRecord;
4228 I->getSecond().EscapedParameters.insert(
4229 VarChecker.getEscapedParameters().begin(),
4230 VarChecker.getEscapedParameters().end());
4231 I->getSecond().EscapedVariableLengthDecls.append(
4232 EscapedVariableLengthDecls.begin(), EscapedVariableLengthDecls.end());
4233 DeclToAddrMapTy &Data = I->getSecond().LocalVarData;
4234 for (
const ValueDecl *VD : VarChecker.getEscapedDecls()) {
4235 assert(VD->isCanonicalDecl() &&
"Expected canonical declaration");
4236 const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD);
4237 Data.insert(std::make_pair(VD, MappedVarData(FD, IsInTTDRegion)));
4239 if (!IsInTTDRegion && !NeedToDelayGlobalization && !IsInParallelRegion) {
4240 CheckVarsEscapingDeclContext VarChecker(CGF,
llvm::None);
4241 VarChecker.Visit(Body);
4242 I->getSecond().SecondaryGlobalRecord =
4243 VarChecker.getGlobalizedRecord(
true);
4244 I->getSecond().SecondaryLocalVarData.emplace();
4245 DeclToAddrMapTy &Data = I->getSecond().SecondaryLocalVarData.getValue();
4246 for (
const ValueDecl *VD : VarChecker.getEscapedDecls()) {
4247 assert(VD->isCanonicalDecl() &&
"Expected canonical declaration");
4248 const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD);
4250 std::make_pair(VD, MappedVarData(FD,
true)));
4253 if (!NeedToDelayGlobalization) {
4254 emitGenericVarsProlog(CGF, D->
getBeginLoc(),
true);
4256 GlobalizationScope() =
default;
4260 .emitGenericVarsEpilog(CGF,
true);
4273 auto I = FunctionGlobalizedDecls.find(CGF.
CurFn);
4274 if (I == FunctionGlobalizedDecls.end())
4276 auto VDI = I->getSecond().LocalVarData.find(VD);
4277 if (VDI != I->getSecond().LocalVarData.end())
4278 return VDI->second.PrivateAddr;
4283 auto VDI = I->getSecond().LocalVarData.find(
4284 cast<VarDecl>(cast<DeclRefExpr>(IT->getRef())->getDecl())
4286 if (VDI != I->getSecond().LocalVarData.end())
4287 return VDI->second.PrivateAddr;
4294 FunctionGlobalizedDecls.erase(CGF.
CurFn);
4303 ScheduleKind = OMPC_DIST_SCHEDULE_static;
4310 CGF, S, ScheduleKind, Chunk);
4316 const Expr *&ChunkExpr)
const {
4317 ScheduleKind = OMPC_SCHEDULE_static;
4319 llvm::APInt ChunkSize(32, 1);
4328 " Expected target-based directive.");
4333 if (!C.capturesVariable())
4335 const VarDecl *VD = C.getCapturedVar();
4336 const auto *RD = VD->
getType()
4340 if (!RD || !RD->isLambda())
4349 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
4351 RD->getCaptureFields(Captures, ThisCapture);
4361 const VarDecl *VD = LC.getCapturedVar();
4364 auto It = Captures.find(VD);
4365 assert(It != Captures.end() &&
"Found lambda capture without field.");
4381 llvm::StringMap<bool> Features;
4385 for (
const auto &Feature : Features) {
4386 if (Feature.getValue()) {
4400 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
4414 CGM.
Error(Clause->getBeginLoc(),
4415 "Target architecture does not support unified addressing");
4439 llvm_unreachable(
"Unexpected Cuda arch.");
4447 std::pair<unsigned, unsigned> Data;
4451 Data.second = CGM.
getLangOpts().OpenMPCUDABlocksPerSM;
4452 if (Data.first && Data.second)
4492 llvm_unreachable(
"Unexpected Cuda arch.");
4494 llvm_unreachable(
"Unexpected NVPTX target without ptx feature.");
4498 if (!GlobalizedRecords.empty()) {
4508 for (
const GlobalPtrSizeRecsTy &Records : GlobalizedRecords) {
4509 if (Records.Records.empty())
4512 unsigned RecAlignment = 0;
4513 for (
const RecordDecl *RD : Records.Records) {
4516 RecAlignment =
std::max(RecAlignment, Alignment);
4519 llvm::alignTo(llvm::alignTo(Size, Alignment) + RecSize, Alignment);
4521 Size = llvm::alignTo(Size, RecAlignment);
4522 llvm::APInt ArySize(64, Size);
4525 const bool UseSharedMemory = Size <= SharedMemorySize;
4533 if (UseSharedMemory) {
4534 SharedStaticRD->
addDecl(Field);
4535 SharedRecs.push_back(&Records);
4537 StaticRD->addDecl(Field);
4538 GlobalRecs.push_back(&Records);
4540 Records.RecSize->setInitializer(llvm::ConstantInt::get(CGM.
SizeTy, Size));
4541 Records.UseSharedMemory->setInitializer(
4542 llvm::ConstantInt::get(CGM.
Int16Ty, UseSharedMemory ? 1 : 0));
4549 llvm::APInt ArySize(64, SharedMemorySize);
4558 SharedStaticRD->
addDecl(Field);
4564 auto *GV =
new llvm::GlobalVariable(
4566 false, llvm::GlobalValue::CommonLinkage,
4567 llvm::Constant::getNullValue(LLVMStaticTy),
4568 "_openmp_shared_static_glob_rd_$_",
nullptr,
4569 llvm::GlobalValue::NotThreadLocal,
4571 auto *Replacement = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
4573 for (
const GlobalPtrSizeRecsTy *Rec : SharedRecs) {
4574 Rec->Buffer->replaceAllUsesWith(Replacement);
4575 Rec->Buffer->eraseFromParent();
4582 llvm::APInt Size1(32, SMsBlockPerSM.second);
4586 llvm::APInt Size2(32, SMsBlockPerSM.first);
4590 auto *GV =
new llvm::GlobalVariable(
4592 false, llvm::GlobalValue::CommonLinkage,
4593 llvm::Constant::getNullValue(LLVMArr2Ty),
4594 "_openmp_static_glob_rd_$_");
4595 auto *Replacement = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
4597 for (
const GlobalPtrSizeRecsTy *Rec : GlobalRecs) {
4598 Rec->Buffer->replaceAllUsesWith(Replacement);
4599 Rec->Buffer->eraseFromParent();
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
llvm::PointerType * Int8PtrPtrTy
RecordDecl * buildImplicitRecord(StringRef Name, RecordDecl::TagKind TK=TTK_Struct) const
Create a new implicit TU-level CXXRecordDecl or RecordDecl declaration.
QualType getAddrSpaceQualType(QualType T, LangAS AddressSpace) const
Return the uniqued reference to the type for an address space qualified type with the specified type ...
const BlockDecl * getBlockDecl() const
TargetOptions & getTargetOpts() const
Retrieve the target options.
static const Decl * getCanonicalDecl(const Decl *D)
llvm::IntegerType * IntTy
int
LValue MakeNaturalAlignPointeeAddrLValue(llvm::Value *V, QualType T)
Given a value of type T* that may not be to a complete object, construct an l-value with the natural ...
Other implicit parameter.
A class which contains all the information about a particular captured value.
if(T->getSizeExpr()) TRY_TO(TraverseStmt(T -> getSizeExpr()))
PointerType - C99 6.7.5.1 - Pointer Declarators.
A (possibly-)qualified type.
CodeGenTypes & getTypes()
ArrayRef< OMPClause * > clauses()
llvm::Type * ConvertTypeForMem(QualType T)
static llvm::Value * getNVPTXLaneID(CodeGenFunction &CGF)
Get the id of the current lane in the Warp.
static bool hasParallelIfNumThreadsClause(ASTContext &Ctx, const OMPExecutableDirective &D)
Check if the parallel directive has an 'if' clause with non-constant or false condition.
Address CreateMemTemp(QualType T, const Twine &Name="tmp", Address *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignmen and cas...
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
bool HaveInsertPoint() const
HaveInsertPoint - True if an insertion point is defined.
llvm::LLVMContext & getLLVMContext()
void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)
Emits single reduction combiner.
Address CreateConstGEP(Address Addr, uint64_t Index, CharUnits EltSize, const llvm::Twine &Name="")
Given addr = T* ...
static std::pair< unsigned, unsigned > getSMsBlocksPerSM(CodeGenModule &CGM)
Get number of SMs and number of blocks per SM.
attr_iterator attr_begin() const
Stmt - This represents one statement.
llvm::Value * emitParallelOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits inlined function for the specified OpenMP parallel.
void adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, const OMPExecutableDirective &D) const override
Adjust some parameters for the target-based directives, like addresses of the variables captured by r...
bool hasNonTrivialCall(const ASTContext &Ctx) const
Determine whether this expression involves a call to any function that is not trivial.
void clearLocThreadIdInsertPt(CodeGenFunction &CGF)
static void getTeamsReductionVars(ASTContext &Ctx, const OMPExecutableDirective &D, llvm::SmallVectorImpl< const ValueDecl *> &Vars)
Get list of reduction variables from the teams ... directives.
Decl - This represents one declaration (or definition), e.g.
specific_attr_iterator - Iterates over a subrange of an AttrVec, only providing attributes that are o...
SourceLocation getBeginLoc() const
Returns starting location of directive kind.
SourceLocation getBeginLoc() const LLVM_READONLY
CharUnits getPointerSize() const
Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override
Gets the address of the native argument basing on the address of the target-specific parameter...
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
static bool stable_sort_comparator(const PrivateDataTy P1, const PrivateDataTy P2)
This represents 'if' clause in the '#pragma omp ...' directive.
llvm::Value * ScratchpadIndex
CapturedStmt * getInnermostCapturedStmt()
Get innermost captured statement for the construct.
static llvm::Value * castValueToType(CodeGenFunction &CGF, llvm::Value *Val, QualType ValTy, QualType CastTy, SourceLocation Loc)
Cast value to the specified type.
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...
llvm::Value * LoadCXXThis()
LoadCXXThis - Load the value of 'this'.
The base class of the type hierarchy.
virtual void completeDefinition()
Note that the definition of this type is now complete.
bool isZero() const
isZero - Test whether the quantity equals zero.
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
static bool hasNestedSPMDDirective(ASTContext &Ctx, const OMPExecutableDirective &D)
Check for inner (nested) SPMD construct, if any.
Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
LValue EmitLValueForFieldInitialization(LValue Base, const FieldDecl *Field)
EmitLValueForFieldInitialization - Like EmitLValueForField, except that if the Field is a reference...
static bool hasStaticScheduling(const OMPExecutableDirective &D)
Check if the directive is loops based and has schedule clause at all or has static scheduling...
Describes the capture of a variable or of this, or of a C++1y init-capture.
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
static std::pair< ValueDecl *, bool > getPrivateItem(Sema &S, Expr *&RefExpr, SourceLocation &ELoc, SourceRange &ERange, bool AllowArraySection=false)
QualType getElementType() const
bool capturesVariable(const VarDecl *Var) const
True if this variable has been captured.
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
CudaArch StringToCudaArch(llvm::StringRef S)
Represents a variable declaration or definition.
llvm::Value * getThreadID(CodeGenFunction &CGF, SourceLocation Loc)
Gets thread id value for the current thread.
LangAS getLangASFromTargetAS(unsigned TargetAS)
This represents 'num_threads' clause in the '#pragma omp ...' directive.
virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, ArrayRef< llvm::Value *> Args=llvm::None) const
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
const ArrayType * castAsArrayTypeUnsafe() const
A variant of castAs<> for array type which silently discards qualifiers from the outermost type...
DiagnosticsEngine & getDiags() const
OpenMPDirectiveKind ReductionKind
llvm::Value * getPointer() const
llvm::Type * ConvertTypeForMem(QualType T)
ConvertTypeForMem - Convert type T into a llvm::Type.
std::string getName(ArrayRef< StringRef > Parts) const
Get the platform-specific name separator.
unsigned getAddressSpace() const
Return the address space that this address resides in.
SPMD execution mode (all threads are worker threads).
IdentifierInfo * getIdentifier() const
Get the identifier that names this declaration, if there is one.
Represents a struct/union/class.
DataSharingMode
Target codegen is specialized based on two data-sharing modes: CUDA, in which the local variables are...
clauselist_range clauselists()
Address getAddress() const
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
attr_iterator attr_end() const
llvm::IntegerType * Int64Ty
The scope used to remap some variables as private in the OpenMP loop body (or other captured region e...
static const Stmt * getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body)
Checks if the Body is the CompoundStmt and returns its child statement iff there is only one that is ...
Represents a member of a struct/union/class.
This represents clause 'lastprivate' in the '#pragma omp ...' directives.
CharUnits getAlignment() const
llvm::IntegerType * SizeTy
const CapturedStmt * getCapturedStmt(OpenMPDirectiveKind RegionKind) const
Returns the captured statement associated with the component region within the (combined) directive...
unsigned getDefaultLocationReserved2Flags() const override
Returns additional flags that can be stored in reserved_2 field of the default location.
static llvm::Value * getMasterThreadID(CodeGenFunction &CGF)
Get the thread id of the OMP master thread.
void setLocThreadIdInsertPt(CodeGenFunction &CGF, bool AtCurrentPoint=false)
CharUnits getSizeAlign() const
llvm::CallInst * EmitRuntimeCall(llvm::Value *callee, const Twine &name="")
void startDefinition()
Starts the definition of this tag declaration.
bool isReferenceType() const
void functionFinished(CodeGenFunction &CGF) override
Cleans up references to the objects in finished function.
OpenMPDirectiveKind getDirectiveKind() const
SourceLocation getBeginLoc() const LLVM_READONLY
static bool hasNestedLightweightDirective(ASTContext &Ctx, const OMPExecutableDirective &D)
Check for inner (nested) lightweight runtime construct, if any.
void InitTempAlloca(Address Alloca, llvm::Value *Value)
InitTempAlloca - Provide an initial value for the given alloca which will be observable at all locati...
This is a common base class for loop directives ('omp simd', 'omp for', 'omp for simd' etc...
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
OpenMPDistScheduleClauseKind
OpenMP attributes for 'dist_schedule' clause.
bool EvaluateAsBooleanCondition(bool &Result, const ASTContext &Ctx) const
EvaluateAsBooleanCondition - Return true if this is a constant which we can fold and convert to a boo...
Address CreateElementBitCast(Address Addr, llvm::Type *Ty, const llvm::Twine &Name="")
Cast the element type of the given address to a different type, preserving information like the align...
CharUnits - This is an opaque type for sizes expressed in character units.
bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a teams-kind directive.
CharUnits getAlignment() const
Return the alignment of this pointer.
llvm::PointerType * VoidPtrTy
Expr * getIterationVariable() const
virtual void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc) override
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...
void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override
This function ought to emit, in the general case, a call to.
ModeFlagsTy
Enum for accessing the reserved_2 field of the ident_t struct.
bool isCXXThisExprCaptured() const
bool isConstexpr() const
Whether this variable is (C++11) constexpr.
CharUnits getDeclAlign(const Decl *D, bool ForAlignof=false) const
Return a conservative estimate of the alignment of the specified decl D.
static llvm::Value * getNVPTXWarpID(CodeGenFunction &CGF)
Get the id of the warp in the block.
Scope - A scope is a transient data structure that is used while parsing the program.
llvm::PointerType * VoidPtrPtrTy
static CGOpenMPRuntimeNVPTX::DataSharingMode getDataSharingMode(CodeGenModule &CGM)
bool isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target code offload directive.
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, ArrayRef< llvm::Value *> CapturedVars, const Expr *IfCond) override
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
void addCompilerUsedGlobal(llvm::GlobalValue *GV)
Add a global to a list to be added to the llvm.compiler.used metadata.
llvm::Value * emitReductionFunction(CodeGenModule &CGM, SourceLocation Loc, llvm::Type *ArgsType, ArrayRef< const Expr *> Privates, ArrayRef< const Expr *> LHSExprs, ArrayRef< const Expr *> RHSExprs, ArrayRef< const Expr *> ReductionOps)
Emits reduction function.
This represents clause 'reduction' in the '#pragma omp ...' directives.
llvm::Value * emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0)
Emits object of ident_t type with info for source location.
bool isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a worksharing directive.
A C++ lambda expression, which produces a function object (of unspecified type) that can be invoked l...
virtual llvm::Value * emitTeamsOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP teams directive D.
CharUnits getPointerAlign() const
static IntegerLiteral * Create(const ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l)
Returns a new integer literal with value 'V' and type 'type'.
bool isInitCapture(const LambdaCapture *Capture) const
Determine whether one of this lambda's captures is an init-capture.
static llvm::Value * createRuntimeShuffleFunction(CodeGenFunction &CGF, llvm::Value *Elem, QualType ElemType, llvm::Value *Offset, SourceLocation Loc)
This function creates calls to one of two shuffle functions to copy variables between lanes in a warp...
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
LValue EmitLValueForField(LValue Base, const FieldDecl *Field)
llvm::Constant * CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs=llvm::AttributeList(), bool Local=false)
Create a new runtime function with the specified type and name.
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Unknown execution mode (orphaned directive).
static CharUnits One()
One - Construct a CharUnits quantity of one.
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
ASTContext & getContext() const
llvm::CallInst * EmitNounwindRuntimeCall(llvm::Value *callee, const Twine &name="")
Describes the capture of either a variable, or 'this', or variable-length array type.
bool isOpenMPPrivate(OpenMPClauseKind Kind)
Checks if the specified clause is one of private clauses like 'private', 'firstprivate', 'reduction' etc.
void setAddress(Address address)
static void getDistributeLastprivateVars(ASTContext &Ctx, const OMPExecutableDirective &D, llvm::SmallVectorImpl< const ValueDecl *> &Vars)
Get list of lastprivate variables from the teams distribute ...
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
TypeSourceInfo * getTrivialTypeSourceInfo(QualType T, SourceLocation Loc=SourceLocation()) const
Allocate a TypeSourceInfo where all locations have been initialized to a given location, which defaults to the empty location.
Address CreateDefaultAlignTempAlloca(llvm::Type *Ty, const Twine &Name="tmp")
CreateDefaultAlignTempAlloca - This creates an alloca with the default ABI alignment of the given L...
This represents '#pragma omp requires...' directive.
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
const Stmt * getAssociatedStmt() const
Returns statement associated with the directive.
virtual bool initFeatureMap(llvm::StringMap< bool > &Features, DiagnosticsEngine &Diags, StringRef CPU, const std::vector< std::string > &FeatureVec) const
Initialize the map with the default set of target features for the CPU this should include all legal ...
Represent the declaration of a variable (in which case it is an lvalue) a function (in which case it ...
This represents one expression.
virtual llvm::Value * emitParallelOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP parallel directive D.
Enters a new scope for capturing cleanups, all of which will be executed once the scope is exited...
Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD) override
Gets the OpenMP-specific address of the local variable.
Stmt * IgnoreContainers(bool IgnoreCaptured=false)
Skip no-op (attributed, compound) container stmts and skip captured stmt at the top, if IgnoreCaptured is true.
bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a parallel-kind directive.
const CGFunctionInfo & arrangeNullaryFunction()
A nullary function is a freestanding function of type 'void ()'.
BlockExpr - Adaptor class for mixing a BlockDecl with expressions.
VlaSizePair getVLASize(const VariableArrayType *vla)
Returns an LLVM value that corresponds to the size, in non-variably-sized elements, of a variable length array type, plus the largest non-variably-sized element type.
void getDefaultScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const override
Choose a default value for the schedule clause.
llvm::PointerType * getType() const
Return the type of the pointer value.
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
DeclContext * getDeclContext()
static llvm::iterator_range< specific_clause_iterator< SpecificClause > > getClausesOfKind(ArrayRef< OMPClause *> Clauses)
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
This represents 'ordered' clause in the '#pragma omp ...' directive.
llvm::IntegerType * Int32Ty
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, ArrayType::ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type...
QualType getRecordType(const RecordDecl *Decl) const
UnaryOperator - This represents the unary-expression's (except sizeof and alignof), the postinc/postdec operators from postfix-expression, and various extensions.
MachineConfiguration
GPU Configuration: This information can be derived from cuda registers, however, providing compile ti...
llvm::Value * EmitCastToVoidPtr(llvm::Value *value)
Emit a cast to void* in the appropriate address space.
Allow UB that we can give a value, but not arbitrary unmodeled side effects.
const TargetInfo & getTarget() const
const LangOptions & getLangOpts() const
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
ASTContext & getContext() const
OpenMPProcBindClauseKind
OpenMP attributes for 'proc_bind' clause.
Non-SPMD execution mode (1 master thread, others are workers).
llvm::Value * ScratchpadWidth
virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr)
Emits a critical region.
VarDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
GlobalDecl - represents a global declaration.
bool hasClausesOfKind() const
Returns true if the current directive has one or more clauses of a specific kind. ...
std::string CPU
If given, the name of the target CPU to generate code for.
The l-value was considered opaque, so the alignment was determined from a type.
llvm::Value * emitTeamsOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits inlined function for the specified OpenMP teams.
Address CreateBitCast(Address Addr, llvm::Type *Ty, const llvm::Twine &Name="")
void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, ArrayRef< llvm::Value *> Args=llvm::None) const override
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
This captures a statement into a function.
QualType getCanonicalType() const
void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Value *OutlinedFn, ArrayRef< llvm::Value *> CapturedVars) override
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)
Returns default flags for the barriers depending on the directive, for which this barrier is going to ...
void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, const RegionCodeGenTy &ThenGen, const RegionCodeGenTy &ElseGen)
Emits code for OpenMP 'if' clause using specified CodeGen function.
Encodes a location in the source.
static llvm::Value * getThreadLimit(CodeGenFunction &CGF, bool IsInSPMDExecutionMode=false)
Get the value of the thread_limit clause in the teams directive.
llvm::Type * getIdentTyPointerTy()
Returns pointer to ident_t type.
QualType getUIntPtrType() const
Return a type compatible with "uintptr_t" (C99 7.18.1.4), as defined by the target.
Expr * getSubExpr() const
bool isVariablyModifiedType() const
Whether this type is a variably-modified type (C99 6.7.5).
void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override
Emits a critical region.
This is a basic class for representing single OpenMP executable directive.
CastKind getCastKind() const
This represents 'schedule' clause in the '#pragma omp ...' directive.
llvm::IntegerType * Int16Ty
DeclStmt - Adaptor class for mixing declarations with statements and expressions. ...
OpenMPDirectiveKind
OpenMP directives.
This file defines OpenMP nodes for declarative directives.
std::vector< std::string > Features
The list of target specific features to enable or disable – this should be a list of strings startin...
This is a basic class for representing single OpenMP clause.
bool isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind)
Checks if the specified directive kind is one of the composite or combined directives that need loop ...
static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name, bool Mode)
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
ImplicitCastExpr - Allows us to explicitly represent implicit type conversions, which have no direct ...
Stmt * getCapturedStmt()
Retrieve the statement being captured.
bool isLValue() const
isLValue - True if this expression is an "l-value" according to the rules of the current language...
llvm::Value * getCriticalRegionLock(StringRef CriticalName)
Returns corresponding lock object for the specified critical region name.
virtual void emitProcBindClause(CodeGenFunction &CGF, OpenMPProcBindClauseKind ProcBind, SourceLocation Loc)
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...
void Error(SourceLocation loc, StringRef error)
Emit a general error that something can't be done.
virtual void functionFinished(CodeGenFunction &CGF)
Cleans up references to the objects in finished function.
const VarDecl * translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const override
Translates the native parameter of outlined function if this is required for target.
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
FunctionArgList - Type for representing both the decl and type of parameters to a function...
static CudaArch getCudaArch(CodeGenModule &CGM)
void setAction(PrePostActionTy &Action) const
CGFunctionInfo - Class to encapsulate the information about a function definition.
This class organizes the cross-function state that is used while generating LLVM code.
CGOpenMPRuntime & getOpenMPRuntime()
Return a reference to the configured OpenMP runtime.
static ParmVarDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, StorageClass S, Expr *DefArg)
Dataflow Directional Tag Classes.
Class provides a way to call simple version of codegen for OpenMP region, or an advanced with possibl...
LValue EmitLoadOfReferenceLValue(LValue RefLVal)
A qualifier set is used to build a set of qualifiers.
DeclContext - This is used only as base class of specific decl types that can act as declaration cont...
ArrayRef< Capture > captures() const
static bool isTrivial(ASTContext &Ctx, const Expr *E)
Checks if the expression is constant or does not have non-trivial function calls. ...
A basic class for pre|post-action for advanced codegen sequence for OpenMP region.
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
static void emitReductionListCopy(CopyAction Action, CodeGenFunction &CGF, QualType ReductionArrayTy, ArrayRef< const Expr *> Privates, Address SrcBase, Address DestBase, CopyOptionsTy CopyOptions={nullptr, nullptr, nullptr})
Emit instructions to copy a Reduce list, which contains partially aggregated values, in the specified direction.
const Type * strip(QualType type)
Collect any qualifiers on the given type and return an unqualified type.
Address CreateConstInBoundsGEP(Address Addr, uint64_t Index, CharUnits EltSize, const llvm::Twine &Name="")
Given addr = T* ...
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
bool isInitCapture() const
Whether this variable is the implicit variable for a lambda init-capture.
llvm::Module & getModule() const
QualType apply(const ASTContext &Context, QualType QT) const
Apply the collected qualifiers to the given type.
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
virtual bool hasFeature(StringRef Feature) const
Determine whether the given target has the given feature.
Expr * IgnoreParenImpCasts() LLVM_READONLY
IgnoreParenImpCasts - Ignore parentheses and implicit casts.
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc)
Emits address of the word in a memory where current thread id is stored.
void checkArchForUnifiedAddressing(CodeGenModule &CGM, const OMPRequiresDecl *D) const override
Perform check on requires decl to ensure that target architecture supports unified addressing...
void getOpenMPCaptureRegions(llvm::SmallVectorImpl< OpenMPDirectiveKind > &CaptureRegions, OpenMPDirectiveKind DKind)
Return the captured regions of an OpenMP directive.
bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a distribute directive.
llvm::Constant * createNVPTXRuntimeFunction(unsigned Function)
Returns specified OpenMP runtime function for the current OpenMP implementation.
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr *> Privates, ArrayRef< const Expr *> LHSExprs, ArrayRef< const Expr *> RHSExprs, ArrayRef< const Expr *> ReductionOps, ReductionOptionsTy Options) override
Emit a code for reduction clause.
This file defines OpenMP AST classes for executable directives and clauses.
Address CreateConstArrayGEP(Address Addr, uint64_t Index, CharUnits EltSize, const llvm::Twine &Name="")
Given addr = [n x T]* ...
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
llvm::Type * getElementType() const
Return the type of the values stored in this address.
llvm::PointerType * Int8PtrTy
OpenMPScheduleClauseKind
OpenMP attributes for 'schedule' clause.
void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI)
Set the attributes on the LLVM function for the given decl and function info.
static bool supportsSPMDExecutionMode(ASTContext &Ctx, const OMPExecutableDirective &D)
Internal linkage, which indicates that the entity can be referred to from within the translation unit...
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
void addDecl(Decl *D)
Add the declaration D into this context.
bool hasAssociatedStmt() const
Returns true if directive has associated statement.
ExecutionMode
Defines the execution mode.
void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) override
Emits OpenMP-specific function prolog.
bool isLValueReferenceType() const
static void shuffleAndStore(CodeGenFunction &CGF, Address SrcAddr, Address DestAddr, QualType ElemType, llvm::Value *Offset, SourceLocation Loc)
static llvm::Value * emitShuffleAndReduceFunction(CodeGenModule &CGM, ArrayRef< const Expr *> Privates, QualType ReductionArrayTy, llvm::Value *ReduceFn, SourceLocation Loc)
Emit a helper that reduces data across two OpenMP threads (lanes) in the same warp.
CapturedDecl * getCapturedDecl()
Retrieve the outlined function declaration.
Generic data-sharing mode.
int64_t toBits(CharUnits CharSize) const
Convert a size in characters to a size in bits.
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr *> Privates, ArrayRef< const Expr *> LHSExprs, ArrayRef< const Expr *> RHSExprs, ArrayRef< const Expr *> ReductionOps, ReductionOptionsTy Options)
Emit a code for reduction clause.
bool hasSignedIntegerRepresentation() const
Determine whether this type has a signed integer representation of some sort, e.g., it is a signed integer type or a vector.
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block, taking care to avoid creation of branches from dummy blocks.
Privates[]
Gets the list of initial values for linear variables.
virtual void emitProcBindClause(CodeGenFunction &CGF, OpenMPProcBindClauseKind ProcBind, SourceLocation Loc) override
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...
LValue EmitLValue(const Expr *E)
EmitLValue - Emit code to compute a designator that specifies the location of the expression...
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
capture_range captures() const
Retrieve this lambda's captures.
CapturedRegionKind getKind() const
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind, llvm::Value *&Chunk) const override
Choose a default value for the dist_schedule clause.
static llvm::Value * getNVPTXThreadID(CodeGenFunction &CGF)
Get the id of the current thread on the GPU.
CGCapturedStmtInfo * CapturedStmtInfo
llvm::Value * EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)
Emit a conversion from the specified type to the specified destination type, both of which are LLVM s...
const VariableArrayType * getAsVariableArrayType(QualType T) const
static llvm::Value * getNVPTXWarpSize(CodeGenFunction &CGF)
Get the GPU warp size.
__DEVICE__ int max(int __a, int __b)
llvm::Value * RemoteLaneOffset
bool isEvaluatable(const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects) const
isEvaluatable - Call EvaluateAsRValue to see if this expression can be constant folded without side-e...
void EmitAggregateCopy(LValue Dest, LValue Src, QualType EltTy, AggValueSlot::Overlap_t MayOverlap, bool isVolatile=false)
EmitAggregateCopy - Emit an aggregate copy.
A reference to a declared variable, function, enum, etc.
CGOpenMPRuntimeNVPTX(CodeGenModule &CGM)
void addAddressSpace(LangAS space)
static llvm::Value * emitInterWarpCopyFunction(CodeGenModule &CGM, ArrayRef< const Expr *> Privates, QualType ReductionArrayTy, SourceLocation Loc)
This function emits a helper that gathers Reduce lists from the first lane of every active warp to la...
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
bool isOpenMPLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a directive with an associated loop construct.
LValue - This represents an lvalue references.
Information for lazily generating a cleanup.
virtual void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind, llvm::Value *&Chunk) const
Choose default schedule type and chunk value for the dist_schedule clause.
void setAccess(AccessSpecifier AS)
unsigned getTargetAddressSpace(QualType T) const
void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) override
Emit an implicit/explicit barrier for OpenMP threads.
static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, const llvm::Twine &Name="")
static llvm::Value * getNVPTXNumThreads(CodeGenFunction &CGF)
Get the maximum number of threads in a block of the GPU.
llvm::Value * getPointer() const
virtual void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc)
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...
Attr - This represents one attribute.
SourceLocation getLocation() const
QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const
getIntTypeForBitwidth - sets integer QualTy according to specified details: bitwidth, signed/unsigned.
Expr * IgnoreParens() LLVM_READONLY
IgnoreParens - Ignore parentheses.
static OMPLinearClause * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, OpenMPLinearClauseKind Modifier, SourceLocation ModifierLoc, SourceLocation ColonLoc, SourceLocation EndLoc, ArrayRef< Expr *> VL, ArrayRef< Expr *> PL, ArrayRef< Expr *> IL, Expr *Step, Expr *CalcStep, Stmt *PreInit, Expr *PostUpdate)
Creates clause with a list of variables VL and a linear step Step.
CanQualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for.
static bool supportsLightweightRuntime(ASTContext &Ctx, const OMPExecutableDirective &D)
Checks if the construct supports lightweight runtime.