21 #include "llvm/ADT/SmallPtrSet.h" 22 #include "llvm/IR/IntrinsicsNVPTX.h" 24 using namespace clang;
25 using namespace CodeGen;
// Enumerators naming the NVPTX-specific OpenMP device runtime entry points
// this file emits calls to (resolved to llvm::FunctionCallee values in
// createNVPTXRuntimeFunction below).
// NOTE(review): this chunk is an elided extraction — the leading integers on
// each line are original source line numbers, and gaps between them mark
// dropped lines (including the enum's opening declaration).
// Kernel init/deinit for the generic (non-SPMD) execution mode.
32 OMPRTL_NVPTX__kmpc_kernel_init,
34 OMPRTL_NVPTX__kmpc_kernel_deinit,
// Kernel init/deinit for SPMD mode.
37 OMPRTL_NVPTX__kmpc_spmd_kernel_init,
39 OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2,
// Worker/parallel-region orchestration in generic mode.
43 OMPRTL_NVPTX__kmpc_kernel_prepare_parallel,
46 OMPRTL_NVPTX__kmpc_kernel_parallel,
48 OMPRTL_NVPTX__kmpc_kernel_end_parallel,
51 OMPRTL_NVPTX__kmpc_serialized_parallel,
54 OMPRTL_NVPTX__kmpc_end_serialized_parallel,
// Warp shuffle primitives used by the reduction lowering.
57 OMPRTL_NVPTX__kmpc_shuffle_int32,
60 OMPRTL_NVPTX__kmpc_shuffle_int64,
66 OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2,
78 OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2,
80 OMPRTL_NVPTX__kmpc_end_reduce_nowait,
// Data-sharing stack management for variable globalization.
82 OMPRTL_NVPTX__kmpc_data_sharing_init_stack,
84 OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd,
87 OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack,
89 OMPRTL_NVPTX__kmpc_data_sharing_pop_stack,
92 OMPRTL_NVPTX__kmpc_begin_sharing_variables,
94 OMPRTL_NVPTX__kmpc_end_sharing_variables,
96 OMPRTL_NVPTX__kmpc_get_shared_variables,
99 OMPRTL_NVPTX__kmpc_parallel_level,
101 OMPRTL_NVPTX__kmpc_is_spmd_exec_mode,
104 OMPRTL_NVPTX__kmpc_get_team_static_memory,
107 OMPRTL_NVPTX__kmpc_restore_team_static_memory,
// Synchronization helpers.
112 OMPRTL__kmpc_barrier_simple_spmd,
114 OMPRTL_NVPTX__kmpc_warp_active_thread_mask,
116 OMPRTL_NVPTX__kmpc_syncwarp,
// Fragment of an action class (NVPTXActionTy, per the constructor name below)
// that pairs an "enter" runtime call with a matching "exit" runtime call
// around a code region.  NOTE(review): elided extraction — interior lines are
// missing; gaps are marked by the embedded original line numbers.
// Runtime function invoked on region entry.
121 llvm::FunctionCallee EnterCallee =
nullptr;
// Runtime function invoked on region exit.
123 llvm::FunctionCallee ExitCallee =
nullptr;
// Continuation block used when the enter call's result gates the region
// (see the conditional branch emitted below).
126 llvm::BasicBlock *ContBlock =
nullptr;
129 NVPTXActionTy(llvm::FunctionCallee EnterCallee,
131 llvm::FunctionCallee ExitCallee,
133 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
// Branch on the (boolean-converted) enter-call result: run the region body
// in ThenBlock or skip to ContBlock.
142 CGF.
Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
// RAII helper that saves/overrides the current execution mode (and optionally
// a "full runtime required" flag) for the duration of kernel emission, and
// restores the saved values in the destructor.
// NOTE(review): elided extraction — member declarations for ExecMode /
// SavedExecMode and parts of the constructors are among the dropped lines.
160 class ExecutionRuntimeModesRAII {
165 bool SavedRuntimeMode =
false;
// Null when the one-argument constructor is used; the destructor only
// restores *RuntimeMode when this is non-null (guard visible at line 187).
166 bool *RuntimeMode =
nullptr;
// Constructor overload: save/override only the execution mode.
171 : ExecMode(ExecMode) {
172 SavedExecMode = ExecMode;
// Constructor overload: additionally save RuntimeMode and set it to
// FullRuntimeMode for the scope's lifetime.
177 bool &RuntimeMode,
bool FullRuntimeMode)
178 : ExecMode(ExecMode), RuntimeMode(&RuntimeMode) {
179 SavedExecMode = ExecMode;
180 SavedRuntimeMode = RuntimeMode;
182 RuntimeMode = FullRuntimeMode;
184 ~ExecutionRuntimeModesRAII() {
185 ExecMode = SavedExecMode;
187 *RuntimeMode = SavedRuntimeMode;
// Target machine configuration constants (fragment of an enum; the opening
// and the WarpSize enumerator are among the elided lines).
// Mask extracting the lane id within a warp from a thread id.
200 LaneIDMask = WarpSize - 1,
// Alignment (bytes) applied to globalized variables in global memory.
203 GlobalMemoryAlignment = 128,
// Shared-memory budget (bytes) referenced by the static-memory lowering.
206 SharedMemorySize = 128,
// Helper fragment: strip array subscripts / OpenMP array sections off a
// reference expression and return the canonical underlying ValueDecl.
// NOTE(review): elided extraction — the enclosing function signature and the
// loop bodies that advance `Base` are among the dropped lines.
// Peel ArraySubscriptExpr layers down to the base lvalue.
211 if (
const auto *ASE = dyn_cast<ArraySubscriptExpr>(RefExpr)) {
212 const Expr *
Base = ASE->getBase()->IgnoreParenImpCasts();
213 while (
const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
216 }
// Peel OpenMP array-section layers, then any remaining subscripts.
else if (
auto *OASE = dyn_cast<OMPArraySectionExpr>(RefExpr)) {
217 const Expr *
Base = OASE->getBase()->IgnoreParenImpCasts();
218 while (
const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
220 while (
const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
// A plain DeclRefExpr resolves directly to its canonical decl; otherwise
// the expression is expected to be a MemberExpr.
225 if (
const auto *DE = dyn_cast<DeclRefExpr>(RefExpr))
226 return cast<ValueDecl>(DE->getDecl()->getCanonicalDecl());
227 const auto *ME = cast<MemberExpr>(RefExpr);
228 return cast<ValueDecl>(ME->getMemberDecl()->getCanonicalDecl());
// Build a RecordDecl whose fields hold the "globalized" copies of variables
// that escape their parallel/teams region, and fill MappedDeclsFields with
// the decl -> field mapping.  Returns null-equivalent early when there is
// nothing to globalize.
// NOTE(review): elided extraction — the parameter list, field-creation code,
// and parts of the loop bodies are among the dropped lines.
232 static RecordDecl *buildRecordForGlobalizedVars(
235 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
236 &MappedDeclsFields,
int BufSize) {
// Nothing escaped: no record needed.
238 if (EscapedDecls.empty() && EscapedDeclsForTeams.empty())
241 for (
const ValueDecl *D : EscapedDecls)
242 GlobalizedVars.emplace_back(
247 for (
const ValueDecl *D : EscapedDeclsForTeams)
// Stable sort (descending by first component) keeps a deterministic field
// layout for equal keys.
249 llvm::stable_sort(GlobalizedVars, [](VarsDataTy L, VarsDataTy R) {
250 return L.first > R.first;
// Set membership distinguishes teams-escaped decls (single copy) from
// parallel-escaped decls (per-lane copies) below.
260 llvm::SmallPtrSet<const ValueDecl *, 16> SingleEscaped(
261 EscapedDeclsForTeams.begin(), EscapedDeclsForTeams.end());
262 for (
const auto &Pair : GlobalizedVars) {
263 const ValueDecl *VD = Pair.second;
271 if (SingleEscaped.count(VD)) {
// Force the globalized field to the target's global-memory alignment.
296 GlobalMemoryAlignment)));
297 Field->
addAttr(AlignedAttr::CreateImplicit(
304 GlobalizedRD->addDecl(Field);
305 MappedDeclsFields.try_emplace(VD, Field);
307 GlobalizedRD->completeDefinition();
// Statement visitor that walks a target region's body and records which
// variables "escape" it (are captured by reference, address-taken, etc.) and
// therefore must be globalized rather than kept in private (register/stack)
// storage.  NOTE(review): elided extraction — many interior lines, including
// several member declarations and visitor bodies, are missing.
312 class CheckVarsEscapingDeclContext final
// Escaped variables, variably-sized escaped variables, escaped parameters,
// and the decl -> globalized-field mapping produced at the end.
315 llvm::SetVector<const ValueDecl *> EscapedDecls;
316 llvm::SetVector<const ValueDecl *> EscapedVariableLengthDecls;
317 llvm::SmallPtrSet<const Decl *, 4> EscapedParameters;
319 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
// When true, every visited decl reference is treated as escaping (set while
// visiting address-of / reference-binding subexpressions, per the
// save/restore pattern in the Visit* methods below).
320 bool AllEscaped =
false;
321 bool IsForCombinedParallelRegion =
false;
// Record VD as escaped, classifying it as parameter / variable-length /
// ordinary escaped decl.
323 void markAsEscaped(
const ValueDecl *VD) {
// Declare-target declarations and non-variables never need globalization.
325 if (!isa<VarDecl>(VD) ||
326 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))
333 if (
auto *CSI = CGF.CapturedStmtInfo) {
334 if (
const FieldDecl *FD = CSI->lookup(cast<VarDecl>(VD))) {
337 if (!IsForCombinedParallelRegion) {
340 const auto *
Attr = FD->getAttr<OMPCaptureKindAttr>();
343 if (((
Attr->getCaptureKind() != OMPC_map) &&
345 static_cast<OpenMPClauseKind>(
Attr->getCaptureKind()))) ||
346 ((
Attr->getCaptureKind() == OMPC_map) &&
347 !FD->getType()->isAnyPointerType()))
350 if (!FD->getType()->isReferenceType()) {
352 "Parameter captured by value with variably modified type");
353 EscapedParameters.insert(VD);
354 }
else if (!IsForCombinedParallelRegion) {
359 if ((!CGF.CapturedStmtInfo ||
360 (IsForCombinedParallelRegion && CGF.CapturedStmtInfo)) &&
365 EscapedVariableLengthDecls.insert(VD);
367 EscapedDecls.insert(VD);
// Visit a declaration's initializer; the initializer itself must not force
// escaping, hence the AllEscaped save/restore.
370 void VisitValueDecl(
const ValueDecl *VD) {
373 if (
const auto *VarD = dyn_cast<VarDecl>(VD)) {
374 if (!isa<ParmVarDecl>(VarD) && VarD->hasInit()) {
375 const bool SavedAllEscaped = AllEscaped;
377 Visit(VarD->getInit());
378 AllEscaped = SavedAllEscaped;
// Visit captures of an OpenMP captured statement; IsCombinedParallelRegion
// tweaks the escape rules for combined parallel constructs.
384 bool IsCombinedParallelRegion) {
388 if (C.capturesVariable() && !C.capturesVariableByCopy()) {
389 const ValueDecl *VD = C.getCapturedVar();
390 bool SavedIsForCombinedParallelRegion = IsForCombinedParallelRegion;
391 if (IsCombinedParallelRegion) {
// Privatizing clauses (firstprivate/lastprivate/reduction/linear/private)
// may exempt the capture from combined-region escaping.
395 IsForCombinedParallelRegion =
false;
398 C->getClauseKind() == OMPC_reduction ||
399 C->getClauseKind() == OMPC_linear ||
400 C->getClauseKind() == OMPC_private)
403 if (
const auto *PC = dyn_cast<OMPFirstprivateClause>(C))
404 Vars = PC->getVarRefs();
405 else if (
const auto *PC = dyn_cast<OMPLastprivateClause>(C))
406 Vars = PC->getVarRefs();
408 llvm_unreachable(
"Unexpected clause.");
409 for (
const auto *E : Vars) {
413 IsForCombinedParallelRegion =
true;
417 if (IsForCombinedParallelRegion)
// OMPCapturedExprDecls are compiler-synthesized; handled specially.
422 if (isa<OMPCapturedExprDecl>(VD))
424 IsForCombinedParallelRegion = SavedIsForCombinedParallelRegion;
// Build the globalization record from the collected escaped decls (teams
// vs. parallel sets chosen by IsInTTDRegion); delegates to the file-scope
// ::buildRecordForGlobalizedVars above.
429 void buildRecordForGlobalizedVars(
bool IsInTTDRegion) {
430 assert(!GlobalizedRD &&
431 "Record for globalized variables is built already.");
434 EscapedDeclsForTeams = EscapedDecls.getArrayRef();
436 EscapedDeclsForParallel = EscapedDecls.getArrayRef();
437 GlobalizedRD = ::buildRecordForGlobalizedVars(
438 CGF.getContext(), EscapedDeclsForParallel, EscapedDeclsForTeams,
439 MappedDeclsFields, WarpSize);
// Constructor seeds EscapedDecls with the teams reductions.
445 : CGF(CGF), EscapedDecls(TeamsReductions.begin(), TeamsReductions.end()) {
447 virtual ~CheckVarsEscapingDeclContext() =
default;
448 void VisitDeclStmt(
const DeclStmt *S) {
452 if (
const auto *VD = dyn_cast_or_null<ValueDecl>(D))
// OMPD_unknown as the only capture region means the captured statement is
// just a wrapper; recurse into its body directly.
466 if (CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown) {
467 VisitStmt(S->getCapturedStmt());
470 VisitOpenMPCapturedStmt(
472 CaptureRegions.back() == OMPD_parallel &&
480 if (C.capturesVariable() && !C.capturesVariableByCopy()) {
481 const ValueDecl *VD = C.getCapturedVar();
483 if (isa<OMPCapturedExprDecl>(VD))
492 if (C.capturesVariable()) {
494 const ValueDecl *VD = C.getCapturedVar();
// Variables captured by a block escape.
502 void VisitBlockExpr(
const BlockExpr *E) {
507 const VarDecl *VD = C.getVariable();
// Lvalue arguments to calls may have their address taken by the callee, so
// visit them with AllEscaped forced (save/restore pattern).
514 void VisitCallExpr(
const CallExpr *E) {
520 if (Arg->isLValue()) {
521 const bool SavedAllEscaped = AllEscaped;
524 AllEscaped = SavedAllEscaped;
537 if (isa<OMPCapturedExprDecl>(VD))
539 else if (
const auto *VarD = dyn_cast<VarDecl>(VD))
540 if (VarD->isInitCapture())
547 const bool SavedAllEscaped = AllEscaped;
550 AllEscaped = SavedAllEscaped;
559 const bool SavedAllEscaped = AllEscaped;
562 AllEscaped = SavedAllEscaped;
567 void VisitExpr(
const Expr *E) {
570 bool SavedAllEscaped = AllEscaped;
576 AllEscaped = SavedAllEscaped;
578 void VisitStmt(
const Stmt *S) {
// Accessors: lazily build and return the globalization record / lookup a
// decl's field / expose the escaped-decl sets collected above.
588 const RecordDecl *getGlobalizedRecord(
bool IsInTTDRegion) {
590 buildRecordForGlobalizedVars(IsInTTDRegion);
596 assert(GlobalizedRD &&
597 "Record for globalized variables must be generated already.");
598 auto I = MappedDeclsFields.find(VD);
599 if (I == MappedDeclsFields.end())
601 return I->getSecond();
606 return EscapedDecls.getArrayRef();
611 const llvm::SmallPtrSetImpl<const Decl *> &getEscapedParameters()
const {
612 return EscapedParameters;
618 return EscapedVariableLengthDecls.getArrayRef();
// Fragments of small helpers reading NVPTX special registers via NVVM
// intrinsics, plus the WorkerFunctionState constructor and the execution-mode
// accessor.  NOTE(review): elided extraction — surrounding signatures and
// call emission are among the dropped lines.
// Warp size: %warpsize special register.
626 llvm::Intrinsic::getDeclaration(
627 &CGF.
CGM.
getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize),
// Thread id within the CTA: %tid.x.
634 llvm::Intrinsic::getDeclaration(
635 &CGF.
CGM.
getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x),
// Number of threads in the CTA: %ntid.x.
659 llvm::Intrinsic::getDeclaration(
660 &CGF.
CGM.
getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x),
661 "nvptx_num_threads");
// Thread-limit helper: result differs between SPMD and generic mode.
670 bool IsInSPMDExecutionMode =
false) {
672 return IsInSPMDExecutionMode
// Master thread id: last warp-aligned thread id in the CTA
// ((NumThreads - 1) & ~Mask).
692 return Bld.CreateAnd(Bld.CreateNUWSub(NumThreads, Bld.getInt32(1)),
693 Bld.CreateNot(Mask),
"master_tid");
// WorkerFunctionState: creates the worker function at construction time.
696 CGOpenMPRuntimeNVPTX::WorkerFunctionState::WorkerFunctionState(
700 createWorkerFunction(CGM);
703 void CGOpenMPRuntimeNVPTX::WorkerFunctionState::createWorkerFunction(
711 WorkerFn->setDoesNotRecurse();
// Accessor for the mode installed by ExecutionRuntimeModesRAII.
715 CGOpenMPRuntimeNVPTX::getExecutionMode()
const {
716 return CurrentExecutionMode;
// Fragment of the check for whether a target directive contains a nested
// directive that permits SPMD-mode execution: drill through `teams` into the
// innermost captured statement and inspect the nested directive kind.
// NOTE(review): elided extraction — the function signature, the switch
// header, and most case bodies are among the dropped lines.
733 if (
const auto *NestedDir =
734 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
// Look through a nested `teams` to the directive inside it.
740 if (DKind == OMPD_teams) {
741 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
746 if (
const auto *NND =
747 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
748 DKind = NND->getDirectiveKind();
// Exhaustive directive-kind cases (bodies elided); combined target
// directives cannot appear nested here.
754 case OMPD_target_teams:
756 case OMPD_target_simd:
757 case OMPD_target_parallel:
758 case OMPD_target_parallel_for:
759 case OMPD_target_parallel_for_simd:
760 case OMPD_target_teams_distribute:
761 case OMPD_target_teams_distribute_simd:
762 case OMPD_target_teams_distribute_parallel_for:
763 case OMPD_target_teams_distribute_parallel_for_simd:
766 case OMPD_parallel_for:
767 case OMPD_parallel_master:
768 case OMPD_parallel_sections:
770 case OMPD_parallel_for_simd:
772 case OMPD_cancellation_point:
774 case OMPD_threadprivate:
790 case OMPD_target_data:
791 case OMPD_target_exit_data:
792 case OMPD_target_enter_data:
793 case OMPD_distribute:
794 case OMPD_distribute_simd:
795 case OMPD_distribute_parallel_for:
796 case OMPD_distribute_parallel_for_simd:
797 case OMPD_teams_distribute:
798 case OMPD_teams_distribute_simd:
799 case OMPD_teams_distribute_parallel_for:
800 case OMPD_teams_distribute_parallel_for_simd:
801 case OMPD_target_update:
802 case OMPD_declare_simd:
803 case OMPD_declare_variant:
804 case OMPD_declare_target:
805 case OMPD_end_declare_target:
806 case OMPD_declare_reduction:
807 case OMPD_declare_mapper:
809 case OMPD_taskloop_simd:
810 case OMPD_master_taskloop:
811 case OMPD_master_taskloop_simd:
812 case OMPD_parallel_master_taskloop:
813 case OMPD_parallel_master_taskloop_simd:
816 llvm_unreachable(
"Unexpected directive.");
// Fragment: decide (by directive kind) whether a target region can run in
// SPMD execution mode; non-target kinds fall through to the unreachable
// below.  Followed by a fragment checking for static loop scheduling.
// NOTE(review): elided extraction — case bodies and return values are among
// the dropped lines.
826 switch (DirectiveKind) {
828 case OMPD_target_teams:
830 case OMPD_target_parallel:
831 case OMPD_target_parallel_for:
832 case OMPD_target_parallel_for_simd:
833 case OMPD_target_teams_distribute_parallel_for:
834 case OMPD_target_teams_distribute_parallel_for_simd:
835 case OMPD_target_simd:
836 case OMPD_target_teams_distribute_simd:
838 case OMPD_target_teams_distribute:
842 case OMPD_parallel_for:
843 case OMPD_parallel_master:
844 case OMPD_parallel_sections:
846 case OMPD_parallel_for_simd:
848 case OMPD_cancellation_point:
850 case OMPD_threadprivate:
866 case OMPD_target_data:
867 case OMPD_target_exit_data:
868 case OMPD_target_enter_data:
869 case OMPD_distribute:
870 case OMPD_distribute_simd:
871 case OMPD_distribute_parallel_for:
872 case OMPD_distribute_parallel_for_simd:
873 case OMPD_teams_distribute:
874 case OMPD_teams_distribute_simd:
875 case OMPD_teams_distribute_parallel_for:
876 case OMPD_teams_distribute_parallel_for_simd:
877 case OMPD_target_update:
878 case OMPD_declare_simd:
879 case OMPD_declare_variant:
880 case OMPD_declare_target:
881 case OMPD_end_declare_target:
882 case OMPD_declare_reduction:
883 case OMPD_declare_mapper:
885 case OMPD_taskloop_simd:
886 case OMPD_master_taskloop:
887 case OMPD_master_taskloop_simd:
888 case OMPD_parallel_master_taskloop:
889 case OMPD_parallel_master_taskloop_simd:
895 "Unknown programming model for OpenMP directive on NVPTX target.");
// Static-scheduling check fragment: true when a schedule clause requests
// OMPC_SCHEDULE_static (loop-based directives only, per the assert).
903 "Expected loop-based directive.");
908 return C->getScheduleKind() == OMPC_SCHEDULE_static;
// Fragment: check whether a target directive contains a nested directive
// combination that allows the lightweight (simplified) device runtime —
// drilling through teams -> parallel (and simd variants) to the innermost
// directive.  NOTE(review): elided extraction — the signature, outer switch
// header, and most case bodies are among the dropped lines.
921 if (
const auto *NestedDir =
922 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
// teams distribute simd / simd nested directly: lightweight-compatible
// path (body elided).
930 if (DKind == OMPD_teams_distribute_simd || DKind == OMPD_simd)
932 if (DKind == OMPD_parallel) {
933 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
938 if (
const auto *NND =
939 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
940 DKind = NND->getDirectiveKind();
945 }
// teams: drill one more level, then possibly through a nested parallel.
else if (DKind == OMPD_teams) {
946 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
951 if (
const auto *NND =
952 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
953 DKind = NND->getDirectiveKind();
958 if (DKind == OMPD_parallel) {
959 Body = NND->getInnermostCapturedStmt()->IgnoreContainers(
964 if (
const auto *NND =
965 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
966 DKind = NND->getDirectiveKind();
// Outer switch over the target directive kind (header elided).
975 case OMPD_target_teams:
980 if (DKind == OMPD_distribute_simd || DKind == OMPD_simd)
982 if (DKind == OMPD_parallel) {
983 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
988 if (
const auto *NND =
989 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
990 DKind = NND->getDirectiveKind();
997 case OMPD_target_parallel:
998 if (DKind == OMPD_simd)
1002 case OMPD_target_teams_distribute:
1003 case OMPD_target_simd:
1004 case OMPD_target_parallel_for:
1005 case OMPD_target_parallel_for_simd:
1006 case OMPD_target_teams_distribute_simd:
1007 case OMPD_target_teams_distribute_parallel_for:
1008 case OMPD_target_teams_distribute_parallel_for_simd:
1011 case OMPD_parallel_for:
1012 case OMPD_parallel_master:
1013 case OMPD_parallel_sections:
1015 case OMPD_parallel_for_simd:
1017 case OMPD_cancellation_point:
1019 case OMPD_threadprivate:
1028 case OMPD_taskyield:
1031 case OMPD_taskgroup:
1035 case OMPD_target_data:
1036 case OMPD_target_exit_data:
1037 case OMPD_target_enter_data:
1038 case OMPD_distribute:
1039 case OMPD_distribute_simd:
1040 case OMPD_distribute_parallel_for:
1041 case OMPD_distribute_parallel_for_simd:
1042 case OMPD_teams_distribute:
1043 case OMPD_teams_distribute_simd:
1044 case OMPD_teams_distribute_parallel_for:
1045 case OMPD_teams_distribute_parallel_for_simd:
1046 case OMPD_target_update:
1047 case OMPD_declare_simd:
1048 case OMPD_declare_variant:
1049 case OMPD_declare_target:
1050 case OMPD_end_declare_target:
1051 case OMPD_declare_reduction:
1052 case OMPD_declare_mapper:
1054 case OMPD_taskloop_simd:
1055 case OMPD_master_taskloop:
1056 case OMPD_master_taskloop_simd:
1057 case OMPD_parallel_master_taskloop:
1058 case OMPD_parallel_master_taskloop_simd:
1061 llvm_unreachable(
"Unexpected directive.");
// Fragment: decide (by directive kind) whether the lightweight device
// runtime suffices for this target region; non-target kinds reach the
// unreachable below.  NOTE(review): elided extraction — case bodies and
// return values are among the dropped lines.
1075 switch (DirectiveKind) {
1077 case OMPD_target_teams:
1078 case OMPD_target_parallel:
1080 case OMPD_target_parallel_for:
1081 case OMPD_target_parallel_for_simd:
1082 case OMPD_target_teams_distribute_parallel_for:
1083 case OMPD_target_teams_distribute_parallel_for_simd:
1086 case OMPD_target_simd:
1087 case OMPD_target_teams_distribute_simd:
1089 case OMPD_target_teams_distribute:
1093 case OMPD_parallel_for:
1094 case OMPD_parallel_master:
1095 case OMPD_parallel_sections:
1097 case OMPD_parallel_for_simd:
1099 case OMPD_cancellation_point:
1101 case OMPD_threadprivate:
1110 case OMPD_taskyield:
1113 case OMPD_taskgroup:
1117 case OMPD_target_data:
1118 case OMPD_target_exit_data:
1119 case OMPD_target_enter_data:
1120 case OMPD_distribute:
1121 case OMPD_distribute_simd:
1122 case OMPD_distribute_parallel_for:
1123 case OMPD_distribute_parallel_for_simd:
1124 case OMPD_teams_distribute:
1125 case OMPD_teams_distribute_simd:
1126 case OMPD_teams_distribute_parallel_for:
1127 case OMPD_teams_distribute_parallel_for_simd:
1128 case OMPD_target_update:
1129 case OMPD_declare_simd:
1130 case OMPD_declare_variant:
1131 case OMPD_declare_target:
1132 case OMPD_end_declare_target:
1133 case OMPD_declare_reduction:
1134 case OMPD_declare_mapper:
1136 case OMPD_taskloop_simd:
1137 case OMPD_master_taskloop:
1138 case OMPD_master_taskloop_simd:
1139 case OMPD_parallel_master_taskloop:
1140 case OMPD_parallel_master_taskloop_simd:
1146 "Unknown programming model for OpenMP directive on NVPTX target.");
// Fragments of generic-mode (non-SPMD) kernel emission: the kernel entry
// wrapper, its pre/post action emitting entry header/footer, and the entry
// header/footer themselves (master/worker split, kernel init/deinit runtime
// calls).  NOTE(review): elided extraction — signatures and many statements
// are among the dropped lines.
1150 StringRef ParentName,
1151 llvm::Function *&OutlinedFn,
1152 llvm::Constant *&OutlinedFnID,
1153 bool IsOffloadEntry,
// Install generic execution mode for the duration of emission.
1155 ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode);
1156 EntryFunctionState EST;
1159 WrapperFunctionsMap.clear();
// Pre/post action: emit the non-SPMD entry header before the region body
// and the footer after it.
1163 CGOpenMPRuntimeNVPTX::EntryFunctionState &EST;
1164 CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST;
1167 NVPTXPrePostActionTy(CGOpenMPRuntimeNVPTX::EntryFunctionState &EST,
1168 CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST)
1169 : EST(EST), WST(WST) {}
1173 RT.emitNonSPMDEntryHeader(CGF, EST, WST);
1175 RT.setLocThreadIdInsertPt(CGF,
true);
1181 RT.emitNonSPMDEntryFooter(CGF, EST);
1185 IsInTTDRegion =
true;
1187 GlobalizedRecords.emplace_back();
// Lazily create the module-level pointer used by the static-globalization
// scheme (null-initialized void* global).
1188 if (!KernelStaticGlobalized) {
1189 KernelStaticGlobalized =
new llvm::GlobalVariable(
1192 llvm::ConstantPointerNull::get(CGM.
VoidPtrTy),
1193 "_openmp_kernel_static_glob_rd$ptr",
nullptr,
1194 llvm::GlobalValue::NotThreadLocal,
1197 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
1198 IsOffloadEntry, CodeGen);
1199 IsInTTDRegion =
false;
// Worker function is named after the kernel with a "_worker" suffix.
1203 WST.WorkerFn->setName(Twine(OutlinedFn->getName(),
"_worker"));
1206 emitWorkerFunction(WST);
// Entry header: route worker threads into the worker loop and the master
// thread into the sequential region.
1210 void CGOpenMPRuntimeNVPTX::emitNonSPMDEntryHeader(
CodeGenFunction &CGF,
1211 EntryFunctionState &EST,
1212 WorkerFunctionState &WST) {
1222 Bld.CreateCondBr(IsWorker, WorkerBB, MasterCheckBB);
1225 emitCall(CGF, WST.Loc, WST.WorkerFn);
1231 Bld.CreateCondBr(IsMaster, MasterBB, EST.ExitBB);
1234 IsInTargetMasterThreadRegion =
true;
1242 createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init), Args);
1246 createNVPTXRuntimeFunction(
1247 OMPRTL_NVPTX__kmpc_data_sharing_init_stack));
1249 emitGenericVarsProlog(CGF, WST.Loc);
// Entry footer: tear down globalized vars, signal termination, deinit.
1252 void CGOpenMPRuntimeNVPTX::emitNonSPMDEntryFooter(
CodeGenFunction &CGF,
1253 EntryFunctionState &EST) {
1254 IsInTargetMasterThreadRegion =
false;
1258 emitGenericVarsEpilog(CGF);
1263 llvm::BasicBlock *TerminateBB = CGF.
createBasicBlock(
".termination.notifier");
1271 createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_deinit), Args);
// CTA-wide barrier so workers observe the termination signal.
1273 syncCTAThreads(CGF);
1278 EST.ExitBB =
nullptr;
// Fragments of SPMD-mode kernel emission: kernel wrapper, SPMD entry
// header/footer (spmd_kernel_init/deinit_v2 runtime calls), and the global
// variable recording the kernel's execution mode.
// NOTE(review): elided extraction — signatures and many statements are
// among the dropped lines.
1282 StringRef ParentName,
1283 llvm::Function *&OutlinedFn,
1284 llvm::Constant *&OutlinedFnID,
1285 bool IsOffloadEntry,
// Install SPMD mode and the full-runtime flag via RAII.
1287 ExecutionRuntimeModesRAII ModeRAII(
1288 CurrentExecutionMode, RequiresFullRuntime,
1291 EntryFunctionState EST;
1296 CGOpenMPRuntimeNVPTX::EntryFunctionState &EST;
1301 CGOpenMPRuntimeNVPTX::EntryFunctionState &EST,
1303 : RT(RT), EST(EST), D(D) {}
1305 RT.emitSPMDEntryHeader(CGF, EST, D);
1311 RT.emitSPMDEntryFooter(CGF, EST);
1313 } Action(*
this, EST, D);
1315 IsInTTDRegion =
true;
1317 GlobalizedRecords.emplace_back();
// Same lazy module-level static-globalization pointer as the non-SPMD path.
1318 if (!KernelStaticGlobalized) {
1319 KernelStaticGlobalized =
new llvm::GlobalVariable(
1322 llvm::ConstantPointerNull::get(CGM.
VoidPtrTy),
1323 "_openmp_kernel_static_glob_rd$ptr",
nullptr,
1324 llvm::GlobalValue::NotThreadLocal,
1327 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
1328 IsOffloadEntry, CodeGen);
1329 IsInTTDRegion =
false;
// SPMD entry header: spmd_kernel_init(..., RequiresFullRuntime) plus data
// sharing stack init when the full runtime is needed.
1332 void CGOpenMPRuntimeNVPTX::emitSPMDEntryHeader(
1343 Bld.getInt16(RequiresFullRuntime ? 1 : 0),
1346 createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args);
1348 if (RequiresFullRuntime) {
1351 OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd));
1358 IsInTargetMasterThreadRegion =
true;
// SPMD entry footer: spmd_kernel_deinit_v2(RequiresFullRuntime).
1362 EntryFunctionState &EST) {
1363 IsInTargetMasterThreadRegion =
false;
1376 CGF.
Builder.getInt16(RequiresFullRuntime ? 1 : 0)};
1378 createNVPTXRuntimeFunction(
1379 OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2), Args);
1383 EST.ExitBB =
nullptr;
// "<kernel>_exec_mode" weak global: 0 for SPMD, 1 otherwise (note inverted
// encoding: Mode ? 0 : 1).
1396 llvm::GlobalValue::WeakAnyLinkage,
1397 llvm::ConstantInt::get(CGM.
Int8Ty, Mode ? 0 : 1),
1398 Twine(Name,
"_exec_mode"));
// Worker state machine for generic mode: workers park at a barrier, are
// awakened with a work function pointer, execute it (direct call when the
// function is known, indirect otherwise), signal completion, and loop until
// told to terminate.  NOTE(review): elided extraction — block creation and
// several branches are among the dropped lines.
1402 void CGOpenMPRuntimeNVPTX::emitWorkerFunction(WorkerFunctionState &WST) {
1408 emitWorkerLoop(CGF, WST);
1413 WorkerFunctionState &WST) {
1426 llvm::BasicBlock *SelectWorkersBB = CGF.
createBasicBlock(
".select.workers");
1428 llvm::BasicBlock *TerminateBB = CGF.
createBasicBlock(
".terminate.parallel");
// Wait for the master to publish work.
1437 syncCTAThreads(CGF);
1450 createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_parallel), Args);
// Null work function means the kernel is done: exit the loop.
1455 llvm::Value *ShouldTerminate = Bld.CreateIsNull(WorkID,
"should_terminate");
1456 Bld.CreateCondBr(ShouldTerminate, ExitBB, SelectWorkersBB);
// Only threads marked active by kernel_parallel execute the work.
1461 Bld.CreateIsNotNull(Bld.
CreateLoad(ExecStatus),
"is_active");
1462 Bld.CreateCondBr(IsActive, ExecuteBB, BarrierBB);
1467 setLocThreadIdInsertPt(CGF,
true);
// Match the published pointer against each known outlined function so it
// can be called directly.
1470 for (llvm::Function *W : Work) {
1475 Bld.CreateICmpEQ(Bld.
CreateLoad(WorkFn),
ID,
"work_match");
1479 Bld.CreateCondBr(WorkFnMatch, ExecuteFNBB, CheckNextBB);
1488 emitCall(CGF, WST.Loc, W,
1489 {Bld.getInt16(0), getThreadID(CGF, WST.Loc)});
// Fallback: indirect call through the published pointer, cast to the
// standard (i16, i32) -> void parallel-function type.
1499 auto *ParallelFnTy =
1500 llvm::FunctionType::get(CGM.
VoidTy, {CGM.Int16Ty, CGM.Int32Ty},
1508 emitCall(CGF, WST.Loc, {ParallelFnTy, WorkFnCast},
1509 {Bld.getInt16(0), getThreadID(CGF, WST.Loc)});
1516 createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_end_parallel),
// Rendezvous with the master before the next iteration.
1523 syncCTAThreads(CGF);
1529 clearLocThreadIdInsertPt(CGF);
// Resolve an OpenMPRTLFunctionNVPTX enumerator to a FunctionCallee,
// declaring the runtime function's type on first use.  Each case builds the
// parameter list and llvm::FunctionType for one entry point.
// NOTE(review): elided extraction — most TypeParams arrays and the
// CreateRuntimeFunction calls are among the dropped lines.
llvm::FunctionCallee
1538 llvm::FunctionCallee RTLFn =
nullptr;
1539 switch (static_cast<OpenMPRTLFunctionNVPTX>(Function)) {
1540 case OMPRTL_NVPTX__kmpc_kernel_init: {
1545 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1549 case OMPRTL_NVPTX__kmpc_kernel_deinit: {
1553 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1557 case OMPRTL_NVPTX__kmpc_spmd_kernel_init: {
1562 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1566 case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2: {
1570 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1574 case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: {
1579 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1583 case OMPRTL_NVPTX__kmpc_kernel_parallel: {
1589 llvm::FunctionType::get(RetTy, TypeParams,
false);
1593 case OMPRTL_NVPTX__kmpc_kernel_end_parallel: {
1600 case OMPRTL_NVPTX__kmpc_serialized_parallel: {
1605 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1609 case OMPRTL_NVPTX__kmpc_end_serialized_parallel: {
1614 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1618 case OMPRTL_NVPTX__kmpc_shuffle_int32: {
1623 llvm::FunctionType::get(CGM.
Int32Ty, TypeParams,
false);
1627 case OMPRTL_NVPTX__kmpc_shuffle_int64: {
1632 llvm::FunctionType::get(CGM.
Int64Ty, TypeParams,
false);
// Parallel reduction: takes shuffle-reduce and inter-warp-copy function
// pointers built from the local function types below.
1636 case OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2: {
1644 auto *ShuffleReduceFnTy =
1645 llvm::FunctionType::get(CGM.
VoidTy, ShuffleReduceTypeParams,
1648 auto *InterWarpCopyFnTy =
1649 llvm::FunctionType::get(CGM.
VoidTy, InterWarpCopyTypeParams,
1651 llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
1656 ShuffleReduceFnTy->getPointerTo(),
1657 InterWarpCopyFnTy->getPointerTo()};
1659 llvm::FunctionType::get(CGM.
Int32Ty, TypeParams,
false);
1661 FnTy,
"__kmpc_nvptx_parallel_reduce_nowait_v2");
1664 case OMPRTL_NVPTX__kmpc_end_reduce_nowait: {
1668 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1670 FnTy,
"__kmpc_nvptx_end_reduce_nowait");
// Teams reduction: additionally takes four global-list copy/reduce
// function pointers sharing GlobalListFnTy.
1673 case OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2: {
1687 auto *ShuffleReduceFnTy =
1688 llvm::FunctionType::get(CGM.
VoidTy, ShuffleReduceTypeParams,
1691 auto *InterWarpCopyFnTy =
1692 llvm::FunctionType::get(CGM.
VoidTy, InterWarpCopyTypeParams,
1696 auto *GlobalListFnTy =
1697 llvm::FunctionType::get(CGM.
VoidTy, GlobalListTypeParams,
1699 llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
1704 ShuffleReduceFnTy->getPointerTo(),
1705 InterWarpCopyFnTy->getPointerTo(),
1706 GlobalListFnTy->getPointerTo(),
1707 GlobalListFnTy->getPointerTo(),
1708 GlobalListFnTy->getPointerTo(),
1709 GlobalListFnTy->getPointerTo()};
1711 llvm::FunctionType::get(CGM.
Int32Ty, TypeParams,
false);
1713 FnTy,
"__kmpc_nvptx_teams_reduce_nowait_v2");
1716 case OMPRTL_NVPTX__kmpc_data_sharing_init_stack: {
1723 case OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd: {
1731 case OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack: {
1736 llvm::FunctionType::get(CGM.
VoidPtrTy, TypeParams,
false);
1738 FnTy,
"__kmpc_data_sharing_coalesced_push_stack");
1741 case OMPRTL_NVPTX__kmpc_data_sharing_pop_stack: {
1745 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1747 "__kmpc_data_sharing_pop_stack");
1750 case OMPRTL_NVPTX__kmpc_begin_sharing_variables: {
1755 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1759 case OMPRTL_NVPTX__kmpc_end_sharing_variables: {
1766 case OMPRTL_NVPTX__kmpc_get_shared_variables: {
1770 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1774 case OMPRTL_NVPTX__kmpc_parallel_level: {
1778 llvm::FunctionType::get(CGM.
Int16Ty, TypeParams,
false);
// No parameters: i8 __kmpc_is_spmd_exec_mode(void).
1782 case OMPRTL_NVPTX__kmpc_is_spmd_exec_mode: {
1784 auto *FnTy = llvm::FunctionType::get(CGM.
Int8Ty,
false);
1788 case OMPRTL_NVPTX__kmpc_get_team_static_memory: {
1794 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1798 case OMPRTL_NVPTX__kmpc_restore_team_static_memory: {
1803 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1812 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1817 case OMPRTL__kmpc_barrier_simple_spmd: {
1822 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1824 FnTy,
"__kmpc_barrier_simple_spmd");
1827 case OMPRTL_NVPTX__kmpc_warp_active_thread_mask: {
1834 case OMPRTL_NVPTX__kmpc_syncwarp: {
// Register an offload entry: for function entries, attach the NVVM
// "kernel" annotation ({fn, "kernel", i32 1} in !nvvm.annotations) so the
// NVPTX backend treats the function as a kernel entry point.  Non-function
// addresses are skipped.
1845 void CGOpenMPRuntimeNVPTX::createOffloadEntry(llvm::Constant *
ID,
1846 llvm::Constant *Addr,
1847 uint64_t Size, int32_t,
1848 llvm::GlobalValue::LinkageTypes) {
// Only functions become kernels; data entries need no annotation here.
1851 if (!isa<llvm::Function>(Addr))
1857 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata(
"nvvm.annotations");
1859 llvm::Metadata *MDVals[] = {
1860 llvm::ConstantAsMetadata::get(Addr), llvm::MDString::get(Ctx,
"kernel"),
1861 llvm::ConstantAsMetadata::get(
1862 llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
1864 MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
// Dispatch target-region emission to the SPMD or generic (non-SPMD) kernel
// emitter, followed by the ident_t mode-flag bitmask enum and the
// default-location flags helper mapping execution mode to those bits.
// NOTE(review): elided extraction — the mode test between lines 1874 and
// 1878 and parts of the switch are among the dropped lines.
1867 void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction(
1869 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
1871 if (!IsOffloadEntry)
1874 assert(!ParentName.empty() &&
"Invalid target region parent name!");
1878 emitSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
1881 emitNonSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
// ident_t reserved_2 flag bits communicated to the device runtime.
1892 KMP_IDENT_SPMD_MODE = 0x01,
1894 KMP_IDENT_SIMPLE_RT_MODE = 0x02,
1895 LLVM_MARK_AS_BITMASK_ENUM(KMP_IDENT_SIMPLE_RT_MODE)
// Undefined mode: not SPMD, but simple-runtime bit set.
1900 (~KMP_IDENT_SPMD_MODE) & KMP_IDENT_SIMPLE_RT_MODE;
1904 switch (getExecutionMode()) {
// SPMD: simple-runtime bit cleared when the full runtime is required.
1906 if (requiresFullRuntime())
1907 return KMP_IDENT_SPMD_MODE & (~KMP_IDENT_SIMPLE_RT_MODE);
1908 return KMP_IDENT_SPMD_MODE | KMP_IDENT_SIMPLE_RT_MODE;
// Generic mode always uses the full runtime.
1910 assert(requiresFullRuntime() &&
"Expected full runtime.");
1911 return (~KMP_IDENT_SPMD_MODE) & (~KMP_IDENT_SIMPLE_RT_MODE);
1913 return UndefinedMode;
1915 llvm_unreachable(
"Unknown flags are requested.");
// Fragments: declare-variant / global-address handling (device-only path,
// per the unreachable), proc_bind and num_teams clause stubs, and the
// parallel outlined-function emitter with its in-parallel-region RAII-style
// pre/post action.  NOTE(review): elided extraction — enclosing signatures
// and several statements are among the dropped lines.
1920 llvm::GlobalValue *OrigAddr,
1921 bool IsForDefinition) {
1923 auto *NewFD = cast<FunctionDecl>(NewGD.
getDecl());
1924 if (NewFD->isDefined()) {
// This runtime only emits device code.
1934 llvm_unreachable(
"OpenMP NVPTX can only handle device code.");
1938 ProcBindKind ProcBind,
1958 const Expr *NumTeams,
1959 const Expr *ThreadLimit,
// Pre/post action toggling IsInParallelRegion around the outlined body.
1967 bool &IsInParallelRegion;
1968 bool PrevIsInParallelRegion;
1971 NVPTXPrePostActionTy(
bool &IsInParallelRegion)
1972 : IsInParallelRegion(IsInParallelRegion) {}
1974 PrevIsInParallelRegion = IsInParallelRegion;
1975 IsInParallelRegion =
true;
1978 IsInParallelRegion = PrevIsInParallelRegion;
1980 } Action(IsInParallelRegion);
// The outlined parallel function is not in a TTD region or the master
// thread region; save and clear the flags for its emission.
1982 bool PrevIsInTTDRegion = IsInTTDRegion;
1983 IsInTTDRegion =
false;
1984 bool PrevIsInTargetMasterThreadRegion = IsInTargetMasterThreadRegion;
1985 IsInTargetMasterThreadRegion =
false;
1988 D, ThreadIDVar, InnermostKind, CodeGen));
// Force inlining of the outlined function into its caller.
1990 OutlinedFun->removeFnAttr(llvm::Attribute::NoInline);
1991 OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone);
1992 OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);
1994 IsInTargetMasterThreadRegion = PrevIsInTargetMasterThreadRegion;
1995 IsInTTDRegion = PrevIsInTTDRegion;
// Non-nested parallel regions get a data-sharing wrapper for the worker
// state machine's indirect dispatch.
1997 !IsInParallelRegion) {
1998 llvm::Function *WrapperFun =
1999 createParallelDataSharingWrapper(OutlinedFun, D);
2000 WrapperFunctionsMap[OutlinedFun] = WrapperFun;
// Fragments: collect lastprivate/reduction variables of a teams directive,
// build the globalized-variables record for them, and emit the teams
// outlined function with a pre/post action that installs the globalized
// mapping and runs the generic-vars prolog/epilog.
// NOTE(review): elided extraction — signatures, clause iteration headers,
// and several statements are among the dropped lines.
2012 "expected teams directive.");
2019 Dir = dyn_cast_or_null<OMPExecutableDirective>(S);
// Gather lastprivate variable references.
2027 for (
const Expr *E : C->getVarRefs())
2037 "expected teams directive.");
// Gather reduction private copies.
2039 for (
const Expr *E : C->privates())
2051 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
2057 if (!LastPrivatesReductions.empty()) {
// Build the record used to globalize lastprivates/reductions (WarpSize
// copies, matching the file-scope builder's BufSize parameter).
2058 GlobalizedRD = ::buildRecordForGlobalizedVars(
2060 MappedDeclsFields, WarpSize);
2062 }
// Other execution mode: defer by stashing the decls in TeamAndReductions.
else if (!LastPrivatesReductions.empty()) {
2063 assert(!TeamAndReductions.first &&
2064 "Previous team declaration is not expected.");
2066 std::swap(TeamAndReductions.second, LastPrivatesReductions);
// Pre/post action: register the globalized record and decl->field mapping
// for the emitted function, then run the generic vars prolog/epilog.
2073 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
2077 NVPTXPrePostActionTy(
2079 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
2081 : Loc(Loc), GlobalizedRD(GlobalizedRD),
2082 MappedDeclsFields(MappedDeclsFields) {}
2087 auto I = Rt.FunctionGlobalizedDecls.try_emplace(CGF.
CurFn).first;
2088 I->getSecond().GlobalRecord = GlobalizedRD;
2089 I->getSecond().MappedParams =
2090 std::make_unique<CodeGenFunction::OMPMapVars>();
2091 DeclToAddrMapTy &Data = I->getSecond().LocalVarData;
2092 for (
const auto &Pair : MappedDeclsFields) {
2093 assert(Pair.getFirst()->isCanonicalDecl() &&
2094 "Expected canonical declaration");
2095 Data.insert(std::make_pair(Pair.getFirst(),
2096 MappedVarData(Pair.getSecond(),
2100 Rt.emitGenericVarsProlog(CGF, Loc);
2104 .emitGenericVarsEpilog(CGF);
2106 } Action(Loc, GlobalizedRD, MappedDeclsFields);
2109 D, ThreadIDVar, InnermostKind, CodeGen);
// Force inlining, as with the parallel outlined function above.
2111 OutlinedFun->removeFnAttr(llvm::Attribute::NoInline);
2112 OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone);
2113 OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);
2119 void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(
CodeGenFunction &CGF,
2121 bool WithSPMDCheck) {
2128 const auto I = FunctionGlobalizedDecls.find(CGF.
CurFn);
2129 if (I == FunctionGlobalizedDecls.end())
2131 if (
const RecordDecl *GlobalizedVarsRecord = I->getSecond().GlobalRecord) {
2139 unsigned Alignment =
2141 unsigned GlobalRecordSize =
2143 GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment);
2145 llvm::PointerType *GlobalRecPtrTy =
2149 if (!IsInTTDRegion &&
2155 if (I->getSecond().SecondaryGlobalRecord.hasValue()) {
2161 IsTTD = Bld.CreateIsNull(PL);
2165 Bld.CreateCondBr(IsSPMD, SPMDBB, NonSPMDBB);
2169 Address RecPtr =
Address(llvm::ConstantPointerNull::get(GlobalRecPtrTy),
2176 if (
const RecordDecl *SecGlobalizedVarsRecord =
2177 I->getSecond().SecondaryGlobalRecord.getValueOr(
nullptr)) {
2185 unsigned Alignment =
2187 unsigned GlobalRecordSize =
2189 GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment);
2190 Size = Bld.CreateSelect(
2191 IsTTD, llvm::ConstantInt::get(CGM.
SizeTy, GlobalRecordSize), Size);
2196 Size, CGF.
Builder.getInt16(0)};
2199 OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
2200 GlobalRecordSizeArg);
2202 GlobalRecValue, GlobalRecPtrTy);
2204 auto *Phi = Bld.CreatePHI(GlobalRecPtrTy,
2205 2,
"_select_stack");
2206 Phi->addIncoming(RecPtr.
getPointer(), SPMDBB);
2207 Phi->addIncoming(GlobalRecCastAddr, NonSPMDBB);
2208 GlobalRecCastAddr = Phi;
2209 I->getSecond().GlobalRecordAddr = Phi;
2210 I->getSecond().IsInSPMDModeFlag = IsSPMD;
2211 }
else if (IsInTTDRegion) {
2212 assert(GlobalizedRecords.back().Records.size() < 2 &&
2213 "Expected less than 2 globalized records: one for target and one " 2216 for (
const RecordDecl *RD : GlobalizedRecords.back().Records) {
2218 unsigned Alignment =
2222 llvm::alignTo(llvm::alignTo(Offset, Alignment) + Size, Alignment);
2224 unsigned Alignment =
2226 Offset = llvm::alignTo(Offset, Alignment);
2227 GlobalizedRecords.back().Records.push_back(GlobalizedVarsRecord);
2228 ++GlobalizedRecords.back().RegionCounter;
2229 if (GlobalizedRecords.back().Records.size() == 1) {
2230 assert(KernelStaticGlobalized &&
2231 "Kernel static pointer must be initialized already.");
2232 auto *UseSharedMemory =
new llvm::GlobalVariable(
2235 "_openmp_static_kernel$is_shared");
2236 UseSharedMemory->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
2242 false, Int16Ty, Loc);
2243 auto *StaticGlobalized =
new llvm::GlobalVariable(
2245 llvm::GlobalValue::CommonLinkage,
nullptr);
2246 auto *RecSize =
new llvm::GlobalVariable(
2249 "_openmp_static_kernel$size");
2250 RecSize->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
2257 llvm::ConstantInt::get(
2260 StaticGlobalized, Ld, IsInSharedMemory, ResAddr};
2262 OMPRTL_NVPTX__kmpc_get_team_static_memory),
2263 GlobalRecordSizeArg);
2264 GlobalizedRecords.back().Buffer = StaticGlobalized;
2265 GlobalizedRecords.back().RecSize = RecSize;
2266 GlobalizedRecords.back().UseSharedMemory = UseSharedMemory;
2267 GlobalizedRecords.back().Loc = Loc;
2269 assert(KernelStaticGlobalized &&
"Global address must be set already.");
2274 .castAs<PointerType>());
2277 I->getSecond().GlobalRecordAddr = GlobalRecValue;
2278 I->getSecond().IsInSPMDModeFlag =
nullptr;
2285 llvm::ConstantInt::get(CGM.
SizeTy, GlobalRecordSize),
2289 OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
2290 GlobalRecordSizeArg);
2292 GlobalRecValue, GlobalRecPtrTy);
2293 I->getSecond().GlobalRecordAddr = GlobalRecValue;
2294 I->getSecond().IsInSPMDModeFlag =
nullptr;
2302 decltype(I->getSecond().LocalVarData)::const_iterator SecIt;
2304 SecIt = I->getSecond().SecondaryLocalVarData->begin();
2305 llvm::PointerType *SecGlobalRecPtrTy =
2309 I->getSecond().GlobalRecordAddr, SecGlobalRecPtrTy),
2312 for (
auto &Rec : I->getSecond().LocalVarData) {
2313 bool EscapedParam = I->getSecond().EscapedParameters.count(Rec.first);
2316 const auto *VD = cast<VarDecl>(Rec.first);
2324 if (Rec.second.IsOnePerTeam) {
2325 VarTy = Rec.second.FD->getType();
2336 Rec.second.PrivateAddr = VarAddr.
getAddress(CGF);
2337 if (!IsInTTDRegion &&
2340 assert(I->getSecond().IsInSPMDModeFlag &&
2341 "Expected unknown execution mode or required SPMD check.");
2343 assert(SecIt->second.IsOnePerTeam &&
2344 "Secondary glob data must be one per team.");
2350 Rec.second.PrivateAddr = VarAddr.
getAddress(CGF);
2352 Address GlobalPtr = Rec.second.PrivateAddr;
2354 Rec.second.PrivateAddr =
Address(
2355 Bld.CreateSelect(I->getSecond().IsInSPMDModeFlag,
2360 const auto *VD = cast<VarDecl>(Rec.first);
2362 I->getSecond().MappedParams->setVarAddr(CGF, VD,
2369 for (
const ValueDecl *VD : I->getSecond().EscapedVariableLengthDecls) {
2377 Size = Bld.CreateNUWAdd(
2381 Size = Bld.CreateUDiv(Size, AlignVal);
2382 Size = Bld.CreateNUWMul(Size, AlignVal);
2386 Size, CGF.
Builder.getInt16(0)};
2389 OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
2390 GlobalRecordSizeArg);
2396 I->getSecond().MappedParams->setVarAddr(CGF, cast<VarDecl>(VD),
2398 I->getSecond().EscapedVariableLengthDeclsAddrs.emplace_back(GlobalRecValue);
2400 I->getSecond().MappedParams->apply(CGF);
2403 void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(
CodeGenFunction &CGF,
2404 bool WithSPMDCheck) {
2409 const auto I = FunctionGlobalizedDecls.find(CGF.
CurFn);
2410 if (I != FunctionGlobalizedDecls.end()) {
2411 I->getSecond().MappedParams->restore(CGF);
2415 llvm::reverse(I->getSecond().EscapedVariableLengthDeclsAddrs)) {
2420 if (I->getSecond().GlobalRecordAddr) {
2421 if (!IsInTTDRegion &&
2427 Bld.CreateCondBr(I->getSecond().IsInSPMDModeFlag, ExitBB, NonSPMDBB);
2433 OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
2436 }
else if (IsInTTDRegion) {
2437 assert(GlobalizedRecords.back().RegionCounter > 0 &&
2438 "region counter must be > 0.");
2439 --GlobalizedRecords.back().RegionCounter;
2441 if (GlobalizedRecords.back().RegionCounter == 0) {
2445 Address(GlobalizedRecords.back().UseSharedMemory,
2447 false, Int16Ty, GlobalizedRecords.back().Loc);
2449 llvm::ConstantInt::get(
2455 OMPRTL_NVPTX__kmpc_restore_team_static_memory),
2460 OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
2461 I->getSecond().GlobalRecordAddr);
2470 llvm::Function *OutlinedFn,
2480 OutlinedFnArgs.push_back(ZeroAddr.
getPointer());
2481 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2492 emitSPMDParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond);
2494 emitNonSPMDParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond);
2497 void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall(
2500 llvm::Function *Fn = cast<llvm::Function>(OutlinedFn);
2509 Address ThreadIDAddr = ZeroAddr;
2510 auto &&CodeGen = [
this, Fn, CapturedVars, Loc, &ThreadIDAddr](
2515 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2516 ".bound.zero.addr");
2517 CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32( 0));
2519 OutlinedFnArgs.push_back(ThreadIDAddr.
getPointer());
2520 OutlinedFnArgs.push_back(ZeroAddr.
getPointer());
2521 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2532 NVPTXActionTy Action(
2544 llvm::Function *WFn = WrapperFunctionsMap[Fn];
2545 assert(WFn &&
"Wrapper function does not exist!");
2550 CGF.EmitRuntimeCall(
2559 if (!CapturedVars.empty()) {
2562 CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy,
"shared_arg_refs");
2567 llvm::ConstantInt::get(CGM.
SizeTy, CapturedVars.size())};
2569 OMPRTL_NVPTX__kmpc_begin_sharing_variables),
2575 Address SharedArgListAddress = CGF.EmitLoadOfPointer(
2577 .castAs<PointerType>());
2581 if (
V->getType()->isIntegerTy())
2582 PtrV = Bld.CreateIntToPtr(
V, CGF.VoidPtrTy);
2585 CGF.EmitStoreOfScalar(PtrV, Dst,
false,
2593 syncCTAThreads(CGF);
2601 syncCTAThreads(CGF);
2603 if (!CapturedVars.empty())
2604 CGF.EmitRuntimeCall(
2608 Work.emplace_back(WFn);
2611 auto &&LNParallelGen = [
this, Loc, &SeqGen, &L0ParallelGen](
2613 if (IsInParallelRegion) {
2614 SeqGen(CGF, Action);
2615 }
else if (IsInTargetMasterThreadRegion) {
2616 L0ParallelGen(CGF, Action);
2625 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(
".exit");
2626 llvm::BasicBlock *SeqBB = CGF.createBasicBlock(
".sequential");
2627 llvm::BasicBlock *ParallelCheckBB = CGF.createBasicBlock(
".parcheck");
2628 llvm::BasicBlock *MasterBB = CGF.createBasicBlock(
".master");
2629 llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall(
2631 Bld.CreateCondBr(IsSPMD, SeqBB, ParallelCheckBB);
2634 CGF.EmitBlock(ParallelCheckBB);
2641 Bld.CreateCondBr(Res, SeqBB, MasterBB);
2642 CGF.EmitBlock(SeqBB);
2643 SeqGen(CGF, Action);
2644 CGF.EmitBranch(ExitBB);
2647 CGF.EmitBlock(MasterBB);
2648 L0ParallelGen(CGF, Action);
2649 CGF.EmitBranch(ExitBB);
2653 CGF.EmitBlock(ExitBB,
true);
2666 void CGOpenMPRuntimeNVPTX::emitSPMDParallelCall(
2678 Address ThreadIDAddr = ZeroAddr;
2679 auto &&CodeGen = [
this, OutlinedFn, CapturedVars, Loc, &ThreadIDAddr](
2684 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2685 ".bound.zero.addr");
2686 CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32( 0));
2688 OutlinedFnArgs.push_back(ThreadIDAddr.
getPointer());
2689 OutlinedFnArgs.push_back(ZeroAddr.
getPointer());
2690 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2701 NVPTXActionTy Action(
2710 if (IsInTargetMasterThreadRegion) {
2731 llvm::ConstantPointerNull::get(
2733 llvm::ConstantInt::get(CGF.
Int32Ty, 0,
true)};
2736 Call->setConvergent();
2752 Call->setConvergent();
2786 CGF.
Builder.CreateCondBr(CmpLoopBound, TestBB, ExitBB);
2793 CGF.
Builder.CreateICmpEQ(ThreadID, CounterVal);
2794 CGF.
Builder.CreateCondBr(CmpThreadToCounter, BodyBB, SyncBB);
2826 "Cast type must sized.");
2828 "Val type must sized.");
2830 if (ValTy == CastTy)
2836 return CGF.
Builder.CreateIntCast(Val, LLVMCastTy,
2859 "Unsupported bitwidth in shuffle instruction.");
2862 ? OMPRTL_NVPTX__kmpc_shuffle_int32
2863 : OMPRTL_NVPTX__kmpc_shuffle_int64;
2898 for (
int IntSize = 8; IntSize >= 1; IntSize /= 2) {
2912 llvm::BasicBlock *CurrentBB = Bld.GetInsertBlock();
2914 llvm::PHINode *PhiSrc =
2915 Bld.CreatePHI(Ptr.
getType(), 2);
2916 PhiSrc->addIncoming(Ptr.
getPointer(), CurrentBB);
2917 llvm::PHINode *PhiDest =
2918 Bld.CreatePHI(ElemPtr.
getType(), 2);
2919 PhiDest->addIncoming(ElemPtr.
getPointer(), CurrentBB);
2925 Bld.CreateCondBr(Bld.CreateICmpSGT(PtrDiff, Bld.getInt64(IntSize - 1)),
2934 PhiSrc->addIncoming(LocalPtr.
getPointer(), ThenBB);
2935 PhiDest->addIncoming(LocalElemPtr.
getPointer(), ThenBB);
2946 Size = Size % IntSize;
2982 llvm::Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
2983 llvm::Value *ScratchpadIndex = CopyOptions.ScratchpadIndex;
2984 llvm::Value *ScratchpadWidth = CopyOptions.ScratchpadWidth;
2989 unsigned Size = Privates.size();
2990 for (
const Expr *Private : Privates) {
2995 bool ShuffleInElement =
false;
2998 bool UpdateDestListPtr =
false;
3001 bool IncrScratchpadSrc =
false;
3002 bool IncrScratchpadDest =
false;
3005 case RemoteLaneToThread: {
3016 CGF.
CreateMemTemp(Private->getType(),
".omp.reduction.element");
3017 ShuffleInElement =
true;
3018 UpdateDestListPtr =
true;
3036 case ThreadToScratchpad: {
3047 Bld.CreateNUWMul(ElementSizeInChars, ScratchpadIndex);
3049 Bld.CreateNUWAdd(DestBase.
getPointer(), CurrentOffset);
3050 ScratchPadElemAbsolutePtrVal =
3051 Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.
VoidPtrTy);
3052 DestElementAddr =
Address(ScratchPadElemAbsolutePtrVal,
3054 IncrScratchpadDest =
true;
3057 case ScratchpadToThread: {
3062 Bld.CreateNUWMul(ElementSizeInChars, ScratchpadIndex);
3064 Bld.CreateNUWAdd(SrcBase.
getPointer(), CurrentOffset);
3065 ScratchPadElemAbsolutePtrVal =
3066 Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.
VoidPtrTy);
3067 SrcElementAddr =
Address(ScratchPadElemAbsolutePtrVal,
3069 IncrScratchpadSrc =
true;
3075 CGF.
CreateMemTemp(Private->getType(),
".omp.reduction.element");
3076 UpdateDestListPtr =
true;
3090 if (ShuffleInElement) {
3092 RemoteLaneOffset, Private->getExprLoc());
3098 Private->
getType(), Private->getExprLoc());
3107 Private->getExprLoc());
3127 if (UpdateDestListPtr) {
3130 DestElementPtrAddr,
false,
3137 if ((IncrScratchpadDest || IncrScratchpadSrc) && (Idx + 1 < Size)) {
3141 ScratchpadBasePtr = Bld.CreateNUWAdd(
3143 Bld.CreateNUWMul(ScratchpadWidth, ElementSizeInChars));
3146 ScratchpadBasePtr = Bld.CreateNUWSub(
3147 ScratchpadBasePtr, llvm::ConstantInt::get(CGM.
SizeTy, 1));
3148 ScratchpadBasePtr = Bld.CreateUDiv(
3150 llvm::ConstantInt::get(CGM.
SizeTy, GlobalMemoryAlignment));
3151 ScratchpadBasePtr = Bld.CreateNUWAdd(
3152 ScratchpadBasePtr, llvm::ConstantInt::get(CGM.
SizeTy, 1));
3153 ScratchpadBasePtr = Bld.CreateNUWMul(
3155 llvm::ConstantInt::get(CGM.
SizeTy, GlobalMemoryAlignment));
3157 if (IncrScratchpadDest)
3197 Args.push_back(&ReduceListArg);
3198 Args.push_back(&NumWarpsArg);
3204 "_omp_reduction_inter_warp_copy_func", &M);
3206 Fn->setDoesNotRecurse();
3219 StringRef TransferMediumName =
3220 "__openmp_nvptx_data_transfer_temporary_storage";
3221 llvm::GlobalVariable *TransferMedium =
3222 M.getGlobalVariable(TransferMediumName);
3223 if (!TransferMedium) {
3224 auto *Ty = llvm::ArrayType::get(CGM.
Int32Ty, WarpSize);
3226 TransferMedium =
new llvm::GlobalVariable(
3227 M, Ty,
false, llvm::GlobalVariable::CommonLinkage,
3228 llvm::Constant::getNullValue(Ty), TransferMediumName,
3229 nullptr, llvm::GlobalVariable::NotThreadLocal,
3230 SharedAddressSpace);
3250 for (
const Expr *Private : Privates) {
3255 unsigned RealTySize =
3259 for (
unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /=2) {
3260 unsigned NumIters = RealTySize / TySize;
3269 llvm::BasicBlock *PrecondBB =
nullptr;
3270 llvm::BasicBlock *ExitBB =
nullptr;
3283 Bld.CreateICmpULT(Cnt, llvm::ConstantInt::get(CGM.
IntTy, NumIters));
3284 Bld.CreateCondBr(Cmp, BodyBB, ExitBB);
3296 llvm::Value *IsWarpMaster = Bld.CreateIsNull(LaneID,
"warp_master");
3297 Bld.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
3314 llvm::Value *MediumPtrVal = Bld.CreateInBoundsGEP(
3315 TransferMedium, {llvm::Constant::getNullValue(CGM.
Int64Ty), WarpID});
3316 Address MediumPtr(MediumPtrVal, Align);
3328 Bld.CreateBr(MergeBB);
3331 Bld.CreateBr(MergeBB);
3349 AddrNumWarpsArg,
false, C.
IntTy, Loc);
3353 Bld.CreateICmpULT(ThreadID, NumWarpsVal,
"is_active_thread");
3354 Bld.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
3359 llvm::Value *SrcMediumPtrVal = Bld.CreateInBoundsGEP(
3361 {llvm::Constant::getNullValue(CGM.
Int64Ty), ThreadID});
3362 Address SrcMediumPtr(SrcMediumPtrVal, Align);
3369 TargetElemPtrPtr,
false, C.
VoidPtrTy, Loc);
3382 Bld.CreateBr(W0MergeBB);
3385 Bld.CreateBr(W0MergeBB);
3390 Cnt = Bld.CreateNSWAdd(Cnt, llvm::ConstantInt::get(CGM.
IntTy, 1));
3396 RealTySize %= TySize;
3489 Args.push_back(&ReduceListArg);
3490 Args.push_back(&LaneIDArg);
3491 Args.push_back(&RemoteLaneOffsetArg);
3492 Args.push_back(&AlgoVerArg);
3498 "_omp_reduction_shuffle_and_reduce_func", &CGM.
getModule());
3500 Fn->setDoesNotRecurse();
3502 Fn->removeFnAttr(llvm::Attribute::NoInline);
3503 Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
3504 Fn->addFnAttr(llvm::Attribute::AlwaysInline);
3535 CGF.
CreateMemTemp(ReductionArrayTy,
".omp.reduction.remote_reduce_list");
3541 LocalReduceList, RemoteReduceList,
3542 {RemoteLaneOffsetArgVal,
3567 llvm::Value *CondAlgo0 = Bld.CreateIsNull(AlgoVerArgVal);
3569 llvm::Value *Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1));
3571 Algo1, Bld.CreateICmpULT(LaneIDArgVal, RemoteLaneOffsetArgVal));
3573 llvm::Value *Algo2 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(2));
3575 Algo2, Bld.CreateIsNull(Bld.CreateAnd(LaneIDArgVal, Bld.getInt16(1))));
3576 CondAlgo2 = Bld.CreateAnd(
3577 CondAlgo2, Bld.CreateICmpSGT(RemoteLaneOffsetArgVal, Bld.getInt16(0)));
3579 llvm::Value *CondReduce = Bld.CreateOr(CondAlgo0, CondAlgo1);
3580 CondReduce = Bld.CreateOr(CondReduce, CondAlgo2);
3585 Bld.CreateCondBr(CondReduce, ThenBB, ElseBB);
3590 LocalReduceList.getPointer(), CGF.
VoidPtrTy);
3594 CGF, Loc, ReduceFn, {LocalReduceListPtr, RemoteReduceListPtr});
3595 Bld.CreateBr(MergeBB);
3598 Bld.CreateBr(MergeBB);
3604 Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1));
3606 Algo1, Bld.CreateICmpUGE(LaneIDArgVal, RemoteLaneOffsetArgVal));
3611 Bld.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB);
3615 RemoteReduceList, LocalReduceList);
3616 Bld.CreateBr(CpyMergeBB);
3619 Bld.CreateBr(CpyMergeBB);
3637 const llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
3651 Args.push_back(&BufferArg);
3652 Args.push_back(&IdxArg);
3653 Args.push_back(&ReduceListArg);
3659 "_omp_reduction_list_to_global_copy_func", &CGM.
getModule());
3661 Fn->setDoesNotRecurse();
3680 LLVMReductionsBufferTy->getPointerTo());
3686 for (
const Expr *Private : Privates) {
3696 const ValueDecl *VD = cast<DeclRefExpr>(Private)->getDecl();
3698 const FieldDecl *FD = VarFieldMap.lookup(VD);
3702 Bld.CreateInBoundsGEP(GlobLVal.
getPointer(CGF), Idxs);
3743 const llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
3745 llvm::Function *ReduceFn) {
3758 Args.push_back(&BufferArg);
3759 Args.push_back(&IdxArg);
3760 Args.push_back(&ReduceListArg);
3766 "_omp_reduction_list_to_global_reduce_func", &CGM.
getModule());
3768 Fn->setDoesNotRecurse();
3780 LLVMReductionsBufferTy->getPointerTo());
3785 CGF.
CreateMemTemp(ReductionArrayTy,
".omp.reduction.red_list");
3786 auto IPriv = Privates.begin();
3792 for (
unsigned I = 0, E = Privates.size(); I < E; ++I, ++IPriv, ++Idx) {
3795 const ValueDecl *VD = cast<DeclRefExpr>(*IPriv)->getDecl();
3796 const FieldDecl *FD = VarFieldMap.lookup(VD);
3800 Bld.CreateInBoundsGEP(GlobLVal.
getPointer(CGF), Idxs);
3803 if ((*IPriv)->getType()->isVariablyModifiedType()) {
3822 AddrReduceListArg,
false, C.
VoidPtrTy, Loc);
3824 CGF, Loc, ReduceFn, {GlobalReduceList, ReducedPtr});
3839 const llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
3853 Args.push_back(&BufferArg);
3854 Args.push_back(&IdxArg);
3855 Args.push_back(&ReduceListArg);
3861 "_omp_reduction_global_to_list_copy_func", &CGM.
getModule());
3863 Fn->setDoesNotRecurse();
3882 LLVMReductionsBufferTy->getPointerTo());
3889 for (
const Expr *Private : Privates) {
3899 const ValueDecl *VD = cast<DeclRefExpr>(Private)->getDecl();
3901 const FieldDecl *FD = VarFieldMap.lookup(VD);
3905 Bld.CreateInBoundsGEP(GlobLVal.
getPointer(CGF), Idxs);
3945 const llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
3947 llvm::Function *ReduceFn) {
3960 Args.push_back(&BufferArg);
3961 Args.push_back(&IdxArg);
3962 Args.push_back(&ReduceListArg);
3968 "_omp_reduction_global_to_list_reduce_func", &CGM.
getModule());
3970 Fn->setDoesNotRecurse();
3982 LLVMReductionsBufferTy->getPointerTo());
3987 CGF.
CreateMemTemp(ReductionArrayTy,
".omp.reduction.red_list");
3988 auto IPriv = Privates.begin();
3994 for (
unsigned I = 0, E = Privates.size(); I < E; ++I, ++IPriv, ++Idx) {
3997 const ValueDecl *VD = cast<DeclRefExpr>(*IPriv)->getDecl();
3998 const FieldDecl *FD = VarFieldMap.lookup(VD);
4002 Bld.CreateInBoundsGEP(GlobLVal.
getPointer(CGF), Idxs);
4005 if ((*IPriv)->getType()->isVariablyModifiedType()) {
4024 AddrReduceListArg,
false, C.
VoidPtrTy, Loc);
4026 CGF, Loc, ReduceFn, {ReducedPtr, GlobalReduceList});
4286 assert(!TeamsReduction && !ParallelReduction &&
4287 "Invalid reduction selection in emitReduction.");
4289 ReductionOps, Options);
4293 assert((TeamsReduction || ParallelReduction) &&
4294 "Invalid reduction selection in emitReduction.");
4307 auto Size = RHSExprs.size();
4308 for (
const Expr *E : Privates) {
4309 if (E->getType()->isVariablyModifiedType())
4318 CGF.
CreateMemTemp(ReductionArrayTy,
".omp.reduction.red_list");
4319 auto IPriv = Privates.begin();
4321 for (
unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
4327 if ((*IPriv)->getType()->isVariablyModifiedType()) {
4345 LHSExprs, RHSExprs, ReductionOps);
4348 CGM, Privates, ReductionArrayTy, ReductionFn, Loc);
4352 if (ParallelReduction) {
4355 CGF.
Builder.getInt32(RHSExprs.size()),
4356 ReductionArrayTySize,
4363 OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2),
4366 assert(TeamsReduction &&
"expected teams reduction.");
4367 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> VarFieldMap;
4370 for (
const Expr *DRE : Privates) {
4371 PrivatesReductions[Cnt] = cast<DeclRefExpr>(DRE)->getDecl();
4374 const RecordDecl *TeamReductionRec = ::buildRecordForGlobalizedVars(
4377 TeamsReductions.push_back(TeamReductionRec);
4378 if (!KernelTeamsReductionPtr) {
4379 KernelTeamsReductionPtr =
new llvm::GlobalVariable(
4382 "_openmp_teams_reductions_buffer_$_$ptr");
4388 CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap);
4390 CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap,
4393 CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap);
4395 CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap,
4406 GlobalToBufferCpyFn,
4407 GlobalToBufferRedFn,
4408 BufferToGlobalCpyFn,
4409 BufferToGlobalRedFn};
4413 OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2),
4421 Res, llvm::ConstantInt::get(CGM.
Int32Ty, 1));
4422 CGF.
Builder.CreateCondBr(Cond, ThenBB, ExitBB);
4431 auto &&CodeGen = [
Privates, LHSExprs, RHSExprs, ReductionOps,
4433 auto IPriv = Privates.begin();
4434 auto ILHS = LHSExprs.begin();
4435 auto IRHS = RHSExprs.begin();
4436 for (
const Expr *E : ReductionOps) {
4438 cast<DeclRefExpr>(*IRHS));
4446 NVPTXActionTy Action(
4454 CGF.EmitBlock(ExitBB,
true);
4459 const VarDecl *NativeParam)
const {
4464 const Type *NonQualTy = QC.
strip(ArgType);
4465 QualType PointeeTy = cast<ReferenceType>(NonQualTy)->getPointeeType();
4466 if (
const auto *
Attr = FD->
getAttr<OMPCaptureKindAttr>()) {
4467 if (
Attr->getCaptureKind() == OMPC_map) {
4470 }
else if (
Attr->getCaptureKind() == OMPC_firstprivate &&
4478 enum { NVPTX_local_addr = 5 };
4481 if (isa<ImplicitParamDecl>(NativeParam))
4496 const VarDecl *TargetParam)
const {
4497 assert(NativeParam != TargetParam &&
4499 "Native arg must not be the same as target arg.");
4503 const Type *NonQualTy = QC.
strip(NativeParamType);
4504 QualType NativePointeeTy = cast<ReferenceType>(NonQualTy)->getPointeeType();
4505 unsigned NativePointeeAddrSpace =
4512 TargetAddr, TargetAddr->getType()->getPointerElementType()->getPointerTo(
4516 TargetAddr, TargetAddr->getType()->getPointerElementType()->getPointerTo(
4517 NativePointeeAddrSpace));
4521 return NativeParamAddr;
4528 TargetArgs.reserve(Args.size());
4529 auto *FnType = OutlinedFn.getFunctionType();
4530 for (
unsigned I = 0, E = Args.size(); I < E; ++I) {
4531 if (FnType->isVarArg() && FnType->getNumParams() <= I) {
4532 TargetArgs.append(std::next(Args.begin(), I), Args.end());
4535 llvm::Type *TargetType = FnType->getParamType(I);
4537 if (!TargetType->isPointerTy()) {
4538 TargetArgs.emplace_back(NativeArg);
4543 NativeArg->getType()->getPointerElementType()->getPointerTo());
4544 TargetArgs.emplace_back(
4554 llvm::Function *CGOpenMPRuntimeNVPTX::createParallelDataSharingWrapper(
4571 WrapperArgs.emplace_back(&ParallelLevelArg);
4572 WrapperArgs.emplace_back(&WrapperArg);
4579 Twine(OutlinedParallelFn->getName(),
"_wrapper"), &CGM.
getModule());
4582 Fn->setDoesNotRecurse();
4588 const auto *RD = CS.getCapturedRecordDecl();
4589 auto CurField = RD->field_begin();
4601 auto CI = CS.capture_begin();
4616 if (CS.capture_size() > 0 ||
4628 Src, CGF.
SizeTy->getPointerTo());
4633 cast<OMPLoopDirective>(D).getLowerBoundVariable()->getExprLoc());
4634 Args.emplace_back(LB);
4638 Src, CGF.
SizeTy->getPointerTo());
4643 cast<OMPLoopDirective>(D).getUpperBoundVariable()->getExprLoc());
4644 Args.emplace_back(UB);
4647 if (CS.capture_size() > 0) {
4649 for (
unsigned I = 0, E = CS.capture_size(); I < E; ++I, ++CI, ++CurField) {
4650 QualType ElemTy = CurField->getType();
4658 if (CI->capturesVariableByCopy() &&
4659 !CI->getCapturedVar()->getType()->isAnyPointerType()) {
4663 Args.emplace_back(Arg);
4677 assert(D &&
"Expected function or captured|block decl.");
4678 assert(FunctionGlobalizedDecls.count(CGF.
CurFn) == 0 &&
4679 "Function is registered already.");
4680 assert((!TeamAndReductions.first || TeamAndReductions.first == D) &&
4681 "Team is set but not processed.");
4682 const Stmt *Body =
nullptr;
4683 bool NeedToDelayGlobalization =
false;
4684 if (
const auto *FD = dyn_cast<FunctionDecl>(D)) {
4685 Body = FD->getBody();
4686 }
else if (
const auto *BD = dyn_cast<BlockDecl>(D)) {
4687 Body = BD->getBody();
4688 }
else if (
const auto *CD = dyn_cast<CapturedDecl>(D)) {
4689 Body = CD->getBody();
4691 if (NeedToDelayGlobalization &&
4697 CheckVarsEscapingDeclContext VarChecker(CGF, TeamAndReductions.second);
4698 VarChecker.Visit(Body);
4700 VarChecker.getGlobalizedRecord(IsInTTDRegion);
4701 TeamAndReductions.first =
nullptr;
4702 TeamAndReductions.second.clear();
4704 VarChecker.getEscapedVariableLengthDecls();
4705 if (!GlobalizedVarsRecord && EscapedVariableLengthDecls.empty())
4707 auto I = FunctionGlobalizedDecls.try_emplace(CGF.
CurFn).first;
4708 I->getSecond().MappedParams =
4709 std::make_unique<CodeGenFunction::OMPMapVars>();
4710 I->getSecond().GlobalRecord = GlobalizedVarsRecord;
4711 I->getSecond().EscapedParameters.insert(
4712 VarChecker.getEscapedParameters().begin(),
4713 VarChecker.getEscapedParameters().end());
4714 I->getSecond().EscapedVariableLengthDecls.append(
4715 EscapedVariableLengthDecls.begin(), EscapedVariableLengthDecls.end());
4716 DeclToAddrMapTy &Data = I->getSecond().LocalVarData;
4717 for (
const ValueDecl *VD : VarChecker.getEscapedDecls()) {
4718 assert(VD->isCanonicalDecl() &&
"Expected canonical declaration");
4719 const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD);
4720 Data.insert(std::make_pair(VD, MappedVarData(FD, IsInTTDRegion)));
4722 if (!IsInTTDRegion && !NeedToDelayGlobalization && !IsInParallelRegion) {
4723 CheckVarsEscapingDeclContext VarChecker(CGF,
llvm::None);
4724 VarChecker.Visit(Body);
4725 I->getSecond().SecondaryGlobalRecord =
4726 VarChecker.getGlobalizedRecord(
true);
4727 I->getSecond().SecondaryLocalVarData.emplace();
4728 DeclToAddrMapTy &Data = I->getSecond().SecondaryLocalVarData.getValue();
4729 for (
const ValueDecl *VD : VarChecker.getEscapedDecls()) {
4730 assert(VD->isCanonicalDecl() &&
"Expected canonical declaration");
4731 const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD);
4733 std::make_pair(VD, MappedVarData(FD,
true)));
4736 if (!NeedToDelayGlobalization) {
4737 emitGenericVarsProlog(CGF, D->
getBeginLoc(),
true);
4739 GlobalizationScope() =
default;
4743 .emitGenericVarsEpilog(CGF,
true);
4752 if (VD && VD->
hasAttr<OMPAllocateDeclAttr>()) {
4753 const auto *A = VD->
getAttr<OMPAllocateDeclAttr>();
4754 switch (A->getAllocatorType()) {
4757 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
4758 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
4759 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
4760 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
4763 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
4766 case OMPAllocateDeclAttr::OMPConstMemAlloc: {
4768 auto *GV =
new llvm::GlobalVariable(
4771 llvm::Constant::getNullValue(VarTy), VD->
getName(),
4772 nullptr, llvm::GlobalValue::NotThreadLocal,
4775 GV->setAlignment(Align.getAsAlign());
4778 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: {
4780 auto *GV =
new llvm::GlobalVariable(
4783 llvm::Constant::getNullValue(VarTy), VD->
getName(),
4784 nullptr, llvm::GlobalValue::NotThreadLocal,
4787 GV->setAlignment(Align.getAsAlign());
4790 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
4791 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: {
4793 auto *GV =
new llvm::GlobalVariable(
4796 llvm::Constant::getNullValue(VarTy), VD->
getName());
4798 GV->setAlignment(Align.getAsAlign());
4808 auto I = FunctionGlobalizedDecls.find(CGF.
CurFn);
4809 if (I == FunctionGlobalizedDecls.end())
4811 auto VDI = I->getSecond().LocalVarData.find(VD);
4812 if (VDI != I->getSecond().LocalVarData.end())
4813 return VDI->second.PrivateAddr;
4818 auto VDI = I->getSecond().LocalVarData.find(
4819 cast<VarDecl>(cast<DeclRefExpr>(IT->getRef())->getDecl())
4821 if (VDI != I->getSecond().LocalVarData.end())
4822 return VDI->second.PrivateAddr;
4830 FunctionGlobalizedDecls.erase(CGF.
CurFn);
4839 ScheduleKind = OMPC_DIST_SCHEDULE_static;
4846 CGF, S, ScheduleKind, Chunk);
4852 const Expr *&ChunkExpr)
const {
4853 ScheduleKind = OMPC_SCHEDULE_static;
4864 " Expected target-based directive.");
4869 if (!C.capturesVariable())
4871 const VarDecl *VD = C.getCapturedVar();
4872 const auto *RD = VD->
getType()
4876 if (!RD || !RD->isLambda())
4885 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
4887 RD->getCaptureFields(Captures, ThisCapture);
4897 const VarDecl *VD = LC.getCapturedVar();
4900 auto It = Captures.find(VD);
4901 assert(It != Captures.end() &&
"Found lambda capture without field.");
4919 if (!VD || !VD->
hasAttr<OMPAllocateDeclAttr>())
4921 const auto *A = VD->
getAttr<OMPAllocateDeclAttr>();
4922 switch(A->getAllocatorType()) {
4923 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
4925 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
4926 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
4927 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
4928 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
4929 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
4932 case OMPAllocateDeclAttr::OMPConstMemAlloc:
4935 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
4938 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
4939 llvm_unreachable(
"Expected predefined allocator for the variables with the " 4949 llvm::StringMap<bool> Features;
4953 for (
const auto &Feature : Features) {
4954 if (Feature.getValue()) {
4968 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
4984 llvm::raw_svector_ostream Out(Buffer);
4986 <<
" does not support unified addressing";
4987 CGM.
Error(Clause->getBeginLoc(), Out.str());
5016 llvm_unreachable(
"Unexpected Cuda arch.");
5025 std::pair<unsigned, unsigned> Data;
5029 Data.second = CGM.
getLangOpts().OpenMPCUDABlocksPerSM;
5030 if (Data.first && Data.second)
5074 llvm_unreachable(
"Unexpected Cuda arch.");
5076 llvm_unreachable(
"Unexpected NVPTX target without ptx feature.");
5080 if (!GlobalizedRecords.empty()) {
5090 for (
const GlobalPtrSizeRecsTy &Records : GlobalizedRecords) {
5091 if (Records.Records.empty())
5094 unsigned RecAlignment = 0;
5095 for (
const RecordDecl *RD : Records.Records) {
5098 RecAlignment =
std::max(RecAlignment, Alignment);
5101 llvm::alignTo(llvm::alignTo(Size, Alignment) + RecSize, Alignment);
5103 Size = llvm::alignTo(Size, RecAlignment);
5107 const bool UseSharedMemory = Size <= SharedMemorySize;
5115 if (UseSharedMemory) {
5116 SharedStaticRD->
addDecl(Field);
5117 SharedRecs.push_back(&Records);
5119 StaticRD->addDecl(Field);
5120 GlobalRecs.push_back(&Records);
5122 Records.RecSize->setInitializer(llvm::ConstantInt::get(CGM.
SizeTy, Size));
5123 Records.UseSharedMemory->setInitializer(
5124 llvm::ConstantInt::get(CGM.
Int16Ty, UseSharedMemory ? 1 : 0));
5140 SharedStaticRD->
addDecl(Field);
5146 auto *GV =
new llvm::GlobalVariable(
5148 false, llvm::GlobalValue::CommonLinkage,
5149 llvm::Constant::getNullValue(LLVMStaticTy),
5150 "_openmp_shared_static_glob_rd_$_",
nullptr,
5151 llvm::GlobalValue::NotThreadLocal,
5153 auto *Replacement = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
5155 for (
const GlobalPtrSizeRecsTy *Rec : SharedRecs) {
5156 Rec->Buffer->replaceAllUsesWith(Replacement);
5157 Rec->Buffer->eraseFromParent();
5176 auto *GV =
new llvm::GlobalVariable(
5179 llvm::Constant::getNullValue(LLVMArr2Ty),
5180 "_openmp_static_glob_rd_$_");
5181 auto *Replacement = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
5183 for (
const GlobalPtrSizeRecsTy *Rec : GlobalRecs) {
5184 Rec->Buffer->replaceAllUsesWith(Replacement);
5185 Rec->Buffer->eraseFromParent();
5189 if (!TeamsReductions.empty()) {
5194 for (
const RecordDecl *TeamReductionRec : TeamsReductions) {
5211 auto *GV =
new llvm::GlobalVariable(
5212 CGM.
getModule(), LLVMReductionsBufferTy,
5214 llvm::Constant::getNullValue(LLVMReductionsBufferTy),
5215 "_openmp_teams_reductions_buffer_$_");
5216 KernelTeamsReductionPtr->setInitializer(
5217 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV,
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
llvm::PointerType * Int8PtrPtrTy
RecordDecl * buildImplicitRecord(StringRef Name, RecordDecl::TagKind TK=TTK_Struct) const
Create a new implicit TU-level CXXRecordDecl or RecordDecl declaration.
QualType getAddrSpaceQualType(QualType T, LangAS AddressSpace) const
Return the uniqued reference to the type for an address space qualified type with the specified type ...
Address CreateConstInBoundsGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ...
const BlockDecl * getBlockDecl() const
TargetOptions & getTargetOpts() const
Retrieve the target options.
static llvm::Value * emitGlobalToListCopyFunction(CodeGenModule &CGM, ArrayRef< const Expr *> Privates, QualType ReductionArrayTy, SourceLocation Loc, const RecordDecl *TeamReductionRec, const llvm::SmallDenseMap< const ValueDecl *, const FieldDecl *> &VarFieldMap)
This function emits a helper that copies all the reduction variables from the team into the provided ...
static const Decl * getCanonicalDecl(const Decl *D)
llvm::IntegerType * IntTy
int
LValue MakeNaturalAlignPointeeAddrLValue(llvm::Value *V, QualType T)
Given a value of type T* that may not be to a complete object, construct an l-value with the natural ...
Other implicit parameter.
A class which contains all the information about a particular captured value.
if(T->getSizeExpr()) TRY_TO(TraverseStmt(T -> getSizeExpr()))
PointerType - C99 6.7.5.1 - Pointer Declarators.
A (possibly-)qualified type.
CodeGenTypes & getTypes()
llvm::Function * emitReductionFunction(SourceLocation Loc, llvm::Type *ArgsType, ArrayRef< const Expr *> Privates, ArrayRef< const Expr *> LHSExprs, ArrayRef< const Expr *> RHSExprs, ArrayRef< const Expr *> ReductionOps)
Emits reduction function.
ArrayRef< OMPClause * > clauses()
llvm::Type * ConvertTypeForMem(QualType T)
const char * CudaArchToString(CudaArch A)
static llvm::Value * getNVPTXLaneID(CodeGenFunction &CGF)
Get the id of the current lane in the Warp.
Address CreateMemTemp(QualType T, const Twine &Name="tmp", Address *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment and cas...
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
bool HaveInsertPoint() const
HaveInsertPoint - True if an insertion point is defined.
llvm::LLVMContext & getLLVMContext()
void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)
Emits single reduction combiner.
static std::pair< unsigned, unsigned > getSMsBlocksPerSM(CodeGenModule &CGM)
Get number of SMs and number of blocks per SM.
attr_iterator attr_begin() const
Stmt - This represents one statement.
void checkArchForUnifiedAddressing(const OMPRequiresDecl *D) override
Perform check on requires decl to ensure that target architecture supports unified addressing...
void adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, const OMPExecutableDirective &D) const override
Adjust some parameters for the target-based directives, like addresses of the variables captured by r...
void clearLocThreadIdInsertPt(CodeGenFunction &CGF)
static void getTeamsReductionVars(ASTContext &Ctx, const OMPExecutableDirective &D, llvm::SmallVectorImpl< const ValueDecl *> &Vars)
Get list of reduction variables from the teams ... directives.
Decl - This represents one declaration (or definition), e.g.
specific_attr_iterator - Iterates over a subrange of an AttrVec, only providing attributes that are o...
SourceLocation getBeginLoc() const
Returns starting location of directive kind.
SourceLocation getBeginLoc() const LLVM_READONLY
Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override
Gets the address of the native argument basing on the address of the target-specific parameter...
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
llvm::Value * ScratchpadIndex
CapturedStmt * getInnermostCapturedStmt()
Get innermost captured statement for the construct.
static llvm::Value * castValueToType(CodeGenFunction &CGF, llvm::Value *Val, QualType ValTy, QualType CastTy, SourceLocation Loc)
Cast value to the specified type.
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...
llvm::Value * LoadCXXThis()
LoadCXXThis - Load the value of 'this'.
The base class of the type hierarchy.
virtual void completeDefinition()
Note that the definition of this type is now complete.
bool isZero() const
isZero - Test whether the quantity equals zero.
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
static bool hasNestedSPMDDirective(ASTContext &Ctx, const OMPExecutableDirective &D)
Check for inner (nested) SPMD construct, if any.
Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
void emitIfClause(CodeGenFunction &CGF, const Expr *Cond, const RegionCodeGenTy &ThenGen, const RegionCodeGenTy &ElseGen)
Emits code for OpenMP 'if' clause using specified CodeGen function.
LValue EmitLValueForFieldInitialization(LValue Base, const FieldDecl *Field)
EmitLValueForFieldInitialization - Like EmitLValueForField, except that if the Field is a reference...
static bool hasStaticScheduling(const OMPExecutableDirective &D)
Check if the directive is loops based and has schedule clause at all or has static scheduling...
virtual void checkArchForUnifiedAddressing(const OMPRequiresDecl *D)
Perform check on requires decl to ensure that target architecture supports unified addressing...
Describes the capture of a variable or of this, or of a C++1y init-capture.
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::FunctionCallee createNVPTXRuntimeFunction(unsigned Function)
Returns specified OpenMP runtime function for the current OpenMP implementation.
QualType getElementType() const
bool capturesVariable(const VarDecl *Var) const
True if this variable has been captured.
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
CudaArch StringToCudaArch(llvm::StringRef S)
static const Stmt * getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body)
Checks if the Body is the CompoundStmt and returns its child statement iff there is only one that is ...
Represents a variable declaration or definition.
llvm::Value * getThreadID(CodeGenFunction &CGF, SourceLocation Loc)
Gets thread id value for the current thread.
LangAS getLangASFromTargetAS(unsigned TargetAS)
const ArrayType * castAsArrayTypeUnsafe() const
A variant of castAs<> for array type which silently discards qualifiers from the outermost type...
LangAS
Defines the address space values used by the address space qualifier of QualType. ...
DiagnosticsEngine & getDiags() const
OpenMPDirectiveKind ReductionKind
llvm::Value * getPointer() const
llvm::Type * ConvertTypeForMem(QualType T)
ConvertTypeForMem - Convert type T into a llvm::Type.
bool tryEmitDeclareVariant(const GlobalDecl &NewGD, const GlobalDecl &OldGD, llvm::GlobalValue *OrigAddr, bool IsForDefinition) override
Tries to emit declare variant function for OldGD from NewGD.
unsigned getAddressSpace() const
Return the address space that this address resides in.
SPMD execution mode (all threads are worker threads).
IdentifierInfo * getIdentifier() const
Get the identifier that names this declaration, if there is one.
Represents a struct/union/class.
DataSharingMode
Target codegen is specialized based on two data-sharing modes: CUDA, in which the local variables are...
clauselist_range clauselists()
virtual llvm::Function * emitParallelOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP parallel directive D.
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
attr_iterator attr_end() const
llvm::IntegerType * Int64Ty
The scope used to remap some variables as private in the OpenMP loop body (or other captured region e...
Represents a member of a struct/union/class.
This represents clause 'lastprivate' in the '#pragma omp ...' directives.
CharUnits getAlignment() const
llvm::IntegerType * SizeTy
const CapturedStmt * getCapturedStmt(OpenMPDirectiveKind RegionKind) const
Returns the captured statement associated with the component region within the (combined) directive...
unsigned getDefaultLocationReserved2Flags() const override
Returns additional flags that can be stored in reserved_2 field of the default location.
static llvm::Value * getMasterThreadID(CodeGenFunction &CGF)
Get the thread id of the OMP master thread.
void setLocThreadIdInsertPt(CodeGenFunction &CGF, bool AtCurrentPoint=false)
CharUnits getSizeAlign() const
void startDefinition()
Starts the definition of this tag declaration.
bool isReferenceType() const
static llvm::Value * emitListToGlobalCopyFunction(CodeGenModule &CGM, ArrayRef< const Expr *> Privates, QualType ReductionArrayTy, SourceLocation Loc, const RecordDecl *TeamReductionRec, const llvm::SmallDenseMap< const ValueDecl *, const FieldDecl *> &VarFieldMap)
This function emits a helper that copies all the reduction variables from the team into the provided ...
void functionFinished(CodeGenFunction &CGF) override
Cleans up references to the objects in finished function.
OpenMPDirectiveKind getDirectiveKind() const
__DEVICE__ int max(int __a, int __b)
SourceLocation getBeginLoc() const LLVM_READONLY
static bool hasNestedLightweightDirective(ASTContext &Ctx, const OMPExecutableDirective &D)
Check for inner (nested) lightweight runtime construct, if any.
void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value *> CapturedVars) override
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
void InitTempAlloca(Address Alloca, llvm::Value *Value)
InitTempAlloca - Provide an initial value for the given alloca which will be observable at all locati...
This is a common base class for loop directives ('omp simd', 'omp for', 'omp for simd' etc...
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc)
EmitLoadOfComplex - Load a complex number from the specified l-value.
OpenMPDistScheduleClauseKind
OpenMP attributes for 'dist_schedule' clause.
Address CreateElementBitCast(Address Addr, llvm::Type *Ty, const llvm::Twine &Name="")
Cast the element type of the given address to a different type, preserving information like the align...
CharUnits - This is an opaque type for sizes expressed in character units.
bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a teams-kind directive.
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
void emitOpenMPDeviceFunctionRedefinition(GlobalDecl OldGD, GlobalDecl NewGD, llvm::GlobalValue *GV)
Emits the definition of OldGD function with body from NewGD.
CharUnits getAlignment() const
Return the alignment of this pointer.
llvm::PointerType * VoidPtrTy
Expr * getIterationVariable() const
virtual void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc) override
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...
void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override
This function ought to emit, in the general case, a call to.
ModeFlagsTy
Enum for accessing the reserved_2 field of the ident_t struct.
bool isCXXThisExprCaptured() const
CharUnits getDeclAlign(const Decl *D, bool ForAlignof=false) const
Return a conservative estimate of the alignment of the specified decl D.
static llvm::Value * getNVPTXWarpID(CodeGenFunction &CGF)
Get the id of the warp in the block.
Scope - A scope is a transient data structure that is used while parsing the program.
llvm::PointerType * VoidPtrPtrTy
static CGOpenMPRuntimeNVPTX::DataSharingMode getDataSharingMode(CodeGenModule &CGM)
bool isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target code offload directive.
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
void addCompilerUsedGlobal(llvm::GlobalValue *GV)
Add a global to a list to be added to the llvm.compiler.used metadata.
This represents clause 'reduction' in the '#pragma omp ...' directives.
llvm::Value * emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0)
Emits object of ident_t type with info for source location.
bool isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a worksharing directive.
A C++ lambda expression, which produces a function object (of unspecified type) that can be invoked l...
CharUnits getPointerAlign() const
static IntegerLiteral * Create(const ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l)
Returns a new integer literal with value 'V' and type 'type'.
unsigned getDefaultFirstprivateAddressSpace() const override
Returns default address space for the constant firstprivates, constant address space by default...
bool isInitCapture(const LambdaCapture *Capture) const
Determine whether one of this lambda's captures is an init-capture.
virtual void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc) override
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...
llvm::omp::Directive OpenMPDirectiveKind
OpenMP directives.
static llvm::Value * createRuntimeShuffleFunction(CodeGenFunction &CGF, llvm::Value *Elem, QualType ElemType, llvm::Value *Offset, SourceLocation Loc)
This function creates calls to one of two shuffle functions to copy variables between lanes in a warp...
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS) override
Checks if the variable has associated OMPAllocateDeclAttr attribute with the predefined allocator and...
llvm::FunctionCallee CreateConvergentRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs=llvm::AttributeList(), bool Local=false)
Create or return a runtime function declaration with the specified type and name. ...
LValue EmitLValueForField(LValue Base, const FieldDecl *Field)
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Unknown execution mode (orphaned directive).
std::pair< llvm::Value *, llvm::Value * > ComplexPairTy
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
ASTContext & getContext() const
Describes the capture of either a variable, or 'this', or variable-length array type.
bool isOpenMPPrivate(OpenMPClauseKind Kind)
Checks if the specified clause is one of private clauses like 'private', 'firstprivate', 'reduction' etc.
void setAddress(Address address)
static void getDistributeLastprivateVars(ASTContext &Ctx, const OMPExecutableDirective &D, llvm::SmallVectorImpl< const ValueDecl *> &Vars)
Get list of lastprivate variables from the teams distribute ...
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
llvm::Function * emitParallelOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits inlined function for the specified OpenMP parallel.
TypeSourceInfo * getTrivialTypeSourceInfo(QualType T, SourceLocation Loc=SourceLocation()) const
Allocate a TypeSourceInfo where all locations have been initialized to a given location, which defaults to the empty location.
Address CreateDefaultAlignTempAlloca(llvm::Type *Ty, const Twine &Name="tmp")
CreateDefaultAlignedTempAlloca - This creates an alloca with the default ABI alignment of the given L...
This represents '#pragma omp requires...' directive.
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
const Stmt * getAssociatedStmt() const
Returns statement associated with the directive.
virtual bool initFeatureMap(llvm::StringMap< bool > &Features, DiagnosticsEngine &Diags, StringRef CPU, const std::vector< std::string > &FeatureVec) const
Initialize the map with the default set of target features for the CPU this should include all legal ...
Represent the declaration of a variable (in which case it is an lvalue) a function (in which case it ...
This represents one expression.
Address getAddress(CodeGenFunction &CGF) const
Enters a new scope for capturing cleanups, all of which will be executed once the scope is exited...
Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD) override
Gets the OpenMP-specific address of the local variable.
Stmt * IgnoreContainers(bool IgnoreCaptured=false)
Skip no-op (attributed, compound) container stmts and skip captured stmt at the top, if IgnoreCaptured is true.
bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a parallel-kind directive.
const CGFunctionInfo & arrangeNullaryFunction()
A nullary function is a freestanding function of type 'void ()'.
BlockExpr - Adaptor class for mixing a BlockDecl with expressions.
VlaSizePair getVLASize(const VariableArrayType *vla)
Returns an LLVM value that corresponds to the size, in non-variably-sized elements, of a variable length array type, plus that largest non-variably-sized element type.
void getDefaultScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const override
Choose a default value for the schedule clause.
llvm::PointerType * getType() const
Return the type of the pointer value.
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
DeclContext * getDeclContext()
static llvm::iterator_range< specific_clause_iterator< SpecificClause > > getClausesOfKind(ArrayRef< OMPClause *> Clauses)
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
static llvm::Function * emitShuffleAndReduceFunction(CodeGenModule &CGM, ArrayRef< const Expr *> Privates, QualType ReductionArrayTy, llvm::Function *ReduceFn, SourceLocation Loc)
Emit a helper that reduces data across two OpenMP threads (lanes) in the same warp.
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value *> CapturedVars, const Expr *IfCond) override
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
This represents 'ordered' clause in the '#pragma omp ...' directive.
llvm::IntegerType * Int32Ty
LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T)
QualType getRecordType(const RecordDecl *Decl) const
UnaryOperator - This represents the unary-expression's (except sizeof and alignof), the postinc/postdec operators from postfix-expression, and various extensions.
MachineConfiguration
GPU Configuration: This information can be derived from cuda registers, however, providing compile ti...
llvm::Value * EmitCastToVoidPtr(llvm::Value *value)
Emit a cast to void* in the appropriate address space.
const TargetInfo & getTarget() const
virtual void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc)
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...
const LangOptions & getLangOpts() const
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
ASTContext & getContext() const
Non-SPMD execution mode (1 master thread, others are workers).
llvm::Value * ScratchpadWidth
virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr)
Emits a critical region.
VarDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
GlobalDecl - represents a global declaration.
bool hasClausesOfKind() const
Returns true if the current directive has one or more clauses of a specific kind. ...
std::string CPU
If given, the name of the target CPU to generate code for.
The l-value was considered opaque, so the alignment was determined from a type.
Address CreateConstGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ...
Address CreateBitCast(Address Addr, llvm::Type *Ty, const llvm::Twine &Name="")
This captures a statement into a function.
QualType getCanonicalType() const
static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)
Returns default flags for the barriers depending on the directive, for which this barrier is going to ...
Encodes a location in the source.
static llvm::Value * getThreadLimit(CodeGenFunction &CGF, bool IsInSPMDExecutionMode=false)
Get the value of the thread_limit clause in the teams directive.
llvm::Type * getIdentTyPointerTy()
Returns pointer to ident_t type.
QualType getUIntPtrType() const
Return a type compatible with "uintptr_t" (C99 7.18.1.4), as defined by the target.
Expr * getSubExpr() const
bool isVariablyModifiedType() const
Whether this type is a variably-modified type (C99 6.7.5).
void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override
Emits a critical region.
This is a basic class for representing single OpenMP executable directive.
static bool Ret(InterpState &S, CodePtr &PC, APValue &Result)
CastKind getCastKind() const
This represents 'schedule' clause in the '#pragma omp ...' directive.
llvm::Value * getPointer(CodeGenFunction &CGF) const
llvm::IntegerType * Int16Ty
const Decl * getDecl() const
DeclStmt - Adaptor class for mixing declarations with statements and expressions. ...
Address CreateConstArrayGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = [n x T]* ...
static llvm::Value * emitGlobalToListReduceFunction(CodeGenModule &CGM, ArrayRef< const Expr *> Privates, QualType ReductionArrayTy, SourceLocation Loc, const RecordDecl *TeamReductionRec, const llvm::SmallDenseMap< const ValueDecl *, const FieldDecl *> &VarFieldMap, llvm::Function *ReduceFn)
This function emits a helper that reduces all the reduction variables from the team into the provided...
This file defines OpenMP nodes for declarative directives.
std::vector< std::string > Features
The list of target specific features to enable or disable – this should be a list of strings startin...
llvm::Function * emitTeamsOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits inlined function for the specified OpenMP teams.
This is a basic class for representing single OpenMP clause.
void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value *> Args=llvm::None) const override
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
static llvm::Value * emitListToGlobalReduceFunction(CodeGenModule &CGM, ArrayRef< const Expr *> Privates, QualType ReductionArrayTy, SourceLocation Loc, const RecordDecl *TeamReductionRec, const llvm::SmallDenseMap< const ValueDecl *, const FieldDecl *> &VarFieldMap, llvm::Function *ReduceFn)
This function emits a helper that reduces all the reduction variables from the team into the provided...
bool isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind)
Checks if the specified directive kind is one of the composite or combined directives that need loop ...
static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name, bool Mode)
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
ImplicitCastExpr - Allows us to explicitly represent implicit type conversions, which have no direct ...
Stmt * getCapturedStmt()
Retrieve the statement being captured.
bool isLValue() const
isLValue - True if this expression is an "l-value" according to the rules of the current language...
void Error(SourceLocation loc, StringRef error)
Emit a general error that something can't be done.
virtual void functionFinished(CodeGenFunction &CGF)
Cleans up references to the objects in finished function.
const VarDecl * translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const override
Translates the native parameter of outlined function if this is required for target.
virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value *> Args=llvm::None) const
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
FunctionArgList - Type for representing both the decl and type of parameters to a function...
static CudaArch getCudaArch(CodeGenModule &CGM)
void setAction(PrePostActionTy &Action) const
CGFunctionInfo - Class to encapsulate the information about a function definition.
This class organizes the cross-function state that is used while generating LLVM code.
CGOpenMPRuntime & getOpenMPRuntime()
Return a reference to the configured OpenMP runtime.
static ParmVarDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, StorageClass S, Expr *DefArg)
Dataflow Directional Tag Classes.
Class provides a way to call simple version of codegen for OpenMP region, or an advanced with possibl...
LValue EmitLoadOfReferenceLValue(LValue RefLVal)
A qualifier set is used to build a set of qualifiers.
DeclContext - This is used only as base class of specific decl types that can act as declaration cont...
ArrayRef< Capture > captures() const
A basic class for pre|post-action for advanced codegen sequence for OpenMP region.
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
static void emitReductionListCopy(CopyAction Action, CodeGenFunction &CGF, QualType ReductionArrayTy, ArrayRef< const Expr *> Privates, Address SrcBase, Address DestBase, CopyOptionsTy CopyOptions={nullptr, nullptr, nullptr})
Emit instructions to copy a Reduce list, which contains partially aggregated values, in the specified direction.
const Type * strip(QualType type)
Collect any qualifiers on the given type and return an unqualified type.
llvm::FunctionCallee CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs=llvm::AttributeList(), bool Local=false, bool AssumeConvergent=false)
Create or return a runtime function declaration with the specified type and name. ...
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
bool isInitCapture() const
Whether this variable is the implicit variable for a lambda init-capture.
llvm::Module & getModule() const
QualType apply(const ASTContext &Context, QualType QT) const
Apply the collected qualifiers to the given type.
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
virtual bool hasFeature(StringRef Feature) const
Determine whether the given target has the given feature.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc)
Emits address of the word in a memory where current thread id is stored.
void getOpenMPCaptureRegions(llvm::SmallVectorImpl< OpenMPDirectiveKind > &CaptureRegions, OpenMPDirectiveKind DKind)
Return the captured regions of an OpenMP directive.
bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a distribute directive.
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr *> Privates, ArrayRef< const Expr *> LHSExprs, ArrayRef< const Expr *> RHSExprs, ArrayRef< const Expr *> ReductionOps, ReductionOptionsTy Options) override
Emit a code for reduction clause.
This file defines OpenMP AST classes for executable directives and clauses.
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
void EmitStoreOfComplex(ComplexPairTy V, LValue dest, bool isInit)
EmitStoreOfComplex - Store a complex number into the specified l-value.
llvm::Type * getElementType() const
Return the type of the values stored in this address.
llvm::PointerType * Int8PtrTy
OpenMPScheduleClauseKind
OpenMP attributes for 'schedule' clause.
void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI)
Set the attributes on the LLVM function for the given decl and function info.
static bool supportsSPMDExecutionMode(ASTContext &Ctx, const OMPExecutableDirective &D)
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, const Expr *SizeExpr, ArrayType::ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type...
Internal linkage, which indicates that the entity can be referred to from within the translation unit...
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
void addDecl(Decl *D)
Add the declaration D into this context.
bool hasAssociatedStmt() const
Returns true if directive has associated statement.
ExecutionMode
Defines the execution mode.
void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) override
Emits OpenMP-specific function prolog.
bool isLValueReferenceType() const
static std::pair< ValueDecl *, bool > getPrivateItem(Sema &S, Expr *&RefExpr, SourceLocation &ELoc, SourceRange &ERange, bool AllowArraySection=false)
static void shuffleAndStore(CodeGenFunction &CGF, Address SrcAddr, Address DestAddr, QualType ElemType, llvm::Value *Offset, SourceLocation Loc)
CapturedDecl * getCapturedDecl()
Retrieve the outlined function declaration.
Generic data-sharing mode.
int64_t toBits(CharUnits CharSize) const
Convert a size in characters to a size in bits.
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr *> Privates, ArrayRef< const Expr *> LHSExprs, ArrayRef< const Expr *> RHSExprs, ArrayRef< const Expr *> ReductionOps, ReductionOptionsTy Options)
Emit a code for reduction clause.
bool hasSignedIntegerRepresentation() const
Determine whether this type has an signed integer representation of some sort, e.g., it is an signed integer type or a vector.
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block, taking care to avoid creation of branches from dummy blocks.
Privates[]
Gets the list of initial values for linear variables.
virtual llvm::Function * emitTeamsOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP teams directive D.
LValue EmitLValue(const Expr *E)
EmitLValue - Emit code to compute a designator that specifies the location of the expression...
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
capture_range captures() const
Retrieve this lambda's captures.
CapturedRegionKind getKind() const
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind, llvm::Value *&Chunk) const override
Choose a default value for the dist_schedule clause.
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
static llvm::Value * getNVPTXThreadID(CodeGenFunction &CGF)
Get the id of the current thread on the GPU.
CGCapturedStmtInfo * CapturedStmtInfo
llvm::Value * EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)
Emit a conversion from the specified type to the specified destination type, both of which are LLVM s...
const VariableArrayType * getAsVariableArrayType(QualType T) const
static llvm::Value * getNVPTXWarpSize(CodeGenFunction &CGF)
Get the GPU warp size.
llvm::Value * RemoteLaneOffset
void EmitAggregateCopy(LValue Dest, LValue Src, QualType EltTy, AggValueSlot::Overlap_t MayOverlap, bool isVolatile=false)
EmitAggregateCopy - Emit an aggregate copy.
A reference to a declared variable, function, enum, etc.
CGOpenMPRuntimeNVPTX(CodeGenModule &CGM)
void addAddressSpace(LangAS space)
static llvm::Value * emitInterWarpCopyFunction(CodeGenModule &CGM, ArrayRef< const Expr *> Privates, QualType ReductionArrayTy, SourceLocation Loc)
This function emits a helper that gathers Reduce lists from the first lane of every active warp to la...
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
bool isOpenMPLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a directive with an associated loop construct.
LValue - This represents an lvalue reference.
Information for lazily generating a cleanup.
virtual void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind, llvm::Value *&Chunk) const
Choose default schedule type and chunk value for the dist_schedule clause.
void setAccess(AccessSpecifier AS)
bool isConstant(const ASTContext &Ctx) const
unsigned getTargetAddressSpace(QualType T) const
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) override
Emit an implicit/explicit barrier for OpenMP threads.
static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, const llvm::Twine &Name="")
const LangOptions & getLangOpts() const
static llvm::Value * getNVPTXNumThreads(CodeGenFunction &CGF)
Get the maximum number of threads in a block of the GPU.
virtual void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc)
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...
Attr - This represents one attribute.
SourceLocation getLocation() const
QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const
getIntTypeForBitwidth - sets integer QualType according to specified details: bitwidth, signed/unsigned.
Expr * IgnoreParens() LLVM_READONLY
Skip past any parentheses which might surround this expression until reaching a fixed point...
static OMPLinearClause * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, OpenMPLinearClauseKind Modifier, SourceLocation ModifierLoc, SourceLocation ColonLoc, SourceLocation EndLoc, ArrayRef< Expr *> VL, ArrayRef< Expr *> PL, ArrayRef< Expr *> IL, Expr *Step, Expr *CalcStep, Stmt *PreInit, Expr *PostUpdate)
Creates clause with a list of variables VL and a linear step Step.
CanQualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for.
static bool supportsLightweightRuntime(ASTContext &Ctx, const OMPExecutableDirective &D)
Checks if the construct supports lightweight runtime.