clang  8.0.0
CGOpenMPRuntime.cpp
1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGOpenMPRuntime.h"
17 #include "CGRecordLayout.h"
18 #include "CodeGenFunction.h"
19 #include "clang/CodeGen/ConstantInitBuilder.h"
20 #include "clang/AST/Decl.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/Basic/BitmaskEnum.h"
23 #include "llvm/ADT/ArrayRef.h"
24 #include "llvm/Bitcode/BitcodeReader.h"
25 #include "llvm/IR/CallSite.h"
26 #include "llvm/IR/DerivedTypes.h"
27 #include "llvm/IR/GlobalValue.h"
28 #include "llvm/IR/Value.h"
29 #include "llvm/Support/Format.h"
30 #include "llvm/Support/raw_ostream.h"
31 #include <cassert>
32 
33 using namespace clang;
34 using namespace CodeGen;
35 
36 namespace {
37 /// Base class for handling code generation inside OpenMP regions.
38 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
39 public:
40  /// Kinds of OpenMP regions used in codegen.
41  enum CGOpenMPRegionKind {
42  /// Region with outlined function for standalone 'parallel'
43  /// directive.
44  ParallelOutlinedRegion,
45  /// Region with outlined function for standalone 'task' directive.
46  TaskOutlinedRegion,
47  /// Region for constructs that do not require function outlining,
48  /// like 'for', 'sections', 'atomic' etc. directives.
49  InlinedRegion,
50  /// Region with outlined function for standalone 'target' directive.
51  TargetRegion,
52  };
53 
54  CGOpenMPRegionInfo(const CapturedStmt &CS,
55  const CGOpenMPRegionKind RegionKind,
56  const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
57  bool HasCancel)
58  : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
59  CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
60 
61  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
62  const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
63  bool HasCancel)
64  : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
65  Kind(Kind), HasCancel(HasCancel) {}
66 
67  /// Get a variable or parameter for storing global thread id
68  /// inside OpenMP construct.
69  virtual const VarDecl *getThreadIDVariable() const = 0;
70 
71  /// Emit the captured statement body.
72  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
73 
74  /// Get an LValue for the current ThreadID variable.
75  /// \return LValue for thread id variable. This LValue always has type int32*.
76  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
77 
78  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
79 
80  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
81 
82  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
83 
84  bool hasCancel() const { return HasCancel; }
85 
86  static bool classof(const CGCapturedStmtInfo *Info) {
87  return Info->getKind() == CR_OpenMP;
88  }
89 
90  ~CGOpenMPRegionInfo() override = default;
91 
92 protected:
93  CGOpenMPRegionKind RegionKind;
94  RegionCodeGenTy CodeGen;
95  OpenMPDirectiveKind Kind;
96  bool HasCancel;
97 };
98 
99 /// API for captured statement code generation in OpenMP constructs.
100 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
101 public:
102  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
103  const RegionCodeGenTy &CodeGen,
104  OpenMPDirectiveKind Kind, bool HasCancel,
105  StringRef HelperName)
106  : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
107  HasCancel),
108  ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
109  assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
110  }
111 
112  /// Get a variable or parameter for storing global thread id
113  /// inside OpenMP construct.
114  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
115 
116  /// Get the name of the capture helper.
117  StringRef getHelperName() const override { return HelperName; }
118 
119  static bool classof(const CGCapturedStmtInfo *Info) {
120  return CGOpenMPRegionInfo::classof(Info) &&
121  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
122  ParallelOutlinedRegion;
123  }
124 
125 private:
126  /// A variable or parameter storing global thread id for OpenMP
127  /// constructs.
128  const VarDecl *ThreadIDVar;
129  StringRef HelperName;
130 };
131 
132 /// API for captured statement code generation in OpenMP constructs.
133 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
134 public:
135  class UntiedTaskActionTy final : public PrePostActionTy {
136  bool Untied;
137  const VarDecl *PartIDVar;
138  const RegionCodeGenTy UntiedCodeGen;
139  llvm::SwitchInst *UntiedSwitch = nullptr;
140 
141  public:
142  UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
143  const RegionCodeGenTy &UntiedCodeGen)
144  : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
145  void Enter(CodeGenFunction &CGF) override {
146  if (Untied) {
147  // Emit task switching point.
148  LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
149  CGF.GetAddrOfLocalVar(PartIDVar),
150  PartIDVar->getType()->castAs<PointerType>());
151  llvm::Value *Res =
152  CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
153  llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
154  UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
155  CGF.EmitBlock(DoneBB);
156  CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
157  CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
158  UntiedSwitch->addCase(CGF.Builder.getInt32(0),
159  CGF.Builder.GetInsertBlock());
160  emitUntiedSwitch(CGF);
161  }
162  }
163  void emitUntiedSwitch(CodeGenFunction &CGF) const {
164  if (Untied) {
165  LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
166  CGF.GetAddrOfLocalVar(PartIDVar),
167  PartIDVar->getType()->castAs<PointerType>());
168  CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
169  PartIdLVal);
170  UntiedCodeGen(CGF);
171  CodeGenFunction::JumpDest CurPoint =
172  CGF.getJumpDestInCurrentScope(".untied.next.");
173  CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
174  CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
175  UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
176  CGF.Builder.GetInsertBlock());
177  CGF.EmitBranchThroughCleanup(CurPoint);
178  CGF.EmitBlock(CurPoint.getBlock());
179  }
180  }
181  unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
182  };
183  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
184  const VarDecl *ThreadIDVar,
185  const RegionCodeGenTy &CodeGen,
186  OpenMPDirectiveKind Kind, bool HasCancel,
187  const UntiedTaskActionTy &Action)
188  : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
189  ThreadIDVar(ThreadIDVar), Action(Action) {
190  assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
191  }
192 
193  /// Get a variable or parameter for storing global thread id
194  /// inside OpenMP construct.
195  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
196 
197  /// Get an LValue for the current ThreadID variable.
198  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
199 
200  /// Get the name of the capture helper.
201  StringRef getHelperName() const override { return ".omp_outlined."; }
202 
203  void emitUntiedSwitch(CodeGenFunction &CGF) override {
204  Action.emitUntiedSwitch(CGF);
205  }
206 
207  static bool classof(const CGCapturedStmtInfo *Info) {
208  return CGOpenMPRegionInfo::classof(Info) &&
209  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
210  TaskOutlinedRegion;
211  }
212 
213 private:
214  /// A variable or parameter storing global thread id for OpenMP
215  /// constructs.
216  const VarDecl *ThreadIDVar;
217  /// Action for emitting code for untied tasks.
218  const UntiedTaskActionTy &Action;
219 };
220 
221 /// API for inlined captured statement code generation in OpenMP
222 /// constructs.
223 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
224 public:
225  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
226  const RegionCodeGenTy &CodeGen,
227  OpenMPDirectiveKind Kind, bool HasCancel)
228  : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
229  OldCSI(OldCSI),
230  OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
231 
232  // Retrieve the value of the context parameter.
233  llvm::Value *getContextValue() const override {
234  if (OuterRegionInfo)
235  return OuterRegionInfo->getContextValue();
236  llvm_unreachable("No context value for inlined OpenMP region");
237  }
238 
239  void setContextValue(llvm::Value *V) override {
240  if (OuterRegionInfo) {
241  OuterRegionInfo->setContextValue(V);
242  return;
243  }
244  llvm_unreachable("No context value for inlined OpenMP region");
245  }
246 
247  /// Lookup the captured field decl for a variable.
248  const FieldDecl *lookup(const VarDecl *VD) const override {
249  if (OuterRegionInfo)
250  return OuterRegionInfo->lookup(VD);
251  // If there is no outer outlined region, there is no need to look up the
252  // variable in a list of captured variables; we can use the original one.
253  return nullptr;
254  }
255 
256  FieldDecl *getThisFieldDecl() const override {
257  if (OuterRegionInfo)
258  return OuterRegionInfo->getThisFieldDecl();
259  return nullptr;
260  }
261 
262  /// Get a variable or parameter for storing global thread id
263  /// inside OpenMP construct.
264  const VarDecl *getThreadIDVariable() const override {
265  if (OuterRegionInfo)
266  return OuterRegionInfo->getThreadIDVariable();
267  return nullptr;
268  }
269 
270  /// Get an LValue for the current ThreadID variable.
271  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
272  if (OuterRegionInfo)
273  return OuterRegionInfo->getThreadIDVariableLValue(CGF);
274  llvm_unreachable("No LValue for inlined OpenMP construct");
275  }
276 
277  /// Get the name of the capture helper.
278  StringRef getHelperName() const override {
279  if (auto *OuterRegionInfo = getOldCSI())
280  return OuterRegionInfo->getHelperName();
281  llvm_unreachable("No helper name for inlined OpenMP construct");
282  }
283 
284  void emitUntiedSwitch(CodeGenFunction &CGF) override {
285  if (OuterRegionInfo)
286  OuterRegionInfo->emitUntiedSwitch(CGF);
287  }
288 
289  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
290 
291  static bool classof(const CGCapturedStmtInfo *Info) {
292  return CGOpenMPRegionInfo::classof(Info) &&
293  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
294  }
295 
296  ~CGOpenMPInlinedRegionInfo() override = default;
297 
298 private:
299  /// CodeGen info about outer OpenMP region.
300  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
301  CGOpenMPRegionInfo *OuterRegionInfo;
302 };
303 
304 /// API for captured statement code generation in OpenMP target
305 /// constructs. For these captures, implicit parameters are used instead of the
306 /// captured fields. The name of the target region has to be unique in a given
307 /// application so it is provided by the client, because only the client has
308 /// the information to generate that.
309 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
310 public:
311  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
312  const RegionCodeGenTy &CodeGen, StringRef HelperName)
313  : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
314  /*HasCancel=*/false),
315  HelperName(HelperName) {}
316 
317  /// This is unused for target regions because each starts executing
318  /// with a single thread.
319  const VarDecl *getThreadIDVariable() const override { return nullptr; }
320 
321  /// Get the name of the capture helper.
322  StringRef getHelperName() const override { return HelperName; }
323 
324  static bool classof(const CGCapturedStmtInfo *Info) {
325  return CGOpenMPRegionInfo::classof(Info) &&
326  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
327  }
328 
329 private:
330  StringRef HelperName;
331 };
332 
333 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
334  llvm_unreachable("No codegen for expressions");
335 }
336 /// API for generation of expressions captured in an innermost OpenMP
337 /// region.
338 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
339 public:
340  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
341  : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
342  OMPD_unknown,
343  /*HasCancel=*/false),
344  PrivScope(CGF) {
345  // Make sure the globals captured in the provided statement are local by
346  // using the privatization logic. We assume the same variable is not
347  // captured more than once.
348  for (const auto &C : CS.captures()) {
349  if (!C.capturesVariable() && !C.capturesVariableByCopy())
350  continue;
351 
352  const VarDecl *VD = C.getCapturedVar();
353  if (VD->isLocalVarDeclOrParm())
354  continue;
355 
356  DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
357  /*RefersToEnclosingVariableOrCapture=*/false,
358  VD->getType().getNonReferenceType(), VK_LValue,
359  C.getLocation());
360  PrivScope.addPrivate(
361  VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); });
362  }
363  (void)PrivScope.Privatize();
364  }
365 
366  /// Lookup the captured field decl for a variable.
367  const FieldDecl *lookup(const VarDecl *VD) const override {
368  if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
369  return FD;
370  return nullptr;
371  }
372 
373  /// Emit the captured statement body.
374  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
375  llvm_unreachable("No body for expressions");
376  }
377 
378  /// Get a variable or parameter for storing global thread id
379  /// inside OpenMP construct.
380  const VarDecl *getThreadIDVariable() const override {
381  llvm_unreachable("No thread id for expressions");
382  }
383 
384  /// Get the name of the capture helper.
385  StringRef getHelperName() const override {
386  llvm_unreachable("No helper name for expressions");
387  }
388 
389  static bool classof(const CGCapturedStmtInfo *Info) { return false; }
390 
391 private:
392  /// Private scope to capture global variables.
393  CodeGenFunction::OMPPrivateScope PrivScope;
394 };
395 
396 /// RAII for emitting code of OpenMP constructs.
397 class InlinedOpenMPRegionRAII {
398  CodeGenFunction &CGF;
399  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
400  FieldDecl *LambdaThisCaptureField = nullptr;
401  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
402 
403 public:
404  /// Constructs region for combined constructs.
405  /// \param CodeGen Code generation sequence for combined directives. Includes
406  /// a list of functions used for code generation of implicitly inlined
407  /// regions.
408  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
409  OpenMPDirectiveKind Kind, bool HasCancel)
410  : CGF(CGF) {
411  // Start emission for the construct.
412  CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
413  CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
414  std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
415  LambdaThisCaptureField = CGF.LambdaThisCaptureField;
416  CGF.LambdaThisCaptureField = nullptr;
417  BlockInfo = CGF.BlockInfo;
418  CGF.BlockInfo = nullptr;
419  }
420 
421  ~InlinedOpenMPRegionRAII() {
422  // Restore original CapturedStmtInfo only if we're done with code emission.
423  auto *OldCSI =
424  cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
425  delete CGF.CapturedStmtInfo;
426  CGF.CapturedStmtInfo = OldCSI;
427  std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
428  CGF.LambdaThisCaptureField = LambdaThisCaptureField;
429  CGF.BlockInfo = BlockInfo;
430  }
431 };
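// Usage sketch (simplified): CGOpenMPRuntime::emitInlinedDirective does roughly
//   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
//   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
// so the inlined region info is installed only while the body is emitted and
// the previous CapturedStmtInfo is restored by the destructor.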
432 
433 /// Values for bit flags used in the ident_t to describe the fields.
434 /// All enumerated elements are named and described in accordance with the code
435 /// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
436 enum OpenMPLocationFlags : unsigned {
437  /// Use trampoline for internal microtask.
438  OMP_IDENT_IMD = 0x01,
439  /// Use c-style ident structure.
440  OMP_IDENT_KMPC = 0x02,
441  /// Atomic reduction option for kmpc_reduce.
442  OMP_ATOMIC_REDUCE = 0x10,
443  /// Explicit 'barrier' directive.
444  OMP_IDENT_BARRIER_EXPL = 0x20,
445  /// Implicit barrier in code.
446  OMP_IDENT_BARRIER_IMPL = 0x40,
447  /// Implicit barrier in 'for' directive.
448  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
449  /// Implicit barrier in 'sections' directive.
450  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
451  /// Implicit barrier in 'single' directive.
452  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
453  /// Call of __kmp_for_static_init for static loop.
454  OMP_IDENT_WORK_LOOP = 0x200,
455  /// Call of __kmp_for_static_init for sections.
456  OMP_IDENT_WORK_SECTIONS = 0x400,
457  /// Call of __kmp_for_static_init for distribute.
458  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
459  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
460 };
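// Usage sketch: these flags are OR-ed into the 'flags' field of the ident_t
// location object; e.g. an implicit barrier emitted for a worksharing 'for'
// loop would typically be described by OMP_IDENT_KMPC | OMP_IDENT_BARRIER_IMPL_FOR.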
461 
462 /// Describes ident structure that describes a source location.
463 /// All descriptions are taken from
464 /// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
465 /// Original structure:
466 /// typedef struct ident {
467 /// kmp_int32 reserved_1; /**< might be used in Fortran;
468 /// see above */
469 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags;
470 /// KMP_IDENT_KMPC identifies this union
471 /// member */
472 /// kmp_int32 reserved_2; /**< not really used in Fortran any more;
473 /// see above */
474 ///#if USE_ITT_BUILD
475 /// /* but currently used for storing
476 /// region-specific ITT */
477 /// /* contextual information. */
478 ///#endif /* USE_ITT_BUILD */
479 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for
480 /// C++ */
481 /// char const *psource; /**< String describing the source location.
482 /// The string is composed of semi-colon separated
483 /// fields which describe the source file,
484 /// the function and a pair of line numbers that
485 /// delimit the construct.
486 /// */
487 /// } ident_t;
488 enum IdentFieldIndex {
489  /// might be used in Fortran
490  IdentField_Reserved_1,
491  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
492  IdentField_Flags,
493  /// Not really used in Fortran any more
494  IdentField_Reserved_2,
495  /// Source[4] in Fortran, do not use for C++
496  IdentField_Reserved_3,
497  /// String describing the source location. The string is composed of
498  /// semi-colon separated fields which describe the source file, the function
499  /// and a pair of line numbers that delimit the construct.
500  IdentField_PSource
501 };
502 
503 /// Schedule types for 'omp for' loops (these enumerators are taken from
504 /// the enum sched_type in kmp.h).
505 enum OpenMPSchedType {
506  /// Lower bound for default (unordered) versions.
507  OMP_sch_lower = 32,
508  OMP_sch_static_chunked = 33,
509  OMP_sch_static = 34,
510  OMP_sch_dynamic_chunked = 35,
511  OMP_sch_guided_chunked = 36,
512  OMP_sch_runtime = 37,
513  OMP_sch_auto = 38,
514  /// static with chunk adjustment (e.g., simd)
515  OMP_sch_static_balanced_chunked = 45,
516  /// Lower bound for 'ordered' versions.
517  OMP_ord_lower = 64,
518  OMP_ord_static_chunked = 65,
519  OMP_ord_static = 66,
520  OMP_ord_dynamic_chunked = 67,
521  OMP_ord_guided_chunked = 68,
522  OMP_ord_runtime = 69,
523  OMP_ord_auto = 70,
524  OMP_sch_default = OMP_sch_static,
525  /// dist_schedule types
526  OMP_dist_sch_static_chunked = 91,
527  OMP_dist_sch_static = 92,
528  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
529  /// Set if the monotonic schedule modifier was present.
530  OMP_sch_modifier_monotonic = (1 << 29),
531  /// Set if the nonmonotonic schedule modifier was present.
532  OMP_sch_modifier_nonmonotonic = (1 << 30),
533 };
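// Mapping sketch: a 'schedule' clause is lowered onto these kmp.h values, e.g.
// '#pragma omp for schedule(dynamic, 4)' selects OMP_sch_dynamic_chunked and
// passes the chunk size 4 to the __kmpc_dispatch_init_* runtime entry point.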
534 
536  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
537  /// kmpc_micro microtask, ...);
539  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
540  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
542  /// Call to void __kmpc_threadprivate_register( ident_t *,
543  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
545  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
547  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
548  // kmp_critical_name *crit);
550  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
551  // global_tid, kmp_critical_name *crit, uintptr_t hint);
553  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
554  // kmp_critical_name *crit);
556  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
557  // global_tid);
559  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
561  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
563  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
564  // global_tid);
566  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
567  // global_tid);
569  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
570  // kmp_int32 num_threads);
572  // Call to void __kmpc_flush(ident_t *loc);
574  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
576  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
578  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
579  // int end_part);
581  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
583  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
585  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
586  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
587  // kmp_routine_entry_t *task_entry);
589  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
590  // new_task);
592  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
593  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
594  // kmp_int32 didit);
596  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
597  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
598  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
600  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
601  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
602  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
603  // *lck);
605  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
606  // kmp_critical_name *lck);
608  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
609  // kmp_critical_name *lck);
611  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
612  // kmp_task_t * new_task);
614  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
615  // kmp_task_t * new_task);
617  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
619  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
621  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
622  // global_tid);
624  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
626  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
628  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
629  // int proc_bind);
631  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
632  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
633  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
635  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
636  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
637  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
639  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
640  // global_tid, kmp_int32 cncl_kind);
642  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
643  // kmp_int32 cncl_kind);
645  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
646  // kmp_int32 num_teams, kmp_int32 thread_limit);
648  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
649  // microtask, ...);
651  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
652  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
653  // sched, kmp_uint64 grainsize, void *task_dup);
655  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
656  // num_dims, struct kmp_dim *dims);
658  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
660  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
661  // *vec);
663  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
664  // *vec);
666  // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
667  // *data);
669  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
670  // *d);
672 
673  //
674  // Offloading related calls
675  //
676  // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
677  // size);
679  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
680  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
681  // *arg_types);
683  // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
684  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
685  // *arg_types);
687  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
688  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
689  // *arg_types, int32_t num_teams, int32_t thread_limit);
691  // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
692  // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
693  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
695  // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
697  // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
699  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
700  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
702  // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
703  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
704  // *arg_types);
706  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
707  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
709  // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
710  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
711  // *arg_types);
713  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
714  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
716  // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
717  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
718  // *arg_types);
720 };
721 
722 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
723 /// region.
724 class CleanupTy final : public EHScopeStack::Cleanup {
725  PrePostActionTy *Action;
726 
727 public:
728  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
729  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
730  if (!CGF.HaveInsertPoint())
731  return;
732  Action->Exit(CGF);
733  }
734 };
735 
736 } // anonymous namespace
737 
738 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
739  CodeGenFunction::RunCleanupsScope Scope(CGF);
740  if (PrePostAction) {
741  CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
742  Callback(CodeGen, CGF, *PrePostAction);
743  } else {
744  PrePostActionTy Action;
745  Callback(CodeGen, CGF, Action);
746  }
747 }
748 
749 /// Check if the combiner is a call to UDR combiner and if it is so return the
750 /// UDR decl used for reduction.
751 static const OMPDeclareReductionDecl *
752 getReductionInit(const Expr *ReductionOp) {
753  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
754  if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
755  if (const auto *DRE =
756  dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
757  if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
758  return DRD;
759  return nullptr;
760 }
761 
762 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
763  const OMPDeclareReductionDecl *DRD,
764  const Expr *InitOp,
765  Address Private, Address Original,
766  QualType Ty) {
767  if (DRD->getInitializer()) {
768  std::pair<llvm::Function *, llvm::Function *> Reduction =
769  CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
770  const auto *CE = cast<CallExpr>(InitOp);
771  const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
772  const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
773  const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
774  const auto *LHSDRE =
775  cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
776  const auto *RHSDRE =
777  cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
778  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
779  PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
780  [=]() { return Private; });
781  PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
782  [=]() { return Original; });
783  (void)PrivateScope.Privatize();
784  RValue Func = RValue::get(Reduction.second);
785  CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
786  CGF.EmitIgnoredExpr(InitOp);
787  } else {
788  llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
789  std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
790  auto *GV = new llvm::GlobalVariable(
791  CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
792  llvm::GlobalValue::PrivateLinkage, Init, Name);
793  LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
794  RValue InitRVal;
795  switch (CGF.getEvaluationKind(Ty)) {
796  case TEK_Scalar:
797  InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
798  break;
799  case TEK_Complex:
800  InitRVal =
801  RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
802  break;
803  case TEK_Aggregate:
804  InitRVal = RValue::getAggregate(LV.getAddress());
805  break;
806  }
807  OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
808  CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
809  CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
810  /*IsInitializer=*/false);
811  }
812 }
813 
814 /// Emit initialization of arrays of complex types.
815 /// \param DestAddr Address of the array.
816 /// \param Type Type of array.
817 /// \param Init Initial expression of array.
818 /// \param SrcAddr Address of the original array.
819 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
820  QualType Type, bool EmitDeclareReductionInit,
821  const Expr *Init,
822  const OMPDeclareReductionDecl *DRD,
823  Address SrcAddr = Address::invalid()) {
824  // Perform element-by-element initialization.
825  QualType ElementTy;
826 
827  // Drill down to the base element type on both arrays.
828  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
829  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
830  DestAddr =
831  CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
832  if (DRD)
833  SrcAddr =
834  CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
835 
836  llvm::Value *SrcBegin = nullptr;
837  if (DRD)
838  SrcBegin = SrcAddr.getPointer();
839  llvm::Value *DestBegin = DestAddr.getPointer();
840  // Cast from pointer to array type to pointer to single element.
841  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
842  // The basic structure here is a while-do loop.
843  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
844  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
845  llvm::Value *IsEmpty =
846  CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
847  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
848 
849  // Enter the loop body, making that address the current address.
850  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
851  CGF.EmitBlock(BodyBB);
852 
853  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
854 
855  llvm::PHINode *SrcElementPHI = nullptr;
856  Address SrcElementCurrent = Address::invalid();
857  if (DRD) {
858  SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
859  "omp.arraycpy.srcElementPast");
860  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
861  SrcElementCurrent =
862  Address(SrcElementPHI,
863  SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
864  }
865  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
866  DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
867  DestElementPHI->addIncoming(DestBegin, EntryBB);
868  Address DestElementCurrent =
869  Address(DestElementPHI,
870  DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
871 
872  // Emit copy.
873  {
874  CodeGenFunction::RunCleanupsScope InitScope(CGF);
875  if (EmitDeclareReductionInit) {
876  emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
877  SrcElementCurrent, ElementTy);
878  } else
879  CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
880  /*IsInitializer=*/false);
881  }
882 
883  if (DRD) {
884  // Shift the address forward by one element.
885  llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
886  SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
887  SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
888  }
889 
890  // Shift the address forward by one element.
891  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
892  DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
893  // Check whether we've reached the end.
894  llvm::Value *Done =
895  CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
896  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
897  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
898 
899  // Done.
900  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
901 }
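// Shape of the emitted code (conceptually, for a non-UDR initializer):
//   dest = &DestArray[0]; end = dest + NumElements;
//   if (dest == end) goto done;
//   body: init(*dest); ++dest; if (dest != end) goto body;
//   done: ;
// i.e. a guarded element-by-element initialization loop driven by PHI nodes.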
902 
903 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
904  return CGF.EmitOMPSharedLValue(E);
905 }
906 
907 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
908  const Expr *E) {
909  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
910  return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
911  return LValue();
912 }
913 
914 void ReductionCodeGen::emitAggregateInitialization(
915  CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
916  const OMPDeclareReductionDecl *DRD) {
917  // Emit VarDecl with copy init for arrays.
918  // Get the address of the original variable captured in current
919  // captured region.
920  const auto *PrivateVD =
921  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
922  bool EmitDeclareReductionInit =
923  DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
924  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
925  EmitDeclareReductionInit,
926  EmitDeclareReductionInit ? ClausesData[N].ReductionOp
927  : PrivateVD->getInit(),
928  DRD, SharedLVal.getAddress());
929 }
930 
931 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
932  ArrayRef<const Expr *> Privates,
933  ArrayRef<const Expr *> ReductionOps) {
934  ClausesData.reserve(Shareds.size());
935  SharedAddresses.reserve(Shareds.size());
936  Sizes.reserve(Shareds.size());
937  BaseDecls.reserve(Shareds.size());
938  auto IPriv = Privates.begin();
939  auto IRed = ReductionOps.begin();
940  for (const Expr *Ref : Shareds) {
941  ClausesData.emplace_back(Ref, *IPriv, *IRed);
942  std::advance(IPriv, 1);
943  std::advance(IRed, 1);
944  }
945 }
946 
947 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
948  assert(SharedAddresses.size() == N &&
949  "Number of generated lvalues must be exactly N.");
950  LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
951  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
952  SharedAddresses.emplace_back(First, Second);
953 }
954 
955 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
956  const auto *PrivateVD =
957  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
958  QualType PrivateType = PrivateVD->getType();
959  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
960  if (!PrivateType->isVariablyModifiedType()) {
961  Sizes.emplace_back(
962  CGF.getTypeSize(
963  SharedAddresses[N].first.getType().getNonReferenceType()),
964  nullptr);
965  return;
966  }
967  llvm::Value *Size;
968  llvm::Value *SizeInChars;
969  auto *ElemType =
970  cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
971  ->getElementType();
972  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
973  if (AsArraySection) {
974  Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
975  SharedAddresses[N].first.getPointer());
976  Size = CGF.Builder.CreateNUWAdd(
977  Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
978  SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
979  } else {
980  SizeInChars = CGF.getTypeSize(
981  SharedAddresses[N].first.getType().getNonReferenceType());
982  Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
983  }
984  Sizes.emplace_back(SizeInChars, Size);
985  CodeGenFunction::OpaqueValueMapping OpaqueMap(
986  CGF,
987  cast<OpaqueValueExpr>(
988  CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
989  RValue::get(Size));
990  CGF.EmitVariablyModifiedType(PrivateType);
991 }
992 
993 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
994  llvm::Value *Size) {
995  const auto *PrivateVD =
996  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
997  QualType PrivateType = PrivateVD->getType();
998  if (!PrivateType->isVariablyModifiedType()) {
999  assert(!Size && !Sizes[N].second &&
1000  "Size should be nullptr for non-variably modified reduction "
1001  "items.");
1002  return;
1003  }
1004  CodeGenFunction::OpaqueValueMapping OpaqueMap(
1005  CGF,
1006  cast<OpaqueValueExpr>(
1007  CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1008  RValue::get(Size));
1009  CGF.EmitVariablyModifiedType(PrivateType);
1010 }
1011 
1012 void ReductionCodeGen::emitInitialization(
1013  CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
1014  llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
1015  assert(SharedAddresses.size() > N && "No variable was generated");
1016  const auto *PrivateVD =
1017  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1018  const OMPDeclareReductionDecl *DRD =
1019  getReductionInit(ClausesData[N].ReductionOp);
1020  QualType PrivateType = PrivateVD->getType();
1021  PrivateAddr = CGF.Builder.CreateElementBitCast(
1022  PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1023  QualType SharedType = SharedAddresses[N].first.getType();
1024  SharedLVal = CGF.MakeAddrLValue(
1025  CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
1026  CGF.ConvertTypeForMem(SharedType)),
1027  SharedType, SharedAddresses[N].first.getBaseInfo(),
1028  CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
1029  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
1030  emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
1031  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
1032  emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
1033  PrivateAddr, SharedLVal.getAddress(),
1034  SharedLVal.getType());
1035  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
1036  !CGF.isTrivialInitializer(PrivateVD->getInit())) {
1037  CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
1038  PrivateVD->getType().getQualifiers(),
1039  /*IsInitializer=*/false);
1040  }
1041 }
1042 
1043 bool ReductionCodeGen::needCleanups(unsigned N) {
1044  const auto *PrivateVD =
1045  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1046  QualType PrivateType = PrivateVD->getType();
1047  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1048  return DTorKind != QualType::DK_none;
1049 }
1050 
1051 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1052  Address PrivateAddr) {
1053  const auto *PrivateVD =
1054  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1055  QualType PrivateType = PrivateVD->getType();
1056  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1057  if (needCleanups(N)) {
1058  PrivateAddr = CGF.Builder.CreateElementBitCast(
1059  PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1060  CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1061  }
1062 }
1063 
1064 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1065  LValue BaseLV) {
1066  BaseTy = BaseTy.getNonReferenceType();
1067  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1068  !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1069  if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
1070  BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
1071  } else {
1072  LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
1073  BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
1074  }
1075  BaseTy = BaseTy->getPointeeType();
1076  }
1077  return CGF.MakeAddrLValue(
1078  CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
1079  CGF.ConvertTypeForMem(ElTy)),
1080  BaseLV.getType(), BaseLV.getBaseInfo(),
1081  CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
1082 }
1083 
1084 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1085  llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
1086  llvm::Value *Addr) {
1087  Address Tmp = Address::invalid();
1088  Address TopTmp = Address::invalid();
1089  Address MostTopTmp = Address::invalid();
1090  BaseTy = BaseTy.getNonReferenceType();
1091  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1092  !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1093  Tmp = CGF.CreateMemTemp(BaseTy);
1094  if (TopTmp.isValid())
1095  CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
1096  else
1097  MostTopTmp = Tmp;
1098  TopTmp = Tmp;
1099  BaseTy = BaseTy->getPointeeType();
1100  }
1101  llvm::Type *Ty = BaseLVType;
1102  if (Tmp.isValid())
1103  Ty = Tmp.getElementType();
1104  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
1105  if (Tmp.isValid()) {
1106  CGF.Builder.CreateStore(Addr, Tmp);
1107  return MostTopTmp;
1108  }
1109  return Address(Addr, BaseLVAlignment);
1110 }
1111 
1112 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1113  const VarDecl *OrigVD = nullptr;
1114  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1115  const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1116  while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1117  Base = TempOASE->getBase()->IgnoreParenImpCasts();
1118  while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1119  Base = TempASE->getBase()->IgnoreParenImpCasts();
1120  DE = cast<DeclRefExpr>(Base);
1121  OrigVD = cast<VarDecl>(DE->getDecl());
1122  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1123  const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1124  while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1125  Base = TempASE->getBase()->IgnoreParenImpCasts();
1126  DE = cast<DeclRefExpr>(Base);
1127  OrigVD = cast<VarDecl>(DE->getDecl());
1128  }
1129  return OrigVD;
1130 }
1131 
1132 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
1133  Address PrivateAddr) {
1134  const DeclRefExpr *DE;
1135  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1136  BaseDecls.emplace_back(OrigVD);
1137  LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1138  LValue BaseLValue =
1139  loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1140  OriginalBaseLValue);
1141  llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1142  BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
1143  llvm::Value *PrivatePointer =
1144  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1145  PrivateAddr.getPointer(),
1146  SharedAddresses[N].first.getAddress().getType());
1147  llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1148  return castToBase(CGF, OrigVD->getType(),
1149  SharedAddresses[N].first.getType(),
1150  OriginalBaseLValue.getAddress().getType(),
1151  OriginalBaseLValue.getAlignment(), Ptr);
1152  }
1153  BaseDecls.emplace_back(
1154  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1155  return PrivateAddr;
1156 }
1157 
1158 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1159  const OMPDeclareReductionDecl *DRD =
1160  getReductionInit(ClausesData[N].ReductionOp);
1161  return DRD && DRD->getInitializer();
1162 }
1163 
1164 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1165  return CGF.EmitLoadOfPointerLValue(
1166  CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1167  getThreadIDVariable()->getType()->castAs<PointerType>());
1168 }
1169 
1170 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1171  if (!CGF.HaveInsertPoint())
1172  return;
1173  // 1.2.2 OpenMP Language Terminology
1174  // Structured block - An executable statement with a single entry at the
1175  // top and a single exit at the bottom.
1176  // The point of exit cannot be a branch out of the structured block.
1177  // longjmp() and throw() must not violate the entry/exit criteria.
1178  CGF.EHStack.pushTerminate();
1179  CodeGen(CGF);
1180  CGF.EHStack.popTerminate();
1181 }
1182 
1183 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1184  CodeGenFunction &CGF) {
1185  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1186  getThreadIDVariable()->getType(),
1187  AlignmentSource::Decl);
1188 }
1189 
1190 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1191  QualType FieldTy) {
1192  auto *Field = FieldDecl::Create(
1193  C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1194  C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1195  /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1196  Field->setAccess(AS_public);
1197  DC->addDecl(Field);
1198  return Field;
1199 }
1200 
1201 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1202  StringRef Separator)
1203  : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1204  OffloadEntriesInfoManager(CGM) {
1205  ASTContext &C = CGM.getContext();
1206  RecordDecl *RD = C.buildImplicitRecord("ident_t");
1207  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1208  RD->startDefinition();
1209  // reserved_1
1210  addFieldToRecordDecl(C, RD, KmpInt32Ty);
1211  // flags
1212  addFieldToRecordDecl(C, RD, KmpInt32Ty);
1213  // reserved_2
1214  addFieldToRecordDecl(C, RD, KmpInt32Ty);
1215  // reserved_3
1216  addFieldToRecordDecl(C, RD, KmpInt32Ty);
1217  // psource
1218  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1219  RD->completeDefinition();
1220  IdentQTy = C.getRecordType(RD);
1221  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
1222  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1223 
1224  loadOffloadInfoMetadata();
1225 }
1226 
1227 void CGOpenMPRuntime::clear() {
1228  InternalVars.clear();
1229  // Clean non-target variable declarations possibly used only in debug info.
1230  for (const auto &Data : EmittedNonTargetVariables) {
1231  if (!Data.getValue().pointsToAliveValue())
1232  continue;
1233  auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1234  if (!GV)
1235  continue;
1236  if (!GV->isDeclaration() || GV->getNumUses() > 0)
1237  continue;
1238  GV->eraseFromParent();
1239  }
1240 }
1241 
1242 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1243  SmallString<128> Buffer;
1244  llvm::raw_svector_ostream OS(Buffer);
1245  StringRef Sep = FirstSeparator;
1246  for (StringRef Part : Parts) {
1247  OS << Sep << Part;
1248  Sep = Separator;
1249  }
1250  return OS.str();
1251 }
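// Example: with the default host separators (".", "."), getName({"omp", "reduction"})
// produces ".omp.reduction"; the NVPTX device runtime variant constructs names with
// "_" separators instead.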
1252 
1253 static llvm::Function *
1254 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
1255  const Expr *CombinerInitializer, const VarDecl *In,
1256  const VarDecl *Out, bool IsCombiner) {
1257  // void .omp_combiner.(Ty *in, Ty *out);
1258  ASTContext &C = CGM.getContext();
1259  QualType PtrTy = C.getPointerType(Ty).withRestrict();
1260  FunctionArgList Args;
1261  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1262  /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1263  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1264  /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1265  Args.push_back(&OmpOutParm);
1266  Args.push_back(&OmpInParm);
1267  const CGFunctionInfo &FnInfo =
1268  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1269  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1270  std::string Name = CGM.getOpenMPRuntime().getName(
1271  {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
1272  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
1273  Name, &CGM.getModule());
1274  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
1275  Fn->removeFnAttr(llvm::Attribute::NoInline);
1276  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1277  Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1278  CodeGenFunction CGF(CGM);
1279  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1280  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1281  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
1282  Out->getLocation());
1283  CodeGenFunction::OMPPrivateScope Scope(CGF);
1284  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1285  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
1286  return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1287  .getAddress();
1288  });
1289  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1290  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
1291  return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1292  .getAddress();
1293  });
1294  (void)Scope.Privatize();
1295  if (!IsCombiner && Out->hasInit() &&
1296  !CGF.isTrivialInitializer(Out->getInit())) {
1297  CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1298  Out->getType().getQualifiers(),
1299  /*IsInitializer=*/true);
1300  }
1301  if (CombinerInitializer)
1302  CGF.EmitIgnoredExpr(CombinerInitializer);
1303  Scope.ForceCleanup();
1304  CGF.FinishFunction();
1305  return Fn;
1306 }
1307 
1308 void CGOpenMPRuntime::emitUserDefinedReduction(
1309  CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1310  if (UDRMap.count(D) > 0)
1311  return;
1312  llvm::Function *Combiner = emitCombinerOrInitializer(
1313  CGM, D->getType(), D->getCombiner(),
1314  cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1315  cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1316  /*IsCombiner=*/true);
1317  llvm::Function *Initializer = nullptr;
1318  if (const Expr *Init = D->getInitializer()) {
1319  Initializer = emitCombinerOrInitializer(
1320  CGM, D->getType(),
1321  D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1322  : nullptr,
1323  cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1324  cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1325  /*IsCombiner=*/false);
1326  }
1327  UDRMap.try_emplace(D, Combiner, Initializer);
1328  if (CGF) {
1329  auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1330  Decls.second.push_back(D);
1331  }
1332 }
1333 
1334 std::pair<llvm::Function *, llvm::Function *>
1335 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1336  auto I = UDRMap.find(D);
1337  if (I != UDRMap.end())
1338  return I->second;
1339  emitUserDefinedReduction(/*CGF=*/nullptr, D);
1340  return UDRMap.lookup(D);
1341 }
1342 
1343 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1344  CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1345  const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1346  const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1347  assert(ThreadIDVar->getType()->isPointerType() &&
1348  "thread id variable must be of type kmp_int32 *");
1349  CodeGenFunction CGF(CGM, true);
1350  bool HasCancel = false;
1351  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1352  HasCancel = OPD->hasCancel();
1353  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1354  HasCancel = OPSD->hasCancel();
1355  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1356  HasCancel = OPFD->hasCancel();
1357  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1358  HasCancel = OPFD->hasCancel();
1359  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1360  HasCancel = OPFD->hasCancel();
1361  else if (const auto *OPFD =
1362  dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1363  HasCancel = OPFD->hasCancel();
1364  else if (const auto *OPFD =
1365  dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1366  HasCancel = OPFD->hasCancel();
1367  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1368  HasCancel, OutlinedHelperName);
1369  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1370  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
1371 }
1372 
1373 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1374  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1375  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1376  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1377  return emitParallelOrTeamsOutlinedFunction(
1378  CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1379 }
1380 
1381 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1382  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1383  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1384  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1385  return emitParallelOrTeamsOutlinedFunction(
1386  CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1387 }
1388 
1389 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1390  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1391  const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1392  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1393  bool Tied, unsigned &NumberOfParts) {
1394  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1395  PrePostActionTy &) {
1396  llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1397  llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1398  llvm::Value *TaskArgs[] = {
1399  UpLoc, ThreadID,
1400  CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1401  TaskTVar->getType()->castAs<PointerType>())
1402  .getPointer()};
1403  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
1404  };
1405  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1406  UntiedCodeGen);
1407  CodeGen.setAction(Action);
1408  assert(!ThreadIDVar->getType()->isPointerType() &&
1409  "thread id variable must be of type kmp_int32 for tasks");
1410  const OpenMPDirectiveKind Region =
1411  isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1412  : OMPD_task;
1413  const CapturedStmt *CS = D.getCapturedStmt(Region);
1414  const auto *TD = dyn_cast<OMPTaskDirective>(&D);
1415  CodeGenFunction CGF(CGM, true);
1416  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1417  InnermostKind,
1418  TD ? TD->hasCancel() : false, Action);
1419  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1420  llvm::Value *Res = CGF.GenerateCapturedStmtFunction(*CS);
1421  if (!Tied)
1422  NumberOfParts = Action.getNumberOfParts();
1423  return Res;
1424 }
1425 
1426 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1427  const RecordDecl *RD, const CGRecordLayout &RL,
1428  ArrayRef<llvm::Constant *> Data) {
1429  llvm::StructType *StructTy = RL.getLLVMType();
1430  unsigned PrevIdx = 0;
1431  ConstantInitBuilder CIBuilder(CGM);
1432  auto DI = Data.begin();
1433  for (const FieldDecl *FD : RD->fields()) {
1434  unsigned Idx = RL.getLLVMFieldNo(FD);
1435  // Fill the alignment.
1436  for (unsigned I = PrevIdx; I < Idx; ++I)
1437  Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1438  PrevIdx = Idx + 1;
1439  Fields.add(*DI);
1440  ++DI;
1441  }
1442 }
1443 
1444 template <class... As>
1445 static llvm::GlobalVariable *
1446 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1447  ArrayRef<llvm::Constant *> Data, const Twine &Name,
1448  As &&... Args) {
1449  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1450  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1451  ConstantInitBuilder CIBuilder(CGM);
1452  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1453  buildStructValue(Fields, CGM, RD, RL, Data);
1454  return Fields.finishAndCreateGlobal(
1455  Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1456  std::forward<As>(Args)...);
1457 }
1458 
1459 template <typename T>
1460 static void
1461 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1462  ArrayRef<llvm::Constant *> Data,
1463  T &Parent) {
1464  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1465  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1466  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1467  buildStructValue(Fields, CGM, RD, RL, Data);
1468  Fields.finishAndAddTo(Parent);
1469 }
1470 
1471 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
1472  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1473  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1474  FlagsTy FlagsKey(Flags, Reserved2Flags);
1475  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
1476  if (!Entry) {
1477  if (!DefaultOpenMPPSource) {
1478  // Initialize default location for psource field of ident_t structure of
1479  // all ident_t objects. Format is ";file;function;line;column;;".
1480  // Taken from
1481  // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
1482  DefaultOpenMPPSource =
1483  CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
1484  DefaultOpenMPPSource =
1485  llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
1486  }
1487 
1488  llvm::Constant *Data[] = {
1489  llvm::ConstantInt::getNullValue(CGM.Int32Ty),
1490  llvm::ConstantInt::get(CGM.Int32Ty, Flags),
1491  llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
1492  llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
1493  llvm::GlobalValue *DefaultOpenMPLocation =
1494  createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
1495  llvm::GlobalValue::PrivateLinkage);
1496  DefaultOpenMPLocation->setUnnamedAddr(
1497  llvm::GlobalValue::UnnamedAddr::Global);
1498 
1499  OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
1500  }
1501  return Address(Entry, Align);
1502 }
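// Illustrative sketch: the default location created above is a private
// unnamed_addr ident_t global whose fields (reserved_1, flags, reserved_2,
// reserved_3, psource) are roughly
//   { 0, Flags, Reserved2Flags, 0, ";unknown;unknown;0;0;;" }.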
1503 
1504 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1505  bool AtCurrentPoint) {
1506  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1507  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1508 
1509  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1510  if (AtCurrentPoint) {
1511  Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1512  Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1513  } else {
1514  Elem.second.ServiceInsertPt =
1515  new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1516  Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1517  }
1518 }
1519 
1520 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1521  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1522  if (Elem.second.ServiceInsertPt) {
1523  llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1524  Elem.second.ServiceInsertPt = nullptr;
1525  Ptr->eraseFromParent();
1526  }
1527 }
1528 
1529 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1530  SourceLocation Loc,
1531  unsigned Flags) {
1532  Flags |= OMP_IDENT_KMPC;
1533  // If no debug info is generated - return global default location.
1534  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1535  Loc.isInvalid())
1536  return getOrCreateDefaultLocation(Flags).getPointer();
1537 
1538  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1539 
1540  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1541  Address LocValue = Address::invalid();
1542  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1543  if (I != OpenMPLocThreadIDMap.end())
1544  LocValue = Address(I->second.DebugLoc, Align);
1545 
1546  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
1547  // GetOpenMPThreadID was called before this routine.
1548  if (!LocValue.isValid()) {
1549  // Generate "ident_t .kmpc_loc.addr;"
1550  Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
1551  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1552  Elem.second.DebugLoc = AI.getPointer();
1553  LocValue = AI;
1554 
1555  if (!Elem.second.ServiceInsertPt)
1556  setLocThreadIdInsertPt(CGF);
1557  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1558  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1559  CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
1560  CGF.getTypeSize(IdentQTy));
1561  }
1562 
1563  // char **psource = &.kmpc_loc_<flags>.addr.psource;
1564  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
1565  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
1566  LValue PSource =
1567  CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));
1568 
1569  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
1570  if (OMPDebugLoc == nullptr) {
1571  SmallString<128> Buffer2;
1572  llvm::raw_svector_ostream OS2(Buffer2);
1573  // Build debug location
1574  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1575  OS2 << ";" << PLoc.getFilename() << ";";
1576  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1577  OS2 << FD->getQualifiedNameAsString();
1578  OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1579  OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
1580  OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
1581  }
1582  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
1583  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);
1584 
1585  // Our callers always pass this to a runtime function, so for
1586  // convenience, go ahead and return a naked pointer.
1587  return LocValue.getPointer();
1588 }
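// Example (illustrative, hypothetical file/function names): a directive at
// line 10, column 5 of "foo.c" inside function "bar" gets the psource string
// ";foo.c;bar;10;5;;" stored into the per-function .kmpc_loc.addr temporary.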
1589 
1590 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1591  SourceLocation Loc) {
1592  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1593 
1594  llvm::Value *ThreadID = nullptr;
1595  // Check whether we've already cached a load of the thread id in this
1596  // function.
1597  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1598  if (I != OpenMPLocThreadIDMap.end()) {
1599  ThreadID = I->second.ThreadID;
1600  if (ThreadID != nullptr)
1601  return ThreadID;
1602  }
1603  // If exceptions are enabled, do not use the thread id parameter, to avoid a possible crash.
1604  if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1605  !CGF.getLangOpts().CXXExceptions ||
1606  CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
1607  if (auto *OMPRegionInfo =
1608  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1609  if (OMPRegionInfo->getThreadIDVariable()) {
1610  // Check if this is an outlined function with thread id passed as argument.
1611  LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1612  ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1613  // If value loaded in entry block, cache it and use it everywhere in
1614  // function.
1615  if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
1616  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1617  Elem.second.ThreadID = ThreadID;
1618  }
1619  return ThreadID;
1620  }
1621  }
1622  }
1623 
1624  // This is not an outlined function region - need to call kmp_int32
1625  // __kmpc_global_thread_num(ident_t *loc).
1626  // Generate thread id value and cache this value for use across the
1627  // function.
1628  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1629  if (!Elem.second.ServiceInsertPt)
1630  setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
1631  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1632  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1633  llvm::CallInst *Call = CGF.Builder.CreateCall(
1634  createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1635  emitUpdateLocation(CGF, Loc));
1636  Call->setCallingConv(CGF.getRuntimeCC());
1637  Elem.second.ThreadID = Call;
1638  return Call;
1639 }
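// In short: inside an outlined OpenMP region the thread id comes from the
// region's gtid parameter; otherwise it comes from a __kmpc_global_thread_num()
// call emitted at the service insertion point, whose result is cached in
// OpenMPLocThreadIDMap for reuse within the same function.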
1640 
1641 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1642  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1643  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1644  clearLocThreadIdInsertPt(CGF);
1645  OpenMPLocThreadIDMap.erase(CGF.CurFn);
1646  }
1647  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1648  for(auto *D : FunctionUDRMap[CGF.CurFn])
1649  UDRMap.erase(D);
1650  FunctionUDRMap.erase(CGF.CurFn);
1651  }
1652 }
1653 
1654 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1655  return IdentTy->getPointerTo();
1656 }
1657 
1658 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1659  if (!Kmpc_MicroTy) {
1660  // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1661  llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1662  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1663  Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1664  }
1665  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1666 }
1667 
1668 llvm::Constant *
1669 CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1670  llvm::Constant *RTLFn = nullptr;
1671  switch (static_cast<OpenMPRTLFunction>(Function)) {
1672  case OMPRTL__kmpc_fork_call: {
1673  // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1674  // microtask, ...);
1675  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1676  getKmpc_MicroPointerTy()};
1677  auto *FnTy =
1678  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1679  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1680  break;
1681  }
1682  case OMPRTL__kmpc_global_thread_num: {
1683  // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1684  llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1685  auto *FnTy =
1686  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1687  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1688  break;
1689  }
1690  case OMPRTL__kmpc_threadprivate_cached: {
1691  // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1692  // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1693  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1694  CGM.VoidPtrTy, CGM.SizeTy,
1695  CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1696  auto *FnTy =
1697  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1698  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1699  break;
1700  }
1701  case OMPRTL__kmpc_critical: {
1702  // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1703  // kmp_critical_name *crit);
1704  llvm::Type *TypeParams[] = {
1705  getIdentTyPointerTy(), CGM.Int32Ty,
1706  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1707  auto *FnTy =
1708  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1709  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1710  break;
1711  }
1712  case OMPRTL__kmpc_critical_with_hint: {
1713  // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1714  // kmp_critical_name *crit, uintptr_t hint);
1715  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1716  llvm::PointerType::getUnqual(KmpCriticalNameTy),
1717  CGM.IntPtrTy};
1718  auto *FnTy =
1719  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1720  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1721  break;
1722  }
1723  case OMPRTL__kmpc_threadprivate_register: {
1724  // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1725  // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1726  // typedef void *(*kmpc_ctor)(void *);
1727  auto *KmpcCtorTy =
1728  llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1729  /*isVarArg*/ false)->getPointerTo();
1730  // typedef void *(*kmpc_cctor)(void *, void *);
1731  llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1732  auto *KmpcCopyCtorTy =
1733  llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1734  /*isVarArg*/ false)
1735  ->getPointerTo();
1736  // typedef void (*kmpc_dtor)(void *);
1737  auto *KmpcDtorTy =
1738  llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1739  ->getPointerTo();
1740  llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1741  KmpcCopyCtorTy, KmpcDtorTy};
1742  auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1743  /*isVarArg*/ false);
1744  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1745  break;
1746  }
1747  case OMPRTL__kmpc_end_critical: {
1748  // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1749  // kmp_critical_name *crit);
1750  llvm::Type *TypeParams[] = {
1751  getIdentTyPointerTy(), CGM.Int32Ty,
1752  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1753  auto *FnTy =
1754  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1755  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1756  break;
1757  }
1758  case OMPRTL__kmpc_cancel_barrier: {
1759  // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1760  // global_tid);
1761  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1762  auto *FnTy =
1763  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1764  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1765  break;
1766  }
1767  case OMPRTL__kmpc_barrier: {
1768  // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1769  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1770  auto *FnTy =
1771  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1772  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1773  break;
1774  }
1775  case OMPRTL__kmpc_for_static_fini: {
1776  // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1777  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1778  auto *FnTy =
1779  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1780  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1781  break;
1782  }
1783  case OMPRTL__kmpc_push_num_threads: {
1784  // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1785  // kmp_int32 num_threads)
1786  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1787  CGM.Int32Ty};
1788  auto *FnTy =
1789  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1790  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1791  break;
1792  }
1793  case OMPRTL__kmpc_serialized_parallel: {
1794  // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1795  // global_tid);
1796  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1797  auto *FnTy =
1798  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1799  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1800  break;
1801  }
1802  case OMPRTL__kmpc_end_serialized_parallel: {
1803  // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1804  // global_tid);
1805  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1806  auto *FnTy =
1807  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1808  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1809  break;
1810  }
1811  case OMPRTL__kmpc_flush: {
1812  // Build void __kmpc_flush(ident_t *loc);
1813  llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1814  auto *FnTy =
1815  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1816  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1817  break;
1818  }
1819  case OMPRTL__kmpc_master: {
1820  // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1821  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1822  auto *FnTy =
1823  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1824  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1825  break;
1826  }
1827  case OMPRTL__kmpc_end_master: {
1828  // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1829  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1830  auto *FnTy =
1831  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1832  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1833  break;
1834  }
1835  case OMPRTL__kmpc_omp_taskyield: {
1836  // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1837  // int end_part);
1838  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1839  auto *FnTy =
1840  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1841  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1842  break;
1843  }
1844  case OMPRTL__kmpc_single: {
1845  // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1846  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1847  auto *FnTy =
1848  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1849  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1850  break;
1851  }
1852  case OMPRTL__kmpc_end_single: {
1853  // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1854  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1855  auto *FnTy =
1856  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1857  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1858  break;
1859  }
1860  case OMPRTL__kmpc_omp_task_alloc: {
1861  // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1862  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1863  // kmp_routine_entry_t *task_entry);
1864  assert(KmpRoutineEntryPtrTy != nullptr &&
1865  "Type kmp_routine_entry_t must be created.");
1866  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1867  CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1868  // Return void * and then cast to particular kmp_task_t type.
1869  auto *FnTy =
1870  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1871  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1872  break;
1873  }
1874  case OMPRTL__kmpc_omp_task: {
1875  // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1876  // *new_task);
1877  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1878  CGM.VoidPtrTy};
1879  auto *FnTy =
1880  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1881  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1882  break;
1883  }
1884  case OMPRTL__kmpc_copyprivate: {
1885  // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1886  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1887  // kmp_int32 didit);
1888  llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1889  auto *CpyFnTy =
1890  llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1891  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1892  CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
1893  CGM.Int32Ty};
1894  auto *FnTy =
1895  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1896  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1897  break;
1898  }
1899  case OMPRTL__kmpc_reduce: {
1900  // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1901  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1902  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1903  llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1904  auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1905  /*isVarArg=*/false);
1906  llvm::Type *TypeParams[] = {
1907  getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1908  CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1909  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1910  auto *FnTy =
1911  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1912  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1913  break;
1914  }
1915  case OMPRTL__kmpc_reduce_nowait: {
1916  // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1917  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1918  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1919  // *lck);
1920  llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1921  auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1922  /*isVarArg=*/false);
1923  llvm::Type *TypeParams[] = {
1924  getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1925  CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1926  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1927  auto *FnTy =
1928  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1929  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
1930  break;
1931  }
1932  case OMPRTL__kmpc_end_reduce: {
1933  // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
1934  // kmp_critical_name *lck);
1935  llvm::Type *TypeParams[] = {
1936  getIdentTyPointerTy(), CGM.Int32Ty,
1937  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1938  auto *FnTy =
1939  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1940  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
1941  break;
1942  }
1943  case OMPRTL__kmpc_end_reduce_nowait: {
1944  // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
1945  // kmp_critical_name *lck);
1946  llvm::Type *TypeParams[] = {
1947  getIdentTyPointerTy(), CGM.Int32Ty,
1948  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1949  auto *FnTy =
1950  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1951  RTLFn =
1952  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
1953  break;
1954  }
1955  case OMPRTL__kmpc_omp_task_begin_if0: {
1956  // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, kmp_task_t
1957  // *new_task);
1958  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1959  CGM.VoidPtrTy};
1960  auto *FnTy =
1961  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1962  RTLFn =
1963  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
1964  break;
1965  }
1966  case OMPRTL__kmpc_omp_task_complete_if0: {
1967  // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
1968  // kmp_task_t *new_task);
1969  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1970  CGM.VoidPtrTy};
1971  auto *FnTy =
1972  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1973  RTLFn = CGM.CreateRuntimeFunction(FnTy,
1974  /*Name=*/"__kmpc_omp_task_complete_if0");
1975  break;
1976  }
1977  case OMPRTL__kmpc_ordered: {
1978  // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
1979  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1980  auto *FnTy =
1981  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1982  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
1983  break;
1984  }
1985  case OMPRTL__kmpc_end_ordered: {
1986  // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
1987  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1988  auto *FnTy =
1989  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1990  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
1991  break;
1992  }
1993  case OMPRTL__kmpc_omp_taskwait: {
1994  // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
1995  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1996  auto *FnTy =
1997  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1998  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
1999  break;
2000  }
2001  case OMPRTL__kmpc_taskgroup: {
2002  // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
2003  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2004  auto *FnTy =
2005  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2006  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
2007  break;
2008  }
2009  case OMPRTL__kmpc_end_taskgroup: {
2010  // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
2011  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2012  auto *FnTy =
2013  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2014  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
2015  break;
2016  }
2017  case OMPRTL__kmpc_push_proc_bind: {
2018  // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
2019  // int proc_bind)
2020  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2021  auto *FnTy =
2022  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2023  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
2024  break;
2025  }
2026  case OMPRTL__kmpc_omp_task_with_deps: {
2027  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2028  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2029  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
2030  llvm::Type *TypeParams[] = {
2033  auto *FnTy =
2034  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2035  RTLFn =
2036  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
2037  break;
2038  }
2039  case OMPRTL__kmpc_omp_wait_deps: {
2040  // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2041  // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2042  // kmp_depend_info_t *noalias_dep_list);
2043  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2046  auto *FnTy =
2047  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2048  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2049  break;
2050  }
2051  case OMPRTL__kmpc_cancellationpoint: {
2052  // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2053  // global_tid, kmp_int32 cncl_kind)
2054  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2055  auto *FnTy =
2056  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2057  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2058  break;
2059  }
2060  case OMPRTL__kmpc_cancel: {
2061  // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2062  // kmp_int32 cncl_kind)
2063  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2064  auto *FnTy =
2065  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2066  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2067  break;
2068  }
2069  case OMPRTL__kmpc_push_num_teams: {
2070  // Build void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
2071  // kmp_int32 num_teams, kmp_int32 num_threads)
2072  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2073  CGM.Int32Ty};
2074  auto *FnTy =
2075  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2076  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2077  break;
2078  }
2079  case OMPRTL__kmpc_fork_teams: {
2080  // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2081  // microtask, ...);
2082  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2083  getKmpc_MicroPointerTy()};
2084  auto *FnTy =
2085  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2086  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2087  break;
2088  }
2089  case OMPRTL__kmpc_taskloop: {
2090  // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2091  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2092  // sched, kmp_uint64 grainsize, void *task_dup);
2093  llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2094  CGM.IntTy,
2095  CGM.VoidPtrTy,
2096  CGM.IntTy,
2097  CGM.Int64Ty->getPointerTo(),
2098  CGM.Int64Ty->getPointerTo(),
2099  CGM.Int64Ty,
2100  CGM.IntTy,
2101  CGM.IntTy,
2102  CGM.Int64Ty,
2103  CGM.VoidPtrTy};
2104  auto *FnTy =
2105  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2106  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2107  break;
2108  }
2109  case OMPRTL__kmpc_doacross_init: {
2110  // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2111  // num_dims, struct kmp_dim *dims);
2112  llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2113  CGM.Int32Ty,
2114  CGM.Int32Ty,
2115  CGM.VoidPtrTy};
2116  auto *FnTy =
2117  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2118  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2119  break;
2120  }
2121  case OMPRTL__kmpc_doacross_fini: {
2122  // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2123  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2124  auto *FnTy =
2125  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2126  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2127  break;
2128  }
2129  case OMPRTL__kmpc_doacross_post: {
2130  // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2131  // *vec);
2132  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2133  CGM.Int64Ty->getPointerTo()};
2134  auto *FnTy =
2135  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2136  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2137  break;
2138  }
2139  case OMPRTL__kmpc_doacross_wait: {
2140  // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2141  // *vec);
2142  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2143  CGM.Int64Ty->getPointerTo()};
2144  auto *FnTy =
2145  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2146  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2147  break;
2148  }
2149  case OMPRTL__kmpc_task_reduction_init: {
2150  // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2151  // *data);
2152  llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2153  auto *FnTy =
2154  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2155  RTLFn =
2156  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2157  break;
2158  }
2159  case OMPRTL__kmpc_task_reduction_get_th_data: {
2160  // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2161  // *d);
2162  llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2163  auto *FnTy =
2164  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2165  RTLFn = CGM.CreateRuntimeFunction(
2166  FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2167  break;
2168  }
2169  case OMPRTL__kmpc_push_target_tripcount: {
2170  // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
2171  // size);
2172  llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
2173  llvm::FunctionType *FnTy =
2174  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2175  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
2176  break;
2177  }
2178  case OMPRTL__tgt_target: {
2179  // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2180  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2181  // *arg_types);
2182  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2183  CGM.VoidPtrTy,
2184  CGM.Int32Ty,
2185  CGM.VoidPtrPtrTy,
2186  CGM.VoidPtrPtrTy,
2187  CGM.SizeTy->getPointerTo(),
2188  CGM.Int64Ty->getPointerTo()};
2189  auto *FnTy =
2190  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2191  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2192  break;
2193  }
2194  case OMPRTL__tgt_target_nowait: {
2195  // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2196  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
2197  // int64_t *arg_types);
2198  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2199  CGM.VoidPtrTy,
2200  CGM.Int32Ty,
2201  CGM.VoidPtrPtrTy,
2202  CGM.VoidPtrPtrTy,
2203  CGM.SizeTy->getPointerTo(),
2204  CGM.Int64Ty->getPointerTo()};
2205  auto *FnTy =
2206  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2207  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2208  break;
2209  }
2210  case OMPRTL__tgt_target_teams: {
2211  // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2212  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
2213  // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2214  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2215  CGM.VoidPtrTy,
2216  CGM.Int32Ty,
2217  CGM.VoidPtrPtrTy,
2218  CGM.VoidPtrPtrTy,
2219  CGM.SizeTy->getPointerTo(),
2220  CGM.Int64Ty->getPointerTo(),
2221  CGM.Int32Ty,
2222  CGM.Int32Ty};
2223  auto *FnTy =
2224  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2225  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2226  break;
2227  }
2228  case OMPRTL__tgt_target_teams_nowait: {
2229  // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2230  // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
2231  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2232  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2233  CGM.VoidPtrTy,
2234  CGM.Int32Ty,
2235  CGM.VoidPtrPtrTy,
2236  CGM.VoidPtrPtrTy,
2237  CGM.SizeTy->getPointerTo(),
2238  CGM.Int64Ty->getPointerTo(),
2239  CGM.Int32Ty,
2240  CGM.Int32Ty};
2241  auto *FnTy =
2242  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2243  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2244  break;
2245  }
2246  case OMPRTL__tgt_register_lib: {
2247  // Build void __tgt_register_lib(__tgt_bin_desc *desc);
2248  QualType ParamTy =
2249  CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2250  llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2251  auto *FnTy =
2252  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2253  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
2254  break;
2255  }
2256  case OMPRTL__tgt_unregister_lib: {
2257  // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
2258  QualType ParamTy =
2259  CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2260  llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2261  auto *FnTy =
2262  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2263  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
2264  break;
2265  }
2266  case OMPRTL__tgt_target_data_begin: {
2267  // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2268  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2269  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2270  CGM.Int32Ty,
2271  CGM.VoidPtrPtrTy,
2272  CGM.VoidPtrPtrTy,
2273  CGM.SizeTy->getPointerTo(),
2274  CGM.Int64Ty->getPointerTo()};
2275  auto *FnTy =
2276  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2277  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2278  break;
2279  }
2280  case OMPRTL__tgt_target_data_begin_nowait: {
2281  // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2282  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2283  // *arg_types);
2284  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2285  CGM.Int32Ty,
2286  CGM.VoidPtrPtrTy,
2287  CGM.VoidPtrPtrTy,
2288  CGM.SizeTy->getPointerTo(),
2289  CGM.Int64Ty->getPointerTo()};
2290  auto *FnTy =
2291  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2292  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2293  break;
2294  }
2295  case OMPRTL__tgt_target_data_end: {
2296  // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2297  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2298  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2299  CGM.Int32Ty,
2300  CGM.VoidPtrPtrTy,
2301  CGM.VoidPtrPtrTy,
2302  CGM.SizeTy->getPointerTo(),
2303  CGM.Int64Ty->getPointerTo()};
2304  auto *FnTy =
2305  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2306  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2307  break;
2308  }
2309  case OMPRTL__tgt_target_data_end_nowait: {
2310  // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2311  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2312  // *arg_types);
2313  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2314  CGM.Int32Ty,
2315  CGM.VoidPtrPtrTy,
2316  CGM.VoidPtrPtrTy,
2317  CGM.SizeTy->getPointerTo(),
2318  CGM.Int64Ty->getPointerTo()};
2319  auto *FnTy =
2320  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2321  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2322  break;
2323  }
2324  case OMPRTL__tgt_target_data_update: {
2325  // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2326  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2327  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2328  CGM.Int32Ty,
2329  CGM.VoidPtrPtrTy,
2330  CGM.VoidPtrPtrTy,
2331  CGM.SizeTy->getPointerTo(),
2332  CGM.Int64Ty->getPointerTo()};
2333  auto *FnTy =
2334  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2335  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2336  break;
2337  }
2338  case OMPRTL__tgt_target_data_update_nowait: {
2339  // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2340  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2341  // *arg_types);
2342  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2343  CGM.Int32Ty,
2344  CGM.VoidPtrPtrTy,
2345  CGM.VoidPtrPtrTy,
2346  CGM.SizeTy->getPointerTo(),
2347  CGM.Int64Ty->getPointerTo()};
2348  auto *FnTy =
2349  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2350  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2351  break;
2352  }
2353  }
2354  assert(RTLFn && "Unable to find OpenMP runtime function");
2355  return RTLFn;
2356 }
2357 
2358 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
2359  bool IVSigned) {
2360  assert((IVSize == 32 || IVSize == 64) &&
2361  "IV size is not compatible with the omp runtime");
2362  StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2363  : "__kmpc_for_static_init_4u")
2364  : (IVSigned ? "__kmpc_for_static_init_8"
2365  : "__kmpc_for_static_init_8u");
2366  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2367  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2368  llvm::Type *TypeParams[] = {
2369  getIdentTyPointerTy(), // loc
2370  CGM.Int32Ty, // tid
2371  CGM.Int32Ty, // schedtype
2372  llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2373  PtrTy, // p_lower
2374  PtrTy, // p_upper
2375  PtrTy, // p_stride
2376  ITy, // incr
2377  ITy // chunk
2378  };
2379  auto *FnTy =
2380  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2381  return CGM.CreateRuntimeFunction(FnTy, Name);
2382 }
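// For example, IVSize == 32 with IVSigned == true selects a declaration that
// corresponds roughly to:
//   void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
//                                 kmp_int32 *p_lastiter, kmp_int32 *p_lower,
//                                 kmp_int32 *p_upper, kmp_int32 *p_stride,
//                                 kmp_int32 incr, kmp_int32 chunk);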
2383 
2384 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
2385  bool IVSigned) {
2386  assert((IVSize == 32 || IVSize == 64) &&
2387  "IV size is not compatible with the omp runtime");
2388  StringRef Name =
2389  IVSize == 32
2390  ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2391  : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2392  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2393  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2394  CGM.Int32Ty, // tid
2395  CGM.Int32Ty, // schedtype
2396  ITy, // lower
2397  ITy, // upper
2398  ITy, // stride
2399  ITy // chunk
2400  };
2401  auto *FnTy =
2402  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2403  return CGM.CreateRuntimeFunction(FnTy, Name);
2404 }
2405 
2406 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
2407  bool IVSigned) {
2408  assert((IVSize == 32 || IVSize == 64) &&
2409  "IV size is not compatible with the omp runtime");
2410  StringRef Name =
2411  IVSize == 32
2412  ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2413  : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2414  llvm::Type *TypeParams[] = {
2415  getIdentTyPointerTy(), // loc
2416  CGM.Int32Ty, // tid
2417  };
2418  auto *FnTy =
2419  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2420  return CGM.CreateRuntimeFunction(FnTy, Name);
2421 }
2422 
2423 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
2424  bool IVSigned) {
2425  assert((IVSize == 32 || IVSize == 64) &&
2426  "IV size is not compatible with the omp runtime");
2427  StringRef Name =
2428  IVSize == 32
2429  ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2430  : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2431  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2432  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2433  llvm::Type *TypeParams[] = {
2434  getIdentTyPointerTy(), // loc
2435  CGM.Int32Ty, // tid
2436  llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2437  PtrTy, // p_lower
2438  PtrTy, // p_upper
2439  PtrTy // p_stride
2440  };
2441  auto *FnTy =
2442  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2443  return CGM.CreateRuntimeFunction(FnTy, Name);
2444 }
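// Typical use (illustrative): a dynamically scheduled loop calls
// __kmpc_dispatch_init_4(&loc, tid, schedtype, lb, ub, st, chunk) once, then
// repeatedly calls __kmpc_dispatch_next_4(&loc, tid, &last, &lb, &ub, &st)
// and executes the returned [lb, ub] chunk while the call returns non-zero;
// the fini variant is used to finish a dispatched chunk where required.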
2445 
2446 Address CGOpenMPRuntime::getAddrOfDeclareTargetLink(const VarDecl *VD) {
2447  if (CGM.getLangOpts().OpenMPSimd)
2448  return Address::invalid();
2449  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2450  OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2451  if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Link) {
2452  SmallString<64> PtrName;
2453  {
2454  llvm::raw_svector_ostream OS(PtrName);
2455  OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_link_ptr";
2456  }
2457  llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
2458  if (!Ptr) {
2459  QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
2461  PtrName);
2462  if (!CGM.getLangOpts().OpenMPIsDevice) {
2463  auto *GV = cast<llvm::GlobalVariable>(Ptr);
2464  GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
2465  GV->setInitializer(CGM.GetAddrOfGlobal(VD));
2466  }
2467  CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr));
2468  registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
2469  }
2470  return Address(Ptr, CGM.getContext().getDeclAlign(VD));
2471  }
2472  return Address::invalid();
2473 }
2474 
2475 llvm::Constant *
2476 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2477  assert(!CGM.getLangOpts().OpenMPUseTLS ||
2478  !CGM.getContext().getTargetInfo().isTLSSupported());
2479  // Lookup the entry, lazily creating it if necessary.
2480  std::string Suffix = getName({"cache", ""});
2481  return getOrCreateInternalVariable(
2482  CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
2483 }
2484 
2485 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2486  const VarDecl *VD,
2487  Address VDAddr,
2488  SourceLocation Loc) {
2489  if (CGM.getLangOpts().OpenMPUseTLS &&
2490  CGM.getContext().getTargetInfo().isTLSSupported())
2491  return VDAddr;
2492 
2493  llvm::Type *VarTy = VDAddr.getElementType();
2494  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2495  CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2496  CGM.Int8PtrTy),
2497  CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2498  getOrCreateThreadPrivateCache(VD)};
2499  return Address(CGF.EmitRuntimeCall(
2500  createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2501  VDAddr.getAlignment());
2502 }
2503 
2504 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2505  CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2506  llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2507  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2508  // library.
2509  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2510  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2511  OMPLoc);
2512  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2513  // to register constructor/destructor for variable.
2514  llvm::Value *Args[] = {
2515  OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2516  Ctor, CopyCtor, Dtor};
2517  CGF.EmitRuntimeCall(
2518  createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2519 }
2520 
2521 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
2522  const VarDecl *VD, Address VDAddr, SourceLocation Loc,
2523  bool PerformInit, CodeGenFunction *CGF) {
2524  if (CGM.getLangOpts().OpenMPUseTLS &&
2525  CGM.getContext().getTargetInfo().isTLSSupported())
2526  return nullptr;
2527 
2528  VD = VD->getDefinition(CGM.getContext());
2529  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
2530  QualType ASTTy = VD->getType();
2531 
2532  llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
2533  const Expr *Init = VD->getAnyInitializer();
2534  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2535  // Generate function that re-emits the declaration's initializer into the
2536  // threadprivate copy of the variable VD
2537  CodeGenFunction CtorCGF(CGM);
2538  FunctionArgList Args;
2539  ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2540  /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2542  Args.push_back(&Dst);
2543 
2544  const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2545  CGM.getContext().VoidPtrTy, Args);
2546  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2547  std::string Name = getName({"__kmpc_global_ctor_", ""});
2548  llvm::Function *Fn =
2549  CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
2550  CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
2551  Args, Loc, Loc);
2552  llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
2553  CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2554  CGM.getContext().VoidPtrTy, Dst.getLocation());
2555  Address Arg = Address(ArgVal, VDAddr.getAlignment());
2556  Arg = CtorCGF.Builder.CreateElementBitCast(
2557  Arg, CtorCGF.ConvertTypeForMem(ASTTy));
2558  CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
2559  /*IsInitializer=*/true);
2560  ArgVal = CtorCGF.EmitLoadOfScalar(
2561  CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2562  CGM.getContext().VoidPtrTy, Dst.getLocation());
2563  CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
2564  CtorCGF.FinishFunction();
2565  Ctor = Fn;
2566  }
2567  if (VD->getType().isDestructedType() != QualType::DK_none) {
2568  // Generate function that emits destructor call for the threadprivate copy
2569  // of the variable VD
2570  CodeGenFunction DtorCGF(CGM);
2571  FunctionArgList Args;
2572  ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2573  /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2575  Args.push_back(&Dst);
2576 
2577  const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2578  CGM.getContext().VoidTy, Args);
2579  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2580  std::string Name = getName({"__kmpc_global_dtor_", ""});
2581  llvm::Function *Fn =
2582  CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
2583  auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2584  DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
2585  Loc, Loc);
2586  // Create a scope with an artificial location for the body of this function.
2587  auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2588  llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
2589  DtorCGF.GetAddrOfLocalVar(&Dst),
2590  /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
2591  DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
2592  DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2593  DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2594  DtorCGF.FinishFunction();
2595  Dtor = Fn;
2596  }
2597  // Do not emit init function if it is not required.
2598  if (!Ctor && !Dtor)
2599  return nullptr;
2600 
2601  llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2602  auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
2603  /*isVarArg=*/false)
2604  ->getPointerTo();
2605  // Copying constructor for the threadprivate variable.
2606  // Must be NULL: the parameter is reserved by the runtime, which currently
2607  // requires it to be NULL and asserts otherwise.
2608  CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
2609  if (Ctor == nullptr) {
2610  auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
2611  /*isVarArg=*/false)
2612  ->getPointerTo();
2613  Ctor = llvm::Constant::getNullValue(CtorTy);
2614  }
2615  if (Dtor == nullptr) {
2616  auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
2617  /*isVarArg=*/false)
2618  ->getPointerTo();
2619  Dtor = llvm::Constant::getNullValue(DtorTy);
2620  }
2621  if (!CGF) {
2622  auto *InitFunctionTy =
2623  llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
2624  std::string Name = getName({"__omp_threadprivate_init_", ""});
2625  llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
2626  InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
2627  CodeGenFunction InitCGF(CGM);
2628  FunctionArgList ArgList;
2629  InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
2630  CGM.getTypes().arrangeNullaryFunction(), ArgList,
2631  Loc, Loc);
2632  emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2633  InitCGF.FinishFunction();
2634  return InitFunction;
2635  }
2636  emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2637  }
2638  return nullptr;
2639 }
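// When no CodeGenFunction is supplied, the net result is a small helper named
// "__omp_threadprivate_init_" plus a suffix, whose body is roughly:
//   __kmpc_global_thread_num(&loc);                    // initialize the runtime
//   __kmpc_threadprivate_register(&loc, &var, ctor,    // register the variable
//                                 /*cctor=*/NULL, dtor);
// where ctor/dtor may be null constants if the type needs no init/cleanup.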
2640 
2641 /// Obtain information that uniquely identifies a target entry. This
2642 /// consists of the file and device IDs as well as line number associated with
2643 /// the relevant entry source location.
2644 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2645  unsigned &DeviceID, unsigned &FileID,
2646  unsigned &LineNum) {
2647  SourceManager &SM = C.getSourceManager();
2648 
2649  // The loc should always be valid and have a file ID (the user cannot use
2650  // #pragma directives in macros).
2651 
2652  assert(Loc.isValid() && "Source location is expected to be always valid.");
2653 
2654  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2655  assert(PLoc.isValid() && "Source location is expected to be always valid.");
2656 
2657  llvm::sys::fs::UniqueID ID;
2658  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2659  SM.getDiagnostics().Report(diag::err_cannot_open_file)
2660  << PLoc.getFilename() << EC.message();
2661 
2662  DeviceID = ID.getDevice();
2663  FileID = ID.getFile();
2664  LineNum = PLoc.getLine();
2665 }
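// Example (illustrative): for a declaration in a hypothetical /tmp/foo.c at
// line 42, DeviceID/FileID come from the file's unique ID on disk and LineNum
// is 42; these values feed the "__omp_offloading_<device>_<file>_<name>_l<line>"
// naming scheme used for offload entries below.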
2666 
2667 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
2668  llvm::GlobalVariable *Addr,
2669  bool PerformInit) {
2670  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2671  OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2672  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link)
2673  return CGM.getLangOpts().OpenMPIsDevice;
2674  VD = VD->getDefinition(CGM.getContext());
2675  if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
2676  return CGM.getLangOpts().OpenMPIsDevice;
2677 
2678  QualType ASTTy = VD->getType();
2679 
2680  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
2681  // Produce the unique prefix to identify the new target regions. We use
2682  // the source location of the variable declaration which we know to not
2683  // conflict with any target region.
2684  unsigned DeviceID;
2685  unsigned FileID;
2686  unsigned Line;
2687  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
2688  SmallString<128> Buffer, Out;
2689  {
2690  llvm::raw_svector_ostream OS(Buffer);
2691  OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
2692  << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
2693  }
2694 
2695  const Expr *Init = VD->getAnyInitializer();
2696  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2697  llvm::Constant *Ctor;
2698  llvm::Constant *ID;
2699  if (CGM.getLangOpts().OpenMPIsDevice) {
2700  // Generate function that re-emits the declaration's initializer into
2701  // the threadprivate copy of the variable VD
2702  CodeGenFunction CtorCGF(CGM);
2703 
2705  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2706  llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2707  FTy, Twine(Buffer, "_ctor"), FI, Loc);
2708  auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
2709  CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2710  FunctionArgList(), Loc, Loc);
2711  auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
2712  CtorCGF.EmitAnyExprToMem(Init,
2713  Address(Addr, CGM.getContext().getDeclAlign(VD)),
2714  Init->getType().getQualifiers(),
2715  /*IsInitializer=*/true);
2716  CtorCGF.FinishFunction();
2717  Ctor = Fn;
2718  ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2719  CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
2720  } else {
2721  Ctor = new llvm::GlobalVariable(
2722  CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2723  llvm::GlobalValue::PrivateLinkage,
2724  llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
2725  ID = Ctor;
2726  }
2727 
2728  // Register the information for the entry associated with the constructor.
2729  Out.clear();
2731  DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
2733  }
2734  if (VD->getType().isDestructedType() != QualType::DK_none) {
2735  llvm::Constant *Dtor;
2736  llvm::Constant *ID;
2737  if (CGM.getLangOpts().OpenMPIsDevice) {
2738  // Generate function that emits destructor call for the threadprivate
2739  // copy of the variable VD
2740  CodeGenFunction DtorCGF(CGM);
2741 
2743  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2744  llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2745  FTy, Twine(Buffer, "_dtor"), FI, Loc);
2746  auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2747  DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2748  FunctionArgList(), Loc, Loc);
2749  // Create a scope with an artificial location for the body of this
2750  // function.
2751  auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2752  DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
2753  ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2754  DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2755  DtorCGF.FinishFunction();
2756  Dtor = Fn;
2757  ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2758  CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
2759  } else {
2760  Dtor = new llvm::GlobalVariable(
2761  CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2762  llvm::GlobalValue::PrivateLinkage,
2763  llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
2764  ID = Dtor;
2765  }
2766  // Register the information for the entry associated with the destructor.
2767  Out.clear();
2769  DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
2771  }
2772  return CGM.getLangOpts().OpenMPIsDevice;
2773 }
2774 
2775 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2776  QualType VarType,
2777  StringRef Name) {
2778  std::string Suffix = getName({"artificial", ""});
2779  std::string CacheSuffix = getName({"cache", ""});
2780  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2781  llvm::Value *GAddr =
2782  getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2783  llvm::Value *Args[] = {
2784  emitUpdateLocation(CGF, SourceLocation()),
2785  getThreadID(CGF, SourceLocation()),
2786  CGF.Builder.CreatePointerCast(GAddr, CGM.VoidPtrTy),
2787  CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2788  /*IsSigned=*/false),
2789  getOrCreateInternalVariable(
2790  CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2791  return Address(
2792  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2793  CGF.EmitRuntimeCall(
2794  createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2795  VarLVType->getPointerTo(/*AddrSpace=*/0)),
2796  CGM.getPointerAlign());
2797 }
2798 
2799 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
2800  const RegionCodeGenTy &ThenGen,
2801  const RegionCodeGenTy &ElseGen) {
2802  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2803 
2804  // If the condition constant folds and can be elided, try to avoid emitting
2805  // the condition and the dead arm of the if/else.
2806  bool CondConstant;
2807  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2808  if (CondConstant)
2809  ThenGen(CGF);
2810  else
2811  ElseGen(CGF);
2812  return;
2813  }
2814 
2815  // Otherwise, the condition did not fold, or we couldn't elide it. Just
2816  // emit the conditional branch.
2817  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2818  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2819  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2820  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2821 
2822  // Emit the 'then' code.
2823  CGF.EmitBlock(ThenBlock);
2824  ThenGen(CGF);
2825  CGF.EmitBranch(ContBlock);
2826  // Emit the 'else' code if present.
2827  // There is no need to emit line number for unconditional branch.
2829  CGF.EmitBlock(ElseBlock);
2830  ElseGen(CGF);
2831  // There is no need to emit line number for unconditional branch.
2833  CGF.EmitBranch(ContBlock);
2834  // Emit the continuation block for code after the if.
2835  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2836 }
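// For a non-constant condition this produces the usual three-block pattern:
//   br i1 %cond, label %omp_if.then, label %omp_if.else
// with ThenGen emitted in omp_if.then, ElseGen in omp_if.else, and both
// branching to omp_if.end.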
2837 
2838 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2839  llvm::Value *OutlinedFn,
2840  ArrayRef<llvm::Value *> CapturedVars,
2841  const Expr *IfCond) {
2842  if (!CGF.HaveInsertPoint())
2843  return;
2844  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2845  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
2846  PrePostActionTy &) {
2847  // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2848  CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2849  llvm::Value *Args[] = {
2850  RTLoc,
2851  CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2852  CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2853  llvm::SmallVector<llvm::Value *, 16> RealArgs;
2854  RealArgs.append(std::begin(Args), std::end(Args));
2855  RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2856 
2857  llvm::Value *RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
2858  CGF.EmitRuntimeCall(RTLFn, RealArgs);
2859  };
2860  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
2861  PrePostActionTy &) {
2862  CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2863  llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2864  // Build calls:
2865  // __kmpc_serialized_parallel(&Loc, GTid);
2866  llvm::Value *Args[] = {RTLoc, ThreadID};
2867  CGF.EmitRuntimeCall(
2868  RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
2869 
2870  // OutlinedFn(&GTid, &zero, CapturedStruct);
2871  Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2872  /*Name*/ ".zero.addr");
2873  CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
2874  llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2875  // ThreadId for serialized parallels is 0.
2876  OutlinedFnArgs.push_back(ZeroAddr.getPointer());
2877  OutlinedFnArgs.push_back(ZeroAddr.getPointer());
2878  OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2879  RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2880 
2881  // __kmpc_end_serialized_parallel(&Loc, GTid);
2882  llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2883  CGF.EmitRuntimeCall(
2884  RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
2885  EndArgs);
2886  };
2887  if (IfCond) {
2888  emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
2889  } else {
2890  RegionCodeGenTy ThenRCG(ThenGen);
2891  ThenRCG(CGF);
2892  }
2893 }
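// Illustrative lowering for '#pragma omp parallel' (names abbreviated):
//   with no if-clause, or when the if-clause holds:
//     __kmpc_fork_call(&loc, <num_captured>, (kmpc_micro)outlined_fn, vars...);
//   when the if-clause evaluates to false:
//     gtid = __kmpc_global_thread_num(&loc);
//     __kmpc_serialized_parallel(&loc, gtid);
//     outlined_fn(&zero, &zero, vars...);   // thread id for serialized parallel is 0
//     __kmpc_end_serialized_parallel(&loc, gtid);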
2894 
2895 // If we're inside an (outlined) parallel region, use the region info's
2896 // thread-ID variable (it is passed as the first argument of the outlined
2897 // function, "kmp_int32 *gtid"). Otherwise, in a regular serial code region,
2898 // get the thread ID by calling kmp_int32 __kmpc_global_thread_num(ident_t *loc),
2899 // stash this thread ID in a temporary and
2900 // return the address of that temp.
2901 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2902  SourceLocation Loc) {
2903  if (auto *OMPRegionInfo =
2904  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2905  if (OMPRegionInfo->getThreadIDVariable())
2906  return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
2907 
2908  llvm::Value *ThreadID = getThreadID(CGF, Loc);
2909  QualType Int32Ty =
2910  CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2911  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2912  CGF.EmitStoreOfScalar(ThreadID,
2913  CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2914 
2915  return ThreadIDTemp;
2916 }
2917 
2918 llvm::Constant *
2919 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
2920  const llvm::Twine &Name) {
2921  SmallString<256> Buffer;
2922  llvm::raw_svector_ostream Out(Buffer);
2923  Out << Name;
2924  StringRef RuntimeName = Out.str();
2925  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2926  if (Elem.second) {
2927  assert(Elem.second->getType()->getPointerElementType() == Ty &&
2928  "OMP internal variable has different type than requested");
2929  return &*Elem.second;
2930  }
2931 
2932  return Elem.second = new llvm::GlobalVariable(
2933  CGM.getModule(), Ty, /*IsConstant*/ false,
2934  llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2935  Elem.first());
2936 }
2937 
2938 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2939  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2940  std::string Name = getName({Prefix, "var"});
2941  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2942 }
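// For illustration (the exact mangling depends on getName): for
// '#pragma omp critical (foo)' the lock returned above is a common-linkage
// global of type kmp_critical_name named roughly ".gomp_critical_user_foo.var",
// so every translation unit referencing the same critical name shares one lock.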
2943 
2944 namespace {
2945 /// Common pre(post)-action for different OpenMP constructs.
2946 class CommonActionTy final : public PrePostActionTy {
2947  llvm::Value *EnterCallee;
2948  ArrayRef<llvm::Value *> EnterArgs;
2949  llvm::Value *ExitCallee;
2950  ArrayRef<llvm::Value *> ExitArgs;
2951  bool Conditional;
2952  llvm::BasicBlock *ContBlock = nullptr;
2953 
2954 public:
2955  CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs,
2956  llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs,
2957  bool Conditional = false)
2958  : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2959  ExitArgs(ExitArgs), Conditional(Conditional) {}
2960  void Enter(CodeGenFunction &CGF) override {
2961  llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2962  if (Conditional) {
2963  llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2964  auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2965  ContBlock = CGF.createBasicBlock("omp_if.end");
2966  // Generate the branch (If-stmt)
2967  CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2968  CGF.EmitBlock(ThenBlock);
2969  }
2970  }
2971  void Done(CodeGenFunction &CGF) {
2972  // Emit the rest of blocks/branches
2973  CGF.EmitBranch(ContBlock);
2974  CGF.EmitBlock(ContBlock, true);
2975  }
2976  void Exit(CodeGenFunction &CGF) override {
2977  CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2978  }
2979 };
2980 } // anonymous namespace
2981 
2982 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2983  StringRef CriticalName,
2984  const RegionCodeGenTy &CriticalOpGen,
2985  SourceLocation Loc, const Expr *Hint) {
2986  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2987  // CriticalOpGen();
2988  // __kmpc_end_critical(ident_t *, gtid, Lock);
2989  // Prepare arguments and build a call to __kmpc_critical
2990  if (!CGF.HaveInsertPoint())
2991  return;
2992  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2993  getCriticalRegionLock(CriticalName)};
2994  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2995  std::end(Args));
2996  if (Hint) {
2997  EnterArgs.push_back(CGF.Builder.CreateIntCast(
2998  CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
2999  }
3000  CommonActionTy Action(
3001  createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
3002  : OMPRTL__kmpc_critical),
3003  EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
3004  CriticalOpGen.setAction(Action);
3005  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
3006 }
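// For illustration, '#pragma omp critical (foo) hint(H)' lowers roughly to:
//   __kmpc_critical_with_hint(&loc, gtid, &.gomp_critical_user_foo.var, H);
//   <critical body>;
//   __kmpc_end_critical(&loc, gtid, &.gomp_critical_user_foo.var);
// Without a hint clause the plain __kmpc_critical entry point is used instead.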
3007 
3008 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
3009  const RegionCodeGenTy &MasterOpGen,
3010  SourceLocation Loc) {
3011  if (!CGF.HaveInsertPoint())
3012  return;
3013  // if(__kmpc_master(ident_t *, gtid)) {
3014  // MasterOpGen();
3015  // __kmpc_end_master(ident_t *, gtid);
3016  // }
3017  // Prepare arguments and build a call to __kmpc_master
3018  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3019  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
3020  createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
3021  /*Conditional=*/true);
3022  MasterOpGen.setAction(Action);
3023  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
3024  Action.Done(CGF);
3025 }
3026 
3027 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
3028  SourceLocation Loc) {
3029  if (!CGF.HaveInsertPoint())
3030  return;
3031  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
3032  llvm::Value *Args[] = {
3033  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3034  llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
3035  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
3036  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3037  Region->emitUntiedSwitch(CGF);
3038 }
3039 
3040 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
3041  const RegionCodeGenTy &TaskgroupOpGen,
3042  SourceLocation Loc) {
3043  if (!CGF.HaveInsertPoint())
3044  return;
3045  // __kmpc_taskgroup(ident_t *, gtid);
3046  // TaskgroupOpGen();
3047  // __kmpc_end_taskgroup(ident_t *, gtid);
3048  // Prepare arguments and build a call to __kmpc_taskgroup
3049  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3050  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3051  createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
3052  Args);
3053  TaskgroupOpGen.setAction(Action);
3054  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3055 }
3056 
3057 /// Given an array of pointers to variables, project the address of a
3058 /// given variable.
3059 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
3060  unsigned Index, const VarDecl *Var) {
3061  // Pull out the pointer to the variable.
3062  Address PtrAddr =
3063  CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
3064  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
3065 
3066  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3067  Addr = CGF.Builder.CreateElementBitCast(
3068  Addr, CGF.ConvertTypeForMem(Var->getType()));
3069  return Addr;
3070 }
3071 
3072 static llvm::Value *emitCopyprivateCopyFunction(
3073  CodeGenModule &CGM, llvm::Type *ArgsType,
3074  ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
3075  ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
3076  SourceLocation Loc) {
3077  ASTContext &C = CGM.getContext();
3078  // void copy_func(void *LHSArg, void *RHSArg);
3079  FunctionArgList Args;
3080  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3082  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3084  Args.push_back(&LHSArg);
3085  Args.push_back(&RHSArg);
3086  const auto &CGFI =
3087  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3088  std::string Name =
3089  CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
3090  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
3091  llvm::GlobalValue::InternalLinkage, Name,
3092  &CGM.getModule());
3093  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
3094  Fn->setDoesNotRecurse();
3095  CodeGenFunction CGF(CGM);
3096  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
3097  // Dest = (void*[n])(LHSArg);
3098  // Src = (void*[n])(RHSArg);
3099  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3100  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
3101  ArgsType), CGF.getPointerAlign());
3102  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3103  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
3104  ArgsType), CGF.getPointerAlign());
3105  // *(Type0*)Dst[0] = *(Type0*)Src[0];
3106  // *(Type1*)Dst[1] = *(Type1*)Src[1];
3107  // ...
3108  // *(Typen*)Dst[n] = *(Typen*)Src[n];
3109  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
3110  const auto *DestVar =
3111  cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
3112  Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
3113 
3114  const auto *SrcVar =
3115  cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
3116  Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
3117 
3118  const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
3119  QualType Type = VD->getType();
3120  CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
3121  }
3122  CGF.FinishFunction();
3123  return Fn;
3124 }
3125 
3126 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
3127  const RegionCodeGenTy &SingleOpGen,
3128  SourceLocation Loc,
3129  ArrayRef<const Expr *> CopyprivateVars,
3130  ArrayRef<const Expr *> SrcExprs,
3131  ArrayRef<const Expr *> DstExprs,
3132  ArrayRef<const Expr *> AssignmentOps) {
3133  if (!CGF.HaveInsertPoint())
3134  return;
3135  assert(CopyprivateVars.size() == SrcExprs.size() &&
3136  CopyprivateVars.size() == DstExprs.size() &&
3137  CopyprivateVars.size() == AssignmentOps.size());
3138  ASTContext &C = CGM.getContext();
3139  // int32 did_it = 0;
3140  // if(__kmpc_single(ident_t *, gtid)) {
3141  // SingleOpGen();
3142  // __kmpc_end_single(ident_t *, gtid);
3143  // did_it = 1;
3144  // }
3145  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3146  // <copy_func>, did_it);
3147 
3148  Address DidIt = Address::invalid();
3149  if (!CopyprivateVars.empty()) {
3150  // int32 did_it = 0;
3151  QualType KmpInt32Ty =
3152  C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3153  DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
3154  CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
3155  }
3156  // Prepare arguments and build a call to __kmpc_single
3157  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3158  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
3159  createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
3160  /*Conditional=*/true);
3161  SingleOpGen.setAction(Action);
3162  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
3163  if (DidIt.isValid()) {
3164  // did_it = 1;
3165  CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
3166  }
3167  Action.Done(CGF);
3168  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3169  // <copy_func>, did_it);
3170  if (DidIt.isValid()) {
3171  llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
3172  QualType CopyprivateArrayTy =
3173  C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
3174  /*IndexTypeQuals=*/0);
3175  // Create a list of all private variables for copyprivate.
3176  Address CopyprivateList =
3177  CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
3178  for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
3179  Address Elem = CGF.Builder.CreateConstArrayGEP(
3180  CopyprivateList, I, CGF.getPointerSize());
3181  CGF.Builder.CreateStore(
3182  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3183  CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
3184  Elem);
3185  }
3186  // Build the function that copies the private values from the single region
3187  // to all other threads in the corresponding parallel region.
3188  llvm::Value *CpyFn = emitCopyprivateCopyFunction(
3189  CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
3190  CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
3191  llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
3192  Address CL =
3193  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
3194  CGF.VoidPtrTy);
3195  llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
3196  llvm::Value *Args[] = {
3197  emitUpdateLocation(CGF, Loc), // ident_t *<loc>
3198  getThreadID(CGF, Loc), // i32 <gtid>
3199  BufSize, // size_t <buf_size>
3200  CL.getPointer(), // void *<copyprivate list>
3201  CpyFn, // void (*) (void *, void *) <copy_func>
3202  DidItVal // i32 did_it
3203  };
3204  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
3205  }
3206 }
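// For illustration, '#pragma omp single copyprivate(a)' lowers roughly to
// (names illustrative):
//   int32 did_it = 0;
//   if (__kmpc_single(&loc, gtid)) {
//     <single body>;
//     __kmpc_end_single(&loc, gtid);
//     did_it = 1;
//   }
//   __kmpc_copyprivate(&loc, gtid, <buf_size>, <copyprivate list>,
//                      .omp.copyprivate.copy_func, did_it);
// so the thread that executed the region broadcasts its value of 'a' to the
// other threads through the generated copy function.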
3207 
3208 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3209  const RegionCodeGenTy &OrderedOpGen,
3210  SourceLocation Loc, bool IsThreads) {
3211  if (!CGF.HaveInsertPoint())
3212  return;
3213  // __kmpc_ordered(ident_t *, gtid);
3214  // OrderedOpGen();
3215  // __kmpc_end_ordered(ident_t *, gtid);
3216  // Prepare arguments and build a call to __kmpc_ordered
3217  if (IsThreads) {
3218  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3219  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3220  createRuntimeFunction(OMPRTL__kmpc_end_ordered),
3221  Args);
3222  OrderedOpGen.setAction(Action);
3223  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3224  return;
3225  }
3226  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3227 }
3228 
3229 static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
3230  unsigned Flags;
3231  if (Kind == OMPD_for)
3232  Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3233  else if (Kind == OMPD_sections)
3234  Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3235  else if (Kind == OMPD_single)
3236  Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3237  else if (Kind == OMPD_barrier)
3238  Flags = OMP_IDENT_BARRIER_EXPL;
3239  else
3240  Flags = OMP_IDENT_BARRIER_IMPL;
3241  return Flags;
3242 }
3243 
3244 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
3245  OpenMPDirectiveKind Kind, bool EmitChecks,
3246  bool ForceSimpleCall) {
3247  if (!CGF.HaveInsertPoint())
3248  return;
3249  // Build call __kmpc_cancel_barrier(loc, thread_id);
3250  // Build call __kmpc_barrier(loc, thread_id);
3251  unsigned Flags = getDefaultFlagsForBarriers(Kind);
3252  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
3253  // thread_id);
3254  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
3255  getThreadID(CGF, Loc)};
3256  if (auto *OMPRegionInfo =
3257  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
3258  if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
3259  llvm::Value *Result = CGF.EmitRuntimeCall(
3260  createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
3261  if (EmitChecks) {
3262  // if (__kmpc_cancel_barrier()) {
3263  // exit from construct;
3264  // }
3265  llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
3266  llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
3267  llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
3268  CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3269  CGF.EmitBlock(ExitBB);
3270  // exit from construct;
3271  CodeGenFunction::JumpDest CancelDestination =
3272  CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3273  CGF.EmitBranchThroughCleanup(CancelDestination);
3274  CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3275  }
3276  return;
3277  }
3278  }
3279  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
3280 }
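// For illustration: an explicit '#pragma omp barrier' becomes
//   __kmpc_barrier(&loc, gtid);
// with OMP_IDENT_BARRIER_EXPL encoded in 'loc', while a barrier inside a
// cancellable region calls __kmpc_cancel_barrier and, when EmitChecks is set,
// roughly does:
//   if (__kmpc_cancel_barrier(&loc, gtid)) goto <cancellation exit>;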
3281 
3282 /// Map the OpenMP loop schedule to the runtime enumeration.
3283 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3284  bool Chunked, bool Ordered) {
3285  switch (ScheduleKind) {
3286  case OMPC_SCHEDULE_static:
3287  return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3288  : (Ordered ? OMP_ord_static : OMP_sch_static);
3289  case OMPC_SCHEDULE_dynamic:
3290  return Ordered ? OMP_ord_dynamic : OMP_sch_dynamic_chunked;
3291  case OMPC_SCHEDULE_guided:
3292  return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
3293  case OMPC_SCHEDULE_runtime:
3294  return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3295  case OMPC_SCHEDULE_auto:
3296  return Ordered ? OMP_ord_auto : OMP_sch_auto;
3297  case OMPC_SCHEDULE_unknown:
3298  assert(!Chunked && "chunk was specified but schedule kind not known");
3299  return Ordered ? OMP_ord_static : OMP_sch_static;
3300  }
3301  llvm_unreachable("Unexpected runtime schedule");
3302 }
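// For illustration, some of the mappings produced above for unordered loops:
//   schedule(static)        -> OMP_sch_static
//   schedule(static, 4)     -> OMP_sch_static_chunked
//   schedule(dynamic[, n])  -> OMP_sch_dynamic_chunked
//   schedule(guided[, n])   -> OMP_sch_guided_chunked
// With an 'ordered' clause the corresponding OMP_ord_* value is returned.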
3303 
3304 /// Map the OpenMP distribute schedule to the runtime enumeration.
3305 static OpenMPSchedType
3306 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
3307  // only static is allowed for dist_schedule
3308  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
3309 }
3310 
3311 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
3312  bool Chunked) const {
3313  OpenMPSchedType Schedule =
3314  getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3315  return Schedule == OMP_sch_static;
3316 }
3317 
3318 bool CGOpenMPRuntime::isStaticNonchunked(
3319  OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3320  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3321  return Schedule == OMP_dist_sch_static;
3322 }
3323 
3324 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
3325  bool Chunked) const {
3326  OpenMPSchedType Schedule =
3327  getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3328  return Schedule == OMP_sch_static_chunked;
3329 }
3330 
3331 bool CGOpenMPRuntime::isStaticChunked(
3332  OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3333  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3334  return Schedule == OMP_dist_sch_static_chunked;
3335 }
3336 
3337 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
3338  OpenMPSchedType Schedule =
3339  getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3340  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3341  return Schedule != OMP_sch_static;
3342 }
3343 
3344 static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
3345  OpenMPScheduleClauseModifier M1,
3346  OpenMPScheduleClauseModifier M2) {
3347  int Modifier = 0;
3348  switch (M1) {
3349  case OMPC_SCHEDULE_MODIFIER_monotonic:
3350  Modifier = OMP_sch_modifier_monotonic;
3351  break;
3352  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3353  Modifier = OMP_sch_modifier_nonmonotonic;
3354  break;
3355  case OMPC_SCHEDULE_MODIFIER_simd:
3356  if (Schedule == OMP_sch_static_chunked)
3357  Schedule = OMP_sch_static_balanced_chunked;
3358  break;
3359  case OMPC_SCHEDULE_MODIFIER_last:
3360  case OMPC_SCHEDULE_MODIFIER_unknown:
3361  break;
3362  }
3363  switch (M2) {
3364  case OMPC_SCHEDULE_MODIFIER_monotonic:
3365  Modifier = OMP_sch_modifier_monotonic;
3366  break;
3367  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3368  Modifier = OMP_sch_modifier_nonmonotonic;
3369  break;
3370  case OMPC_SCHEDULE_MODIFIER_simd:
3371  if (Schedule == OMP_sch_static_chunked)
3372  Schedule = OMP_sch_static_balanced_chunked;
3373  break;
3374  case OMPC_SCHEDULE_MODIFIER_last:
3375  case OMPC_SCHEDULE_MODIFIER_unknown:
3376  break;
3377  }
3378  return Schedule | Modifier;
3379 }
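// For illustration: the modifier bits are OR-ed into the schedule constant, so
// schedule(monotonic: dynamic, 4) yields roughly
//   OMP_sch_dynamic_chunked | OMP_sch_modifier_monotonic
// and the 'simd' modifier upgrades a static chunked schedule to the
// balanced-chunked form.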
3380 
3381 void CGOpenMPRuntime::emitForDispatchInit(
3382  CodeGenFunction &CGF, SourceLocation Loc,
3383  const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
3384  bool Ordered, const DispatchRTInput &DispatchValues) {
3385  if (!CGF.HaveInsertPoint())
3386  return;
3387  OpenMPSchedType Schedule = getRuntimeSchedule(
3388  ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
3389  assert(Ordered ||
3390  (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
3391  Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
3392  Schedule != OMP_sch_static_balanced_chunked));
3393  // Call __kmpc_dispatch_init(
3394  // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
3395  // kmp_int[32|64] lower, kmp_int[32|64] upper,
3396  // kmp_int[32|64] stride, kmp_int[32|64] chunk);
3397 
3398  // If the Chunk was not specified in the clause - use default value 1.
3399  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
3400  : CGF.Builder.getIntN(IVSize, 1);
3401  llvm::Value *Args[] = {
3402  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3403  CGF.Builder.getInt32(addMonoNonMonoModifier(
3404  Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
3405  DispatchValues.LB, // Lower
3406  DispatchValues.UB, // Upper
3407  CGF.Builder.getIntN(IVSize, 1), // Stride
3408  Chunk // Chunk
3409  };
3410  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
3411 }
3412 
3413 static void emitForStaticInitCall(
3414  CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
3415  llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule,
3416  OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
3417  const CGOpenMPRuntime::StaticRTInput &Values) {
3418  if (!CGF.HaveInsertPoint())
3419  return;
3420 
3421  assert(!Values.Ordered);
3422  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
3423  Schedule == OMP_sch_static_balanced_chunked ||
3424  Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
3425  Schedule == OMP_dist_sch_static ||
3426  Schedule == OMP_dist_sch_static_chunked);
3427 
3428  // Call __kmpc_for_static_init(
3429  // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
3430  // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
3431  // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
3432  // kmp_int[32|64] incr, kmp_int[32|64] chunk);
3433  llvm::Value *Chunk = Values.Chunk;
3434  if (Chunk == nullptr) {
3435  assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
3436  Schedule == OMP_dist_sch_static) &&
3437  "expected static non-chunked schedule");
3438  // If the Chunk was not specified in the clause - use default value 1.
3439  Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
3440  } else {
3441  assert((Schedule == OMP_sch_static_chunked ||
3442  Schedule == OMP_sch_static_balanced_chunked ||
3443  Schedule == OMP_ord_static_chunked ||
3444  Schedule == OMP_dist_sch_static_chunked) &&
3445  "expected static chunked schedule");
3446  }
3447  llvm::Value *Args[] = {
3448  UpdateLocation,
3449  ThreadId,
3450  CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1,
3451  M2)), // Schedule type
3452  Values.IL.getPointer(), // &isLastIter
3453  Values.LB.getPointer(), // &LB
3454  Values.UB.getPointer(), // &UB
3455  Values.ST.getPointer(), // &Stride
3456  CGF.Builder.getIntN(Values.IVSize, 1), // Incr
3457  Chunk // Chunk
3458  };
3459  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
3460 }
3461 
3462 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
3463  SourceLocation Loc,
3464  OpenMPDirectiveKind DKind,
3465  const OpenMPScheduleTy &ScheduleKind,
3466  const StaticRTInput &Values) {
3467  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3468  ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3469  assert(isOpenMPWorksharingDirective(DKind) &&
3470  "Expected loop-based or sections-based directive.");
3471  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3472  isOpenMPLoopDirective(DKind)
3473  ? OMP_IDENT_WORK_LOOP
3474  : OMP_IDENT_WORK_SECTIONS);
3475  llvm::Value *ThreadId = getThreadID(CGF, Loc);
3476  llvm::Constant *StaticInitFunction =
3477  createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3478  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3479  ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3480 }
3481 
3482 void CGOpenMPRuntime::emitDistributeStaticInit(
3483  CodeGenFunction &CGF, SourceLocation Loc,
3484  OpenMPDistScheduleClauseKind SchedKind,
3485  const CGOpenMPRuntime::StaticRTInput &Values) {
3486  OpenMPSchedType ScheduleNum =
3487  getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3488  llvm::Value *UpdatedLocation =
3489  emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3490  llvm::Value *ThreadId = getThreadID(CGF, Loc);
3491  llvm::Constant *StaticInitFunction =
3492  createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3493  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3494  ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3495  OMPC_SCHEDULE_MODIFIER_unknown, Values);
3496 }
3497 
3498 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3499  SourceLocation Loc,
3500  OpenMPDirectiveKind DKind) {
3501  if (!CGF.HaveInsertPoint())
3502  return;
3503  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3504  llvm::Value *Args[] = {
3505  emitUpdateLocation(CGF, Loc,
3506  isOpenMPDistributeDirective(DKind)
3507  ? OMP_IDENT_WORK_DISTRIBUTE
3508  : isOpenMPLoopDirective(DKind)
3509  ? OMP_IDENT_WORK_LOOP
3510  : OMP_IDENT_WORK_SECTIONS),
3511  getThreadID(CGF, Loc)};
3512  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3513  Args);
3514 }
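// For illustration, a statically scheduled '#pragma omp for' combines
// emitForStaticInit and emitForStaticFinish roughly as follows (names
// illustrative):
//   __kmpc_for_static_init_4(&loc, gtid, OMP_sch_static, &last, &lb, &ub, &st,
//                            /*incr=*/1, /*chunk=*/1);
//   for (i = lb; i <= ub; ++i) <body>;   // each thread's sub-range
//   __kmpc_for_static_fini(&loc, gtid);
// followed by the implicit barrier unless 'nowait' was specified.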
3515 
3516 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3517  SourceLocation Loc,
3518  unsigned IVSize,
3519  bool IVSigned) {
3520  if (!CGF.HaveInsertPoint())
3521  return;
3522  // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3523  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3524  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3525 }
3526 
3527 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
3528  SourceLocation Loc, unsigned IVSize,
3529  bool IVSigned, Address IL,
3530  Address LB, Address UB,
3531  Address ST) {
3532  // Call __kmpc_dispatch_next(
3533  // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3534  // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3535  // kmp_int[32|64] *p_stride);
3536  llvm::Value *Args[] = {
3537  emitUpdateLocation(CGF, Loc),
3538  getThreadID(CGF, Loc),
3539  IL.getPointer(), // &isLastIter
3540  LB.getPointer(), // &Lower
3541  UB.getPointer(), // &Upper
3542  ST.getPointer() // &Stride
3543  };
3544  llvm::Value *Call =
3545  CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3546  return CGF.EmitScalarConversion(
3547  Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
3548  CGF.getContext().BoolTy, Loc);
3549 }
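// For illustration, a dynamically scheduled loop combines emitForDispatchInit
// and emitForNext into a dispatch loop, roughly (names illustrative):
//   __kmpc_dispatch_init_4(&loc, gtid, sched, lb, ub, st, chunk);
//   while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lo, &hi, &stride))
//     for (i = lo; i <= hi; ++i) <body>;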
3550 
3551 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3552  llvm::Value *NumThreads,
3553  SourceLocation Loc) {
3554  if (!CGF.HaveInsertPoint())
3555  return;
3556  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3557  llvm::Value *Args[] = {
3558  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3559  CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3560  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3561  Args);
3562 }
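// For illustration: 'num_threads(4)' on a parallel directive becomes
//   __kmpc_push_num_threads(&loc, gtid, 4);
// emitted immediately before the corresponding __kmpc_fork_call.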
3563 
3564 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
3565  OpenMPProcBindClauseKind ProcBind,
3566  SourceLocation Loc) {
3567  if (!CGF.HaveInsertPoint())
3568  return;
3569  // Constants for proc bind value accepted by the runtime.
3570  enum ProcBindTy {
3571  ProcBindFalse = 0,
3572  ProcBindTrue,
3573  ProcBindMaster,
3574  ProcBindClose,
3575  ProcBindSpread,
3576  ProcBindIntel,
3577  ProcBindDefault
3578  } RuntimeProcBind;
3579  switch (ProcBind) {
3580  case OMPC_PROC_BIND_master:
3581  RuntimeProcBind = ProcBindMaster;
3582  break;
3583  case OMPC_PROC_BIND_close:
3584  RuntimeProcBind = ProcBindClose;
3585  break;
3586  case OMPC_PROC_BIND_spread:
3587  RuntimeProcBind = ProcBindSpread;
3588  break;
3589  case OMPC_PROC_BIND_unknown:
3590  llvm_unreachable("Unsupported proc_bind value.");
3591  }
3592  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3593  llvm::Value *Args[] = {
3594  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3595  llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
3596  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
3597 }
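// For illustration: 'proc_bind(spread)' becomes
//   __kmpc_push_proc_bind(&loc, gtid, /*ProcBindSpread=*/4);
// emitted before the __kmpc_fork_call of the enclosing parallel directive.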
3598 
3599 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3600  SourceLocation Loc) {
3601  if (!CGF.HaveInsertPoint())
3602  return;
3603  // Build call void __kmpc_flush(ident_t *loc)
3604  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3605  emitUpdateLocation(CGF, Loc));
3606 }
3607 
3608 namespace {
3609 /// Indexes of fields for type kmp_task_t.
3610 enum KmpTaskTFields {
3611  /// List of shared variables.
3612  KmpTaskTShareds,
3613  /// Task routine.
3614  KmpTaskTRoutine,
3615  /// Partition id for the untied tasks.
3616  KmpTaskTPartId,
3617  /// Function with call of destructors for private variables.
3618  Data1,
3619  /// Task priority.
3620  Data2,
3621  /// (Taskloops only) Lower bound.
3622  KmpTaskTLowerBound,
3623  /// (Taskloops only) Upper bound.
3624  KmpTaskTUpperBound,
3625  /// (Taskloops only) Stride.
3626  KmpTaskTStride,
3627  /// (Taskloops only) Is last iteration flag.
3628  KmpTaskTLastIter,
3629  /// (Taskloops only) Reduction data.
3630  KmpTaskTReductions,
3631 };
3632 } // anonymous namespace
3633 
3634 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3635  return OffloadEntriesTargetRegion.empty() &&
3636  OffloadEntriesDeviceGlobalVar.empty();
3637 }
3638 
3639 /// Initialize target region entry.
3640 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3641  initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3642  StringRef ParentName, unsigned LineNum,
3643  unsigned Order) {
3644  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3645  "only required for the device "
3646  "code generation.");
3647  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3648  OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3649  OMPTargetRegionEntryTargetRegion);
3650  ++OffloadingEntriesNum;
3651 }
3652 
3653 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3654  registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3655  StringRef ParentName, unsigned LineNum,
3656  llvm::Constant *Addr, llvm::Constant *ID,
3657  OMPTargetRegionEntryKind Flags) {
3658  // If we are emitting code for a target, the entry is already initialized;
3659  // it only has to be registered.
3660  if (CGM.getLangOpts().OpenMPIsDevice) {
3661  if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
3662  unsigned DiagID = CGM.getDiags().getCustomDiagID(
3664  "Unable to find target region on line '%0' in the device code.");
3665  CGM.getDiags().Report(DiagID) << LineNum;
3666  return;
3667  }
3668  auto &Entry =
3669  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3670  assert(Entry.isValid() && "Entry not initialized!");
3671  Entry.setAddress(Addr);
3672  Entry.setID(ID);
3673  Entry.setFlags(Flags);
3674  } else {
3675  OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3676  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3677  ++OffloadingEntriesNum;
3678  }
3679 }
3680 
3681 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3682  unsigned DeviceID, unsigned FileID, StringRef ParentName,
3683  unsigned LineNum) const {
3684  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3685  if (PerDevice == OffloadEntriesTargetRegion.end())
3686  return false;
3687  auto PerFile = PerDevice->second.find(FileID);
3688  if (PerFile == PerDevice->second.end())
3689  return false;
3690  auto PerParentName = PerFile->second.find(ParentName);
3691  if (PerParentName == PerFile->second.end())
3692  return false;
3693  auto PerLine = PerParentName->second.find(LineNum);
3694  if (PerLine == PerParentName->second.end())
3695  return false;
3696  // Fail if this entry is already registered.
3697  if (PerLine->second.getAddress() || PerLine->second.getID())
3698  return false;
3699  return true;
3700 }
3701 
3702 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3703  const OffloadTargetRegionEntryInfoActTy &Action) {
3704  // Scan all target region entries and perform the provided action.
3705  for (const auto &D : OffloadEntriesTargetRegion)
3706  for (const auto &F : D.second)
3707  for (const auto &P : F.second)
3708  for (const auto &L : P.second)
3709  Action(D.first, F.first, P.first(), L.first, L.second);
3710 }
3711 
3712 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3713  initializeDeviceGlobalVarEntryInfo(StringRef Name,
3714  OMPTargetGlobalVarEntryKind Flags,
3715  unsigned Order) {
3716  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3717  "only required for the device "
3718  "code generation.");
3719  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3720  ++OffloadingEntriesNum;
3721 }
3722 
3723 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3724  registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3725  CharUnits VarSize,
3726  OMPTargetGlobalVarEntryKind Flags,
3727  llvm::GlobalValue::LinkageTypes Linkage) {
3728  if (CGM.getLangOpts().OpenMPIsDevice) {
3729  auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3730  assert(Entry.isValid() && Entry.getFlags() == Flags &&
3731  "Entry not initialized!");
3732  assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3733  "Resetting with the new address.");
3734  if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName))
3735  return;
3736  Entry.setAddress(Addr);
3737  Entry.setVarSize(VarSize);
3738  Entry.setLinkage(Linkage);
3739  } else {
3740  if (hasDeviceGlobalVarEntryInfo(VarName))
3741  return;
3742  OffloadEntriesDeviceGlobalVar.try_emplace(
3743  VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3744  ++OffloadingEntriesNum;
3745  }
3746 }
3747 
3748 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3749  actOnDeviceGlobalVarEntriesInfo(
3750  const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3751  // Scan all device global variable entries and perform the provided action.
3752  for (const auto &E : OffloadEntriesDeviceGlobalVar)
3753  Action(E.getKey(), E.getValue());
3754 }
3755 
3756 llvm::Function *
3757 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
3758  // If we don't have entries or if we are emitting code for the device, we
3759  // don't need to do anything.
3760  if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
3761  return nullptr;
3762 
3763  llvm::Module &M = CGM.getModule();
3764  ASTContext &C = CGM.getContext();
3765 
3766  // Get list of devices we care about
3767  const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples;
3768 
3769  // We should be creating an offloading descriptor only if there are devices
3770  // specified.
3771  assert(!Devices.empty() && "No OpenMP offloading devices??");
3772 
3773  // Create the external variables that will point to the begin and end of the
3774  // host entries section. These will be defined by the linker.
3775  llvm::Type *OffloadEntryTy =
3776  CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
3777  std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"});
3778  auto *HostEntriesBegin = new llvm::GlobalVariable(
3779  M, OffloadEntryTy, /*isConstant=*/true,
3780  llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
3781  EntriesBeginName);
3782  std::string EntriesEndName = getName({"omp_offloading", "entries_end"});
3783  auto *HostEntriesEnd =
3784  new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true,
3786  /*Initializer=*/nullptr, EntriesEndName);
3787 
3788  // Create all device images
3789  auto *DeviceImageTy = cast<llvm::StructType>(
3790  CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
3791  ConstantInitBuilder DeviceImagesBuilder(CGM);
3792  ConstantArrayBuilder DeviceImagesEntries =
3793  DeviceImagesBuilder.beginArray(DeviceImageTy);
3794 
3795  for (const llvm::Triple &Device : Devices) {
3796  StringRef T = Device.getTriple();
3797  std::string BeginName = getName({"omp_offloading", "img_start", ""});
3798  auto *ImgBegin = new llvm::GlobalVariable(
3799  M, CGM.Int8Ty, /*isConstant=*/true,
3800  llvm::GlobalValue::ExternalWeakLinkage,
3801  /*Initializer=*/nullptr, Twine(BeginName).concat(T));
3802  std::string EndName = getName({"omp_offloading", "img_end", ""});
3803  auto *ImgEnd = new llvm::GlobalVariable(
3804  M, CGM.Int8Ty, /*isConstant=*/true,
3805  llvm::GlobalValue::ExternalWeakLinkage,
3806  /*Initializer=*/nullptr, Twine(EndName).concat(T));
3807 
3808  llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin,
3809  HostEntriesEnd};
3811  DeviceImagesEntries);
3812  }
3813 
3814  // Create device images global array.
3815  std::string ImagesName = getName({"omp_offloading", "device_images"});
3816  llvm::GlobalVariable *DeviceImages =
3817  DeviceImagesEntries.finishAndCreateGlobal(ImagesName,
3818  CGM.getPointerAlign(),
3819  /*isConstant=*/true);
3820  DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3821 
3822  // This is a Zero array to be used in the creation of the constant expressions
3823  llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
3824  llvm::Constant::getNullValue(CGM.Int32Ty)};
3825 
3826  // Create the target region descriptor.
3827  llvm::Constant *Data[] = {
3828  llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
3829  llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
3830  DeviceImages, Index),
3831  HostEntriesBegin, HostEntriesEnd};
3832  std::string Descriptor = getName({"omp_offloading", "descriptor"});
3833  llvm::GlobalVariable *Desc = createGlobalStruct(
3834  CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor);
3835 
3836  // Emit code to register or unregister the descriptor at execution
3837  // startup or closing, respectively.
3838 
3839  llvm::Function *UnRegFn;
3840  {
3841  FunctionArgList Args;
3843  Args.push_back(&DummyPtr);
3844 
3845  CodeGenFunction CGF(CGM);
3846  // Disable debug info for global (de-)initializer because they are not part
3847  // of some particular construct.
3848  CGF.disableDebugInfo();
3849  const auto &FI =
3851  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
3852  std::string UnregName = getName({"omp_offloading", "descriptor_unreg"});
3853  UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI);
3854  CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args);
3856  Desc);
3857  CGF.FinishFunction();
3858  }
3859  llvm::Function *RegFn;
3860  {
3861  CodeGenFunction CGF(CGM);
3862  // Disable debug info for global (de-)initializer because they are not part
3863  // of some particular construct.
3864  CGF.disableDebugInfo();
3865  const auto &FI = CGM.getTypes().arrangeNullaryFunction();
3866  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
3867 
3868  // Encode offload target triples into the registration function name. It
3869  // will serve as a comdat key for the registration/unregistration code for
3870  // this particular combination of offloading targets.
3871  SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U);
3872  RegFnNameParts[0] = "omp_offloading";
3873  RegFnNameParts[1] = "descriptor_reg";
3874  llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2),
3875  [](const llvm::Triple &T) -> const std::string& {
3876  return T.getTriple();
3877  });
3878  llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end());
3879  std::string Descriptor = getName(RegFnNameParts);
3880  RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI);
3881  CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList());
3883  // Create a variable to drive the registration and unregistration of the
3884  // descriptor, so we can reuse the logic that emits Ctors and Dtors.
3885  ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(),
3886  SourceLocation(), nullptr, C.CharTy,
3888  CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
3889  CGF.FinishFunction();
3890  }
3891  if (CGM.supportsCOMDAT()) {
3892  // It is sufficient to call registration function only once, so create a
3893  // COMDAT group for registration/unregistration functions and associated
3894  // data. That would reduce startup time and code size. Registration
3895  // function serves as a COMDAT group key.
3896  llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName());
3897  RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
3898  RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
3899  RegFn->setComdat(ComdatKey);
3900  UnRegFn->setComdat(ComdatKey);
3901  DeviceImages->setComdat(ComdatKey);
3902  Desc->setComdat(ComdatKey);
3903  }
3904  return RegFn;
3905 }
3906 
3907 void CGOpenMPRuntime::createOffloadEntry(
3908  llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3909  llvm::GlobalValue::LinkageTypes Linkage) {
3910  StringRef Name = Addr->getName();
3911  llvm::Module &M = CGM.getModule();
3912  llvm::LLVMContext &C = M.getContext();
3913 
3914  // Create constant string with the name.
3915  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3916 
3917  std::string StringName = getName({"omp_offloading", "entry_name"});
3918  auto *Str = new llvm::GlobalVariable(
3919  M, StrPtrInit->getType(), /*isConstant=*/true,
3920  llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3921  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3922 
3923  llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
3924  llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
3925  llvm::ConstantInt::get(CGM.SizeTy, Size),
3926  llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3927  llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3928  std::string EntryName = getName({"omp_offloading", "entry", ""});
3929  llvm::GlobalVariable *Entry = createGlobalStruct(
3930  CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3931  Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3932 
3933  // The entry has to be created in the section the linker expects it to be.
3934  std::string Section = getName({"omp_offloading", "entries"});
3935  Entry->setSection(Section);
3936 }
3937 
3938 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3939  // Emit the offloading entries and metadata so that the device codegen side
3940  // can easily figure out what to emit. The produced metadata looks like
3941  // this:
3942  //
3943  // !omp_offload.info = !{!1, ...}
3944  //
3945  // Right now we only generate metadata for functions that contain target
3946  // regions.
3947 
3948  // If we do not have entries, we don't need to do anything.
3949  if (OffloadEntriesInfoManager.empty())
3950  return;
3951 
3952  llvm::Module &M = CGM.getModule();
3953  llvm::LLVMContext &C = M.getContext();
3955  OrderedEntries(OffloadEntriesInfoManager.size());
3956  llvm::SmallVector<StringRef, 16> ParentFunctions(
3958 
3959  // Auxiliary methods to create metadata values and strings.
3960  auto &&GetMDInt = [this](unsigned V) {
3961  return llvm::ConstantAsMetadata::get(
3962  llvm::ConstantInt::get(CGM.Int32Ty, V));
3963  };
3964 
3965  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
3966 
3967  // Create the offloading info metadata node.
3968  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3969 
3970  // Create function that emits metadata for each target region entry;
3971  auto &&TargetRegionMetadataEmitter =
3972  [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString](
3973  unsigned DeviceID, unsigned FileID, StringRef ParentName,
3974  unsigned Line,
3976  // Generate metadata for target regions. Each entry of this metadata
3977  // contains:
3978  // - Entry 0 -> Kind of this type of metadata (0).
3979  // - Entry 1 -> Device ID of the file where the entry was identified.
3980  // - Entry 2 -> File ID of the file where the entry was identified.
3981  // - Entry 3 -> Mangled name of the function where the entry was
3982  // identified.
3983  // - Entry 4 -> Line in the file where the entry was identified.
3984  // - Entry 5 -> Order the entry was created.
3985  // The first element of the metadata node is the kind.
3986  llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
3987  GetMDInt(FileID), GetMDString(ParentName),
3988  GetMDInt(Line), GetMDInt(E.getOrder())};
3989 
3990  // Save this entry in the right position of the ordered entries array.
3991  OrderedEntries[E.getOrder()] = &E;
3992  ParentFunctions[E.getOrder()] = ParentName;
3993 
3994  // Add metadata to the named metadata node.
3995  MD->addOperand(llvm::MDNode::get(C, Ops));
3996  };
3997 
3998  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
3999  TargetRegionMetadataEmitter);
4000 
4001  // Create function that emits metadata for each device global variable entry;
4002  auto &&DeviceGlobalVarMetadataEmitter =
4003  [&C, &OrderedEntries, &GetMDInt, &GetMDString,
4004  MD](StringRef MangledName,
4006  &E) {
4007  // Generate metadata for global variables. Each entry of this metadata
4008  // contains:
4009  // - Entry 0 -> Kind of this type of metadata (1).
4010  // - Entry 1 -> Mangled name of the variable.
4011  // - Entry 2 -> Declare target kind.
4012  // - Entry 3 -> Order the entry was created.
4013  // The first element of the metadata node is the kind.
4014  llvm::Metadata *Ops[] = {
4015  GetMDInt(E.getKind()), GetMDString(MangledName),
4016  GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
4017 
4018  // Save this entry in the right position of the ordered entries array.
4019  OrderedEntries[E.getOrder()] = &E;
4020 
4021  // Add metadata to the named metadata node.
4022  MD->addOperand(llvm::MDNode::get(C, Ops));
4023  };
4024 
4025  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
4026  DeviceGlobalVarMetadataEmitter);
4027 
4028  for (const auto *E : OrderedEntries) {
4029  assert(E && "All ordered entries must exist!");
4030  if (const auto *CE =
4031  dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
4032  E)) {
4033  if (!CE->getID() || !CE->getAddress()) {
4034  // Do not blame the entry if the parent function is not emitted.
4035  StringRef FnName = ParentFunctions[CE->getOrder()];
4036  if (!CGM.GetGlobalValue(FnName))
4037  continue;
4038  unsigned DiagID = CGM.getDiags().getCustomDiagID(
4040  "Offloading entry for target region is incorrect: either the "
4041  "address or the ID is invalid.");
4042  CGM.getDiags().Report(DiagID);
4043  continue;
4044  }
4045  createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
4046  CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
4047  } else if (const auto *CE =
4048  dyn_cast<OffloadEntriesInfoManagerTy::
4049  OffloadEntryInfoDeviceGlobalVar>(E)) {
4052  CE->getFlags());
4053  switch (Flags) {
4055  if (!CE->getAddress()) {
4056  unsigned DiagID = CGM.getDiags().getCustomDiagID(
4058  "Offloading entry for declare target variable is incorrect: the "
4059  "address is invalid.");
4060  CGM.getDiags().Report(DiagID);
4061  continue;
4062  }
4063  // The variable has no definition; no need to add the entry.
4064  if (CE->getVarSize().isZero())
4065  continue;
4066  break;
4067  }
4069  assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
4070  (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
4071  "Declare target link address is set.");
4072  if (CGM.getLangOpts().OpenMPIsDevice)
4073  continue;
4074  if (!CE->getAddress()) {
4075  unsigned DiagID = CGM.getDiags().getCustomDiagID(
4077  "Offloading entry for declare target variable is incorrect: the "
4078  "address is invalid.");
4079  CGM.getDiags().Report(DiagID);
4080  continue;
4081  }
4082  break;
4083  }
4084  createOffloadEntry(CE->getAddress(), CE->getAddress(),
4085  CE->getVarSize().getQuantity(), Flags,
4086  CE->getLinkage());
4087  } else {
4088  llvm_unreachable("Unsupported entry kind.");
4089  }
4090  }
4091 }
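// For illustration, the host IR produced above carries named metadata of the
// form (all operand values illustrative):
//   !omp_offload.info = !{!0, !1}
//   !0 = !{i32 0, i32 <device-id>, i32 <file-id>, !"<parent fn>", i32 <line>, i32 <order>}
//   !1 = !{i32 1, !"<mangled var name>", i32 <declare-target kind>, i32 <order>}
// which loadOffloadInfoMetadata() reads back when compiling for a device.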
4092 
4093 /// Loads all the offload entries information from the host IR
4094 /// metadata.
4095 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
4096  // If we are in target mode, load the metadata from the host IR. This code has
4097  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().
4098 
4099  if (!CGM.getLangOpts().OpenMPIsDevice)
4100  return;
4101 
4102  if (CGM.getLangOpts().OMPHostIRFile.empty())
4103  return;
4104 
4105  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
4106  if (auto EC = Buf.getError()) {
4107  CGM.getDiags().Report(diag::err_cannot_open_file)
4108  << CGM.getLangOpts().OMPHostIRFile << EC.message();
4109  return;
4110  }
4111 
4112  llvm::LLVMContext C;
4113  auto ME = expectedToErrorOrAndEmitErrors(
4114  C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
4115 
4116  if (auto EC = ME.getError()) {
4117  unsigned DiagID = CGM.getDiags().getCustomDiagID(
4118  DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
4119  CGM.getDiags().Report(DiagID)
4120  << CGM.getLangOpts().OMPHostIRFile << EC.message();
4121  return;
4122  }
4123 
4124  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
4125  if (!MD)
4126  return;
4127 
4128  for (llvm::MDNode *MN : MD->operands()) {
4129  auto &&GetMDInt = [MN](unsigned Idx) {
4130  auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
4131  return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
4132  };
4133 
4134  auto &&GetMDString = [MN](unsigned Idx) {
4135  auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
4136  return V->getString();
4137  };
4138 
4139  switch (GetMDInt(0)) {
4140  default:
4141  llvm_unreachable("Unexpected metadata!");
4142  break;
4146  /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
4147  /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
4148  /*Order=*/GetMDInt(5));
4149  break;
4153  /*MangledName=*/GetMDString(1),
4154  static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4155  /*Flags=*/GetMDInt(2)),
4156  /*Order=*/GetMDInt(3));
4157  break;
4158  }
4159  }
4160 }
4161 
4162 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
4163  if (!KmpRoutineEntryPtrTy) {
4164  // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4165  ASTContext &C = CGM.getContext();
4166  QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4167  FunctionProtoType::ExtProtoInfo EPI;
4168  KmpRoutineEntryPtrQTy = C.getPointerType(
4169  C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4170  KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4171  }
4172 }
4173 
4174 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
4175  // Make sure the type of the entry is already created. This is the type we
4176  // have to create:
4177  // struct __tgt_offload_entry{
4178  // void *addr; // Pointer to the offload entry info.
4179  // // (function or global)
4180  // char *name; // Name of the function or global.
4181  // size_t size; // Size of the entry info (0 if it is a function).
4182  // int32_t flags; // Flags associated with the entry, e.g. 'link'.
4183  // int32_t reserved; // Reserved, to use by the runtime library.
4184  // };
4185  if (TgtOffloadEntryQTy.isNull()) {
4186  ASTContext &C = CGM.getContext();
4187  RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4188  RD->startDefinition();
4189  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4190  addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
4191  addFieldToRecordDecl(C, RD, C.getSizeType());
4192  addFieldToRecordDecl(
4193  C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4194  addFieldToRecordDecl(
4195  C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4196  RD->completeDefinition();
4197  RD->addAttr(PackedAttr::CreateImplicit(C));
4198  TgtOffloadEntryQTy = C.getRecordType(RD);
4199  }
4200  return TgtOffloadEntryQTy;
4201 }
4202 
4203 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
4204  // These are the types we need to build:
4205  // struct __tgt_device_image{
4206  // void *ImageStart; // Pointer to the target code start.
4207  // void *ImageEnd; // Pointer to the target code end.
4208  // // We also add the host entries to the device image, as it may be useful
4209  // // for the target runtime to have access to that information.
4210  // __tgt_offload_entry *EntriesBegin; // Begin of the table with all
4211  // // the entries.
4212  // __tgt_offload_entry *EntriesEnd; // End of the table with all the
4213  // // entries (non inclusive).
4214  // };
4215  if (TgtDeviceImageQTy.isNull()) {
4216  ASTContext &C = CGM.getContext();
4217  RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image");
4218  RD->startDefinition();
4219  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4220  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4223  RD->completeDefinition();
4225  }
4226  return TgtDeviceImageQTy;
4227 }
4228 
4229 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
4230  // struct __tgt_bin_desc{
4231  // int32_t NumDevices; // Number of devices supported.
4232  // __tgt_device_image *DeviceImages; // Arrays of device images
4233  // // (one per device).
4234  // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the
4235  // // entries.
4236  // __tgt_offload_entry *EntriesEnd; // End of the table with all the
4237  // // entries (non inclusive).
4238  // };
4240  ASTContext &C = CGM.getContext();
4241  RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc");
4242  RD->startDefinition();
4244  C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4248  RD->completeDefinition();
4250  }
4251  return TgtBinaryDescriptorQTy;
4252 }
4253 
4254 namespace {
4255 struct PrivateHelpersTy {
4256  PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
4257  const VarDecl *PrivateElemInit)
4258  : Original(Original), PrivateCopy(PrivateCopy),
4259  PrivateElemInit(PrivateElemInit) {}
4260  const VarDecl *Original;
4261  const VarDecl *PrivateCopy;
4262  const VarDecl *PrivateElemInit;
4263 };
4264 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
4265 } // anonymous namespace
4266 
4267 static RecordDecl *
4269  if (!Privates.empty()) {
4270  ASTContext &C = CGM.getContext();
4271  // Build struct .kmp_privates_t. {
4272  // /* private vars */
4273  // };
4274  RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4275  RD->startDefinition();
4276  for (const auto &Pair : Privates) {
4277  const VarDecl *VD = Pair.second.Original;
4278  QualType Type = VD->getType().getNonReferenceType();
4279  FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
4280  if (VD->hasAttrs()) {
4281  for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4282  E(VD->getAttrs().end());
4283  I != E; ++I)
4284  FD->addAttr(*I);
4285  }
4286  }
4287  RD->completeDefinition();
4288  return RD;
4289  }
4290  return nullptr;
4291 }
4292 
4293 static RecordDecl *
4295  QualType KmpInt32Ty,
4296  QualType KmpRoutineEntryPointerQTy) {
4297  ASTContext &C = CGM.getContext();
4298  // Build struct kmp_task_t {
4299  // void * shareds;
4300  // kmp_routine_entry_t routine;
4301  // kmp_int32 part_id;
4302  // kmp_cmplrdata_t data1;
4303  // kmp_cmplrdata_t data2;
4304  // For taskloops additional fields:
4305  // kmp_uint64 lb;
4306  // kmp_uint64 ub;
4307  // kmp_int64 st;
4308  // kmp_int32 liter;
4309  // void * reductions;
4310  // };
4311  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4312  UD->startDefinition();
4313  addFieldToRecordDecl(C, UD, KmpInt32Ty);
4314  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4315  UD->completeDefinition();
4316  QualType KmpCmplrdataTy = C.getRecordType(UD);
4317  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4318  RD->startDefinition();
4319  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4320  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4321  addFieldToRecordDecl(C, RD, KmpInt32Ty);
4322  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4323  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4324  if (isOpenMPTaskLoopDirective(Kind)) {
4325  QualType KmpUInt64Ty =
4326  CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4327  QualType KmpInt64Ty =
4328  CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4329  addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4330  addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4331  addFieldToRecordDecl(C, RD, KmpInt64Ty);
4332  addFieldToRecordDecl(C, RD, KmpInt32Ty);
4333  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4334  }
4335  RD->completeDefinition();
4336  return RD;
4337 }
4338 
4339 static RecordDecl *
4341  ArrayRef<PrivateDataTy> Privates) {
4342  ASTContext &C = CGM.getContext();
4343  // Build struct kmp_task_t_with_privates {
4344  // kmp_task_t task_data;
4345  // .kmp_privates_t. privates;
4346  // };
4347  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4348  RD->startDefinition();
4349  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4350  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4351  addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4352  RD->completeDefinition();
4353  return RD;
4354 }
4355 
4356 /// Emit a proxy function which accepts kmp_task_t as the second
4357 /// argument.
4358 /// \code
4359 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
4360 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
4361 /// For taskloops:
4362 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
4363 /// tt->reductions, tt->shareds);
4364 /// return 0;
4365 /// }
4366 /// \endcode
4367 static llvm::Value *
4368 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
4369  OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
4370  QualType KmpTaskTWithPrivatesPtrQTy,
4371  QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
4372  QualType SharedsPtrTy, llvm::Value *TaskFunction,
4373  llvm::Value *TaskPrivatesMap) {
4374  ASTContext &C = CGM.getContext();
4375  FunctionArgList Args;
4376  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4378  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4379  KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4381  Args.push_back(&GtidArg);
4382  Args.push_back(&TaskTypeArg);
4383  const auto &TaskEntryFnInfo =
4384  CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4385  llvm::FunctionType *TaskEntryTy =
4386  CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
4387  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
4388  auto *TaskEntry = llvm::Function::Create(
4389  TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4390  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
4391  TaskEntry->setDoesNotRecurse();
4392  CodeGenFunction CGF(CGM);
4393  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
4394  Loc, Loc);
4395 
4396  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
4397  // tt,
4398  // For taskloops:
4399  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
4400  // tt->task_data.shareds);
4401  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
4402  CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
4403  LValue TDBase = CGF.EmitLoadOfPointerLValue(
4404  CGF.GetAddrOfLocalVar(&TaskTypeArg),
4405  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4406  const auto *KmpTaskTWithPrivatesQTyRD =
4407  cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4408  LValue Base =
4409  CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4410  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4411  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4412  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
4413  llvm::Value *PartidParam = PartIdLVal.getPointer();
4414 
4415  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
4416  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
4417  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4418  CGF.EmitLoadOfScalar(SharedsLVal, Loc),
4419  CGF.ConvertTypeForMem(SharedsPtrTy));
4420 
4421  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4422  llvm::Value *PrivatesParam;
4423  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
4424  LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
4425  PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4426  PrivatesLVal.getPointer(), CGF.VoidPtrTy);
4427  } else {
4428  PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4429  }
4430 
4431  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
4432  TaskPrivatesMap,
4433  CGF.Builder
4435  TDBase.getAddress(), CGF.VoidPtrTy)
4436  .getPointer()};
4437  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
4438  std::end(CommonArgs));
4439  if (isOpenMPTaskLoopDirective(Kind)) {
4440  auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
4441  LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
4442  llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
4443  auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
4444  LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
4445  llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
4446  auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
4447  LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
4448  llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
4449  auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4450  LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4451  llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
4452  auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
4453  LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
4454  llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
4455  CallArgs.push_back(LBParam);
4456  CallArgs.push_back(UBParam);
4457  CallArgs.push_back(StParam);
4458  CallArgs.push_back(LIParam);
4459  CallArgs.push_back(RParam);
4460  }
4461  CallArgs.push_back(SharedsParam);
4462 
4463  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
4464  CallArgs);
4465  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
4466  CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
4467  CGF.FinishFunction();
4468  return TaskEntry;
4469 }
4470 
4471 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
4472  SourceLocation Loc,
4473  QualType KmpInt32Ty,
4474  QualType KmpTaskTWithPrivatesPtrQTy,
4475  QualType KmpTaskTWithPrivatesQTy) {
4476  ASTContext &C = CGM.getContext();
4477  FunctionArgList Args;
4478  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4479  ImplicitParamDecl::Other);
4480  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4481  KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4482  ImplicitParamDecl::Other);
4483  Args.push_back(&GtidArg);
4484  Args.push_back(&TaskTypeArg);
4485  const auto &DestructorFnInfo =
4486  CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4487  llvm::FunctionType *DestructorFnTy =
4488  CGM.getTypes().GetFunctionType(DestructorFnInfo);
4489  std::string Name =
4490  CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
4491  auto *DestructorFn =
4492  llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
4493  Name, &CGM.getModule());
4494  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
4495  DestructorFnInfo);
4496  DestructorFn->setDoesNotRecurse();
4497  CodeGenFunction CGF(CGM);
4498  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
4499  Args, Loc, Loc);
4500 
4501  LValue Base = CGF.EmitLoadOfPointerLValue(
4502  CGF.GetAddrOfLocalVar(&TaskTypeArg),
4503  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4504  const auto *KmpTaskTWithPrivatesQTyRD =
4505  cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4506  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4507  Base = CGF.EmitLValueForField(Base, *FI);
4508  for (const auto *Field :
4509  cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
4510  if (QualType::DestructionKind DtorKind =
4511  Field->getType().isDestructedType()) {
4512  LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
4513  CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
4514  }
4515  }
4516  CGF.FinishFunction();
4517  return DestructorFn;
4518 }
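// Editorial sketch (added in editing, not part of the clang source): the
// destructor thunk emitted above behaves roughly like
//
//   kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t_with_privates *tt) {
//     for each field F of tt->privates whose type needs destruction:
//       destroy F;
//   }
//
// Only fields whose type reports a non-trivial destruction kind get a
// cleanup pushed.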
4519 
4520 /// Emit a privates mapping function for correct handling of private and
4521 /// firstprivate variables.
4522 /// \code
4523 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4524 /// **noalias priv1,..., <tyn> **noalias privn) {
4525 /// *priv1 = &.privates.priv1;
4526 /// ...;
4527 /// *privn = &.privates.privn;
4528 /// }
4529 /// \endcode
4530 static llvm::Value *
4531 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4532  ArrayRef<const Expr *> PrivateVars,
4533  ArrayRef<const Expr *> FirstprivateVars,
4534  ArrayRef<const Expr *> LastprivateVars,
4535  QualType PrivatesQTy,
4536  ArrayRef<PrivateDataTy> Privates) {
4537  ASTContext &C = CGM.getContext();
4538  FunctionArgList Args;
4539  ImplicitParamDecl TaskPrivatesArg(
4540  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4541  C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4542  ImplicitParamDecl::Other);
4543  Args.push_back(&TaskPrivatesArg);
4544  llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4545  unsigned Counter = 1;
4546  for (const Expr *E : PrivateVars) {
4547  Args.push_back(ImplicitParamDecl::Create(
4548  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4549  C.getPointerType(C.getPointerType(E->getType()))
4550  .withConst()
4551  .withRestrict(),
4552  ImplicitParamDecl::Other));
4553  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4554  PrivateVarsPos[VD] = Counter;
4555  ++Counter;
4556  }
4557  for (const Expr *E : FirstprivateVars) {
4558  Args.push_back(ImplicitParamDecl::Create(
4559  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4560  C.getPointerType(C.getPointerType(E->getType()))
4561  .withConst()
4562  .withRestrict(),
4563  ImplicitParamDecl::Other));
4564  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4565  PrivateVarsPos[VD] = Counter;
4566  ++Counter;
4567  }
4568  for (const Expr *E : LastprivateVars) {
4569  Args.push_back(ImplicitParamDecl::Create(
4570  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4571  C.getPointerType(C.getPointerType(E->getType()))
4572  .withConst()
4573  .withRestrict(),
4574  ImplicitParamDecl::Other));
4575  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4576  PrivateVarsPos[VD] = Counter;
4577  ++Counter;
4578  }
4579  const auto &TaskPrivatesMapFnInfo =
4580  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4581  llvm::FunctionType *TaskPrivatesMapTy =
4582  CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4583  std::string Name =
4584  CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4585  auto *TaskPrivatesMap = llvm::Function::Create(
4586  TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4587  &CGM.getModule());
4588  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4589  TaskPrivatesMapFnInfo);
4590  TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4591  TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4592  TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4593  CodeGenFunction CGF(CGM);
4594  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4595  TaskPrivatesMapFnInfo, Args, Loc, Loc);
4596 
4597  // *privi = &.privates.privi;
4598  LValue Base = CGF.EmitLoadOfPointerLValue(
4599  CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4600  TaskPrivatesArg.getType()->castAs<PointerType>());
4601  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4602  Counter = 0;
4603  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4604  LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4605  const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4606  LValue RefLVal =
4607  CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4608  LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4609  RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
4610  CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
4611  ++Counter;
4612  }
4613  CGF.FinishFunction();
4614  return TaskPrivatesMap;
4615 }
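// Editorial note (added in editing, not part of the clang source): for a
// directive such as
//   #pragma omp task private(a) firstprivate(b)
// the helper above receives the privates record plus one out-pointer per
// captured private and simply publishes field addresses, e.g.
//   *a_out = &privs->a; *b_out = &privs->b;
// NoInline/OptimizeNone are stripped and AlwaysInline is added so the
// indirection normally folds away after inlining.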
4616 
4617 static bool stable_sort_comparator(const PrivateDataTy P1,
4618  const PrivateDataTy P2) {
4619  return P1.first > P2.first;
4620 }
4621 
4622 /// Emit initialization for private variables in task-based directives.
4623 static void emitPrivatesInit(CodeGenFunction &CGF,
4624  const OMPExecutableDirective &D,
4625  Address KmpTaskSharedsPtr, LValue TDBase,
4626  const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4627  QualType SharedsTy, QualType SharedsPtrTy,
4628  const OMPTaskDataTy &Data,
4629  ArrayRef<PrivateDataTy> Privates, bool ForDup) {
4630  ASTContext &C = CGF.getContext();
4631  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4632  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
4633  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
4634  ? OMPD_taskloop
4635  : OMPD_task;
4636  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
4637  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
4638  LValue SrcBase;
4639  bool IsTargetTask =
4640  isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
4641  isOpenMPTargetExecutionDirective(D.getDirectiveKind());
4642  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
4643  // PointersArray and SizesArray. The original variables for these arrays are
4644  // not captured and we get their addresses explicitly.
4645  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
4646  (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
4647  SrcBase = CGF.MakeAddrLValue(
4648  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4649  KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
4650  SharedsTy);
4651  }
4652  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
4653  for (const PrivateDataTy &Pair : Privates) {
4654  const VarDecl *VD = Pair.second.PrivateCopy;
4655  const Expr *Init = VD->getAnyInitializer();
4656  if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
4657  !CGF.isTrivialInitializer(Init)))) {
4658  LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
4659  if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
4660  const VarDecl *OriginalVD = Pair.second.Original;
4661  // Check if the variable is the target-based BasePointersArray,
4662  // PointersArray or SizesArray.
4663  LValue SharedRefLValue;
4664  QualType Type = OriginalVD->getType();
4665  const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
4666  if (IsTargetTask && !SharedField) {
4667  assert(isa<ImplicitParamDecl>(OriginalVD) &&
4668  isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
4669  cast<CapturedDecl>(OriginalVD->getDeclContext())
4670  ->getNumParams() == 0 &&
4671  isa<TranslationUnitDecl>(
4672  cast<CapturedDecl>(OriginalVD->getDeclContext())
4673  ->getDeclContext()) &&
4674  "Expected artificial target data variable.");
4675  SharedRefLValue =
4676  CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
4677  } else {
4678  SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
4679  SharedRefLValue = CGF.MakeAddrLValue(
4680  Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
4681  SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
4682  SharedRefLValue.getTBAAInfo());
4683  }
4684  if (Type->isArrayType()) {
4685  // Initialize firstprivate array.
4686  if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
4687  // Perform simple memcpy.
4688  CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
4689  } else {
4690  // Initialize firstprivate array using element-by-element
4691  // initialization.
4692  CGF.EmitOMPAggregateAssign(
4693  PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
4694  [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
4695  Address SrcElement) {
4696  // Clean up any temporaries needed by the initialization.
4697  CodeGenFunction::OMPPrivateScope InitScope(CGF);
4698  InitScope.addPrivate(
4699  Elem, [SrcElement]() -> Address { return SrcElement; });
4700  (void)InitScope.Privatize();
4701  // Emit initialization for single element.
4702  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
4703  CGF, &CapturesInfo);
4704  CGF.EmitAnyExprToMem(Init, DestElement,
4705  Init->getType().getQualifiers(),
4706  /*IsInitializer=*/false);
4707  });
4708  }
4709  } else {
4710  CodeGenFunction::OMPPrivateScope InitScope(CGF);
4711  InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
4712  return SharedRefLValue.getAddress();
4713  });
4714  (void)InitScope.Privatize();
4715  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
4716  CGF.EmitExprAsInit(Init, VD, PrivateLValue,
4717  /*capturedByInit=*/false);
4718  }
4719  } else {
4720  CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
4721  }
4722  }
4723  ++FI;
4724  }
4725 }
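// Editorial note (added in editing, not part of the clang source): a small
// example of the two firstprivate paths handled above, assuming trivially
// copyable elements:
//   int buf[8]; int x;
//   #pragma omp task firstprivate(buf, x)
// 'buf' takes the aggregate-assign (memcpy) branch; an array of class type
// with a non-trivial constructor would instead go through the per-element
// EmitOMPAggregateAssign lambda, and scalars like 'x' use EmitExprAsInit.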
4726 
4727 /// Check if duplication function is required for taskloops.
4728 static bool checkInitIsRequired(CodeGenFunction &CGF,
4729  ArrayRef<PrivateDataTy> Privates) {
4730  bool InitRequired = false;
4731  for (const PrivateDataTy &Pair : Privates) {
4732  const VarDecl *VD = Pair.second.PrivateCopy;
4733  const Expr *Init = VD->getAnyInitializer();
4734  InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4735  !CGF.isTrivialInitializer(Init));
4736  if (InitRequired)
4737  break;
4738  }
4739  return InitRequired;
4740 }
4741 
4742 
4743 /// Emit task_dup function (for initialization of
4744 /// private/firstprivate/lastprivate vars and last_iter flag)
4745 /// \code
4746 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
4747 /// lastpriv) {
4748 /// // setup lastprivate flag
4749 /// task_dst->last = lastpriv;
4750 /// // could be constructor calls here...
4751 /// }
4752 /// \endcode
4753 static llvm::Value *
4754 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
4755  const OMPExecutableDirective &D,
4756  QualType KmpTaskTWithPrivatesPtrQTy,
4757  const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4758  const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
4759  QualType SharedsPtrTy, const OMPTaskDataTy &Data,
4760  ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
4761  ASTContext &C = CGM.getContext();
4762  FunctionArgList Args;
4763  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4764  KmpTaskTWithPrivatesPtrQTy,
4765  ImplicitParamDecl::Other);
4766  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4767  KmpTaskTWithPrivatesPtrQTy,
4768  ImplicitParamDecl::Other);
4769  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
4770  ImplicitParamDecl::Other);
4771  Args.push_back(&DstArg);
4772  Args.push_back(&SrcArg);
4773  Args.push_back(&LastprivArg);
4774  const auto &TaskDupFnInfo =
4775  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4776  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4777  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
4778  auto *TaskDup = llvm::Function::Create(
4779  TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4780  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4781  TaskDup->setDoesNotRecurse();
4782  CodeGenFunction CGF(CGM);
4783  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4784  Loc);
4785 
4786  LValue TDBase = CGF.EmitLoadOfPointerLValue(
4787  CGF.GetAddrOfLocalVar(&DstArg),
4788  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4789  // task_dst->liter = lastpriv;
4790  if (WithLastIter) {
4791  auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4792  LValue Base = CGF.EmitLValueForField(
4793  TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4794  LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4795  llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4796  CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4797  CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4798  }
4799 
4800  // Emit initial values for private copies (if any).
4801  assert(!Privates.empty());
4802  Address KmpTaskSharedsPtr = Address::invalid();
4803  if (!Data.FirstprivateVars.empty()) {
4804  LValue TDBase = CGF.EmitLoadOfPointerLValue(
4805  CGF.GetAddrOfLocalVar(&SrcArg),
4806  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4807  LValue Base = CGF.EmitLValueForField(
4808  TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4809  KmpTaskSharedsPtr = Address(
4810  CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4811  Base, *std::next(KmpTaskTQTyRD->field_begin(),
4812  KmpTaskTShareds)),
4813  Loc),
4814  CGF.getNaturalTypeAlignment(SharedsTy));
4815  }
4816  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4817  SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4818  CGF.FinishFunction();
4819  return TaskDup;
4820 }
4821 
4822 /// Checks if destructor function is required to be generated.
4823 /// \return true if cleanups are required, false otherwise.
4824 static bool
4825 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4826  bool NeedsCleanup = false;
4827  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4828  const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4829  for (const FieldDecl *FD : PrivateRD->fields()) {
4830  NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4831  if (NeedsCleanup)
4832  break;
4833  }
4834  return NeedsCleanup;
4835 }
4836 
4837 CGOpenMPRuntime::TaskResultTy
4838 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4839  const OMPExecutableDirective &D,
4840  llvm::Value *TaskFunction, QualType SharedsTy,
4841  Address Shareds, const OMPTaskDataTy &Data) {
4842  ASTContext &C = CGM.getContext();
4843  SmallVector<PrivateDataTy, 4> Privates;
4844  // Aggregate privates and sort them by the alignment.
4845  auto I = Data.PrivateCopies.begin();
4846  for (const Expr *E : Data.PrivateVars) {
4847  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4848  Privates.emplace_back(
4849  C.getDeclAlign(VD),
4850  PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4851  /*PrivateElemInit=*/nullptr));
4852  ++I;
4853  }
4854  I = Data.FirstprivateCopies.begin();
4855  auto IElemInitRef = Data.FirstprivateInits.begin();
4856  for (const Expr *E : Data.FirstprivateVars) {
4857  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4858  Privates.emplace_back(
4859  C.getDeclAlign(VD),
4860  PrivateHelpersTy(
4861  VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4862  cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
4863  ++I;
4864  ++IElemInitRef;
4865  }
4866  I = Data.LastprivateCopies.begin();
4867  for (const Expr *E : Data.LastprivateVars) {
4868  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4869  Privates.emplace_back(
4870  C.getDeclAlign(VD),
4871  PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4872  /*PrivateElemInit=*/nullptr));
4873  ++I;
4874  }
4875  std::stable_sort(Privates.begin(), Privates.end(), stable_sort_comparator);
4876  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4877  // Build type kmp_routine_entry_t (if not built yet).
4878  emitKmpRoutineEntryT(KmpInt32Ty);
4879  // Build type kmp_task_t (if not built yet).
4880  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
4881  if (SavedKmpTaskloopTQTy.isNull()) {
4882  SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4883  CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4884  }
4885  KmpTaskTQTy = SavedKmpTaskloopTQTy;
4886  } else {
4887  assert((D.getDirectiveKind() == OMPD_task ||
4888  isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
4889  isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
4890  "Expected taskloop, task or target directive");
4891  if (SavedKmpTaskTQTy.isNull()) {
4892  SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4893  CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4894  }
4895  KmpTaskTQTy = SavedKmpTaskTQTy;
4896  }
4897  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4898  // Build particular struct kmp_task_t for the given task.
4899  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
4900  createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
4901  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
4902  QualType KmpTaskTWithPrivatesPtrQTy =
4903  C.getPointerType(KmpTaskTWithPrivatesQTy);
4904  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
4905  llvm::Type *KmpTaskTWithPrivatesPtrTy =
4906  KmpTaskTWithPrivatesTy->getPointerTo();
4907  llvm::Value *KmpTaskTWithPrivatesTySize =
4908  CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
4909  QualType SharedsPtrTy = C.getPointerType(SharedsTy);
4910 
4911  // Emit initial values for private copies (if any).
4912  llvm::Value *TaskPrivatesMap = nullptr;
4913  llvm::Type *TaskPrivatesMapTy =
4914  std::next(cast<llvm::Function>(TaskFunction)->arg_begin(), 3)->getType();
4915  if (!Privates.empty()) {
4916  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4917  TaskPrivatesMap = emitTaskPrivateMappingFunction(
4918  CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
4919  FI->getType(), Privates);
4920  TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4921  TaskPrivatesMap, TaskPrivatesMapTy);
4922  } else {
4923  TaskPrivatesMap = llvm::ConstantPointerNull::get(
4924  cast<llvm::PointerType>(TaskPrivatesMapTy));
4925  }
4926  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
4927  // kmp_task_t *tt);
4928  llvm::Value *TaskEntry = emitProxyTaskFunction(
4929  CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4930  KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
4931  TaskPrivatesMap);
4932 
4933  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
4934  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
4935  // kmp_routine_entry_t *task_entry);
4936  // Task flags. Format is taken from
4937  // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
4938  // description of kmp_tasking_flags struct.
4939  enum {
4940  TiedFlag = 0x1,
4941  FinalFlag = 0x2,
4942  DestructorsFlag = 0x8,
4943  PriorityFlag = 0x20
4944  };
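// Editorial note (added in editing, not part of the clang source): the flags
// combine bitwise. For example, a tied task whose privates need destructors
// and that carries a priority clause passes
//   TiedFlag | DestructorsFlag | PriorityFlag == 0x1 | 0x8 | 0x20 == 0x29
// to __kmpc_omp_task_alloc; the final flag is OR-ed in separately below,
// through a select when the 'final' clause expression is not a constant.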
4945  unsigned Flags = Data.Tied ? TiedFlag : 0;
4946  bool NeedsCleanup = false;
4947  if (!Privates.empty()) {
4948  NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
4949  if (NeedsCleanup)
4950  Flags = Flags | DestructorsFlag;
4951  }
4952  if (Data.Priority.getInt())
4953  Flags = Flags | PriorityFlag;
4954  llvm::Value *TaskFlags =
4955  Data.Final.getPointer()
4956  ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
4957  CGF.Builder.getInt32(FinalFlag),
4958  CGF.Builder.getInt32(/*C=*/0))
4959  : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
4960  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
4961  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
4962  llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
4963  getThreadID(CGF, Loc), TaskFlags,
4964  KmpTaskTWithPrivatesTySize, SharedsSize,
4965  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4966  TaskEntry, KmpRoutineEntryPtrTy)};
4967  llvm::Value *NewTask = CGF.EmitRuntimeCall(
4968  createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
4969  llvm::Value *NewTaskNewTaskTTy =
4970  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4971  NewTask, KmpTaskTWithPrivatesPtrTy);
4972  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
4973  KmpTaskTWithPrivatesQTy);
4974  LValue TDBase =
4975  CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4976  // Fill the data in the resulting kmp_task_t record.
4977  // Copy shareds if there are any.
4978  Address KmpTaskSharedsPtr = Address::invalid();
4979  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
4980  KmpTaskSharedsPtr =
4982  CGF.EmitLValueForField(
4983  TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
4984  KmpTaskTShareds)),
4985  Loc),
4986  CGF.getNaturalTypeAlignment(SharedsTy));
4987  LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
4988  LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
4989  CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
4990  }
4991  // Emit initial values for private copies (if any).
4992  TaskResultTy Result;
4993  if (!Privates.empty()) {
4994  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4995  SharedsTy, SharedsPtrTy, Data, Privates,
4996  /*ForDup=*/false);
4997  if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4998  (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4999  Result.TaskDupFn = emitTaskDupFunction(
5000  CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
5001  KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
5002  /*WithLastIter=*/!Data.LastprivateVars.empty());
5003  }
5004  }
5005  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
5006  enum { Priority = 0, Destructors = 1 };
5007  // Provide pointer to function with destructors for privates.
5008  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
5009  const RecordDecl *KmpCmplrdataUD =
5010  (*FI)->getType()->getAsUnionType()->getDecl();
5011  if (NeedsCleanup) {
5012  llvm::Value *DestructorFn = emitDestructorsFunction(
5013  CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
5014  KmpTaskTWithPrivatesQTy);
5015  LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
5016  LValue DestructorsLV = CGF.EmitLValueForField(
5017  Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
5018  CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5019  DestructorFn, KmpRoutineEntryPtrTy),
5020  DestructorsLV);
5021  }
5022  // Set priority.
5023  if (Data.Priority.getInt()) {
5024  LValue Data2LV = CGF.EmitLValueForField(
5025  TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
5026  LValue PriorityLV = CGF.EmitLValueForField(
5027  Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
5028  CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
5029  }
5030  Result.NewTask = NewTask;
5031  Result.TaskEntry = TaskEntry;
5032  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
5033  Result.TDBase = TDBase;
5034  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
5035  return Result;
5036 }
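// Editorial sketch (added in editing, not part of the clang source): for a
// plain '#pragma omp task' the function above boils down to roughly
//
//   kmp_task_t *t = __kmpc_omp_task_alloc(&loc, gtid, flags,
//                       sizeof(kmp_task_t_with_privates), sizeof(shareds),
//                       .omp_task_entry.);
//   copy captured shareds into t->shareds;        // if any
//   initialize the privates record;               // if any
//   t->data1.destructors = .omp_task_destructor.; // if cleanups are needed
//   t->data2.priority    = <priority expression>; // if a priority clause exists
//
// with the actual task submission left to emitTaskCall/emitTaskLoopCall.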
5037 
5038 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5039  const OMPExecutableDirective &D,
5040  llvm::Value *TaskFunction,
5041  QualType SharedsTy, Address Shareds,
5042  const Expr *IfCond,
5043  const OMPTaskDataTy &Data) {
5044  if (!CGF.HaveInsertPoint())
5045  return;
5046 
5047  TaskResultTy Result =
5048  emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5049  llvm::Value *NewTask = Result.NewTask;
5050  llvm::Value *TaskEntry = Result.TaskEntry;
5051  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5052  LValue TDBase = Result.TDBase;
5053  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5054  ASTContext &C = CGM.getContext();
5055  // Process list of dependences.
5056  Address DependenciesArray = Address::invalid();
5057  unsigned NumDependencies = Data.Dependences.size();
5058  if (NumDependencies) {
5059  // Dependence kind for RTL.
5060  enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
5061  enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
5062  RecordDecl *KmpDependInfoRD;
5063  QualType FlagsTy =
5064  C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
5065  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5066  if (KmpDependInfoTy.isNull()) {
5067  KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
5068  KmpDependInfoRD->startDefinition();
5069  addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
5070  addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
5071  addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
5072  KmpDependInfoRD->completeDefinition();
5073  KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
5074  } else {
5075  KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5076  }
5077  CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
5078  // Define type kmp_depend_info[<Dependences.size()>];
5079  QualType KmpDependInfoArrayTy = C.getConstantArrayType(
5080  KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
5081  ArrayType::Normal, /*IndexTypeQuals=*/0);
5082  // kmp_depend_info[<Dependences.size()>] deps;
5083  DependenciesArray =
5084  CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
5085  for (unsigned I = 0; I < NumDependencies; ++I) {
5086  const Expr *E = Data.Dependences[I].second;
5087  LValue Addr = CGF.EmitLValue(E);
5088  llvm::Value *Size;
5089  QualType Ty = E->getType();
5090  if (const auto *ASE =
5091  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
5092  LValue UpAddrLVal =
5093  CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
5094  llvm::Value *UpAddr =
5095  CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
5096  llvm::Value *LowIntPtr =
5097  CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
5098  llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
5099  Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
5100  } else {
5101  Size = CGF.getTypeSize(Ty);
5102  }
5103  LValue Base = CGF.MakeAddrLValue(
5104  CGF.Builder.CreateConstArrayGEP(DependenciesArray, I, DependencySize),
5105  KmpDependInfoTy);
5106  // deps[i].base_addr = &<Dependences[i].second>;
5107  LValue BaseAddrLVal = CGF.EmitLValueForField(
5108  Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5109  CGF.EmitStoreOfScalar(
5110  CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
5111  BaseAddrLVal);
5112  // deps[i].len = sizeof(<Dependences[i].second>);
5113  LValue LenLVal = CGF.EmitLValueForField(
5114  Base, *std::next(KmpDependInfoRD->field_begin(), Len));
5115  CGF.EmitStoreOfScalar(Size, LenLVal);
5116  // deps[i].flags = <Dependences[i].first>;
5117  RTLDependenceKindTy DepKind;
5118  switch (Data.Dependences[I].first) {
5119  case OMPC_DEPEND_in:
5120  DepKind = DepIn;
5121  break;
5122  // Out and InOut dependencies must use the same code.
5123  case OMPC_DEPEND_out:
5124  case OMPC_DEPEND_inout:
5125  DepKind = DepInOut;
5126  break;
5127  case OMPC_DEPEND_source:
5128  case OMPC_DEPEND_sink:
5129  case OMPC_DEPEND_unknown:
5130  llvm_unreachable("Unknown task dependence type");
5131  }
5132  LValue FlagsLVal = CGF.EmitLValueForField(
5133  Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5134  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5135  FlagsLVal);
5136  }
5137  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5138  CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
5139  CGF.VoidPtrTy);
5140  }
5141 
5142  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5143  // libcall.
5144  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5145  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5146  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5147  // list is not empty
5148  llvm::Value *ThreadID = getThreadID(CGF, Loc);
5149  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5150  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5151  llvm::Value *DepTaskArgs[7];
5152  if (NumDependencies) {
5153  DepTaskArgs[0] = UpLoc;
5154  DepTaskArgs[1] = ThreadID;
5155  DepTaskArgs[2] = NewTask;
5156  DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
5157  DepTaskArgs[4] = DependenciesArray.getPointer();
5158  DepTaskArgs[5] = CGF.Builder.getInt32(0);
5159  DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5160  }
5161  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
5162  &TaskArgs,
5163  &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5164  if (!Data.Tied) {
5165  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5166  LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5167  CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5168  }
5169  if (NumDependencies) {
5170  CGF.EmitRuntimeCall(
5171  createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
5172  } else {
5173  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
5174  TaskArgs);
5175  }
5176  // Check if parent region is untied and build return for untied task;
5177  if (auto *Region =
5178  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5179  Region->emitUntiedSwitch(CGF);
5180  };
5181 
5182  llvm::Value *DepWaitTaskArgs[6];
5183  if (NumDependencies) {
5184  DepWaitTaskArgs[0] = UpLoc;
5185  DepWaitTaskArgs[1] = ThreadID;
5186  DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
5187  DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5188  DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5189  DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5190  }
5191  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
5192  NumDependencies, &DepWaitTaskArgs,
5193  Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5194  CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5195  CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5196  // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5197  // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5198  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5199  // is specified.
5200  if (NumDependencies)
5201  CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
5202  DepWaitTaskArgs);
5203  // Call proxy_task_entry(gtid, new_task);
5204  auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5205  Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5206  Action.Enter(CGF);
5207  llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5208  CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5209  OutlinedFnArgs);
5210  };
5211 
5212  // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5213  // kmp_task_t *new_task);
5214  // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5215  // kmp_task_t *new_task);
5216  RegionCodeGenTy RCG(CodeGen);
5217  CommonActionTy Action(
5218  RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
5219  RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
5220  RCG.setAction(Action);
5221  RCG(CGF);
5222  };
5223 
5224  if (IfCond) {
5225  emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5226  } else {
5227  RegionCodeGenTy ThenRCG(ThenCodeGen);
5228  ThenRCG(CGF);
5229  }
5230 }
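// Editorial sketch (added in editing, not part of the clang source): with an
// 'if' clause the emission above expands to roughly
//
//   if (<if-condition>) {                     // ThenCodeGen: deferred task
//     __kmpc_omp_task_with_deps(...);         //   when dependences exist
//     /* or */ __kmpc_omp_task(...);          //   otherwise
//   } else {                                  // ElseCodeGen: undeferred task
//     __kmpc_omp_wait_deps(...);              //   when dependences exist
//     __kmpc_omp_task_begin_if0(&loc, gtid, new_task);
//     .omp_task_entry.(gtid, new_task);       //   run the task body inline
//     __kmpc_omp_task_complete_if0(&loc, gtid, new_task);
//   }
//
// Without an 'if' clause only the ThenCodeGen branch is emitted.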
5231 
5232 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5233  const OMPLoopDirective &D,
5234  llvm::Value *TaskFunction,
5235  QualType SharedsTy, Address Shareds,
5236  const Expr *IfCond,
5237  const OMPTaskDataTy &Data) {
5238  if (!CGF.HaveInsertPoint())
5239  return;
5240  TaskResultTy Result =
5241  emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5242  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5243  // libcall.
5244  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5245  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5246  // sched, kmp_uint64 grainsize, void *task_dup);
5247  llvm::Value *ThreadID = getThreadID(CGF, Loc);
5248  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5249  llvm::Value *IfVal;
5250  if (IfCond) {
5251  IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5252  /*isSigned=*/true);
5253  } else {
5254  IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5255  }
5256 
5257  LValue LBLVal = CGF.EmitLValueForField(
5258  Result.TDBase,
5259  *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5260  const auto *LBVar =
5261  cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5262  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
5263  /*IsInitializer=*/true);
5264  LValue UBLVal = CGF.EmitLValueForField(
5265  Result.TDBase,
5266  *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5267  const auto *UBVar =
5268  cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5269  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
5270  /*IsInitializer=*/true);
5271  LValue StLVal = CGF.EmitLValueForField(
5272  Result.TDBase,
5273  *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5274  const auto *StVar =
5275  cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5276  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
5277  /*IsInitializer=*/true);
5278  // Store reductions address.
5279  LValue RedLVal = CGF.EmitLValueForField(
5280  Result.TDBase,
5281  *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5282  if (Data.Reductions) {
5283  CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5284  } else {
5285  CGF.EmitNullInitialization(RedLVal.getAddress(),
5286  CGF.getContext().VoidPtrTy);
5287  }
5288  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5289  llvm::Value *TaskArgs[] = {
5290  UpLoc,
5291  ThreadID,
5292  Result.NewTask,
5293  IfVal,
5294  LBLVal.getPointer(),
5295  UBLVal.getPointer(),
5296  CGF.EmitLoadOfScalar(StLVal, Loc),
5297  llvm::ConstantInt::getSigned(
5298  CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5299  llvm::ConstantInt::getSigned(
5300  CGF.IntTy, Data.Schedule.getPointer()
5301  ? Data.Schedule.getInt() ? NumTasks : Grainsize
5302  : NoSchedule),
5303  Data.Schedule.getPointer()
5304  ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5305  /*isSigned=*/false)
5306  : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5307  Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5308  Result.TaskDupFn, CGF.VoidPtrTy)
5309  : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5310  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
5311 }
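// Editorial note (added in editing, not part of the clang source): the
// TaskArgs array above lines up with
//   __kmpc_taskloop(loc, gtid, task, if_val, &lb, &ub, st, nogroup,
//                   sched, grainsize, task_dup)
// where nogroup is hard-coded to 1 (the enclosing taskgroup is emitted by the
// compiler), sched is 0/1/2 for no schedule / grainsize / num_tasks, and
// task_dup is null unless a duplication helper was generated.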
5312 
5313 /// Emit reduction operation for each element of array (required for
5314 /// array sections) LHS op = RHS.
5315 /// \param Type Type of array.
5316 /// \param LHSVar Variable on the left side of the reduction operation
5317 /// (references element of array in original variable).
5318 /// \param RHSVar Variable on the right side of the reduction operation
5319 /// (references element of array in original variable).
5320 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5321 /// RHSVar.
5322 static void EmitOMPAggregateReduction(
5323  CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5324  const VarDecl *RHSVar,
5325  const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5326  const Expr *, const Expr *)> &RedOpGen,
5327  const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5328  const Expr *UpExpr = nullptr) {
5329  // Perform element-by-element initialization.
5330  QualType ElementTy;
5331  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5332  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5333 
5334  // Drill down to the base element type on both arrays.
5335  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5336  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5337 
5338  llvm::Value *RHSBegin = RHSAddr.getPointer();
5339  llvm::Value *LHSBegin = LHSAddr.getPointer();
5340  // Cast from pointer to array type to pointer to single element.
5341  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
5342  // The basic structure here is a while-do loop.
5343  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5344  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5345  llvm::Value *IsEmpty =
5346  CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5347  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5348 
5349  // Enter the loop body, making that address the current address.
5350  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5351  CGF.EmitBlock(BodyBB);
5352 
5353  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5354 
5355  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5356  RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5357  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5358  Address RHSElementCurrent =
5359  Address(RHSElementPHI,
5360  RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5361 
5362  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5363  LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5364  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5365  Address LHSElementCurrent =
5366  Address(LHSElementPHI,
5367  LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5368 
5369  // Emit copy.
5370  CodeGenFunction::OMPPrivateScope Scope(CGF);
5371  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5372  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5373  Scope.Privatize();
5374  RedOpGen(CGF, XExpr, EExpr, UpExpr);
5375  Scope.ForceCleanup();
5376 
5377  // Shift the address forward by one element.
5378  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5379  LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
5380  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5381  RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
5382  // Check whether we've reached the end.
5383  llvm::Value *Done =
5384  CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5385  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5386  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5387  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5388 
5389  // Done.
5390  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5391 }
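// Editorial sketch (added in editing, not part of the clang source): the
// basic blocks created above implement, in effect,
//   T *lhs = LHSBegin, *rhs = RHSBegin, *end = LHSBegin + NumElements;
//   if (lhs != end)
//     do { <RedOp applied to *lhs, *rhs>; ++lhs; ++rhs; } while (lhs != end);
// i.e. a guarded do-while loop over the array section elements.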
5392 
5393 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5394 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5395 /// UDR combiner function.
5396 static void emitReductionCombiner(CodeGenFunction &CGF,
5397  const Expr *ReductionOp) {
5398  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5399  if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5400  if (const auto *DRE =
5401  dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5402  if (const auto *DRD =
5403  dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5404  std::pair<llvm::Function *, llvm::Function *> Reduction =
5405  CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5406  RValue Func = RValue::get(Reduction.first);
5407  CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5408  CGF.EmitIgnoredExpr(ReductionOp);
5409  return;
5410  }
5411  CGF.EmitIgnoredExpr(ReductionOp);
5412 }
5413 
5414 llvm::Value *CGOpenMPRuntime::emitReductionFunction(
5415  CodeGenModule &CGM, SourceLocation Loc, llvm::Type *ArgsType,
5416  ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
5417  ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
5418  ASTContext &C = CGM.getContext();
5419 
5420  // void reduction_func(void *LHSArg, void *RHSArg);
5421  FunctionArgList Args;
5422  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5423  ImplicitParamDecl::Other);
5424  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5425  ImplicitParamDecl::Other);
5426  Args.push_back(&LHSArg);
5427  Args.push_back(&RHSArg);
5428  const auto &CGFI =
5429  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5430  std::string Name = getName({"omp", "reduction", "reduction_func"});
5431  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5432  llvm::GlobalValue::InternalLinkage, Name,
5433  &CGM.getModule());
5434  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5435  Fn->setDoesNotRecurse();
5436  CodeGenFunction CGF(CGM);
5437  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5438 
5439  // Dst = (void*[n])(LHSArg);
5440  // Src = (void*[n])(RHSArg);
5441  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5442  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5443  ArgsType), CGF.getPointerAlign());
5444  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5445  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5446  ArgsType), CGF.getPointerAlign());
5447 
5448  // ...
5449  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5450  // ...
5451  CodeGenFunction::OMPPrivateScope Scope(CGF);
5452  auto IPriv = Privates.begin();
5453  unsigned Idx = 0;
5454  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5455  const auto *RHSVar =
5456  cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5457  Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5458  return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5459  });
5460  const auto *LHSVar =
5461  cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5462  Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5463  return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5464  });
5465  QualType PrivTy = (*IPriv)->getType();
5466  if (PrivTy->isVariablyModifiedType()) {
5467  // Get array size and emit VLA type.
5468  ++Idx;
5469  Address Elem =
5470  CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize());
5471  llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5472  const VariableArrayType *VLA =
5473  CGF.getContext().getAsVariableArrayType(PrivTy);
5474  const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5475  CodeGenFunction::OpaqueValueMapping OpaqueMap(
5476  CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5477  CGF.EmitVariablyModifiedType(PrivTy);
5478  }
5479  }
5480  Scope.Privatize();
5481  IPriv = Privates.begin();
5482  auto ILHS = LHSExprs.begin();
5483  auto IRHS = RHSExprs.begin();
5484  for (const Expr *E : ReductionOps) {
5485  if ((*IPriv)->getType()->isArrayType()) {
5486  // Emit reduction for array section.
5487  const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5488  const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5489  EmitOMPAggregateReduction(
5490  CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5491  [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5492  emitReductionCombiner(CGF, E);
5493  });
5494  } else {
5495  // Emit reduction for array subscript or single variable.
5496  emitReductionCombiner(CGF, E);
5497  }
5498  ++IPriv;
5499  ++ILHS;
5500  ++IRHS;
5501  }
5502  Scope.ForceCleanup();
5503  CGF.FinishFunction();
5504  return Fn;
5505 }
5506 
5507 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5508  const Expr *ReductionOp,
5509  const Expr *PrivateRef,
5510  const DeclRefExpr *LHS,
5511  const DeclRefExpr *RHS) {
5512  if (PrivateRef->getType()->isArrayType()) {
5513  // Emit reduction for array section.
5514  const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5515  const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5516  EmitOMPAggregateReduction(
5517  CGF, PrivateRef->getType(), LHSVar, RHSVar,
5518  [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5519  emitReductionCombiner(CGF, ReductionOp);
5520  });
5521  } else {
5522  // Emit reduction for array subscript or single variable.
5523  emitReductionCombiner(CGF, ReductionOp);
5524  }
5525 }
5526 
5527 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5528  ArrayRef<const Expr *> Privates,
5529  ArrayRef<const Expr *> LHSExprs,
5530  ArrayRef<const Expr *> RHSExprs,
5531  ArrayRef<const Expr *> ReductionOps,
5532  ReductionOptionsTy Options) {
5533  if (!CGF.HaveInsertPoint())
5534  return;
5535 
5536  bool WithNowait = Options.WithNowait;
5537  bool SimpleReduction = Options.SimpleReduction;
5538 
5539  // Next code should be emitted for reduction:
5540  //
5541  // static kmp_critical_name lock = { 0 };
5542  //
5543  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5544  // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5545  // ...
5546  // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5547  // *(Type<n>-1*)rhs[<n>-1]);
5548  // }
5549  //
5550  // ...
5551  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5552  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5553  // RedList, reduce_func, &<lock>)) {
5554  // case 1:
5555  // ...
5556  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5557  // ...
5558  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5559  // break;
5560  // case 2:
5561  // ...
5562  // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5563  // ...
5564  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5565  // break;
5566  // default:;
5567  // }
5568  //
5569  // if SimpleReduction is true, only the next code is generated:
5570  // ...
5571  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5572  // ...
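  // Editorial note (added in editing, not part of the clang source): a
  // directive such as
  //   #pragma omp parallel for reduction(+ : sum) nowait
  // reaches this point with WithNowait == true; the switch emitted below then
  // selects either the tree reduction (case 1) or the atomic/critical
  // fallback (case 2) based on the value returned by __kmpc_reduce_nowait.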
5573 
5574  ASTContext &C = CGM.getContext();
5575 
5576  if (SimpleReduction) {
5577  CodeGenFunction::RunCleanupsScope Scope(CGF);
5578  auto IPriv = Privates.begin();
5579  auto ILHS = LHSExprs.begin();
5580  auto IRHS = RHSExprs.begin();
5581  for (const Expr *E : ReductionOps) {
5582  emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5583  cast<DeclRefExpr>(*IRHS));
5584  ++IPriv;
5585  ++ILHS;
5586  ++IRHS;
5587  }
5588  return;
5589  }
5590 
5591  // 1. Build a list of reduction variables.
5592  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5593  auto Size = RHSExprs.size();
5594  for (const Expr *E : Privates) {
5595  if (E->getType()->isVariablyModifiedType())
5596  // Reserve place for array size.
5597  ++Size;
5598  }
5599  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5600  QualType ReductionArrayTy =
5601  C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
5602  /*IndexTypeQuals=*/0);
5603  Address ReductionList =
5604  CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5605  auto IPriv = Privates.begin();
5606  unsigned Idx = 0;
5607  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5608  Address Elem =
5609  CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize());
5610  CGF.Builder.CreateStore(
5611  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5612  CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
5613  Elem);
5614  if ((*IPriv)->getType()->isVariablyModifiedType()) {
5615  // Store array size.
5616  ++Idx;
5617  Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
5618  CGF.getPointerSize());
5619  llvm::Value *Size = CGF.Builder.CreateIntCast(
5620  CGF.getVLASize(
5621  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5622  .NumElts,
5623  CGF.SizeTy, /*isSigned=*/false);
5624  CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5625  Elem);
5626  }
5627  }
5628 
5629  // 2. Emit reduce_func().
5630  llvm::Value *ReductionFn = emitReductionFunction(
5631  CGM, Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(),
5632  Privates, LHSExprs, RHSExprs, ReductionOps);
5633 
5634  // 3. Create static kmp_critical_name lock = { 0 };
5635  std::string Name = getName({"reduction"});
5636  llvm::Value *Lock = getCriticalRegionLock(Name);
5637 
5638  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5639  // RedList, reduce_func, &<lock>);
5640  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5641  llvm::Value *ThreadId = getThreadID(CGF, Loc);
5642  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5643  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5644  ReductionList.getPointer(), CGF.VoidPtrTy);
5645  llvm::Value *Args[] = {
5646  IdentTLoc, // ident_t *<loc>
5647  ThreadId, // i32 <gtid>
5648  CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5649  ReductionArrayTySize, // size_type sizeof(RedList)
5650  RL, // void *RedList
5651  ReductionFn, // void (*) (void *, void *) <reduce_func>
5652  Lock // kmp_critical_name *&<lock>
5653  };
5654  llvm::Value *Res = CGF.EmitRuntimeCall(
5655  createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
5656  : OMPRTL__kmpc_reduce),
5657  Args);
5658 
5659  // 5. Build switch(res)
5660  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5661  llvm::SwitchInst *SwInst =
5662  CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5663 
5664  // 6. Build case 1:
5665  // ...
5666  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5667  // ...
5668  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5669  // break;
5670  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5671  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5672  CGF.EmitBlock(Case1BB);
5673 
5674  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5675  llvm::Value *EndArgs[] = {
5676  IdentTLoc, // ident_t *<loc>
5677  ThreadId, // i32 <gtid>
5678  Lock // kmp_critical_name *&<lock>
5679  };
5680  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5681  CodeGenFunction &CGF, PrePostActionTy &Action) {
5682  CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5683  auto IPriv = Privates.begin();
5684  auto ILHS = LHSExprs.begin();
5685  auto IRHS = RHSExprs.begin();
5686  for (const Expr *E : ReductionOps) {
5687  RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5688  cast<DeclRefExpr>(*IRHS));
5689  ++IPriv;
5690  ++ILHS;
5691  ++IRHS;
5692  }
5693  };
5694  RegionCodeGenTy RCG(CodeGen);
5695  CommonActionTy Action(
5696  nullptr, llvm::None,
5697  createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
5698  : OMPRTL__kmpc_end_reduce),
5699  EndArgs);
5700  RCG.setAction(Action);
5701  RCG(CGF);
5702 
5703  CGF.EmitBranch(DefaultBB);
5704 
5705  // 7. Build case 2:
5706  // ...
5707  // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5708  // ...
5709  // break;
5710  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5711  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5712  CGF.EmitBlock(Case2BB);
5713 
5714  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5715  CodeGenFunction &CGF, PrePostActionTy &Action) {
5716  auto ILHS = LHSExprs.begin();
5717  auto IRHS = RHSExprs.begin();
5718  auto IPriv = Privates.begin();
5719  for (const Expr *E : ReductionOps) {
5720  const Expr *XExpr = nullptr;
5721  const Expr *EExpr = nullptr;
5722  const Expr *UpExpr = nullptr;
5723  BinaryOperatorKind BO = BO_Comma;
5724  if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5725  if (BO->getOpcode() == BO_Assign) {
5726  XExpr = BO->getLHS();
5727  UpExpr = BO->getRHS();
5728  }
5729  }
5730  // Try to emit update expression as a simple atomic.
5731  const Expr *RHSExpr = UpExpr;
5732  if (RHSExpr) {
5733  // Analyze RHS part of the whole expression.
5734  if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5735  RHSExpr->IgnoreParenImpCasts())) {
5736  // If this is a conditional operator, analyze its condition for
5737  // min/max reduction operator.
5738  RHSExpr = ACO->getCond();
5739  }
5740  if (const auto *BORHS =
5741  dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5742  EExpr = BORHS->getRHS();
5743  BO = BORHS->getOpcode();
5744  }
5745  }
5746  if (XExpr) {
5747  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5748  auto &&AtomicRedGen = [BO, VD,
5749  Loc](CodeGenFunction &CGF, const Expr *XExpr,
5750  const Expr *EExpr, const Expr *UpExpr) {
5751  LValue X = CGF.EmitLValue(XExpr);
5752  RValue E;
5753  if (EExpr)
5754  E = CGF.EmitAnyExpr(EExpr);
5755  CGF.EmitOMPAtomicSimpleUpdateExpr(
5756  X, E, BO, /*IsXLHSInRHSPart=*/true,
5757  llvm::AtomicOrdering::Monotonic, Loc,
5758  [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5759  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5760  PrivateScope.addPrivate(
5761  VD, [&CGF, VD, XRValue, Loc]() {
5762  Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5763  CGF.emitOMPSimpleStore(
5764  CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5765  VD->getType().getNonReferenceType(), Loc);
5766  return LHSTemp;
5767  });
5768  (void)PrivateScope.Privatize();
5769  return CGF.EmitAnyExpr(UpExpr);
5770  });
5771  };
5772  if ((*IPriv)->getType()->isArrayType()) {
5773  // Emit atomic reduction for array section.
5774  const auto *RHSVar =
5775  cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5776  EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5777  AtomicRedGen, XExpr, EExpr, UpExpr);
5778  } else {
5779  // Emit atomic reduction for array subscript or single variable.
5780  AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5781  }
5782  } else {
5783  // Emit as a critical region.
5784  auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5785  const Expr *, const Expr *) {
5786  CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5787  std::string Name = RT.getName({"atomic_reduction"});
5788  RT.emitCriticalRegion(
5789  CGF, Name,
5790  [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5791  Action.Enter(CGF);
5792  emitReductionCombiner(CGF, E);
5793  },
5794  Loc);
5795  };
5796  if ((*IPriv)->getType()->isArrayType()) {
5797  const auto *LHSVar =
5798  cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5799  const auto *RHSVar =
5800  cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5801  EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5802  CritRedGen);
5803  } else {
5804  CritRedGen(CGF, nullptr, nullptr, nullptr);
5805  }
5806  }
5807  ++ILHS;
5808  ++IRHS;
5809  ++IPriv;
5810  }
5811  };
5812  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5813  if (!WithNowait) {
5814  // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5815  llvm::Value *EndArgs[] = {
5816  IdentTLoc, // ident_t *<loc>
5817  ThreadId, // i32 <gtid>
5818  Lock // kmp_critical_name *&<lock>
5819  };
5820  CommonActionTy Action(nullptr, llvm::None,
5821  createRuntimeFunction(OMPRTL__kmpc_end_reduce),
5822  EndArgs);
5823  AtomicRCG.setAction(Action);
5824  AtomicRCG(CGF);
5825  } else {
5826  AtomicRCG(CGF);
5827  }
5828 
5829  CGF.EmitBranch(DefaultBB);
5830  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5831 }
5832 
5833 /// Generates unique name for artificial threadprivate variables.
5834 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5835 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5836  const Expr *Ref) {
5837  SmallString<256> Buffer;
5838  llvm::raw_svector_ostream Out(Buffer);
5839  const clang::DeclRefExpr *DE;
5840  const VarDecl *D = ::getBaseDecl(Ref, DE);
5841  if (!D)
5842  D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5843  D = D->getCanonicalDecl();
5844  std::string Name = CGM.getOpenMPRuntime().getName(
5845  {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5846  Out << Prefix << Name << "_"
5847  << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5848  return Out.str();
5849 }
5850 
5851 /// Emits reduction initializer function:
5852 /// \code
5853 /// void @.red_init(void* %arg) {
5854 /// %0 = bitcast void* %arg to <type>*
5855 /// store <type> <init>, <type>* %0
5856 /// ret void
5857 /// }
5858 /// \endcode
5859 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5860  SourceLocation Loc,
5861  ReductionCodeGen &RCG, unsigned N) {
5862  ASTContext &C = CGM.getContext();
5863  FunctionArgList Args;
5864  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5865  ImplicitParamDecl::Other);
5866  Args.emplace_back(&Param);
5867  const auto &FnInfo =
5868  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5869  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5870  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5871  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5872  Name, &CGM.getModule());
5873  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5874  Fn->setDoesNotRecurse();
5875  CodeGenFunction CGF(CGM);
5876  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5877  Address PrivateAddr = CGF.EmitLoadOfPointer(
5878  CGF.GetAddrOfLocalVar(&Param),
5879  C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5880  llvm::Value *Size = nullptr;
5881  // If the size of the reduction item is non-constant, load it from global
5882  // threadprivate variable.
5883  if (RCG.getSizes(N).second) {
5884  Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5885  CGF, CGM.getContext().getSizeType(),
5886  generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5887  Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5888  CGM.getContext().getSizeType(), Loc);
5889  }
5890  RCG.emitAggregateType(CGF, N, Size);
5891  LValue SharedLVal;
5892  // If the initializer comes from a declare reduction construct, emit a
5893  // pointer to the address of the original reduction item (required by the
5894  // reduction initializer).
5895  if (RCG.usesReductionInitializer(N)) {
5896  Address SharedAddr =
5897  CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5898  CGF, CGM.getContext().VoidPtrTy,
5899  generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
5900  SharedAddr = CGF.EmitLoadOfPointer(
5901  SharedAddr,
5902  CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5903  SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
5904  } else {
5905  SharedLVal = CGF.MakeNaturalAlignAddrLValue(
5906  llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
5907  CGM.getContext().VoidPtrTy);
5908  }
5909  // Emit the initializer:
5910  // %0 = bitcast void* %arg to <type>*
5911  // store <type> <init>, <type>* %0
5912  RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
5913  [](CodeGenFunction &) { return false; });
5914  CGF.FinishFunction();
5915  return Fn;
5916 }
5917 
5918 /// Emits reduction combiner function:
5919 /// \code
5920 /// void @.red_comb(void* %arg0, void* %arg1) {
5921 /// %lhs = bitcast void* %arg0 to <type>*
5922 /// %rhs = bitcast void* %arg1 to <type>*
5923 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5924 /// store <type> %2, <type>* %lhs
5925 /// ret void
5926 /// }
5927 /// \endcode
5928 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5929  SourceLocation Loc,
5930  ReductionCodeGen &RCG, unsigned N,
5931  const Expr *ReductionOp,
5932  const Expr *LHS, const Expr *RHS,
5933  const Expr *PrivateRef) {
5934  ASTContext &C = CGM.getContext();
5935  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5936  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5937  FunctionArgList Args;
5938  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5939  C.VoidPtrTy, ImplicitParamDecl::Other);
5940  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5941  ImplicitParamDecl::Other);
5942  Args.emplace_back(&ParamInOut);
5943  Args.emplace_back(&ParamIn);
5944  const auto &FnInfo =
5945  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5946  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5947  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5948  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5949  Name, &CGM.getModule());
5950  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5951  Fn->setDoesNotRecurse();
5952  CodeGenFunction CGF(CGM);
5953  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5954  llvm::Value *Size = nullptr;
5955  // If the size of the reduction item is non-constant, load it from the
5956  // global threadprivate variable.
5957  if (RCG.getSizes(N).second) {
5958  Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5959  CGF, CGM.getContext().getSizeType(),
5960  generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5961  Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5962  CGM.getContext().getSizeType(), Loc);
5963  }
5964  RCG.emitAggregateType(CGF, N, Size);
5965  // Remap lhs and rhs variables to the addresses of the function arguments.
5966  // %lhs = bitcast void* %arg0 to <type>*
5967  // %rhs = bitcast void* %arg1 to <type>*
5968  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5969  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
5970  // Pull out the pointer to the variable.
5971  Address PtrAddr = CGF.EmitLoadOfPointer(
5972  CGF.GetAddrOfLocalVar(&ParamInOut),
5973  C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5974  return CGF.Builder.CreateElementBitCast(
5975  PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
5976  });
5977  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
5978  // Pull out the pointer to the variable.
5979  Address PtrAddr = CGF.EmitLoadOfPointer(
5980  CGF.GetAddrOfLocalVar(&ParamIn),
5981  C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5982  return CGF.Builder.CreateElementBitCast(
5983  PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
5984  });
5985  PrivateScope.Privatize();
5986  // Emit the combiner body:
5987  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5988  // store <type> %2, <type>* %lhs
5989  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5990  CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5991  cast<DeclRefExpr>(RHS));
5992  CGF.FinishFunction();
5993  return Fn;
5994 }
5995 
5996 /// Emits reduction finalizer function:
5997 /// \code
5998 /// void @.red_fini(void* %arg) {
5999 /// %0 = bitcast void* %arg to <type>*
6000 /// <destroy>(<type>* %0)
6001 /// ret void
6002 /// }
6003 /// \endcode
6004 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
6005  SourceLocation Loc,
6006  ReductionCodeGen &RCG, unsigned N) {
6007  if (!RCG.needCleanups(N))
6008  return nullptr;
6009  ASTContext &C = CGM.getContext();
6010  FunctionArgList Args;
6011  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6012  ImplicitParamDecl::Other);
6013  Args.emplace_back(&Param);
6014  const auto &FnInfo =
6015  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6016  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6017  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
6018  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6019  Name, &CGM.getModule());
6020  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6021  Fn->setDoesNotRecurse();
6022  CodeGenFunction CGF(CGM);
6023  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6024  Address PrivateAddr = CGF.EmitLoadOfPointer(
6025  CGF.GetAddrOfLocalVar(&Param),
6026  C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6027  llvm::Value *Size = nullptr;
6028  // If the size of the reduction item is non-constant, load it from the
6029  // global threadprivate variable.
6030  if (RCG.getSizes(N).second) {
6031  Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6032  CGF, CGM.getContext().getSizeType(),
6033  generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6034  Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6035  CGM.getContext().getSizeType(), Loc);
6036  }
6037  RCG.emitAggregateType(CGF, N, Size);
6038  // Emit the finalizer body:
6039  // <destroy>(<type>* %0)
6040  RCG.emitCleanups(CGF, N, PrivateAddr);
6041  CGF.FinishFunction();
6042  return Fn;
6043 }
6044 
6045 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
6046  CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
6047  ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
6048  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
6049  return nullptr;
6050 
6051  // Build typedef struct:
6052  // kmp_task_red_input {
6053  // void *reduce_shar; // shared reduction item
6054  // size_t reduce_size; // size of data item
6055  // void *reduce_init; // data initialization routine
6056  // void *reduce_fini; // data finalization routine
6057  // void *reduce_comb; // data combiner routine
6058  // kmp_task_red_flags_t flags; // flags for additional info from compiler
6059  // } kmp_task_red_input_t;
6060  ASTContext &C = CGM.getContext();
6061  RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
6062  RD->startDefinition();
6063  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6064  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
6065  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6066  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6067  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6068  const FieldDecl *FlagsFD = addFieldToRecordDecl(
6069  C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
6070  RD->completeDefinition();
6071  QualType RDType = C.getRecordType(RD);
6072  unsigned Size = Data.ReductionVars.size();
6073  llvm::APInt ArraySize(/*numBits=*/64, Size);
6074  QualType ArrayRDType = C.getConstantArrayType(
6075  RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
6076  // kmp_task_red_input_t .rd_input.[Size];
6077  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
6078  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
6079  Data.ReductionOps);
6080  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
6081  // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
6082  llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
6083  llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
6084  llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
6085  TaskRedInput.getPointer(), Idxs,
6086  /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
6087  ".rd_input.gep.");
6088  LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
6089  // ElemLVal.reduce_shar = &Shareds[Cnt];
6090  LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
6091  RCG.emitSharedLValue(CGF, Cnt);
6092  llvm::Value *CastedShared =
6093  CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
6094  CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
6095  RCG.emitAggregateType(CGF, Cnt);
6096  llvm::Value *SizeValInChars;
6097  llvm::Value *SizeVal;
6098  std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6099  // We use delayed creation/initialization for VLAs, array sections and
6100  // custom reduction initializations. It is required because the runtime does
6101  // not provide a way to pass the sizes of VLAs/array sections to the
6102  // initializer/combiner/finalizer functions and does not pass the pointer to
6103  // the original reduction item to the initializer. Instead, threadprivate
6104  // global variables are used to store these values for use in those functions.
6105  bool DelayedCreation = !!SizeVal;
6106  SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6107  /*isSigned=*/false);
6108  LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6109  CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6110  // ElemLVal.reduce_init = init;
6111  LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6112  llvm::Value *InitAddr =
6113  CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6114  CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6115  DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
6116  // ElemLVal.reduce_fini = fini;
6117  LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6118  llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6119  llvm::Value *FiniAddr = Fini
6120  ? CGF.EmitCastToVoidPtr(Fini)
6121  : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6122  CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6123  // ElemLVal.reduce_comb = comb;
6124  LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6125  llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6126  CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6127  RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6128  CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6129  // ElemLVal.flags = 0;
6130  LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6131  if (DelayedCreation) {
6132  CGF.EmitStoreOfScalar(
6133  llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true),
6134  FlagsLVal);
6135  } else
6136  CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
6137  }
6138  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
6139  // *data);
6140  llvm::Value *Args[] = {
6141  CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6142  /*isSigned=*/true),
6143  llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6144  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6145  CGM.VoidPtrTy)};
6146  return CGF.EmitRuntimeCall(
6147  createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
6148 }
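// Illustrative source-level example (assumed, not from this file): a
// construct such as
//
//   #pragma omp taskgroup task_reduction(+ : x)
//   { /* tasks marked with in_reduction(+ : x) */ }
//
// reaches this code; one kmp_task_red_input_t element is filled in per
// reduction item and the array is passed to __kmpc_task_reduction_init,
// whose return value identifies the taskgroup's reduction data.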
6149 
6150 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6151  SourceLocation Loc,
6152  ReductionCodeGen &RCG,
6153  unsigned N) {
6154  auto Sizes = RCG.getSizes(N);
6155  // Emit a threadprivate global variable if the size of the type is
6156  // non-constant (Sizes.second != nullptr).
6157  if (Sizes.second) {
6158  llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6159  /*isSigned=*/false);
6160  Address SizeAddr = getAddrOfArtificialThreadPrivate(
6161  CGF, CGM.getContext().getSizeType(),
6162  generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6163  CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6164  }
6165  // Store the address of the original reduction item if a custom initializer is used.
6166  if (RCG.usesReductionInitializer(N)) {
6167  Address SharedAddr = getAddrOfArtificialThreadPrivate(
6168  CGF, CGM.getContext().VoidPtrTy,
6169  generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6170  CGF.Builder.CreateStore(
6171  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6172  RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
6173  SharedAddr, /*IsVolatile=*/false);
6174  }
6175 }
6176 
6177 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6178  SourceLocation Loc,
6179  llvm::Value *ReductionsPtr,
6180  LValue SharedLVal) {
6181  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6182  // *d);
6183  llvm::Value *Args[] = {
6184  CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6185  /*isSigned=*/true),
6186  ReductionsPtr,
6187  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
6188  CGM.VoidPtrTy)};
6189  return Address(
6190  CGF.EmitRuntimeCall(
6191  createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
6192  SharedLVal.getAlignment());
6193 }
6194 
6195 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6196  SourceLocation Loc) {
6197  if (!CGF.HaveInsertPoint())
6198  return;
6199  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6200  // global_tid);
6201  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6202  // Ignore return result until untied tasks are supported.
6203  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
6204  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6205  Region->emitUntiedSwitch(CGF);
6206 }
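// Illustrative example (assumed): '#pragma omp taskwait' lowers to a single
// runtime call of the form
//   call i32 @__kmpc_omp_taskwait(%ident_t* %loc, i32 %gtid)
// whose result is currently ignored, as noted above.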
6207 
6208 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6209  OpenMPDirectiveKind InnerKind,
6210  const RegionCodeGenTy &CodeGen,
6211  bool HasCancel) {
6212  if (!CGF.HaveInsertPoint())
6213  return;
6214  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6215  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6216 }
6217 
6218 namespace {
6219 enum RTCancelKind {
6220  CancelNoreq = 0,
6221  CancelParallel = 1,
6222  CancelLoop = 2,
6223  CancelSections = 3,
6224  CancelTaskgroup = 4
6225 };
6226 } // anonymous namespace
6227 
6228 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6229  RTCancelKind CancelKind = CancelNoreq;
6230  if (CancelRegion == OMPD_parallel)
6231  CancelKind = CancelParallel;
6232  else if (CancelRegion == OMPD_for)
6233  CancelKind = CancelLoop;
6234  else if (CancelRegion == OMPD_sections)
6235  CancelKind = CancelSections;
6236  else {
6237  assert(CancelRegion == OMPD_taskgroup);
6238  CancelKind = CancelTaskgroup;
6239  }
6240  return CancelKind;
6241 }
6242 
6243 void CGOpenMPRuntime::emitCancellationPointCall(
6244  CodeGenFunction &CGF, SourceLocation Loc,
6245  OpenMPDirectiveKind CancelRegion) {
6246  if (!CGF.HaveInsertPoint())
6247  return;
6248  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6249  // global_tid, kmp_int32 cncl_kind);
6250  if (auto *OMPRegionInfo =
6251  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6252  // For 'cancellation point taskgroup', the task region info may not have a
6253  // cancel. This may instead happen in another adjacent task.
6254  if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6255  llvm::Value *Args[] = {
6256  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6257  CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6258  // Ignore return result until untied tasks are supported.
6259  llvm::Value *Result = CGF.EmitRuntimeCall(
6260  createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
6261  // if (__kmpc_cancellationpoint()) {
6262  // exit from construct;
6263  // }
6264  llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6265  llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6266  llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6267  CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6268  CGF.EmitBlock(ExitBB);
6269  // exit from construct;
6270  CodeGenFunction::JumpDest CancelDest =
6271  CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6272  CGF.EmitBranchThroughCleanup(CancelDest);
6273  CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6274  }
6275  }
6276 }
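// Illustrative example (assumed): inside a 'parallel' region that contains a
// matching 'cancel' construct,
//   #pragma omp cancellation point parallel
// is lowered to a call to __kmpc_cancellationpoint followed by a conditional
// branch to the region's cancellation exit, exactly as emitted above.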
6277 
6278 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6279  const Expr *IfCond,
6280  OpenMPDirectiveKind CancelRegion) {
6281  if (!CGF.HaveInsertPoint())
6282  return;
6283  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6284  // kmp_int32 cncl_kind);
6285  if (auto *OMPRegionInfo =
6286  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6287  auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
6288  PrePostActionTy &) {
6289  CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6290  llvm::Value *Args[] = {
6291  RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6292  CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6293  // Ignore return result until untied tasks are supported.
6294  llvm::Value *Result = CGF.EmitRuntimeCall(
6295  RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
6296  // if (__kmpc_cancel()) {
6297  // exit from construct;
6298  // }
6299  llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6300  llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6301  llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6302  CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6303  CGF.EmitBlock(ExitBB);
6304  // exit from construct;
6305  CodeGenFunction::JumpDest CancelDest =
6306  CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6307  CGF.EmitBranchThroughCleanup(CancelDest);
6308  CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6309  };
6310  if (IfCond) {
6311  emitOMPIfClause(CGF, IfCond, ThenGen,
6312  [](CodeGenFunction &, PrePostActionTy &) {});
6313  } else {
6314  RegionCodeGenTy ThenRCG(ThenGen);
6315  ThenRCG(CGF);
6316  }
6317  }
6318 }
6319 
6320 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6321  const OMPExecutableDirective &D, StringRef ParentName,
6322  llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6323  bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6324  assert(!ParentName.empty() && "Invalid target region parent name!");
6325  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6326  IsOffloadEntry, CodeGen);
6327 }
6328 
6329 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6330  const OMPExecutableDirective &D, StringRef ParentName,
6331  llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6332  bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6333  // Create a unique name for the entry function using the source location
6334  // information of the current target region. The name will be something like:
6335  //
6336  // __omp_offloading_DD_FFFF_PP_lBB
6337  //
6338  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6339  // mangled name of the function that encloses the target region and BB is the
6340  // line number of the target region.
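  // For example (illustrative values only): a target region on line 103 of a
  // function 'foo' in a file with device ID 0x2a and file ID 0xbeef would be
  // named "__omp_offloading_2a_beef_foo_l103".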
6341 
6342  unsigned DeviceID;
6343  unsigned FileID;
6344  unsigned Line;
6345  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6346  Line);
6347  SmallString<64> EntryFnName;
6348  {
6349  llvm::raw_svector_ostream OS(EntryFnName);
6350  OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6351  << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6352  }
6353 
6354  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6355 
6356  CodeGenFunction CGF(CGM, true);
6357  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6358  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6359 
6360  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);
6361 
6362  // If this target outlined function is not an offload entry, we don't need to
6363  // register it.
6364  if (!IsOffloadEntry)
6365  return;
6366 
6367  // The target region ID is used by the runtime library to identify the current
6368  // target region, so it only has to be unique and not necessarily point to
6369  // anything. It could be the pointer to the outlined function that implements
6370  // the target region, but we aren't using that so that the compiler doesn't
6371  // need to keep it alive and can therefore inline the host function if proven
6372  // worthwhile during optimization. On the other hand, if emitting code for the
6373  // device, the ID has to be the function address so that it can be retrieved
6374  // from the offloading entry and launched by the runtime library. We also mark
6375  // the outlined function to have external linkage in case we are emitting code
6376  // for the device, because these functions will be entry points to the device.
6377 
6378  if (CGM.getLangOpts().OpenMPIsDevice) {
6379  OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6380  OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
6381  OutlinedFn->setDSOLocal(false);
6382  } else {
6383  std::string Name = getName({EntryFnName, "region_id"});
6384  OutlinedFnID = new llvm::GlobalVariable(
6385  CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6386  llvm::GlobalValue::WeakAnyLinkage,
6387  llvm::Constant::getNullValue(CGM.Int8Ty), Name);
6388  }
6389 
6390  // Register the information for the entry associated with this target region.
6391  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
6392  DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
6393  OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
6394 }
6395 
6396 /// discard all CompoundStmts intervening between two constructs
6397 static const Stmt *ignoreCompoundStmts(const Stmt *Body) {
6398  while (const auto *CS = dyn_cast_or_null<CompoundStmt>(Body))
6399  Body = CS->body_front();
6400 
6401  return Body;
6402 }
6403 
6404 /// Emit the number of teams for a target directive. Inspect the num_teams
6405 /// clause associated with a teams construct combined or closely nested
6406 /// with the target directive.
6407 ///
6408 /// Emit a team of size one for directives such as 'target parallel' that
6409 /// have no associated teams construct.
6410 ///
6411 /// Otherwise, return nullptr.
6412 static llvm::Value *
6413 emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
6414  CodeGenFunction &CGF,
6415  const OMPExecutableDirective &D) {
6416  assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
6417  "teams directive expected to be "
6418  "emitted only for the host!");
6419 
6420  CGBuilderTy &Bld = CGF.Builder;
6421 
6422  // If the target directive is combined with a teams directive:
6423  // Return the value in the num_teams clause, if any.
6424  // Otherwise, return 0 to denote the runtime default.
6425  if (isOpenMPTeamsDirective(D.getDirectiveKind())) {
6426  if (const auto *NumTeamsClause = D.getSingleClause<OMPNumTeamsClause>()) {
6427  CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6428  llvm::Value *NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(),
6429  /*IgnoreResultAssign*/ true);
6430  return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
6431  /*IsSigned=*/true);
6432  }
6433 
6434  // The default value is 0.
6435  return Bld.getInt32(0);
6436  }
6437 
6438  // If the target directive is combined with a parallel directive but not a
6439  // teams directive, start one team.
6440  if (isOpenMPParallelDirective(D.getDirectiveKind()))
6441  return Bld.getInt32(1);
6442 
6443  // If the current target region has a teams region enclosed, we need to get
6444  // the number of teams to pass to the runtime function call. This is done
6445  // by generating the expression in an inlined region. This is required because
6446  // the expression is captured in the enclosing target environment when the
6447  // teams directive is not combined with target.
6448 
6449  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6450 
6451  if (const auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>(
6452  ignoreCompoundStmts(CS.getCapturedStmt()))) {
6453  if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) {
6454  if (const auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
6455  CGOpenMPInnerExprInfo CGInfo(CGF, CS);
6456  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6457  llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
6458  return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
6459  /*IsSigned=*/true);
6460  }
6461 
6462  // If we have an enclosed teams directive but no num_teams clause we use
6463  // the default value 0.
6464  return Bld.getInt32(0);
6465  }
6466  }
6467 
6468  // No teams associated with the directive.
6469  return nullptr;
6470 }
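// Illustrative examples (assumed):
//   #pragma omp target teams num_teams(4)  -> emits the i32 value 4
//   #pragma omp target teams               -> emits i32 0 (runtime default)
//   #pragma omp target parallel            -> emits i32 1 (a single team)
//   plain 'target' with no enclosed teams region -> returns nullptr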
6471 
6472 /// Emit the number of threads for a target directive. Inspect the
6473 /// thread_limit clause associated with a teams construct combined or closely
6474 /// nested with the target directive.
6475 ///
6476 /// Emit the num_threads clause for directives such as 'target parallel' that
6477 /// have no associated teams construct.
6478 ///
6479 /// Otherwise, return nullptr.
6480 static llvm::Value *
6481 emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
6482  CodeGenFunction &CGF,
6483  const OMPExecutableDirective &D) {
6484  assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
6485  "teams directive expected to be "
6486  "emitted only for the host!");
6487 
6488  CGBuilderTy &Bld = CGF.Builder;
6489 
6490  //
6491  // If the target directive is combined with a teams directive:
6492  // Return the value in the thread_limit clause, if any.
6493  //
6494  // If the target directive is combined with a parallel directive:
6495  // Return the value in the num_threads clause, if any.
6496  //
6497  // If both clauses are set, select the minimum of the two.
6498  //
6499  // If neither teams nor parallel combined directives set the number of threads
6500  // in a team, return 0 to denote the runtime default.
6501  //
6502  // If this is not a teams directive return nullptr.
6503 
6504  if (isOpenMPTeamsDirective(D.getDirectiveKind()) ||
6505  isOpenMPParallelDirective(D.getDirectiveKind())) {
6506  llvm::Value *DefaultThreadLimitVal = Bld.getInt32(0);
6507  llvm::Value *NumThreadsVal = nullptr;
6508  llvm::Value *ThreadLimitVal = nullptr;
6509 
6510  if (const auto *ThreadLimitClause =
6511  D.getSingleClause<OMPThreadLimitClause>()) {
6512  CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6513  llvm::Value *ThreadLimit =
6514  CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(),
6515  /*IgnoreResultAssign*/ true);
6516  ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty,
6517  /*IsSigned=*/true);
6518  }
6519 
6520  if (const auto *NumThreadsClause =
6521  D.getSingleClause<OMPNumThreadsClause>()) {
6522  CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6523  llvm::Value *NumThreads =
6524  CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
6525  /*IgnoreResultAssign*/ true);
6526  NumThreadsVal =
6527  Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/true);
6528  }
6529 
6530  // Select the lesser of thread_limit and num_threads.
6531  if (NumThreadsVal)
6532  ThreadLimitVal = ThreadLimitVal
6533  ? Bld.CreateSelect(Bld.CreateICmpSLT(NumThreadsVal,
6534  ThreadLimitVal),
6535  NumThreadsVal, ThreadLimitVal)
6536  : NumThreadsVal;
6537 
6538  // Set default value passed to the runtime if either teams or a target
6539  // parallel type directive is found but no clause is specified.
6540  if (!ThreadLimitVal)
6541  ThreadLimitVal = DefaultThreadLimitVal;
6542 
6543  return ThreadLimitVal;
6544  }
6545 
6546  // If the current target region has a teams region enclosed, we need to get
6547  // the thread limit to pass to the runtime function call. This is done
6548  // by generating the expression in an inlined region. This is required because
6549  // the expression is captured in the enclosing target environment when the
6550  // teams directive is not combined with target.
6551 
6552  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6553 
6554  if (const auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>(
6555  ignoreCompoundStmts(CS.getCapturedStmt()))) {
6556  if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) {
6557  if (const auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
6558  CGOpenMPInnerExprInfo CGInfo(CGF, CS);
6559  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6560  llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit());
6561  return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty,
6562  /*IsSigned=*/true);
6563  }
6564 
6565  // If we have an enclosed teams directive but no thread_limit clause we
6566  // use the default value 0.
6567  return CGF.Builder.getInt32(0);
6568  }
6569  }
6570 
6571  // No teams associated with the directive.
6572  return nullptr;
6573 }
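// Illustrative examples (assumed):
//   #pragma omp target teams thread_limit(8)    -> emits i32 8
//   #pragma omp target parallel num_threads(4)  -> emits i32 4
//   combined directive with thread_limit(8) and num_threads(4) -> emits i32 4
//   combined directive with neither clause      -> emits i32 0 (default)
//   plain 'target' with no enclosed teams region -> returns nullptr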
6574 
6575 namespace {
6577 
6578 // Utility to handle information from clauses associated with a given
6579 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6580 // It provides a convenient interface to obtain the information and generate
6581 // code for that information.
6582 class MappableExprsHandler {
6583 public:
6584  /// Values for bit flags used to specify the mapping type for
6585  /// offloading.
6586  enum OpenMPOffloadMappingFlags : uint64_t {
6587  /// No flags
6588  OMP_MAP_NONE = 0x0,
6589  /// Allocate memory on the device and move data from host to device.
6590  OMP_MAP_TO = 0x01,
6591  /// Allocate memory on the device and move data from device to host.
6592  OMP_MAP_FROM = 0x02,
6593  /// Always perform the requested mapping action on the element, even
6594  /// if it was already mapped before.
6595  OMP_MAP_ALWAYS = 0x04,
6596  /// Delete the element from the device environment, ignoring the
6597  /// current reference count associated with the element.
6598  OMP_MAP_DELETE = 0x08,
6599  /// The element being mapped is a pointer-pointee pair; both the
6600  /// pointer and the pointee should be mapped.
6601  OMP_MAP_PTR_AND_OBJ = 0x10,
6602  /// This flag signals that the base address of an entry should be
6603  /// passed to the target kernel as an argument.
6604  OMP_MAP_TARGET_PARAM = 0x20,
6605  /// Signal that the runtime library has to return the device pointer
6606  /// in the current position for the data being mapped. Used when we have the
6607  /// use_device_ptr clause.
6608  OMP_MAP_RETURN_PARAM = 0x40,
6609  /// This flag signals that the reference being passed is a pointer to
6610  /// private data.
6611  OMP_MAP_PRIVATE = 0x80,
6612  /// Pass the element to the device by value.
6613  OMP_MAP_LITERAL = 0x100,
6614  /// Implicit map
6615  OMP_MAP_IMPLICIT = 0x200,
6616  /// The 16 MSBs of the flags indicate whether the entry is a member of some
6617  /// struct/class.
6618  OMP_MAP_MEMBER_OF = 0xffff000000000000,
6619  LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
6620  };
6621 
6622  /// Class that associates information with a base pointer to be passed to the
6623  /// runtime library.
6624  class BasePointerInfo {
6625  /// The base pointer.
6626  llvm::Value *Ptr = nullptr;
6627  /// The base declaration that refers to this device pointer, or null if
6628  /// there is none.
6629  const ValueDecl *DevPtrDecl = nullptr;
6630 
6631  public:
6632  BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
6633  : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
6634  llvm::Value *operator*() const { return Ptr; }
6635  const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
6636  void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
6637  };
6638 
6639  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
6640  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
6641  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
6642 
6643  /// Map between a struct and its lowest & highest elements which have been
6644  /// mapped.
6645  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
6646  /// HE(FieldIndex, Pointer)}
6647  struct StructRangeInfoTy {
6648  std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
6649  0, Address::invalid()};
6650  std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
6651  0, Address::invalid()};
6652  Address Base = Address::invalid();
6653  };
6654 
6655 private:
6656  /// Kind that defines how a device pointer has to be returned.
6657  struct MapInfo {
6658  OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
6659  OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
6660  ArrayRef<OpenMPMapModifierKind> MapModifiers;
6661  bool ReturnDevicePointer = false;
6662  bool IsImplicit = false;
6663 
6664  MapInfo() = default;
6665  MapInfo(
6667  OpenMPMapClauseKind MapType,
6668  ArrayRef<OpenMPMapModifierKind> MapModifiers,
6669  bool ReturnDevicePointer, bool IsImplicit)
6670  : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
6671  ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
6672  };
6673 
6674  /// If use_device_ptr is used on a pointer which is a struct member and there
6675  /// is no map information about it, then emission of that entry is deferred
6676  /// until the whole struct has been processed.
6677  struct DeferredDevicePtrEntryTy {
6678  const Expr *IE = nullptr;
6679  const ValueDecl *VD = nullptr;
6680 
6681  DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
6682  : IE(IE), VD(VD) {}
6683  };
6684 
6685  /// Directive from where the map clauses were extracted.
6686  const OMPExecutableDirective &CurDir;
6687 
6688  /// Function the directive is being generated for.
6689  CodeGenFunction &CGF;
6690 
6691  /// Set of all first private variables in the current directive.
6692  llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls;
6693 
6694  /// Map between device pointer declarations and their expression components.
6695  /// The key value for declarations in 'this' is null.
6696  llvm::DenseMap<
6697  const ValueDecl *,
6698  SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6699  DevPointersMap;
6700 
6701  llvm::Value *getExprTypeSize(const Expr *E) const {
6702  QualType ExprTy = E->getType().getCanonicalType();
6703 
6704  // Reference types are ignored for mapping purposes.
6705  if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
6706  ExprTy = RefTy->getPointeeType().getCanonicalType();
6707 
6708  // Given that an array section is considered a built-in type, we need to
6709  // do the calculation based on the length of the section instead of relying
6710  // on CGF.getTypeSize(E->getType()).
6711  if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
6712  QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
6713  OAE->getBase()->IgnoreParenImpCasts())
6714  .getCanonicalType();
6715 
6716  // If there is no length associated with the expression, that means we
6717  // are using the whole length of the base.
6718  if (!OAE->getLength() && OAE->getColonLoc().isValid())
6719  return CGF.getTypeSize(BaseTy);
6720 
6721  llvm::Value *ElemSize;
6722  if (const auto *PTy = BaseTy->getAs<PointerType>()) {
6723  ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
6724  } else {
6725  const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
6726  assert(ATy && "Expecting array type if not a pointer type.");
6727  ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
6728  }
6729 
6730  // If we don't have a length at this point, that is because we have an
6731  // array section with a single element.
6732  if (!OAE->getLength())
6733  return ElemSize;
6734 
6735  llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
6736  LengthVal =
6737  CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
6738  return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
6739  }
6740  return CGF.getTypeSize(ExprTy);
6741  }
6742 
6743  /// Return the corresponding bits for a given map clause modifier. Add
6744  /// a flag marking the map as a pointer if requested. Add a flag marking the
6745  /// map as the first one of a series of maps that relate to the same map
6746  /// expression.
6747  OpenMPOffloadMappingFlags getMapTypeBits(
6748  OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
6749  bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
6750  OpenMPOffloadMappingFlags Bits =
6751  IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
6752  switch (MapType) {
6753  case OMPC_MAP_alloc:
6754  case OMPC_MAP_release:
6755  // alloc and release are the default behavior in the runtime library, i.e.
6756  // if we don't pass any bits, alloc/release is what the runtime is
6757  // going to do. Therefore, we don't need to signal anything for these two
6758  // type modifiers.
6759  break;
6760  case OMPC_MAP_to:
6761  Bits |= OMP_MAP_TO;
6762  break;
6763  case OMPC_MAP_from:
6764  Bits |= OMP_MAP_FROM;
6765  break;
6766  case OMPC_MAP_tofrom:
6767  Bits |= OMP_MAP_TO | OMP_MAP_FROM;
6768  break;
6769  case OMPC_MAP_delete:
6770  Bits |= OMP_MAP_DELETE;
6771  break;
6772  case OMPC_MAP_unknown:
6773  llvm_unreachable("Unexpected map type!");
6774  }
6775  if (AddPtrFlag)
6776  Bits |= OMP_MAP_PTR_AND_OBJ;
6777  if (AddIsTargetParamFlag)
6778  Bits |= OMP_MAP_TARGET_PARAM;
6779  if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
6780  != MapModifiers.end())
6781  Bits |= OMP_MAP_ALWAYS;
6782  return Bits;
6783  }
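  // Illustrative example (assumed): for 'map(always, tofrom: x)' with
  // AddIsTargetParamFlag set, the result is
  //   OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | OMP_MAP_TARGET_PARAM
  //   = 0x01 | 0x02 | 0x04 | 0x20 = 0x27.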
6784 
6785  /// Return true if the provided expression is a final array section. A
6786  /// final array section is one whose length can't be proved to be one.
6787  bool isFinalArraySectionExpression(const Expr *E) const {
6788  const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
6789 
6790  // It is not an array section and therefore not a unity-size one.
6791  if (!OASE)
6792  return false;
6793 
6794  // An array section with no colon always refers to a single element.
6795  if (OASE->getColonLoc().isInvalid())
6796  return false;
6797 
6798  const Expr *Length = OASE->getLength();
6799 
6800  // If we don't have a length we have to check if the array has size 1
6801  // for this dimension. Also, we should always expect a length if the
6802  // base type is a pointer.
6803  if (!Length) {
6804  QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
6805  OASE->getBase()->IgnoreParenImpCasts())
6806  .getCanonicalType();
6807  if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
6808  return ATy->getSize().getSExtValue() != 1;
6809  // If we don't have a constant dimension length, we have to consider
6810  // the current section as having any size, so it is not necessarily
6811  // unitary. If it happens to be of unit size, that's the user's fault.
6812  return true;
6813  }
6814 
6815  // Check if the length evaluates to 1.
6816  Expr::EvalResult Result;
6817  if (!Length->EvaluateAsInt(Result, CGF.getContext()))
6818  return true; // Can have more than size 1.
6819 
6820  llvm::APSInt ConstLength = Result.Val.getInt();
6821  return ConstLength.getSExtValue() != 1;
6822  }
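  // Illustrative examples (assumed), given 'int a[10]' and a non-constant 'n':
  //   a[2:1] -> false (length provably one)
  //   a[2]   -> false (not an array section)
  //   a[2:n] -> true  (length cannot be proved to be one)
  //   a[2:]  -> true  (the array dimension is 10, not one)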
6823 
6824  /// Generate the base pointers, section pointers, sizes and map type
6825  /// bits for the provided map type, map modifier, and expression components.
6826  /// \a IsFirstComponentList should be set to true if the provided set of
6827  /// components is the first associated with a capture.
6828  void generateInfoForComponentList(
6829  OpenMPMapClauseKind MapType,
6830  ArrayRef<OpenMPMapModifierKind> MapModifiers,
6831  OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6832  MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
6833  MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
6834  StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
6835  bool IsImplicit,
6836  ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
6837  OverlappedElements = llvm::None) const {
6838  // The following summarizes what has to be generated for each map and the
6839  // types below. The generated information is expressed in this order:
6840  // base pointer, section pointer, size, flags
6841  // (to add to the ones that come from the map type and modifier).
6842  //
6843  // double d;
6844  // int i[100];
6845  // float *p;
6846  //
6847  // struct S1 {
6848  // int i;
6849  // float f[50];
6850  // }
6851  // struct S2 {
6852  // int i;
6853  // float f[50];
6854  // S1 s;
6855  // double *p;
6856  // struct S2 *ps;
6857  // }
6858  // S2 s;
6859  // S2 *ps;
6860  //
6861  // map(d)
6862  // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
6863  //
6864  // map(i)
6865  // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
6866  //
6867  // map(i[1:23])
6868  // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
6869  //
6870  // map(p)
6871  // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
6872  //
6873  // map(p[1:24])
6874  // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
6875  //
6876  // map(s)
6877  // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
6878  //
6879  // map(s.i)
6880  // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
6881  //
6882  // map(s.s.f)
6883  // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
6884  //
6885  // map(s.p)
6886  // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
6887  //
6888  // map(to: s.p[:22])
6889  // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
6890  // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
6891  // &(s.p), &(s.p[0]), 22*sizeof(double),
6892  // MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
6893  // (*) alloc space for struct members, only this is a target parameter
6894  // (**) map the pointer (nothing to be mapped in this example) (the compiler
6895  // optimizes this entry out, same in the examples below)
6896  // (***) map the pointee (map: to)
6897  //
6898  // map(s.ps)
6899  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
6900  //
6901  // map(from: s.ps->s.i)
6902  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6903  // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6904  // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6905  //
6906  // map(to: s.ps->ps)
6907  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6908  // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6909  // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
6910  //
6911  // map(s.ps->ps->ps)
6912  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6913  // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6914  // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6915  // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
6916  //
6917  // map(to: s.ps->ps->s.f[:22])
6918  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6919  // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6920  // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6921  // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
6922  //
6923  // map(ps)
6924  // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
6925  //
6926  // map(ps->i)
6927  // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
6928  //
6929  // map(ps->s.f)
6930  // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
6931  //
6932  // map(from: ps->p)
6933  // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
6934  //
6935  // map(to: ps->p[:22])
6936  // ps, &(ps->p), sizeof(double*), TARGET_PARAM
6937  // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
6938  // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
6939  //
6940  // map(ps->ps)
6941  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
6942  //
6943  // map(from: ps->ps->s.i)
6944  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6945  // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6946  // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6947  //
6948  // map(from: ps->ps->ps)
6949  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6950  // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6951  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6952  //
6953  // map(ps->ps->ps->ps)
6954  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6955  // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6956  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6957  // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
6958  //
6959  // map(to: ps->ps->ps->s.f[:22])
6960  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6961  // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6962  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6963  // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
6964  //
6965  // map(to: s.f[:22]) map(from: s.p[:33])
6966  // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
6967  // sizeof(double*) (**), TARGET_PARAM
6968  // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
6969  // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
6970  // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6971  // (*) allocate contiguous space needed to fit all mapped members even if
6972  // we allocate space for members not mapped (in this example,
6973  // s.f[22..49] and s.s are not mapped, yet we must allocate space for
6974  // them as well because they fall between &s.f[0] and &s.p)
6975  //
6976  // map(from: s.f[:22]) map(to: ps->p[:33])
6977  // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
6978  // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
6979  // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
6980  // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
6981  // (*) the struct this entry pertains to is the 2nd element in the list of
6982  // arguments, hence MEMBER_OF(2)
6983  //
6984  // map(from: s.f[:22], s.s) map(to: ps->p[:33])
6985  // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
6986  // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
6987  // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
6988  // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
6989  // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
6990  // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
6991  // (*) the struct this entry pertains to is the 4th element in the list
6992  // of arguments, hence MEMBER_OF(4)
6993 
6994  // Track if the map information being generated is the first for a capture.
6995  bool IsCaptureFirstInfo = IsFirstComponentList;
6996  bool IsLink = false; // Is this variable a "declare target link"?
6997 
6998  // Scan the components from the base to the complete expression.
6999  auto CI = Components.rbegin();
7000  auto CE = Components.rend();
7001  auto I = CI;
7002 
7003  // Track if the map information being generated is the first for a list of
7004  // components.
7005  bool IsExpressionFirstInfo = true;
7006  Address BP = Address::invalid();
7007  const Expr *AssocExpr = I->getAssociatedExpression();
7008  const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7009  const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7010 
7011  if (isa<MemberExpr>(AssocExpr)) {
7012  // The base is the 'this' pointer. The content of the pointer is going
7013  // to be the base of the field being mapped.
7014  BP = CGF.LoadCXXThisAddress();
7015  } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7016  (OASE &&
7017  isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7018  BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7019  } else {
7020  // The base is the reference to the variable.
7021  // BP = &Var.
7022  BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7023  if (const auto *VD =
7024  dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7025  if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7026  OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))
7027  if (*Res == OMPDeclareTargetDeclAttr::MT_Link) {
7028  IsLink = true;
7029  BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
7030  }
7031  }
7032 
7033  // If the variable is a pointer and is being dereferenced (i.e. is not
7034  // the last component), the base has to be the pointer itself, not its
7035  // reference. References are ignored for mapping purposes.
7036  QualType Ty =
7037  I->getAssociatedDeclaration()->getType().getNonReferenceType();
7038  if (Ty->isAnyPointerType() && std::next(I) != CE) {
7039  BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7040 
7041  // We do not need to generate individual map information for the
7042  // pointer, it can be associated with the combined storage.
7043  ++I;
7044  }
7045  }
7046 
7047  // Track whether a component of the list should be marked as MEMBER_OF some
7048  // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7049  // in a component list should be marked as MEMBER_OF; all subsequent entries
7050  // do not belong to the base struct. E.g.
7051  // struct S2 s;
7052  // s.ps->ps->ps->f[:]
7053  // (1) (2) (3) (4)
7054  // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7055  // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7056  // is the pointee of ps(2) which is not member of struct s, so it should not
7057  // be marked as such (it is still PTR_AND_OBJ).
7058  // The variable is initialized to false so that PTR_AND_OBJ entries which
7059  // are not struct members are not considered (e.g. array of pointers to
7060  // data).
7061  bool ShouldBeMemberOf = false;
7062 
7063  // Variable keeping track of whether or not we have encountered a component
7064  // in the component list which is a member expression. Useful when we have a
7065  // pointer or a final array section, in which case it is the previous
7066  // component in the list which tells us whether we have a member expression.
7067  // E.g. X.f[:]
7068  // While processing the final array section "[:]" it is "f" which tells us
7069  // whether we are dealing with a member of a declared struct.
7070  const MemberExpr *EncounteredME = nullptr;
7071 
7072  for (; I != CE; ++I) {
7073  // If the current component is member of a struct (parent struct) mark it.
7074  if (!EncounteredME) {
7075  EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7076  // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7077  // as MEMBER_OF the parent struct.
7078  if (EncounteredME)
7079  ShouldBeMemberOf = true;
7080  }
7081 
7082  auto Next = std::next(I);
7083 
7084  // We need to generate the addresses and sizes if this is the last
7085  // component, if the component is a pointer or if it is an array section
7086  // whose length can't be proved to be one. If this is a pointer, it
7087  // becomes the base address for the following components.
7088 
7089  // A final array section is one whose length can't be proved to be one.
7090  bool IsFinalArraySection =
7091  isFinalArraySectionExpression(I->getAssociatedExpression());
7092 
7093  // Get information on whether the element is a pointer. Array sections
7094  // require special treatment given that they are built-in
7095  // types.
7096  const auto *OASE =
7097  dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7098  bool IsPointer =
7099  (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7100  .getCanonicalType()
7101  ->isAnyPointerType()) ||
7102  I->getAssociatedExpression()->getType()->isAnyPointerType();
7103 
7104  if (Next == CE || IsPointer || IsFinalArraySection) {
7105  // If this is not the last component, we expect the pointer to be
7106  // associated with an array expression or member expression.
7107  assert((Next == CE ||
7108  isa<MemberExpr>(Next->getAssociatedExpression()) ||
7109  isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7110  isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
7111  "Unexpected expression");
7112 
7113  Address LB =
7114  CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress();
7115 
7116  // If this component is a pointer inside the base struct then we don't
7117  // need to create any entry for it - it will be combined with the object
7118  // it is pointing to into a single PTR_AND_OBJ entry.
7119  bool IsMemberPointer =
7120  IsPointer && EncounteredME &&
7121  (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7122  EncounteredME);
7123  if (!OverlappedElements.empty()) {
7124  // Handle base element with the info for overlapped elements.
7125  assert(!PartialStruct.Base.isValid() && "The base element is set.");
7126  assert(Next == CE &&
7127  "Expected last element for the overlapped elements.");
7128  assert(!IsPointer &&
7129  "Unexpected base element with the pointer type.");
7130  // Mark the whole struct as the struct that requires allocation on the
7131  // device.
7132  PartialStruct.LowestElem = {0, LB};
7133  CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7134  I->getAssociatedExpression()->getType());
7135  Address HB = CGF.Builder.CreateConstGEP(
7137  CGF.VoidPtrTy),
7138  TypeSize.getQuantity() - 1, CharUnits::One());
7139  PartialStruct.HighestElem = {
7140  std::numeric_limits<decltype(
7141  PartialStruct.HighestElem.first)>::max(),
7142  HB};
7143  PartialStruct.Base = BP;
7144  // Emit data for non-overlapped data.
7145  OpenMPOffloadMappingFlags Flags =
7146  OMP_MAP_MEMBER_OF |
7147  getMapTypeBits(MapType, MapModifiers, IsImplicit,
7148  /*AddPtrFlag=*/false,
7149  /*AddIsTargetParamFlag=*/false);
7150  LB = BP;
7151  llvm::Value *Size = nullptr;
7152  // Do bitcopy of all non-overlapped structure elements.
7153  for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7154  Component : OverlappedElements) {
7155  Address ComponentLB = Address::invalid();
7156  for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7157  Component) {
7158  if (MC.getAssociatedDeclaration()) {
7159  ComponentLB =
7160  CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7161  .getAddress();
7162  Size = CGF.Builder.CreatePtrDiff(
7163  CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7164  CGF.EmitCastToVoidPtr(LB.getPointer()));
7165  break;
7166  }
7167  }
7168  BasePointers.push_back(BP.getPointer());
7169  Pointers.push_back(LB.getPointer());
7170  Sizes.push_back(Size);
7171  Types.push_back(Flags);
7172  LB = CGF.Builder.CreateConstGEP(ComponentLB, 1,
7173  CGF.getPointerSize());
7174  }
7175  BasePointers.push_back(BP.getPointer());
7176  Pointers.push_back(LB.getPointer());
7177  Size = CGF.Builder.CreatePtrDiff(
7178  CGF.EmitCastToVoidPtr(
7179  CGF.Builder.CreateConstGEP(HB, 1, CharUnits::One())
7180  .getPointer()),
7181  CGF.EmitCastToVoidPtr(LB.getPointer()));
7182  Sizes.push_back(Size);
7183  Types.push_back(Flags);
7184  break;
7185  }
7186  llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7187  if (!IsMemberPointer) {
7188  BasePointers.push_back(BP.getPointer());
7189  Pointers.push_back(LB.getPointer());
7190  Sizes.push_back(Size);
7191 
7192  // We need to add a pointer flag for each map that comes from the
7193  // same expression except for the first one. We also need to signal
7194  // this map is the first one that relates with the current capture
7195  // (there is a set of entries for each capture).
7196  OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7197  MapType, MapModifiers, IsImplicit,
7198  !IsExpressionFirstInfo || IsLink, IsCaptureFirstInfo && !IsLink);
7199 
7200  if (!IsExpressionFirstInfo) {
7201  // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7202  // then we reset the TO/FROM/ALWAYS/DELETE flags.
7203  if (IsPointer)
7204  Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7205  OMP_MAP_DELETE);
7206 
7207  if (ShouldBeMemberOf) {
7208  // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7209  // should be later updated with the correct value of MEMBER_OF.
7210  Flags |= OMP_MAP_MEMBER_OF;
7211  // From now on, all subsequent PTR_AND_OBJ entries should not be
7212  // marked as MEMBER_OF.
7213  ShouldBeMemberOf = false;
7214  }
7215  }
7216 
7217  Types.push_back(Flags);
7218  }
7219 
7220  // If we have encountered a member expression so far, keep track of the
7221  // mapped member. If the parent is "*this", then the value declaration
7222  // is nullptr.
7223  if (EncounteredME) {
7224  const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl());
7225  unsigned FieldIndex = FD->getFieldIndex();
7226 
7227  // Update info about the lowest and highest elements for this struct
7228  if (!PartialStruct.Base.isValid()) {
7229  PartialStruct.LowestElem = {FieldIndex, LB};
7230  PartialStruct.HighestElem = {FieldIndex, LB};
7231  PartialStruct.Base = BP;
7232  } else if (FieldIndex < PartialStruct.LowestElem.first) {
7233  PartialStruct.LowestElem = {FieldIndex, LB};
7234  } else if (FieldIndex > PartialStruct.HighestElem.first) {
7235  PartialStruct.HighestElem = {FieldIndex, LB};
7236  }
7237  }
7238 
7239  // If we have a final array section, we are done with this expression.
7240  if (IsFinalArraySection)
7241  break;
7242 
7243  // The pointer becomes the base for the next element.
7244  if (Next != CE)
7245  BP = LB;
7246 
7247  IsExpressionFirstInfo = false;
7248  IsCaptureFirstInfo = false;
7249  }
7250  }
7251  }
7252 
7253  /// Return the adjusted map modifiers if the declaration a capture refers to
7254  /// appears in a first-private clause. This is expected to be used only with
7255  /// directives that start with 'target'.
7256  MappableExprsHandler::OpenMPOffloadMappingFlags
7257  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7258  assert(Cap.capturesVariable() && "Expected capture by reference only!");
7259 
7260  // A first private variable captured by reference will use only the
7261  // 'private ptr' and 'map to' flag. Return the right flags if the captured
7262  // declaration is known as first-private in this handler.
7263  if (FirstPrivateDecls.count(Cap.getCapturedVar()))
7264  return MappableExprsHandler::OMP_MAP_PRIVATE |
7265  MappableExprsHandler::OMP_MAP_TO;
7266  return MappableExprsHandler::OMP_MAP_TO |
7267  MappableExprsHandler::OMP_MAP_FROM;
7268  }
7269 
7270  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7271  // Member of is given by the 16 MSBs of the flag, so shift the position left by 48 bits.
7272  return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7273  << 48);
7274  }
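  // Illustrative example (assumed): getMemberOfFlag(0) yields
  // 0x0001000000000000, i.e. MEMBER_OF(1), marking membership in the first
  // combined entry of the argument list.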
7275 
7276  static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7277  OpenMPOffloadMappingFlags MemberOfFlag) {
7278  // If the entry is PTR_AND_OBJ but has not been marked with the special
7279  // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7280  // marked as MEMBER_OF.
7281  if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7282  ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7283  return;
7284 
7285  // Reset the placeholder value to prepare the flag for the assignment of the
7286  // proper MEMBER_OF value.
7287  Flags &= ~OMP_MAP_MEMBER_OF;
7288  Flags |= MemberOfFlag;
7289  }
7290 
7291  void getPlainLayout(const CXXRecordDecl *RD,
7292  llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7293  bool AsBase) const {
7294  const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7295 
7296  llvm::StructType *St =
7297  AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7298 
7299  unsigned NumElements = St->getNumElements();
7300  SmallVector<
7301  llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7302  RecordLayout(NumElements);
7303 
7304  // Fill bases.
7305  for (const auto &I : RD->bases()) {
7306  if (I.isVirtual())
7307  continue;
7308  const auto *Base = I.getType()->getAsCXXRecordDecl();
7309  // Ignore empty bases.
7310  if (Base->isEmpty() || CGF.getContext()
7311  .getASTRecordLayout(RD)
7312  .getBaseClassOffset(Base)
7313  .isZero())
7314  continue;
7315 
7316  unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7317  RecordLayout[FieldIndex] = Base;
7318  }
7319  // Fill in virtual bases.
7320  for (const auto &I : RD->vbases()) {
7321  const auto *Base = I.getType()->getAsCXXRecordDecl();
7322  // Ignore empty bases.
7323  if (Base->isEmpty())
7324  continue;
7325  unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7326  if (RecordLayout[FieldIndex])
7327  continue;
7328  RecordLayout[FieldIndex] = Base;
7329  }
7330  // Fill in all the fields.
7331  assert(!RD->isUnion() && "Unexpected union.");
7332  for (const auto *Field : RD->fields()) {
7333  // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7334  // will fill in later.)
7335  if (!Field->isBitField()) {
7336  unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7337  RecordLayout[FieldIndex] = Field;
7338  }
7339  }
7340  for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7341  &Data : RecordLayout) {
7342  if (Data.isNull())
7343  continue;
7344  if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7345  getPlainLayout(Base, Layout, /*AsBase=*/true);
7346  else
7347  Layout.push_back(Data.get<const FieldDecl *>());
7348  }
7349  }
7350 
7351 public:
7352  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
7353  : CurDir(Dir), CGF(CGF) {
7354  // Extract firstprivate clause information.
7355  for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
7356  for (const auto *D : C->varlists())
7357  FirstPrivateDecls.insert(
7358  cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
7359  // Extract device pointer clause information.
7360  for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7361  for (auto L : C->component_lists())
7362  DevPointersMap[L.first].push_back(L.second);
7363  }
7364 
7365  /// Generate code for the combined entry if we have a partially mapped struct
7366  /// and take care of the mapping flags of the arguments corresponding to
7367  /// individual struct members.
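/// The combined entry spans from the lowest mapped member to one past the
/// highest mapped member, so the partially mapped struct can be transferred as
/// a single contiguous region.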
7368  void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
7369  MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7370  MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
7371  const StructRangeInfoTy &PartialStruct) const {
7372  // Base is the base of the struct
7373  BasePointers.push_back(PartialStruct.Base.getPointer());
7374  // Pointer is the address of the lowest element
7375  llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
7376  Pointers.push_back(LB);
7377  // Size is (addr of {highest+1} element) - (addr of lowest element)
7378  llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
7379  llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
7380  llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
7381  llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
7382  llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
7383  llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.SizeTy,
7384  /*isSigned=*/false);
7385  Sizes.push_back(Size);
7386  // Map type is always TARGET_PARAM
7387  Types.push_back(OMP_MAP_TARGET_PARAM);
7388  // Remove TARGET_PARAM flag from the first element
7389  (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
7390 
7391  // All other current entries will be MEMBER_OF the combined entry
7392  // (except for PTR_AND_OBJ entries which do not have a placeholder value
7393  // 0xFFFF in the MEMBER_OF field).
7394  OpenMPOffloadMappingFlags MemberOfFlag =
7395  getMemberOfFlag(BasePointers.size() - 1);
7396  for (auto &M : CurTypes)
7397  setCorrectMemberOfFlag(M, MemberOfFlag);
7398  }
7399 
7400  /// Generate all the base pointers, section pointers, sizes and map
7401  /// types for the extracted mappable expressions. Also, for each item that
7402  /// relates with a device pointer, a pair of the relevant declaration and
7403  /// index where it occurs is appended to the device pointers info array.
7404  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
7405  MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7406  MapFlagsArrayTy &Types) const {
7407  // We have to process the component lists that relate with the same
7408  // declaration in a single chunk so that we can generate the map flags
7409  // correctly. Therefore, we organize all lists in a map.
7410  llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
7411 
7412  // Helper function to fill the information map for the different supported
7413  // clauses.
7414  auto &&InfoGen = [&Info](
7415  const ValueDecl *D,
7416  OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7417  OpenMPMapClauseKind MapType,
7418  ArrayRef<OpenMPMapModifierKind> MapModifiers,
7419  bool ReturnDevicePointer, bool IsImplicit) {
7420  const ValueDecl *VD =
7421  D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
7422  Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
7423  IsImplicit);
7424  };
7425 
7426  // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
7427  for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
7428  for (const auto &L : C->component_lists()) {
7429  InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
7430  /*ReturnDevicePointer=*/false, C->isImplicit());
7431  }
7432  for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
7433  for (const auto &L : C->component_lists()) {
7434  InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
7435  /*ReturnDevicePointer=*/false, C->isImplicit());
7436  }
7437  for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
7438  for (const auto &L : C->component_lists()) {
7439  InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
7440  /*ReturnDevicePointer=*/false, C->isImplicit());
7441  }
7442 
7443  // Look at the use_device_ptr clause information and mark the existing map
7444  // entries as such. If there is no map information for an entry in the
7445  // use_device_ptr list, we create one with map type 'alloc' and zero size
7446  // section. It is the user fault if that was not mapped before. If there is
7447  // no map information and the pointer is a struct member, then we defer the
7448  // emission of that entry until the whole struct has been processed.
7449  llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
7450  DeferredInfo;
7451 
7452  // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
7453  for (const auto *C :
7454  this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) {
7455  for (const auto &L : C->component_lists()) {
7456  assert(!L.second.empty() && "Not expecting empty list of components!");
7457  const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
7458  VD = cast<ValueDecl>(VD->getCanonicalDecl());
7459  const Expr *IE = L.second.back().getAssociatedExpression();
7460  // If the first component is a member expression, we have to look into
7461  // 'this', which maps to null in the map of map information. Otherwise
7462  // look directly for the information.
7463  auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
7464 
7465  // We potentially have map information for this declaration already.
7466  // Look for the first set of components that refer to it.
7467  if (It != Info.end()) {
7468  auto CI = std::find_if(
7469  It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
7470  return MI.Components.back().getAssociatedDeclaration() == VD;
7471  });
7472  // If we found a map entry, signal that the pointer has to be returned
7473  // and move on to the next declaration.
7474  if (CI != It->second.end()) {
7475  CI->ReturnDevicePointer = true;
7476  continue;
7477  }
7478  }
7479 
7480  // We didn't find any match in our map information - generate a zero
7481  // size array section - if the pointer is a struct member we defer this
7482  // action until the whole struct has been processed.
7483  // FIXME: MSVC 2013 seems to require this-> to find member CGF.
7484  if (isa<MemberExpr>(IE)) {
7485  // Insert the pointer into Info to be processed by
7486  // generateInfoForComponentList. Because it is a member pointer
7487  // without a pointee, no entry will be generated for it, therefore
7488  // we need to generate one after the whole struct has been processed.
7489  // Nonetheless, generateInfoForComponentList must be called to take
7490  // the pointer into account for the calculation of the range of the
7491  // partial struct.
7492  InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
7493  /*ReturnDevicePointer=*/false, C->isImplicit());
7494  DeferredInfo[nullptr].emplace_back(IE, VD);
7495  } else {
7496  llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
7497  this->CGF.EmitLValue(IE), IE->getExprLoc());
7498  BasePointers.emplace_back(Ptr, VD);
7499  Pointers.push_back(Ptr);
7500  Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
7501  Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
7502  }
7503  }
7504  }
7505 
7506  for (const auto &M : Info) {
7507  // We need to know when we generate information for the first component
7508  // associated with a capture, because the mapping flags depend on it.
7509  bool IsFirstComponentList = true;
7510 
7511  // Temporary versions of arrays
7512  MapBaseValuesArrayTy CurBasePointers;
7513  MapValuesArrayTy CurPointers;
7514  MapValuesArrayTy CurSizes;
7515  MapFlagsArrayTy CurTypes;
7516  StructRangeInfoTy PartialStruct;
7517 
7518  for (const MapInfo &L : M.second) {
7519  assert(!L.Components.empty() &&
7520  "Not expecting declaration with no component lists.");
7521 
7522  // Remember the current base pointer index.
7523  unsigned CurrentBasePointersIdx = CurBasePointers.size();
7524  // FIXME: MSVC 2013 seems to require this-> to find the member method.
7525  this->generateInfoForComponentList(
7526  L.MapType, L.MapModifiers, L.Components, CurBasePointers,
7527  CurPointers, CurSizes, CurTypes, PartialStruct,
7528  IsFirstComponentList, L.IsImplicit);
7529 
7530  // If this entry relates with a device pointer, set the relevant
7531  // declaration and add the 'return pointer' flag.
7532  if (L.ReturnDevicePointer) {
7533  assert(CurBasePointers.size() > CurrentBasePointersIdx &&
7534  "Unexpected number of mapped base pointers.");
7535 
7536  const ValueDecl *RelevantVD =
7537  L.Components.back().getAssociatedDeclaration();
7538  assert(RelevantVD &&
7539  "No relevant declaration related with device pointer??");
7540 
7541  CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
7542  CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
7543  }
7544  IsFirstComponentList = false;
7545  }
7546 
7547  // Append any pending zero-length pointers which are struct members and
7548  // used with use_device_ptr.
7549  auto CI = DeferredInfo.find(M.first);
7550  if (CI != DeferredInfo.end()) {
7551  for (const DeferredDevicePtrEntryTy &L : CI->second) {
7552  llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer();
7553  llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
7554  this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
7555  CurBasePointers.emplace_back(BasePtr, L.VD);
7556  CurPointers.push_back(Ptr);
7557  CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
7558  // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
7559  // value MEMBER_OF=FFFF so that the entry is later updated with the
7560  // correct value of MEMBER_OF.
7561  CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
7562  OMP_MAP_MEMBER_OF);
7563  }
7564  }
7565 
7566  // If there is an entry in PartialStruct it means we have a struct with
7567  // individual members mapped. Emit an extra combined entry.
7568  if (PartialStruct.Base.isValid())
7569  emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
7570  PartialStruct);
7571 
7572  // We need to append the results of this capture to what we already have.
7573  BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
7574  Pointers.append(CurPointers.begin(), CurPointers.end());
7575  Sizes.append(CurSizes.begin(), CurSizes.end());
7576  Types.append(CurTypes.begin(), CurTypes.end());
7577  }
7578  }
7579 
7580  /// Emit capture info for lambdas for variables captured by reference.
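/// Each by-reference capture (and the captured 'this', if any) is emitted as a
/// PTR_AND_OBJ | LITERAL | IMPLICIT entry whose MEMBER_OF placeholder is later
/// resolved by adjustMemberOfForLambdaCaptures().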
7581  void generateInfoForLambdaCaptures(
7582  const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
7583  MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7584  MapFlagsArrayTy &Types,
7585  llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
7586  const auto *RD = VD->getType()
7587  .getCanonicalType()
7588  .getNonReferenceType()
7589  ->getAsCXXRecordDecl();
7590  if (!RD || !RD->isLambda())
7591  return;
7592  Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
7593  LValue VDLVal = CGF.MakeAddrLValue(
7594  VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
7595  llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
7596  FieldDecl *ThisCapture = nullptr;
7597  RD->getCaptureFields(Captures, ThisCapture);
7598  if (ThisCapture) {
7599  LValue ThisLVal =
7600  CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
7601  LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
7602  LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer());
7603  BasePointers.push_back(ThisLVal.getPointer());
7604  Pointers.push_back(ThisLValVal.getPointer());
7605  Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
7606  Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
7607  OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
7608  }
7609  for (const LambdaCapture &LC : RD->captures()) {
7610  if (LC.getCaptureKind() != LCK_ByRef)
7611  continue;
7612  const VarDecl *VD = LC.getCapturedVar();
7613  auto It = Captures.find(VD);
7614  assert(It != Captures.end() && "Found lambda capture without field.");
7615  LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
7616  LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
7617  LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
7618  BasePointers.push_back(VarLVal.getPointer());
7619  Pointers.push_back(VarLValVal.getPointer());
7620  Sizes.push_back(CGF.getTypeSize(
7621  VD->getType().getCanonicalType().getNonReferenceType()));
7622  Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
7623  OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
7624  }
7625  }
7626 
7627  /// Set correct indices for lambdas captures.
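/// For every implicit lambda-capture entry, MEMBER_OF is pointed at the entry
/// that maps the enclosing lambda object itself.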
7628  void adjustMemberOfForLambdaCaptures(
7629  const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
7630  MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
7631  MapFlagsArrayTy &Types) const {
7632  for (unsigned I = 0, E = Types.size(); I < E; ++I) {
7633  // Set correct member_of idx for all implicit lambda captures.
7634  if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
7635  OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
7636  continue;
7637  llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
7638  assert(BasePtr && "Unable to find base lambda address.");
7639  int TgtIdx = -1;
7640  for (unsigned J = I; J > 0; --J) {
7641  unsigned Idx = J - 1;
7642  if (Pointers[Idx] != BasePtr)
7643  continue;
7644  TgtIdx = Idx;
7645  break;
7646  }
7647  assert(TgtIdx != -1 && "Unable to find parent lambda.");
7648  // All other current entries will be MEMBER_OF the combined entry
7649  // (except for PTR_AND_OBJ entries which do not have a placeholder value
7650  // 0xFFFF in the MEMBER_OF field).
7651  OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
7652  setCorrectMemberOfFlag(Types[I], MemberOfFlag);
7653  }
7654  }
7655 
7656  /// Generate the base pointers, section pointers, sizes and map types
7657  /// associated to a given capture.
7658  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
7659  llvm::Value *Arg,
7660  MapBaseValuesArrayTy &BasePointers,
7661  MapValuesArrayTy &Pointers,
7662  MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
7663  StructRangeInfoTy &PartialStruct) const {
7664  assert(!Cap->capturesVariableArrayType() &&
7665  "Not expecting to generate map info for a variable array type!");
7666 
7667  // We need to know when we are generating information for the first component
7668  const ValueDecl *VD = Cap->capturesThis()
7669  ? nullptr
7670  : Cap->getCapturedVar()->getCanonicalDecl();
7671 
7672  // If this declaration appears in an is_device_ptr clause, we just have to
7673  // pass the pointer by value. If it is a reference to a declaration, we just
7674  // pass its value.
7675  if (DevPointersMap.count(VD)) {
7676  BasePointers.emplace_back(Arg, VD);
7677  Pointers.push_back(Arg);
7678  Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
7679  Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
7680  return;
7681  }
7682 
7683  using MapData =
7684  std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
7685  OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
7686  SmallVector<MapData, 4> DeclComponentLists;
7687  // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
7688  for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
7689  for (const auto &L : C->decl_component_lists(VD)) {
7690  assert(L.first == VD &&
7691  "We got information for the wrong declaration??");
7692  assert(!L.second.empty() &&
7693  "Not expecting declaration with no component lists.");
7694  DeclComponentLists.emplace_back(L.second, C->getMapType(),
7695  C->getMapTypeModifiers(),
7696  C->isImplicit());
7697  }
7698  }
7699 
7700  // Find overlapping elements (including the offset from the base element).
7701  llvm::SmallDenseMap<
7702  const MapData *,
7703  llvm::SmallVector<
7704  OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
7705  4>
7706  OverlappedData;
7707  size_t Count = 0;
7708  for (const MapData &L : DeclComponentLists) {
7709  OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
7710  OpenMPMapClauseKind MapType;
7711  ArrayRef<OpenMPMapModifierKind> MapModifiers;
7712  bool IsImplicit;
7713  std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
7714  ++Count;
7715  for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
7716  OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
7717  std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
7718  auto CI = Components.rbegin();
7719  auto CE = Components.rend();
7720  auto SI = Components1.rbegin();
7721  auto SE = Components1.rend();
7722  for (; CI != CE && SI != SE; ++CI, ++SI) {
7723  if (CI->getAssociatedExpression()->getStmtClass() !=
7724  SI->getAssociatedExpression()->getStmtClass())
7725  break;
7726  // Are we dealing with different variables/fields?
7727  if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
7728  break;
7729  }
7730  // An overlap was found if, for at least one of the two lists, we reached
7731  // the head of its components list.
7732  if (CI == CE || SI == SE) {
7733  assert((CI != CE || SI != SE) &&
7734  "Unexpected full match of the mapping components.");
7735  const MapData &BaseData = CI == CE ? L : L1;
7736  OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
7737  SI == SE ? Components : Components1;
7738  auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
7739  OverlappedElements.getSecond().push_back(SubData);
7740  }
7741  }
7742  }
7743  // Sort the overlapped elements for each item.
7744  llvm::SmallVector<const FieldDecl *, 4> Layout;
7745  if (!OverlappedData.empty()) {
7746  if (const auto *CRD =
7747  VD->getType().getCanonicalType()->getAsCXXRecordDecl())
7748  getPlainLayout(CRD, Layout, /*AsBase=*/false);
7749  else {
7750  const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
7751  Layout.append(RD->field_begin(), RD->field_end());
7752  }
7753  }
7754  for (auto &Pair : OverlappedData) {
7755  llvm::sort(
7756  Pair.getSecond(),
7757  [&Layout](
7758  OMPClauseMappableExprCommon::MappableExprComponentListRef First,
7759  OMPClauseMappableExprCommon::MappableExprComponentListRef
7760  Second) {
7761  auto CI = First.rbegin();
7762  auto CE = First.rend();
7763  auto SI = Second.rbegin();
7764  auto SE = Second.rend();
7765  for (; CI != CE && SI != SE; ++CI, ++SI) {
7766  if (CI->getAssociatedExpression()->getStmtClass() !=
7767  SI->getAssociatedExpression()->getStmtClass())
7768  break;
7769  // Are we dealing with different variables/fields?
7770  if (CI->getAssociatedDeclaration() !=
7771  SI->getAssociatedDeclaration())
7772  break;
7773  }
7774 
7775  // Lists contain the same elements.
7776  if (CI == CE && SI == SE)
7777  return false;
7778 
7779  // List with less elements is less than list with more elements.
7780  if (CI == CE || SI == SE)
7781  return CI == CE;
7782 
7783  const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
7784  const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
7785  if (FD1->getParent() == FD2->getParent())
7786  return FD1->getFieldIndex() < FD2->getFieldIndex();
7787  const auto It =
7788  llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
7789  return FD == FD1 || FD == FD2;
7790  });
7791  return *It == FD1;
7792  });
7793  }
7794 
7795  // The mapping flags depend on whether this is the first component list for
7796  // a capture. Go through the elements with overlapped elements first.
7797  for (const auto &Pair : OverlappedData) {
7798  const MapData &L = *Pair.getFirst();
7799  OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
7800  OpenMPMapClauseKind MapType;
7801  ArrayRef<OpenMPMapModifierKind> MapModifiers;
7802  bool IsImplicit;
7803  std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
7804  ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7805  OverlappedComponents = Pair.getSecond();
7806  bool IsFirstComponentList = true;
7807  generateInfoForComponentList(MapType, MapModifiers, Components,
7808  BasePointers, Pointers, Sizes, Types,
7809  PartialStruct, IsFirstComponentList,
7810  IsImplicit, OverlappedComponents);
7811  }
7812  // Go through other elements without overlapped elements.
7813  bool IsFirstComponentList = OverlappedData.empty();
7814  for (const MapData &L : DeclComponentLists) {
7815  OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
7816  OpenMPMapClauseKind MapType;
7817  ArrayRef<OpenMPMapModifierKind> MapModifiers;
7818  bool IsImplicit;
7819  std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
7820  auto It = OverlappedData.find(&L);
7821  if (It == OverlappedData.end())
7822  generateInfoForComponentList(MapType, MapModifiers, Components,
7823  BasePointers, Pointers, Sizes, Types,
7824  PartialStruct, IsFirstComponentList,
7825  IsImplicit);
7826  IsFirstComponentList = false;
7827  }
7828  }
7829 
7830  /// Generate the base pointers, section pointers, sizes and map types
7831  /// associated with the declare target link variables.
7832  void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
7833  MapValuesArrayTy &Pointers,
7834  MapValuesArrayTy &Sizes,
7835  MapFlagsArrayTy &Types) const {
7836  // Map other list items in the map clause which are not captured variables
7837  // but "declare target link" global variables.
7838  for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
7839  for (const auto &L : C->component_lists()) {
7840  if (!L.first)
7841  continue;
7842  const auto *VD = dyn_cast<VarDecl>(L.first);
7843  if (!VD)
7844  continue;
7845  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7846  OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
7847  if (!Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
7848  continue;
7849  StructRangeInfoTy PartialStruct;
7850  generateInfoForComponentList(
7851  C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
7852  Pointers, Sizes, Types, PartialStruct,
7853  /*IsFirstComponentList=*/true, C->isImplicit());
7854  assert(!PartialStruct.Base.isValid() &&
7855  "No partial structs for declare target link expected.");
7856  }
7857  }
7858  }
7859 
7860  /// Generate the default map information for a given capture \a CI,
7861  /// record field declaration \a RI and captured value \a CV.
7862  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
7863  const FieldDecl &RI, llvm::Value *CV,
7864  MapBaseValuesArrayTy &CurBasePointers,
7865  MapValuesArrayTy &CurPointers,
7866  MapValuesArrayTy &CurSizes,
7867  MapFlagsArrayTy &CurMapTypes) const {
7868  // Do the default mapping.
7869  if (CI.capturesThis()) {
7870  CurBasePointers.push_back(CV);
7871  CurPointers.push_back(CV);
7872  const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
7873  CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType()));
7874  // Default map type.
7875  CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
7876  } else if (CI.capturesVariableByCopy()) {
7877  CurBasePointers.push_back(CV);
7878  CurPointers.push_back(CV);
7879  if (!RI.getType()->isAnyPointerType()) {
7880  // We have to signal to the runtime which captures are passed by value and
7881  // are not pointers.
7882  CurMapTypes.push_back(OMP_MAP_LITERAL);
7883  CurSizes.push_back(CGF.getTypeSize(RI.getType()));
7884  } else {
7885  // Pointers are implicitly mapped with a zero size and no flags
7886  // (other than first map that is added for all implicit maps).
7887  CurMapTypes.push_back(OMP_MAP_NONE);
7888  CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy));
7889  }
7890  } else {
7891  assert(CI.capturesVariable() && "Expected captured reference.");
7892  CurBasePointers.push_back(CV);
7893  CurPointers.push_back(CV);
7894 
7895  const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
7896  QualType ElementType = PtrTy->getPointeeType();
7897  CurSizes.push_back(CGF.getTypeSize(ElementType));
7898  // The default map type for a scalar/complex type is 'to' because by
7899  // default the value doesn't have to be retrieved. For an aggregate
7900  // type, the default is 'tofrom'.
7901  CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
7902  }
7903  // Every default map produces a single argument which is a target parameter.
7904  CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
7905 
7906  // Add flag stating this is an implicit map.
7907  CurMapTypes.back() |= OMP_MAP_IMPLICIT;
7908  }
7909 };
7910 
7911 enum OpenMPOffloadingReservedDeviceIDs {
7912  /// Device ID if the device was not defined, runtime should get it
7913  /// from environment variables in the spec.
7914  OMP_DEVICEID_UNDEF = -1,
7915 };
7916 } // anonymous namespace
7917 
7918 /// Emit the arrays used to pass the captures and map information to the
7919 /// offloading runtime library. If there is no map or capture information,
7920 /// return nullptr by reference.
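/// Base pointers and pointers always go into stack temporaries that are filled
/// at run time. Sizes are emitted as a private constant global when every size
/// is a compile-time constant and as a runtime-filled array otherwise; map
/// types are always emitted as a constant global.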
7921 static void
7922 emitOffloadingArrays(CodeGenFunction &CGF,
7923  MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
7924  MappableExprsHandler::MapValuesArrayTy &Pointers,
7925  MappableExprsHandler::MapValuesArrayTy &Sizes,
7926  MappableExprsHandler::MapFlagsArrayTy &MapTypes,
7927  CGOpenMPRuntime::TargetDataInfo &Info) {
7928  CodeGenModule &CGM = CGF.CGM;
7929  ASTContext &Ctx = CGF.getContext();
7930 
7931  // Reset the array information.
7932  Info.clearArrayInfo();
7933  Info.NumberOfPtrs = BasePointers.size();
7934 
7935  if (Info.NumberOfPtrs) {
7936  // Detect if we have any capture size requiring runtime evaluation of the
7937  // size so that a constant array could be eventually used.
7938  bool hasRuntimeEvaluationCaptureSize = false;
7939  for (llvm::Value *S : Sizes)
7940  if (!isa<llvm::Constant>(S)) {
7941  hasRuntimeEvaluationCaptureSize = true;
7942  break;
7943  }
7944 
7945  llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
7946  QualType PointerArrayType =
7947  Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
7948  /*IndexTypeQuals=*/0);
7949 
7950  Info.BasePointersArray =
7951  CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
7952  Info.PointersArray =
7953  CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
7954 
7955  // If we don't have any VLA types or other types that require runtime
7956  // evaluation, we can use a constant array for the map sizes, otherwise we
7957  // need to fill up the arrays as we do for the pointers.
7958  if (hasRuntimeEvaluationCaptureSize) {
7959  QualType SizeArrayType = Ctx.getConstantArrayType(
7960  Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
7961  /*IndexTypeQuals=*/0);
7962  Info.SizesArray =
7963  CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
7964  } else {
7965  // We expect all the sizes to be constant, so we collect them to create
7966  // a constant array.
7967  SmallVector<llvm::Constant *, 16> ConstSizes;
7968  for (llvm::Value *S : Sizes)
7969  ConstSizes.push_back(cast<llvm::Constant>(S));
7970 
7971  auto *SizesArrayInit = llvm::ConstantArray::get(
7972  llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
7973  std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
7974  auto *SizesArrayGbl = new llvm::GlobalVariable(
7975  CGM.getModule(), SizesArrayInit->getType(),
7976  /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
7977  SizesArrayInit, Name);
7978  SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
7979  Info.SizesArray = SizesArrayGbl;
7980  }
7981 
7982  // The map types are always constant so we don't need to generate code to
7983  // fill arrays. Instead, we create an array constant.
7984  SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
7985  llvm::copy(MapTypes, Mapping.begin());
7986  llvm::Constant *MapTypesArrayInit =
7987  llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
7988  std::string MaptypesName =
7989  CGM.getOpenMPRuntime().getName({"offload_maptypes"});
7990  auto *MapTypesArrayGbl = new llvm::GlobalVariable(
7991  CGM.getModule(), MapTypesArrayInit->getType(),
7992  /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
7993  MapTypesArrayInit, MaptypesName);
7994  MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
7995  Info.MapTypesArray = MapTypesArrayGbl;
7996 
7997  for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
7998  llvm::Value *BPVal = *BasePointers[I];
7999  llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
8000  llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8001  Info.BasePointersArray, 0, I);
8002  BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8003  BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
8004  Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8005  CGF.Builder.CreateStore(BPVal, BPAddr);
8006 
8007  if (Info.requiresDevicePointerInfo())
8008  if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
8009  Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
8010 
8011  llvm::Value *PVal = Pointers[I];
8012  llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
8013  llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8014  Info.PointersArray, 0, I);
8015  P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8016  P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
8017  Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8018  CGF.Builder.CreateStore(PVal, PAddr);
8019 
8020  if (hasRuntimeEvaluationCaptureSize) {
8021  llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
8022  llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs),
8023  Info.SizesArray,
8024  /*Idx0=*/0,
8025  /*Idx1=*/I);
8026  Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
8027  CGF.Builder.CreateStore(
8028  CGF.Builder.CreateIntCast(Sizes[I], CGM.SizeTy, /*isSigned=*/true),
8029  SAddr);
8030  }
8031  }
8032  }
8033 }
8034 /// Emit the arguments to be passed to the runtime library based on the
8035 /// arrays of pointers, sizes and map types.
8036 static void emitOffloadingArraysArgument(
8037  CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8038  llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8039  llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8040  CodeGenModule &CGM = CGF.CGM;
8041  if (Info.NumberOfPtrs) {
8042  BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8043  llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8044  Info.BasePointersArray,
8045  /*Idx0=*/0, /*Idx1=*/0);
8046  PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8047  llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8048  Info.PointersArray,
8049  /*Idx0=*/0,
8050  /*Idx1=*/0);
8051  SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8052  llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray,
8053  /*Idx0=*/0, /*Idx1=*/0);
8054  MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8055  llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8056  Info.MapTypesArray,
8057  /*Idx0=*/0,
8058  /*Idx1=*/0);
8059  } else {
8060  BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8061  PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8062  SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
8063  MapTypesArrayArg =
8064  llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8065  }
8066 }
8067 
8068 /// Checks if the expression is constant or does not have non-trivial function
8069 /// calls.
8070 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
8071  // We can skip constant expressions.
8072  // We can skip expressions with trivial calls or simple expressions.
8073  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
8074  !E->hasNonTrivialCall(Ctx)) &&
8075  !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
8076 }
8077 
8078 /// Checks if the \p Body is the \a CompoundStmt and returns its child statement
8079 /// iff there is only one child that is not evaluatable at compile time.
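/// For example, a body such as '{ ; #pragma omp distribute ... }' collapses to
/// the distribute directive, while '{ foo(); ... }' does not, because the call
/// to foo() is neither trivial nor ignorable.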
8080 static const Stmt *getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body) {
8081  if (const auto *C = dyn_cast<CompoundStmt>(Body)) {
8082  const Stmt *Child = nullptr;
8083  for (const Stmt *S : C->body()) {
8084  if (const auto *E = dyn_cast<Expr>(S)) {
8085  if (isTrivial(Ctx, E))
8086  continue;
8087  }
8088  // Some of the statements can be ignored.
8089  if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
8090  isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
8091  continue;
8092  // Analyze declarations.
8093  if (const auto *DS = dyn_cast<DeclStmt>(S)) {
8094  if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
8095  if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
8096  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
8097  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
8098  isa<UsingDirectiveDecl>(D) ||
8099  isa<OMPDeclareReductionDecl>(D) ||
8100  isa<OMPThreadPrivateDecl>(D))
8101  return true;
8102  const auto *VD = dyn_cast<VarDecl>(D);
8103  if (!VD)
8104  return false;
8105  return VD->isConstexpr() ||
8106  ((VD->getType().isTrivialType(Ctx) ||
8107  VD->getType()->isReferenceType()) &&
8108  (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
8109  }))
8110  continue;
8111  }
8112  // Found multiple children - cannot get the one child only.
8113  if (Child)
8114  return Body;
8115  Child = S;
8116  }
8117  if (Child)
8118  return Child;
8119  }
8120  return Body;
8121 }
8122 
8123 /// Check for inner distribute directive.
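/// E.g., for a 'target' directive whose body is a 'teams' directive enclosing a
/// 'distribute' directive, the nested distribute directive is returned.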
8124 static const OMPExecutableDirective *
8125 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8126  const auto *CS = D.getInnermostCapturedStmt();
8127  const auto *Body =
8128  CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8129  const Stmt *ChildStmt = getSingleCompoundChild(Ctx, Body);
8130 
8131  if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
8132  OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8133  switch (D.getDirectiveKind()) {
8134  case OMPD_target:
8135  if (isOpenMPDistributeDirective(DKind))
8136  return NestedDir;
8137  if (DKind == OMPD_teams) {
8138  Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8139  /*IgnoreCaptured=*/true);
8140  if (!Body)
8141  return nullptr;
8142  ChildStmt = getSingleCompoundChild(Ctx, Body);
8143  if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
8144  DKind = NND->getDirectiveKind();
8145  if (isOpenMPDistributeDirective(DKind))
8146  return NND;
8147  }
8148  }
8149  return nullptr;
8150  case OMPD_target_teams:
8151  if (isOpenMPDistributeDirective(DKind))
8152  return NestedDir;
8153  return nullptr;
8154  case OMPD_target_parallel:
8155  case OMPD_target_simd:
8156  case OMPD_target_parallel_for:
8157  case OMPD_target_parallel_for_simd:
8158  return nullptr;
8159  case OMPD_target_teams_distribute:
8160  case OMPD_target_teams_distribute_simd:
8161  case OMPD_target_teams_distribute_parallel_for:
8162  case OMPD_target_teams_distribute_parallel_for_simd:
8163  case OMPD_parallel:
8164  case OMPD_for:
8165  case OMPD_parallel_for:
8166  case OMPD_parallel_sections:
8167  case OMPD_for_simd:
8168  case OMPD_parallel_for_simd:
8169  case OMPD_cancel:
8170  case OMPD_cancellation_point:
8171  case OMPD_ordered:
8172  case OMPD_threadprivate:
8173  case OMPD_task:
8174  case OMPD_simd:
8175  case OMPD_sections:
8176  case OMPD_section:
8177  case OMPD_single:
8178  case OMPD_master:
8179  case OMPD_critical:
8180  case OMPD_taskyield:
8181  case OMPD_barrier:
8182  case OMPD_taskwait:
8183  case OMPD_taskgroup:
8184  case OMPD_atomic:
8185  case OMPD_flush:
8186  case OMPD_teams:
8187  case OMPD_target_data:
8188  case OMPD_target_exit_data:
8189  case OMPD_target_enter_data:
8190  case OMPD_distribute:
8191  case OMPD_distribute_simd:
8192  case OMPD_distribute_parallel_for:
8193  case OMPD_distribute_parallel_for_simd:
8194  case OMPD_teams_distribute:
8195  case OMPD_teams_distribute_simd:
8196  case OMPD_teams_distribute_parallel_for:
8197  case OMPD_teams_distribute_parallel_for_simd:
8198  case OMPD_target_update:
8199  case OMPD_declare_simd:
8200  case OMPD_declare_target:
8201  case OMPD_end_declare_target:
8202  case OMPD_declare_reduction:
8203  case OMPD_taskloop:
8204  case OMPD_taskloop_simd:
8205  case OMPD_requires:
8206  case OMPD_unknown:
8207  llvm_unreachable("Unexpected directive.");
8208  }
8209  }
8210 
8211  return nullptr;
8212 }
8213 
8214 void CGOpenMPRuntime::emitTargetNumIterationsCall(
8215  CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
8216  const llvm::function_ref<llvm::Value *(
8217  CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) {
8218  OpenMPDirectiveKind Kind = D.getDirectiveKind();
8219  const OMPExecutableDirective *TD = &D;
8220  // Get nested teams distribute kind directive, if any.
8221  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
8222  TD = getNestedDistributeDirective(CGM.getContext(), D);
8223  if (!TD)
8224  return;
8225  const auto *LD = cast<OMPLoopDirective>(TD);
8226  auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF,
8227  PrePostActionTy &) {
8228  llvm::Value *NumIterations = SizeEmitter(CGF, *LD);
8229 
8230  // Emit device ID if any.
8231  llvm::Value *DeviceID;
8232  if (Device)
8233  DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
8234  CGF.Int64Ty, /*isSigned=*/true);
8235  else
8236  DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
8237 
8238  llvm::Value *Args[] = {DeviceID, NumIterations};
8239  CGF.EmitRuntimeCall(
8240  createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
8241  };
8242  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
8243 }
8244 
8245 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
8246  const OMPExecutableDirective &D,
8247  llvm::Value *OutlinedFn,
8248  llvm::Value *OutlinedFnID,
8249  const Expr *IfCond, const Expr *Device) {
8250  if (!CGF.HaveInsertPoint())
8251  return;
8252 
8253  assert(OutlinedFn && "Invalid outlined function!");
8254 
8255  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
8256  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
8257  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
8258  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
8259  PrePostActionTy &) {
8260  CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
8261  };
8262  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
8263 
8264  CodeGenFunction::OMPTargetDataInfo InputInfo;
8265  llvm::Value *MapTypesArray = nullptr;
8266  // Fill up the pointer arrays and transfer execution to the device.
8267  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
8268  &MapTypesArray, &CS, RequiresOuterTask,
8269  &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) {
8270  // On top of the arrays that were filled up, the target offloading call
8271  // takes as arguments the device id as well as the host pointer. The host
8272  // pointer is used by the runtime library to identify the current target
8273  // region, so it only has to be unique and not necessarily point to
8274  // anything. It could be the pointer to the outlined function that
8275  // implements the target region, but we aren't using that so that the
8276  // compiler doesn't need to keep that, and could therefore inline the host
8277  // function if proven worthwhile during optimization.
8278 
8279  // From this point on, we need to have an ID of the target region defined.
8280  assert(OutlinedFnID && "Invalid outlined function ID!");
8281 
8282  // Emit device ID if any.
8283  llvm::Value *DeviceID;
8284  if (Device) {
8285  DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
8286  CGF.Int64Ty, /*isSigned=*/true);
8287  } else {
8288  DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
8289  }
8290 
8291  // Emit the number of elements in the offloading arrays.
8292  llvm::Value *PointerNum =
8293  CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
8294 
8295  // Return value of the runtime offloading call.
8296  llvm::Value *Return;
8297 
8298  llvm::Value *NumTeams = emitNumTeamsForTargetDirective(*this, CGF, D);
8299  llvm::Value *NumThreads = emitNumThreadsForTargetDirective(*this, CGF, D);
8300 
8301  bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
8302  // The target region is an outlined function launched by the runtime
8303  // via calls __tgt_target() or __tgt_target_teams().
8304  //
8305  // __tgt_target() launches a target region with one team and one thread,
8306  // executing a serial region. This master thread may in turn launch
8307  // more threads within its team upon encountering a parallel region,
8308  // however, no additional teams can be launched on the device.
8309  //
8310  // __tgt_target_teams() launches a target region with one or more teams,
8311  // each with one or more threads. This call is required for target
8312  // constructs such as:
8313  // 'target teams'
8314  // 'target' / 'teams'
8315  // 'target teams distribute parallel for'
8316  // 'target parallel'
8317  // and so on.
8318  //
8319  // Note that on the host and CPU targets, the runtime implementation of
8320  // these calls simply call the outlined function without forking threads.
8321  // The outlined functions themselves have runtime calls to
8322  // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
8323  // the compiler in emitTeamsCall() and emitParallelCall().
8324  //
8325  // In contrast, on the NVPTX target, the implementation of
8326  // __tgt_target_teams() launches a GPU kernel with the requested number
8327  // of teams and threads so no additional calls to the runtime are required.
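// The argument lists built below therefore mirror these entry points: device
// id, host region id, number of arguments, the base pointer / pointer / size /
// map type arrays, and, for __tgt_target_teams(), the team and thread counts.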
8328  if (NumTeams) {
8329  // If we have NumTeams defined this means that we have an enclosed teams
8330  // region. Therefore we also expect to have NumThreads defined. These two
8331  // values should be defined in the presence of a teams directive,
8332  // regardless of having any clauses associated. If the user is using teams
8333  // but no clauses, these two values will be the default that should be
8334  // passed to the runtime library - a 32-bit integer with the value zero.
8335  assert(NumThreads && "Thread limit expression should be available along "
8336  "with number of teams.");
8337  llvm::Value *OffloadingArgs[] = {DeviceID,
8338  OutlinedFnID,
8339  PointerNum,
8340  InputInfo.BasePointersArray.getPointer(),
8341  InputInfo.PointersArray.getPointer(),
8342  InputInfo.SizesArray.getPointer(),
8343  MapTypesArray,
8344  NumTeams,
8345  NumThreads};
8346  Return = CGF.EmitRuntimeCall(
8347  createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
8348  : OMPRTL__tgt_target_teams),
8349  OffloadingArgs);
8350  } else {
8351  llvm::Value *OffloadingArgs[] = {DeviceID,
8352  OutlinedFnID,
8353  PointerNum,
8354  InputInfo.BasePointersArray.getPointer(),
8355  InputInfo.PointersArray.getPointer(),
8356  InputInfo.SizesArray.getPointer(),
8357  MapTypesArray};
8358  Return = CGF.EmitRuntimeCall(
8359  createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
8360  : OMPRTL__tgt_target),
8361  OffloadingArgs);
8362  }
8363 
8364  // Check the error code and execute the host version if required.
8365  llvm::BasicBlock *OffloadFailedBlock =
8366  CGF.createBasicBlock("omp_offload.failed");
8367  llvm::BasicBlock *OffloadContBlock =
8368  CGF.createBasicBlock("omp_offload.cont");
8369  llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
8370  CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
8371 
8372  CGF.EmitBlock(OffloadFailedBlock);
8373  if (RequiresOuterTask) {
8374  CapturedVars.clear();
8375  CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
8376  }
8377  emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
8378  CGF.EmitBranch(OffloadContBlock);
8379 
8380  CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
8381  };
8382 
8383  // Notify that the host version must be executed.
8384  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
8385  RequiresOuterTask](CodeGenFunction &CGF,
8386  PrePostActionTy &) {
8387  if (RequiresOuterTask) {
8388  CapturedVars.clear();
8389  CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
8390  }
8391  emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
8392  };
8393 
8394  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
8395  &CapturedVars, RequiresOuterTask,
8396  &CS](CodeGenFunction &CGF, PrePostActionTy &) {
8397  // Fill up the arrays with all the captured variables.
8398  MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
8399  MappableExprsHandler::MapValuesArrayTy Pointers;
8400  MappableExprsHandler::MapValuesArrayTy Sizes;
8401  MappableExprsHandler::MapFlagsArrayTy MapTypes;
8402 
8403  // Get mappable expression information.
8404  MappableExprsHandler MEHandler(D, CGF);
8405  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
8406 
8407  auto RI = CS.getCapturedRecordDecl()->field_begin();
8408  auto CV = CapturedVars.begin();
8409  for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
8410  CE = CS.capture_end();
8411  CI != CE; ++CI, ++RI, ++CV) {
8412  MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
8413  MappableExprsHandler::MapValuesArrayTy CurPointers;
8414  MappableExprsHandler::MapValuesArrayTy CurSizes;
8415  MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
8416  MappableExprsHandler::StructRangeInfoTy PartialStruct;
8417 
8418  // VLA sizes are passed to the outlined region by copy and do not have map
8419  // information associated.
8420  if (CI->capturesVariableArrayType()) {
8421  CurBasePointers.push_back(*CV);
8422  CurPointers.push_back(*CV);
8423  CurSizes.push_back(CGF.getTypeSize(RI->getType()));
8424  // Copy to the device as an argument. No need to retrieve it.
8425  CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
8426  MappableExprsHandler::OMP_MAP_TARGET_PARAM);
8427  } else {
8428  // If we have any information in the map clause, we use it, otherwise we
8429  // just do a default mapping.
8430  MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
8431  CurSizes, CurMapTypes, PartialStruct);
8432  if (CurBasePointers.empty())
8433  MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
8434  CurPointers, CurSizes, CurMapTypes);
8435  // Generate correct mapping for variables captured by reference in
8436  // lambdas.
8437  if (CI->capturesVariable())
8438  MEHandler.generateInfoForLambdaCaptures(
8439  CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
8440  CurMapTypes, LambdaPointers);
8441  }
8442  // We expect to have at least an element of information for this capture.
8443  assert(!CurBasePointers.empty() &&
8444  "Non-existing map pointer for capture!");
8445  assert(CurBasePointers.size() == CurPointers.size() &&
8446  CurBasePointers.size() == CurSizes.size() &&
8447  CurBasePointers.size() == CurMapTypes.size() &&
8448  "Inconsistent map information sizes!");
8449 
8450  // If there is an entry in PartialStruct it means we have a struct with
8451  // individual members mapped. Emit an extra combined entry.
8452  if (PartialStruct.Base.isValid())
8453  MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
8454  CurMapTypes, PartialStruct);
8455 
8456  // We need to append the results of this capture to what we already have.
8457  BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8458  Pointers.append(CurPointers.begin(), CurPointers.end());
8459  Sizes.append(CurSizes.begin(), CurSizes.end());
8460  MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
8461  }
8462  // Adjust MEMBER_OF flags for the lambdas captures.
8463  MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
8464  Pointers, MapTypes);
8465  // Map other list items in the map clause which are not captured variables
8466  // but "declare target link" global variables.
8467  MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
8468  MapTypes);
8469 
8470  TargetDataInfo Info;
8471  // Fill up the arrays and create the arguments.
8472  emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
8473  emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
8474  Info.PointersArray, Info.SizesArray,
8475  Info.MapTypesArray, Info);
8476  InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
8477  InputInfo.BasePointersArray =
8478  Address(Info.BasePointersArray, CGM.getPointerAlign());
8479  InputInfo.PointersArray =
8480  Address(Info.PointersArray, CGM.getPointerAlign());
8481  InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
8482  MapTypesArray = Info.MapTypesArray;
8483  if (RequiresOuterTask)
8484  CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
8485  else
8486  emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
8487  };
8488 
8489  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
8490  CodeGenFunction &CGF, PrePostActionTy &) {
8491  if (RequiresOuterTask) {
8492  CodeGenFunction::OMPTargetDataInfo InputInfo;
8493  CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
8494  } else {
8495  emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
8496  }
8497  };
8498 
8499  // If we have a target function ID it means that we need to support
8500  // offloading; otherwise, just execute on the host. We need to execute on the
8501  // host regardless of the conditional in the if clause if, e.g., the user does
8502  // not specify target triples.
8503  if (OutlinedFnID) {
8504  if (IfCond) {
8505  emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
8506  } else {
8507  RegionCodeGenTy ThenRCG(TargetThenGen);
8508  ThenRCG(CGF);
8509  }
8510  } else {
8511  RegionCodeGenTy ElseRCG(TargetElseGen);
8512  ElseRCG(CGF);
8513  }
8514 }
8515 
8516 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
8517  StringRef ParentName) {
8518  if (!S)
8519  return;
8520 
8521  // Codegen OMP target directives that offload compute to the device.
8522  bool RequiresDeviceCodegen =
8523  isa<OMPExecutableDirective>(S) &&
8524  isOpenMPTargetExecutionDirective(
8525  cast<OMPExecutableDirective>(S)->getDirectiveKind());
8526 
8527  if (RequiresDeviceCodegen) {
8528  const auto &E = *cast<OMPExecutableDirective>(S);
8529  unsigned DeviceID;
8530  unsigned FileID;
8531  unsigned Line;
8532  getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
8533  FileID, Line);
8534 
8535  // Is this a target region that should not be emitted as an entry point? If
8536  // so just signal we are done with this target region.
8537  if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
8538  ParentName, Line))
8539  return;
8540 
8541  switch (E.getDirectiveKind()) {
8542  case OMPD_target:
8543  CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
8544  cast<OMPTargetDirective>(E));
8545  break;
8546  case OMPD_target_parallel:
8547  CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
8548  CGM, ParentName, cast<OMPTargetParallelDirective>(E));
8549  break;
8550  case OMPD_target_teams:
8551  CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
8552  CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
8553  break;
8554  case OMPD_target_teams_distribute:
8555  CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
8556  CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
8557  break;
8558  case OMPD_target_teams_distribute_simd:
8559  CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
8560  CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
8561  break;
8562  case OMPD_target_parallel_for:
8563  CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
8564  CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
8565  break;
8566  case OMPD_target_parallel_for_simd:
8567  CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
8568  CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
8569  break;
8570  case OMPD_target_simd:
8571  CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
8572  CGM, ParentName, cast<OMPTargetSimdDirective>(E));
8573  break;
8574  case OMPD_target_teams_distribute_parallel_for:
8575  CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
8576  CGM, ParentName,
8577  cast<OMPTargetTeamsDistributeParallelForDirective>(E));
8578  break;
8579  case OMPD_target_teams_distribute_parallel_for_simd:
8580  CodeGenFunction::
8581  EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
8582  CGM, ParentName,
8583  cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
8584  break;
8585  case OMPD_parallel:
8586  case OMPD_for:
8587  case OMPD_parallel_for:
8588  case OMPD_parallel_sections:
8589  case OMPD_for_simd:
8590  case OMPD_parallel_for_simd:
8591  case OMPD_cancel:
8592  case OMPD_cancellation_point:
8593  case OMPD_ordered:
8594  case OMPD_threadprivate:
8595  case OMPD_task:
8596  case OMPD_simd:
8597  case OMPD_sections:
8598  case OMPD_section:
8599  case OMPD_single:
8600  case OMPD_master:
8601  case OMPD_critical:
8602  case OMPD_taskyield:
8603  case OMPD_barrier:
8604  case OMPD_taskwait:
8605  case OMPD_taskgroup:
8606  case OMPD_atomic:
8607  case OMPD_flush:
8608  case OMPD_teams:
8609  case OMPD_target_data:
8610  case OMPD_target_exit_data:
8611  case OMPD_target_enter_data:
8612  case OMPD_distribute:
8613  case OMPD_distribute_simd:
8614  case OMPD_distribute_parallel_for:
8615  case OMPD_distribute_parallel_for_simd:
8616  case OMPD_teams_distribute:
8617  case OMPD_teams_distribute_simd:
8618  case OMPD_teams_distribute_parallel_for:
8619  case OMPD_teams_distribute_parallel_for_simd:
8620  case OMPD_target_update:
8621  case OMPD_declare_simd:
8622  case OMPD_declare_target:
8623  case OMPD_end_declare_target:
8624  case OMPD_declare_reduction:
8625  case OMPD_taskloop:
8626  case OMPD_taskloop_simd:
8627  case OMPD_requires:
8628  case OMPD_unknown:
8629  llvm_unreachable("Unknown target directive for OpenMP device codegen.");
8630  }
8631  return;
8632  }
8633 
8634  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
8635  if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
8636  return;
8637 
8638  scanForTargetRegionsFunctions(
8639  E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
8640  return;
8641  }
8642 
8643  // If this is a lambda function, look into its body.
8644  if (const auto *L = dyn_cast<LambdaExpr>(S))
8645  S = L->getBody();
8646 
8647  // Keep looking for target regions recursively.
8648  for (const Stmt *II : S->children())
8649  scanForTargetRegionsFunctions(II, ParentName);
8650 }
8651 
8652 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
8653  // If emitting code for the host, we do not process FD here. Instead we do
8654  // the normal code generation.
8655  if (!CGM.getLangOpts().OpenMPIsDevice)
8656  return false;
8657 
8658  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
8659  StringRef Name = CGM.getMangledName(GD);
8660  // Try to detect target regions in the function.
8661  if (const auto *FD = dyn_cast<FunctionDecl>(VD))
8662  scanForTargetRegionsFunctions(FD->getBody(), Name);
8663 
8664  // Do not emit the function if it is not marked as declare target.
8665  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
8666  AlreadyEmittedTargetFunctions.count(Name) == 0;
8667 }
8668 
8669 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
8670  if (!CGM.getLangOpts().OpenMPIsDevice)
8671  return false;
8672 
8673  // Check if there are Ctors/Dtors in this declaration and look for target
8674  // regions in it. We use the complete variant to produce the kernel name
8675  // mangling.
8676  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
8677  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
8678  for (const CXXConstructorDecl *Ctor : RD->ctors()) {
8679  StringRef ParentName =
8680  CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
8681  scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
8682  }
8683  if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
8684  StringRef ParentName =
8685  CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
8686  scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
8687  }
8688  }
8689 
8690  // Do not emit the variable if it is not marked as declare target.
8691  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8692  OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
8693  cast<VarDecl>(GD.getDecl()));
8694  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) {
8695  DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
8696  return true;
8697  }
8698  return false;
8699 }
8700 
8701 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
8702  llvm::Constant *Addr) {
8703  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8704  OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8705  if (!Res) {
8706  if (CGM.getLangOpts().OpenMPIsDevice) {
8707  // Register non-target variables being emitted in device code (debug info
8708  // may cause this).
8709  StringRef VarName = CGM.getMangledName(VD);
8710  EmittedNonTargetVariables.try_emplace(VarName, Addr);
8711  }
8712  return;
8713  }
8714  // Register declare target variables.
8715  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
8716  StringRef VarName;
8717  CharUnits VarSize;
8718  llvm::GlobalValue::LinkageTypes Linkage;
8719  switch (*Res) {
8720  case OMPDeclareTargetDeclAttr::MT_To:
8721  Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
8722  VarName = CGM.getMangledName(VD);
8723  if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
8724  VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
8725  assert(!VarSize.isZero() && "Expected non-zero size of the variable");
8726  } else {
8727  VarSize = CharUnits::Zero();
8728  }
8729  Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
8730  // Temp solution to prevent optimizations of the internal variables.
8731  if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
8732  std::string RefName = getName({VarName, "ref"});
8733  if (!CGM.GetGlobalValue(RefName)) {
8734  llvm::Constant *AddrRef =
8735  getOrCreateInternalVariable(Addr->getType(), RefName);
8736  auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
8737  GVAddrRef->setConstant(/*Val=*/true);
8738  GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
8739  GVAddrRef->setInitializer(Addr);
8740  CGM.addCompilerUsedGlobal(GVAddrRef);
8741  }
8742  }
8743  break;
8744  case OMPDeclareTargetDeclAttr::MT_Link:
8745  Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
8746  if (CGM.getLangOpts().OpenMPIsDevice) {
8747  VarName = Addr->getName();
8748  Addr = nullptr;
8749  } else {
8750  VarName = getAddrOfDeclareTargetLink(VD).getName();
8751  Addr = cast<llvm::Constant>(getAddrOfDeclareTargetLink(VD).getPointer());
8752  }
8753  VarSize = CGM.getPointerSize();
8754  Linkage = llvm::GlobalValue::WeakAnyLinkage;
8755  break;
8756  }
8757  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
8758  VarName, Addr, VarSize, Flags, Linkage);
8759 }
8760 
8761 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
8762  if (isa<FunctionDecl>(GD.getDecl()) ||
8763  isa<OMPDeclareReductionDecl>(GD.getDecl()))
8764  return emitTargetFunctions(GD);
8765 
8766  return emitTargetGlobalVariable(GD);
8767 }
8768 
8770  for (const VarDecl *VD : DeferredGlobalVariables) {
8772  OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8773  if (!Res)
8774  continue;
8775  if (*Res == OMPDeclareTargetDeclAttr::MT_To) {
8776  CGM.EmitGlobal(VD);
8777  } else {
8778  assert(*Res == OMPDeclareTargetDeclAttr::MT_Link &&
8779  "Expected to or link clauses.");
8780  (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
8781  }
8782  }
8783 }
8784 
8786  CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
8788  " Expected target-based directive.");
8789 }
8790 
8792  CodeGenModule &CGM)
8793  : CGM(CGM) {
8794  if (CGM.getLangOpts().OpenMPIsDevice) {
8795  SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
8796  CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
8797  }
8798 }
8799 
8801  if (CGM.getLangOpts().OpenMPIsDevice)
8802  CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
8803 }
8804 
8806  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
8807  return true;
8808 
8809  StringRef Name = CGM.getMangledName(GD);
8810  const auto *D = cast<FunctionDecl>(GD.getDecl());
8811  // Do not emit the function if it is marked as declare target, as it was
8812  // already emitted.
8813  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
8814  if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) {
8815  if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name)))
8816  return !F->isDeclaration();
8817  return false;
8818  }
8819  return true;
8820  }
8821 
8822  return !AlreadyEmittedTargetFunctions.insert(Name).second;
8823 }
8824 
8826  // If we have offloading in the current module, we need to emit the entries
8827  // now and register the offloading descriptor.
8829 
8830  // Create and register the offloading binary descriptors. This is the main
8831  // entity that captures all the information about offloading in the current
8832  // compilation unit.
8834 }
8835 
8837  const OMPExecutableDirective &D,
8838  SourceLocation Loc,
8839  llvm::Value *OutlinedFn,
8840  ArrayRef<llvm::Value *> CapturedVars) {
8841  if (!CGF.HaveInsertPoint())
8842  return;
8843 
8844  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
8846 
8847  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
8848  llvm::Value *Args[] = {
8849  RTLoc,
8850  CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
8851  CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
8853  RealArgs.append(std::begin(Args), std::end(Args));
8854  RealArgs.append(CapturedVars.begin(), CapturedVars.end());
8855 
8857  CGF.EmitRuntimeCall(RTLFn, RealArgs);
8858 }
8859 
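For orientation only, a hypothetical teams region that ends up as a __kmpc_fork_teams call of the shape built above; the actual output is LLVM IR, and the variable names are made up for the example.

// Illustrative only: the region body is outlined into the "microtask" passed
// to __kmpc_fork_teams(loc, n, microtask, var1, ..., varn); here n == 1 and
// the single forwarded capture is 'partial'.
void teams_example() {
  int partial[8] = {0};
#pragma omp target teams map(tofrom : partial)
  {
    partial[0] = 1;
  }
}
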
8861  const Expr *NumTeams,
8862  const Expr *ThreadLimit,
8863  SourceLocation Loc) {
8864  if (!CGF.HaveInsertPoint())
8865  return;
8866 
8867  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
8868 
8869  llvm::Value *NumTeamsVal =
8870  NumTeams
8871  ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
8872  CGF.CGM.Int32Ty, /* isSigned = */ true)
8873  : CGF.Builder.getInt32(0);
8874 
8875  llvm::Value *ThreadLimitVal =
8876  ThreadLimit
8877  ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
8878  CGF.CGM.Int32Ty, /* isSigned = */ true)
8879  : CGF.Builder.getInt32(0);
8880 
8881  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
8882  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
8883  ThreadLimitVal};
8885  PushNumTeamsArgs);
8886 }
8887 
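A hypothetical source-level trigger for the push call above (not code from this file).

// Illustrative only: both clause expressions are cast to i32 and passed to
// __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit) before
// the teams fork; an absent clause is passed as 0, meaning "use the default".
void teams_clauses_example() {
#pragma omp target
#pragma omp teams num_teams(4) thread_limit(32)
  {
    // region body
  }
}
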
8889  CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
8890  const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
8891  if (!CGF.HaveInsertPoint())
8892  return;
8893 
8894  // Action used to replace the default codegen action and turn privatization
8895  // off.
8896  PrePostActionTy NoPrivAction;
8897 
8898  // Generate the code for the opening of the data environment. Capture all the
8899  // arguments of the runtime call by reference because they are used in the
8900  // closing of the region.
8901  auto &&BeginThenGen = [this, &D, Device, &Info,
8902  &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
8903  // Fill up the arrays with all the mapped variables.
8905  MappableExprsHandler::MapValuesArrayTy Pointers;
8906  MappableExprsHandler::MapValuesArrayTy Sizes;
8908 
8909  // Get map clause information.
8910  MappableExprsHandler MCHandler(D, CGF);
8911  MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
8912 
8913  // Fill up the arrays and create the arguments.
8914  emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
8915 
8916  llvm::Value *BasePointersArrayArg = nullptr;
8917  llvm::Value *PointersArrayArg = nullptr;
8918  llvm::Value *SizesArrayArg = nullptr;
8919  llvm::Value *MapTypesArrayArg = nullptr;
8920  emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
8921  SizesArrayArg, MapTypesArrayArg, Info);
8922 
8923  // Emit device ID if any.
8924  llvm::Value *DeviceID = nullptr;
8925  if (Device) {
8926  DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
8927  CGF.Int64Ty, /*isSigned=*/true);
8928  } else {
8929  DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
8930  }
8931 
8932  // Emit the number of elements in the offloading arrays.
8933  llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
8934 
8935  llvm::Value *OffloadingArgs[] = {
8936  DeviceID, PointerNum, BasePointersArrayArg,
8937  PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
8939  OffloadingArgs);
8940 
8941  // If device pointer privatization is required, emit the body of the region
8942  // here. It will have to be duplicated: with and without privatization.
8943  if (!Info.CaptureDeviceAddrMap.empty())
8944  CodeGen(CGF);
8945  };
8946 
8947  // Generate code for the closing of the data region.
8948  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
8949  PrePostActionTy &) {
8950  assert(Info.isValid() && "Invalid data environment closing arguments.");
8951 
8952  llvm::Value *BasePointersArrayArg = nullptr;
8953  llvm::Value *PointersArrayArg = nullptr;
8954  llvm::Value *SizesArrayArg = nullptr;
8955  llvm::Value *MapTypesArrayArg = nullptr;
8956  emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
8957  SizesArrayArg, MapTypesArrayArg, Info);
8958 
8959  // Emit device ID if any.
8960  llvm::Value *DeviceID = nullptr;
8961  if (Device) {
8962  DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
8963  CGF.Int64Ty, /*isSigned=*/true);
8964  } else {
8965  DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
8966  }
8967 
8968  // Emit the number of elements in the offloading arrays.
8969  llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
8970 
8971  llvm::Value *OffloadingArgs[] = {
8972  DeviceID, PointerNum, BasePointersArrayArg,
8973  PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
8975  OffloadingArgs);
8976  };
8977 
8978  // If we need device pointer privatization, we need to emit the body of the
8979  // region with no privatization in the 'else' branch of the conditional.
8980  // Otherwise, we don't have to do anything.
8981  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
8982  PrePostActionTy &) {
8983  if (!Info.CaptureDeviceAddrMap.empty()) {
8984  CodeGen.setAction(NoPrivAction);
8985  CodeGen(CGF);
8986  }
8987  };
8988 
8989  // We don't have to do anything to close the region if the if clause evaluates
8990  // to false.
8991  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
8992 
8993  if (IfCond) {
8994  emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
8995  } else {
8996  RegionCodeGenTy RCG(BeginThenGen);
8997  RCG(CGF);
8998  }
8999 
9000  // If we don't require privatization of device pointers, we emit the body in
9001  // between the runtime calls. This avoids duplicating the body code.
9002  if (Info.CaptureDeviceAddrMap.empty()) {
9003  CodeGen.setAction(NoPrivAction);
9004  CodeGen(CGF);
9005  }
9006 
9007  if (IfCond) {
9008  emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
9009  } else {
9010  RegionCodeGenTy RCG(EndThenGen);
9011  RCG(CGF);
9012  }
9013 }
9014 
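An illustration of the begin/body/end structure emitted above, using hypothetical user code.

// Illustrative only: the map clauses feed the offloading arrays passed to
// __tgt_target_data_begin, the body runs in between, and a matching
// __tgt_target_data_end closes the region.  With both an if clause and
// use_device_ptr(a), the body is emitted twice: with the privatized device
// pointer on the 'then' path and unprivatized on the 'else' path.
void target_data_example(float *a, int n) {
#pragma omp target data if(n > 64) map(tofrom : a[0 : n]) use_device_ptr(a)
  {
#pragma omp target is_device_ptr(a)
    for (int i = 0; i < n; ++i)
      a[i] += 1.0f;
  }
}
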
9016  CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
9017  const Expr *Device) {
9018  if (!CGF.HaveInsertPoint())
9019  return;
9020 
9021  assert((isa<OMPTargetEnterDataDirective>(D) ||
9022  isa<OMPTargetExitDataDirective>(D) ||
9023  isa<OMPTargetUpdateDirective>(D)) &&
9024  "Expecting either target enter, exit data, or update directives.");
9025 
9027  llvm::Value *MapTypesArray = nullptr;
9028  // Generate the code for the opening of the data environment.
9029  auto &&ThenGen = [this, &D, Device, &InputInfo,
9030  &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
9031  // Emit device ID if any.
9032  llvm::Value *DeviceID = nullptr;
9033  if (Device) {
9034  DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
9035  CGF.Int64Ty, /*isSigned=*/true);
9036  } else {
9037  DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9038  }
9039 
9040  // Emit the number of elements in the offloading arrays.
9041  llvm::Constant *PointerNum =
9042  CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
9043 
9044  llvm::Value *OffloadingArgs[] = {DeviceID,
9045  PointerNum,
9046  InputInfo.BasePointersArray.getPointer(),
9047  InputInfo.PointersArray.getPointer(),
9048  InputInfo.SizesArray.getPointer(),
9049  MapTypesArray};
9050 
9051  // Select the right runtime function call for each expected standalone
9052  // directive.
9053  const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
9054  OpenMPRTLFunction RTLFn;
9055  switch (D.getDirectiveKind()) {
9056  case OMPD_target_enter_data:
9057  RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
9059  break;
9060  case OMPD_target_exit_data:
9061  RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
9063  break;
9064  case OMPD_target_update:
9065  RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
9067  break;
9068  case OMPD_parallel:
9069  case OMPD_for:
9070  case OMPD_parallel_for:
9071  case OMPD_parallel_sections:
9072  case OMPD_for_simd:
9073  case OMPD_parallel_for_simd:
9074  case OMPD_cancel:
9075  case OMPD_cancellation_point:
9076  case OMPD_ordered:
9077  case OMPD_threadprivate:
9078  case OMPD_task:
9079  case OMPD_simd:
9080  case OMPD_sections:
9081  case OMPD_section:
9082  case OMPD_single:
9083  case OMPD_master:
9084  case OMPD_critical:
9085  case OMPD_taskyield:
9086  case OMPD_barrier:
9087  case OMPD_taskwait:
9088  case OMPD_taskgroup:
9089  case OMPD_atomic:
9090  case OMPD_flush:
9091  case OMPD_teams:
9092  case OMPD_target_data:
9093  case OMPD_distribute:
9094  case OMPD_distribute_simd:
9095  case OMPD_distribute_parallel_for:
9096  case OMPD_distribute_parallel_for_simd:
9097  case OMPD_teams_distribute:
9098  case OMPD_teams_distribute_simd:
9099  case OMPD_teams_distribute_parallel_for:
9100  case OMPD_teams_distribute_parallel_for_simd:
9101  case OMPD_declare_simd:
9102  case OMPD_declare_target:
9103  case OMPD_end_declare_target:
9104  case OMPD_declare_reduction:
9105  case OMPD_taskloop:
9106  case OMPD_taskloop_simd:
9107  case OMPD_target:
9108  case OMPD_target_simd:
9109  case OMPD_target_teams_distribute:
9110  case OMPD_target_teams_distribute_simd:
9111  case OMPD_target_teams_distribute_parallel_for:
9112  case OMPD_target_teams_distribute_parallel_for_simd:
9113  case OMPD_target_teams:
9114  case OMPD_target_parallel:
9115  case OMPD_target_parallel_for:
9116  case OMPD_target_parallel_for_simd:
9117  case OMPD_requires:
9118  case OMPD_unknown:
9119  llvm_unreachable("Unexpected standalone target data directive.");
9120  break;
9121  }
9122  CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
9123  };
9124 
9125  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
9126  CodeGenFunction &CGF, PrePostActionTy &) {
9127  // Fill up the arrays with all the mapped variables.
9129  MappableExprsHandler::MapValuesArrayTy Pointers;
9130  MappableExprsHandler::MapValuesArrayTy Sizes;
9132 
9133  // Get map clause information.
9134  MappableExprsHandler MEHandler(D, CGF);
9135  MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
9136 
9137  TargetDataInfo Info;
9138  // Fill up the arrays and create the arguments.
9139  emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
9141  Info.PointersArray, Info.SizesArray,
9142  Info.MapTypesArray, Info);
9143  InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9144  InputInfo.BasePointersArray =
9146  InputInfo.PointersArray =
9147  Address(Info.PointersArray, CGM.getPointerAlign());
9148  InputInfo.SizesArray =
9149  Address(Info.SizesArray, CGM.getPointerAlign());
9150  MapTypesArray = Info.MapTypesArray;
9151  if (D.hasClausesOfKind<OMPDependClause>())
9152  CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9153  else
9154  emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9155  };
9156 
9157  if (IfCond) {
9158  emitOMPIfClause(CGF, IfCond, TargetThenGen,
9159  [](CodeGenFunction &CGF, PrePostActionTy &) {});
9160  } else {
9161  RegionCodeGenTy ThenRCG(TargetThenGen);
9162  ThenRCG(CGF);
9163  }
9164 }
9165 
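A hypothetical example of the standalone data directives dispatched above and the runtime entry each one selects.

// Illustrative only: enter data -> __tgt_target_data_begin, exit data ->
// __tgt_target_data_end, update -> __tgt_target_data_update; a nowait clause
// selects the *_nowait variant, and a depend clause routes the call through
// the task-based codegen path instead of an inlined region.
void standalone_data_example(double *buf, int n) {
#pragma omp target enter data map(to : buf[0 : n])
#pragma omp target update from(buf[0 : n]) nowait
#pragma omp target exit data map(from : buf[0 : n])
}
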
9166 namespace {
9167  /// Kind of parameter in a function with 'declare simd' directive.
9168  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
9169  /// Attribute set of the parameter.
9170  struct ParamAttrTy {
9171  ParamKindTy Kind = Vector;
9172  llvm::APSInt StrideOrArg;
9173  llvm::APSInt Alignment;
9174  };
9175 } // namespace
9176 
9177 static unsigned evaluateCDTSize(const FunctionDecl *FD,
9178  ArrayRef<ParamAttrTy> ParamAttrs) {
9179  // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
9180  // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
9181  // of that clause. The VLEN value must be a power of 2.
9182  // Otherwise, the notion of the function's "characteristic data type" (CDT)
9183  // is used to compute the vector length.
9184  // CDT is defined in the following order:
9185  // a) For non-void function, the CDT is the return type.
9186  // b) If the function has any non-uniform, non-linear parameters, then the
9187  // CDT is the type of the first such parameter.
9188  // c) If the CDT determined by a) or b) above is struct, union, or class
9189  // type which is pass-by-value (except for the type that maps to the
9190  // built-in complex data type), the characteristic data type is int.
9191  // d) If none of the above three cases is applicable, the CDT is int.
9192  // The VLEN is then determined from the CDT and the size of the vector
9193  // register of the ISA for which the current vector variant is generated. The
9194  // VLEN is computed using the formula below:
9195  // VLEN = sizeof(vector_register) / sizeof(CDT),
9196  // where the vector register size is specified in section 3.2.1 "Registers and
9197  // the Stack Frame" of the original AMD64 ABI document.
9198  QualType RetType = FD->getReturnType();
9199  if (RetType.isNull())
9200  return 0;
9201  ASTContext &C = FD->getASTContext();
9202  QualType CDT;
9203  if (!RetType.isNull() && !RetType->isVoidType()) {
9204  CDT = RetType;
9205  } else {
9206  unsigned Offset = 0;
9207  if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
9208  if (ParamAttrs[Offset].Kind == Vector)
9209  CDT = C.getPointerType(C.getRecordType(MD->getParent()));
9210  ++Offset;
9211  }
9212  if (CDT.isNull()) {
9213  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
9214  if (ParamAttrs[I + Offset].Kind == Vector) {
9215  CDT = FD->getParamDecl(I)->getType();
9216  break;
9217  }
9218  }
9219  }
9220  }
9221  if (CDT.isNull())
9222  CDT = C.IntTy;
9223  CDT = CDT->getCanonicalTypeUnqualified();
9224  if (CDT->isRecordType() || CDT->isUnionType())
9225  CDT = C.IntTy;
9226  return C.getTypeSize(CDT);
9227 }
9228 
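A concrete instance of the VLEN computation above, on a hypothetical declaration that is not part of this file.

// Illustrative only: with no simdlen clause the CDT is the return type,
// double (64 bits), so VLEN = sizeof(vector_register) / sizeof(CDT) gives
// 128/64 = 2 for SSE, 256/64 = 4 for AVX and AVX2, and 512/64 = 8 for AVX-512.
#pragma omp declare simd notinbranch
double axpy(double a, double x, double y) { return a * x + y; }
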
9229 static void
9230 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
9231  const llvm::APSInt &VLENVal,
9232  ArrayRef<ParamAttrTy> ParamAttrs,
9233  OMPDeclareSimdDeclAttr::BranchStateTy State) {
9234  struct ISADataTy {
9235  char ISA;
9236  unsigned VecRegSize;
9237  };
9238  ISADataTy ISAData[] = {
9239  {
9240  'b', 128
9241  }, // SSE
9242  {
9243  'c', 256
9244  }, // AVX
9245  {
9246  'd', 256
9247  }, // AVX2
9248  {
9249  'e', 512
9250  }, // AVX512
9251  };
9253  switch (State) {
9254  case OMPDeclareSimdDeclAttr::BS_Undefined:
9255  Masked.push_back('N');
9256  Masked.push_back('M');
9257  break;
9258  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
9259  Masked.push_back('N');
9260  break;
9261  case OMPDeclareSimdDeclAttr::BS_Inbranch:
9262  Masked.push_back('M');
9263  break;
9264  }
9265  for (char Mask : Masked) {
9266  for (const ISADataTy &Data : ISAData) {
9267  SmallString<256> Buffer;
9268  llvm::raw_svector_ostream Out(Buffer);
9269  Out << "_ZGV" << Data.ISA << Mask;
9270  if (!VLENVal) {
9271  Out << llvm::APSInt::getUnsigned(Data.VecRegSize /
9272  evaluateCDTSize(FD, ParamAttrs));
9273  } else {
9274  Out << VLENVal;
9275  }
9276  for (const ParamAttrTy &ParamAttr : ParamAttrs) {
9277  switch (ParamAttr.Kind){
9278  case LinearWithVarStride:
9279  Out << 's' << ParamAttr.StrideOrArg;
9280  break;
9281  case Linear:
9282  Out << 'l';
9283  if (!!ParamAttr.StrideOrArg)
9284  Out << ParamAttr.StrideOrArg;
9285  break;
9286  case Uniform:
9287  Out << 'u';
9288  break;
9289  case Vector:
9290  Out << 'v';
9291  break;
9292  }
9293  if (!!ParamAttr.Alignment)
9294  Out << 'a' << ParamAttr.Alignment;
9295  }
9296  Out << '_' << Fn->getName();
9297  Fn->addFnAttr(Out.str());
9298  }
9299  }
9300 }
9301 
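A worked, hypothetical example of the mangling produced by the loop above on x86/x86_64.

// Illustrative only: each (ISA, mask) pair yields one attribute of the form
// "_ZGV" + ISA + mask + VLEN + parameter codes + "_" + function name.  With
// simdlen(4), uniform 'a' ('u'), vector 'i' ('v') and notinbranch (mask 'N'
// only), the variants attached to 'get' are _ZGVbN4uv_get, _ZGVcN4uv_get,
// _ZGVdN4uv_get and _ZGVeN4uv_get.
#pragma omp declare simd simdlen(4) uniform(a) notinbranch
extern "C" double get(const double *a, int i) { return a[i]; }
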
9303  llvm::Function *Fn) {
9304  ASTContext &C = CGM.getContext();
9305  FD = FD->getMostRecentDecl();
9306  // Map params to their positions in function decl.
9307  llvm::DenseMap<const Decl *, unsigned> ParamPositions;
9308  if (isa<CXXMethodDecl>(FD))
9309  ParamPositions.try_emplace(FD, 0);
9310  unsigned ParamPos = ParamPositions.size();
9311  for (const ParmVarDecl *P : FD->parameters()) {
9312  ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
9313  ++ParamPos;
9314  }
9315  while (FD) {
9316  for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
9317  llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
9318  // Mark uniform parameters.
9319  for (const Expr *E : Attr->uniforms()) {
9320  E = E->IgnoreParenImpCasts();
9321  unsigned Pos;
9322  if (isa<CXXThisExpr>(E)) {
9323  Pos = ParamPositions[FD];
9324  } else {
9325  const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
9326  ->getCanonicalDecl();
9327  Pos = ParamPositions[PVD];
9328  }
9329  ParamAttrs[Pos].Kind = Uniform;
9330  }
9331  // Get alignment info.
9332  auto NI = Attr->alignments_begin();
9333  for (const Expr *E : Attr->aligneds()) {
9334  E = E->IgnoreParenImpCasts();
9335  unsigned Pos;
9336  QualType ParmTy;
9337  if (isa<CXXThisExpr>(E)) {
9338  Pos = ParamPositions[FD];
9339  ParmTy = E->getType();
9340  } else {
9341  const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
9342  ->getCanonicalDecl();
9343  Pos = ParamPositions[PVD];
9344  ParmTy = PVD->getType();
9345  }
9346  ParamAttrs[Pos].Alignment =
9347  (*NI)
9348  ? (*NI)->EvaluateKnownConstInt(C)
9349  : llvm::APSInt::getUnsigned(
9351  .getQuantity());
9352  ++NI;
9353  }
9354  // Mark linear parameters.
9355  auto SI = Attr->steps_begin();
9356  auto MI = Attr->modifiers_begin();
9357  for (const Expr *E : Attr->linears()) {
9358  E = E->IgnoreParenImpCasts();
9359  unsigned Pos;
9360  if (isa<CXXThisExpr>(E)) {
9361  Pos = ParamPositions[FD];
9362  } else {
9363  const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
9364  ->getCanonicalDecl();
9365  Pos = ParamPositions[PVD];
9366  }
9367  ParamAttrTy &ParamAttr = ParamAttrs[Pos];
9368  ParamAttr.Kind = Linear;
9369  if (*SI) {
9370  Expr::EvalResult Result;
9371  if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
9372  if (const auto *DRE =
9373  cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
9374  if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
9375  ParamAttr.Kind = LinearWithVarStride;
9376  ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
9377  ParamPositions[StridePVD->getCanonicalDecl()]);
9378  }
9379  }
9380  } else {
9381  ParamAttr.StrideOrArg = Result.Val.getInt();
9382  }
9383  }
9384  ++SI;
9385  ++MI;
9386  }
9387  llvm::APSInt VLENVal;
9388  if (const Expr *VLEN = Attr->getSimdlen())
9389  VLENVal = VLEN->EvaluateKnownConstInt(C);
9390  OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
9391  if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
9392  CGM.getTriple().getArch() == llvm::Triple::x86_64)
9393  emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
9394  }
9395  FD = FD->getPreviousDecl();
9396  }
9397 }
9398 
9399 namespace {
9400 /// Cleanup action for doacross support.
9401 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
9402 public:
9403  static const int DoacrossFinArgs = 2;
9404 
9405 private:
9406  llvm::Value *RTLFn;
9407  llvm::Value *Args[DoacrossFinArgs];
9408 
9409 public:
9410  DoacrossCleanupTy(llvm::Value *RTLFn, ArrayRef<llvm::Value *> CallArgs)
9411  : RTLFn(RTLFn) {
9412  assert(CallArgs.size() == DoacrossFinArgs);
9413  std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
9414  }
9415  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
9416  if (!CGF.HaveInsertPoint())
9417  return;
9418  CGF.EmitRuntimeCall(RTLFn, Args);
9419  }
9420 };
9421 } // namespace
9422 
9424  const OMPLoopDirective &D,
9425  ArrayRef<Expr *> NumIterations) {
9426  if (!CGF.HaveInsertPoint())
9427  return;
9428 
9429  ASTContext &C = CGM.getContext();
9430  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9431  RecordDecl *RD;
9432  if (KmpDimTy.isNull()) {
9433  // Build struct kmp_dim { // loop bounds info casted to kmp_int64
9434  // kmp_int64 lo; // lower
9435  // kmp_int64 up; // upper
9436  // kmp_int64 st; // stride
9437  // };
9438  RD = C.buildImplicitRecord("kmp_dim");
9439  RD->startDefinition();
9440  addFieldToRecordDecl(C, RD, Int64Ty);
9441  addFieldToRecordDecl(C, RD, Int64Ty);
9442  addFieldToRecordDecl(C, RD, Int64Ty);
9443  RD->completeDefinition();
9444  KmpDimTy = C.getRecordType(RD);
9445  } else {
9446  RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
9447  }
9448  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
9449  QualType ArrayTy =
9451 
9452  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
9453  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
9454  enum { LowerFD = 0, UpperFD, StrideFD };
9455  // Fill dims with data.
9456  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
9457  LValue DimsLVal =
9459  DimsAddr, I, C.getTypeSizeInChars(KmpDimTy)),
9460  KmpDimTy);
9461  // dims.upper = num_iterations;
9462  LValue UpperLVal = CGF.EmitLValueForField(
9463  DimsLVal, *std::next(RD->field_begin(), UpperFD));
9464  llvm::Value *NumIterVal =
9465  CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
9466  D.getNumIterations()->getType(), Int64Ty,
9467  D.getNumIterations()->getExprLoc());
9468  CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
9469  // dims.stride = 1;
9470  LValue StrideLVal = CGF.EmitLValueForField(
9471  DimsLVal, *std::next(RD->field_begin(), StrideFD));
9472  CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
9473  StrideLVal);
9474  }
9475 
9476  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
9477  // kmp_int32 num_dims, struct kmp_dim * dims);
9478  llvm::Value *Args[] = {
9479  emitUpdateLocation(CGF, D.getBeginLoc()),
9480  getThreadID(CGF, D.getBeginLoc()),
9481  llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
9483  CGF.Builder
9485  .getPointer(),
9486  CGM.VoidPtrTy)};
9487 
9489  CGF.EmitRuntimeCall(RTLFn, Args);
9490  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
9491  emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
9493  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
9494  llvm::makeArrayRef(FiniArgs));
9495 }
9496 
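For orientation, hypothetical user code for a loop that exercises the doacross support initialized above.

// Illustrative only: ordered(1) makes this loop a doacross nest, so it is
// bracketed by __kmpc_doacross_init (with one kmp_dim entry: lo = 0,
// up = number of iterations, st = 1) and __kmpc_doacross_fini; the sink and
// source clauses below map to __kmpc_doacross_wait and __kmpc_doacross_post.
void doacross_example(float *a, int n) {
#pragma omp parallel for ordered(1)
  for (int i = 1; i < n; ++i) {
#pragma omp ordered depend(sink : i - 1)
    a[i] += a[i - 1];
#pragma omp ordered depend(source)
  }
}
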
9498  const OMPDependClause *C) {
9499  QualType Int64Ty =
9500  CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
9501  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
9502  QualType ArrayTy = CGM.getContext().getConstantArrayType(
9503  Int64Ty, Size, ArrayType::Normal, 0);
9504  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
9505  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
9506  const Expr *CounterVal = C->getLoopData(I);
9507  assert(CounterVal);
9508  llvm::Value *CntVal = CGF.EmitScalarConversion(
9509  CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
9510  CounterVal->getExprLoc());
9511  CGF.EmitStoreOfScalar(
9512  CntVal,
9514  CntAddr, I, CGM.getContext().getTypeSizeInChars(Int64Ty)),
9515  /*Volatile=*/false, Int64Ty);
9516  }
9517  llvm::Value *Args[] = {
9518  emitUpdateLocation(CGF, C->getBeginLoc()),
9519  getThreadID(CGF, C->getBeginLoc()),
9520  CGF.Builder
9521  .CreateConstArrayGEP(CntAddr, 0,
9522  CGM.getContext().getTypeSizeInChars(Int64Ty))
9523  .getPointer()};
9524  llvm::Value *RTLFn;
9525  if (C->getDependencyKind() == OMPC_DEPEND_source) {
9527  } else {
9528  assert(C->getDependencyKind() == OMPC_DEPEND_sink);
9530  }
9531  CGF.EmitRuntimeCall(RTLFn, Args);
9532 }
9533 
9535  llvm::Value *Callee,
9536  ArrayRef<llvm::Value *> Args) const {
9537  assert(Loc.isValid() && "Outlined function call location must be valid.");
9538  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
9539 
9540  if (auto *Fn = dyn_cast<llvm::Function>(Callee)) {
9541  if (Fn->doesNotThrow()) {
9542  CGF.EmitNounwindRuntimeCall(Fn, Args);
9543  return;
9544  }
9545  }
9546  CGF.EmitRuntimeCall(Callee, Args);
9547 }
9548 
9550  CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
9551  ArrayRef<llvm::Value *> Args) const {
9552  emitCall(CGF, Loc, OutlinedFn, Args);
9553 }
9554 
9556  const VarDecl *NativeParam,
9557  const VarDecl *TargetParam) const {
9558  return CGF.GetAddrOfLocalVar(NativeParam);
9559 }
9560 
9562  const VarDecl *VD) {
9563  return Address::invalid();
9564 }
9565 
9567  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
9568  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
9569  llvm_unreachable("Not supported in SIMD-only mode");
9570 }
9571 
9573  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
9574  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
9575  llvm_unreachable("Not supported in SIMD-only mode");
9576 }
9577 
9579  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
9580  const VarDecl *PartIDVar, const VarDecl *TaskTVar,
9581  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
9582  bool Tied, unsigned &NumberOfParts) {
9583  llvm_unreachable("Not supported in SIMD-only mode");
9584 }
9585 
9587  SourceLocation Loc,
9588  llvm::Value *OutlinedFn,
9589  ArrayRef<llvm::Value *> CapturedVars,
9590  const Expr *IfCond) {
9591  llvm_unreachable("Not supported in SIMD-only mode");
9592 }
9593 
9595  CodeGenFunction &CGF, StringRef CriticalName,
9596  const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
9597  const Expr *Hint) {
9598  llvm_unreachable("Not supported in SIMD-only mode");
9599 }
9600 
9602  const RegionCodeGenTy &MasterOpGen,
9603  SourceLocation Loc) {
9604  llvm_unreachable("Not supported in SIMD-only mode");
9605 }
9606 
9608  SourceLocation Loc) {
9609  llvm_unreachable("Not supported in SIMD-only mode");
9610 }
9611 
9613  CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
9614  SourceLocation Loc) {
9615  llvm_unreachable("Not supported in SIMD-only mode");
9616 }
9617 
9619  CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
9620  SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
9621  ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
9622  ArrayRef<const Expr *> AssignmentOps) {
9623  llvm_unreachable("Not supported in SIMD-only mode");
9624 }
9625 
9627  const RegionCodeGenTy &OrderedOpGen,
9628  SourceLocation Loc,
9629  bool IsThreads) {
9630  llvm_unreachable("Not supported in SIMD-only mode");
9631 }
9632 
9634  SourceLocation Loc,
9635  OpenMPDirectiveKind Kind,
9636  bool EmitChecks,
9637  bool ForceSimpleCall) {
9638  llvm_unreachable("Not supported in SIMD-only mode");
9639 }
9640 
9642  CodeGenFunction &CGF, SourceLocation Loc,
9643  const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
9644  bool Ordered, const DispatchRTInput &DispatchValues) {
9645  llvm_unreachable("Not supported in SIMD-only mode");
9646 }
9647 
9650  const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
9651  llvm_unreachable("Not supported in SIMD-only mode");
9652 }
9653 
9655  CodeGenFunction &CGF, SourceLocation Loc,
9656  OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
9657  llvm_unreachable("Not supported in SIMD-only mode");
9658 }
9659 
9661  SourceLocation Loc,
9662  unsigned IVSize,
9663  bool IVSigned) {
9664  llvm_unreachable("Not supported in SIMD-only mode");
9665 }
9666 
9668  SourceLocation Loc,
9669  OpenMPDirectiveKind DKind) {
9670  llvm_unreachable("Not supported in SIMD-only mode");
9671 }
9672 
9674  SourceLocation Loc,
9675  unsigned IVSize, bool IVSigned,
9676  Address IL, Address LB,
9677  Address UB, Address ST) {
9678  llvm_unreachable("Not supported in SIMD-only mode");
9679 }
9680 
9682  llvm::Value *NumThreads,
9683  SourceLocation Loc) {
9684  llvm_unreachable("Not supported in SIMD-only mode");
9685 }
9686 
9688  OpenMPProcBindClauseKind ProcBind,
9689  SourceLocation Loc) {
9690  llvm_unreachable("Not supported in SIMD-only mode");
9691 }
9692 
9694  const VarDecl *VD,
9695  Address VDAddr,
9696  SourceLocation Loc) {
9697  llvm_unreachable("Not supported in SIMD-only mode");
9698 }
9699 
9701  const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
9702  CodeGenFunction *CGF) {
9703  llvm_unreachable("Not supported in SIMD-only mode");
9704 }
9705 
9707  CodeGenFunction &CGF, QualType VarType, StringRef Name) {
9708  llvm_unreachable("Not supported in SIMD-only mode");
9709 }
9710 
9713  SourceLocation Loc) {
9714  llvm_unreachable("Not supported in SIMD-only mode");
9715 }
9716 
9718  const OMPExecutableDirective &D,
9719  llvm::Value *TaskFunction,
9720  QualType SharedsTy, Address Shareds,
9721  const Expr *IfCond,
9722  const OMPTaskDataTy &Data) {
9723  llvm_unreachable("Not supported in SIMD-only mode");
9724 }
9725 
9727  CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
9728  llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds,
9729  const Expr *IfCond, const OMPTaskDataTy &Data) {
9730  llvm_unreachable("Not supported in SIMD-only mode");
9731 }
9732 
9736  ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
9737  assert(Options.SimpleReduction && "Only simple reduction is expected.");
9738  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
9739  ReductionOps, Options);
9740 }
9741 
9744  ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
9745  llvm_unreachable("Not supported in SIMD-only mode");
9746 }
9747 
9749  SourceLocation Loc,
9750  ReductionCodeGen &RCG,
9751  unsigned N) {
9752  llvm_unreachable("Not supported in SIMD-only mode");
9753 }
9754 
9756  SourceLocation Loc,
9757  llvm::Value *ReductionsPtr,
9758  LValue SharedLVal) {
9759  llvm_unreachable("Not supported in SIMD-only mode");
9760 }
9761 
9763  SourceLocation Loc) {
9764  llvm_unreachable("Not supported in SIMD-only mode");
9765 }
9766 
9768  CodeGenFunction &CGF, SourceLocation Loc,
9769  OpenMPDirectiveKind CancelRegion) {
9770  llvm_unreachable("Not supported in SIMD-only mode");
9771 }
9772 
9774  SourceLocation Loc, const Expr *IfCond,
9775  OpenMPDirectiveKind CancelRegion) {
9776  llvm_unreachable("Not supported in SIMD-only mode");
9777 }
9778 
9780  const OMPExecutableDirective &D, StringRef ParentName,
9781  llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
9782  bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
9783  llvm_unreachable("Not supported in SIMD-only mode");
9784 }
9785 
9787  const OMPExecutableDirective &D,
9788  llvm::Value *OutlinedFn,
9789  llvm::Value *OutlinedFnID,
9790  const Expr *IfCond, const Expr *Device) {
9791  llvm_unreachable("Not supported in SIMD-only mode");
9792 }
9793 
9795  llvm_unreachable("Not supported in SIMD-only mode");
9796 }
9797 
9799  llvm_unreachable("Not supported in SIMD-only mode");
9800 }
9801 
9803  return false;
9804 }
9805 
9807  return nullptr;
9808 }
9809 
9811  const OMPExecutableDirective &D,
9812  SourceLocation Loc,
9813  llvm::Value *OutlinedFn,
9814  ArrayRef<llvm::Value *> CapturedVars) {
9815  llvm_unreachable("Not supported in SIMD-only mode");
9816 }
9817 
9819  const Expr *NumTeams,
9820  const Expr *ThreadLimit,
9821  SourceLocation Loc) {
9822  llvm_unreachable("Not supported in SIMD-only mode");
9823 }
9824 
9826  CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
9827  const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
9828  llvm_unreachable("Not supported in SIMD-only mode");
9829 }
9830 
9832  CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
9833  const Expr *Device) {
9834  llvm_unreachable("Not supported in SIMD-only mode");
9835 }
9836 
9838  const OMPLoopDirective &D,
9839  ArrayRef<Expr *> NumIterations) {
9840  llvm_unreachable("Not supported in SIMD-only mode");
9841 }
9842 
9844  const OMPDependClause *C) {
9845  llvm_unreachable("Not supported in SIMD-only mode");
9846 }
9847 
9848 const VarDecl *
9850  const VarDecl *NativeParam) const {
9851  llvm_unreachable("Not supported in SIMD-only mode");
9852 }
9853 
9854 Address
9856  const VarDecl *NativeParam,
9857  const VarDecl *TargetParam) const {
9858  llvm_unreachable("Not supported in SIMD-only mode");
9859 }
9860 
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition: CGCall.cpp:659
RecordDecl * buildImplicitRecord(StringRef Name, RecordDecl::TagKind TK=TTK_Struct) const
Create a new implicit TU-level CXXRecordDecl or RecordDecl declaration.
TBAAAccessInfo getTBAAInfoForSubobject(LValue Base, QualType AccessType)
getTBAAInfoForSubobject - Get TBAA information for an access with a given base lvalue.
void pushTerminate()
Push a terminate handler on the stack.
Definition: CGCleanup.cpp:259
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, llvm::Type *BaseLVType, CharUnits BaseLVAlignment, llvm::Value *Addr)
virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S)
Emit the captured statement body.
static llvm::Value * emitParallelOrTeamsOutlinedFunction(CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen)
This represents &#39;#pragma omp task&#39; directive.
Definition: StmtOpenMP.h:1771
static const Decl * getCanonicalDecl(const Decl *D)
Represents a function declaration or definition.
Definition: Decl.h:1738
llvm::IntegerType * IntTy
int
This represents &#39;thread_limit&#39; clause in the &#39;#pragma omp ...&#39; directive.
External linkage, which indicates that the entity can be referred to from other translation units...
Definition: Linkage.h:60
virtual Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD)
Gets the OpenMP-specific address of the local variable.
Expr * getUpperBoundVariable() const
Definition: StmtOpenMP.h:833
Other implicit parameter.
Definition: Decl.h:1510
QualType TgtDeviceImageQTy
struct __tgt_device_image{ void *ImageStart; // Pointer to the target code start. ...
Complete object ctor.
Definition: ABI.h:26
unsigned getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it...
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition: Type.h:2537
CanQualType VoidPtrTy
Definition: ASTContext.h:1044
Scheduling data for loop-based OpenMP directives.
Definition: OpenMPKinds.h:141
Destroyer * getDestroyer(QualType::DestructionKind destructionKind)
Definition: CGDecl.cpp:1921
A (possibly-)qualified type.
Definition: Type.h:638
base_class_range bases()
Definition: DeclCXX.h:823
bool isArrayType() const
Definition: Type.h:6345
llvm::Type * ConvertTypeForMem(QualType T)
void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, llvm::BasicBlock *FalseBlock, uint64_t TrueCount)
EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g.
const CodeGenOptions & getCodeGenOpts() const
i32 captured_struct **param SharedsTy A type which contains references the shared variables *param Shareds Context with the list of shared variables from the p *TaskFunction *param IfCond Not a nullptr if if clause was nullptr *otherwise *param Data Additional data for task generation like final list of privates etc *void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Address CreateMemTemp(QualType T, const Twine &Name="tmp", Address *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignmen and cas...
Definition: CGExpr.cpp:139
CGRecordLayout - This class handles struct and union layout info while lowering AST types to LLVM typ...
bool HaveInsertPoint() const
HaveInsertPoint - True if an insertion point is defined.
const RecordDecl * KmpTaskTQTyRD
static void EmitOMPTargetParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForSimdDirective &S)
Emit device code for the target parallel for simd directive.
void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)
Emits single reduction combiner.
Address CreateConstGEP(Address Addr, uint64_t Index, CharUnits EltSize, const llvm::Twine &Name="")
Given addr = T* ...
Definition: CGBuilder.h:226
void actOnDeviceGlobalVarEntriesInfo(const OffloadDeviceGlobalVarEntryInfoActTy &Action)
The standard implementation of ConstantInitBuilder used in Clang.
const ASTRecordLayout & getASTRecordLayout(const RecordDecl *D) const
Get or compute information about the layout of the specified record (struct/union/class) D...
bool EvaluateAsInt(EvalResult &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects) const
EvaluateAsInt - Return true if this is a constant which we can fold and convert to an integer...
Stmt - This represents one statement.
Definition: Stmt.h:66
Expr * getLowerBoundVariable() const
Definition: StmtOpenMP.h:825
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee...
Definition: Type.cpp:505
virtual void emitUserDefinedReduction(CodeGenFunction *CGF, const OMPDeclareReductionDecl *D)
Emit code for the specified user defined reduction construct.
Expr * getLoopData(unsigned NumLoop)
Get the loop data.
bool capturesThis() const
Determine whether this capture handles the C++ &#39;this&#39; pointer.
Definition: Stmt.h:3143
VarDecl * getCapturedVar() const
Retrieve the declaration of the variable being captured.
Definition: Stmt.cpp:1172
bool emitTargetGlobal(GlobalDecl GD) override
Emit the global GD if it is meaningful for the target.
CharUnits getAlignOfGlobalVarInChars(QualType T) const
Return the alignment in characters that should be given to a global variable with type T...
QualType getTgtBinaryDescriptorQTy()
Returns __tgt_bin_desc type.
bool hasNonTrivialCall(const ASTContext &Ctx) const
Determine whether this expression involves a call to any function that is not trivial.
Definition: Expr.cpp:3426
SmallVector< std::pair< OpenMPDependClauseKind, const Expr * >, 4 > Dependences
CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, StringRef Separator)
Constructor allowing to redefine the name separator for the variables.
bool isRecordType() const
Definition: Type.h:6369
void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues) override
This is used for non static scheduled types and when the ordered clause is present on the loop constr...
void clearLocThreadIdInsertPt(CodeGenFunction &CGF)
Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override
Gets the address of the native argument basing on the address of the target-specific parameter...
SmallVector< const Expr *, 4 > LastprivateCopies
Decl - This represents one declaration (or definition), e.g.
Definition: DeclBase.h:87
specific_attr_iterator - Iterates over a subrange of an AttrVec, only providing attributes that are o...
Definition: AttrIterator.h:35
SourceLocation getBeginLoc() const
Returns starting location of directive kind.
Definition: StmtOpenMP.h:168
llvm::Constant * getOrCreateInternalVariable(llvm::Type *Ty, const llvm::Twine &Name)
Gets (if variable with the given name already exist) or creates internal global variable with the spe...
virtual llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr *> LHSExprs, ArrayRef< const Expr *> RHSExprs, const OMPTaskDataTy &Data)
Emit a code for initialization of task reduction clause.
const RecordType * getAsStructureType() const
Definition: Type.cpp:521
llvm::SmallDenseSet< const VarDecl * > DeferredGlobalVariables
List of variables that can become declare target implicitly and, thus, must be emitted.
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
VarDecl * getDefinition(ASTContext &)
Get the real (not just tentative) definition for this declaration.
Definition: Decl.cpp:2132
static bool stable_sort_comparator(const PrivateDataTy P1, const PrivateDataTy P2)
static llvm::Value * emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, ArrayRef< const Expr *> PrivateVars, ArrayRef< const Expr *> FirstprivateVars, ArrayRef< const Expr *> LastprivateVars, QualType PrivatesQTy, ArrayRef< PrivateDataTy > Privates)
Emit a privates mapping function for correct handling of private and firstprivate variables...
virtual Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name)
Creates artificial threadprivate variable with name Name and type VarType.
StringRef P
CapturedStmt * getInnermostCapturedStmt()
Get innermost captured statement for the construct.
Definition: StmtOpenMP.h:226
unsigned getFieldIndex() const
Returns the index of this field within its record, as appropriate for passing to ASTRecordLayout::get...
Definition: Decl.cpp:3797
Call to void __kmpc_threadprivate_register( ident_t *, void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);.
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...
Definition: Type.h:6245
ReductionCodeGen(ArrayRef< const Expr *> Shareds, ArrayRef< const Expr *> Privates, ArrayRef< const Expr *> ReductionOps)
static RecordDecl * createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef< PrivateDataTy > Privates)
bool isOpenMPTargetDataManagementDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target data offload directive.
The base class of the type hierarchy.
Definition: Type.h:1407
void EmitStoreThroughLValue(RValue Src, LValue Dst, bool isInit=false)
EmitStoreThroughLValue - Store the specified rvalue into the specified lvalue, where both are guarant...
Definition: CGExpr.cpp:1914
void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr *> Privates, ArrayRef< const Expr *> LHSExprs, ArrayRef< const Expr *> RHSExprs, ArrayRef< const Expr *> ReductionOps, ReductionOptionsTy Options) override
Emit a code for reduction clause.
void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device) override
Emit the data mapping/movement code associated with the directive D that should be of the form &#39;targe...
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
Definition: Diagnostic.h:1295
Represents an array type, per C99 6.7.5.2 - Array Declarators.
Definition: Type.h:2812
virtual const FieldDecl * lookup(const VarDecl *VD) const
Lookup the captured field decl for a variable.
llvm::Value * PointersArray
The array of section pointers passed to the runtime library.
virtual void clear()
virtual void completeDefinition()
Note that the definition of this type is now complete.
Definition: Decl.cpp:4154
bool isZero() const
isZero - Test whether the quantity equals zero.
Definition: CharUnits.h:116
QualType withConst() const
Definition: Type.h:810
bool markAsGlobalTarget(GlobalDecl GD)
Marks the declaration as already emitted for the device code and returns true, if it was marked alrea...
const TargetInfo & getTargetInfo() const
Definition: ASTContext.h:690
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
struct with the values to be passed to the dispatch runtime function
capture_const_range captures() const
Definition: DeclCXX.h:1248
Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
Definition: CGExpr.cpp:2322
LValue EmitLValueForFieldInitialization(LValue Base, const FieldDecl *Field)
EmitLValueForFieldInitialization - Like EmitLValueForField, except that if the Field is a reference...
Definition: CGExpr.cpp:3985
const Expr * getAnyInitializer() const
Get the initializer for this variable, no matter which declaration it is attached to...
Definition: Decl.h:1210
Describes the capture of a variable or of this, or of a C++1y init-capture.
Definition: LambdaCapture.h:26
void scanForTargetRegionsFunctions(const Stmt *S, StringRef ParentName)
Start scanning from statement S and and emit all target regions found along the way.
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
Represents a C++ constructor within a class.
Definition: DeclCXX.h:2484
static void EmitOMPAggregateReduction(CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, const VarDecl *RHSVar, const llvm::function_ref< void(CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *)> &RedOpGen, const Expr *XExpr=nullptr, const Expr *EExpr=nullptr, const Expr *UpExpr=nullptr)
Emit reduction operation for each element of array (required for array sections) LHS op = RHS...
bool ConstantFoldsToSimpleInteger(const Expr *Cond, bool &Result, bool AllowLabels=false)
ConstantFoldsToSimpleInteger - If the specified expression does not fold to a constant, or if it does but contains a label, return false.
void EmitOMPCopy(QualType OriginalType, Address DestAddr, Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy)
Emit proper copying of data from one variable to another.
OpenMPSchedType
Schedule types for &#39;omp for&#39; loops (these enumerators are taken from the enum sched_type in kmp...
bool isTrivialType(const ASTContext &Context) const
Return true if this is a trivial type per (C++0x [basic.types]p9)
Definition: Type.cpp:2157
SmallVector< const Expr *, 4 > ReductionCopies
SourceLocation getEndLoc() const
Returns ending location of directive.
Definition: StmtOpenMP.h:170
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
llvm::Function * GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S)
Represents a variable declaration or definition.
Definition: Decl.h:813
Objects with "hidden" visibility are not seen by the dynamic linker.
Definition: Visibility.h:37
llvm::Value * getThreadID(CodeGenFunction &CGF, SourceLocation Loc)
Gets thread id value for the current thread.
QualType getReturnType() const
Definition: Decl.h:2302
This represents &#39;num_threads&#39; clause in the &#39;#pragma omp ...&#39; directive.
Definition: OpenMPClause.h:382
virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, ArrayRef< llvm::Value *> Args=llvm::None) const
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
const T * getAs() const
Member-template getAs<specific type>&#39;.
Definition: Type.h:6748
The "union" keyword.
Definition: Type.h:5039
Extra information about a function prototype.
Definition: Type.h:3767
llvm::GlobalVariable * finishAndCreateGlobal(As &&...args)
Given that this builder was created by beginning an array or struct directly on a ConstantInitBuilder...
virtual llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST)
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
bool field_empty() const
Definition: Decl.h:3792
DiagnosticsEngine & getDiags() const
void EmitVariablyModifiedType(QualType Ty)
EmitVLASize - Capture all the sizes for the VLA expressions in the given variably-modified type and s...
llvm::Value * getPointer() const
Definition: Address.h:38
llvm::Type * ConvertTypeForMem(QualType T)
ConvertTypeForMem - Convert type T into a llvm::Type.
static llvm::GlobalVariable * createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, ArrayRef< llvm::Constant *> Data, const Twine &Name, As &&... Args)
Represents a parameter to a function.
Definition: Decl.h:1550
Linkage
Describes the different kinds of linkage (C++ [basic.link], C99 6.2.2) that an entity may have...
Definition: Linkage.h:24
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, const Expr *Ref)
Generates unique name for artificial threadprivate variables.
void createOffloadEntriesAndInfoMetadata()
Creates all the offload entries in the current compilation unit along with the associated metadata...
static unsigned evaluateCDTSize(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
std::string getName(ArrayRef< StringRef > Parts) const
Get the platform-specific name separator.
static void EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelDirective &S)
A jump destination is an abstract label, branching to which may require a jump out through normal cle...
Struct that keeps all the relevant information that should be kept throughout a &#39;target data&#39; region...
QualType getTgtOffloadEntryQTy()
Returns __tgt_offload_entry type.
virtual void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device)
Emit the data mapping/movement code associated with the directive D that should be of the form &#39;targe...
SmallVector< const Expr *, 4 > PrivateVars
Represents a struct/union/class.
Definition: Decl.h:3593
llvm::DenseMap< const VarDecl *, FieldDecl * > LambdaCaptureFields
Source[4] in Fortran, do not use for C++.
llvm::StringMap< llvm::WeakTrackingVH > EmittedNonTargetVariables
List of the global variables with their addresses that should not be emitted for the target...
static llvm::Value * emitDestructorsFunction(CodeGenModule &CGM, SourceLocation Loc, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy)
void emitDestroy(Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
emitDestroy - Immediately perform the destruction of the given object.
Definition: CGDecl.cpp:1996
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, LValue BaseLV)
Address getAddress() const
Definition: CGValue.h:327
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
virtual void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc)
Emits a master region.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:155
LineState State
virtual void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned)
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
void EmitExprAsInit(const Expr *init, const ValueDecl *D, LValue lvalue, bool capturedByInit)
EmitExprAsInit - Emits the code necessary to initialize a location in memory with the given initializ...
Definition: CGDecl.cpp:1762
Call to void *__kmpc_threadprivate_cached(ident_t *loc, kmp_int32 global_tid, void *data...
The scope used to remap some variables as private in the OpenMP loop body (or other captured region e...
field_range fields() const
Definition: Decl.h:3784
SmallVector< const Expr *, 4 > LastprivateVars
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Stmt.cpp:288
virtual void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C)
Emit code for doacross ordered directive with &#39;depend&#39; clause.
Represents a member of a struct/union/class.
Definition: Decl.h:2579
CharUnits getAlignment() const
Definition: CGValue.h:316
const CapturedStmt * getCapturedStmt(OpenMPDirectiveKind RegionKind) const
Returns the captured statement associated with the component region within the (combined) directive...
Definition: StmtOpenMP.h:209
An RAII object to set (and then clear) a mapping for an OpaqueValueExpr.
might be used in Fortran
void setLocThreadIdInsertPt(CodeGenFunction &CGF, bool AtCurrentPoint=false)
std::pair< llvm::Value *, llvm::Value * > getSizes(unsigned N) const
Returns the size of the reduction item (in chars and total number of elements in the item)...
LValue EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, bool IsLowerBound=true)
Definition: CGExpr.cpp:3550
unsigned getNonVirtualBaseLLVMFieldNo(const CXXRecordDecl *RD) const
static void emitOffloadingArrays(CodeGenFunction &CGF, MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, MappableExprsHandler::MapValuesArrayTy &Pointers, MappableExprsHandler::MapValuesArrayTy &Sizes, MappableExprsHandler::MapFlagsArrayTy &MapTypes, CGOpenMPRuntime::TargetDataInfo &Info)
Emit the arrays used to pass the captures and map information to the offloading runtime library...
llvm::CallInst * EmitRuntimeCall(llvm::Value *callee, const Twine &name="")
void startDefinition()
Starts the definition of this tag declaration.
Definition: Decl.cpp:3877
bool isReferenceType() const
Definition: Type.h:6308
This represents clause &#39;map&#39; in the &#39;#pragma omp ...&#39; directives.
InitKind getInitializerKind() const
Get initializer kind.
Definition: DeclOpenMP.h:174
void EmitOMPAggregateAssign(Address DestAddr, Address SrcAddr, QualType OriginalType, const llvm::function_ref< void(Address, Address)> CopyGen)
Perform element by element copying of arrays with type OriginalType from SrcAddr to DestAddr using co...
OMPTargetGlobalVarEntryKind
Kind of the global variable entry..
virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const
Check if the specified ScheduleKind is dynamic.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, unsigned Index, const VarDecl *Var)
Given an array of pointers to variables, project the address of a given variable. ...
This represents clause &#39;to&#39; in the &#39;#pragma omp ...&#39; directives.
static CharUnits Zero()
Zero - Construct a CharUnits quantity of zero.
Definition: CharUnits.h:53
clang::CharUnits operator*(clang::CharUnits::QuantityType Scale, const clang::CharUnits &CU)
Definition: CharUnits.h:208
llvm::Type * getKmpc_MicroPointerTy()
Returns pointer to kmpc_micro type.
OpenMPDirectiveKind getDirectiveKind() const
Definition: StmtOpenMP.h:244
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Decl.h:739
This is a common base class for loop directives ('omp simd', 'omp for', 'omp for simd' etc...
Definition: StmtOpenMP.h:338
void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc) override
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...
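A minimal, self-contained sketch of the clause this handles; the printf is just to have a body, and nothing here is specific to this file:

  #include <cstdio>

  int main() {
    // The value of the num_threads clause is what gets forwarded to the
    // __kmpc_push_num_threads call described above, before the region forks.
    #pragma omp parallel num_threads(4)
    std::printf("hello from one of up to 4 threads\n");
    return 0;
  }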
virtual bool emitTargetFunctions(GlobalDecl GD)
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
An r-value expression (a pr-value in the C++11 taxonomy) produces a temporary value.
Definition: Specifiers.h:110
CharUnits GetTargetTypeStoreSize(llvm::Type *Ty) const
Return the store size, in character units, of the given LLVM type.
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc)
EmitLoadOfComplex - Load a complex number from the specified l-value.
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:715
ArrayRef< ParmVarDecl * > parameters() const
Definition: Decl.h:2262
OpenMPDistScheduleClauseKind
OpenMP attributes for 'dist_schedule' clause.
Definition: OpenMPKinds.h:109
void emitProcBindClause(CodeGenFunction &CGF, OpenMPProcBindClauseKind ProcBind, SourceLocation Loc) override
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...
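Similarly, a hedged example of the clause feeding __kmpc_push_proc_bind; work() is a hypothetical function assumed declared elsewhere:

  void work();

  void run() {
    // Ask the runtime to place the team's threads close to the parent thread.
    #pragma omp parallel proc_bind(close) num_threads(8)
    work();
  }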
virtual void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr *> NumIterations)
Emit initialization for doacross loop nesting support.
virtual void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Value *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, const Expr *Device)
Emit the target offloading code associated with D.
BinaryOperatorKind
Expr * getInitializer()
Get initializer expression (if specified) of the declare reduction construct.
Definition: DeclOpenMP.h:171
QualType TgtOffloadEntryQTy
Type struct __tgt_offload_entry{ void *addr; // Pointer to the offload entry info.
static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, const RecordDecl *RD, const CGRecordLayout &RL, ArrayRef< llvm::Constant *> Data)
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, QualType Type, bool EmitDeclareReductionInit, const Expr *Init, const OMPDeclareReductionDecl *DRD, Address SrcAddr=Address::invalid())
Emit initialization of arrays of complex types.
llvm::Function * emitRegistrationFunction() override
Creates the offloading descriptor in the event any target region was emitted in the current module an...
OpenMPScheduleClauseKind Schedule
Definition: OpenMPKinds.h:142
Address CreateElementBitCast(Address Addr, llvm::Type *Ty, const llvm::Twine &Name="")
Cast the element type of the given address to a different type, preserving information like the align...
Definition: CGBuilder.h:157
CharUnits - This is an opaque type for sizes expressed in character units.
Definition: CharUnits.h:38
bool Ordered
true if loop is ordered, false otherwise.
void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned) override
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a teams-kind directive.
APValue Val
Val - This is the value the expression can be folded to.
Definition: Expr.h:573
virtual void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info)
Emit the target data mapping code associated with D.
const ArrayType * getAsArrayTypeUnsafe() const
A variant of getAs<> for array types which silently discards qualifiers from the outermost type...
Definition: Type.h:6797
void emitCleanups(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Emits cleanup code for the reduction item.
RAII for correct setting/restoring of CapturedStmtInfo.
virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emits a call for the task directive. TaskFunction is an LLVM function with type void(*)(i32 gtid, i32 part_id, captured_struct *context); SharedsTy is a type which contains references to the shared variables; Shareds is the context with the list of shared variables from the TaskFunction; IfCond is not a nullptr if the 'if' clause was specified, nullptr otherwise; Data is additional data for task generation like the final list of privates.
CharUnits getAlignment() const
Return the alignment of this pointer.
Definition: Address.h:67
void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc) override
Emits code for a taskyield directive.
llvm::StringSet ThreadPrivateWithDefinition
Set of threadprivate variables with the generated initializer.
bool empty() const
Return true if a there are no entries defined.
child_range children()
Definition: Stmt.cpp:237
String describing the source location.
void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc) override
Emits a master region.
virtual std::pair< llvm::Function *, llvm::Function * > getUserDefinedReduction(const OMPDeclareReductionDecl *D)
Get combiner/initializer for the specified user-defined reduction, if any.
void actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action)
OpenMPScheduleClauseModifier M2
Definition: OpenMPKinds.h:144
bool needsEHCleanup(QualType::DestructionKind kind)
Determines whether an EH cleanup is required to destroy a type with the given destruction kind...
void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Value *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, const Expr *Device) override
Emit the target offloading code associated with D.
SmallVector< const Expr *, 4 > PrivateCopies
llvm::CallInst * CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:274
llvm::StructType * getBaseSubobjectLLVMType() const
Return the "base subobject" LLVM type associated with this record.
unsigned NumberOfPtrs
The total number of pointers passed to the runtime library.
void operator()(CodeGenFunction &CGF) const
bool isConstexpr() const
Whether this variable is (C++11) constexpr.
Definition: Decl.h:1382
CharUnits getDeclAlign(const Decl *D, bool ForAlignof=false) const
Return a conservative estimate of the alignment of the specified decl D.
bool isLambda() const
Determine whether this class describes a lambda function object.
Definition: DeclCXX.h:1196
llvm::StringSet AlreadyEmittedTargetFunctions
List of the emitted functions.
Address adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Adjusts PrivateAddr to be used in place of the original variable address in normal operations...
Expr * getSizeExpr() const
Definition: Type.h:2991
Scope - A scope is a transient data structure that is used while parsing the program.
Definition: Scope.h:41
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition: Type.h:6072
virtual void emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn)
Marks function Fn with properly mangled versions of vector functions.
field_iterator field_begin() const
Definition: Decl.cpp:4145
bool usesReductionInitializer(unsigned N) const
Returns true if the initialization of the reduction item uses initializer from declare reduction cons...
bool isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target code offload directive.
virtual void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Value *OutlinedFn, ArrayRef< llvm::Value *> CapturedVars)
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
ArrayRef< MappableComponent > MappableExprComponentListRef
void EmitIgnoredExpr(const Expr *E)
EmitIgnoredExpr - Emit an expression in a context which ignores the result.
Definition: CGExpr.cpp:182
void addCompilerUsedGlobal(llvm::GlobalValue *GV)
Add a global to a list to be added to the llvm.compiler.used metadata.
SmallVector< const Expr *, 4 > FirstprivateCopies
OpenMPDependClauseKind getDependencyKind() const
Get dependency type.
static void emitPrivatesInit(CodeGenFunction &CGF, const OMPExecutableDirective &D, Address KmpTaskSharedsPtr, LValue TDBase, const RecordDecl *KmpTaskTWithPrivatesQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool ForDup)
Emit initialization for private variables in task-based directives.
static int addMonoNonMonoModifier(OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2)
llvm::Value * emitReductionFunction(CodeGenModule &CGM, SourceLocation Loc, llvm::Type *ArgsType, ArrayRef< const Expr *> Privates, ArrayRef< const Expr *> LHSExprs, ArrayRef< const Expr *> RHSExprs, ArrayRef< const Expr *> ReductionOps)
Emits reduction function.
DiagnosticsEngine & getDiagnostics() const
static llvm::Function * emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, const Expr *CombinerInitializer, const VarDecl *In, const VarDecl *Out, bool IsCombiner)
static void emitOffloadingArraysArgument(CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info)
Emit the arguments to be passed to the runtime library based on the arrays of pointers, sizes and map types.
Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
static const OMPDeclareReductionDecl * getReductionInit(const Expr *ReductionOp)
Check if the combiner is a call to UDR combiner and if it is so return the UDR decl used for reductio...
llvm::Value * emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0)
Emits object of ident_t type with info for source location.
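For reference, the ident_t descriptor emitted here follows the LLVM OpenMP runtime's layout; the sketch below reflects the usual kmp.h definition and should be read as an assumption about the runtime ABI, not as a type defined in this file:

  #include <cstdint>

  struct ident_t {
    std::int32_t reserved_1;  // unused
    std::int32_t flags;       // OMP_IDENT_xxx bits; OMP_IDENT_KMPC marks this form
    std::int32_t reserved_2;  // unused
    std::int32_t reserved_3;  // unused
    const char *psource;      // ";file;function;line;column;;" location string
  };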
bool isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a worksharing directive.
virtual llvm::Value * emitTeamsOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP teams directive D.
SmallVector< const Expr *, 4 > ReductionOps
This represents clause 'is_device_ptr' in the '#pragma omp ...' directives.
SmallVector< const Expr *, 4 > ReductionVars
virtual void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind)
Call the appropriate runtime routine to notify that we finished all the work with current loop...
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
Definition: DeclBase.h:870
This represents clause 'from' in the '#pragma omp ...' directives.
LValue EmitLValueForField(LValue Base, const FieldDecl *Field)
Definition: CGExpr.cpp:3857
const VarDecl * translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const override
Translates the native parameter of outlined function if this is required for target.
llvm::Constant * CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs=llvm::AttributeList(), bool Local=false)
Create a new runtime function with the specified type and name.
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition: Decl.cpp:4404
NodeId Parent
Definition: ASTDiff.cpp:192
OpenMP 4.0 [2.4, Array Sections].
Definition: ExprOpenMP.h:45
bool isValid() const
Definition: Address.h:36
static CharUnits One()
One - Construct a CharUnits quantity of one.
Definition: CharUnits.h:58
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition: Type.cpp:1613
void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override
Emits a critical region.
virtual Address getAddrOfDeclareTargetLink(const VarDecl *VD)
Returns the address of the variable marked as declare target with link clause.
llvm::CallInst * EmitNounwindRuntimeCall(llvm::Value *callee, const Twine &name="")
Describes the capture of either a variable, or &#39;this&#39;, or variable-length array type.
Definition: Stmt.h:3118
const CodeGen::CGBlockInfo * BlockInfo
ArrayBuilder beginArray(llvm::Type *eltTy=nullptr)
virtual Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const
Gets the address of the native argument basing on the address of the target-specific parameter...
llvm::Constant * createForStaticInitFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_for_static_init_* runtime function for the specified size IVSize and sign IVSigned...
CGBlockInfo - Information to generate a block literal.
Definition: CGBlocks.h:153
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
Definition: CGExpr.cpp:223
OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
LValueBaseInfo getBaseInfo() const
Definition: CGValue.h:319
RValue - This trivial value class is used to represent the result of an expression that is evaluated...
Definition: CGValue.h:39
bool needCleanups(unsigned N)
Returns true if the private copy requires cleanups.
Class intended to support codegen of all kind of the reduction clauses.
llvm::Constant * createDispatchFiniFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_fini_* runtime function for the specified size IVSize and sign IVSigned...
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition: CharUnits.h:179
Expr * getCombiner()
Get combiner expression of the declare reduction construct.
Definition: DeclOpenMP.h:153
TypeSourceInfo * getTrivialTypeSourceInfo(QualType T, SourceLocation Loc=SourceLocation()) const
Allocate a TypeSourceInfo where all locations have been initialized to a given location, which defaults to the empty location.
virtual bool isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static chunked.
unsigned getNumLoops() const
Get number of loops associated with the clause.
virtual void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, llvm::GlobalValue::LinkageTypes Linkage)
Creates offloading entry for the provided entry ID ID, address Addr, size Size, and flags Flags...
unsigned Offset
Definition: Format.cpp:1631
llvm::CallingConv::ID getRuntimeCC() const
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition: Expr.cpp:3101
QualType getTgtDeviceImageQTy()
Returns __tgt_device_image type.
This represents implicit clause 'depend' for the '#pragma omp task' directive.
KmpTaskTFields
Indexes of fields for type kmp_task_t.
void emitDeferredTargetDecls() const
Emit deferred declare target variables marked for deferred emission.
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
virtual void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Required to resolve existing problems in the runtime.
virtual void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr *> Vars, SourceLocation Loc)
Emit flush of the variables specified in 'omp flush' directive.
static void createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, ArrayRef< llvm::Constant *> Data, T &Parent)
bool addPrivate(const VarDecl *LocalVD, const llvm::function_ref< Address()> PrivateGen)
Registers LocalVD variable as a private and apply PrivateGen function for it to generate correspondin...
void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc) override
Emit code for 'taskwait' directive.
Represent the declaration of a variable (in which case it is an lvalue) a function (in which case it ...
Definition: Decl.h:637
This represents one expression.
Definition: Expr.h:106
Allow any unmodeled side effect.
Definition: Expr.h:599
virtual llvm::Value * emitParallelOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP parallel directive D.
static Address invalid()
Definition: Address.h:35
void loadOffloadInfoMetadata()
Loads all the offload entries information from the host IR metadata.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF, const OMPDeclareReductionDecl *DRD, const Expr *InitOp, Address Private, Address Original, QualType Ty)
static llvm::Value * emitReduceCombFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N, const Expr *ReductionOp, const Expr *LHS, const Expr *RHS, const Expr *PrivateRef)
Emits reduction combiner function:
const AnnotatedLine * Line
Enters a new scope for capturing cleanups, all of which will be executed once the scope is exited...
Stmt * IgnoreContainers(bool IgnoreCaptured=false)
Skip no-op (attributed, compound) container stmts and skip captured stmt at the top, if IgnoreCaptured is true.
Definition: Stmt.cpp:147
static llvm::Value * emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, QualType SharedsPtrTy, llvm::Value *TaskFunction, llvm::Value *TaskPrivatesMap)
Emit a proxy function which accepts kmp_task_t as the second argument.
bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a parallel-kind directive.
const CGFunctionInfo & arrangeNullaryFunction()
A nullary function is a freestanding function of type 'void ()'.
Definition: CGCall.cpp:701
std::string OMPHostIRFile
Name of the IR file that contains the result of the OpenMP target host code generation.
Definition: LangOptions.h:247
void emitKmpRoutineEntryT(QualType KmpInt32Ty)
Build type kmp_routine_entry_t (if not built yet).
const T * castAs() const
Member-template castAs<specific type>.
Definition: Type.h:6811
unsigned getLine() const
Return the presumed line number of this location.
llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST) override
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
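As a worked example of where the dispatch entry points are used (the function and array are hypothetical), a dynamically scheduled loop is driven by an init/next protocol rather than a precomputed iteration range:

  void scale(float *a, int n) {
    // Lowered roughly as: __kmpc_dispatch_init_4(...); each thread then loops
    // on __kmpc_dispatch_next_4(...) to fetch [lower, upper] chunks until it
    // returns 0 (__kmpc_dispatch_fini_* is only needed for ordered loops).
    #pragma omp parallel for schedule(dynamic, 16)
    for (int i = 0; i < n; ++i)
      a[i] *= 2.0f;
  }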
unsigned IVSize
Size of the iteration variable in bits.
static bool checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD)
Checks if destructor function is required to be generated.
void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr *> CopyprivateVars, ArrayRef< const Expr *> DestExprs, ArrayRef< const Expr *> SrcExprs, ArrayRef< const Expr *> AssignmentOps) override
Emits a single region.
Represents a C++ destructor within a class.
Definition: DeclCXX.h:2706
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion)
QualType SavedKmpTaskloopTQTy
Saved kmp_task_t for taskloop-based directive.
static RecordDecl * createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpRoutineEntryPointerQTy)
VlaSizePair getVLASize(const VariableArrayType *vla)
Returns an LLVM value that corresponds to the size, in non-variably-sized elements, of a variable length array type, plus that largest non-variably-sized element type.
field_iterator field_end() const
Definition: Decl.h:3787
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition: Address.h:44
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name) override
Creates artificial threadprivate variable with name Name and type VarType.
DeclContext * getDeclContext()
Definition: DeclBase.h:427
static llvm::iterator_range< specific_clause_iterator< SpecificClause > > getClausesOfKind(ArrayRef< OMPClause *> Clauses)
Definition: StmtOpenMP.h:130
void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr *> Vars, SourceLocation Loc) override
Emit flush of the variables specified in 'omp flush' directive.
virtual void registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D, llvm::Constant *Dtor, llvm::Constant *Addr)=0
Emit code to force the execution of a destructor during global teardown.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, bool Chunked, bool Ordered)
Map the OpenMP loop schedule to the runtime enumeration.
QualType getType() const
Definition: Expr.h:128
llvm::GlobalValue::LinkageTypes getLLVMLinkageVarDefinition(const VarDecl *VD, bool IsConstant)
Returns LLVM linkage for a declarator.
DefinitionKind hasDefinition(ASTContext &) const
Check whether this variable is defined in this translation unit.
Definition: Decl.cpp:2141
TagDecl * getAsTagDecl() const
Retrieves the TagDecl that this type refers to, either because the type is a TagType or because it is...
Definition: Type.cpp:1621
Provides LLVM&#39;s BitmaskEnum facility to enumeration types declared in namespace clang.
SmallVector< const Expr *, 4 > FirstprivateVars
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, ArrayType::ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type...
TBAAAccessInfo getTBAAInfo() const
Definition: CGValue.h:308
CharUnits alignmentOfArrayElement(CharUnits elementSize) const
Given that this is the alignment of the first element of an array, return the minimum alignment of an...
Definition: CharUnits.h:197
static llvm::Value * emitCopyprivateCopyFunction(CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef< const Expr *> CopyprivateVars, ArrayRef< const Expr *> DestExprs, ArrayRef< const Expr *> SrcExprs, ArrayRef< const Expr *> AssignmentOps, SourceLocation Loc)
virtual Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc)
Returns address of the threadprivate variable for the current thread.
LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T)
QualType getRecordType(const RecordDecl *Decl) const
Represents an unpacked "presumed" location which can be presented to the user.
void Emit(CodeGenFunction &CGF, Flags) override
Emit the cleanup.
virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Helper to emit outlined function for 'target' directive.
void registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, llvm::Constant *Addr, llvm::Constant *ID, OMPTargetRegionEntryKind Flags)
Register target region entry.
bool isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a taskloop directive.
QualType getFunctionType(QualType ResultTy, ArrayRef< QualType > Args, const FunctionProtoType::ExtProtoInfo &EPI) const
Return a normal function type with a typed argument list.
Definition: ASTContext.h:1380
llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr) override
Emit code for the initialization of a threadprivate variable.
llvm::Value * EmitCastToVoidPtr(llvm::Value *value)
Emit a cast to void* in the appropriate address space.
Definition: CGExpr.cpp:50
Allow UB that we can give a value, but not arbitrary unmodeled side effects.
Definition: Expr.h:597
const Type * getBaseElementTypeUnsafe() const
Get the base element type of this type, potentially discarding type qualifiers.
Definition: Type.h:6688
Expr * getInitPriv()
Get Priv variable of the initializer.
Definition: DeclOpenMP.h:181
This represents clause 'firstprivate' in the '#pragma omp ...' directives.
ValueDecl * getDecl()
Definition: Expr.h:1114
void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc) override
Emit a taskgroup region.
QualType KmpTaskTQTy
const Qualifiers & getQuals() const
Definition: CGValue.h:311
Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc) override
Returns address of the threadprivate variable for the current thread.
bool isUnionType() const
Definition: Type.cpp:475
const LangOptions & getLangOpts() const
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
ASTContext & getContext() const
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition: Type.h:703
OpenMPProcBindClauseKind
OpenMP attributes for 'proc_bind' clause.
Definition: OpenMPKinds.h:51
llvm::Constant * createDispatchInitFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_init_* runtime function for the specified size IVSize and sign IVSigned...
const SourceManager & SM
Definition: Format.cpp:1490
virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr)
Emits a critical region.
VarDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition: Decl.cpp:2026
GlobalDecl - represents a global declaration.
Definition: GlobalDecl.h:35
bool hasClausesOfKind() const
Returns true if the current directive has one or more clauses of a specific kind. ...
Definition: StmtOpenMP.h:162
llvm::Value * emitParallelOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP parallel directive D.
virtual bool emitTargetGlobalVariable(GlobalDecl GD)
Emit the global variable if it is a valid device global variable.
void finishAndAddTo(AggregateBuilderBase &parent)
Given that this builder was created by beginning an array or struct component on the given parent bui...
AttrVec & getAttrs()
Definition: DeclBase.h:479
CanQualType getCanonicalTypeUnqualified() const
bool hasAttrs() const
Definition: DeclBase.h:473
QualType KmpDependInfoTy
Type typedef struct kmp_depend_info { kmp_intptr_t base_addr; size_t len; struct { bool in:1; bool ou...
static llvm::Value * emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime, CodeGenFunction &CGF, const OMPExecutableDirective &D)
Emit the number of threads for a target directive.
LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy)
Definition: CGExpr.cpp:2332
QualType TgtBinaryDescriptorQTy
struct __tgt_bin_desc{ int32_t NumDevices; // Number of devices supported.
The l-value was considered opaque, so the alignment was determined from a type.
RecordDecl * getDecl() const
Definition: Type.h:4380
const char * getFilename() const
Return the presumed filename of this location.
const SpecificClause * getSingleClause() const
Gets a single clause of the specified kind associated with the current directive iff there is only on...
Definition: StmtOpenMP.h:148
static void emitReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp)
Emit reduction combiner.
Expr * getStrideVariable() const
Definition: StmtOpenMP.h:841
This represents 'num_teams' clause in the '#pragma omp ...' directive.
OpaqueValueExpr - An expression referring to an opaque object of a fixed type and value class...
Definition: Expr.h:945
Address CreateBitCast(Address Addr, llvm::Type *Ty, const llvm::Twine &Name="")
Definition: CGBuilder.h:142
virtual void registerTargetGlobalVariable(const VarDecl *VD, llvm::Constant *Addr)
Checks if the provided global decl GD is a declare target variable and registers it when emitting cod...
static llvm::Value * emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime, CodeGenFunction &CGF, const OMPExecutableDirective &D)
Emit the number of teams for a target directive.
#define false
Definition: stdbool.h:33
Kind
CanProxy< U > castAs() const
This captures a statement into a function.
Definition: Stmt.h:3105
QualType getCanonicalType() const
Definition: Type.h:6111
IdentFieldIndex
decl_type * getPreviousDecl()
Return the previous declaration of this declaration or NULL if this is the first declaration.
Definition: Redeclarable.h:204
static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)
Returns default flags for the barriers depending on the directive, for which this barrier is going to ...
unsigned getColumn() const
Return the presumed column number of this location.
static const Stmt * getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body)
Checks if the Body is the CompoundStmt and returns its child statement iff there is only one that is ...
static with chunk adjustment (e.g., simd)
void pushDestroy(QualType::DestructionKind dtorKind, Address addr, QualType type)
pushDestroy - Push the standard destructor for the given type as at least a normal cleanup...
Definition: CGDecl.cpp:1948
void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, const RegionCodeGenTy &ThenGen, const RegionCodeGenTy &ElseGen)
Emits code for OpenMP 'if' clause using specified CodeGen function.
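A short illustrative use of the clause; the threshold is arbitrary. The handler above emits a run-time branch between the parallel (ThenGen) and serial (ElseGen) versions of the region:

  void maybe_parallel(int *a, int n) {
    // Runs in parallel only when the problem is large enough; otherwise the
    // serial code path generated for the 'else' side is taken.
    #pragma omp parallel for if(n >= 1000)
    for (int i = 0; i < n; ++i)
      a[i] = i;
  }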
void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancel' construct.
Encodes a location in the source.
static const OMPExecutableDirective * getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D)
Check for inner distribute directive.
llvm::Type * getIdentTyPointerTy()
Returns pointer to ident_t type.
llvm::Value * MapTypesArray
The array of map types passed to the runtime library.
This represents '#pragma omp declare reduction ...' directive.
Definition: DeclOpenMP.h:103
unsigned getOpenMPDefaultSimdAlign(QualType T) const
Get default simd alignment of the specified complete type in bits.
llvm::Value * EvaluateExprAsBool(const Expr *E)
EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...
Definition: CGExpr.cpp:164
llvm::PointerIntPair< llvm::Value *, 1, bool > Final
bool isVariablyModifiedType() const
Whether this type is a variably-modified type (C99 6.7.5).
Definition: Type.h:2095
virtual void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr *> CopyprivateVars, ArrayRef< const Expr *> DestExprs, ArrayRef< const Expr *> SrcExprs, ArrayRef< const Expr *> AssignmentOps)
Emits a single region.
virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams...
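A hypothetical target teams construct showing where the two expressions forwarded by __kmpc_push_num_teams come from:

  void launch() {
    // num_teams and thread_limit are evaluated in the encountering task and
    // pushed to the runtime before the teams region starts.
    #pragma omp target teams num_teams(8) thread_limit(64)
    {
      // ... team-level work ...
    }
  }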
void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) override
Emit an implicit/explicit barrier for OpenMP threads.
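A small, self-contained illustration of an explicit barrier (the data movement is contrived): every thread must finish the first loop before any thread reads a[] in the second.

  void phased(int *a, int *b, int n) {
    #pragma omp parallel
    {
      #pragma omp for nowait
      for (int i = 0; i < n; ++i) a[i] = i;

      #pragma omp barrier           // lowered through the barrier-emission entry above

      #pragma omp for
      for (int i = 0; i < n; ++i) b[i] = a[n - 1 - i];
    }
  }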
This is a basic class for representing single OpenMP executable directive.
Definition: StmtOpenMP.h:33
bool emitTargetGlobalVariable(GlobalDecl GD) override
Emit the global variable if it is a valid device global variable.
llvm::Value * emitTeamsOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP teams directive D.
Lower bound for 'ordered' versions.
ASTContext & getASTContext() const LLVM_READONLY
Definition: DeclBase.cpp:376
const Decl * getDecl() const
Definition: GlobalDecl.h:69
virtual void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancellation point' construct.
OpenMPDirectiveKind
OpenMP directives.
Definition: OpenMPKinds.h:23
bool capturesVariable() const
Determine whether this capture handles a variable (by reference).
Definition: Stmt.h:3146
void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr *> NumIterations) override
Emit initialization for doacross loop nesting support.
Set if the nonmonotonic schedule modifier was present.
virtual llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr)
Emit code for the initialization of a threadprivate variable.
OpenMPLinearClauseKind Modifier
Modifier of 'linear' clause.
Definition: OpenMPClause.h:102
const ParmVarDecl * getParamDecl(unsigned i) const
Definition: Decl.h:2285
static void EmitOMPTargetSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S)
Emit device code for the target simd directive.
void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) override
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
bool isTrivialInitializer(const Expr *Init)
Determine whether the given initializer is trivial in the sense that it requires no code to be genera...
Definition: CGDecl.cpp:1564
CanQualType VoidTy
Definition: ASTContext.h:1016
bool IVSigned
Sign of the iteration variable.
void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) override
Call the appropriate runtime routine to initialize it before start of loop.
virtual llvm::Value * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts)
Emits outlined function for the OpenMP task directive D.
void emitAggregateType(CodeGenFunction &CGF, unsigned N)
Emits the code for the variable-modified type, if required.
static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForDirective &S)
bool isAnyPointerType() const
Definition: Type.h:6300
This declaration is only a declaration.
Definition: Decl.h:1147
unsigned size() const
Return number of entries defined so far.
virtual void Enter(CodeGenFunction &CGF)
An aligned address.
Definition: Address.h:25
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
Stmt * getCapturedStmt()
Retrieve the statement being captured.
Definition: Stmt.h:3206
void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) override
Emit the target data mapping code associated with D.
DestructionKind isDestructedType() const
Returns a nonzero value if objects of this type require non-trivial work to clean up after...
Definition: Type.h:1152
llvm::Value * getCriticalRegionLock(StringRef CriticalName)
Returns corresponding lock object for the specified critical region name.
virtual void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, ArrayRef< llvm::Value *> CapturedVars, const Expr *IfCond)
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
unsigned getCustomDiagID(Level L, const char(&FormatString)[N])
Return an ID for a diagnostic with the specified format string and level.
Definition: Diagnostic.h:776
void emitThreadPrivateVarInit(CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc)
Emits initialization code for the threadprivate variables.
Complete object dtor.
Definition: ABI.h:36
Address CreateConstInBoundsGEP2_32(Address Addr, unsigned Idx0, unsigned Idx1, const llvm::DataLayout &DL, const llvm::Twine &Name="")
Definition: CGBuilder.h:248
JumpDest getJumpDestInCurrentScope(llvm::BasicBlock *Target)
The given basic block lies in the current EH scope, but may be a target of a potentially scope-crossi...
virtual void emitProcBindClause(CodeGenFunction &CGF, OpenMPProcBindClauseKind ProcBind, SourceLocation Loc)
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...
virtual void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc)
Emits code for a taskyield directive.
void EmitAggregateAssign(LValue Dest, LValue Src, QualType EltTy)
Emit an aggregate assignment.
QualType getType() const
Definition: CGValue.h:264
CharUnits getNaturalTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, bool forPointeeType=false)
bool hasTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum) const
Return true if a target region entry with the provided information exists.
virtual void functionFinished(CodeGenFunction &CGF)
Cleans up references to the objects in finished function.
const Expr * getRefExpr(unsigned N) const
Returns the base declaration of the reduction item.
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N) override
Required to resolve existing problems in the runtime.
Expr * getInitOrig()
Get Orig variable of the initializer.
Definition: DeclOpenMP.h:178
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition: Expr.cpp:215
void registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, CharUnits VarSize, OMPTargetGlobalVarEntryKind Flags, llvm::GlobalValue::LinkageTypes Linkage)
Register device global variable entry.
Struct with the values to be passed to the static runtime function.
ConstantAddress GetAddrOfConstantCString(const std::string &Str, const char *GlobalName=nullptr)
Returns a pointer to a character array containing the literal and a terminating '\0' character...
TaskResultTy emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, const OMPTaskDataTy &Data)
Emits the common initialization for task-based directives. TaskFunction is an LLVM function with type void(*)(i32 gtid, i32 part_id, captured_struct *context); SharedsTy is a type which contains references to the shared variables; Shareds is the context with the list of shared variables from the TaskFunction; Data is additional data for task generation like the final list of privates.
llvm::Constant * GetAddrOfGlobal(GlobalDecl GD, ForDefinition_t IsForDefinition=NotForDefinition)
static void EmitOMPTargetTeamsDistributeDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeDirective &S)
Emit device code for the target teams distribute directive.
A basic class for pre|post-action for advanced codegen sequence for OpenMP region.
JumpDest getOMPCancelDestination(OpenMPDirectiveKind Kind)
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type, returning the result.
void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads) override
Emit an ordered region.
void addUsedGlobal(llvm::GlobalValue *GV)
Add a global to a list to be added to the llvm.used metadata.
FunctionArgList - Type for representing both the decl and type of parameters to a function...
Definition: CGCall.h:356
void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emits a call for the taskloop directive. TaskFunction is an LLVM function with type void(*)(i32 gtid, i32 part_id, captured_struct *context); SharedsTy is a type which contains references to the shared variables; Shareds is the context with the list of shared variables from the TaskFunction; IfCond is not a nullptr if the 'if' clause was specified, nullptr otherwise; Data is additional data for task generation like the final list of privates.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
static void emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, const llvm::APSInt &VLENVal, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State)
CanQualType CharTy
Definition: ASTContext.h:1018
void setAction(PrePostActionTy &Action) const
CGFunctionInfo - Class to encapsulate the information about a function definition.
This class organizes the cross-function state that is used while generating LLVM code.
QualType withRestrict() const
Definition: Type.h:826
CGOpenMPRuntime & getOpenMPRuntime()
Return a reference to the configured OpenMP runtime.
static const VarDecl * getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE)
StructBuilder beginStruct(llvm::StructType *ty=nullptr)
OpenMPScheduleClauseModifier
OpenMP modifiers for 'schedule' clause.
Definition: OpenMPKinds.h:67
LValue EmitOMPSharedLValue(const Expr *E)
Emits the lvalue for the expression with possibly captured variable.
llvm::GlobalValue * GetGlobalValue(StringRef Ref)
Dataflow Directional Tag Classes.
static ApplyDebugLocation CreateDefaultArtificial(CodeGenFunction &CGF, SourceLocation TemporaryLocation)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:722
static void emitForStaticInitCall(CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, const CGOpenMPRuntime::StaticRTInput &Values)
Class provides a way to call simple version of codegen for OpenMP region, or an advanced with possibl...
LValue EmitLoadOfReferenceLValue(LValue RefLVal)
Definition: CGExpr.cpp:2313
static llvm::Value * emitReduceInitFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction initializer function:
bool isValid() const
Return true if this is a valid SourceLocation object.
bool emitTargetFunctions(GlobalDecl GD) override
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
llvm::Value * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts) override
Emits outlined function for the OpenMP task directive D.
DeclContext - This is used only as base class of specific decl types that can act as declaration cont...
Definition: DeclBase.h:1262
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition: CGValue.h:93
EvalResult is a struct with detailed info about an evaluated expression.
Definition: Expr.h:571
Address CreateStructGEP(Address Addr, unsigned Index, CharUnits Offset, const llvm::Twine &Name="")
Definition: CGBuilder.h:172
virtual bool emitTargetGlobal(GlobalDecl GD)
Emit the global GD if it is meaningful for the target.
virtual void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancel' construct.
std::vector< llvm::Triple > OMPTargetTriples
Triples of the OpenMP targets that the host code codegen should take into account in order to generat...
Definition: LangOptions.h:243
OpenMPScheduleClauseModifier M1
Definition: OpenMPKinds.h:143
llvm::Value * LB
Loop lower bound.
A basic class for pre|post-action for advanced codegen sequence for OpenMP region.
LValue getSharedLValue(unsigned N) const
Returns LValue for the reduction item.
const Expr * getInit() const
Definition: Decl.h:1220
llvm::Constant * getPointer() const
Definition: Address.h:84
virtual void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emits a call for the taskloop directive. TaskFunction is an LLVM function with type void(*)(i32 gtid, i32 part_id, captured_struct *context); SharedsTy is a type which contains references to the shared variables; Shareds is the context with the list of shared variables from the TaskFunction; IfCond is not a nullptr if the 'if' clause was specified, nullptr otherwise; Data is additional data for task generation like the final list of privates.
void initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, unsigned Order)
Initialize target region entry.
llvm::Function * createOffloadingBinaryDescriptorRegistration()
Creates and registers offloading binary descriptor for the current compilation unit.
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition: CGBuilder.h:70
llvm::PointerIntPair< llvm::Value *, 1, bool > Priority
RTCancelKind
llvm::Constant * EmitNullConstant(QualType T)
Return the result of value-initializing the given type, i.e.
PresumedLoc getPresumedLoc(SourceLocation Loc, bool UseLineDirectives=true) const
Returns the "presumed" location that a SourceLocation specifies.
llvm::Value * UB
Loop upper bound.
llvm::Value * Chunk
Chunk size specified using 'schedule' clause (nullptr if chunk was not specified) ...
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition: CGBuilder.h:108
llvm::Module & getModule() const
llvm::Value * Chunk
Value of the chunk for the static_chunked scheduled loop.
Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...
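For orientation only: a plain parallel region is what reaches this fork entry. The region body is outlined by codegen into a microtask (typically named something like .omp_outlined.) whose address and captured arguments are passed to __kmpc_fork_call; the identifiers below are illustrative.

  void use(int);

  void run(int x) {
    // Body is extracted into a microtask of the form
    //   void microtask(kmp_int32 *gtid, kmp_int32 *btid, <captures>)
    // and invoked through __kmpc_fork_call(&loc, argc, microtask, <captures>).
    #pragma omp parallel firstprivate(x)
    use(x);
  }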
Not really used in Fortran any more.
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Emit outlined function for 'target' directive.
llvm::StructType * ConvertRecordDeclType(const RecordDecl *TD)
ConvertRecordDeclType - Lay out a tagged decl type like struct or union.
virtual bool isDefaultLocationConstant() const
Check if the default location must be constant.
Expr * IgnoreParenImpCasts() LLVM_READONLY
IgnoreParenImpCasts - Ignore parentheses and implicit casts.
Definition: Expr.cpp:2693
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc)
Emits address of the word in a memory where current thread id is stored.
void emitInitialization(CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, llvm::function_ref< bool(CodeGenFunction &)> DefaultInit)
Performs initialization of the private copy for the reduction item.
JumpDest ReturnBlock
ReturnBlock - Unified return block.
OffloadEntriesInfoManagerTy OffloadEntriesInfoManager
virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static non-chunked.
bool capturesVariableByCopy() const
Determine whether this capture handles a variable by copy.
Definition: Stmt.h:3149
Class that represents a component of a mappable expression.
API for captured statement code generation.
virtual bool emitDeclareTargetVarDefinition(const VarDecl *VD, llvm::GlobalVariable *Addr, bool PerformInit)
Emit code for the initialization of a declare target variable.
virtual void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values)
bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a distribute directive.
virtual StringRef getOutlinedHelperName() const
Get the function name of an outlined region.
static bool classof(const OMPClause *T)
ArraySubscriptExpr - [C99 6.5.2.1] Array Subscripting.
Definition: Expr.h:2312
virtual void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads)
Emit an ordered region.
This file defines OpenMP AST classes for executable directives and clauses.
static FieldDecl * addFieldToRecordDecl(ASTContext &C, DeclContext *DC, QualType FieldTy)
CodeGenTypes & getTypes() const
StructBuilder beginStruct(llvm::StructType *structTy=nullptr)
Address CreateConstArrayGEP(Address Addr, uint64_t Index, CharUnits EltSize, const llvm::Twine &Name="")
Given addr = [n x T]* ...
Definition: CGBuilder.h:195
static void EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDirective &S)
Emit device code for the target teams directive.
CleanupTy(PrePostActionTy *Action)
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition: Address.h:52
void initializeDeviceGlobalVarEntryInfo(StringRef Name, OMPTargetGlobalVarEntryKind Flags, unsigned Order)
Initialize device global variable entry.
static bool checkInitIsRequired(CodeGenFunction &CGF, ArrayRef< PrivateDataTy > Privates)
Check if duplication function is required for taskloops.
llvm::Value * EmitCheckedInBoundsGEP(llvm::Value *Ptr, ArrayRef< llvm::Value *> IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, unsigned &DeviceID, unsigned &FileID, unsigned &LineNum)
Obtain information that uniquely identifies a target entry.
Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal) override
Get the address of void * type of the private copy of the reduction item specified by the SharedLVal...
llvm::AssertingVH< llvm::Instruction > AllocaInsertPt
AllocaInsertPoint - This is an instruction in the entry block before which we prefer to insert alloca...
OpenMPLocationFlags
Values for bit flags used in the ident_t to describe the fields.
virtual void emitInlinedDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool HasCancel=false)
Emit code for the directive that does not require outlining.
OpenMPScheduleClauseKind
OpenMP attributes for 'schedule' clause.
Definition: OpenMPKinds.h:59
Expr * getNumIterations() const
Definition: StmtOpenMP.h:873
llvm::StringRef getName() const
Return the IR name of the pointer value.
Definition: Address.h:62
llvm::StructType * getLLVMType() const
Return the "complete object" LLVM type associated with this record.
Base for LValueReferenceType and RValueReferenceType.
Definition: Type.h:2673
bool hasSameType(QualType T1, QualType T2) const
Determine whether the given types T1 and T2 are equivalent.
Definition: ASTContext.h:2269
Entity that registers the offloading constants that were emitted so far.
void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI)
Set the attributes on the LLVM function for the given decl and function info.
StringRef getMangledName(GlobalDecl GD)
Internal linkage, which indicates that the entity can be referred to from within the translation unit...
Definition: Linkage.h:32
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
Definition: CGStmt.cpp:443
void addDecl(Decl *D)
Add the declaration D into this context.
Definition: DeclBase.cpp:1507
void emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *Callee, ArrayRef< llvm::Value *> Args=llvm::None) const
Emits Callee function call with arguments Args with location Loc.
llvm::Constant * createDispatchNextFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_next_* runtime function for the specified size IVSize and sign IVSigned...
void getCaptureFields(llvm::DenseMap< const VarDecl *, FieldDecl *> &Captures, FieldDecl *&ThisCapture) const
For a closure type, retrieve the mapping from captured variables and this to the non-static data memb...
Definition: DeclCXX.cpp:1394
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
Definition: ASTContext.h:2070
virtual void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc)
Emit a taskgroup region.
SourceManager & getSourceManager()
Definition: ASTContext.h:662
virtual llvm::Function * emitRegistrationFunction()
Creates the offloading descriptor in the event any target region was emitted in the current module an...
llvm::ConstantInt * getSize(CharUnits numChars)
Emit the given number of characters as a value of type size_t.
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
Definition: DeclBase.h:513
virtual void adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, const OMPExecutableDirective &D) const
Adjust some parameters for the target-based directives, like addresses of the variables captured by r...
X
Add a minimal nested name specifier fixit hint to allow lookup of a tag name from an outer enclosing ...
Definition: SemaDecl.cpp:13954
CharUnits toCharUnitsFromBits(int64_t BitSize) const
Convert a size in bits to a size in characters.
Lower bound for default (unordered) versions.
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, ArrayRef< llvm::Value *> CapturedVars, const Expr *IfCond) override
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams...
TranslationUnitDecl * getTranslationUnitDecl() const
Definition: ASTContext.h:1009
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
Definition: Expr.h:2682
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr *> Privates, ArrayRef< const Expr *> LHSExprs, ArrayRef< const Expr *> RHSExprs, ArrayRef< const Expr *> ReductionOps, ReductionOptionsTy Options)
Emit code for the reduction clause.
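As a reminder of what feeds the reduction codegen entries (reduction function, combiner, initializer), a standard reduction clause looks like this; the function and array are hypothetical:

  double sum_all(const double *a, int n) {
    double sum = 0.0;
    // Each thread accumulates into a private copy of 'sum'; the generated
    // reduction function then combines the per-thread copies.
    #pragma omp parallel for reduction(+ : sum)
    for (int i = 0; i < n; ++i)
      sum += a[i];
    return sum;
  }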
This represents 'nowait' clause in the '#pragma omp ...' directive.
static bool isTrivial(ASTContext &Ctx, const Expr *E)
Checks if the expression is constant or does not have non-trivial function calls. ...
llvm::PointerIntPair< llvm::Value *, 1, bool > Schedule
Represents a C++ struct/union/class.
Definition: DeclCXX.h:300
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block, taking care to avoid creation of branches from dummy blocks.
Definition: CGStmt.cpp:463
llvm::Function * CreateGlobalInitOrDestructFunction(llvm::FunctionType *ty, const Twine &name, const CGFunctionInfo &FI, SourceLocation Loc=SourceLocation(), bool TLS=false)
Definition: CGDeclCXX.cpp:324
bool isVoidType() const
Definition: Type.h:6544
llvm::Value * BasePointersArray
The array of base pointer passed to the runtime library.
static QualType getBaseOriginalType(const Expr *Base)
Return original type of the base expression for array section.
Definition: Expr.cpp:4257
static llvm::Value * emitReduceFiniFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction finalizer function:
OpenMPOffloadingReservedDeviceIDs
llvm::Type * ConvertType(QualType T)
bool isTLSSupported() const
Whether the target supports thread-local storage.
Definition: TargetInfo.h:1137
void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C) override
Emit code for the doacross ordered directive with the 'depend' clause.
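For context, the 'depend' clauses handled here come from cross-iteration (doacross) ordering; a hedged sketch, where a, use() and n are placeholders:
  #pragma omp for ordered(1)
  for (int i = 1; i < n; ++i) {
    #pragma omp ordered depend(sink: i-1)   // wait until iteration i-1 has posted
    use(a[i-1]);
    #pragma omp ordered depend(source)      // post completion of iteration i
  }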
Privates[]
Gets the list of initial values for linear variables.
Definition: OpenMPClause.h:151
OpenMPMapClauseKind
OpenMP mapping kind for 'map' clause.
Definition: OpenMPKinds.h:92
Qualifiers getQualifiers() const
Retrieve the set of qualifiers applied to this type.
Definition: Type.h:6099
Capturing by reference.
Definition: Lambda.h:38
static const Stmt * ignoreCompoundStmts(const Stmt *Body)
Discard all CompoundStmts intervening between two constructs.
LValue EmitLValue(const Expr *E)
EmitLValue - Emit code to compute a designator that specifies the location of the expression...
Definition: CGExpr.cpp:1236
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
A helper class of ConstantInitBuilder, used for building constant struct initializers.
void popTerminate()
Pops a terminate handler off the stack.
Definition: CGCleanup.h:583
CharUnits getNonVirtualSize() const
getNonVirtualSize - Get the non-virtual size (in chars) of an object, which is the size of the object...
Definition: RecordLayout.h:203
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
OpenMPRTLFunction
void addAttr(Attr *A)
Definition: DeclBase.cpp:840
unsigned getVirtualBaseIndex(const CXXRecordDecl *base) const
Return the LLVM field index corresponding to the given virtual base.
virtual void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false)
Emit an implicit/explicit barrier for OpenMP threads.
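As a rough mental model (not code from this file), an explicit barrier in user code maps to a single runtime call:
  #pragma omp barrier   // typically lowered to a __kmpc_barrier(&loc, gtid) call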
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition: Stmt.cpp:276
void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) override
Emit the outlined function for the 'target' directive.
RValue EmitLoadOfLValue(LValue V, SourceLocation Loc)
EmitLoadOfLValue - Given an expression that represents a value lvalue, this method emits the address ...
Definition: CGExpr.cpp:1760
static llvm::Value * emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPExecutableDirective &D, QualType KmpTaskTWithPrivatesPtrQTy, const RecordDecl *KmpTaskTWithPrivatesQTyRD, const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool WithLastIter)
Emit task_dup function (for initialization of private/firstprivate/lastprivate vars and last_iter fla...
virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues)
Call the appropriate runtime routine to initialize it before the start of the loop.
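Dispatch-style initialization corresponds to dynamically scheduled worksharing loops; a minimal illustrative source shape (body() and n are placeholders):
  // Each chunk of iterations is later fetched with a __kmpc_dispatch_next_* call.
  #pragma omp for schedule(dynamic, 4)
  for (int i = 0; i < n; ++i)
    body(i);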
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition: Decl.h:276
static RecordDecl * createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, ArrayRef< PrivateDataTy > Privates)
CGCapturedStmtInfo * CapturedStmtInfo
llvm::Value * EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)
Emit a conversion from the specified type to the specified destination type, both of which are LLVM s...
CGCXXABI & getCXXABI() const
Expr * getCombinerIn()
Get In variable of the combiner.
Definition: DeclOpenMP.h:156
const VariableArrayType * getAsVariableArrayType(QualType T) const
Definition: ASTContext.h:2416
__DEVICE__ int max(int __a, int __b)
CanQualType IntTy
Definition: ASTContext.h:1025
decl_type * getMostRecentDecl()
Returns the most recent (re)declaration of this declaration.
Definition: Redeclarable.h:226
bool isEvaluatable(const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects) const
isEvaluatable - Call EvaluateAsRValue to see if this expression can be constant folded without side-e...
void EmitAggregateCopy(LValue Dest, LValue Src, QualType EltTy, AggValueSlot::Overlap_t MayOverlap, bool isVolatile=false)
EmitAggregateCopy - Emit an aggregate copy.
Definition: CGExprAgg.cpp:1823
capture_range captures()
Definition: Stmt.h:3240
A reference to a declared variable, function, enum, etc.
Definition: Expr.h:1041
QualType getIntPtrType() const
Return a type compatible with "intptr_t" (C99 7.18.1.4), as defined by the target.
static RValue get(llvm::Value *V)
Definition: CGValue.h:86
bool isUnion() const
Definition: Decl.h:3252
virtual void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values)
Call the appropriate runtime routine to initialize it before the start of the loop.
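For static schedules each thread asks the runtime once for its bounds; a declaration sketch of the 32-bit signed libomp entry point this ultimately targets (ident_t and kmp_int32 are libomp runtime types, assumed here for illustration):
  extern "C" void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                                           kmp_int32 schedtype, kmp_int32 *plastiter,
                                           kmp_int32 *plower, kmp_int32 *pupper,
                                           kmp_int32 *pstride, kmp_int32 incr,
                                           kmp_int32 chunk);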
llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr *> LHSExprs, ArrayRef< const Expr *> RHSExprs, const OMPTaskDataTy &Data) override
Emit code for the initialization of the task reduction clause.
bool isPointerType() const
Definition: Type.h:6296
static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetDirective &S)
Emit device code for the target directive.
void EmitBranchThroughCleanup(JumpDest Dest)
EmitBranchThroughCleanup - Emit a branch from the current insert block through the normal cleanup han...
Definition: CGCleanup.cpp:1050
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
Definition: CGDebugInfo.h:732
ParamKindTy
Kind of parameter in a function with 'declare simd' directive.
static void EmitOMPTargetParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForDirective &S)
Emit device code for the target parallel for directive.
QualType KmpDimTy
struct kmp_dim { // loop bounds info casted to kmp_int64 kmp_int64 lo; // lower kmp_int64 up; // uppe...
void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind) override
Call the appropriate runtime routine to notify that we finished all the work with the current loop...
QualType getType() const
Definition: Decl.h:648
An l-value expression is a reference to an object with independent storage.
Definition: Specifiers.h:114
static RValue getAggregate(Address addr, bool isVolatile=false)
Definition: CGValue.h:107
bool isOpenMPLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a directive with an associated loop construct.
LValue - This represents an lvalue reference.
Definition: CGValue.h:167
virtual unsigned getDefaultLocationReserved2Flags() const
Returns additional flags that can be stored in reserved_2 field of the default location.
Information for lazily generating a cleanup.
Definition: EHScopeStack.h:147
QualType SavedKmpTaskTQTy
Saved kmp_task_t for task directive.
virtual Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal)
Get the address of void * type of the private copy of the reduction item specified by the SharedLVal...
Represents a C array with a specified size that is not an integer-constant-expression.
Definition: Type.h:2971
CanQualType BoolTy
Definition: ASTContext.h:1017
void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Value *OutlinedFn, ArrayRef< llvm::Value *> CapturedVars) override
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
APSInt & getInt()
Definition: APValue.h:252
const LangOptions & getLangOpts() const
llvm::Constant * createRuntimeFunction(unsigned Function)
Returns specified OpenMP runtime function.
llvm::StringSet DeclareTargetWithDefinition
Set of declare target variables with the generated initializer.
static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)
Definition: Decl.cpp:3762
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, const llvm::Twine &Name="")
Definition: CGBuilder.h:164
llvm::Value * emitArrayLength(const ArrayType *arrayType, QualType &baseType, Address &addr)
emitArrayLength - Compute the length of an array, even if it's a VLA, and drill down to the base elem...
Expr * getCombinerOut()
Get Out variable of the combiner.
Definition: DeclOpenMP.h:159
unsigned getNumParams() const
Return the number of parameters this function must have based on its FunctionType.
Definition: Decl.cpp:3055
const CGRecordLayout & getCGRecordLayout(const RecordDecl *)
getCGRecordLayout - Return record layout info for the given record decl.
bool hasInit() const
Definition: Decl.cpp:2164
No in-class initializer.
Definition: Specifiers.h:230
llvm::Value * getPointer() const
Definition: CGValue.h:323
base_class_range vbases()
Definition: DeclCXX.h:840
This class handles loading and caching of source files into memory.
void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancellation point' construct.
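An illustrative source form of the construct this handles (do_work() is a placeholder):
  #pragma omp parallel
  {
    #pragma omp cancellation point parallel   // queries the runtime and branches to cleanup if cancelled
    do_work();
  }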
A helper class of ConstantInitBuilder, used for building constant array initializers.
void EmitGlobal(GlobalDecl D)
Emit code for a single global function or var decl.
virtual void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc)
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...
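A sketch of the clause that triggers this push (the thread count is illustrative, do_work() is a placeholder):
  #pragma omp parallel num_threads(4)   // 4 is pushed via __kmpc_push_num_threads before the fork
  do_work();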
bool capturesVariableArrayType() const
Determine whether this capture handles a variable-length array type.
Definition: Stmt.h:3155
bool isLocalVarDeclOrParm() const
Similar to isLocalVarDecl but also includes parameters.
Definition: Decl.h:1114
Attr - This represents one attribute.
Definition: Attr.h:44
SmallVector< const Expr *, 4 > FirstprivateInits
SourceLocation getLocation() const
Definition: DeclBase.h:418
This represents clause 'use_device_ptr' in the '#pragma omp ...' directives.
static void EmitOMPTargetTeamsDistributeSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeSimdDirective &S)
Emit device code for the target teams distribute simd directive.
void EmitNullInitialization(Address DestPtr, QualType Ty)
EmitNullInitialization - Generate code to set a value of the given type to null. If the type contains...
bool isExternallyVisible() const
Definition: Decl.h:380
llvm::Constant * getOrCreateThreadPrivateCache(const VarDecl *VD)
If the specified mangled name is not in the module, create and return threadprivate cache object...
llvm::Value * SizesArray
The array of sizes passed to the runtime library.
virtual void emitTargetNumIterationsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device, const llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter)
Emit code that pushes the trip count of loops associated with constructs 'target teams distribute' an...
QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const
getIntTypeForBitwidth - Sets integer QualTy according to the specified details: bitwidth, signed/unsigned.
static OMPLinearClause * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, OpenMPLinearClauseKind Modifier, SourceLocation ModifierLoc, SourceLocation ColonLoc, SourceLocation EndLoc, ArrayRef< Expr *> VL, ArrayRef< Expr *> PL, ArrayRef< Expr *> IL, Expr *Step, Expr *CalcStep, Stmt *PreInit, Expr *PostUpdate)
Creates clause with a list of variables VL and a linear step Step.
bool Privatize()
Privatizes local variables previously registered as private.
static void EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForSimdDirective &S)
Emit device code for the target teams distribute parallel for simd directive.
CanQualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc)
Emit code for 'taskwait' directive.
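An illustrative source construct handled here (produce() is a placeholder):
  #pragma omp task
  produce();
  #pragma omp taskwait   // waits for completion of the child tasks of the current task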
unsigned getLLVMFieldNo(const FieldDecl *FD) const
Return llvm::StructType element number that corresponds to the field FD.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for.
Definition: CGCall.cpp:1550
const llvm::Triple & getTriple() const