clang  6.0.0
CGOpenMPRuntime.cpp
Go to the documentation of this file.
1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGOpenMPRuntime.h"
17 #include "CodeGenFunction.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/BitmaskEnum.h"
23 #include "llvm/Bitcode/BitcodeReader.h"
24 #include "llvm/IR/CallSite.h"
25 #include "llvm/IR/DerivedTypes.h"
26 #include "llvm/IR/GlobalValue.h"
27 #include "llvm/IR/Value.h"
28 #include "llvm/Support/Format.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <cassert>
31 
32 using namespace clang;
33 using namespace CodeGen;
34 
35 namespace {
36 /// \brief Base class for handling code generation inside OpenMP regions.
/// Carries the region kind, the region code generator and the cancel flag,
/// and leaves getThreadIDVariable() for subclasses to implement.
37 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
38 public:
39  /// \brief Kinds of OpenMP regions used in codegen.
40  enum CGOpenMPRegionKind {
41  /// \brief Region with outlined function for standalone 'parallel'
42  /// directive.
43  ParallelOutlinedRegion,
44  /// \brief Region with outlined function for standalone 'task' directive.
45  TaskOutlinedRegion,
46  /// \brief Region for constructs that do not require function outlining,
47  /// like 'for', 'sections', 'atomic' etc. directives.
48  InlinedRegion,
49  /// \brief Region with outlined function for standalone 'target' directive.
50  TargetRegion,
51  };
52 
 /// \brief Construct region info bound to the captured statement \p CS.
 // NOTE(review): the embedded listing numbering jumps from 54 to 56 here, so
 // the parameters that feed CodeGen(CodeGen) and Kind(Kind) in the initializer
 // list are not visible in this extract.
53  CGOpenMPRegionInfo(const CapturedStmt &CS,
54  const CGOpenMPRegionKind RegionKind,
56  bool HasCancel)
57  : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
58  CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
59 
 /// \brief Construct region info that is not bound to a captured statement
 /// (the base class is built from CR_OpenMP alone).
 // NOTE(review): the embedded listing numbering jumps from 60 to 62 here; the
 // CodeGen/Kind parameters are again not visible in this extract.
60  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
62  bool HasCancel)
63  : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
64  Kind(Kind), HasCancel(HasCancel) {}
65 
66  /// \brief Get a variable or parameter for storing global thread id
67  /// inside OpenMP construct.
68  virtual const VarDecl *getThreadIDVariable() const = 0;
69 
70  /// \brief Emit the captured statement body.
71  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
72 
73  /// \brief Get an LValue for the current ThreadID variable.
74  /// \return LValue for thread id variable. This LValue always has type int32*.
75  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
76 
 /// \brief Hook for emitting an untied-task switch point. The base
 /// implementation does nothing.
77  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
78 
 /// \brief Kind of region passed to the constructor.
79  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
80 
 /// \brief Directive kind stored in Kind.
81  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
82 
 /// \brief Value of the HasCancel flag passed to the constructor.
83  bool hasCancel() const { return HasCancel; }
84 
 /// \brief Any captured-statement info whose kind is CR_OpenMP is treated as a
 /// CGOpenMPRegionInfo.
85  static bool classof(const CGCapturedStmtInfo *Info) {
86  return Info->getKind() == CR_OpenMP;
87  }
88 
89  ~CGOpenMPRegionInfo() override = default;
90 
91 protected:
92  CGOpenMPRegionKind RegionKind;
93  RegionCodeGenTy CodeGen;
 // NOTE(review): the embedded listing numbering skips 94 here; the declaration
 // of the Kind member returned by getDirectiveKind() is not visible.
95  bool HasCancel;
96 };
97 
98 /// \brief API for captured statement code generation in OpenMP constructs.
/// This variant always registers itself with the base class as a
/// ParallelOutlinedRegion and requires a non-null thread-id variable.
99 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
100 public:
 /// \param CS Captured statement forwarded to the base class.
 /// \param ThreadIDVar Thread-id variable; asserted to be non-null.
 /// \param CodeGen Region code generator forwarded to the base class.
 /// \param Kind Directive kind forwarded to the base class.
 /// \param HasCancel Cancel flag forwarded to the base class.
 /// \param HelperName Name returned by getHelperName(). It is stored as a
 /// StringRef; presumably the caller keeps the underlying characters alive
 /// for the lifetime of this object - TODO confirm.
101  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
102  const RegionCodeGenTy &CodeGen,
103  OpenMPDirectiveKind Kind, bool HasCancel,
104  StringRef HelperName)
105  : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
106  HasCancel),
107  ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
108  assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
109  }
110 
111  /// \brief Get a variable or parameter for storing global thread id
112  /// inside OpenMP construct.
113  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
114 
115  /// \brief Get the name of the capture helper.
116  StringRef getHelperName() const override { return HelperName; }
117 
 /// \brief Matches OpenMP region infos whose region kind is
 /// ParallelOutlinedRegion.
118  static bool classof(const CGCapturedStmtInfo *Info) {
119  return CGOpenMPRegionInfo::classof(Info) &&
120  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
121  ParallelOutlinedRegion;
122  }
123 
124 private:
125  /// \brief A variable or parameter storing global thread id for OpenMP
126  /// constructs.
127  const VarDecl *ThreadIDVar;
 /// Name handed out by getHelperName().
128  StringRef HelperName;
129 };
130 
131 /// \brief API for captured statement code generation in OpenMP constructs.
/// This variant registers itself with the base class as a TaskOutlinedRegion
/// and delegates untied-task switch emission to an UntiedTaskActionTy.
132 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
133 public:
 /// Pre/post action that, for untied tasks only, builds a switch on the value
 /// loaded through PartIDVar and adds one case per emitted switch point.
134  class UntiedTaskActionTy final : public PrePostActionTy {
 // True when the constructor was given Tied == false.
135  bool Untied;
 // Variable of pointer type through which the part id is loaded and stored.
136  const VarDecl *PartIDVar;
 // Code generator invoked at every switch point (see emitUntiedSwitch).
137  const RegionCodeGenTy UntiedCodeGen;
 // Created in Enter() when Untied is set; stays null otherwise.
138  llvm::SwitchInst *UntiedSwitch = nullptr;
139 
140  public:
141  UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
142  const RegionCodeGenTy &UntiedCodeGen)
143  : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
 /// For untied tasks: load the part id, create a switch on it whose default
 /// destination is the ".untied.done." block, register a fresh
 /// ".untied.jmp." block as case 0, then emit the first switch point.
144  void Enter(CodeGenFunction &CGF) override {
145  if (Untied) {
146  // Emit task switching point.
147  auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
148  CGF.GetAddrOfLocalVar(PartIDVar),
149  PartIDVar->getType()->castAs<PointerType>());
150  auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation());
151  auto *DoneBB = CGF.createBasicBlock(".untied.done.");
152  UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
153  CGF.EmitBlock(DoneBB);
 // NOTE(review): the embedded listing numbering skips 154 here; a statement
 // between emitting DoneBB and the jump block is not visible in this extract.
155  CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
156  UntiedSwitch->addCase(CGF.Builder.getInt32(0),
157  CGF.Builder.GetInsertBlock());
158  emitUntiedSwitch(CGF);
159  }
160  }
 /// For untied tasks: store the current number of switch cases as the part
 /// id, run UntiedCodeGen, then add a new ".untied.jmp." case (numbered with
 /// the case count at that moment) that branches through cleanups to the
 /// ".untied.next." destination, where emission continues.
161  void emitUntiedSwitch(CodeGenFunction &CGF) const {
162  if (Untied) {
163  auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
164  CGF.GetAddrOfLocalVar(PartIDVar),
165  PartIDVar->getType()->castAs<PointerType>());
166  CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
167  PartIdLVal);
168  UntiedCodeGen(CGF);
169  CodeGenFunction::JumpDest CurPoint =
170  CGF.getJumpDestInCurrentScope(".untied.next.");
 // NOTE(review): the embedded listing numbering skips 171 here; a statement
 // before the jump block is not visible in this extract.
172  CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
173  UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
174  CGF.Builder.GetInsertBlock());
175  CGF.EmitBranchThroughCleanup(CurPoint);
176  CGF.EmitBlock(CurPoint.getBlock());
177  }
178  }
 /// Number of cases added to the switch so far. Dereferences UntiedSwitch
 /// unconditionally, and that pointer is only set by Enter() for untied
 /// tasks, so this must not be called otherwise.
179  unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
180  };
 /// \param CS Captured statement forwarded to the base class.
 /// \param ThreadIDVar Thread-id variable; asserted to be non-null.
 /// \param CodeGen Region code generator forwarded to the base class.
 /// \param Kind Directive kind forwarded to the base class.
 /// \param HasCancel Cancel flag forwarded to the base class.
 /// \param Action Untied-task action. It is kept by reference, so it must
 /// outlive this object.
181  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
182  const VarDecl *ThreadIDVar,
183  const RegionCodeGenTy &CodeGen,
184  OpenMPDirectiveKind Kind, bool HasCancel,
185  const UntiedTaskActionTy &Action)
186  : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
187  ThreadIDVar(ThreadIDVar), Action(Action) {
188  assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
189  }
190 
191  /// \brief Get a variable or parameter for storing global thread id
192  /// inside OpenMP construct.
193  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
194 
195  /// \brief Get an LValue for the current ThreadID variable.
196  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
197 
198  /// \brief Get the name of the capture helper.
 /// Always the fixed string ".omp_outlined.".
199  StringRef getHelperName() const override { return ".omp_outlined."; }
200 
 /// \brief Forward switch-point emission to the stored action.
201  void emitUntiedSwitch(CodeGenFunction &CGF) override {
202  Action.emitUntiedSwitch(CGF);
203  }
204 
 /// \brief Matches OpenMP region infos whose region kind is
 /// TaskOutlinedRegion.
205  static bool classof(const CGCapturedStmtInfo *Info) {
206  return CGOpenMPRegionInfo::classof(Info) &&
207  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
208  TaskOutlinedRegion;
209  }
210 
211 private:
212  /// \brief A variable or parameter storing global thread id for OpenMP
213  /// constructs.
214  const VarDecl *ThreadIDVar;
215  /// Action for emitting code for untied tasks.
216  const UntiedTaskActionTy &Action;
217 };
218 
219 /// \brief API for inlined captured statement code generation in OpenMP
220 /// constructs.
/// Most queries are forwarded to the enclosing OpenMP region info, when the
/// previously active captured-statement info is one.
221 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
222 public:
 /// \param OldCSI Previously active captured-statement info (may be null).
 /// OuterRegionInfo is set to it only when it is a CGOpenMPRegionInfo.
 /// \param CodeGen Region code generator forwarded to the base class.
 /// \param Kind Directive kind forwarded to the base class.
 /// \param HasCancel Cancel flag forwarded to the base class.
223  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
224  const RegionCodeGenTy &CodeGen,
225  OpenMPDirectiveKind Kind, bool HasCancel)
226  : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
227  OldCSI(OldCSI),
228  OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
229 
230  /// \brief Retrieve the value of the context parameter.
 /// Unreachable when there is no outer OpenMP region.
231  llvm::Value *getContextValue() const override {
232  if (OuterRegionInfo)
233  return OuterRegionInfo->getContextValue();
234  llvm_unreachable("No context value for inlined OpenMP region");
235  }
236 
 /// \brief Set the context value on the outer OpenMP region.
 /// Unreachable when there is no outer OpenMP region.
237  void setContextValue(llvm::Value *V) override {
238  if (OuterRegionInfo) {
239  OuterRegionInfo->setContextValue(V);
240  return;
241  }
242  llvm_unreachable("No context value for inlined OpenMP region");
243  }
244 
245  /// \brief Lookup the captured field decl for a variable.
246  const FieldDecl *lookup(const VarDecl *VD) const override {
247  if (OuterRegionInfo)
248  return OuterRegionInfo->lookup(VD);
249  // If there is no outer outlined region, no need to lookup in a list of
250  // captured variables, we can use the original one.
251  return nullptr;
252  }
253 
 /// \brief Field for the captured 'this' of the outer region, or nullptr when
 /// there is no outer OpenMP region.
254  FieldDecl *getThisFieldDecl() const override {
255  if (OuterRegionInfo)
256  return OuterRegionInfo->getThisFieldDecl();
257  return nullptr;
258  }
259 
260  /// \brief Get a variable or parameter for storing global thread id
261  /// inside OpenMP construct.
 /// Returns nullptr when there is no outer OpenMP region.
262  const VarDecl *getThreadIDVariable() const override {
263  if (OuterRegionInfo)
264  return OuterRegionInfo->getThreadIDVariable();
265  return nullptr;
266  }
267 
268  /// \brief Get an LValue for the current ThreadID variable.
 /// Unreachable when there is no outer OpenMP region.
269  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
270  if (OuterRegionInfo)
271  return OuterRegionInfo->getThreadIDVariableLValue(CGF);
272  llvm_unreachable("No LValue for inlined OpenMP construct");
273  }
274 
275  /// \brief Get the name of the capture helper.
 /// Unlike the other forwarders, this one consults getOldCSI() (any previous
 /// captured-statement info, not only OpenMP ones); the local declared in the
 /// condition shadows the OuterRegionInfo member.
276  StringRef getHelperName() const override {
277  if (auto *OuterRegionInfo = getOldCSI())
278  return OuterRegionInfo->getHelperName();
279  llvm_unreachable("No helper name for inlined OpenMP construct");
280  }
281 
 /// \brief Forward switch-point emission to the outer OpenMP region, if any.
282  void emitUntiedSwitch(CodeGenFunction &CGF) override {
283  if (OuterRegionInfo)
284  OuterRegionInfo->emitUntiedSwitch(CGF);
285  }
286 
 /// \brief Captured-statement info that was passed to the constructor.
287  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
288 
 /// \brief Matches OpenMP region infos whose region kind is InlinedRegion.
289  static bool classof(const CGCapturedStmtInfo *Info) {
290  return CGOpenMPRegionInfo::classof(Info) &&
291  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
292  }
293 
294  ~CGOpenMPInlinedRegionInfo() override = default;
295 
296 private:
297  /// \brief CodeGen info about outer OpenMP region.
 // NOTE(review): the embedded listing numbering skips 298 here; the
 // declaration of the OldCSI member used above is not visible in this extract.
299  CGOpenMPRegionInfo *OuterRegionInfo;
300 };
301 
302 /// \brief API for captured statement code generation in OpenMP target
303 /// constructs. For these captures, implicit parameters are used instead of the
304 /// captured fields. The name of the target region has to be unique in a given
305 /// application so it is provided by the client, because only the client has
306 /// the information to generate that.
307 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
308 public:
 /// \param CS Captured statement forwarded to the base class.
 /// \param CodeGen Region code generator forwarded to the base class.
 /// \param HelperName Name returned by getHelperName().
 /// The base class is always given TargetRegion, OMPD_target and
 /// HasCancel == false.
309  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
310  const RegionCodeGenTy &CodeGen, StringRef HelperName)
311  : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
312  /*HasCancel=*/false),
313  HelperName(HelperName) {}
314 
315  /// \brief This is unused for target regions because each starts executing
316  /// with a single thread.
317  const VarDecl *getThreadIDVariable() const override { return nullptr; }
318 
319  /// \brief Get the name of the capture helper.
320  StringRef getHelperName() const override { return HelperName; }
321 
 /// \brief Matches OpenMP region infos whose region kind is TargetRegion.
322  static bool classof(const CGCapturedStmtInfo *Info) {
323  return CGOpenMPRegionInfo::classof(Info) &&
324  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
325  }
326 
327 private:
 /// Client-provided name handed out by getHelperName().
328  StringRef HelperName;
329 };
330 
/// Placeholder region code generator that must never be invoked; reaching it
/// is reported through llvm_unreachable.
331 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
332  llvm_unreachable("No codegen for expressions");
333 }
334 /// \brief API for generation of expressions captured in an innermost OpenMP
335 /// region.
/// Body emission, thread-id lookup and helper-name lookup are all marked
/// unreachable for this class.
336 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
337 public:
 /// Wraps the captured-statement info currently active in \p CGF and
 /// privatizes every non-local variable captured by \p CS.
338  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
339  : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
340  OMPD_unknown,
341  /*HasCancel=*/false),
342  PrivScope(CGF) {
343  // Make sure the globals captured in the provided statement are local by
344  // using the privatization logic. We assume the same variable is not
345  // captured more than once.
346  for (auto &C : CS.captures()) {
 // Only variable captures (by reference or by copy) are considered.
347  if (!C.capturesVariable() && !C.capturesVariableByCopy())
348  continue;
349 
350  const VarDecl *VD = C.getCapturedVar();
 // Local variables and parameters are left alone.
351  if (VD->isLocalVarDeclOrParm())
352  continue;
353 
354  DeclRefExpr DRE(const_cast<VarDecl *>(VD),
355  /*RefersToEnclosingVariableOrCapture=*/false,
 // NOTE(review): the embedded listing numbering skips 356 here; one or more
 // DeclRefExpr constructor arguments are not visible in this extract.
357  SourceLocation());
358  PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address {
359  return CGF.EmitLValue(&DRE).getAddress();
360  });
361  }
362  (void)PrivScope.Privatize();
363  }
364 
365  /// \brief Lookup the captured field decl for a variable.
 /// Yields exactly what CGOpenMPInlinedRegionInfo::lookup yields.
366  const FieldDecl *lookup(const VarDecl *VD) const override {
367  if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
368  return FD;
369  return nullptr;
370  }
371 
372  /// \brief Emit the captured statement body.
373  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
374  llvm_unreachable("No body for expressions");
375  }
376 
377  /// \brief Get a variable or parameter for storing global thread id
378  /// inside OpenMP construct.
379  const VarDecl *getThreadIDVariable() const override {
380  llvm_unreachable("No thread id for expressions");
381  }
382 
383  /// \brief Get the name of the capture helper.
384  StringRef getHelperName() const override {
385  llvm_unreachable("No helper name for expressions");
386  }
387 
 /// Always false, so isa/dyn_cast never identify an object as this class.
388  static bool classof(const CGCapturedStmtInfo *Info) { return false; }
389 
390 private:
391  /// Private scope to capture global variables.
 // NOTE(review): the embedded listing numbering skips 392 here; the
 // declaration of the PrivScope member is not visible in this extract.
393 };
394 
395 /// \brief RAII for emitting code of OpenMP constructs.
/// The constructor installs a fresh CGOpenMPInlinedRegionInfo on the
/// CodeGenFunction and stashes its lambda/block capture state; the destructor
/// deletes that info and puts the stashed state back.
396 class InlinedOpenMPRegionRAII {
397  CodeGenFunction &CGF;
 // Saved copies of the CodeGenFunction state that is cleared while the
 // region is active.
398  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
399  FieldDecl *LambdaThisCaptureField = nullptr;
400  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
401 
402 public:
403  /// \brief Constructs region for combined constructs.
404  /// \param CodeGen Code generation sequence for combined directives. Includes
405  /// a list of functions used for code generation of implicitly inlined
406  /// regions.
 /// \param Kind Directive kind passed to the new region info.
 /// \param HasCancel Cancel flag passed to the new region info.
407  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
408  OpenMPDirectiveKind Kind, bool HasCancel)
409  : CGF(CGF) {
410  // Start emission for the construct.
 // The new info remembers the previous CapturedStmtInfo as its OldCSI.
411  CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
412  CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
 // Swap with this object's still-empty map: CGF's map becomes empty and
 // its previous contents are kept here until destruction.
413  std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
414  LambdaThisCaptureField = CGF.LambdaThisCaptureField;
415  CGF.LambdaThisCaptureField = nullptr;
416  BlockInfo = CGF.BlockInfo;
417  CGF.BlockInfo = nullptr;
418  }
419 
420  ~InlinedOpenMPRegionRAII() {
421  // Restore original CapturedStmtInfo only if we're done with code emission.
 // The cast asserts that the active info is still the inlined-region info
 // installed by the constructor; read OldCSI before deleting it.
422  auto *OldCSI =
423  cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
424  delete CGF.CapturedStmtInfo;
425  CGF.CapturedStmtInfo = OldCSI;
426  std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
427  CGF.LambdaThisCaptureField = LambdaThisCaptureField;
428  CGF.BlockInfo = BlockInfo;
429  }
430 };
431 
432 /// \brief Values for bit flags used in the ident_t to describe the fields.
433 /// All enumerators are named and described in accordance with the code
434 /// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
435 enum OpenMPLocationFlags : unsigned {
436  /// \brief Use trampoline for internal microtask.
437  OMP_IDENT_IMD = 0x01,
438  /// \brief Use c-style ident structure.
439  OMP_IDENT_KMPC = 0x02,
440  /// \brief Atomic reduction option for kmpc_reduce.
441  OMP_ATOMIC_REDUCE = 0x10,
442  /// \brief Explicit 'barrier' directive.
443  OMP_IDENT_BARRIER_EXPL = 0x20,
444  /// \brief Implicit barrier in code.
445  OMP_IDENT_BARRIER_IMPL = 0x40,
446  /// \brief Implicit barrier in 'for' directive.
 /// Has the same numeric value as OMP_IDENT_BARRIER_IMPL.
447  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
448  /// \brief Implicit barrier in 'sections' directive.
 /// Numerically OMP_IDENT_BARRIER_IMPL | 0x80.
449  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
450  /// \brief Implicit barrier in 'single' directive.
 /// Numerically OMP_IDENT_BARRIER_IMPL | 0x100.
451  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
452  /// Call of __kmp_for_static_init for static loop.
453  OMP_IDENT_WORK_LOOP = 0x200,
454  /// Call of __kmp_for_static_init for sections.
455  OMP_IDENT_WORK_SECTIONS = 0x400,
456  /// Call of __kmp_for_static_init for distribute.
457  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
 // Declares OMP_IDENT_WORK_DISTRIBUTE as the largest enumerator for the
 // bitmask-enum machinery.
458  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
459 };
460 
461 /// \brief Describes ident structure that describes a source location.
462 /// All descriptions are taken from
463 /// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
464 /// Original structure:
465 /// typedef struct ident {
466 /// kmp_int32 reserved_1; /**< might be used in Fortran;
467 /// see above */
468 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags;
469 /// KMP_IDENT_KMPC identifies this union
470 /// member */
471 /// kmp_int32 reserved_2; /**< not really used in Fortran any more;
472 /// see above */
473 ///#if USE_ITT_BUILD
474 /// /* but currently used for storing
475 /// region-specific ITT */
476 /// /* contextual information. */
477 ///#endif /* USE_ITT_BUILD */
478 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for
479 /// C++ */
480 /// char const *psource; /**< String describing the source location.
481 /// The string is composed of semi-colon separated
482 /// fields which describe the source file,
483 /// the function and a pair of line numbers that
484 /// delimit the construct.
485 /// */
486 /// } ident_t;
488  /// \brief might be used in Fortran
490  /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
492  /// \brief Not really used in Fortran any more
494  /// \brief Source[4] in Fortran, do not use for C++
496  /// \brief String describing the source location. The string is composed of
497  /// semi-colon separated fields which describe the source file, the function
498  /// and a pair of line numbers that delimit the construct.
500 };
501 
502 /// \brief Schedule types for 'omp for' loops (these enumerators are taken from
503 /// the enum sched_type in kmp.h).
505  /// \brief Lower bound for default (unordered) versions.
513  /// static with chunk adjustment (e.g., simd)
515  /// \brief Lower bound for 'ordered' versions.
524  /// \brief dist_schedule types
527  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
528  /// Set if the monotonic schedule modifier was present.
530  /// Set if the nonmonotonic schedule modifier was present.
532 };
533 
535  /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
536  /// kmpc_micro microtask, ...);
538  /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc,
539  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
541  /// \brief Call to void __kmpc_threadprivate_register( ident_t *,
542  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
544  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
546  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
547  // kmp_critical_name *crit);
549  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
550  // global_tid, kmp_critical_name *crit, uintptr_t hint);
552  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
553  // kmp_critical_name *crit);
555  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
556  // global_tid);
558  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
560  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
562  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
563  // global_tid);
565  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
566  // global_tid);
568  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
569  // kmp_int32 num_threads);
571  // Call to void __kmpc_flush(ident_t *loc);
573  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
575  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
577  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
578  // int end_part);
580  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
582  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
584  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
585  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
586  // kmp_routine_entry_t *task_entry);
588  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
589  // new_task);
591  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
592  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
593  // kmp_int32 didit);
595  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
596  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
597  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
599  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
600  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
601  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
602  // *lck);
604  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
605  // kmp_critical_name *lck);
607  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
608  // kmp_critical_name *lck);
610  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
611  // kmp_task_t * new_task);
613  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
614  // kmp_task_t * new_task);
616  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
618  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
620  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
621  // global_tid);
623  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
625  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
627  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
628  // int proc_bind);
630  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
631  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
632  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
634  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
635  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
636  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
638  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
639  // global_tid, kmp_int32 cncl_kind);
641  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
642  // kmp_int32 cncl_kind);
644  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
645  // kmp_int32 num_teams, kmp_int32 thread_limit);
647  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
648  // microtask, ...);
650  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
651  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
652  // sched, kmp_uint64 grainsize, void *task_dup);
654  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
655  // num_dims, struct kmp_dim *dims);
657  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
659  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
660  // *vec);
662  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
663  // *vec);
665  // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
666  // *data);
668  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
669  // *d);
671 
672  //
673  // Offloading related calls
674  //
675  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
676  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
677  // *arg_types);
679  // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
680  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
681  // *arg_types);
683  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
684  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
685  // *arg_types, int32_t num_teams, int32_t thread_limit);
687  // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
688  // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
689  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
691  // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
693  // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
695  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
696  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
698  // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
699  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
700  // *arg_types);
702  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
703  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
705  // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
706  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
707  // *arg_types);
709  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
710  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
712  // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
713  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
714  // *arg_types);
716 };
717 
718 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
719 /// region.
/// As an EHScopeStack cleanup, its only job is to call Exit() on the stored
/// action when the cleanup is emitted.
720 class CleanupTy final : public EHScopeStack::Cleanup {
 // Action whose Exit() is invoked from Emit(); dereferenced without a null
 // check.
721  PrePostActionTy *Action;
722 
723 public:
724  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
 /// Runs the action's Exit() hook, unless \p CGF currently has no insert
 /// point, in which case nothing is emitted.
725  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
726  if (!CGF.HaveInsertPoint())
727  return;
728  Action->Exit(CGF);
729  }
730 };
731 
732 } // anonymous namespace
733 
736  if (PrePostAction) {
737  CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
738  Callback(CodeGen, CGF, *PrePostAction);
739  } else {
740  PrePostActionTy Action;
741  Callback(CodeGen, CGF, Action);
742  }
743 }
744 
745 /// Check if \p ReductionOp is a call whose callee is an opaque value wrapping
746 /// a reference to a user-defined reduction declaration, and if so return that
/// declaration.
/// \return The OMPDeclareReductionDecl referenced by the callee, or nullptr
/// when any step of that pattern does not match.
747 static const OMPDeclareReductionDecl *
748 getReductionInit(const Expr *ReductionOp) {
749  if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
750  if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
 // Implicit casts around the opaque value's source expression are ignored.
751  if (auto *DRE =
752  dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
753  if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
754  return DRD;
755  return nullptr;
756 }
757 
759  const OMPDeclareReductionDecl *DRD,
760  const Expr *InitOp,
761  Address Private, Address Original,
762  QualType Ty) {
763  if (DRD->getInitializer()) {
764  std::pair<llvm::Function *, llvm::Function *> Reduction =
765  CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
766  auto *CE = cast<CallExpr>(InitOp);
767  auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
768  const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
769  const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
770  auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
771  auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
772  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
773  PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
774  [=]() -> Address { return Private; });
775  PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
776  [=]() -> Address { return Original; });
777  (void)PrivateScope.Privatize();
778  RValue Func = RValue::get(Reduction.second);
779  CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
780  CGF.EmitIgnoredExpr(InitOp);
781  } else {
782  llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
783  auto *GV = new llvm::GlobalVariable(
784  CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
785  llvm::GlobalValue::PrivateLinkage, Init, ".init");
786  LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
787  RValue InitRVal;
788  switch (CGF.getEvaluationKind(Ty)) {
789  case TEK_Scalar:
790  InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation());
791  break;
792  case TEK_Complex:
793  InitRVal =
795  break;
796  case TEK_Aggregate:
797  InitRVal = RValue::getAggregate(LV.getAddress());
798  break;
799  }
801  CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
802  CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
803  /*IsInitializer=*/false);
804  }
805 }
806 
807 /// \brief Emit initialization of arrays of complex types.
/// Emits an emptiness check followed by a loop that initializes the
/// destination array one element at a time.
/// \param CGF Code generation state used to emit the loop.
808 /// \param DestAddr Address of the array.
809 /// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, each element is initialized via
/// emitInitWithReductionInitializer; otherwise \p Init is evaluated directly
/// into the element.
810 /// \param Init Initial expression of array.
/// \param DRD Declare-reduction declaration. When non-null, \p SrcAddr is
/// walked in lock step with the destination.
811 /// \param SrcAddr Address of the original array.
812 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
813  QualType Type, bool EmitDeclareReductionInit,
814  const Expr *Init,
815  const OMPDeclareReductionDecl *DRD,
816  Address SrcAddr = Address::invalid()) {
817  // Perform element-by-element initialization.
818  QualType ElementTy;
819 
820  // Drill down to the base element type on both arrays.
821  auto ArrayTy = Type->getAsArrayTypeUnsafe();
822  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
 // NOTE(review): this casts DestAddr to its own element type, which looks
 // like a no-op unless emitArrayLength updated DestAddr above - confirm.
823  DestAddr =
824  CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
825  if (DRD)
826  SrcAddr =
827  CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
828 
 // The source pointer is only touched when a reduction declaration is given.
829  llvm::Value *SrcBegin = nullptr;
830  if (DRD)
831  SrcBegin = SrcAddr.getPointer();
832  auto DestBegin = DestAddr.getPointer();
833  // Cast from pointer to array type to pointer to single element.
834  auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
835  // The basic structure here is a while-do loop.
836  auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
837  auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
 // Skip the loop body entirely when the array has no elements.
838  auto IsEmpty =
839  CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
840  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
841 
842  // Enter the loop body, making that address the current address.
843  auto EntryBB = CGF.Builder.GetInsertBlock();
844  CGF.EmitBlock(BodyBB);
845 
846  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
847 
 // PHI for the current source element; first incoming value is the array
 // start, the second is added after the element has been processed.
848  llvm::PHINode *SrcElementPHI = nullptr;
849  Address SrcElementCurrent = Address::invalid();
850  if (DRD) {
851  SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
852  "omp.arraycpy.srcElementPast");
853  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
854  SrcElementCurrent =
855  Address(SrcElementPHI,
856  SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
857  }
 // PHI for the current destination element, built the same way.
858  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
859  DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
860  DestElementPHI->addIncoming(DestBegin, EntryBB);
861  Address DestElementCurrent =
862  Address(DestElementPHI,
863  DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
864 
865  // Emit copy.
866  {
 // Cleanups created while initializing one element are run at the end of
 // this scope, i.e. inside the loop body.
867  CodeGenFunction::RunCleanupsScope InitScope(CGF);
868  if (EmitDeclareReductionInit) {
869  emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
870  SrcElementCurrent, ElementTy);
871  } else
872  CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
873  /*IsInitializer=*/false);
874  }
875 
876  if (DRD) {
877  // Shift the address forward by one element.
 // NOTE(review): the value name says "dest" although this advances the
 // source pointer; it only affects the emitted IR value name.
878  auto SrcElementNext = CGF.Builder.CreateConstGEP1_32(
879  SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
880  SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
881  }
882 
883  // Shift the address forward by one element.
884  auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
885  DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
886  // Check whether we've reached the end.
887  auto Done =
888  CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
889  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
 // The back edge comes from whatever block is current after the element
 // initialization, which need not be BodyBB itself.
890  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
891 
892  // Done.
893  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
894 }
895 
/// Forwards \p E to CodeGenFunction::EmitOMPSharedLValue and returns the
/// resulting lvalue unchanged.
896 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
897  return CGF.EmitOMPSharedLValue(E);
898 }
899 
/// If \p E is an OMPArraySectionExpr, returns the lvalue produced by
/// CodeGenFunction::EmitOMPArraySectionExpr with IsLowerBound=false (the "UB"
/// suffix presumably stands for "upper bound" - confirm). For any other
/// expression kind a default-constructed LValue is returned.
900 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
901  const Expr *E) {
902  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
903  return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
     // Not an array section: return a default-constructed LValue.
904  return LValue();
905 }
906 
/// Initializes the private array copy of clause item \p N by delegating to
/// EmitOMPAggregateInit.
/// \param CGF Code generation state.
/// \param N Index into ClausesData of the item being initialized.
/// \param PrivateAddr Address of the private copy (destination array).
/// \param SharedLVal LValue whose address is passed as the source array.
/// \param DRD Declare-reduction declaration for this item, or null.
907 void ReductionCodeGen::emitAggregateInitialization(
908  CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
909  const OMPDeclareReductionDecl *DRD) {
910  // Emit VarDecl with copy init for arrays.
911  // Get the address of the original variable captured in current
912  // captured region.
913  auto *PrivateVD =
914  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
     // Use the declare-reduction path when a DRD exists and it either carries
     // an initializer or the private variable has no initializer of its own.
915  bool EmitDeclareReductionInit =
916  DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
     // The init expression handed down matches the chosen path: the item's
     // ReductionOp for the declare-reduction path, otherwise the private
     // variable's own initializer.
917  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
918  EmitDeclareReductionInit,
919  EmitDeclareReductionInit ? ClausesData[N].ReductionOp
920  : PrivateVD->getInit(),
921  DRD, SharedLVal.getAddress());
922 }
923 
926  ArrayRef<const Expr *> ReductionOps) {
927  ClausesData.reserve(Shareds.size());
928  SharedAddresses.reserve(Shareds.size());
929  Sizes.reserve(Shareds.size());
930  BaseDecls.reserve(Shareds.size());
931  auto IPriv = Privates.begin();
932  auto IRed = ReductionOps.begin();
933  for (const auto *Ref : Shareds) {
934  ClausesData.emplace_back(Ref, *IPriv, *IRed);
935  std::advance(IPriv, 1);
936  std::advance(IRed, 1);
937  }
938 }
939 
/// Emits the lvalue pair for clause item \p N and appends it to
/// SharedAddresses.
///
/// The first member of the pair comes from emitSharedLValue(CGF, Ref) and the
/// second from emitSharedLValueUB(CGF, Ref). The assertion requires that
/// exactly \p N entries have been recorded so far, i.e. items must be
/// processed in index order, each exactly once.
940 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
941  assert(SharedAddresses.size() == N &&
942  "Number of generated lvalues must be exactly N.");
943  LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
944  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
945  SharedAddresses.emplace_back(First, Second);
946 }
947 
949  auto *PrivateVD =
950  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
951  QualType PrivateType = PrivateVD->getType();
952  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
953  if (!PrivateType->isVariablyModifiedType()) {
954  Sizes.emplace_back(
955  CGF.getTypeSize(
956  SharedAddresses[N].first.getType().getNonReferenceType()),
957  nullptr);
958  return;
959  }
960  llvm::Value *Size;
961  llvm::Value *SizeInChars;
962  llvm::Type *ElemType =
963  cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
964  ->getElementType();
965  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
966  if (AsArraySection) {
967  Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
968  SharedAddresses[N].first.getPointer());
969  Size = CGF.Builder.CreateNUWAdd(
970  Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
971  SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
972  } else {
973  SizeInChars = CGF.getTypeSize(
974  SharedAddresses[N].first.getType().getNonReferenceType());
975  Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
976  }
977  Sizes.emplace_back(SizeInChars, Size);
979  CGF,
980  cast<OpaqueValueExpr>(
981  CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
982  RValue::get(Size));
983  CGF.EmitVariablyModifiedType(PrivateType);
984 }
985 
987  llvm::Value *Size) {
988  auto *PrivateVD =
989  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
990  QualType PrivateType = PrivateVD->getType();
991  if (!PrivateType->isVariablyModifiedType()) {
992  assert(!Size && !Sizes[N].second &&
993  "Size should be nullptr for non-variably modified reduction "
994  "items.");
995  return;
996  }
998  CGF,
999  cast<OpaqueValueExpr>(
1000  CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1001  RValue::get(Size));
1002  CGF.EmitVariablyModifiedType(PrivateType);
1003 }
1004 
1006  CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
1007  llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
1008  assert(SharedAddresses.size() > N && "No variable was generated");
1009  auto *PrivateVD =
1010  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1011  auto *DRD = getReductionInit(ClausesData[N].ReductionOp);
1012  QualType PrivateType = PrivateVD->getType();
1013  PrivateAddr = CGF.Builder.CreateElementBitCast(
1014  PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1015  QualType SharedType = SharedAddresses[N].first.getType();
1016  SharedLVal = CGF.MakeAddrLValue(
1017  CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
1018  CGF.ConvertTypeForMem(SharedType)),
1019  SharedType, SharedAddresses[N].first.getBaseInfo(),
1020  CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
1021  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
1022  emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
1023  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
1024  emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
1025  PrivateAddr, SharedLVal.getAddress(),
1026  SharedLVal.getType());
1027  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
1028  !CGF.isTrivialInitializer(PrivateVD->getInit())) {
1029  CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
1030  PrivateVD->getType().getQualifiers(),
1031  /*IsInitializer=*/false);
1032  }
1033 }
1034 
1036  auto *PrivateVD =
1037  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1038  QualType PrivateType = PrivateVD->getType();
1039  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1040  return DTorKind != QualType::DK_none;
1041 }
1042 
1044  Address PrivateAddr) {
1045  auto *PrivateVD =
1046  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1047  QualType PrivateType = PrivateVD->getType();
1048  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1049  if (needCleanups(N)) {
1050  PrivateAddr = CGF.Builder.CreateElementBitCast(
1051  PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1052  CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1053  }
1054 }
1055 
1057  LValue BaseLV) {
1058  BaseTy = BaseTy.getNonReferenceType();
1059  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1060  !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1061  if (auto *PtrTy = BaseTy->getAs<PointerType>())
1062  BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
1063  else {
1064  LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
1065  BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
1066  }
1067  BaseTy = BaseTy->getPointeeType();
1068  }
1069  return CGF.MakeAddrLValue(
1071  CGF.ConvertTypeForMem(ElTy)),
1072  BaseLV.getType(), BaseLV.getBaseInfo(),
1073  CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
1074 }
1075 
1077  llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
1078  llvm::Value *Addr) {
1079  Address Tmp = Address::invalid();
1080  Address TopTmp = Address::invalid();
1081  Address MostTopTmp = Address::invalid();
1082  BaseTy = BaseTy.getNonReferenceType();
1083  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1084  !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1085  Tmp = CGF.CreateMemTemp(BaseTy);
1086  if (TopTmp.isValid())
1087  CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
1088  else
1089  MostTopTmp = Tmp;
1090  TopTmp = Tmp;
1091  BaseTy = BaseTy->getPointeeType();
1092  }
1093  llvm::Type *Ty = BaseLVType;
1094  if (Tmp.isValid())
1095  Ty = Tmp.getElementType();
1096  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
1097  if (Tmp.isValid()) {
1098  CGF.Builder.CreateStore(Addr, Tmp);
1099  return MostTopTmp;
1100  }
1101  return Address(Addr, BaseLVAlignment);
1102 }
1103 
1105  Address PrivateAddr) {
1106  const DeclRefExpr *DE;
1107  const VarDecl *OrigVD = nullptr;
1108  if (auto *OASE = dyn_cast<OMPArraySectionExpr>(ClausesData[N].Ref)) {
1109  auto *Base = OASE->getBase()->IgnoreParenImpCasts();
1110  while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1111  Base = TempOASE->getBase()->IgnoreParenImpCasts();
1112  while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1113  Base = TempASE->getBase()->IgnoreParenImpCasts();
1114  DE = cast<DeclRefExpr>(Base);
1115  OrigVD = cast<VarDecl>(DE->getDecl());
1116  } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(ClausesData[N].Ref)) {
1117  auto *Base = ASE->getBase()->IgnoreParenImpCasts();
1118  while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1119  Base = TempASE->getBase()->IgnoreParenImpCasts();
1120  DE = cast<DeclRefExpr>(Base);
1121  OrigVD = cast<VarDecl>(DE->getDecl());
1122  }
1123  if (OrigVD) {
1124  BaseDecls.emplace_back(OrigVD);
1125  auto OriginalBaseLValue = CGF.EmitLValue(DE);
1126  LValue BaseLValue =
1127  loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1128  OriginalBaseLValue);
1129  llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1130  BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
1131  llvm::Value *PrivatePointer =
1133  PrivateAddr.getPointer(),
1134  SharedAddresses[N].first.getAddress().getType());
1135  llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1136  return castToBase(CGF, OrigVD->getType(),
1137  SharedAddresses[N].first.getType(),
1138  OriginalBaseLValue.getAddress().getType(),
1139  OriginalBaseLValue.getAlignment(), Ptr);
1140  }
1141  BaseDecls.emplace_back(
1142  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1143  return PrivateAddr;
1144 }
1145 
1147  auto *DRD = getReductionInit(ClausesData[N].ReductionOp);
1148  return DRD && DRD->getInitializer();
1149 }
1150 
/// Returns the lvalue obtained by loading through the thread-id variable.
///
/// The variable returned by getThreadIDVariable() is treated as a pointer (its
/// type is cast with castAs<PointerType>()): the local holding that pointer is
/// looked up with GetAddrOfLocalVar and EmitLoadOfPointerLValue yields the
/// lvalue of the pointee.
1151 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1152  return CGF.EmitLoadOfPointerLValue(
1153  CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1154  getThreadIDVariable()->getType()->castAs<PointerType>());
1155 }
1156 
/// Emits the region body by invoking CodeGen(CGF), bracketed by
/// EHStack.pushTerminate() / EHStack.popTerminate().
///
/// Does nothing when \p CGF has no insertion point. The statement parameter is
/// unnamed and not used.
1157 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1158  if (!CGF.HaveInsertPoint())
1159  return;
1160  // 1.2.2 OpenMP Language Terminology
1161  // Structured block - An executable statement with a single entry at the
1162  // top and a single exit at the bottom.
1163  // The point of exit cannot be a branch out of the structured block.
1164  // longjmp() and throw() must not violate the entry/exit criteria.
      // NOTE(review): the terminate scope presumably enforces the rule quoted
      // above for exceptions leaving the structured block - confirm.
1165  CGF.EHStack.pushTerminate();
1166  CodeGen(CGF);
1167  CGF.EHStack.popTerminate();
1168 }
1169 
1170 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1171  CodeGenFunction &CGF) {
1172  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1173  getThreadIDVariable()->getType(),
1175 }
1176 
1178  : CGM(CGM), OffloadEntriesInfoManager(CGM) {
1179  IdentTy = llvm::StructType::create(
1180  "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
1181  CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
1182  CGM.Int8PtrTy /* psource */);
1183  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1184 
1186 }
1187 
/// Empties the InternalVars container; no other member is touched here.
1188 void CGOpenMPRuntime::clear() {
1189  InternalVars.clear();
1190 }
1191 
1192 static llvm::Function *
1194  const Expr *CombinerInitializer, const VarDecl *In,
1195  const VarDecl *Out, bool IsCombiner) {
1196  // void .omp_combiner.(Ty *in, Ty *out);
1197  auto &C = CGM.getContext();
1198  QualType PtrTy = C.getPointerType(Ty).withRestrict();
1199  FunctionArgList Args;
1200  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1201  /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1202  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1203  /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1204  Args.push_back(&OmpOutParm);
1205  Args.push_back(&OmpInParm);
1206  auto &FnInfo =
1207  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1208  auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1209  auto *Fn = llvm::Function::Create(
1211  IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule());
1212  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
1213  Fn->removeFnAttr(llvm::Attribute::NoInline);
1214  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1215  Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1216  CodeGenFunction CGF(CGM);
1217  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1218  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1219  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
1221  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1222  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address {
1223  return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1224  .getAddress();
1225  });
1226  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1227  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address {
1228  return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1229  .getAddress();
1230  });
1231  (void)Scope.Privatize();
1232  if (!IsCombiner && Out->hasInit() &&
1233  !CGF.isTrivialInitializer(Out->getInit())) {
1234  CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1235  Out->getType().getQualifiers(),
1236  /*IsInitializer=*/true);
1237  }
1238  if (CombinerInitializer)
1239  CGF.EmitIgnoredExpr(CombinerInitializer);
1240  Scope.ForceCleanup();
1241  CGF.FinishFunction();
1242  return Fn;
1243 }
1244 
1246  CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1247  if (UDRMap.count(D) > 0)
1248  return;
1249  auto &C = CGM.getContext();
1250  if (!In || !Out) {
1251  In = &C.Idents.get("omp_in");
1252  Out = &C.Idents.get("omp_out");
1253  }
1254  llvm::Function *Combiner = emitCombinerOrInitializer(
1255  CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()),
1256  cast<VarDecl>(D->lookup(Out).front()),
1257  /*IsCombiner=*/true);
1258  llvm::Function *Initializer = nullptr;
1259  if (auto *Init = D->getInitializer()) {
1260  if (!Priv || !Orig) {
1261  Priv = &C.Idents.get("omp_priv");
1262  Orig = &C.Idents.get("omp_orig");
1263  }
1264  Initializer = emitCombinerOrInitializer(
1265  CGM, D->getType(),
1267  : nullptr,
1268  cast<VarDecl>(D->lookup(Orig).front()),
1269  cast<VarDecl>(D->lookup(Priv).front()),
1270  /*IsCombiner=*/false);
1271  }
1272  UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer)));
1273  if (CGF) {
1274  auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1275  Decls.second.push_back(D);
1276  }
1277 }
1278 
1279 std::pair<llvm::Function *, llvm::Function *>
1281  auto I = UDRMap.find(D);
1282  if (I != UDRMap.end())
1283  return I->second;
1284  emitUserDefinedReduction(/*CGF=*/nullptr, D);
1285  return UDRMap.lookup(D);
1286 }
1287 
1288 // Layout information for ident_t.
1290  return CGM.getPointerAlign();
1291 }
1293  assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
1294  return CharUnits::fromQuantity(16) + CGM.getPointerSize();
1295 }
1297  // All the fields except the last are i32, so this works beautifully.
1298  return unsigned(Field) * CharUnits::fromQuantity(4);
1299 }
1301  IdentFieldIndex Field,
1302  const llvm::Twine &Name = "") {
1303  auto Offset = getOffsetOfIdentField(Field);
1304  return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
1305 }
1306 
1308  CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1309  const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1310  const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1311  assert(ThreadIDVar->getType()->isPointerType() &&
1312  "thread id variable must be of type kmp_int32 *");
1313  CodeGenFunction CGF(CGM, true);
1314  bool HasCancel = false;
1315  if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1316  HasCancel = OPD->hasCancel();
1317  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1318  HasCancel = OPSD->hasCancel();
1319  else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1320  HasCancel = OPFD->hasCancel();
1321  else if (auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1322  HasCancel = OPFD->hasCancel();
1323  else if (auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1324  HasCancel = OPFD->hasCancel();
1325  else if (auto *OPFD = dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1326  HasCancel = OPFD->hasCancel();
1327  else if (auto *OPFD =
1328  dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1329  HasCancel = OPFD->hasCancel();
1330  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1331  HasCancel, OutlinedHelperName);
1332  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1333  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
1334 }
1335 
1337  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1338  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1339  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1341  CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1342 }
1343 
1345  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1346  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1347  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1349  CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1350 }
1351 
1353  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1354  const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1355  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1356  bool Tied, unsigned &NumberOfParts) {
1357  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1358  PrePostActionTy &) {
1359  auto *ThreadID = getThreadID(CGF, D.getLocStart());
1360  auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart());
1361  llvm::Value *TaskArgs[] = {
1362  UpLoc, ThreadID,
1363  CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1364  TaskTVar->getType()->castAs<PointerType>())
1365  .getPointer()};
1366  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
1367  };
1368  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1369  UntiedCodeGen);
1370  CodeGen.setAction(Action);
1371  assert(!ThreadIDVar->getType()->isPointerType() &&
1372  "thread id variable must be of type kmp_int32 for tasks");
1373  auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
1374  auto *TD = dyn_cast<OMPTaskDirective>(&D);
1375  CodeGenFunction CGF(CGM, true);
1376  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1377  InnermostKind,
1378  TD ? TD->hasCancel() : false, Action);
1379  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1380  auto *Res = CGF.GenerateCapturedStmtFunction(*CS);
1381  if (!Tied)
1382  NumberOfParts = Action.getNumberOfParts();
1383  return Res;
1384 }
1385 
/// Returns the address of the default ident_t global for \p Flags, creating
/// and caching it in OpenMPDefaultLocMap on first use.
///
/// The global is a private, constant, unnamed_addr struct of type IdentTy
/// whose i32 fields are all zero except the second one, which holds \p Flags;
/// its last field points at the shared default source string
/// ";unknown;unknown;0;0;;".
/// \param Flags Value stored in the flags field; also the cache key.
/// \return Address of the (possibly newly created) global, with the alignment
/// reported by getIdentAlign(CGM).
1386 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
1387  CharUnits Align = getIdentAlign(CGM);
1388  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
1389  if (!Entry) {
      // The default psource string is created once and then reused for every
      // Flags value.
1390  if (!DefaultOpenMPPSource) {
1391  // Initialize default location for psource field of ident_t structure of
1392  // all ident_t objects. Format is ";file;function;line;column;;".
1393  // Taken from
1394  // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
1395  DefaultOpenMPPSource =
1396  CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
1397  DefaultOpenMPPSource =
1398  llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
1399  }
1400 
      // Build the constant initializer: four i32 values (only the second one,
      // Flags, is non-zero) followed by the psource pointer.
1401  ConstantInitBuilder builder(CGM);
1402  auto fields = builder.beginStruct(IdentTy);
1403  fields.addInt(CGM.Int32Ty, 0);
1404  fields.addInt(CGM.Int32Ty, Flags);
1405  fields.addInt(CGM.Int32Ty, 0);
1406  fields.addInt(CGM.Int32Ty, 0);
1407  fields.add(DefaultOpenMPPSource);
1408  auto DefaultOpenMPLocation =
1409  fields.finishAndCreateGlobal("", Align, /*isConstant*/ true,
1410  llvm::GlobalValue::PrivateLinkage);
1411  DefaultOpenMPLocation->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
1412 
      // Remember the global so later requests with the same Flags reuse it.
1413  OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
1414  }
1415  return Address(Entry, Align);
1416 }
1417 
1419  SourceLocation Loc,
1420  unsigned Flags) {
1421  Flags |= OMP_IDENT_KMPC;
1422  // If no debug info is generated - return global default location.
1423  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1424  Loc.isInvalid())
1425  return getOrCreateDefaultLocation(Flags).getPointer();
1426 
1427  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1428 
1429  Address LocValue = Address::invalid();
1430  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1431  if (I != OpenMPLocThreadIDMap.end())
1432  LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));
1433 
1434  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
1435  // GetOpenMPThreadID was called before this routine.
1436  if (!LocValue.isValid()) {
1437  // Generate "ident_t .kmpc_loc.addr;"
1438  Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
1439  ".kmpc_loc.addr");
1440  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1441  Elem.second.DebugLoc = AI.getPointer();
1442  LocValue = AI;
1443 
1444  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1445  CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
1446  CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
1447  CGM.getSize(getIdentSize(CGF.CGM)));
1448  }
1449 
1450  // char **psource = &.kmpc_loc_<flags>.addr.psource;
1451  Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);
1452 
1453  auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
1454  if (OMPDebugLoc == nullptr) {
1455  SmallString<128> Buffer2;
1456  llvm::raw_svector_ostream OS2(Buffer2);
1457  // Build debug location
1459  OS2 << ";" << PLoc.getFilename() << ";";
1460  if (const FunctionDecl *FD =
1461  dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
1462  OS2 << FD->getQualifiedNameAsString();
1463  }
1464  OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1465  OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
1466  OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
1467  }
1468  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
1469  CGF.Builder.CreateStore(OMPDebugLoc, PSource);
1470 
1471  // Our callers always pass this to a runtime function, so for
1472  // convenience, go ahead and return a naked pointer.
1473  return LocValue.getPointer();
1474 }
1475 
1477  SourceLocation Loc) {
1478  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1479 
1480  llvm::Value *ThreadID = nullptr;
1481  // Check whether we've already cached a load of the thread id in this
1482  // function.
1483  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1484  if (I != OpenMPLocThreadIDMap.end()) {
1485  ThreadID = I->second.ThreadID;
1486  if (ThreadID != nullptr)
1487  return ThreadID;
1488  }
1489  // If exceptions are enabled, do not use parameter to avoid possible crash.
1490  if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1491  !CGF.getLangOpts().CXXExceptions ||
1492  CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
1493  if (auto *OMPRegionInfo =
1494  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1495  if (OMPRegionInfo->getThreadIDVariable()) {
1496  // Check if this an outlined function with thread id passed as argument.
1497  auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1498  ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
1499  // If value loaded in entry block, cache it and use it everywhere in
1500  // function.
1501  if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
1502  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1503  Elem.second.ThreadID = ThreadID;
1504  }
1505  return ThreadID;
1506  }
1507  }
1508  }
1509 
1510  // This is not an outlined function region - need to call __kmpc_int32
1511  // kmpc_global_thread_num(ident_t *loc).
1512  // Generate thread id value and cache this value for use across the
1513  // function.
1514  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1515  CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
1516  auto *Call = CGF.Builder.CreateCall(
1518  emitUpdateLocation(CGF, Loc));
1519  Call->setCallingConv(CGF.getRuntimeCC());
1520  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1521  Elem.second.ThreadID = Call;
1522  return Call;
1523 }
1524 
1526  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1527  if (OpenMPLocThreadIDMap.count(CGF.CurFn))
1528  OpenMPLocThreadIDMap.erase(CGF.CurFn);
1529  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1530  for(auto *D : FunctionUDRMap[CGF.CurFn]) {
1531  UDRMap.erase(D);
1532  }
1533  FunctionUDRMap.erase(CGF.CurFn);
1534  }
1535 }
1536 
1538  if (!IdentTy) {
1539  }
1540  return llvm::PointerType::getUnqual(IdentTy);
1541 }
1542 
1544  if (!Kmpc_MicroTy) {
1545  // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1546  llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1547  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1548  Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1549  }
1550  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1551 }
1552 
1553 llvm::Constant *
1555  llvm::Constant *RTLFn = nullptr;
1556  switch (static_cast<OpenMPRTLFunction>(Function)) {
1557  case OMPRTL__kmpc_fork_call: {
1558  // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1559  // microtask, ...);
1560  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1562  llvm::FunctionType *FnTy =
1563  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1564  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1565  break;
1566  }
1568  // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1569  llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1570  llvm::FunctionType *FnTy =
1571  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1572  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1573  break;
1574  }
1576  // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1577  // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1578  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1580  CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1581  llvm::FunctionType *FnTy =
1582  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1583  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1584  break;
1585  }
1586  case OMPRTL__kmpc_critical: {
1587  // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1588  // kmp_critical_name *crit);
1589  llvm::Type *TypeParams[] = {
1591  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1592  llvm::FunctionType *FnTy =
1593  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1594  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1595  break;
1596  }
1598  // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1599  // kmp_critical_name *crit, uintptr_t hint);
1600  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1601  llvm::PointerType::getUnqual(KmpCriticalNameTy),
1602  CGM.IntPtrTy};
1603  llvm::FunctionType *FnTy =
1604  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1605  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1606  break;
1607  }
1609  // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1610  // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1611  // typedef void *(*kmpc_ctor)(void *);
1612  auto KmpcCtorTy =
1613  llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1614  /*isVarArg*/ false)->getPointerTo();
1615  // typedef void *(*kmpc_cctor)(void *, void *);
1616  llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1617  auto KmpcCopyCtorTy =
1618  llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1619  /*isVarArg*/ false)->getPointerTo();
1620  // typedef void (*kmpc_dtor)(void *);
1621  auto KmpcDtorTy =
1622  llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1623  ->getPointerTo();
1624  llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1625  KmpcCopyCtorTy, KmpcDtorTy};
1626  auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1627  /*isVarArg*/ false);
1628  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1629  break;
1630  }
1632  // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1633  // kmp_critical_name *crit);
1634  llvm::Type *TypeParams[] = {
1636  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1637  llvm::FunctionType *FnTy =
1638  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1639  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1640  break;
1641  }
1643  // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1644  // global_tid);
1645  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1646  llvm::FunctionType *FnTy =
1647  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1648  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1649  break;
1650  }
1651  case OMPRTL__kmpc_barrier: {
1652  // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1653  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1654  llvm::FunctionType *FnTy =
1655  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1656  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1657  break;
1658  }
1660  // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1661  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1662  llvm::FunctionType *FnTy =
1663  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1664  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1665  break;
1666  }
1668  // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1669  // kmp_int32 num_threads)
1670  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1671  CGM.Int32Ty};
1672  llvm::FunctionType *FnTy =
1673  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1674  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1675  break;
1676  }
1678  // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1679  // global_tid);
1680  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1681  llvm::FunctionType *FnTy =
1682  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1683  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1684  break;
1685  }
1687  // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1688  // global_tid);
1689  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1690  llvm::FunctionType *FnTy =
1691  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1692  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1693  break;
1694  }
1695  case OMPRTL__kmpc_flush: {
1696  // Build void __kmpc_flush(ident_t *loc);
1697  llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1698  llvm::FunctionType *FnTy =
1699  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1700  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1701  break;
1702  }
1703  case OMPRTL__kmpc_master: {
1704  // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1705  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1706  llvm::FunctionType *FnTy =
1707  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1708  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1709  break;
1710  }
1711  case OMPRTL__kmpc_end_master: {
1712  // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1713  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1714  llvm::FunctionType *FnTy =
1715  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1716  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1717  break;
1718  }
1720  // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1721  // int end_part);
1722  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1723  llvm::FunctionType *FnTy =
1724  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1725  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1726  break;
1727  }
1728  case OMPRTL__kmpc_single: {
1729  // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1730  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1731  llvm::FunctionType *FnTy =
1732  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1733  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1734  break;
1735  }
1736  case OMPRTL__kmpc_end_single: {
1737  // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1738  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1739  llvm::FunctionType *FnTy =
1740  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1741  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1742  break;
1743  }
1745  // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1746  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1747  // kmp_routine_entry_t *task_entry);
1748  assert(KmpRoutineEntryPtrTy != nullptr &&
1749  "Type kmp_routine_entry_t must be created.");
1750  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1751  CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1752  // Return void * and then cast to particular kmp_task_t type.
1753  llvm::FunctionType *FnTy =
1754  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1755  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1756  break;
1757  }
1758  case OMPRTL__kmpc_omp_task: {
1759  // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1760  // *new_task);
1761  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1762  CGM.VoidPtrTy};
1763  llvm::FunctionType *FnTy =
1764  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1765  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1766  break;
1767  }
1768  case OMPRTL__kmpc_copyprivate: {
1769  // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1770  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1771  // kmp_int32 didit);
1772  llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1773  auto *CpyFnTy =
1774  llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1775  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1776  CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
1777  CGM.Int32Ty};
1778  llvm::FunctionType *FnTy =
1779  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1780  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1781  break;
1782  }
1783  case OMPRTL__kmpc_reduce: {
1784  // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1785  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1786  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1787  llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1788  auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1789  /*isVarArg=*/false);
1790  llvm::Type *TypeParams[] = {
1792  CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1793  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1794  llvm::FunctionType *FnTy =
1795  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1796  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1797  break;
1798  }
1800  // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1801  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1802  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1803  // *lck);
1804  llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1805  auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1806  /*isVarArg=*/false);
1807  llvm::Type *TypeParams[] = {
1809  CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1810  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1811  llvm::FunctionType *FnTy =
1812  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1813  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
1814  break;
1815  }
1816  case OMPRTL__kmpc_end_reduce: {
1817  // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
1818  // kmp_critical_name *lck);
1819  llvm::Type *TypeParams[] = {
1821  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1822  llvm::FunctionType *FnTy =
1823  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1824  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
1825  break;
1826  }
1828  // Build void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
1829  // kmp_critical_name *lck);
1830  llvm::Type *TypeParams[] = {
1832  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1833  llvm::FunctionType *FnTy =
1834  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1835  RTLFn =
1836  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
1837  break;
1838  }
1840  // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
1841  // kmp_task_t *new_task);
1842  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1843  CGM.VoidPtrTy};
1844  llvm::FunctionType *FnTy =
1845  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1846  RTLFn =
1847  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
1848  break;
1849  }
1851  // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
1852  // kmp_task_t *new_task);
1853  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1854  CGM.VoidPtrTy};
1855  llvm::FunctionType *FnTy =
1856  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1857  RTLFn = CGM.CreateRuntimeFunction(FnTy,
1858  /*Name=*/"__kmpc_omp_task_complete_if0");
1859  break;
1860  }
1861  case OMPRTL__kmpc_ordered: {
1862  // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
1863  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1864  llvm::FunctionType *FnTy =
1865  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1866  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
1867  break;
1868  }
1869  case OMPRTL__kmpc_end_ordered: {
1870  // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
1871  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1872  llvm::FunctionType *FnTy =
1873  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1874  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
1875  break;
1876  }
1878  // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
1879  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1880  llvm::FunctionType *FnTy =
1881  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1882  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
1883  break;
1884  }
1885  case OMPRTL__kmpc_taskgroup: {
1886  // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
1887  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1888  llvm::FunctionType *FnTy =
1889  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1890  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
1891  break;
1892  }
1894  // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
1895  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1896  llvm::FunctionType *FnTy =
1897  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1898  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
1899  break;
1900  }
1902  // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
1903  // int proc_bind)
1904  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1905  llvm::FunctionType *FnTy =
1906  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1907  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
1908  break;
1909  }
1911  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
1912  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
1913  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
1914  llvm::Type *TypeParams[] = {
1917  llvm::FunctionType *FnTy =
1918  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1919  RTLFn =
1920  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
1921  break;
1922  }
1924  // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
1925  // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
1926  // kmp_depend_info_t *noalias_dep_list);
1927  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1930  llvm::FunctionType *FnTy =
1931  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1932  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
1933  break;
1934  }
1936  // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
1937  // global_tid, kmp_int32 cncl_kind)
1938  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1939  llvm::FunctionType *FnTy =
1940  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1941  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
1942  break;
1943  }
1944  case OMPRTL__kmpc_cancel: {
1945  // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
1946  // kmp_int32 cncl_kind)
1947  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1948  llvm::FunctionType *FnTy =
1949  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1950  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
1951  break;
1952  }
1954  // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
1955  // kmp_int32 num_teams, kmp_int32 num_threads)
1956  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1957  CGM.Int32Ty};
1958  llvm::FunctionType *FnTy =
1959  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1960  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
1961  break;
1962  }
1963  case OMPRTL__kmpc_fork_teams: {
1964  // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
1965  // microtask, ...);
1966  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1968  llvm::FunctionType *FnTy =
1969  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1970  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
1971  break;
1972  }
1973  case OMPRTL__kmpc_taskloop: {
1974  // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
1975  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
1976  // sched, kmp_uint64 grainsize, void *task_dup);
1977  llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
1978  CGM.IntTy,
1979  CGM.VoidPtrTy,
1980  CGM.IntTy,
1981  CGM.Int64Ty->getPointerTo(),
1982  CGM.Int64Ty->getPointerTo(),
1983  CGM.Int64Ty,
1984  CGM.IntTy,
1985  CGM.IntTy,
1986  CGM.Int64Ty,
1987  CGM.VoidPtrTy};
1988  llvm::FunctionType *FnTy =
1989  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1990  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
1991  break;
1992  }
1994  // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
1995  // num_dims, struct kmp_dim *dims);
1996  llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
1997  CGM.Int32Ty,
1998  CGM.Int32Ty,
1999  CGM.VoidPtrTy};
2000  llvm::FunctionType *FnTy =
2001  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2002  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2003  break;
2004  }
2006  // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2007  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2008  llvm::FunctionType *FnTy =
2009  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2010  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2011  break;
2012  }
2014  // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2015  // *vec);
2016  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2017  CGM.Int64Ty->getPointerTo()};
2018  llvm::FunctionType *FnTy =
2019  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2020  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2021  break;
2022  }
2024  // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2025  // *vec);
2026  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2027  CGM.Int64Ty->getPointerTo()};
2028  llvm::FunctionType *FnTy =
2029  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2030  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2031  break;
2032  }
2034  // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2035  // *data);
2036  llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2037  llvm::FunctionType *FnTy =
2038  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2039  RTLFn =
2040  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2041  break;
2042  }
2044  // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2045  // *d);
2046  llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2047  llvm::FunctionType *FnTy =
2048  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2049  RTLFn = CGM.CreateRuntimeFunction(
2050  FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2051  break;
2052  }
2053  case OMPRTL__tgt_target: {
2054  // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2055  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2056  // *arg_types);
2057  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2058  CGM.VoidPtrTy,
2059  CGM.Int32Ty,
2060  CGM.VoidPtrPtrTy,
2061  CGM.VoidPtrPtrTy,
2062  CGM.SizeTy->getPointerTo(),
2063  CGM.Int64Ty->getPointerTo()};
2064  llvm::FunctionType *FnTy =
2065  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2066  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2067  break;
2068  }
2070  // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2071  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
2072  // int64_t *arg_types);
2073  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2074  CGM.VoidPtrTy,
2075  CGM.Int32Ty,
2076  CGM.VoidPtrPtrTy,
2077  CGM.VoidPtrPtrTy,
2078  CGM.SizeTy->getPointerTo(),
2079  CGM.Int64Ty->getPointerTo()};
2080  llvm::FunctionType *FnTy =
2081  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2082  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2083  break;
2084  }
2085  case OMPRTL__tgt_target_teams: {
2086  // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2087  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
2088  // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2089  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2090  CGM.VoidPtrTy,
2091  CGM.Int32Ty,
2092  CGM.VoidPtrPtrTy,
2093  CGM.VoidPtrPtrTy,
2094  CGM.SizeTy->getPointerTo(),
2095  CGM.Int64Ty->getPointerTo(),
2096  CGM.Int32Ty,
2097  CGM.Int32Ty};
2098  llvm::FunctionType *FnTy =
2099  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2100  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2101  break;
2102  }
2104  // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2105  // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
2106  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2107  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2108  CGM.VoidPtrTy,
2109  CGM.Int32Ty,
2110  CGM.VoidPtrPtrTy,
2111  CGM.VoidPtrPtrTy,
2112  CGM.SizeTy->getPointerTo(),
2113  CGM.Int64Ty->getPointerTo(),
2114  CGM.Int32Ty,
2115  CGM.Int32Ty};
2116  llvm::FunctionType *FnTy =
2117  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2118  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2119  break;
2120  }
2121  case OMPRTL__tgt_register_lib: {
2122  // Build void __tgt_register_lib(__tgt_bin_desc *desc);
2123  QualType ParamTy =
2125  llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2126  llvm::FunctionType *FnTy =
2127  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2128  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
2129  break;
2130  }
2132  // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
2133  QualType ParamTy =
2135  llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2136  llvm::FunctionType *FnTy =
2137  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2138  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
2139  break;
2140  }
2142  // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2143  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2144  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2145  CGM.Int32Ty,
2146  CGM.VoidPtrPtrTy,
2147  CGM.VoidPtrPtrTy,
2148  CGM.SizeTy->getPointerTo(),
2149  CGM.Int64Ty->getPointerTo()};
2150  llvm::FunctionType *FnTy =
2151  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2152  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2153  break;
2154  }
2156  // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2157  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2158  // *arg_types);
2159  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2160  CGM.Int32Ty,
2161  CGM.VoidPtrPtrTy,
2162  CGM.VoidPtrPtrTy,
2163  CGM.SizeTy->getPointerTo(),
2164  CGM.Int64Ty->getPointerTo()};
2165  auto *FnTy =
2166  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2167  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2168  break;
2169  }
2171  // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2172  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2173  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2174  CGM.Int32Ty,
2175  CGM.VoidPtrPtrTy,
2176  CGM.VoidPtrPtrTy,
2177  CGM.SizeTy->getPointerTo(),
2178  CGM.Int64Ty->getPointerTo()};
2179  llvm::FunctionType *FnTy =
2180  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2181  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2182  break;
2183  }
2185  // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2186  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2187  // *arg_types);
2188  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2189  CGM.Int32Ty,
2190  CGM.VoidPtrPtrTy,
2191  CGM.VoidPtrPtrTy,
2192  CGM.SizeTy->getPointerTo(),
2193  CGM.Int64Ty->getPointerTo()};
2194  auto *FnTy =
2195  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2196  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2197  break;
2198  }
2200  // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2201  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2202  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2203  CGM.Int32Ty,
2204  CGM.VoidPtrPtrTy,
2205  CGM.VoidPtrPtrTy,
2206  CGM.SizeTy->getPointerTo(),
2207  CGM.Int64Ty->getPointerTo()};
2208  llvm::FunctionType *FnTy =
2209  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2210  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2211  break;
2212  }
2214  // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2215  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2216  // *arg_types);
2217  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2218  CGM.Int32Ty,
2219  CGM.VoidPtrPtrTy,
2220  CGM.VoidPtrPtrTy,
2221  CGM.SizeTy->getPointerTo(),
2222  CGM.Int64Ty->getPointerTo()};
2223  auto *FnTy =
2224  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2225  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2226  break;
2227  }
2228  }
2229  assert(RTLFn && "Unable to find OpenMP runtime function");
2230  return RTLFn;
2231 }
2232 
2233 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
2234  bool IVSigned) {
2235  assert((IVSize == 32 || IVSize == 64) &&
2236  "IV size is not compatible with the omp runtime");
2237  auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2238  : "__kmpc_for_static_init_4u")
2239  : (IVSigned ? "__kmpc_for_static_init_8"
2240  : "__kmpc_for_static_init_8u");
2241  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2242  auto PtrTy = llvm::PointerType::getUnqual(ITy);
2243  llvm::Type *TypeParams[] = {
2244  getIdentTyPointerTy(), // loc
2245  CGM.Int32Ty, // tid
2246  CGM.Int32Ty, // schedtype
2247  llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2248  PtrTy, // p_lower
2249  PtrTy, // p_upper
2250  PtrTy, // p_stride
2251  ITy, // incr
2252  ITy // chunk
2253  };
2254  llvm::FunctionType *FnTy =
2255  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2256  return CGM.CreateRuntimeFunction(FnTy, Name);
2257 }
2258 
2259 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
2260  bool IVSigned) {
2261  assert((IVSize == 32 || IVSize == 64) &&
2262  "IV size is not compatible with the omp runtime");
2263  auto Name =
2264  IVSize == 32
2265  ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2266  : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2267  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2268  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2269  CGM.Int32Ty, // tid
2270  CGM.Int32Ty, // schedtype
2271  ITy, // lower
2272  ITy, // upper
2273  ITy, // stride
2274  ITy // chunk
2275  };
2276  llvm::FunctionType *FnTy =
2277  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2278  return CGM.CreateRuntimeFunction(FnTy, Name);
2279 }
2280 
2281 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
2282  bool IVSigned) {
2283  assert((IVSize == 32 || IVSize == 64) &&
2284  "IV size is not compatible with the omp runtime");
2285  auto Name =
2286  IVSize == 32
2287  ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2288  : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2289  llvm::Type *TypeParams[] = {
2290  getIdentTyPointerTy(), // loc
2291  CGM.Int32Ty, // tid
2292  };
2293  llvm::FunctionType *FnTy =
2294  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2295  return CGM.CreateRuntimeFunction(FnTy, Name);
2296 }
2297 
2298 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
2299  bool IVSigned) {
2300  assert((IVSize == 32 || IVSize == 64) &&
2301  "IV size is not compatible with the omp runtime");
2302  auto Name =
2303  IVSize == 32
2304  ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2305  : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2306  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2307  auto PtrTy = llvm::PointerType::getUnqual(ITy);
2308  llvm::Type *TypeParams[] = {
2309  getIdentTyPointerTy(), // loc
2310  CGM.Int32Ty, // tid
2311  llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2312  PtrTy, // p_lower
2313  PtrTy, // p_upper
2314  PtrTy // p_stride
2315  };
2316  llvm::FunctionType *FnTy =
2317  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2318  return CGM.CreateRuntimeFunction(FnTy, Name);
2319 }
2320 
// Helper returning an llvm::Constant * for a variable declaration VD.
// NOTE(review): the declarator and parts of the statements below are not
// present in this text, so only the visible pieces are described.
2321 llvm::Constant *
      // Sanity check involving the OpenMPUseTLS language option; the second
      // operand of the '||' is not visible here.
2323  assert(!CGM.getLangOpts().OpenMPUseTLS ||
2325  // Lookup the entry, lazily creating it if necessary.
      // The name passed to the lookup is VD's mangled name followed by
      // ".cache.".
2327  Twine(CGM.getMangledName(VD)) + ".cache.");
2328 }
2329 
2331  const VarDecl *VD,
2332  Address VDAddr,
2333  SourceLocation Loc) {
      // Early exit: the incoming address is returned unchanged when the
      // OpenMPUseTLS language option is set and the remainder of this
      // condition holds (NOTE(review): second operand not visible here).
2334  if (CGM.getLangOpts().OpenMPUseTLS &&
2336  return VDAddr;
2337 
2338  auto VarTy = VDAddr.getElementType();
      // Runtime-call arguments begin with the updated source location, the
      // thread id and the variable's address cast to CGM.Int8PtrTy.
2339  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2340  CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2341  CGM.Int8PtrTy),
      // The value produced by the runtime call is wrapped in an Address that
      // keeps the alignment of the original VDAddr.
2344  return Address(CGF.EmitRuntimeCall(
2346  VDAddr.getAlignment());
2347 }
2348 
2350  CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2351  llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2352  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2353  // library.
      // The location value is computed once and reused as the leading argument
      // of the registration call below.
2354  auto OMPLoc = emitUpdateLocation(CGF, Loc);
2356  OMPLoc);
2357  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2358  // to register constructor/destructor for variable.
      // Argument order: location, variable address cast to CGM.VoidPtrTy, then
      // the Ctor, CopyCtor and Dtor values supplied by the caller.
2359  llvm::Value *Args[] = {OMPLoc,
2360  CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2361  CGM.VoidPtrTy),
2362  Ctor, CopyCtor, Dtor};
2363  CGF.EmitRuntimeCall(
2365 }
2366 
2368  const VarDecl *VD, Address VDAddr, SourceLocation Loc,
2369  bool PerformInit, CodeGenFunction *CGF) {
      // Builds the constructor/destructor helper functions for a threadprivate
      // variable and registers them through emitThreadPrivateVarInit.
      // Returns the generated init function when CGF is null and at least one
      // helper was produced; every other path returns nullptr.
      //
      // Nothing is emitted when the OpenMPUseTLS language option is set and
      // the rest of this condition holds (NOTE(review): second operand not
      // visible here).
2370  if (CGM.getLangOpts().OpenMPUseTLS &&
2372  return nullptr;
2373 
      // Switch to the defining declaration; ThreadPrivateWithDefinition records
      // the definitions already handled so each one is processed only once.
2374  VD = VD->getDefinition(CGM.getContext());
2375  if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
2376  ThreadPrivateWithDefinition.insert(VD);
2377  QualType ASTTy = VD->getType();
2378 
2379  llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
2380  auto Init = VD->getAnyInitializer();
2381  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2382  // Generate function that re-emits the declaration's initializer into the
2383  // threadprivate copy of the variable VD
2384  CodeGenFunction CtorCGF(CGM);
2385  FunctionArgList Args;
2388  Args.push_back(&Dst);
2389 
2391  CGM.getContext().VoidPtrTy, Args);
2392  auto FTy = CGM.getTypes().GetFunctionType(FI);
2394  FTy, ".__kmpc_global_ctor_.", FI, Loc);
2395  CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
2396  Args, SourceLocation());
      // Load the pointer argument and view it as a pointer to the variable's
      // memory type, reusing VDAddr's alignment.
2397  auto ArgVal = CtorCGF.EmitLoadOfScalar(
2398  CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2399  CGM.getContext().VoidPtrTy, Dst.getLocation());
2400  Address Arg = Address(ArgVal, VDAddr.getAlignment());
2401  Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
2402  CtorCGF.ConvertTypeForMem(ASTTy));
      // NOTE(review): Init is dereferenced here without a null check - confirm
      // callers only pass PerformInit == true when VD has an initializer.
2403  CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
2404  /*IsInitializer=*/true);
      // The helper returns its own pointer argument: reload it and store it
      // into the return slot.
2405  ArgVal = CtorCGF.EmitLoadOfScalar(
2406  CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2407  CGM.getContext().VoidPtrTy, Dst.getLocation());
2408  CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
2409  CtorCGF.FinishFunction();
2410  Ctor = Fn;
2411  }
2412  if (VD->getType().isDestructedType() != QualType::DK_none) {
2413  // Generate function that emits destructor call for the threadprivate copy
2414  // of the variable VD
2415  CodeGenFunction DtorCGF(CGM);
2416  FunctionArgList Args;
2419  Args.push_back(&Dst);
2420 
2422  CGM.getContext().VoidTy, Args);
2423  auto FTy = CGM.getTypes().GetFunctionType(FI);
2425  FTy, ".__kmpc_global_dtor_.", FI, Loc);
2426  auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2427  DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
2428  SourceLocation());
2429  // Create a scope with an artificial location for the body of this function.
2430  auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      // Destroy the object the pointer argument refers to, using the destroyer
      // selected by ASTTy's destruction kind.
2431  auto ArgVal = DtorCGF.EmitLoadOfScalar(
2432  DtorCGF.GetAddrOfLocalVar(&Dst),
2433  /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
2434  DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
2435  DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2436  DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2437  DtorCGF.FinishFunction();
2438  Dtor = Fn;
2439  }
2440  // Do not emit init function if it is not required.
2441  if (!Ctor && !Dtor)
2442  return nullptr;
2443 
2444  llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2445  auto CopyCtorTy =
2446  llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
2447  /*isVarArg=*/false)->getPointerTo();
2448  // Copying constructor for the threadprivate variable.
2449  // Must be NULL - reserved by runtime, but currently it requires that this
2450  // parameter is always NULL. Otherwise it fires assertion.
2451  CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
      // Whichever helper was not generated above is replaced by a null pointer
      // of the matching function-pointer type.
2452  if (Ctor == nullptr) {
2453  auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
2454  /*isVarArg=*/false)->getPointerTo();
2455  Ctor = llvm::Constant::getNullValue(CtorTy);
2456  }
2457  if (Dtor == nullptr) {
2458  auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
2459  /*isVarArg=*/false)->getPointerTo();
2460  Dtor = llvm::Constant::getNullValue(DtorTy);
2461  }
      // No enclosing function supplied: emit the registration into a separate
      // function named ".__omp_threadprivate_init_." and return that function
      // to the caller.
2462  if (!CGF) {
2463  auto InitFunctionTy =
2464  llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
2465  auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
2466  InitFunctionTy, ".__omp_threadprivate_init_.",
2468  CodeGenFunction InitCGF(CGM);
2469  FunctionArgList ArgList;
2470  InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
2471  CGM.getTypes().arrangeNullaryFunction(), ArgList,
2472  Loc);
2473  emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2474  InitCGF.FinishFunction();
2475  return InitFunction;
2476  }
      // Otherwise emit the registration directly into the caller-provided
      // function.
2477  emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2478  }
2479  return nullptr;
2480 }
2481 
2483  QualType VarType,
2484  StringRef Name) {
2485  llvm::Twine VarName(Name, ".artificial.");
2486  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2487  llvm::Value *GAddr = getOrCreateInternalVariable(VarLVType, VarName);
2488  llvm::Value *Args[] = {
2490  getThreadID(CGF, SourceLocation()),
2492  CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2493  /*IsSigned=*/false),
2494  getOrCreateInternalVariable(CGM.VoidPtrPtrTy, VarName + ".cache.")};
2495  return Address(
2497  CGF.EmitRuntimeCall(
2499  VarLVType->getPointerTo(/*AddrSpace=*/0)),
2500  CGM.getPointerAlign());
2501 }
2502 
2503 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
2504 /// function. Here is the logic:
2505 /// if (Cond) {
2506 /// ThenGen();
2507 /// } else {
2508 /// ElseGen();
2509 /// }
2511  const RegionCodeGenTy &ThenGen,
2512  const RegionCodeGenTy &ElseGen) {
2513  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2514 
2515  // If the condition constant folds and can be elided, try to avoid emitting
2516  // the condition and the dead arm of the if/else.
2517  bool CondConstant;
2518  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2519  if (CondConstant)
2520  ThenGen(CGF);
2521  else
2522  ElseGen(CGF);
2523  return;
2524  }
2525 
2526  // Otherwise, the condition did not fold, or we couldn't elide it. Just
2527  // emit the conditional branch.
2528  auto ThenBlock = CGF.createBasicBlock("omp_if.then");
2529  auto ElseBlock = CGF.createBasicBlock("omp_if.else");
2530  auto ContBlock = CGF.createBasicBlock("omp_if.end");
2531  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2532 
2533  // Emit the 'then' code.
2534  CGF.EmitBlock(ThenBlock);
2535  ThenGen(CGF);
2536  CGF.EmitBranch(ContBlock);
2537  // Emit the 'else' code if present.
2538  // There is no need to emit line number for unconditional branch.
2540  CGF.EmitBlock(ElseBlock);
2541  ElseGen(CGF);
2542  // There is no need to emit line number for unconditional branch.
2544  CGF.EmitBranch(ContBlock);
2545  // Emit the continuation block for code after the if.
2546  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2547 }
2548 
2550  llvm::Value *OutlinedFn,
2551  ArrayRef<llvm::Value *> CapturedVars,
2552  const Expr *IfCond) {
2553  if (!CGF.HaveInsertPoint())
2554  return;
2555  auto *RTLoc = emitUpdateLocation(CGF, Loc);
2556  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
2557  PrePostActionTy &) {
2558  // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2559  auto &RT = CGF.CGM.getOpenMPRuntime();
2560  llvm::Value *Args[] = {
2561  RTLoc,
2562  CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2563  CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2565  RealArgs.append(std::begin(Args), std::end(Args));
2566  RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2567 
2568  auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
2569  CGF.EmitRuntimeCall(RTLFn, RealArgs);
2570  };
2571  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
2572  PrePostActionTy &) {
2573  auto &RT = CGF.CGM.getOpenMPRuntime();
2574  auto ThreadID = RT.getThreadID(CGF, Loc);
2575  // Build calls:
2576  // __kmpc_serialized_parallel(&Loc, GTid);
2577  llvm::Value *Args[] = {RTLoc, ThreadID};
2578  CGF.EmitRuntimeCall(
2579  RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
2580 
2581  // OutlinedFn(&GTid, &zero, CapturedStruct);
2582  auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2583  Address ZeroAddr =
2584  CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
2585  /*Name*/ ".zero.addr");
2586  CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
2587  llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2588  OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2589  OutlinedFnArgs.push_back(ZeroAddr.getPointer());
2590  OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2591  RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2592 
2593  // __kmpc_end_serialized_parallel(&Loc, GTid);
2594  llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2595  CGF.EmitRuntimeCall(
2596  RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
2597  EndArgs);
2598  };
2599  if (IfCond)
2600  emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
2601  else {
2602  RegionCodeGenTy ThenRCG(ThenGen);
2603  ThenRCG(CGF);
2604  }
2605 }
2606 
2607 // If we're inside an (outlined) parallel region, use the region info's
2608 // thread-ID variable (it is passed as the first argument of the outlined
2609 // function as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
2610 // region but in a regular serial code region, get the thread ID by calling
2611 // kmp_int32 kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
2612 // temporary and return the address of that temp.
2614  SourceLocation Loc) {
2615  if (auto *OMPRegionInfo =
2616  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2617  if (OMPRegionInfo->getThreadIDVariable())
2618  return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
2619 
2620  auto ThreadID = getThreadID(CGF, Loc);
2621  auto Int32Ty =
2622  CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2623  auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2624  CGF.EmitStoreOfScalar(ThreadID,
2625  CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2626 
2627  return ThreadIDTemp;
2628 }
2629 
2630 llvm::Constant *
2632  const llvm::Twine &Name) {
2633  SmallString<256> Buffer;
2634  llvm::raw_svector_ostream Out(Buffer);
2635  Out << Name;
2636  auto RuntimeName = Out.str();
2637  auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
2638  if (Elem.second) {
2639  assert(Elem.second->getType()->getPointerElementType() == Ty &&
2640  "OMP internal variable has different type than requested");
2641  return &*Elem.second;
2642  }
2643 
2644  return Elem.second = new llvm::GlobalVariable(
2645  CGM.getModule(), Ty, /*IsConstant*/ false,
2646  llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2647  Elem.first());
2648 }
2649 
2651  llvm::Twine Name(".gomp_critical_user_", CriticalName);
2652  return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
2653 }
2654 
2655 namespace {
2656 /// Common pre(post)-action for different OpenMP constructs.
2657 class CommonActionTy final : public PrePostActionTy {
2658  llvm::Value *EnterCallee;
2659  ArrayRef<llvm::Value *> EnterArgs;
2660  llvm::Value *ExitCallee;
2661  ArrayRef<llvm::Value *> ExitArgs;
2662  bool Conditional;
2663  llvm::BasicBlock *ContBlock = nullptr;
2664 
2665 public:
2666  CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs,
2667  llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs,
2668  bool Conditional = false)
2669  : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2670  ExitArgs(ExitArgs), Conditional(Conditional) {}
2671  void Enter(CodeGenFunction &CGF) override {
2672  llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2673  if (Conditional) {
2674  llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2675  auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2676  ContBlock = CGF.createBasicBlock("omp_if.end");
2677  // Generate the branch (If-stmt)
2678  CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2679  CGF.EmitBlock(ThenBlock);
2680  }
2681  }
2682  void Done(CodeGenFunction &CGF) {
2683  // Emit the rest of blocks/branches
2684  CGF.EmitBranch(ContBlock);
2685  CGF.EmitBlock(ContBlock, true);
2686  }
2687  void Exit(CodeGenFunction &CGF) override {
2688  CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2689  }
2690 };
2691 } // anonymous namespace
2692 
2694  StringRef CriticalName,
2695  const RegionCodeGenTy &CriticalOpGen,
2696  SourceLocation Loc, const Expr *Hint) {
2697  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2698  // CriticalOpGen();
2699  // __kmpc_end_critical(ident_t *, gtid, Lock);
2700  // Prepare arguments and build a call to __kmpc_critical
2701  if (!CGF.HaveInsertPoint())
2702  return;
2703  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2704  getCriticalRegionLock(CriticalName)};
2705  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2706  std::end(Args));
2707  if (Hint) {
2708  EnterArgs.push_back(CGF.Builder.CreateIntCast(
2709  CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
2710  }
2711  CommonActionTy Action(
2715  CriticalOpGen.setAction(Action);
2716  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2717 }
2718 
2720  const RegionCodeGenTy &MasterOpGen,
2721  SourceLocation Loc) {
2722  if (!CGF.HaveInsertPoint())
2723  return;
2724  // if(__kmpc_master(ident_t *, gtid)) {
2725  // MasterOpGen();
2726  // __kmpc_end_master(ident_t *, gtid);
2727  // }
2728  // Prepare arguments and build a call to __kmpc_master
2729  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2730  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
2732  /*Conditional=*/true);
2733  MasterOpGen.setAction(Action);
2734  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2735  Action.Done(CGF);
2736 }
2737 
2739  SourceLocation Loc) {
2740  if (!CGF.HaveInsertPoint())
2741  return;
2742  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2743  llvm::Value *Args[] = {
2744  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2745  llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2747  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2748  Region->emitUntiedSwitch(CGF);
2749 }
2750 
2752  const RegionCodeGenTy &TaskgroupOpGen,
2753  SourceLocation Loc) {
2754  if (!CGF.HaveInsertPoint())
2755  return;
2756  // __kmpc_taskgroup(ident_t *, gtid);
2757  // TaskgroupOpGen();
2758  // __kmpc_end_taskgroup(ident_t *, gtid);
2759  // Prepare arguments and build a call to __kmpc_taskgroup
2760  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2761  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
2763  Args);
2764  TaskgroupOpGen.setAction(Action);
2765  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2766 }
2767 
2768 /// Given an array of pointers to variables, project the address of a
2769 /// given variable.
2771  unsigned Index, const VarDecl *Var) {
2772  // Pull out the pointer to the variable.
2773  Address PtrAddr =
2774  CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
2775  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2776 
2777  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2778  Addr = CGF.Builder.CreateElementBitCast(
2779  Addr, CGF.ConvertTypeForMem(Var->getType()));
2780  return Addr;
2781 }
2782 
2784  CodeGenModule &CGM, llvm::Type *ArgsType,
2785  ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2786  ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
2787  auto &C = CGM.getContext();
2788  // void copy_func(void *LHSArg, void *RHSArg);
2789  FunctionArgList Args;
2790  ImplicitParamDecl LHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
2791  ImplicitParamDecl RHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
2792  Args.push_back(&LHSArg);
2793  Args.push_back(&RHSArg);
2794  auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2795  auto *Fn = llvm::Function::Create(
2797  ".omp.copyprivate.copy_func", &CGM.getModule());
2798  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
2799  CodeGenFunction CGF(CGM);
2800  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
2801  // Dest = (void*[n])(LHSArg);
2802  // Src = (void*[n])(RHSArg);
2804  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2805  ArgsType), CGF.getPointerAlign());
2807  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2808  ArgsType), CGF.getPointerAlign());
2809  // *(Type0*)Dst[0] = *(Type0*)Src[0];
2810  // *(Type1*)Dst[1] = *(Type1*)Src[1];
2811  // ...
2812  // *(Typen*)Dst[n] = *(Typen*)Src[n];
2813  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2814  auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2815  Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2816 
2817  auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2818  Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2819 
2820  auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2821  QualType Type = VD->getType();
2822  CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2823  }
2824  CGF.FinishFunction();
2825  return Fn;
2826 }
2827 
2829  const RegionCodeGenTy &SingleOpGen,
2830  SourceLocation Loc,
2831  ArrayRef<const Expr *> CopyprivateVars,
2832  ArrayRef<const Expr *> SrcExprs,
2833  ArrayRef<const Expr *> DstExprs,
2834  ArrayRef<const Expr *> AssignmentOps) {
2835  if (!CGF.HaveInsertPoint())
2836  return;
2837  assert(CopyprivateVars.size() == SrcExprs.size() &&
2838  CopyprivateVars.size() == DstExprs.size() &&
2839  CopyprivateVars.size() == AssignmentOps.size());
2840  auto &C = CGM.getContext();
2841  // int32 did_it = 0;
2842  // if(__kmpc_single(ident_t *, gtid)) {
2843  // SingleOpGen();
2844  // __kmpc_end_single(ident_t *, gtid);
2845  // did_it = 1;
2846  // }
2847  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2848  // <copy_func>, did_it);
2849 
2850  Address DidIt = Address::invalid();
2851  if (!CopyprivateVars.empty()) {
2852  // int32 did_it = 0;
2853  auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2854  DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2855  CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2856  }
2857  // Prepare arguments and build a call to __kmpc_single
2858  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2859  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
2861  /*Conditional=*/true);
2862  SingleOpGen.setAction(Action);
2863  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2864  if (DidIt.isValid()) {
2865  // did_it = 1;
2866  CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2867  }
2868  Action.Done(CGF);
2869  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2870  // <copy_func>, did_it);
2871  if (DidIt.isValid()) {
2872  llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2873  auto CopyprivateArrayTy =
2874  C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
2875  /*IndexTypeQuals=*/0);
2876  // Create a list of all private variables for copyprivate.
2877  Address CopyprivateList =
2878  CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2879  for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2880  Address Elem = CGF.Builder.CreateConstArrayGEP(
2881  CopyprivateList, I, CGF.getPointerSize());
2882  CGF.Builder.CreateStore(
2884  CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
2885  Elem);
2886  }
2887  // Build function that copies private values from single region to all other
2888  // threads in the corresponding parallel region.
2889  auto *CpyFn = emitCopyprivateCopyFunction(
2890  CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
2891  CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
2892  auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2893  Address CL =
2894  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
2895  CGF.VoidPtrTy);
2896  auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
2897  llvm::Value *Args[] = {
2898  emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2899  getThreadID(CGF, Loc), // i32 <gtid>
2900  BufSize, // size_t <buf_size>
2901  CL.getPointer(), // void *<copyprivate list>
2902  CpyFn, // void (*) (void *, void *) <copy_func>
2903  DidItVal // i32 did_it
2904  };
2906  }
2907 }
2908 
2910  const RegionCodeGenTy &OrderedOpGen,
2911  SourceLocation Loc, bool IsThreads) {
2912  if (!CGF.HaveInsertPoint())
2913  return;
2914  // __kmpc_ordered(ident_t *, gtid);
2915  // OrderedOpGen();
2916  // __kmpc_end_ordered(ident_t *, gtid);
2917  // Prepare arguments and build a call to __kmpc_ordered
2918  if (IsThreads) {
2919  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2920  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
2922  Args);
2923  OrderedOpGen.setAction(Action);
2924  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2925  return;
2926  }
2927  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2928 }
2929 
2931  OpenMPDirectiveKind Kind, bool EmitChecks,
2932  bool ForceSimpleCall) {
2933  if (!CGF.HaveInsertPoint())
2934  return;
2935  // Build call __kmpc_cancel_barrier(loc, thread_id);
2936  // Build call __kmpc_barrier(loc, thread_id);
2937  unsigned Flags;
2938  if (Kind == OMPD_for)
2939  Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2940  else if (Kind == OMPD_sections)
2941  Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2942  else if (Kind == OMPD_single)
2943  Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2944  else if (Kind == OMPD_barrier)
2945  Flags = OMP_IDENT_BARRIER_EXPL;
2946  else
2947  Flags = OMP_IDENT_BARRIER_IMPL;
2948  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2949  // thread_id);
2950  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2951  getThreadID(CGF, Loc)};
2952  if (auto *OMPRegionInfo =
2953  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
2954  if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2955  auto *Result = CGF.EmitRuntimeCall(
2957  if (EmitChecks) {
2958  // if (__kmpc_cancel_barrier()) {
2959  // exit from construct;
2960  // }
2961  auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
2962  auto *ContBB = CGF.createBasicBlock(".cancel.continue");
2963  auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
2964  CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2965  CGF.EmitBlock(ExitBB);
2966  // exit from construct;
2967  auto CancelDestination =
2968  CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2969  CGF.EmitBranchThroughCleanup(CancelDestination);
2970  CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2971  }
2972  return;
2973  }
2974  }
2976 }
2977 
2978 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
2980  bool Chunked, bool Ordered) {
2981  switch (ScheduleKind) {
2982  case OMPC_SCHEDULE_static:
2983  return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2984  : (Ordered ? OMP_ord_static : OMP_sch_static);
2985  case OMPC_SCHEDULE_dynamic:
2987  case OMPC_SCHEDULE_guided:
2989  case OMPC_SCHEDULE_runtime:
2990  return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2991  case OMPC_SCHEDULE_auto:
2992  return Ordered ? OMP_ord_auto : OMP_sch_auto;
2993  case OMPC_SCHEDULE_unknown:
2994  assert(!Chunked && "chunk was specified but schedule kind not known");
2995  return Ordered ? OMP_ord_static : OMP_sch_static;
2996  }
2997  llvm_unreachable("Unexpected runtime schedule");
2998 }
2999 
3000 /// \brief Map the OpenMP distribute schedule to the runtime enumeration.
3001 static OpenMPSchedType
3003  // only static is allowed for dist_schedule
3005 }
3006 
3008  bool Chunked) const {
3009  auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3010  return Schedule == OMP_sch_static;
3011 }
3012 
3014  OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3015  auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3016  return Schedule == OMP_dist_sch_static;
3017 }
3018 
3019 
3021  auto Schedule =
3022  getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3023  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3024  return Schedule != OMP_sch_static;
3025 }
3026 
3030  int Modifier = 0;
3031  switch (M1) {
3032  case OMPC_SCHEDULE_MODIFIER_monotonic:
3033  Modifier = OMP_sch_modifier_monotonic;
3034  break;
3035  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3036  Modifier = OMP_sch_modifier_nonmonotonic;
3037  break;
3038  case OMPC_SCHEDULE_MODIFIER_simd:
3039  if (Schedule == OMP_sch_static_chunked)
3041  break;
3044  break;
3045  }
3046  switch (M2) {
3047  case OMPC_SCHEDULE_MODIFIER_monotonic:
3048  Modifier = OMP_sch_modifier_monotonic;
3049  break;
3050  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3051  Modifier = OMP_sch_modifier_nonmonotonic;
3052  break;
3053  case OMPC_SCHEDULE_MODIFIER_simd:
3054  if (Schedule == OMP_sch_static_chunked)
3056  break;
3059  break;
3060  }
3061  return Schedule | Modifier;
3062 }
3063 
3065  CodeGenFunction &CGF, SourceLocation Loc,
3066  const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
3067  bool Ordered, const DispatchRTInput &DispatchValues) {
3068  if (!CGF.HaveInsertPoint())
3069  return;
3071  ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
3072  assert(Ordered ||
3073  (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
3074  Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
3075  Schedule != OMP_sch_static_balanced_chunked));
3076  // Call __kmpc_dispatch_init(
3077  // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
3078  // kmp_int[32|64] lower, kmp_int[32|64] upper,
3079  // kmp_int[32|64] stride, kmp_int[32|64] chunk);
3080 
3081  // If the Chunk was not specified in the clause - use default value 1.
3082  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
3083  : CGF.Builder.getIntN(IVSize, 1);
3084  llvm::Value *Args[] = {
3085  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3086  CGF.Builder.getInt32(addMonoNonMonoModifier(
3087  Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
3088  DispatchValues.LB, // Lower
3089  DispatchValues.UB, // Upper
3090  CGF.Builder.getIntN(IVSize, 1), // Stride
3091  Chunk // Chunk
3092  };
3093  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
3094 }
3095 
3097  CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
3098  llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule,
3100  const CGOpenMPRuntime::StaticRTInput &Values) {
3101  if (!CGF.HaveInsertPoint())
3102  return;
3103 
3104  assert(!Values.Ordered);
3105  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
3106  Schedule == OMP_sch_static_balanced_chunked ||
3107  Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
3108  Schedule == OMP_dist_sch_static ||
3109  Schedule == OMP_dist_sch_static_chunked);
3110 
3111  // Call __kmpc_for_static_init(
3112  // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
3113  // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
3114  // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
3115  // kmp_int[32|64] incr, kmp_int[32|64] chunk);
3116  llvm::Value *Chunk = Values.Chunk;
3117  if (Chunk == nullptr) {
3118  assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
3119  Schedule == OMP_dist_sch_static) &&
3120  "expected static non-chunked schedule");
3121  // If the Chunk was not specified in the clause - use default value 1.
3122  Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
3123  } else {
3124  assert((Schedule == OMP_sch_static_chunked ||
3125  Schedule == OMP_sch_static_balanced_chunked ||
3126  Schedule == OMP_ord_static_chunked ||
3127  Schedule == OMP_dist_sch_static_chunked) &&
3128  "expected static chunked schedule");
3129  }
3130  llvm::Value *Args[] = {
3131  UpdateLocation,
3132  ThreadId,
3133  CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1,
3134  M2)), // Schedule type
3135  Values.IL.getPointer(), // &isLastIter
3136  Values.LB.getPointer(), // &LB
3137  Values.UB.getPointer(), // &UB
3138  Values.ST.getPointer(), // &Stride
3139  CGF.Builder.getIntN(Values.IVSize, 1), // Incr
3140  Chunk // Chunk
3141  };
3142  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
3143 }
3144 
3146  SourceLocation Loc,
3147  OpenMPDirectiveKind DKind,
3148  const OpenMPScheduleTy &ScheduleKind,
3149  const StaticRTInput &Values) {
3150  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3151  ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3152  assert(isOpenMPWorksharingDirective(DKind) &&
3153  "Expected loop-based or sections-based directive.");
3154  auto *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3155  isOpenMPLoopDirective(DKind)
3156  ? OMP_IDENT_WORK_LOOP
3157  : OMP_IDENT_WORK_SECTIONS);
3158  auto *ThreadId = getThreadID(CGF, Loc);
3159  auto *StaticInitFunction =
3161  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3162  ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3163 }
3164 
3166  CodeGenFunction &CGF, SourceLocation Loc,
3167  OpenMPDistScheduleClauseKind SchedKind,
3168  const CGOpenMPRuntime::StaticRTInput &Values) {
3169  OpenMPSchedType ScheduleNum =
3170  getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3171  auto *UpdatedLocation =
3172  emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3173  auto *ThreadId = getThreadID(CGF, Loc);
3174  auto *StaticInitFunction =
3175  createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3176  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3177  ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3179 }
3180 
3182  SourceLocation Loc,
3183  OpenMPDirectiveKind DKind) {
3184  if (!CGF.HaveInsertPoint())
3185  return;
3186  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3187  llvm::Value *Args[] = {
3188  emitUpdateLocation(CGF, Loc,
3190  ? OMP_IDENT_WORK_DISTRIBUTE
3191  : isOpenMPLoopDirective(DKind)
3192  ? OMP_IDENT_WORK_LOOP
3193  : OMP_IDENT_WORK_SECTIONS),
3194  getThreadID(CGF, Loc)};
3196  Args);
3197 }
3198 
3200  SourceLocation Loc,
3201  unsigned IVSize,
3202  bool IVSigned) {
3203  if (!CGF.HaveInsertPoint())
3204  return;
3205  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3206  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3207  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3208 }
3209 
3211  SourceLocation Loc, unsigned IVSize,
3212  bool IVSigned, Address IL,
3213  Address LB, Address UB,
3214  Address ST) {
3215  // Call __kmpc_dispatch_next(
3216  // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3217  // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3218  // kmp_int[32|64] *p_stride);
3219  llvm::Value *Args[] = {
3220  emitUpdateLocation(CGF, Loc),
3221  getThreadID(CGF, Loc),
3222  IL.getPointer(), // &isLastIter
3223  LB.getPointer(), // &Lower
3224  UB.getPointer(), // &Upper
3225  ST.getPointer() // &Stride
3226  };
3227  llvm::Value *Call =
3228  CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3229  return CGF.EmitScalarConversion(
3230  Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
3231  CGF.getContext().BoolTy, Loc);
3232 }
3233 
3235  llvm::Value *NumThreads,
3236  SourceLocation Loc) {
3237  if (!CGF.HaveInsertPoint())
3238  return;
3239  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3240  llvm::Value *Args[] = {
3241  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3242  CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3244  Args);
3245 }
3246 
3248  OpenMPProcBindClauseKind ProcBind,
3249  SourceLocation Loc) {
3250  if (!CGF.HaveInsertPoint())
3251  return;
3252  // Constants for proc bind value accepted by the runtime.
3253  enum ProcBindTy {
3254  ProcBindFalse = 0,
3255  ProcBindTrue,
3256  ProcBindMaster,
3257  ProcBindClose,
3258  ProcBindSpread,
3259  ProcBindIntel,
3260  ProcBindDefault
3261  } RuntimeProcBind;
3262  switch (ProcBind) {
3263  case OMPC_PROC_BIND_master:
3264  RuntimeProcBind = ProcBindMaster;
3265  break;
3266  case OMPC_PROC_BIND_close:
3267  RuntimeProcBind = ProcBindClose;
3268  break;
3269  case OMPC_PROC_BIND_spread:
3270  RuntimeProcBind = ProcBindSpread;
3271  break;
3273  llvm_unreachable("Unsupported proc_bind value.");
3274  }
3275  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3276  llvm::Value *Args[] = {
3277  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3278  llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
3280 }
3281 
3282 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3283  SourceLocation Loc) {
3284  if (!CGF.HaveInsertPoint())
3285  return;
3286  // Build call void __kmpc_flush(ident_t *loc)
3288  emitUpdateLocation(CGF, Loc));
3289 }
3290 
3291 namespace {
3292 /// \brief Indexes of fields for type kmp_task_t.
3294  /// \brief List of shared variables.
3295  KmpTaskTShareds,
3296  /// \brief Task routine.
3297  KmpTaskTRoutine,
3298  /// \brief Partition id for the untied tasks.
3299  KmpTaskTPartId,
3300  /// Function with call of destructors for private variables.
3301  Data1,
3302  /// Task priority.
3303  Data2,
3304  /// (Taskloops only) Lower bound.
3305  KmpTaskTLowerBound,
3306  /// (Taskloops only) Upper bound.
3307  KmpTaskTUpperBound,
3308  /// (Taskloops only) Stride.
3309  KmpTaskTStride,
3310  /// (Taskloops only) Is last iteration flag.
3311  KmpTaskTLastIter,
3312  /// (Taskloops only) Reduction data.
3313  KmpTaskTReductions,
3314 };
3315 } // anonymous namespace
3316 
3317 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3318  // FIXME: Add other entries type when they become supported.
3319  return OffloadEntriesTargetRegion.empty();
3320 }
3321 
3322 /// \brief Initialize target region entry.
3323 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3324  initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3325  StringRef ParentName, unsigned LineNum,
3326  unsigned Order) {
3327  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3328  "only required for the device "
3329  "code generation.");
3330  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3331  OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3332  /*Flags=*/0);
3333  ++OffloadingEntriesNum;
3334 }
3335 
3336 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3337  registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3338  StringRef ParentName, unsigned LineNum,
3339  llvm::Constant *Addr, llvm::Constant *ID,
3340  int32_t Flags) {
3341  // If we are emitting code for a target, the entry is already initialized,
3342  // only has to be registered.
3343  if (CGM.getLangOpts().OpenMPIsDevice) {
3344  assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
3345  "Entry must exist.");
3346  auto &Entry =
3347  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3348  assert(Entry.isValid() && "Entry not initialized!");
3349  Entry.setAddress(Addr);
3350  Entry.setID(ID);
3351  Entry.setFlags(Flags);
3352  return;
3353  } else {
3354  OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID, Flags);
3355  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3356  }
3357 }
3358 
3359 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3360  unsigned DeviceID, unsigned FileID, StringRef ParentName,
3361  unsigned LineNum) const {
3362  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3363  if (PerDevice == OffloadEntriesTargetRegion.end())
3364  return false;
3365  auto PerFile = PerDevice->second.find(FileID);
3366  if (PerFile == PerDevice->second.end())
3367  return false;
3368  auto PerParentName = PerFile->second.find(ParentName);
3369  if (PerParentName == PerFile->second.end())
3370  return false;
3371  auto PerLine = PerParentName->second.find(LineNum);
3372  if (PerLine == PerParentName->second.end())
3373  return false;
3374  // Fail if this entry is already registered.
3375  if (PerLine->second.getAddress() || PerLine->second.getID())
3376  return false;
3377  return true;
3378 }
3379 
3380 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3381  const OffloadTargetRegionEntryInfoActTy &Action) {
3382  // Scan all target region entries and perform the provided action.
3383  for (auto &D : OffloadEntriesTargetRegion)
3384  for (auto &F : D.second)
3385  for (auto &P : F.second)
3386  for (auto &L : P.second)
3387  Action(D.first, F.first, P.first(), L.first, L.second);
3388 }
3389 
3390 /// \brief Create a Ctor/Dtor-like function whose body is emitted through
3391 /// \a Codegen. This is used to emit the two functions that register and
3392 /// unregister the descriptor of the current compilation unit.
///
/// The generated function returns void and takes one void* parameter. It is
/// created with CodeGenModule::CreateGlobalInitOrDestructFunction under the
/// name \a Name, and the created llvm::Function is returned.
3393 static llvm::Function *
// NOTE(review): the line carrying the function name and its leading
// parameters (listing line 3394) is missing from this extract. The body below
// uses `CGM` and `Name`, so they are presumably declared there -- confirm
// against the real source.
3395  const RegionCodeGenTy &Codegen) {
3396  auto &C = CGM.getContext();
3397  FunctionArgList Args;
 // Single void* parameter of the generated function.
3398  ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
3399  Args.push_back(&DummyPtr);
3400 
3401  CodeGenFunction CGF(CGM);
3402  auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3403  auto FTy = CGM.getTypes().GetFunctionType(FI);
3404  auto *Fn =
3405  CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation());
 // Open the function, let the caller-provided callback emit its body, and
 // close it again.
3406  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation());
3407  Codegen(CGF);
3408  CGF.FinishFunction();
3409  return Fn;
3410 }
3411 
// Builds the constant offloading descriptor for this module (device image
// table plus host entry range) together with a pair of functions that register
// and unregister it. Returns the registration function, or nullptr when
// compiling for the device or when there are no offload entries.
//
// NOTE(review): the declarator line (listing line 3413) and several call lines
// further down are missing from this extract; the affected spots are marked.
3412 llvm::Function *
3414 
3415  // If we don't have entries or if we are emitting code for the device, we
3416  // don't need to do anything.
3417  if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
3418  return nullptr;
3419 
3420  auto &M = CGM.getModule();
3421  auto &C = CGM.getContext();
3422 
3423  // Get list of devices we care about
3424  auto &Devices = CGM.getLangOpts().OMPTargetTriples;
3425 
3426  // We should be creating an offloading descriptor only if there are devices
3427  // specified.
3428  assert(!Devices.empty() && "No OpenMP offloading devices??");
3429 
3430  // Create the external variables that will point to the begin and end of the
3431  // host entries section. These will be defined by the linker.
3432  auto *OffloadEntryTy =
 // NOTE(review): the initializer of OffloadEntryTy (listing line 3433) is
 // missing from this extract.
3434  llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable(
3435  M, OffloadEntryTy, /*isConstant=*/true,
3436  llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
3437  ".omp_offloading.entries_begin");
3438  llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable(
3439  M, OffloadEntryTy, /*isConstant=*/true,
3440  llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
3441  ".omp_offloading.entries_end");
3442 
3443  // Create all device images
3444  auto *DeviceImageTy = cast<llvm::StructType>(
 // NOTE(review): the argument of this cast (listing line 3445) is missing from
 // this extract.
3446  ConstantInitBuilder DeviceImagesBuilder(CGM);
3447  auto DeviceImagesEntries = DeviceImagesBuilder.beginArray(DeviceImageTy);
3448 
 // One array element per target triple: external begin/end symbols of that
 // triple's image, followed by the shared host entry range.
3449  for (unsigned i = 0; i < Devices.size(); ++i) {
3450  StringRef T = Devices[i].getTriple();
3451  auto *ImgBegin = new llvm::GlobalVariable(
3452  M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
3453  /*Initializer=*/nullptr,
3454  Twine(".omp_offloading.img_start.") + Twine(T));
3455  auto *ImgEnd = new llvm::GlobalVariable(
3456  M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
3457  /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.") + Twine(T));
3458 
3459  auto Dev = DeviceImagesEntries.beginStruct(DeviceImageTy);
3460  Dev.add(ImgBegin);
3461  Dev.add(ImgEnd);
3462  Dev.add(HostEntriesBegin);
3463  Dev.add(HostEntriesEnd);
3464  Dev.finishAndAddTo(DeviceImagesEntries);
3465  }
3466 
3467  // Create device images global array.
3468  llvm::GlobalVariable *DeviceImages =
3469  DeviceImagesEntries.finishAndCreateGlobal(".omp_offloading.device_images",
3470  CGM.getPointerAlign(),
3471  /*isConstant=*/true);
3472  DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3473 
3474  // This is a Zero array to be used in the creation of the constant expressions
3475  llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
3476  llvm::Constant::getNullValue(CGM.Int32Ty)};
3477 
3478  // Create the target region descriptor.
3479  auto *BinaryDescriptorTy = cast<llvm::StructType>(
 // NOTE(review): the argument of this cast (listing line 3480) is missing from
 // this extract.
3481  ConstantInitBuilder DescBuilder(CGM);
3482  auto DescInit = DescBuilder.beginStruct(BinaryDescriptorTy);
 // Descriptor fields, in order: number of devices, pointer to the first
 // element of the device image array, host entries begin, host entries end.
3483  DescInit.addInt(CGM.Int32Ty, Devices.size());
3484  DescInit.add(llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
3485  DeviceImages,
3486  Index));
3487  DescInit.add(HostEntriesBegin);
3488  DescInit.add(HostEntriesEnd);
3489 
3490  auto *Desc = DescInit.finishAndCreateGlobal(".omp_offloading.descriptor",
3491  CGM.getPointerAlign(),
3492  /*isConstant=*/true);
3493 
3494  // Emit code to register or unregister the descriptor at execution
3495  // startup or closing, respectively.
3496 
3497  // Create a variable to drive the registration and unregistration of the
3498  // descriptor, so we can reuse the logic that emits Ctors and Dtors.
3499  auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var");
3500  ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(),
3501  IdentInfo, C.CharTy, ImplicitParamDecl::Other);
3502 
 // NOTE(review): the lines that declare UnRegFn / RegFn and the runtime calls
 // inside both lambdas (listing lines 3503, 3506, 3509, 3512) are missing from
 // this extract. What remains shows two functions being created, named
 // ".omp_offloading.descriptor_unreg" and ".omp_offloading.descriptor_reg",
 // each passing Desc to a call whose head is not visible.
3504  CGM, ".omp_offloading.descriptor_unreg",
3505  [&](CodeGenFunction &CGF, PrePostActionTy &) {
3507  Desc);
3508  });
3510  CGM, ".omp_offloading.descriptor_reg",
3511  [&](CodeGenFunction &CGF, PrePostActionTy &) {
3513  Desc);
 // The registration function also registers UnRegFn as a global destructor
 // taking Desc.
3514  CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
3515  });
3516  if (CGM.supportsCOMDAT()) {
3517  // It is sufficient to call registration function only once, so create a
3518  // COMDAT group for registration/unregistration functions and associated
3519  // data. That would reduce startup time and code size. Registration
3520  // function serves as a COMDAT group key.
3521  auto ComdatKey = M.getOrInsertComdat(RegFn->getName());
3522  RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
3523  RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
3524  RegFn->setComdat(ComdatKey);
3525  UnRegFn->setComdat(ComdatKey);
3526  DeviceImages->setComdat(ComdatKey);
3527  Desc->setComdat(ComdatKey);
3528  }
3529  return RegFn;
3530 }
3531 
// Emits one constant ".omp_offloading.entry" global for an offload entry and
// places it in the ".omp_offloading.entries" section.
//
// \param ID    constant stored (cast to void*) as the entry's first field.
// \param Addr  constant whose name becomes the entry's name string.
// \param Size  value stored in the entry's size field.
// \param Flags value stored in the entry's flags field.
3532 void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID,
3533  llvm::Constant *Addr, uint64_t Size,
3534  int32_t Flags) {
3535  StringRef Name = Addr->getName();
3536  auto *TgtOffloadEntryType = cast<llvm::StructType>(
 // NOTE(review): the argument of this cast (listing line 3537) is missing from
 // this extract.
3538  llvm::LLVMContext &C = CGM.getModule().getContext();
3539  llvm::Module &M = CGM.getModule();
3540 
3541  // Make sure the address has the right type.
 // Note that it is ID, not Addr, that is stored in the entry; Addr only
 // supplies the name above.
3542  llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy);
3543 
3544  // Create constant string with the name.
3545  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3546 
3547  llvm::GlobalVariable *Str =
3548  new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true,
 // NOTE(review): the linkage/initializer arguments (listing line 3549) are
 // missing from this extract.
3550  ".omp_offloading.entry_name");
3551  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3552  llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy);
3553 
3554  // We can't have any padding between symbols, so we need to have 1-byte
3555  // alignment.
3556  auto Align = CharUnits::fromQuantity(1);
3557 
3558  // Create the entry struct.
3559  ConstantInitBuilder EntryBuilder(CGM);
3560  auto EntryInit = EntryBuilder.beginStruct(TgtOffloadEntryType);
 // Field order: address, name, size, flags, and a trailing zero.
3561  EntryInit.add(AddrPtr);
3562  EntryInit.add(StrPtr);
3563  EntryInit.addInt(CGM.SizeTy, Size);
3564  EntryInit.addInt(CGM.Int32Ty, Flags);
3565  EntryInit.addInt(CGM.Int32Ty, 0);
3566  llvm::GlobalVariable *Entry =
3567  EntryInit.finishAndCreateGlobal(".omp_offloading.entry",
3568  Align,
3569  /*constant*/ true,
 // NOTE(review): the final argument of this call (listing line 3570) is
 // missing from this extract.
3571 
3572  // The entry has to be created in the section the linker expects it to be.
3573  Entry->setSection(".omp_offloading.entries");
3574 }
3575 
 // NOTE(review): the signature line (listing line 3576) and a few statement
 // heads below are missing from this extract; the affected spots are marked.
3577  // Emit the offloading entries and metadata so that the device codegen side
3578  // can easily figure out what to emit. The produced metadata looks like
3579  // this:
3580  //
3581  // !omp_offload.info = !{!1, ...}
3582  //
3583  // Right now we only generate metadata for functions that contain target
3584  // regions.
3585 
3586  // If we do not have entries, we don't need to do anything.
 // NOTE(review): the condition guarding this return (listing line 3587) is
 // missing from this extract.
3588  return;
3589 
3590  llvm::Module &M = CGM.getModule();
3591  llvm::LLVMContext &C = M.getContext();
 // NOTE(review): the type of OrderedEntries (listing line 3592) is missing
 // from this extract. It is sized to the entry count, indexed by entry order,
 // and holds entry pointers.
3593  OrderedEntries(OffloadEntriesInfoManager.size());
3594 
3595  // Create the offloading info metadata node.
3596  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3597 
3598  // Auxiliary methods to create metadata values and strings.
3599  auto getMDInt = [&](unsigned v) {
3600  return llvm::ConstantAsMetadata::get(
3601  llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v));
3602  };
3603 
3604  auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); };
3605 
3606  // Create function that emits metadata for each target region entry;
3607  auto &&TargetRegionMetadataEmitter = [&](
3608  unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line,
 // NOTE(review): the rest of the lambda's parameter list and the declaration
 // of `Ops` (listing lines 3609-3610) are missing from this extract; `E` is
 // the entry being described.
3611  // Generate metadata for target regions. Each entry of this metadata
3612  // contains:
3613  // - Entry 0 -> Kind of this type of metadata (0).
3614  // - Entry 1 -> Device ID of the file where the entry was identified.
3615  // - Entry 2 -> File ID of the file where the entry was identified.
3616  // - Entry 3 -> Mangled name of the function where the entry was identified.
3617  // - Entry 4 -> Line in the file where the entry was identified.
3618  // - Entry 5 -> Order the entry was created.
3619  // The first element of the metadata node is the kind.
3620  Ops.push_back(getMDInt(E.getKind()));
3621  Ops.push_back(getMDInt(DeviceID));
3622  Ops.push_back(getMDInt(FileID));
3623  Ops.push_back(getMDString(ParentName));
3624  Ops.push_back(getMDInt(Line));
3625  Ops.push_back(getMDInt(E.getOrder()));
3626 
3627  // Save this entry in the right position of the ordered entries array.
3628  OrderedEntries[E.getOrder()] = &E;
3629 
3630  // Add metadata to the named metadata node.
3631  MD->addOperand(llvm::MDNode::get(C, Ops));
3632  };
3633 
 // NOTE(review): the head of the call that runs the emitter over all entries
 // (listing line 3634) is missing from this extract.
3635  TargetRegionMetadataEmitter);
3636 
 // Emit the actual offload entries in creation order; every slot must have
 // been filled by the emitter above.
3637  for (auto *E : OrderedEntries) {
3638  assert(E && "All ordered entries must exist!");
3639  if (auto *CE =
3640  dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3641  E)) {
3642  assert(CE->getID() && CE->getAddress() &&
3643  "Entry ID and Addr are invalid!");
3644  createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0);
3645  } else
3646  llvm_unreachable("Unsupported entry kind.");
3647  }
3648 }
3649 
3650 /// \brief Loads all the offload entries information from the host IR
3651 /// metadata.
///
/// Does nothing unless compiling for the device with a host IR file set.
/// It also returns silently if the file cannot be read or parsed, or if it
/// has no "omp_offload.info" named metadata.
// NOTE(review): the signature line (listing line 3652) is missing from this
// extract.
3653  // If we are in target mode, load the metadata from the host IR. This code has
3654  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().
3655 
3656  if (!CGM.getLangOpts().OpenMPIsDevice)
3657  return;
3658 
3659  if (CGM.getLangOpts().OMPHostIRFile.empty())
3660  return;
3661 
3662  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3663  if (Buf.getError())
3664  return;
3665 
 // The host module is parsed into its own, function-local LLVMContext.
3666  llvm::LLVMContext C;
3667  auto ME = expectedToErrorOrAndEmitErrors(
3668  C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3669 
3670  if (ME.getError())
3671  return;
3672 
3673  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3674  if (!MD)
3675  return;
3676 
3677  for (auto I : MD->operands()) {
3678  llvm::MDNode *MN = cast<llvm::MDNode>(I);
3679 
 // Helpers that read operand Idx of the current node as an integer or as a
 // string.
3680  auto getMDInt = [&](unsigned Idx) {
3681  llvm::ConstantAsMetadata *V =
3682  cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3683  return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3684  };
3685 
3686  auto getMDString = [&](unsigned Idx) {
3687  llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx));
3688  return V->getString();
3689  };
3690 
 // Operand 0 is the metadata kind; operands 1-5 are device ID, file ID,
 // parent name, line and order.
3691  switch (getMDInt(0)) {
3692  default:
3693  llvm_unreachable("Unexpected metadata!");
3694  break;
 // NOTE(review): the case label and the head of the call receiving the
 // arguments below (listing lines 3695-3697) are missing from this extract.
3698  /*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2),
3699  /*ParentName=*/getMDString(3), /*Line=*/getMDInt(4),
3700  /*Order=*/getMDInt(5));
3701  break;
3702  }
3703  }
3704 }
3705 
 // Lazily builds the kmp_routine_entry_t function pointer type. Both its AST
 // form (KmpRoutineEntryPtrQTy) and its converted form (KmpRoutineEntryPtrTy)
 // are cached, so the work is done only on the first call.
 // NOTE(review): the signature line (listing line 3706) is missing from this
 // extract; the body uses a parameter or member named KmpInt32Ty.
3707  if (!KmpRoutineEntryPtrTy) {
3708  // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3709  auto &C = CGM.getContext();
3710  QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
 // NOTE(review): the declaration of `EPI` (listing line 3711) is missing from
 // this extract.
3712  KmpRoutineEntryPtrQTy = C.getPointerType(
3713  C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3714  KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3715  }
3716 }
3717 
 // Appends an unnamed, public, non-mutable, non-bitfield field of type FieldTy
 // to the declaration context DC and returns the new field.
 // NOTE(review): the line with the return type, function name and leading
 // parameters (listing line 3718) is missing from this extract; the body uses
 // `C` and `DC` from it.
3719  QualType FieldTy) {
3720  auto *Field = FieldDecl::Create(
3721  C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
 // NOTE(review): one argument line (listing line 3722) is missing from this
 // extract.
3723  /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
3724  Field->setAccess(AS_public);
3725  DC->addDecl(Field);
3726  return Field;
3727 }
3728 
3730 
 // Returns the cached QualType of the implicit record __tgt_offload_entry,
 // building it on first use.
 // NOTE(review): the signature line (listing line 3729) and some statements
 // below are missing from this extract; the affected spots are marked.
3731  // Make sure the type of the entry is already created. This is the type we
3732  // have to create:
3733  // struct __tgt_offload_entry{
3734  // void *addr; // Pointer to the offload entry info.
3735  // // (function or global)
3736  // char *name; // Name of the function or global.
3737  // size_t size; // Size of the entry info (0 if it is a function).
3738  // int32_t flags; // Flags associated with the entry, e.g. 'link'.
3739  // int32_t reserved; // Reserved, to use by the runtime library.
3740  // };
3741  if (TgtOffloadEntryQTy.isNull()) {
3742  ASTContext &C = CGM.getContext();
3743  auto *RD = C.buildImplicitRecord("__tgt_offload_entry");
3744  RD->startDefinition();
3745  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
 // NOTE(review): the statement adding the `name` field (listing line 3746) is
 // missing from this extract.
3747  addFieldToRecordDecl(C, RD, C.getSizeType());
 // NOTE(review): the heads of the next two calls (listing lines 3748 and 3750)
 // are missing; only their argument lines remain, each passing a signed
 // 32-bit integer type.
3749  C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3751  C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3752  RD->completeDefinition();
 // NOTE(review): the statement that stores the result in TgtOffloadEntryQTy
 // (listing line 3753) is missing from this extract.
3754  }
3755  return TgtOffloadEntryQTy;
3756 }
3757 
 // Returns the cached QualType of the implicit record __tgt_device_image,
 // building it on first use.
 // NOTE(review): the signature line (listing line 3758) and some statements
 // below are missing from this extract; the affected spots are marked.
3759  // These are the types we need to build:
3760  // struct __tgt_device_image{
3761  // void *ImageStart; // Pointer to the target code start.
3762  // void *ImageEnd; // Pointer to the target code end.
3763  // // We also add the host entries to the device image, as it may be useful
3764  // // for the target runtime to have access to that information.
3765  // __tgt_offload_entry *EntriesBegin; // Begin of the table with all
3766  // // the entries.
3767  // __tgt_offload_entry *EntriesEnd; // End of the table with all the
3768  // // entries (non inclusive).
3769  // };
3770  if (TgtDeviceImageQTy.isNull()) {
3771  ASTContext &C = CGM.getContext();
3772  auto *RD = C.buildImplicitRecord("__tgt_device_image");
3773  RD->startDefinition();
3774  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3775  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
 // NOTE(review): the statements adding the two entry-pointer fields (listing
 // lines 3776-3777) are missing from this extract.
3778  RD->completeDefinition();
 // NOTE(review): the statement that stores the result in TgtDeviceImageQTy
 // (listing line 3779) is missing from this extract.
3780  }
3781  return TgtDeviceImageQTy;
3782 }
3783 
 // Returns the cached QualType of the implicit record __tgt_bin_desc. The
 // record is built inside a block whose opening condition is not visible here;
 // by analogy with the sibling getters it is presumably an isNull() check on
 // the cache -- confirm against the real source.
 // NOTE(review): the signature line (listing line 3784) and some statements
 // below are missing from this extract; the affected spots are marked.
3785  // struct __tgt_bin_desc{
3786  // int32_t NumDevices; // Number of devices supported.
3787  // __tgt_device_image *DeviceImages; // Arrays of device images
3788  // // (one per device).
3789  // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the
3790  // // entries.
3791  // __tgt_offload_entry *EntriesEnd; // End of the table with all the
3792  // // entries (non inclusive).
3793  // };
 // NOTE(review): the opening `if` of this block (listing line 3794) is missing
 // from this extract.
3795  ASTContext &C = CGM.getContext();
3796  auto *RD = C.buildImplicitRecord("__tgt_bin_desc");
3797  RD->startDefinition();
 // NOTE(review): the head of this call (listing line 3798) is missing; the
 // remaining argument line passes a signed 32-bit integer type.
3799  C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
 // NOTE(review): the statements adding the three pointer fields (listing lines
 // 3800-3802) are missing from this extract.
3803  RD->completeDefinition();
 // NOTE(review): the statement that stores the result in
 // TgtBinaryDescriptorQTy (listing line 3804) is missing from this extract.
3805  }
3806  return TgtBinaryDescriptorQTy;
3807 }
3808 
3809 namespace {
3810 struct PrivateHelpersTy {
3811  PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
3812  const VarDecl *PrivateElemInit)
3813  : Original(Original), PrivateCopy(PrivateCopy),
3814  PrivateElemInit(PrivateElemInit) {}
3815  const VarDecl *Original;
3816  const VarDecl *PrivateCopy;
3817  const VarDecl *PrivateElemInit;
3818 };
3819 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3820 } // anonymous namespace
3821 
// Builds the implicit record ".kmp_privates.t" with one field per entry of
// Privates, in the same order. Returns nullptr when Privates is empty.
3822 static RecordDecl *
// NOTE(review): the line with the function name and parameter list (listing
// line 3823) is missing from this extract; the body uses `CGM` and `Privates`.
3824  if (!Privates.empty()) {
3825  auto &C = CGM.getContext();
3826  // Build struct .kmp_privates_t. {
3827  // /* private vars */
3828  // };
3829  auto *RD = C.buildImplicitRecord(".kmp_privates.t");
3830  RD->startDefinition();
3831  for (auto &&Pair : Privates) {
3832  auto *VD = Pair.second.Original;
 // The field has the original variable's type with any reference stripped.
3833  auto Type = VD->getType();
3834  Type = Type.getNonReferenceType();
3835  auto *FD = addFieldToRecordDecl(C, RD, Type);
 // Carry over every AlignedAttr of the original variable to the field.
3836  if (VD->hasAttrs()) {
3837  for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3838  E(VD->getAttrs().end());
3839  I != E; ++I)
3840  FD->addAttr(*I);
3841  }
3842  }
3843  RD->completeDefinition();
3844  return RD;
3845  }
3846  return nullptr;
3847 }
3848 
// Builds the implicit record "kmp_task_t" (and the helper union
// "kmp_cmplrdata_t" used by two of its fields) and returns the record. The
// five taskloop-specific fields are appended only when Kind is a taskloop
// directive.
3849 static RecordDecl *
// NOTE(review): the line with the function name and leading parameters
// (listing line 3850) is missing from this extract; the body uses `CGM` and
// `Kind`.
3851  QualType KmpInt32Ty,
3852  QualType KmpRoutineEntryPointerQTy) {
3853  auto &C = CGM.getContext();
3854  // Build struct kmp_task_t {
3855  // void * shareds;
3856  // kmp_routine_entry_t routine;
3857  // kmp_int32 part_id;
3858  // kmp_cmplrdata_t data1;
3859  // kmp_cmplrdata_t data2;
3860  // For taskloops additional fields:
3861  // kmp_uint64 lb;
3862  // kmp_uint64 ub;
3863  // kmp_int64 st;
3864  // kmp_int32 liter;
3865  // void * reductions;
3866  // };
 // Union of a kmp_int32 and a routine entry pointer.
3867  auto *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3868  UD->startDefinition();
3869  addFieldToRecordDecl(C, UD, KmpInt32Ty);
3870  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3871  UD->completeDefinition();
3872  QualType KmpCmplrdataTy = C.getRecordType(UD);
3873  auto *RD = C.buildImplicitRecord("kmp_task_t");
3874  RD->startDefinition();
 // Fields are added in the order listed in the comment above; other code in
 // this file locates them by position, so the order matters.
3875  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3876  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3877  addFieldToRecordDecl(C, RD, KmpInt32Ty);
3878  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3879  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3880  if (isOpenMPTaskLoopDirective(Kind)) {
3881  QualType KmpUInt64Ty =
3882  CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3883  QualType KmpInt64Ty =
3884  CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3885  addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3886  addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3887  addFieldToRecordDecl(C, RD, KmpInt64Ty);
3888  addFieldToRecordDecl(C, RD, KmpInt32Ty);
3889  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3890  }
3891  RD->completeDefinition();
3892  return RD;
3893 }
3894 
// Builds the implicit record "kmp_task_t_with_privates": a field of type
// KmpTaskTQTy followed, only when Privates is non-empty, by a field holding
// the privates record.
3895 static RecordDecl *
// NOTE(review): the line with the function name and leading parameters
// (listing line 3896) is missing from this extract; the body uses `CGM` and
// `KmpTaskTQTy`.
3897  ArrayRef<PrivateDataTy> Privates) {
3898  auto &C = CGM.getContext();
3899  // Build struct kmp_task_t_with_privates {
3900  // kmp_task_t task_data;
3901  // .kmp_privates_t. privates;
3902  // };
3903  auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3904  RD->startDefinition();
3905  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
 // createPrivatesRecordDecl returns null when there are no privates, in which
 // case the second field is omitted.
3906  if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
3907  addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3908  }
3909  RD->completeDefinition();
3910  return RD;
3911 }
3912 
3913 /// \brief Emit a proxy function which accepts kmp_task_t as the second
3914 /// argument.
3915 /// \code
3916 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3917 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3918 /// For taskloops:
3919 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3920 /// tt->reductions, tt->shareds);
3921 /// return 0;
3922 /// }
3923 /// \endcode
///
/// Returns the emitted ".omp_task_entry." function. In the code below the
/// part id is passed by address and the privates pointer is null when the
/// task record has no privates field.
3924 static llvm::Value *
// NOTE(review): the line with the function name and leading parameters
// (listing line 3925) is missing from this extract; the body uses `CGM` and
// `Loc`.
3926  OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3927  QualType KmpTaskTWithPrivatesPtrQTy,
3928  QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3929  QualType SharedsPtrTy, llvm::Value *TaskFunction,
3930  llvm::Value *TaskPrivatesMap) {
3931  auto &C = CGM.getContext();
3932  FunctionArgList Args;
3933  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
 // NOTE(review): the last argument of this declaration (listing line 3934) is
 // missing from this extract.
3935  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3936  KmpTaskTWithPrivatesPtrQTy.withRestrict(),
 // NOTE(review): the last argument of this declaration (listing line 3937) is
 // missing from this extract.
3938  Args.push_back(&GtidArg);
3939  Args.push_back(&TaskTypeArg);
3940  auto &TaskEntryFnInfo =
3941  CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3942  auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3943  auto *TaskEntry =
 // NOTE(review): the head of the function-creation call (listing line 3944) is
 // missing from this extract.
3945  ".omp_task_entry.", &CGM.getModule());
3946  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo);
3947  CodeGenFunction CGF(CGM);
3948  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
3949 
3950  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3951  // tt,
3952  // For taskloops:
3953  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3954  // tt->task_data.shareds);
3955  auto *GtidParam = CGF.EmitLoadOfScalar(
3956  CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
 // TDBase: the pointed-to task-with-privates object; Base: its first field
 // (the kmp_task_t part).
3957  LValue TDBase = CGF.EmitLoadOfPointerLValue(
3958  CGF.GetAddrOfLocalVar(&TaskTypeArg),
3959  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3960  auto *KmpTaskTWithPrivatesQTyRD =
3961  cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3962  LValue Base =
3963  CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3964  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
 // Fields of kmp_task_t are located by index using the KmpTaskT* enumerators.
3965  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3966  auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
 // The address of part_id is passed, not its loaded value.
3967  auto *PartidParam = PartIdLVal.getPointer();
3968 
3969  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3970  auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3971  auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3972  CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
3973  CGF.ConvertTypeForMem(SharedsPtrTy));
3974 
 // The privates field is the second field of the outer record and may be
 // absent; pass a null void* in that case.
3975  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3976  llvm::Value *PrivatesParam;
3977  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3978  auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3979  PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3980  PrivatesLVal.getPointer(), CGF.VoidPtrTy);
3981  } else
3982  PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3983 
3984  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
3985  TaskPrivatesMap,
3986  CGF.Builder
 // NOTE(review): the name of the builder method called here (listing line
 // 3987) is missing from this extract; it takes the task address and
 // CGF.VoidPtrTy.
3988  TDBase.getAddress(), CGF.VoidPtrTy)
3989  .getPointer()};
3990  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3991  std::end(CommonArgs));
 // Taskloops additionally pass lower bound, upper bound, stride, last-iteration
 // flag and reductions, each loaded from the kmp_task_t part.
3992  if (isOpenMPTaskLoopDirective(Kind)) {
3993  auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3994  auto LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3995  auto *LBParam = CGF.EmitLoadOfLValue(LBLVal, Loc).getScalarVal();
3996  auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3997  auto UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3998  auto *UBParam = CGF.EmitLoadOfLValue(UBLVal, Loc).getScalarVal();
3999  auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
4000  auto StLVal = CGF.EmitLValueForField(Base, *StFI);
4001  auto *StParam = CGF.EmitLoadOfLValue(StLVal, Loc).getScalarVal();
4002  auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4003  auto LILVal = CGF.EmitLValueForField(Base, *LIFI);
4004  auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal();
4005  auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
4006  auto RLVal = CGF.EmitLValueForField(Base, *RFI);
4007  auto *RParam = CGF.EmitLoadOfLValue(RLVal, Loc).getScalarVal();
4008  CallArgs.push_back(LBParam);
4009  CallArgs.push_back(UBParam);
4010  CallArgs.push_back(StParam);
4011  CallArgs.push_back(LIParam);
4012  CallArgs.push_back(RParam);
4013  }
 // The shareds pointer is always the last argument.
4014  CallArgs.push_back(SharedsParam);
4015 
4016  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
4017  CallArgs);
 // NOTE(review): the head of the statement that stores the return value
 // (listing line 4018) is missing from this extract; its operands are the
 // constant 0 and the function's return slot.
4019  RValue::get(CGF.Builder.getInt32(/*C=*/0)),
4020  CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
4021  CGF.FinishFunction();
4022  return TaskEntry;
4023 }
4024 
 // Emits the internal function ".omp_task_destructor." taking (gtid, task
 // pointer). For every field of the task's privates record whose type needs
 // destruction, it pushes a destroy cleanup. Returns the emitted function.
 // NOTE(review): the line with the return type, function name and first
 // parameter (listing line 4025) is missing from this extract; the body uses
 // `CGM`.
4026  SourceLocation Loc,
4027  QualType KmpInt32Ty,
4028  QualType KmpTaskTWithPrivatesPtrQTy,
4029  QualType KmpTaskTWithPrivatesQTy) {
4030  auto &C = CGM.getContext();
4031  FunctionArgList Args;
4032  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
 // NOTE(review): the last argument of this declaration (listing line 4033) is
 // missing from this extract.
4034  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4035  KmpTaskTWithPrivatesPtrQTy.withRestrict(),
 // NOTE(review): the last argument of this declaration (listing line 4036) is
 // missing from this extract.
4037  Args.push_back(&GtidArg);
4038  Args.push_back(&TaskTypeArg);
4039  auto &DestructorFnInfo =
4040  CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4041  auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
4042  auto *DestructorFn =
 // NOTE(review): the head of the function-creation call (listing line 4043) is
 // missing from this extract.
4044  ".omp_task_destructor.", &CGM.getModule());
4045  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn,
4046  DestructorFnInfo);
4047  CodeGenFunction CGF(CGM);
4048  CGF.disableDebugInfo();
4049  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
4050  Args);
4051 
 // NOTE(review): the declaration head of `Base` (listing line 4052) is missing
 // from this extract; it is initialized from the task pointer argument.
4053  CGF.GetAddrOfLocalVar(&TaskTypeArg),
4054  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4055  auto *KmpTaskTWithPrivatesQTyRD =
4056  cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
 // Step to the second field of the outer record (the privates) without
 // checking for field_end(). NOTE(review): this looks like it assumes the
 // privates field exists -- confirm callers guarantee it.
4057  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4058  Base = CGF.EmitLValueForField(Base, *FI);
4059  for (auto *Field :
4060  cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
4061  if (auto DtorKind = Field->getType().isDestructedType()) {
4062  auto FieldLValue = CGF.EmitLValueForField(Base, Field);
4063  CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
4064  }
4065  }
4066  CGF.FinishFunction();
4067  return DestructorFn;
4068 }
4069 
4070 /// \brief Emit a privates mapping function for correct handling of private and
4071 /// firstprivate variables.
4072 /// \code
4073 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4074 /// **noalias priv1,..., <tyn> **noalias privn) {
4075 /// *priv1 = &.privates.priv1;
4076 /// ...;
4077 /// *privn = &.privates.privn;
4078 /// }
4079 /// \endcode
///
/// Output parameters are created for the private, firstprivate and
/// lastprivate variables, in that order. The function is internal and marked
/// always-inline; the emitted function is returned.
4080 static llvm::Value *
// NOTE(review): the line with the function name and leading parameters
// (listing line 4081) is missing from this extract; the body uses `CGM` and
// `Loc`.
4082  ArrayRef<const Expr *> PrivateVars,
4083  ArrayRef<const Expr *> FirstprivateVars,
4084  ArrayRef<const Expr *> LastprivateVars,
4085  QualType PrivatesQTy,
4086  ArrayRef<PrivateDataTy> Privates) {
4087  auto &C = CGM.getContext();
4088  FunctionArgList Args;
4089  ImplicitParamDecl TaskPrivatesArg(
4090  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4091  C.getPointerType(PrivatesQTy).withConst().withRestrict(),
 // NOTE(review): the last argument of this declaration (listing line 4092) is
 // missing from this extract.
4093  Args.push_back(&TaskPrivatesArg);
 // Maps each privatized variable to the index of its output parameter in Args.
 // Counting starts at 1 because Args[0] is the privates pointer.
4094  llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4095  unsigned Counter = 1;
4096  for (auto *E: PrivateVars) {
4097  Args.push_back(ImplicitParamDecl::Create(
4098  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4099  C.getPointerType(C.getPointerType(E->getType()))
4100  .withConst()
4101  .withRestrict(),
 // NOTE(review): the last argument of this call (listing line 4102) is missing
 // from this extract.
4103  auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4104  PrivateVarsPos[VD] = Counter;
4105  ++Counter;
4106  }
4107  for (auto *E : FirstprivateVars) {
4108  Args.push_back(ImplicitParamDecl::Create(
4109  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4110  C.getPointerType(C.getPointerType(E->getType()))
4111  .withConst()
4112  .withRestrict(),
 // NOTE(review): the last argument of this call (listing line 4113) is missing
 // from this extract.
4114  auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4115  PrivateVarsPos[VD] = Counter;
4116  ++Counter;
4117  }
4118  for (auto *E: LastprivateVars) {
4119  Args.push_back(ImplicitParamDecl::Create(
4120  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4121  C.getPointerType(C.getPointerType(E->getType()))
4122  .withConst()
4123  .withRestrict(),
 // NOTE(review): the last argument of this call (listing line 4124) is missing
 // from this extract.
4125  auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4126  PrivateVarsPos[VD] = Counter;
4127  ++Counter;
4128  }
4129  auto &TaskPrivatesMapFnInfo =
4130  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4131  auto *TaskPrivatesMapTy =
4132  CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4133  auto *TaskPrivatesMap = llvm::Function::Create(
4134  TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
4135  ".omp_task_privates_map.", &CGM.getModule());
4136  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap,
4137  TaskPrivatesMapFnInfo);
 // Force inlining: drop noinline/optnone (if present) before adding
 // alwaysinline.
4138  TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4139  TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4140  TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4141  CodeGenFunction CGF(CGM);
4142  CGF.disableDebugInfo();
4143  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4144  TaskPrivatesMapFnInfo, Args);
4145 
4146  // *privi = &.privates.privi;
 // NOTE(review): the declaration head of `Base` (listing line 4147) is missing
 // from this extract; it is initialized from the privates pointer argument.
4148  CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4149  TaskPrivatesArg.getType()->castAs<PointerType>());
4150  auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
 // Counter is reused as an index into Privates. This relies on the i-th field
 // of the privates record corresponding to Privates[i] -- presumably guaranteed
 // by how the record was built; verify against the caller.
4151  Counter = 0;
4152  for (auto *Field : PrivatesQTyRD->fields()) {
4153  auto FieldLVal = CGF.EmitLValueForField(Base, Field);
4154  auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4155  auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4156  auto RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4157  RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
 // Store the field's address through the matching output parameter.
4158  CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
4159  ++Counter;
4160  }
4161  CGF.FinishFunction();
4162  return TaskPrivatesMap;
4163 }
4164 
4165 static bool stable_sort_comparator(const PrivateDataTy P1,
4166  const PrivateDataTy P2) {
4167  return P1.first > P2.first;
4168 }
4169 
4170 /// Emit initialization for private variables in task-based directives.
      // NOTE(review): the embedded listing numbers skip 4171, 4183-4184, 4191,
      // 4195, 4198, 4238 and 4248, so the line carrying the function name and a
      // few expressions/declarations are not part of this listing. The comments
      // below describe only what the visible lines do.
      //
      // Visible behavior: walks Privates in lock-step with the fields of the
      // privates sub-record (the record type of the second field of
      // KmpTaskTWithPrivatesQTyRD) and emits each private copy's initializer
      // into the matching field reached through TDBase. When ForDup is true only
      // non-trivial CXXConstructExpr initializers are emitted.
4172  const OMPExecutableDirective &D,
4173  Address KmpTaskSharedsPtr, LValue TDBase,
4174  const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4175  QualType SharedsTy, QualType SharedsPtrTy,
4176  const OMPTaskDataTy &Data,
4177  ArrayRef<PrivateDataTy> Privates, bool ForDup) {
4178  auto &C = CGF.getContext();
      // Second field of the task-with-privates record.
4179  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4180  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
4181  LValue SrcBase;
4182  bool IsTargetTask =
      // NOTE(review): the right-hand side of IsTargetTask is missing here.
4185  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
4186  // PointersArray and SizesArray. The original variables for these arrays are
4187  // not captured and we get their addresses explicitly.
4188  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
4189  (IsTargetTask && Data.FirstprivateVars.size() > 3)) {
      // SrcBase is only set up when some firstprivate has to be read from the
      // shareds block addressed by KmpTaskSharedsPtr.
4190  SrcBase = CGF.MakeAddrLValue(
4192  KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
4193  SharedsTy);
4194  }
4196  ? OMPD_taskloop
4197  : OMPD_task;
      // Re-point FI at the first field of the privates sub-record.
4199  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
4200  for (auto &&Pair : Privates) {
4201  auto *VD = Pair.second.PrivateCopy;
4202  auto *Init = VD->getAnyInitializer();
      // Skip copies without an initializer; in the ForDup case also skip
      // everything except non-trivial constructor calls.
4203  if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
4204  !CGF.isTrivialInitializer(Init)))) {
4205  LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // A non-null PrivateElemInit means the initializer reads the original
      // variable through Elem.
4206  if (auto *Elem = Pair.second.PrivateElemInit) {
4207  auto *OriginalVD = Pair.second.Original;
4208  // Check if the variable is the target-based BasePointersArray,
4209  // PointersArray or SizesArray.
4210  LValue SharedRefLValue;
4211  QualType Type = OriginalVD->getType();
4212  if (IsTargetTask && isa<ImplicitParamDecl>(OriginalVD) &&
4213  isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
4214  cast<CapturedDecl>(OriginalVD->getDeclContext())->getNumParams() ==
4215  0 &&
4216  isa<TranslationUnitDecl>(
4217  cast<CapturedDecl>(OriginalVD->getDeclContext())
4218  ->getDeclContext())) {
      // Use the variable's own local address instead of a shareds field.
4219  SharedRefLValue =
4220  CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
4221  } else {
      // NOTE(review): CapturesInfo is declared on a line missing from this
      // listing (4198).
4222  auto *SharedField = CapturesInfo.lookup(OriginalVD);
4223  SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
      // Rebuild the lvalue with the declared alignment of the original
      // variable.
4224  SharedRefLValue = CGF.MakeAddrLValue(
4225  Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
4226  SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
4227  SharedRefLValue.getTBAAInfo());
4228  }
4229  if (Type->isArrayType()) {
4230  // Initialize firstprivate array.
4231  if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
4232  // Perform simple memcpy.
4233  CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
4234  SharedRefLValue.getAddress(), Type);
4235  } else {
4236  // Initialize firstprivate array using element-by-element
4237  // initialization.
      // NOTE(review): the callee of this call (4238) is missing from the
      // listing; the lambda below is its per-element callback.
4239  PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
4240  [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
4241  Address SrcElement) {
4242  // Clean up any temporaries needed by the initialization.
4243  CodeGenFunction::OMPPrivateScope InitScope(CGF);
      // Map Elem to the current source element for the initializer.
4244  InitScope.addPrivate(
4245  Elem, [SrcElement]() -> Address { return SrcElement; });
4246  (void)InitScope.Privatize();
4247  // Emit initialization for single element.
4249  CGF, &CapturesInfo);
4250  CGF.EmitAnyExprToMem(Init, DestElement,
4251  Init->getType().getQualifiers(),
4252  /*IsInitializer=*/false);
4253  });
4254  }
4255  } else {
      // Non-array firstprivate: map Elem to the shared original and emit the
      // initializer into the private field.
4256  CodeGenFunction::OMPPrivateScope InitScope(CGF);
4257  InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
4258  return SharedRefLValue.getAddress();
4259  });
4260  (void)InitScope.Privatize();
4261  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
4262  CGF.EmitExprAsInit(Init, VD, PrivateLValue,
4263  /*capturedByInit=*/false);
4264  }
4265  } else
      // No element initializer: emit the initializer as written.
4266  CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
4267  }
      // Advance to the next field even when nothing was emitted, keeping fields
      // and Privates entries aligned.
4268  ++FI;
4269  }
4270 }
4271 
4272 /// Check if duplication function is required for taskloops.
      // NOTE(review): the line with the function name and first parameter
      // (listing number 4273) is missing from this listing.
      //
      // Returns true when at least one private copy in Privates has an
      // initializer that is a CXXConstructExpr and is not trivial according to
      // CGF.isTrivialInitializer. The loop visits every entry; once the flag is
      // set the right-hand side is short-circuited.
4274  ArrayRef<PrivateDataTy> Privates) {
4275  bool InitRequired = false;
4276  for (auto &&Pair : Privates) {
4277  auto *VD = Pair.second.PrivateCopy;
4278  auto *Init = VD->getAnyInitializer();
4279  InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4280  !CGF.isTrivialInitializer(Init));
4281  }
4282  return InitRequired;
4283 }
4284 
4285 
4286 /// Emit task_dup function (for initialization of
4287 /// private/firstprivate/lastprivate vars and last_iter flag)
4288 /// \code
4289 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
4290 /// lastpriv) {
4291 /// // setup lastprivate flag
4292 /// task_dst->last = lastpriv;
4293 /// // could be constructor calls here...
4294 /// }
4295 /// \endcode
      // NOTE(review): listing numbers 4297, 4308, 4311, 4313, 4321, 4334, 4349
      // and 4352 are missing, so the function name line and parts of several
      // statements are not visible here.
      //
      // Visible behavior: creates an internal function named ".omp_task_dup."
      // with three implicit parameters (destination task, source task, lastpriv
      // flag), optionally stores the flag into the destination task, then
      // re-runs private initialization on the destination with ForDup=true.
      // Returns the created function.
4296 static llvm::Value *
4298  const OMPExecutableDirective &D,
4299  QualType KmpTaskTWithPrivatesPtrQTy,
4300  const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4301  const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
4302  QualType SharedsPtrTy, const OMPTaskDataTy &Data,
4303  ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
4304  auto &C = CGM.getContext();
4305  FunctionArgList Args;
4306  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4307  KmpTaskTWithPrivatesPtrQTy,
4309  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4310  KmpTaskTWithPrivatesPtrQTy,
4312  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
4314  Args.push_back(&DstArg);
4315  Args.push_back(&SrcArg);
4316  Args.push_back(&LastprivArg);
4317  auto &TaskDupFnInfo =
4318  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4319  auto *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4320  auto *TaskDup =
4322  ".omp_task_dup.", &CGM.getModule());
4323  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskDup, TaskDupFnInfo);
      // The helper is emitted with its own CodeGenFunction and with debug info
      // disabled.
4324  CodeGenFunction CGF(CGM);
4325  CGF.disableDebugInfo();
4326  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args);
4327 
      // Lvalue for the destination task (*DstArg).
4328  LValue TDBase = CGF.EmitLoadOfPointerLValue(
4329  CGF.GetAddrOfLocalVar(&DstArg),
4330  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4331  // task_dst->liter = lastpriv;
4332  if (WithLastIter) {
4333  auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4335  TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4336  LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4337  llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4338  CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4339  CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4340  }
4341 
4342  // Emit initial values for private copies (if any).
      // Callers must only request a dup function when there are privates.
4343  assert(!Privates.empty());
4344  Address KmpTaskSharedsPtr = Address::invalid();
      // The shareds pointer is read from the source task and is only needed when
      // firstprivates exist; otherwise it stays invalid.
4345  if (!Data.FirstprivateVars.empty()) {
4346  LValue TDBase = CGF.EmitLoadOfPointerLValue(
4347  CGF.GetAddrOfLocalVar(&SrcArg),
4348  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4350  TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4351  KmpTaskSharedsPtr = Address(
4353  Base, *std::next(KmpTaskTQTyRD->field_begin(),
4354  KmpTaskTShareds)),
4355  Loc),
4356  CGF.getNaturalTypeAlignment(SharedsTy));
4357  }
      // Note: the TDBase passed here is the outer one (destination task); the
      // inner TDBase above is scoped to the if-block.
4358  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4359  SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4360  CGF.FinishFunction();
4361  return TaskDup;
4362 }
4363 
4364 /// Checks if destructor function is required to be generated.
4365 /// \return true if cleanups are required, false otherwise.
4366 static bool
4367 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4368  bool NeedsCleanup = false;
4369  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4370  auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4371  for (auto *FD : PrivateRD->fields()) {
4372  NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4373  if (NeedsCleanup)
4374  break;
4375  }
4376  return NeedsCleanup;
4377 }
4378 
// Builds and initializes the runtime task object for directive D and returns
// the pieces later code needs in a TaskResultTy.
//
// Visible steps: collect private/firstprivate/lastprivate copies keyed by the
// declaration alignment and stable-sort them; build the task record types;
// emit the privates-mapping function and the proxy task entry; allocate the
// task through a runtime call; copy the shareds; initialize the privates;
// optionally emit the task-dup and destructor helpers; store the priority.
//
// NOTE(review): listing numbers 4380, 4385, 4422, 4424, 4427, 4430-4431, 4437,
// 4442, 4505, 4508, 4520, 4534 and 4554 are missing, so the function name line
// and parts of several statements are not visible in this listing.
4379 CGOpenMPRuntime::TaskResultTy
4381  const OMPExecutableDirective &D,
4382  llvm::Value *TaskFunction, QualType SharedsTy,
4383  Address Shareds, const OMPTaskDataTy &Data) {
4384  auto &C = CGM.getContext();
4386  // Aggregate privates and sort them by the alignment.
      // Private copies: no element initializer.
4387  auto I = Data.PrivateCopies.begin();
4388  for (auto *E : Data.PrivateVars) {
4389  auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4390  Privates.push_back(std::make_pair(
4391  C.getDeclAlign(VD),
4392  PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4393  /*PrivateElemInit=*/nullptr)));
4394  ++I;
4395  }
      // Firstprivate copies: also record the element-init variable taken from
      // Data.FirstprivateInits.
4396  I = Data.FirstprivateCopies.begin();
4397  auto IElemInitRef = Data.FirstprivateInits.begin();
4398  for (auto *E : Data.FirstprivateVars) {
4399  auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4400  Privates.push_back(std::make_pair(
4401  C.getDeclAlign(VD),
4402  PrivateHelpersTy(
4403  VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4404  cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
4405  ++I;
4406  ++IElemInitRef;
4407  }
      // Lastprivate copies: no element initializer.
4408  I = Data.LastprivateCopies.begin();
4409  for (auto *E : Data.LastprivateVars) {
4410  auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4411  Privates.push_back(std::make_pair(
4412  C.getDeclAlign(VD),
4413  PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4414  /*PrivateElemInit=*/nullptr)));
4415  ++I;
4416  }
      // Descending by alignment (see stable_sort_comparator); equal alignments
      // keep insertion order.
4417  std::stable_sort(Privates.begin(), Privates.end(), stable_sort_comparator);
4418  auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4419  // Build type kmp_routine_entry_t (if not built yet).
4420  emitKmpRoutineEntryT(KmpInt32Ty);
4421  // Build type kmp_task_t (if not built yet).
      // NOTE(review): the condition selecting between the taskloop record and
      // the plain task record is on a line missing from this listing.
4423  if (SavedKmpTaskloopTQTy.isNull()) {
4425  CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4426  }
4428  } else {
4429  assert((D.getDirectiveKind() == OMPD_task ||
4432  "Expected taskloop, task or target directive");
4433  if (SavedKmpTaskTQTy.isNull()) {
4434  SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4435  CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4436  }
4438  }
4439  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4440  // Build particular struct kmp_task_t for the given task.
4441  auto *KmpTaskTWithPrivatesQTyRD =
4443  auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
4444  QualType KmpTaskTWithPrivatesPtrQTy =
4445  C.getPointerType(KmpTaskTWithPrivatesQTy);
4446  auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
4447  auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
4448  auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
4449  QualType SharedsPtrTy = C.getPointerType(SharedsTy);
4450 
4451  // Emit initial values for private copies (if any).
4452  llvm::Value *TaskPrivatesMap = nullptr;
      // The mapping function is cast to the type of TaskFunction's fourth
      // parameter (index 3).
4453  auto *TaskPrivatesMapTy =
4454  std::next(cast<llvm::Function>(TaskFunction)->arg_begin(), 3)->getType();
4455  if (!Privates.empty()) {
4456  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4457  TaskPrivatesMap = emitTaskPrivateMappingFunction(
4458  CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
4459  FI->getType(), Privates);
4460  TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4461  TaskPrivatesMap, TaskPrivatesMapTy);
4462  } else {
      // No privates: pass a null mapping function.
4463  TaskPrivatesMap = llvm::ConstantPointerNull::get(
4464  cast<llvm::PointerType>(TaskPrivatesMapTy));
4465  }
4466  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
4467  // kmp_task_t *tt);
4468  auto *TaskEntry = emitProxyTaskFunction(
4469  CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4470  KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
4471  TaskPrivatesMap);
4472 
4473  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
4474  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
4475  // kmp_routine_entry_t *task_entry);
4476  // Task flags. Format is taken from
4477  // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
4478  // description of kmp_tasking_flags struct.
4479  enum {
4480  TiedFlag = 0x1,
4481  FinalFlag = 0x2,
4482  DestructorsFlag = 0x8,
4483  PriorityFlag = 0x20
4484  };
      // Flags known at compile time are accumulated in Flags; the final flag may
      // depend on a runtime value and is merged in below.
4485  unsigned Flags = Data.Tied ? TiedFlag : 0;
4486  bool NeedsCleanup = false;
4487  if (!Privates.empty()) {
4488  NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
4489  if (NeedsCleanup)
4490  Flags = Flags | DestructorsFlag;
4491  }
4492  if (Data.Priority.getInt())
4493  Flags = Flags | PriorityFlag;
      // If Data.Final carries a value pointer, select the final flag at run
      // time; otherwise use its integer part as a constant.
4494  auto *TaskFlags =
4495  Data.Final.getPointer()
4496  ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
4497  CGF.Builder.getInt32(FinalFlag),
4498  CGF.Builder.getInt32(/*C=*/0))
4499  : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
4500  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
4501  auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
4502  llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
4503  getThreadID(CGF, Loc), TaskFlags,
4504  KmpTaskTWithPrivatesTySize, SharedsSize,
4506  TaskEntry, KmpRoutineEntryPtrTy)};
4507  auto *NewTask = CGF.EmitRuntimeCall(
      // View the allocated task as a pointer to the task-with-privates record.
4509  auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4510  NewTask, KmpTaskTWithPrivatesPtrTy);
4511  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
4512  KmpTaskTWithPrivatesQTy);
      // TDBase addresses the first field of that record.
4513  LValue TDBase =
4514  CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4515  // Fill the data in the resulting kmp_task_t record.
4516  // Copy shareds if there are any.
4517  Address KmpTaskSharedsPtr = Address::invalid();
4518  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
4519  KmpTaskSharedsPtr =
4521  CGF.EmitLValueForField(
4522  TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
4523  KmpTaskTShareds)),
4524  Loc),
4525  CGF.getNaturalTypeAlignment(SharedsTy));
4526  CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
4527  }
4528  // Emit initial values for private copies (if any).
4529  TaskResultTy Result;
4530  if (!Privates.empty()) {
      // Base (the whole task-with-privates record) is passed as the TDBase
      // argument of emitPrivatesInit.
4531  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4532  SharedsTy, SharedsPtrTy, Data, Privates,
4533  /*ForDup=*/false);
      // NOTE(review): the opening of this condition is on a line missing from
      // the listing. The visible part requires lastprivates or a non-trivial
      // constructor initializer before a dup function is emitted.
4535  (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4536  Result.TaskDupFn = emitTaskDupFunction(
4537  CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4538  KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4539  /*WithLastIter=*/!Data.LastprivateVars.empty());
4540  }
4541  }
4542  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4543  enum { Priority = 0, Destructors = 1 };
4544  // Provide pointer to function with destructors for privates.
4545  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4546  auto *KmpCmplrdataUD = (*FI)->getType()->getAsUnionType()->getDecl();
4547  if (NeedsCleanup) {
4548  llvm::Value *DestructorFn = emitDestructorsFunction(
4549  CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4550  KmpTaskTWithPrivatesQTy);
4551  LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4552  LValue DestructorsLV = CGF.EmitLValueForField(
4553  Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4555  DestructorFn, KmpRoutineEntryPtrTy),
4556  DestructorsLV);
4557  }
4558  // Set priority.
4559  if (Data.Priority.getInt()) {
      // The priority value goes into the Priority member of the Data2 field.
4560  LValue Data2LV = CGF.EmitLValueForField(
4561  TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4562  LValue PriorityLV = CGF.EmitLValueForField(
4563  Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4564  CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4565  }
4566  Result.NewTask = NewTask;
4567  Result.TaskEntry = TaskEntry;
4568  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4569  Result.TDBase = TDBase;
4570  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4571  return Result;
4572 }
4573 
// Emits the code that creates and launches a task for directive D.
//
// Visible steps: initialize the task via emitTaskInit; if Data.Dependences is
// non-empty, fill a temporary kmp_depend_info array; then either enqueue the
// task (ThenCodeGen) or run its entry directly (ElseCodeGen), selected by
// IfCond when one is given.
//
// NOTE(review): listing numbers 4574 and 4704 are missing, so the function name
// line and the callee of the dependence-aware runtime call are not visible in
// this listing.
4575  const OMPExecutableDirective &D,
4576  llvm::Value *TaskFunction,
4577  QualType SharedsTy, Address Shareds,
4578  const Expr *IfCond,
4579  const OMPTaskDataTy &Data) {
      // Nothing to emit without a valid insertion point.
4580  if (!CGF.HaveInsertPoint())
4581  return;
4582 
4583  TaskResultTy Result =
4584  emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4585  llvm::Value *NewTask = Result.NewTask;
4586  llvm::Value *TaskEntry = Result.TaskEntry;
4587  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4588  LValue TDBase = Result.TDBase;
4589  RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4590  auto &C = CGM.getContext();
4591  // Process list of dependences.
4592  Address DependenciesArray = Address::invalid();
4593  unsigned NumDependencies = Data.Dependences.size();
4594  if (NumDependencies) {
4595  // Dependence kind for RTL.
4596  enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
      // Field indices within the kmp_depend_info record built below.
4597  enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
4598  RecordDecl *KmpDependInfoRD;
      // Unsigned integer type with the same bit width as bool.
4599  QualType FlagsTy =
4600  C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4601  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
      // Build the kmp_depend_info record once and cache it in KmpDependInfoTy.
4602  if (KmpDependInfoTy.isNull()) {
4603  KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4604  KmpDependInfoRD->startDefinition();
4605  addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4606  addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4607  addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4608  KmpDependInfoRD->completeDefinition();
4609  KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4610  } else
4611  KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4612  CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
4613  // Define type kmp_depend_info[<Dependences.size()>];
4614  QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4615  KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
4616  ArrayType::Normal, /*IndexTypeQuals=*/0);
4617  // kmp_depend_info[<Dependences.size()>] deps;
4618  DependenciesArray =
4619  CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4620  for (unsigned i = 0; i < NumDependencies; ++i) {
4621  const Expr *E = Data.Dependences[i].second;
4622  auto Addr = CGF.EmitLValue(E);
4623  llvm::Value *Size;
4624  QualType Ty = E->getType();
      // For an array section the length is computed at run time as the byte
      // distance between the section's start address and one element past the
      // address obtained with LowerBound=false.
4625  if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4626  LValue UpAddrLVal =
4627  CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
4628  llvm::Value *UpAddr =
4629  CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
4630  llvm::Value *LowIntPtr =
4631  CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
4632  llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
4633  Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4634  } else
      // Otherwise the length is the size of the expression's type.
4635  Size = CGF.getTypeSize(Ty);
4636  auto Base = CGF.MakeAddrLValue(
4637  CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
4638  KmpDependInfoTy);
4639  // deps[i].base_addr = &<Dependences[i].second>;
4640  auto BaseAddrLVal = CGF.EmitLValueForField(
4641  Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4642  CGF.EmitStoreOfScalar(
4643  CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
4644  BaseAddrLVal);
4645  // deps[i].len = sizeof(<Dependences[i].second>);
4646  auto LenLVal = CGF.EmitLValueForField(
4647  Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4648  CGF.EmitStoreOfScalar(Size, LenLVal);
4649  // deps[i].flags = <Dependences[i].first>;
4650  RTLDependenceKindTy DepKind;
4651  switch (Data.Dependences[i].first) {
4652  case OMPC_DEPEND_in:
4653  DepKind = DepIn;
4654  break;
4655  // Out and InOut dependencies must use the same code.
4656  case OMPC_DEPEND_out:
4657  case OMPC_DEPEND_inout:
4658  DepKind = DepInOut;
4659  break;
      // The remaining kinds are treated as unreachable for task dependences.
4660  case OMPC_DEPEND_source:
4661  case OMPC_DEPEND_sink:
4662  case OMPC_DEPEND_unknown:
4663  llvm_unreachable("Unknown task dependence type");
4664  }
4665  auto FlagsLVal = CGF.EmitLValueForField(
4666  Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4667  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4668  FlagsLVal);
4669  }
      // Replace the array address with a void* to its first element for use as
      // a runtime-call argument.
4670  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4671  CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
4672  CGF.VoidPtrTy);
4673  }
4674 
4675  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4676  // libcall.
4677  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4678  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4679  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
4680  // list is not empty
4681  auto *ThreadID = getThreadID(CGF, Loc);
4682  auto *UpLoc = emitUpdateLocation(CGF, Loc);
4683  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
      // Only filled in when there are dependences; the no-alias list is always
      // passed as count 0 / null.
4684  llvm::Value *DepTaskArgs[7];
4685  if (NumDependencies) {
4686  DepTaskArgs[0] = UpLoc;
4687  DepTaskArgs[1] = ThreadID;
4688  DepTaskArgs[2] = NewTask;
4689  DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
4690  DepTaskArgs[4] = DependenciesArray.getPointer();
4691  DepTaskArgs[5] = CGF.Builder.getInt32(0);
4692  DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4693  }
      // "Then" branch: hand the task to the runtime.
4694  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
4695  &TaskArgs,
4696  &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
      // For untied tasks store 0 into the part-id field before launching.
4697  if (!Data.Tied) {
4698  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4699  auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4700  CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4701  }
4702  if (NumDependencies) {
      // NOTE(review): the arguments of this call are on a line missing from the
      // listing (4704).
4703  CGF.EmitRuntimeCall(
4705  } else {
4706  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
4707  TaskArgs);
4708  }
4709  // Check if parent region is untied and build return for untied task;
4710  if (auto *Region =
4711  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4712  Region->emitUntiedSwitch(CGF);
4713  };
4714 
      // Same dependence list as above, without the task pointer.
4715  llvm::Value *DepWaitTaskArgs[6];
4716  if (NumDependencies) {
4717  DepWaitTaskArgs[0] = UpLoc;
4718  DepWaitTaskArgs[1] = ThreadID;
4719  DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
4720  DepWaitTaskArgs[3] = DependenciesArray.getPointer();
4721  DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4722  DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4723  }
      // "Else" branch: wait for dependences (if any) and call the task entry
      // directly.
4724  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
4725  NumDependencies, &DepWaitTaskArgs,
4726  Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4727  auto &RT = CGF.CGM.getOpenMPRuntime();
4728  CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4729  // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
4730  // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4731  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
4732  // is specified.
4733  if (NumDependencies)
4734  CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
4735  DepWaitTaskArgs);
4736  // Call proxy_task_entry(gtid, new_task);
4737  auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4738  Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4739  Action.Enter(CGF);
4740  llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4741  CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4742  OutlinedFnArgs);
4743  };
4744 
4745  // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4746  // kmp_task_t *new_task);
4747  // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4748  // kmp_task_t *new_task);
      // The begin/complete runtime entries are handed to CommonActionTy, which
      // is attached to the region as its action.
4749  RegionCodeGenTy RCG(CodeGen);
4750  CommonActionTy Action(
4751  RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
4752  RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
4753  RCG.setAction(Action);
4754  RCG(CGF);
4755  };
4756 
      // With an if-clause both branches are emitted; without one only the
      // "then" branch is.
4757  if (IfCond)
4758  emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4759  else {
4760  RegionCodeGenTy ThenRCG(ThenCodeGen);
4761  ThenRCG(CGF);
4762  }
4763 }
4764 
// Emits the code that creates a task for loop directive D and passes it to the
// taskloop runtime entry described in the comment below.
//
// Visible steps: initialize the task via emitTaskInit; evaluate the if-clause
// to an int (1 when absent); store the initializers of the lower-bound,
// upper-bound and stride variables into the matching task fields; store the
// reductions pointer or null; assemble the runtime-call argument list.
//
// NOTE(review): listing numbers 4765, 4839 and 4842 are missing, so the
// function name line, the start of the last TaskArgs element and the runtime
// call itself are not visible in this listing.
4766  const OMPLoopDirective &D,
4767  llvm::Value *TaskFunction,
4768  QualType SharedsTy, Address Shareds,
4769  const Expr *IfCond,
4770  const OMPTaskDataTy &Data) {
      // Nothing to emit without a valid insertion point.
4771  if (!CGF.HaveInsertPoint())
4772  return;
4773  TaskResultTy Result =
4774  emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4775  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4776  // libcall.
4777  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4778  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4779  // sched, kmp_uint64 grainsize, void *task_dup);
4780  llvm::Value *ThreadID = getThreadID(CGF, Loc);
4781  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4782  llvm::Value *IfVal;
4783  if (IfCond) {
4784  IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4785  /*isSigned=*/true);
4786  } else
4787  IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4788 
      // Lower bound: emit the variable's initializer into the task field.
4789  LValue LBLVal = CGF.EmitLValueForField(
4790  Result.TDBase,
4791  *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4792  auto *LBVar =
4793  cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4794  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
4795  /*IsInitializer=*/true);
      // Upper bound: same scheme.
4796  LValue UBLVal = CGF.EmitLValueForField(
4797  Result.TDBase,
4798  *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4799  auto *UBVar =
4800  cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4801  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
4802  /*IsInitializer=*/true);
      // Stride: same scheme.
4803  LValue StLVal = CGF.EmitLValueForField(
4804  Result.TDBase,
4805  *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4806  auto *StVar =
4807  cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4808  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
4809  /*IsInitializer=*/true);
4810  // Store reductions address.
4811  LValue RedLVal = CGF.EmitLValueForField(
4812  Result.TDBase,
4813  *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4814  if (Data.Reductions)
4815  CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4816  else {
      // No reductions: null-initialize the field.
4817  CGF.EmitNullInitialization(RedLVal.getAddress(),
4818  CGF.getContext().VoidPtrTy);
4819  }
      // Values for the "sched" argument: no schedule expression selects
      // NoSchedule; otherwise the integer part of Data.Schedule selects NumTasks
      // (non-zero) or Grainsize (zero).
4820  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
      // Bounds are passed by pointer, the stride by value.
4821  llvm::Value *TaskArgs[] = {
4822  UpLoc,
4823  ThreadID,
4824  Result.NewTask,
4825  IfVal,
4826  LBLVal.getPointer(),
4827  UBLVal.getPointer(),
4828  CGF.EmitLoadOfScalar(StLVal, SourceLocation()),
4829  llvm::ConstantInt::getNullValue(
4830  CGF.IntTy), // Always 0 because taskgroup emitted by the compiler
4831  llvm::ConstantInt::getSigned(
4832  CGF.IntTy, Data.Schedule.getPointer()
4833  ? Data.Schedule.getInt() ? NumTasks : Grainsize
4834  : NoSchedule),
4835  Data.Schedule.getPointer()
4836  ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4837  /*isSigned=*/false)
4838  : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      // NOTE(review): the condition and cast that start the final element are on
      // a line missing from the listing; the visible part yields the task-dup
      // function as void* or a null pointer.
4840  Result.TaskDupFn, CGF.VoidPtrTy)
4841  : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
4843 }
4844 
4845 /// \brief Emit reduction operation for each element of array (required for
4846 /// array sections) LHS op = RHS.
4847 /// \param Type Type of array.
4848 /// \param LHSVar Variable on the left side of the reduction operation
4849 /// (references element of array in original variable).
4850 /// \param RHSVar Variable on the right side of the reduction operation
4851 /// (references element of array in original variable).
4852 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
4853 /// RHSVar.
      // NOTE(review): listing numbers 4854 and 4902 are missing, so the function
      // name line and the declaration of Scope are not visible in this listing.
      //
      // XExpr, EExpr and UpExpr default to nullptr and are forwarded unchanged
      // to RedOpGen for every element.
4855  CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4856  const VarDecl *RHSVar,
4857  const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4858  const Expr *, const Expr *)> &RedOpGen,
4859  const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4860  const Expr *UpExpr = nullptr) {
4861  // Perform element-by-element initialization.
4862  QualType ElementTy;
4863  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4864  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4865 
4866  // Drill down to the base element type on both arrays.
      // emitArrayLength also fills in ElementTy and may update LHSAddr (both are
      // passed to it); RHSAddr is not passed through it.
4867  auto ArrayTy = Type->getAsArrayTypeUnsafe();
4868  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4869 
4870  auto RHSBegin = RHSAddr.getPointer();
4871  auto LHSBegin = LHSAddr.getPointer();
4872  // Cast from pointer to array type to pointer to single element.
4873  auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
4874  // The basic structure here is a while-do loop.
4875  auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4876  auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
      // Skip the loop entirely for an empty array.
4877  auto IsEmpty =
4878  CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4879  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4880 
4881  // Enter the loop body, making that address the current address.
4882  auto EntryBB = CGF.Builder.GetInsertBlock();
4883  CGF.EmitBlock(BodyBB);
4884 
4885  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4886 
      // PHI nodes carry the current source/destination element pointers; the
      // back-edge values are added after the body has been emitted.
4887  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4888  RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4889  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4890  Address RHSElementCurrent =
4891  Address(RHSElementPHI,
4892  RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4893 
4894  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4895  LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4896  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4897  Address LHSElementCurrent =
4898  Address(LHSElementPHI,
4899  LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4900 
4901  // Emit copy.
      // Remap LHSVar/RHSVar to the current elements while the reduction
      // operation is generated, then force the scope's cleanup.
4903  Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; });
4904  Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; });
4905  Scope.Privatize();
4906  RedOpGen(CGF, XExpr, EExpr, UpExpr);
4907  Scope.ForceCleanup();
4908 
4909  // Shift the address forward by one element.
4910  auto LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4911  LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
4912  auto RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4913  RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
4914  // Check whether we've reached the end.
4915  auto Done =
4916  CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4917  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
      // The back edge comes from the current insert block, which may differ from
      // BodyBB if RedOpGen created additional blocks.
4918  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4919  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4920 
4921  // Done.
4922  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4923 }
4924 
4925 /// Emit reduction combiner. If the combiner is a simple expression emit it as
4926 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4927 /// UDR combiner function.
      // NOTE(review): the line with the function name and first parameter
      // (listing number 4928) is missing from this listing.
4929  const Expr *ReductionOp) {
      // Recognized shape: a call whose callee is an OpaqueValueExpr that wraps a
      // reference to an OMPDeclareReductionDecl.
4930  if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
4931  if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4932  if (auto *DRE =
4933  dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4934  if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
      // Bind the opaque callee to the first function of the pair returned by
      // getUserDefinedReduction while the call is emitted.
4935  std::pair<llvm::Function *, llvm::Function *> Reduction =
4936  CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4937  RValue Func = RValue::get(Reduction.first);
4938  CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4939  CGF.EmitIgnoredExpr(ReductionOp);
4940  return;
4941  }
      // Any other expression is emitted as written, discarding its value.
4942  CGF.EmitIgnoredExpr(ReductionOp);
4943 }
4944 
4946  CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
4947  ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
4948  ArrayRef<const Expr *> ReductionOps) {
4949  auto &C = CGM.getContext();
4950 
4951  // void reduction_func(void *LHSArg, void *RHSArg);
4952  FunctionArgList Args;
4953  ImplicitParamDecl LHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
4954  ImplicitParamDecl RHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
4955  Args.push_back(&LHSArg);
4956  Args.push_back(&RHSArg);
4957  auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4958  auto *Fn = llvm::Function::Create(
4960  ".omp.reduction.reduction_func", &CGM.getModule());
4961  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
4962  CodeGenFunction CGF(CGM);
4963  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
4964 
4965  // Dst = (void*[n])(LHSArg);
4966  // Src = (void*[n])(RHSArg);
4968  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
4969  ArgsType), CGF.getPointerAlign());
4971  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
4972  ArgsType), CGF.getPointerAlign());
4973 
4974  // ...
4975  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
4976  // ...
4978  auto IPriv = Privates.begin();
4979  unsigned Idx = 0;
4980  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
4981  auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
4982  Scope.addPrivate(RHSVar, [&]() -> Address {
4983  return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
4984  });
4985  auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
4986  Scope.addPrivate(LHSVar, [&]() -> Address {
4987  return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
4988  });
4989  QualType PrivTy = (*IPriv)->getType();
4990  if (PrivTy->isVariablyModifiedType()) {
4991  // Get array size and emit VLA type.
4992  ++Idx;
4993  Address Elem =
4994  CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize());
4995  llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
4996  auto *VLA = CGF.getContext().getAsVariableArrayType(PrivTy);
4997  auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
4999  CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5000  CGF.EmitVariablyModifiedType(PrivTy);
5001  }
5002  }
5003  Scope.Privatize();
5004  IPriv = Privates.begin();
5005  auto ILHS = LHSExprs.begin();
5006  auto IRHS = RHSExprs.begin();
5007  for (auto *E : ReductionOps) {
5008  if ((*IPriv)->getType()->isArrayType()) {
5009  // Emit reduction for array section.
5010  auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5011  auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5013  CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5014  [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5015  emitReductionCombiner(CGF, E);
5016  });
5017  } else
5018  // Emit reduction for array subscript or single variable.
5019  emitReductionCombiner(CGF, E);
5020  ++IPriv;
5021  ++ILHS;
5022  ++IRHS;
5023  }
5024  Scope.ForceCleanup();
5025  CGF.FinishFunction();
5026  return Fn;
5027 }
5028 
5030  const Expr *ReductionOp,
5031  const Expr *PrivateRef,
5032  const DeclRefExpr *LHS,
5033  const DeclRefExpr *RHS) {
5034  if (PrivateRef->getType()->isArrayType()) {
5035  // Emit reduction for array section.
5036  auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5037  auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5039  CGF, PrivateRef->getType(), LHSVar, RHSVar,
5040  [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5041  emitReductionCombiner(CGF, ReductionOp);
5042  });
5043  } else
5044  // Emit reduction for array subscript or single variable.
5045  emitReductionCombiner(CGF, ReductionOp);
5046 }
5047 
5049  ArrayRef<const Expr *> Privates,
5050  ArrayRef<const Expr *> LHSExprs,
5051  ArrayRef<const Expr *> RHSExprs,
5052  ArrayRef<const Expr *> ReductionOps,
5053  ReductionOptionsTy Options) {
5054  if (!CGF.HaveInsertPoint())
5055  return;
5056 
5057  bool WithNowait = Options.WithNowait;
5058  bool SimpleReduction = Options.SimpleReduction;
5059 
5060  // Next code should be emitted for reduction:
5061  //
5062  // static kmp_critical_name lock = { 0 };
5063  //
5064  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5065  // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5066  // ...
5067  // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5068  // *(Type<n>-1*)rhs[<n>-1]);
5069  // }
5070  //
5071  // ...
5072  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5073  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5074  // RedList, reduce_func, &<lock>)) {
5075  // case 1:
5076  // ...
5077  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5078  // ...
5079  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5080  // break;
5081  // case 2:
5082  // ...
5083  // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5084  // ...
5085  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5086  // break;
5087  // default:;
5088  // }
5089  //
5090  // if SimpleReduction is true, only the next code is generated:
5091  // ...
5092  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5093  // ...
5094 
5095  auto &C = CGM.getContext();
5096 
5097  if (SimpleReduction) {
5099  auto IPriv = Privates.begin();
5100  auto ILHS = LHSExprs.begin();
5101  auto IRHS = RHSExprs.begin();
5102  for (auto *E : ReductionOps) {
5103  emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5104  cast<DeclRefExpr>(*IRHS));
5105  ++IPriv;
5106  ++ILHS;
5107  ++IRHS;
5108  }
5109  return;
5110  }
5111 
5112  // 1. Build a list of reduction variables.
5113  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5114  auto Size = RHSExprs.size();
5115  for (auto *E : Privates) {
5116  if (E->getType()->isVariablyModifiedType())
5117  // Reserve place for array size.
5118  ++Size;
5119  }
5120  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5121  QualType ReductionArrayTy =
5122  C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
5123  /*IndexTypeQuals=*/0);
5124  Address ReductionList =
5125  CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5126  auto IPriv = Privates.begin();
5127  unsigned Idx = 0;
5128  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5129  Address Elem =
5130  CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize());
5131  CGF.Builder.CreateStore(
5133  CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
5134  Elem);
5135  if ((*IPriv)->getType()->isVariablyModifiedType()) {
5136  // Store array size.
5137  ++Idx;
5138  Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
5139  CGF.getPointerSize());
5140  llvm::Value *Size = CGF.Builder.CreateIntCast(
5141  CGF.getVLASize(
5142  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5143  .first,
5144  CGF.SizeTy, /*isSigned=*/false);
5145  CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5146  Elem);
5147  }
5148  }
5149 
5150  // 2. Emit reduce_func().
5151  auto *ReductionFn = emitReductionFunction(
5152  CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
5153  LHSExprs, RHSExprs, ReductionOps);
5154 
5155  // 3. Create static kmp_critical_name lock = { 0 };
5156  auto *Lock = getCriticalRegionLock(".reduction");
5157 
5158  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5159  // RedList, reduce_func, &<lock>);
5160  auto *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5161  auto *ThreadId = getThreadID(CGF, Loc);
5162  auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5164  ReductionList.getPointer(), CGF.VoidPtrTy);
5165  llvm::Value *Args[] = {
5166  IdentTLoc, // ident_t *<loc>
5167  ThreadId, // i32 <gtid>
5168  CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5169  ReductionArrayTySize, // size_type sizeof(RedList)
5170  RL, // void *RedList
5171  ReductionFn, // void (*) (void *, void *) <reduce_func>
5172  Lock // kmp_critical_name *&<lock>
5173  };
5174  auto Res = CGF.EmitRuntimeCall(
5177  Args);
5178 
5179  // 5. Build switch(res)
5180  auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5181  auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5182 
5183  // 6. Build case 1:
5184  // ...
5185  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5186  // ...
5187  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5188  // break;
5189  auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5190  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5191  CGF.EmitBlock(Case1BB);
5192 
5193  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5194  llvm::Value *EndArgs[] = {
5195  IdentTLoc, // ident_t *<loc>
5196  ThreadId, // i32 <gtid>
5197  Lock // kmp_critical_name *&<lock>
5198  };
5199  auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps](
5200  CodeGenFunction &CGF, PrePostActionTy &Action) {
5201  auto &RT = CGF.CGM.getOpenMPRuntime();
5202  auto IPriv = Privates.begin();
5203  auto ILHS = LHSExprs.begin();
5204  auto IRHS = RHSExprs.begin();
5205  for (auto *E : ReductionOps) {
5206  RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5207  cast<DeclRefExpr>(*IRHS));
5208  ++IPriv;
5209  ++ILHS;
5210  ++IRHS;
5211  }
5212  };
5213  RegionCodeGenTy RCG(CodeGen);
5214  CommonActionTy Action(
5215  nullptr, llvm::None,
5218  EndArgs);
5219  RCG.setAction(Action);
5220  RCG(CGF);
5221 
5222  CGF.EmitBranch(DefaultBB);
5223 
5224  // 7. Build case 2:
5225  // ...
5226  // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5227  // ...
5228  // break;
5229  auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5230  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5231  CGF.EmitBlock(Case2BB);
5232 
5233  auto &&AtomicCodeGen = [Loc, &Privates, &LHSExprs, &RHSExprs, &ReductionOps](
5234  CodeGenFunction &CGF, PrePostActionTy &Action) {
5235  auto ILHS = LHSExprs.begin();
5236  auto IRHS = RHSExprs.begin();
5237  auto IPriv = Privates.begin();
5238  for (auto *E : ReductionOps) {
5239  const Expr *XExpr = nullptr;
5240  const Expr *EExpr = nullptr;
5241  const Expr *UpExpr = nullptr;
5242  BinaryOperatorKind BO = BO_Comma;
5243  if (auto *BO = dyn_cast<BinaryOperator>(E)) {
5244  if (BO->getOpcode() == BO_Assign) {
5245  XExpr = BO->getLHS();
5246  UpExpr = BO->getRHS();
5247  }
5248  }
5249  // Try to emit update expression as a simple atomic.
5250  auto *RHSExpr = UpExpr;
5251  if (RHSExpr) {
5252  // Analyze RHS part of the whole expression.
5253  if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
5254  RHSExpr->IgnoreParenImpCasts())) {
5255  // If this is a conditional operator, analyze its condition for
5256  // min/max reduction operator.
5257  RHSExpr = ACO->getCond();
5258  }
5259  if (auto *BORHS =
5260  dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5261  EExpr = BORHS->getRHS();
5262  BO = BORHS->getOpcode();
5263  }
5264  }
5265  if (XExpr) {
5266  auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5267  auto &&AtomicRedGen = [BO, VD,
5268  Loc](CodeGenFunction &CGF, const Expr *XExpr,
5269  const Expr *EExpr, const Expr *UpExpr) {
5270  LValue X = CGF.EmitLValue(XExpr);
5271  RValue E;
5272  if (EExpr)
5273  E = CGF.EmitAnyExpr(EExpr);
5274  CGF.EmitOMPAtomicSimpleUpdateExpr(
5275  X, E, BO, /*IsXLHSInRHSPart=*/true,
5276  llvm::AtomicOrdering::Monotonic, Loc,
5277  [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5278  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5279  PrivateScope.addPrivate(
5280  VD, [&CGF, VD, XRValue, Loc]() -> Address {
5281  Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5282  CGF.emitOMPSimpleStore(
5283  CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5284  VD->getType().getNonReferenceType(), Loc);
5285  return LHSTemp;
5286  });
5287  (void)PrivateScope.Privatize();
5288  return CGF.EmitAnyExpr(UpExpr);
5289  });
5290  };
5291  if ((*IPriv)->getType()->isArrayType()) {
5292  // Emit atomic reduction for array section.
5293  auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5294  EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5295  AtomicRedGen, XExpr, EExpr, UpExpr);
5296  } else
5297  // Emit atomic reduction for array subscript or single variable.
5298  AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5299  } else {
5300  // Emit as a critical region.
5301  auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5302  const Expr *, const Expr *) {
5303  auto &RT = CGF.CGM.getOpenMPRuntime();
5304  RT.emitCriticalRegion(
5305  CGF, ".atomic_reduction",
5306  [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5307  Action.Enter(CGF);
5308  emitReductionCombiner(CGF, E);
5309  },
5310  Loc);
5311  };
5312  if ((*IPriv)->getType()->isArrayType()) {
5313  auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5314  auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5315  EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5316  CritRedGen);
5317  } else
5318  CritRedGen(CGF, nullptr, nullptr, nullptr);
5319  }
5320  ++ILHS;
5321  ++IRHS;
5322  ++IPriv;
5323  }
5324  };
5325  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5326  if (!WithNowait) {
5327  // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5328  llvm::Value *EndArgs[] = {
5329  IdentTLoc, // ident_t *<loc>
5330  ThreadId, // i32 <gtid>
5331  Lock // kmp_critical_name *&<lock>
5332  };
5333  CommonActionTy Action(nullptr, llvm::None,
5334  createRuntimeFunction(OMPRTL__kmpc_end_reduce),
5335  EndArgs);
5336  AtomicRCG.setAction(Action);
5337  AtomicRCG(CGF);
5338  } else
5339  AtomicRCG(CGF);
5340 
5341  CGF.EmitBranch(DefaultBB);
5342  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5343 }
5344 
5345 /// Generates unique name for artificial threadprivate variables.
5346 /// Format is: <Prefix> "." <Loc_raw_encoding> "_" <N>
5347 static std::string generateUniqueName(StringRef Prefix, SourceLocation Loc,
5348  unsigned N) {
5349  SmallString<256> Buffer;
5350  llvm::raw_svector_ostream Out(Buffer);
5351  Out << Prefix << "." << Loc.getRawEncoding() << "_" << N;
5352  return Out.str();
5353 }
5354 
5355 /// Emits reduction initializer function:
5356 /// \code
5357 /// void @.red_init(void* %arg) {
5358 /// %0 = bitcast void* %arg to <type>*
5359 /// store <type> <init>, <type>* %0
5360 /// ret void
5361 /// }
5362 /// \endcode
5364  SourceLocation Loc,
5365  ReductionCodeGen &RCG, unsigned N) {
5366  auto &C = CGM.getContext();
5367  FunctionArgList Args;
5368  ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other);
5369  Args.emplace_back(&Param);
5370  auto &FnInfo =
5371  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5372  auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5374  ".red_init.", &CGM.getModule());
5375  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
5376  CodeGenFunction CGF(CGM);
5377  CGF.disableDebugInfo();
5378  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
5379  Address PrivateAddr = CGF.EmitLoadOfPointer(
5380  CGF.GetAddrOfLocalVar(&Param),
5381  C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5382  llvm::Value *Size = nullptr;
5383  // If the size of the reduction item is non-constant, load it from global
5384  // threadprivate variable.
5385  if (RCG.getSizes(N).second) {
5386  Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5387  CGF, CGM.getContext().getSizeType(),
5388  generateUniqueName("reduction_size", Loc, N));
5389  Size =
5390  CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5392  }
5393  RCG.emitAggregateType(CGF, N, Size);
5394  LValue SharedLVal;
5395  // If initializer uses initializer from declare reduction construct, emit a
5396  // pointer to the address of the original reduction item (required by reduction
5397  // initializer)
5398  if (RCG.usesReductionInitializer(N)) {
5399  Address SharedAddr =
5400  CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5401  CGF, CGM.getContext().VoidPtrTy,
5402  generateUniqueName("reduction", Loc, N));
5403  SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
5404  } else {
5405  SharedLVal = CGF.MakeNaturalAlignAddrLValue(
5406  llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
5407  CGM.getContext().VoidPtrTy);
5408  }
5409  // Emit the initializer:
5410  // %0 = bitcast void* %arg to <type>*
5411  // store <type> <init>, <type>* %0
5412  RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
5413  [](CodeGenFunction &) { return false; });
5414  CGF.FinishFunction();
5415  return Fn;
5416 }
5417 
5418 /// Emits reduction combiner function:
5419 /// \code
5420 /// void @.red_comb(void* %arg0, void* %arg1) {
5421 /// %lhs = bitcast void* %arg0 to <type>*
5422 /// %rhs = bitcast void* %arg1 to <type>*
5423 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5424 /// store <type> %2, <type>* %lhs
5425 /// ret void
5426 /// }
5427 /// \endcode
5429  SourceLocation Loc,
5430  ReductionCodeGen &RCG, unsigned N,
5431  const Expr *ReductionOp,
5432  const Expr *LHS, const Expr *RHS,
5433  const Expr *PrivateRef) {
5434  auto &C = CGM.getContext();
5435  auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5436  auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5437  FunctionArgList Args;
5438  ImplicitParamDecl ParamInOut(C, C.VoidPtrTy, ImplicitParamDecl::Other);
5439  ImplicitParamDecl ParamIn(C, C.VoidPtrTy, ImplicitParamDecl::Other);
5440  Args.emplace_back(&ParamInOut);
5441  Args.emplace_back(&ParamIn);
5442  auto &FnInfo =
5443  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5444  auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5446  ".red_comb.", &CGM.getModule());
5447  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
5448  CodeGenFunction CGF(CGM);
5449  CGF.disableDebugInfo();
5450  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
5451  llvm::Value *Size = nullptr;
5452  // If the size of the reduction item is non-constant, load it from global
5453  // threadprivate variable.
5454  if (RCG.getSizes(N).second) {
5455  Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5456  CGF, CGM.getContext().getSizeType(),
5457  generateUniqueName("reduction_size", Loc, N));
5458  Size =
5459  CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5461  }
5462  RCG.emitAggregateType(CGF, N, Size);
5463  // Remap lhs and rhs variables to the addresses of the function arguments.
5464  // %lhs = bitcast void* %arg0 to <type>*
5465  // %rhs = bitcast void* %arg1 to <type>*
5466  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5467  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() -> Address {
5468  // Pull out the pointer to the variable.
5469  Address PtrAddr = CGF.EmitLoadOfPointer(
5470  CGF.GetAddrOfLocalVar(&ParamInOut),
5471  C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5472  return CGF.Builder.CreateElementBitCast(
5473  PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
5474  });
5475  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() -> Address {
5476  // Pull out the pointer to the variable.
5477  Address PtrAddr = CGF.EmitLoadOfPointer(
5478  CGF.GetAddrOfLocalVar(&ParamIn),
5479  C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5480  return CGF.Builder.CreateElementBitCast(
5481  PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
5482  });
5483  PrivateScope.Privatize();
5484  // Emit the combiner body:
5485  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5486  // store <type> %2, <type>* %lhs
5487  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5488  CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5489  cast<DeclRefExpr>(RHS));
5490  CGF.FinishFunction();
5491  return Fn;
5492 }
5493 
5494 /// Emits reduction finalizer function:
5495 /// \code
5496 /// void @.red_fini(void* %arg) {
5497 /// %0 = bitcast void* %arg to <type>*
5498 /// <destroy>(<type>* %0)
5499 /// ret void
5500 /// }
5501 /// \endcode
5503  SourceLocation Loc,
5504  ReductionCodeGen &RCG, unsigned N) {
5505  if (!RCG.needCleanups(N))
5506  return nullptr;
5507  auto &C = CGM.getContext();
5508  FunctionArgList Args;
5509  ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other);
5510  Args.emplace_back(&Param);
5511  auto &FnInfo =
5512  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5513  auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5515  ".red_fini.", &CGM.getModule());
5516  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
5517  CodeGenFunction CGF(CGM);
5518  CGF.disableDebugInfo();
5519  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
5520  Address PrivateAddr = CGF.EmitLoadOfPointer(
5521  CGF.GetAddrOfLocalVar(&Param),
5522  C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5523  llvm::Value *Size = nullptr;
5524  // If the size of the reduction item is non-constant, load it from global
5525  // threadprivate variable.
5526  if (RCG.getSizes(N).second) {
5527  Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5528  CGF, CGM.getContext().getSizeType(),
5529  generateUniqueName("reduction_size", Loc, N));
5530  Size =
5531  CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5533  }
5534  RCG.emitAggregateType(CGF, N, Size);
5535  // Emit the finalizer body:
5536  // <destroy>(<type>* %0)
5537  RCG.emitCleanups(CGF, N, PrivateAddr);
5538  CGF.FinishFunction();
5539  return Fn;
5540 }
5541 
5543  CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5544  ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5545  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5546  return nullptr;
5547 
5548  // Build typedef struct:
5549  // kmp_task_red_input {
5550  // void *reduce_shar; // shared reduction item
5551  // size_t reduce_size; // size of data item
5552  // void *reduce_init; // data initialization routine
5553  // void *reduce_fini; // data finalization routine
5554  // void *reduce_comb; // data combiner routine
5555  // kmp_task_red_flags_t flags; // flags for additional info from compiler
5556  // } kmp_task_red_input_t;
5557  ASTContext &C = CGM.getContext();
5558  auto *RD = C.buildImplicitRecord("kmp_task_red_input_t");
5559  RD->startDefinition();
5560  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5561  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5562  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5563  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5564  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5565  const FieldDecl *FlagsFD = addFieldToRecordDecl(
5566  C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5567  RD->completeDefinition();
5568  QualType RDType = C.getRecordType(RD);
5569  unsigned Size = Data.ReductionVars.size();
5570  llvm::APInt ArraySize(/*numBits=*/64, Size);
5571  QualType ArrayRDType = C.getConstantArrayType(
5572  RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
5573  // kmp_task_red_input_t .rd_input.[Size];
5574  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5576  Data.ReductionOps);
5577  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5578  // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
5579  llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5580  llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5582  TaskRedInput.getPointer(), Idxs,
5583  /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5584  ".rd_input.gep.");
5585  LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
5586  // ElemLVal.reduce_shar = &Shareds[Cnt];
5587  LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5588  RCG.emitSharedLValue(CGF, Cnt);
5589  llvm::Value *CastedShared =
5591  CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
5592  RCG.emitAggregateType(CGF, Cnt);
5593  llvm::Value *SizeValInChars;
5594  llvm::Value *SizeVal;
5595  std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5596  // We use delayed creation/initialization for VLAs, array sections and
5597  // custom reduction initializations. It is required because runtime does not
5598  // provide the way to pass the sizes of VLAs/array sections to
5599  // initializer/combiner/finalizer functions and does not pass the pointer to
5600  // original reduction item to the initializer. Instead threadprivate global
5601  // variables are used to store these values and use them in the functions.
5602  bool DelayedCreation = !!SizeVal;
5603  SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5604  /*isSigned=*/false);
5605  LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5606  CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5607  // ElemLVal.reduce_init = init;
5608  LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5609  llvm::Value *InitAddr =
5610  CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
5611  CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5612  DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
5613  // ElemLVal.reduce_fini = fini;
5614  LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5615  llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5616  llvm::Value *FiniAddr = Fini
5617  ? CGF.EmitCastToVoidPtr(Fini)
5618  : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5619  CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5620  // ElemLVal.reduce_comb = comb;
5621  LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5623  CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5624  RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
5625  CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5626  // ElemLVal.flags = DelayedCreation ? 1 : 0;
5627  LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5628  if (DelayedCreation) {
5629  CGF.EmitStoreOfScalar(
5630  llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true),
5631  FlagsLVal);
5632  } else
5633  CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
5634  }
5635  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
5636  // *data);
5637  llvm::Value *Args[] = {
5638  CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5639  /*isSigned=*/true),
5640  llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5642  CGM.VoidPtrTy)};
5643  return CGF.EmitRuntimeCall(
5645 }
5646 
5648  SourceLocation Loc,
5649  ReductionCodeGen &RCG,
5650  unsigned N) {
5651  auto Sizes = RCG.getSizes(N);
5652  // Emit threadprivate global variable if the type is non-constant
5653  // (Sizes.second != nullptr).
5654  if (Sizes.second) {
5655  llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5656  /*isSigned=*/false);
5658  CGF, CGM.getContext().getSizeType(),
5659  generateUniqueName("reduction_size", Loc, N));
5660  CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5661  }
5662  // Store address of the original reduction item if custom initializer is used.
5663  if (RCG.usesReductionInitializer(N)) {
5665  CGF, CGM.getContext().VoidPtrTy,
5666  generateUniqueName("reduction", Loc, N));
5667  CGF.Builder.CreateStore(
5669  RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
5670  SharedAddr, /*IsVolatile=*/false);
5671  }
5672 }
5673 
5675  SourceLocation Loc,
5676  llvm::Value *ReductionsPtr,
5677  LValue SharedLVal) {
5678  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5679  // *d);
5680  llvm::Value *Args[] = {
5681  CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5682  /*isSigned=*/true),
5683  ReductionsPtr,
5685  CGM.VoidPtrTy)};
5686  return Address(
5687  CGF.EmitRuntimeCall(
5689  SharedLVal.getAlignment());
5690 }
5691 
5693  SourceLocation Loc) {
5694  if (!CGF.HaveInsertPoint())
5695  return;
5696  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
5697  // global_tid);
5698  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
5699  // Ignore return result until untied tasks are supported.
5701  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5702  Region->emitUntiedSwitch(CGF);
5703 }
5704 
5706  OpenMPDirectiveKind InnerKind,
5707  const RegionCodeGenTy &CodeGen,
5708  bool HasCancel) {
5709  if (!CGF.HaveInsertPoint())
5710  return;
5711  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
5712  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
5713 }
5714 
5715 namespace {
5717  CancelNoreq = 0,
5718  CancelParallel = 1,
5719  CancelLoop = 2,
5720  CancelSections = 3,
5721  CancelTaskgroup = 4
5722 };
5723 } // anonymous namespace
5724 
5726  RTCancelKind CancelKind = CancelNoreq;
5727  if (CancelRegion == OMPD_parallel)
5728  CancelKind = CancelParallel;
5729  else if (CancelRegion == OMPD_for)
5730  CancelKind = CancelLoop;
5731  else if (CancelRegion == OMPD_sections)
5732  CancelKind = CancelSections;
5733  else {
5734  assert(CancelRegion == OMPD_taskgroup);
5735  CancelKind = CancelTaskgroup;
5736  }
5737  return CancelKind;
5738 }
5739 
5741  CodeGenFunction &CGF, SourceLocation Loc,
5742  OpenMPDirectiveKind CancelRegion) {
5743  if (!CGF.HaveInsertPoint())
5744  return;
5745  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
5746  // global_tid, kmp_int32 cncl_kind);
5747  if (auto *OMPRegionInfo =
5748  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5749  // For 'cancellation point taskgroup', the task region info may not have a
5750  // cancel. This may instead happen in another adjacent task.
5751  if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
5752  llvm::Value *Args[] = {
5753  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
5754  CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5755  // The call result is tested below to decide whether to exit the construct.
5756  auto *Result = CGF.EmitRuntimeCall(
5758  // if (__kmpc_cancellationpoint()) {
5759  // exit from construct;
5760  // }
5761  auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
5762  auto *ContBB = CGF.createBasicBlock(".cancel.continue");
5763  auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
5764  CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5765  CGF.EmitBlock(ExitBB);
5766  // exit from construct;
5767  auto CancelDest =
5768  CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5769  CGF.EmitBranchThroughCleanup(CancelDest);
5770  CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5771  }
5772  }
5773 }
5774 
5776  const Expr *IfCond,
5777  OpenMPDirectiveKind CancelRegion) {
5778  if (!CGF.HaveInsertPoint())
5779  return;
5780  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
5781  // kmp_int32 cncl_kind);
5782  if (auto *OMPRegionInfo =
5783  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5784  auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
5785  PrePostActionTy &) {
5786  auto &RT = CGF.CGM.getOpenMPRuntime();
5787  llvm::Value *Args[] = {
5788  RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
5789  CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5790  // The call result is tested below to decide whether to exit the construct.
5791  auto *Result = CGF.EmitRuntimeCall(
5792  RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
5793  // if (__kmpc_cancel()) {
5794  // exit from construct;
5795  // }
5796  auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
5797  auto *ContBB = CGF.createBasicBlock(".cancel.continue");
5798  auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
5799  CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5800  CGF.EmitBlock(ExitBB);
5801  // exit from construct;
5802  auto CancelDest =
5803  CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5804  CGF.EmitBranchThroughCleanup(CancelDest);
5805  CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5806  };
5807  if (IfCond)
5808  emitOMPIfClause(CGF, IfCond, ThenGen,
5809  [](CodeGenFunction &, PrePostActionTy &) {});
5810  else {
5811  RegionCodeGenTy ThenRCG(ThenGen);
5812  ThenRCG(CGF);
5813  }
5814  }
5815 }
5816 
5817  /// \brief Obtain information that uniquely identifies a target entry. This
5818  /// consists of the file and device IDs as well as the line number associated
5819  /// with the relevant entry source location.
///
/// The IDs are taken from the llvm::sys::fs::UniqueID of the presumed file name
/// of \p Loc, and the line number is the presumed line of \p Loc.
///
/// \param [out] DeviceID Receives ID.getDevice() of the presumed file.
/// \param [out] FileID Receives ID.getFile() of the presumed file.
/// \param [out] LineNum Receives the presumed line number of \p Loc.
5821  unsigned &DeviceID, unsigned &FileID,
5822  unsigned &LineNum) {
5823 
5824  auto &SM = C.getSourceManager();
5825 
5826  // The location should always be valid and have a file ID (the user cannot
5827  // use #pragma directives in macros).
5828 
5829  assert(Loc.isValid() && "Source location is expected to be always valid.");
5830  assert(Loc.isFileID() && "Source location is expected to refer to a file.");
5831 
5832  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
5833  assert(PLoc.isValid() && "Source location is expected to be always valid.");
5834 
// A true result from getUniqueID is treated as failure to query the file; this
// is considered impossible for a file that is currently being compiled.
5835  llvm::sys::fs::UniqueID ID;
5836  if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
5837  llvm_unreachable("Source file with target region no longer exists!");
5838 
5839  DeviceID = ID.getDevice();
5840  FileID = ID.getFile();
5841  LineNum = PLoc.getLine();
5842 }
5843 
5845  const OMPExecutableDirective &D, StringRef ParentName,
5846  llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
5847  bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
// The parent name becomes part of the generated entry name, so it must not be
// empty.
5848  assert(!ParentName.empty() && "Invalid target region parent name!");
5849 
// All of the work is delegated to the helper; the arguments are forwarded
// unchanged.
5850  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
5851  IsOffloadEntry, CodeGen);
5852 }
5853 
5855  const OMPExecutableDirective &D, StringRef ParentName,
5856  llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
5857  bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
5858  // Create a unique name for the entry function using the source location
5859  // information of the current target region. The name will be something like:
5860  //
5861  // __omp_offloading_DD_FFFF_PP_lBB
5862  //
5863  // where DD_FFFF is an ID unique to the file (device and file IDs, printed in
5864  // hexadecimal), PP is the mangled name of the function that encloses the
5865  // target region and BB is the line number of the target region.
5866 
5867  unsigned DeviceID;
5868  unsigned FileID;
5869  unsigned Line;
5870  getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID,
5871  Line);
5872  SmallString<64> EntryFnName;
5873  {
// The stream is scoped so that it is destroyed before EntryFnName is used.
5874  llvm::raw_svector_ostream OS(EntryFnName);
5875  OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
5876  << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
5877  }
5878 
5879  const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
5880 
// Outline the captured statement in a fresh CodeGenFunction, using a target
// region info object that carries the entry name built above.
5881  CodeGenFunction CGF(CGM, true);
5882  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
5883  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
5884 
5885  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);
5886 
5887  // If this target outline function is not an offload entry, we don't need to
5888  // register it. Note that OutlinedFnID is left untouched in that case.
5889  if (!IsOffloadEntry)
5890  return;
5891 
5892  // The target region ID is used by the runtime library to identify the current
5893  // target region, so it only has to be unique and not necessarily point to
5894  // anything. It could be the pointer to the outlined function that implements
5895  // the target region, but we aren't using that so that the compiler doesn't
5896  // need to keep that, and could therefore inline the host function if proven
5897  // worthwhile during optimization. On the other hand, if emitting code for the
5898  // device, the ID has to be the function address so that it can be retrieved
5899  // from the offloading entry and launched by the runtime library. We also mark
5900  // the outlined function to have external linkage in case we are emitting code
5901  // for the device, because these functions will be entry points to the device.
5902 
5903  if (CGM.getLangOpts().OpenMPIsDevice) {
5904  OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
5905  OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage);
5906  } else
// Host side: the ID is a private, constant, zero-initialized i8 global.
5907  OutlinedFnID = new llvm::GlobalVariable(
5908  CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
5909  llvm::GlobalValue::PrivateLinkage,
5910  llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id");
5911 
5912  // Register the information for the entry associated with this target region.
5914  DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
5915  /*Flags=*/0);
5916 }
5917 
5918 /// discard all CompoundStmts intervening between two constructs
5919 static const Stmt *ignoreCompoundStmts(const Stmt *Body) {
5920  while (auto *CS = dyn_cast_or_null<CompoundStmt>(Body))
5921  Body = CS->body_front();
5922 
5923  return Body;
5924 }
5925 
5926  /// Emit the number of teams for a target directive. Inspect the num_teams
5927  /// clause associated with a teams construct combined or closely nested
5928  /// with the target directive.
5929  ///
5930  /// Emit a single team (the constant 1) for directives such as 'target
5931  /// parallel' that have no associated teams construct.
5932  ///
5933  /// Otherwise, return nullptr.
///
/// Every non-null result is a 32-bit integer value: clause expressions are
/// converted to Int32Ty with a signed integer cast, and the constant 0 denotes
/// the runtime default.
5934  static llvm::Value *
5936  CodeGenFunction &CGF,
5937  const OMPExecutableDirective &D) {
5938 
5939  assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
5940  "teams directive expected to be "
5941  "emitted only for the host!");
5942 
5943  auto &Bld = CGF.Builder;
5944 
5945  // If the target directive is combined with a teams directive:
5946  // Return the value in the num_teams clause, if any.
5947  // Otherwise, return 0 to denote the runtime default.
5949  if (const auto *NumTeamsClause = D.getSingleClause<OMPNumTeamsClause>()) {
// The scope runs any cleanups created while evaluating the clause expression.
5950  CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
5951  auto NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(),
5952  /*IgnoreResultAssign*/ true);
5953  return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
5954  /*IsSigned=*/true);
5955  }
5956 
5957  // The default value is 0.
5958  return Bld.getInt32(0);
5959  }
5960 
5961  // If the target directive is combined with a parallel directive but not a
5962  // teams directive, start one team.
5964  return Bld.getInt32(1);
5965 
5966  // If the current target region has a teams region enclosed, we need to get
5967  // the number of teams to pass to the runtime function call. This is done
5968  // by generating the expression in an inlined region. This is required because
5969  // the expression is captured in the enclosing target environment when the
5970  // teams directive is not combined with target.
5971 
5972  const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
5973 
5974  if (auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>(
5976  if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) {
5977  if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
// Evaluate the clause expression with the inner-expression captured statement
// info installed for the duration of this scope.
5978  CGOpenMPInnerExprInfo CGInfo(CGF, CS);
5979  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
5980  llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
5981  return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
5982  /*IsSigned=*/true);
5983  }
5984 
5985  // If we have an enclosed teams directive but no num_teams clause we use
5986  // the default value 0.
5987  return Bld.getInt32(0);
5988  }
5989  }
5990 
5991  // No teams associated with the directive.
5992  return nullptr;
5993 }
5994 
5995  /// Emit the number of threads for a target directive. Inspect the
5996  /// thread_limit clause associated with a teams construct combined or closely
5997  /// nested with the target directive.
5998  ///
5999  /// Emit the num_threads clause for directives such as 'target parallel' that
6000  /// have no associated teams construct.
6001  ///
6002  /// Otherwise, return nullptr.
///
/// Every non-null result is a 32-bit integer value: clause expressions are
/// converted to Int32Ty with a signed integer cast, and the constant 0 denotes
/// the runtime default.
6003  static llvm::Value *
6005  CodeGenFunction &CGF,
6006  const OMPExecutableDirective &D) {
6007 
6008  assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
6009  "teams directive expected to be "
6010  "emitted only for the host!");
6011 
6012  auto &Bld = CGF.Builder;
6013 
6014  //
6015  // If the target directive is combined with a teams directive:
6016  // Return the value in the thread_limit clause, if any.
6017  //
6018  // If the target directive is combined with a parallel directive:
6019  // Return the value in the num_threads clause, if any.
6020  //
6021  // If both clauses are set, select the minimum of the two.
6022  //
6023  // If neither teams nor parallel combined directives set the number of
6024  // threads in a team, return 0 to denote the runtime default.
6025  //
6026  // If this is not a teams directive return nullptr.
6027 
6030  llvm::Value *DefaultThreadLimitVal = Bld.getInt32(0);
6031  llvm::Value *NumThreadsVal = nullptr;
6032  llvm::Value *ThreadLimitVal = nullptr;
6033 
6034  if (const auto *ThreadLimitClause =
6036  CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6037  auto ThreadLimit = CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(),
6038  /*IgnoreResultAssign*/ true);
6039  ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty,
6040  /*IsSigned=*/true);
6041  }
6042 
6043  if (const auto *NumThreadsClause =
6045  CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6046  llvm::Value *NumThreads =
6047  CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
6048  /*IgnoreResultAssign*/ true);
6049  NumThreadsVal =
6050  Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/true);
6051  }
6052 
6053  // Select the lesser of thread_limit and num_threads (signed comparison). If
// only num_threads was given, it is used as is.
6054  if (NumThreadsVal)
6055  ThreadLimitVal = ThreadLimitVal
6056  ? Bld.CreateSelect(Bld.CreateICmpSLT(NumThreadsVal,
6057  ThreadLimitVal),
6058  NumThreadsVal, ThreadLimitVal)
6059  : NumThreadsVal;
6060 
6061  // Set default value passed to the runtime if either teams or a target
6062  // parallel type directive is found but no clause is specified.
6063  if (!ThreadLimitVal)
6064  ThreadLimitVal = DefaultThreadLimitVal;
6065 
6066  return ThreadLimitVal;
6067  }
6068 
6069  // If the current target region has a teams region enclosed, we need to get
6070  // the thread limit to pass to the runtime function call. This is done
6071  // by generating the expression in an inlined region. This is required because
6072  // the expression is captured in the enclosing target environment when the
6073  // teams directive is not combined with target.
6074 
6075  const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
6076 
6077  if (auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>(
6079  if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) {
6080  if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
// Evaluate the clause expression with the inner-expression captured statement
// info installed for the duration of this scope.
6081  CGOpenMPInnerExprInfo CGInfo(CGF, CS);
6082  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6083  llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit());
6084  return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty,
6085  /*IsSigned=*/true);
6086  }
6087 
6088  // If we have an enclosed teams directive but no thread_limit clause we
6089  // use the default value 0.
6090  return CGF.Builder.getInt32(0);
6091  }
6092  }
6093 
6094  // No teams associated with the directive.
6095  return nullptr;
6096 }
6097 
6098 namespace {
6099 // \brief Utility to handle information from clauses associated with a given
6100 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6101 // It provides a convenient interface to obtain the information and generate
6102 // code for that information.
6103 class MappableExprsHandler {
6104 public:
/// \brief Bit flags describing how an item is mapped for offloading. The
/// individual flags are combined with bitwise OR.
enum OpenMPOffloadMappingFlags {
  /// \brief Allocate device memory and copy the data from host to device.
  OMP_MAP_TO = 0x001,
  /// \brief Allocate device memory and copy the data from device to host.
  OMP_MAP_FROM = 0x002,
  /// \brief Perform the requested mapping action unconditionally, even when
  /// the element has been mapped before.
  OMP_MAP_ALWAYS = 0x004,
  /// \brief Remove the element from the device environment regardless of the
  /// reference count currently associated with it.
  OMP_MAP_DELETE = 0x008,
  /// \brief The mapped element is a pointer-pointee pair; both the pointer
  /// and the pointee should be mapped.
  OMP_MAP_PTR_AND_OBJ = 0x010,
  /// \brief The base address of the entry should be passed to the target
  /// kernel as an argument.
  OMP_MAP_TARGET_PARAM = 0x020,
  /// \brief The runtime library has to return the device pointer in the
  /// current position for the data being mapped. Used for the use_device_ptr
  /// clause.
  OMP_MAP_RETURN_PARAM = 0x040,
  /// \brief The reference being passed is a pointer to private data.
  OMP_MAP_PRIVATE = 0x080,
  /// \brief Pass the element to the device by value.
  OMP_MAP_LITERAL = 0x100,
  /// \brief The map is implicit.
  OMP_MAP_IMPLICIT = 0x200,
};
6136 
6137  /// Class that associates information with a base pointer to be passed to the
6138  /// runtime library.
6139  class BasePointerInfo {
6140  /// The base pointer.
6141  llvm::Value *Ptr = nullptr;
6142  /// The base declaration that refers to this device pointer, or null if
6143  /// there is none.
6144  const ValueDecl *DevPtrDecl = nullptr;
6145 
6146  public:
6147  BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
6148  : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
6149  llvm::Value *operator*() const { return Ptr; }
6150  const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
6151  void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
6152  };
6153 
6154  typedef SmallVector<BasePointerInfo, 16> MapBaseValuesArrayTy;
6155  typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy;
6156  typedef SmallVector<uint64_t, 16> MapFlagsArrayTy;
6157 
6158  private:
6159  /// \brief Directive from which the map clauses were extracted.
6160  const OMPExecutableDirective &CurDir;
6161 
6162  /// \brief Function the directive is being generated for.
6163  CodeGenFunction &CGF;
6164 
6165  /// \brief Set of the canonical declarations of all variables listed in
/// firstprivate clauses of the current directive.
6166  llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls;
6167  /// Set of the canonical declarations of all variables listed in reduction
/// clauses of the current directive.
6168  llvm::SmallPtrSet<const VarDecl *, 8> ReductionDecls;
6169 
6170  /// Map between device pointer declarations and their expression components.
6171  /// The key value for declarations in 'this' is null.
6172  llvm::DenseMap<
6173  const ValueDecl *,
6175  DevPointersMap;
6176 
/// \brief Return a value holding the size of the storage designated by \p E.
///
/// Reference types are looked through. For an array section the size depends
/// on the section: the whole base when there is a colon but no length, one
/// element when there is neither a colon nor a length, and otherwise
/// length * element size.
6177  llvm::Value *getExprTypeSize(const Expr *E) const {
6178  auto ExprTy = E->getType().getCanonicalType();
6179 
6180  // Reference types are ignored for mapping purposes.
6181  if (auto *RefTy = ExprTy->getAs<ReferenceType>())
6182  ExprTy = RefTy->getPointeeType().getCanonicalType();
6183 
6184  // Given that an array section is considered a built-in type, we need to
6185  // do the calculation based on the length of the section instead of relying
6186  // on CGF.getTypeSize(E->getType()).
6187  if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
6189  OAE->getBase()->IgnoreParenImpCasts())
6190  .getCanonicalType();
6191 
6192  // If there is no length associated with the expression, that means we
6193  // are using the whole length of the base.
6194  if (!OAE->getLength() && OAE->getColonLoc().isValid())
6195  return CGF.getTypeSize(BaseTy);
6196 
// The element type is the pointee type for a pointer base and the array element
// type otherwise.
6197  llvm::Value *ElemSize;
6198  if (auto *PTy = BaseTy->getAs<PointerType>())
6199  ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
6200  else {
6201  auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
6202  assert(ATy && "Expecting array type if not a pointer type.");
6203  ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
6204  }
6205 
6206  // If we don't have a length at this point, that is because we have an
6207  // array section with a single element.
6208  if (!OAE->getLength())
6209  return ElemSize;
6210 
// The length is converted to SizeTy as an unsigned value and multiplied by the
// element size with the no-unsigned-wrap flag.
6211  auto *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
6212  LengthVal =
6213  CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
6214  return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
6215  }
6216  return CGF.getTypeSize(ExprTy);
6217  }
6218 
6219  /// \brief Return the corresponding bits for a given map clause modifier. Add
6220  /// a flag marking the map as a pointer if requested. Add a flag marking the
6221  /// map as the first one of a series of maps that relate to the same map
6222  /// expression.
6223  uint64_t getMapTypeBits(OpenMPMapClauseKind MapType,
6224  OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag,
6225  bool AddIsTargetParamFlag) const {
6226  uint64_t Bits = 0u;
6227  switch (MapType) {
6228  case OMPC_MAP_alloc:
6229  case OMPC_MAP_release:
6230  // alloc and release is the default behavior in the runtime library, i.e.
6231  // if we don't pass any bits alloc/release that is what the runtime is
6232  // going to do. Therefore, we don't need to signal anything for these two
6233  // type modifiers.
6234  break;
6235  case OMPC_MAP_to:
6236  Bits = OMP_MAP_TO;
6237  break;
6238  case OMPC_MAP_from:
6239  Bits = OMP_MAP_FROM;
6240  break;
6241  case OMPC_MAP_tofrom:
6242  Bits = OMP_MAP_TO | OMP_MAP_FROM;
6243  break;
6244  case OMPC_MAP_delete:
6245  Bits = OMP_MAP_DELETE;
6246  break;
6247  default:
6248  llvm_unreachable("Unexpected map type!");
6249  break;
6250  }
6251  if (AddPtrFlag)
6252  Bits |= OMP_MAP_PTR_AND_OBJ;
6253  if (AddIsTargetParamFlag)
6254  Bits |= OMP_MAP_TARGET_PARAM;
6255  if (MapTypeModifier == OMPC_MAP_always)
6256  Bits |= OMP_MAP_ALWAYS;
6257  return Bits;
6258  }
6259 
6260  /// \brief Return true if the provided expression is a final array section. A
6261  /// final array section is one whose length can't be proved to be one.
6262  bool isFinalArraySectionExpression(const Expr *E) const {
6263  auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
6264 
6265  // It is not an array section and therefore not a final one.
6266  if (!OASE)
6267  return false;
6268 
6269  // An array section with no colon always refers to a single element.
6270  if (OASE->getColonLoc().isInvalid())
6271  return false;
6272 
6273  auto *Length = OASE->getLength();
6274 
6275  // If we don't have a length we have to check if the array has size 1
6276  // for this dimension. Also, we should always expect a length if the
6277  // base type is pointer.
6278  if (!Length) {
6280  OASE->getBase()->IgnoreParenImpCasts())
6281  .getCanonicalType();
6282  if (auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
6283  return ATy->getSize().getSExtValue() != 1;
6284  // If we don't have a constant dimension length, we have to consider
6285  // the current section as having any size, so it is not necessarily
6286  // unitary. If it happens to be of unit size, that is the user's fault.
6287  return true;
6288  }
6289 
6290  // Check if the length evaluates to 1.
6291  llvm::APSInt ConstLength;
6292  if (!Length->EvaluateAsInt(ConstLength, CGF.getContext()))
6293  return true; // Not a constant, so it can have a size other than 1.
6294 
6295  return ConstLength.getSExtValue() != 1;
6296  }
6297 
6298  /// \brief Generate the base pointers, section pointers, sizes and map type
6299  /// bits for the provided map type, map modifier, and expression components.
6300  /// \a IsFirstComponentList should be set to true if the provided set of
6301  /// components is the first associated with a capture. \a IsImplicit adds
/// OMP_MAP_IMPLICIT to every generated entry. The four output arrays are
/// appended to in lockstep, one element each per generated entry.
6302  void generateInfoForComponentList(
6303  OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
6305  MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
6306  MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
6307  bool IsFirstComponentList, bool IsImplicit) const {
6308 
6309  // The following summarizes what has to be generated for each map and the
6310  // types below. The generated information is expressed in this order:
6311  // base pointer, section pointer, size, flags
6312  // (to add to the ones that come from the map type and modifier).
6313  //
6314  // double d;
6315  // int i[100];
6316  // float *p;
6317  //
6318  // struct S1 {
6319  // int i;
6320  // float f[50];
6321  // }
6322  // struct S2 {
6323  // int i;
6324  // float f[50];
6325  // S1 s;
6326  // double *p;
6327  // struct S2 *ps;
6328  // }
6329  // S2 s;
6330  // S2 *ps;
6331  //
6332  // map(d)
6333  // &d, &d, sizeof(double), noflags
6334  //
6335  // map(i)
6336  // &i, &i, 100*sizeof(int), noflags
6337  //
6338  // map(i[1:23])
6339  // &i(=&i[0]), &i[1], 23*sizeof(int), noflags
6340  //
6341  // map(p)
6342  // &p, &p, sizeof(float*), noflags
6343  //
6344  // map(p[1:24])
6345  // p, &p[1], 24*sizeof(float), noflags
6346  //
6347  // map(s)
6348  // &s, &s, sizeof(S2), noflags
6349  //
6350  // map(s.i)
6351  // &s, &(s.i), sizeof(int), noflags
6352  //
6353  // map(s.s.f)
6354  // &s, &(s.s.f), 50*sizeof(float), noflags
6355  //
6356  // map(s.p)
6357  // &s, &(s.p), sizeof(double*), noflags
6358  //
6359  // map(s.p[:22])
6360  // &s, &(s.p), sizeof(double*), noflags
6361  // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag
6362  //
6363  // map(s.ps)
6364  // &s, &(s.ps), sizeof(S2*), noflags
6365  //
6366  // map(s.ps->s.i)
6367  // &s, &(s.ps), sizeof(S2*), noflags
6368  // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag
6369  //
6370  // map(s.ps->ps)
6371  // &s, &(s.ps), sizeof(S2*), noflags
6372  // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag
6373  //
6374  // map(s.ps->ps->ps)
6375  // &s, &(s.ps), sizeof(S2*), noflags
6376  // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag
6377  // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag
6378  //
6379  // map(s.ps->ps->s.f[:22])
6380  // &s, &(s.ps), sizeof(S2*), noflags
6381  // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag
6382  // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag
6383  //
6384  // map(ps)
6385  // &ps, &ps, sizeof(S2*), noflags
6386  //
6387  // map(ps->i)
6388  // ps, &(ps->i), sizeof(int), noflags
6389  //
6390  // map(ps->s.f)
6391  // ps, &(ps->s.f[0]), 50*sizeof(float), noflags
6392  //
6393  // map(ps->p)
6394  // ps, &(ps->p), sizeof(double*), noflags
6395  //
6396  // map(ps->p[:22])
6397  // ps, &(ps->p), sizeof(double*), noflags
6398  // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag
6399  //
6400  // map(ps->ps)
6401  // ps, &(ps->ps), sizeof(S2*), noflags
6402  //
6403  // map(ps->ps->s.i)
6404  // ps, &(ps->ps), sizeof(S2*), noflags
6405  // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag
6406  //
6407  // map(ps->ps->ps)
6408  // ps, &(ps->ps), sizeof(S2*), noflags
6409  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag
6410  //
6411  // map(ps->ps->ps->ps)
6412  // ps, &(ps->ps), sizeof(S2*), noflags
6413  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag
6414  // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag
6415  //
6416  // map(ps->ps->ps->s.f[:22])
6417  // ps, &(ps->ps), sizeof(S2*), noflags
6418  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag
6419  // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag
6420 
6421  // Track if the map information being generated is the first for a capture.
6422  bool IsCaptureFirstInfo = IsFirstComponentList;
6423 
6424  // Scan the components from the base to the complete expression; the list is
// therefore walked in reverse.
6425  auto CI = Components.rbegin();
6426  auto CE = Components.rend();
6427  auto I = CI;
6428 
6429  // Track if the map information being generated is the first for a list of
6430  // components.
6431  bool IsExpressionFirstInfo = true;
6432  llvm::Value *BP = nullptr;
6433 
6434  if (auto *ME = dyn_cast<MemberExpr>(I->getAssociatedExpression())) {
6435  // The base is the 'this' pointer. The content of the pointer is going
6436  // to be the base of the field being mapped.
6437  BP = CGF.EmitScalarExpr(ME->getBase());
6438  } else {
6439  // The base is the reference to the variable.
6440  // BP = &Var.
6441  BP = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getPointer();
6442 
6443  // If the variable is a pointer and is being dereferenced (i.e. is not
6444  // the last component), the base has to be the pointer itself, not its
6445  // reference. References are ignored for mapping purposes.
6446  QualType Ty =
6447  I->getAssociatedDeclaration()->getType().getNonReferenceType();
6448  if (Ty->isAnyPointerType() && std::next(I) != CE) {
6449  auto PtrAddr = CGF.MakeNaturalAlignAddrLValue(BP, Ty);
6450  BP = CGF.EmitLoadOfPointerLValue(PtrAddr.getAddress(),
6451  Ty->castAs<PointerType>())
6452  .getPointer();
6453 
6454  // We do not need to generate individual map information for the
6455  // pointer, it can be associated with the combined storage.
6456  ++I;
6457  }
6458  }
6459 
// Flags that are added to every entry generated below.
6460  uint64_t DefaultFlags = IsImplicit ? OMP_MAP_IMPLICIT : 0;
6461  for (; I != CE; ++I) {
6462  auto Next = std::next(I);
6463 
6464  // We need to generate the addresses and sizes if this is the last
6465  // component, if the component is a pointer or if it is an array section
6466  // whose length can't be proved to be one. If this is a pointer, it
6467  // becomes the base address for the following components.
6468 
6469  // A final array section is one whose length can't be proved to be one.
6470  bool IsFinalArraySection =
6471  isFinalArraySectionExpression(I->getAssociatedExpression());
6472 
6473  // Get information on whether the element is a pointer. Have to do a
6474  // special treatment for array sections given that they are built-in
6475  // types.
6476  const auto *OASE =
6477  dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
6478  bool IsPointer =
6479  (OASE &&
6481  .getCanonicalType()
6482  ->isAnyPointerType()) ||
6483  I->getAssociatedExpression()->getType()->isAnyPointerType();
6484 
6485  if (Next == CE || IsPointer || IsFinalArraySection) {
6486 
6487  // If this is not the last component, we expect the pointer to be
6488  // associated with an array expression or member expression.
6489  assert((Next == CE ||
6490  isa<MemberExpr>(Next->getAssociatedExpression()) ||
6491  isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
6492  isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
6493  "Unexpected expression");
6494 
// LB is the address of the current component (the section pointer of the entry)
// and Size is the size reported for it.
6495  llvm::Value *LB =
6496  CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getPointer();
6497  auto *Size = getExprTypeSize(I->getAssociatedExpression());
6498 
6499  // If we have a member expression and the current component is a
6500  // reference, we have to map the reference too. Whenever we have a
6501  // reference, the section that reference refers to is going to be a
6502  // load instruction from the storage assigned to the reference.
6503  if (isa<MemberExpr>(I->getAssociatedExpression()) &&
6504  I->getAssociatedDeclaration()->getType()->isReferenceType()) {
6505  auto *LI = cast<llvm::LoadInst>(LB);
6506  auto *RefAddr = LI->getPointerOperand();
6507 
// Extra entry for the reference itself: pointer-sized, mapped as 'alloc'.
6508  BasePointers.push_back(BP);
6509  Pointers.push_back(RefAddr);
6510  Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
6511  Types.push_back(DefaultFlags |
6512  getMapTypeBits(
6513  /*MapType*/ OMPC_MAP_alloc,
6514  /*MapTypeModifier=*/OMPC_MAP_unknown,
6515  !IsExpressionFirstInfo, IsCaptureFirstInfo));
6516  IsExpressionFirstInfo = false;
6517  IsCaptureFirstInfo = false;
6518  // The reference will be the next base address.
6519  BP = RefAddr;
6520  }
6521 
6522  BasePointers.push_back(BP);
6523  Pointers.push_back(LB);
6524  Sizes.push_back(Size);
6525 
6526  // We need to add a pointer flag for each map that comes from the
6527  // same expression except for the first one. We also need to signal
6528  // this map is the first one that relates with the current capture
6529  // (there is a set of entries for each capture).
6530  Types.push_back(DefaultFlags | getMapTypeBits(MapType, MapTypeModifier,
6531  !IsExpressionFirstInfo,
6532  IsCaptureFirstInfo));
6533 
6534  // If we have a final array section, we are done with this expression.
6535  if (IsFinalArraySection)
6536  break;
6537 
6538  // The pointer becomes the base for the next element.
6539  if (Next != CE)
6540  BP = LB;
6541 
6542  IsExpressionFirstInfo = false;
6543  IsCaptureFirstInfo = false;
6544  }
6545  }
6546  }
6547 
6548  /// \brief Return the adjusted map modifiers if the declaration a capture
6549  /// refers to appears in a first-private clause. This is expected to be used
6550  /// only with directives that start with 'target'.
6551  unsigned adjustMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap,
6552  unsigned CurrentModifiers) {
6553  assert(Cap.capturesVariable() && "Expected capture by reference only!");
6554 
6555  // A first private variable captured by reference will use only the
6556  // 'private ptr' and 'map to' flag. Return the right flags if the captured
6557  // declaration is known as first-private in this handler.
6558  if (FirstPrivateDecls.count(Cap.getCapturedVar()))
6559  return MappableExprsHandler::OMP_MAP_PRIVATE |
6560  MappableExprsHandler::OMP_MAP_TO;
6561  // Reduction variable will use only the 'private ptr' and 'map to_from'
6562  // flag.
6563  if (ReductionDecls.count(Cap.getCapturedVar())) {
6564  return MappableExprsHandler::OMP_MAP_TO |
6565  MappableExprsHandler::OMP_MAP_FROM;
6566  }
6567 
6568  // We didn't modify anything.
6569  return CurrentModifiers;
6570  }
6571 
6572 public:
6573  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
6574  : CurDir(Dir), CGF(CGF) {
6575  // Extract firstprivate clause information.
6576  for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
6577  for (const auto *D : C->varlists())
6578  FirstPrivateDecls.insert(
6579  cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
6580  for (const auto *C : Dir.getClausesOfKind<OMPReductionClause>()) {
6581  for (const auto *D : C->varlists()) {
6582  ReductionDecls.insert(
6583  cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
6584  }
6585  }
6586  // Extract device pointer clause information.
6587  for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
6588  for (auto L : C->component_lists())
6589  DevPointersMap[L.first].push_back(L.second);
6590  }
6591 
6592  /// \brief Generate all the base pointers, section pointers, sizes and map
6593  /// types for the extracted mappable expressions. Also, for each item that
6594  /// relates with a device pointer, a pair of the relevant declaration and
6595  /// index where it occurs is appended to the device pointers info array.
6596  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
6597  MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
6598  MapFlagsArrayTy &Types) const {
6599  BasePointers.clear();
6600  Pointers.clear();
6601  Sizes.clear();
6602  Types.clear();
6603 
6604  struct MapInfo {
6605  /// Kind that defines how a device pointer has to be returned.
6606  enum ReturnPointerKind {
6607  // Don't have to return any pointer.
6608  RPK_None,
6609  // Pointer is the base of the declaration.
6610  RPK_Base,
6611  // Pointer is a member of the base declaration - 'this'
6612  RPK_Member,
6613  // Pointer is a reference and a member of the base declaration - 'this'
6614  RPK_MemberReference,
6615  };
6618  OpenMPMapClauseKind MapTypeModifier = OMPC_MAP_unknown;
6619  ReturnPointerKind ReturnDevicePointer = RPK_None;
6620  bool IsImplicit = false;
6621 
6622  MapInfo() = default;
6623  MapInfo(
6625  OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
6626  ReturnPointerKind ReturnDevicePointer, bool IsImplicit)
6627  : Components(Components), MapType(MapType),
6628  MapTypeModifier(MapTypeModifier),
6629  ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
6630  };
6631 
6632  // We have to process the component lists that relate with the same
6633  // declaration in a single chunk so that we can generate the map flags
6634  // correctly. Therefore, we organize all lists in a map.
6635  llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
6636 
6637  // Helper function to fill the information map for the different supported
6638  // clauses.
6639  auto &&InfoGen = [&Info](
6640  const ValueDecl *D,
6642  OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier,
6643  MapInfo::ReturnPointerKind ReturnDevicePointer, bool IsImplicit) {
6644  const ValueDecl *VD =
6645  D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
6646  Info[VD].emplace_back(L, MapType, MapModifier, ReturnDevicePointer,
6647  IsImplicit);
6648  };
6649 
6650  // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
6651  for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
6652  for (auto L : C->component_lists()) {
6653  InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier(),
6654  MapInfo::RPK_None, C->isImplicit());
6655  }
6656  for (auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
6657  for (auto L : C->component_lists()) {
6658  InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown,
6659  MapInfo::RPK_None, C->isImplicit());
6660  }
6661  for (auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
6662  for (auto L : C->component_lists()) {
6663  InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown,
6664  MapInfo::RPK_None, C->isImplicit());
6665  }
6666 
6667  // Look at the use_device_ptr clause information and mark the existing map
6668  // entries as such. If there is no map information for an entry in the
6669  // use_device_ptr list, we create one with map type 'alloc' and zero size
6670  // section. It is the user fault if that was not mapped before.
6671  // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
6672  for (auto *C : this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>())
6673  for (auto L : C->component_lists()) {
6674  assert(!L.second.empty() && "Not expecting empty list of components!");
6675  const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
6676  VD = cast<ValueDecl>(VD->getCanonicalDecl());
6677  auto *IE = L.second.back().getAssociatedExpression();
6678  // If the first component is a member expression, we have to look into
6679  // 'this', which maps to null in the map of map information. Otherwise
6680  // look directly for the information.
6681  auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
6682 
6683  // We potentially have map information for this declaration already.
6684  // Look for the first set of components that refer to it.
6685  if (It != Info.end()) {
6686  auto CI = std::find_if(
6687  It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
6688  return MI.Components.back().getAssociatedDeclaration() == VD;
6689  });
6690  // If we found a map entry, signal that the pointer has to be returned
6691  // and move on to the next declaration.
6692  if (CI != It->second.end()) {
6693  CI->ReturnDevicePointer = isa<MemberExpr>(IE)
6694  ? (VD->getType()->isReferenceType()
6695  ? MapInfo::RPK_MemberReference
6696  : MapInfo::RPK_Member)
6697  : MapInfo::RPK_Base;
6698  continue;
6699  }
6700  }
6701 
6702  // We didn't find any match in our map information - generate a zero
6703  // size array section.
6704  // FIXME: MSVC 2013 seems to require this-> to find member CGF.
6705  llvm::Value *Ptr =
6706  this->CGF
6707  .EmitLoadOfLValue(this->CGF.EmitLValue(IE), SourceLocation())
6708  .getScalarVal();
6709  BasePointers.push_back({Ptr, VD});
6710  Pointers.push_back(Ptr);
6711  Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
6712  Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
6713  }
6714 
6715  for (auto &M : Info) {
6716  // We need to know when we generate information for the first component
6717  // associated with a capture, because the mapping flags depend on it.
6718  bool IsFirstComponentList = true;
6719  for (MapInfo &L : M.second) {
6720  assert(!L.Components.empty() &&
6721  "Not expecting declaration with no component lists.");
6722 
6723  // Remember the current base pointer index.
6724  unsigned CurrentBasePointersIdx = BasePointers.size();
6725  // FIXME: MSVC 2013 seems to require this-> to find the member method.
6726  this->generateInfoForComponentList(
6727  L.MapType, L.MapTypeModifier, L.Components, BasePointers, Pointers,
6728  Sizes, Types, IsFirstComponentList, L.IsImplicit);
6729 
6730  // If this entry relates with a device pointer, set the relevant
6731  // declaration and add the 'return pointer' flag.
6732  if (IsFirstComponentList &&
6733  L.ReturnDevicePointer != MapInfo::RPK_None) {
6734  // If the pointer is not the base of the map, we need to skip the
6735  // base. If it is a reference in a member field, we also need to skip
6736  // the map of the reference.
6737  if (L.ReturnDevicePointer != MapInfo::RPK_Base) {
6738  ++CurrentBasePointersIdx;
6739  if (L.ReturnDevicePointer == MapInfo::RPK_MemberReference)
6740  ++CurrentBasePointersIdx;
6741  }
6742  assert(BasePointers.size() > CurrentBasePointersIdx &&
6743  "Unexpected number of mapped base pointers.");
6744 
6745  auto *RelevantVD = L.Components.back().getAssociatedDeclaration();
6746  assert(RelevantVD &&
6747  "No relevant declaration related with device pointer??");
6748 
6749  BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
6750  Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
6751  }
6752  IsFirstComponentList = false;
6753  }
6754  }
6755  }
6756 
6757  /// \brief Generate the base pointers, section pointers, sizes and map types
6758  /// associated to a given capture.
6759  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
6760  llvm::Value *Arg,
6761  MapBaseValuesArrayTy &BasePointers,
6762  MapValuesArrayTy &Pointers,
6763  MapValuesArrayTy &Sizes,
6764  MapFlagsArrayTy &Types) const {
6765  assert(!Cap->capturesVariableArrayType() &&
6766  "Not expecting to generate map info for a variable array type!");
6767 
6768  BasePointers.clear();
6769  Pointers.clear();
6770  Sizes.clear();
6771  Types.clear();
6772 
6773  // We need to know when we generating information for the first component
6774  // associated with a capture, because the mapping flags depend on it.
6775  bool IsFirstComponentList = true;
6776 
6777  const ValueDecl *VD =
6778  Cap->capturesThis()
6779  ? nullptr
6780  : cast<ValueDecl>(Cap->getCapturedVar()->getCanonicalDecl());
6781 
6782  // If this declaration appears in a is_device_ptr clause we just have to
6783  // pass the pointer by value. If it is a reference to a declaration, we just
6784  // pass its value, otherwise, if it is a member expression, we need to map
6785  // 'to' the field.
6786  if (!VD) {
6787  auto It = DevPointersMap.find(VD);
6788  if (It != DevPointersMap.end()) {
6789  for (auto L : It->second) {
6790  generateInfoForComponentList(
6791  /*MapType=*/OMPC_MAP_to, /*MapTypeModifier=*/OMPC_MAP_unknown, L,
6792  BasePointers, Pointers, Sizes, Types, IsFirstComponentList,
6793  /*IsImplicit=*/false);
6794  IsFirstComponentList = false;
6795  }
6796  return;
6797  }
6798  } else if (DevPointersMap.count(VD)) {
6799  BasePointers.push_back({Arg, VD});
6800  Pointers.push_back(Arg);
6801  Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
6802  Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
6803  return;
6804  }
6805 
6806  // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
6807  for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
6808  for (auto L : C->decl_component_lists(VD)) {
6809  assert(L.first == VD &&
6810  "We got information for the wrong declaration??");
6811  assert(!L.second.empty() &&
6812  "Not expecting declaration with no component lists.");
6813  generateInfoForComponentList(
6814  C->getMapType(), C->getMapTypeModifier(), L.second, BasePointers,
6815  Pointers, Sizes, Types, IsFirstComponentList, C->isImplicit());
6816  IsFirstComponentList = false;
6817  }
6818 
6819  return;
6820  }
6821 
6822  /// \brief Generate the default map information for a given capture \a CI,
6823  /// record field declaration \a RI and captured value \a CV.
6824  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
6825  const FieldDecl &RI, llvm::Value *CV,
6826  MapBaseValuesArrayTy &CurBasePointers,
6827  MapValuesArrayTy &CurPointers,
6828  MapValuesArrayTy &CurSizes,
6829  MapFlagsArrayTy &CurMapTypes) {
6830 
6831  // Do the default mapping.
6832  if (CI.capturesThis()) {
6833  CurBasePointers.push_back(CV);
6834  CurPointers.push_back(CV);
6835  const PointerType *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
6836  CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType()));
6837  // Default map type.
6838  CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
6839  } else if (CI.capturesVariableByCopy()) {
6840  CurBasePointers.push_back(CV);
6841  CurPointers.push_back(CV);
6842  if (!RI.getType()->isAnyPointerType()) {
6843  // We have to signal to the runtime captures passed by value that are
6844  // not pointers.
6845  CurMapTypes.push_back(OMP_MAP_LITERAL);
6846  CurSizes.push_back(CGF.getTypeSize(RI.getType()));
6847  } else {
6848  // Pointers are implicitly mapped with a zero size and no flags
6849  // (other than first map that is added for all implicit maps).
6850  CurMapTypes.push_back(0u);
6851  CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy));
6852  }
6853  } else {
6854  assert(CI.capturesVariable() && "Expected captured reference.");
6855  CurBasePointers.push_back(CV);
6856  CurPointers.push_back(CV);
6857 
6858  const ReferenceType *PtrTy =
6859  cast<ReferenceType>(RI.getType().getTypePtr());
6860  QualType ElementType = PtrTy->getPointeeType();
6861  CurSizes.push_back(CGF.getTypeSize(ElementType));
6862  // The default map type for a scalar/complex type is 'to' because by
6863  // default the value doesn't have to be retrieved. For an aggregate
6864  // type, the default is 'tofrom'.
6865  CurMapTypes.emplace_back(adjustMapModifiersForPrivateClauses(
6866  CI, ElementType->isAggregateType() ? (OMP_MAP_TO | OMP_MAP_FROM)
6867  : OMP_MAP_TO));
6868  }
6869  // Every default map produces a single argument which is a target parameter.
6870  CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
6871  }
6872 };
6873 
/// \brief Device IDs with a reserved meaning for the offloading runtime calls
/// emitted in this file.
enum OpenMPOffloadingReservedDeviceIDs {
  /// \brief Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
6879 } // anonymous namespace
6880 
6881 /// \brief Emit the arrays used to pass the captures and map information to the
6882 /// offloading runtime library. If there is no map or capture information,
6883 /// return nullptr by reference.
6884 static void
6890  CGOpenMPRuntime::TargetDataInfo &Info) {
6891  auto &CGM = CGF.CGM;
6892  auto &Ctx = CGF.getContext();
6893 
6894  // Reset the array information.
6895  Info.clearArrayInfo();
6896  Info.NumberOfPtrs = BasePointers.size();
6897 
6898  if (Info.NumberOfPtrs) {
6899  // Detect if we have any capture size requiring runtime evaluation of the
6900  // size so that a constant array could be eventually used.
6901  bool hasRuntimeEvaluationCaptureSize = false;
6902  for (auto *S : Sizes)
6903  if (!isa<llvm::Constant>(S)) {
6904  hasRuntimeEvaluationCaptureSize = true;
6905  break;
6906  }
6907 
6908  llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
6909  QualType PointerArrayType =
6910  Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
6911  /*IndexTypeQuals=*/0);
6912 
6913  Info.BasePointersArray =
6914  CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
6915  Info.PointersArray =
6916  CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
6917 
6918  // If we don't have any VLA types or other types that require runtime
6919  // evaluation, we can use a constant array for the map sizes, otherwise we
6920  // need to fill up the arrays as we do for the pointers.
6921  if (hasRuntimeEvaluationCaptureSize) {
6922  QualType SizeArrayType = Ctx.getConstantArrayType(
6923  Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
6924  /*IndexTypeQuals=*/0);
6925  Info.SizesArray =
6926  CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
6927  } else {
6928  // We expect all the sizes to be constant, so we collect them to create
6929  // a constant array.
6931  for (auto S : Sizes)
6932  ConstSizes.push_back(cast<llvm::Constant>(S));
6933 
6934  auto *SizesArrayInit = llvm::ConstantArray::get(
6935  llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
6936  auto *SizesArrayGbl = new llvm::GlobalVariable(
6937  CGM.getModule(), SizesArrayInit->getType(),
6938  /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
6939  SizesArrayInit, ".offload_sizes");
6940  SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
6941  Info.SizesArray = SizesArrayGbl;
6942  }
6943 
6944  // The map types are always constant so we don't need to generate code to
6945  // fill arrays. Instead, we create an array constant.
6946  llvm::Constant *MapTypesArrayInit =
6947  llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes);
6948  auto *MapTypesArrayGbl = new llvm::GlobalVariable(
6949  CGM.getModule(), MapTypesArrayInit->getType(),
6950  /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
6951  MapTypesArrayInit, ".offload_maptypes");
6952  MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
6953  Info.MapTypesArray = MapTypesArrayGbl;
6954 
6955  for (unsigned i = 0; i < Info.NumberOfPtrs; ++i) {
6956  llvm::Value *BPVal = *BasePointers[i];
6957  llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
6958  llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
6959  Info.BasePointersArray, 0, i);
6961  BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
6962  Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
6963  CGF.Builder.CreateStore(BPVal, BPAddr);
6964 
6965  if (Info.requiresDevicePointerInfo())
6966  if (auto *DevVD = BasePointers[i].getDevicePtrDecl())
6967  Info.CaptureDeviceAddrMap.insert(std::make_pair(DevVD, BPAddr));
6968 
6969  llvm::Value *PVal = Pointers[i];
6970  llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
6971  llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
6972  Info.PointersArray, 0, i);
6974  P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
6975  Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
6976  CGF.Builder.CreateStore(PVal, PAddr);
6977 
6978  if (hasRuntimeEvaluationCaptureSize) {
6979  llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
6980  llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs),
6981  Info.SizesArray,
6982  /*Idx0=*/0,
6983  /*Idx1=*/i);
6984  Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
6985  CGF.Builder.CreateStore(
6986  CGF.Builder.CreateIntCast(Sizes[i], CGM.SizeTy, /*isSigned=*/true),
6987  SAddr);
6988  }
6989  }
6990  }
6991 }
6992 /// \brief Emit the arguments to be passed to the runtime library based on the
6993 /// arrays of pointers, sizes and map types.
6995  CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
6996  llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
6997  llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
6998  auto &CGM = CGF.CGM;
6999  if (Info.NumberOfPtrs) {
7000  BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
7001  llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
7002  Info.BasePointersArray,
7003  /*Idx0=*/0, /*Idx1=*/0);
7004  PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
7005  llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
7006  Info.PointersArray,
7007  /*Idx0=*/0,
7008  /*Idx1=*/0);
7009  SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
7010  llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray,
7011  /*Idx0=*/0, /*Idx1=*/0);
7012  MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
7013  llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
7014  Info.MapTypesArray,
7015  /*Idx0=*/0,
7016  /*Idx1=*/0);
7017  } else {
7018  BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
7019  PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
7020  SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
7021  MapTypesArrayArg =
7022  llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
7023  }
7024 }
7025 
7027  const OMPExecutableDirective &D,
7028  llvm::Value *OutlinedFn,
7029  llvm::Value *OutlinedFnID,
7030  const Expr *IfCond, const Expr *Device,
7031  ArrayRef<llvm::Value *> CapturedVars) {
7032  if (!CGF.HaveInsertPoint())
7033  return;
7034 
7035  assert(OutlinedFn && "Invalid outlined function!");
7036 
7037  // Fill up the arrays with all the captured variables.
7043 
7048 
7049  // Get mappable expression information.
7050  MappableExprsHandler MEHandler(D, CGF);
7051 
7052  const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
7053  auto RI = CS.getCapturedRecordDecl()->field_begin();
7054  auto CV = CapturedVars.begin();
7056  CE = CS.capture_end();
7057  CI != CE; ++CI, ++RI, ++CV) {
7058  CurBasePointers.clear();
7059  CurPointers.clear();
7060  CurSizes.clear();
7061  CurMapTypes.clear();
7062 
7063  // VLA sizes are passed to the outlined region by copy and do not have map
7064  // information associated.
7065  if (CI->capturesVariableArrayType()) {
7066  CurBasePointers.push_back(*CV);
7067  CurPointers.push_back(*CV);
7068  CurSizes.push_back(CGF.getTypeSize(RI->getType()));
7069  // Copy to the device as an argument. No need to retrieve it.
7070  CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
7071  MappableExprsHandler::OMP_MAP_TARGET_PARAM);
7072  } else {
7073  // If we have any information in the map clause, we use it, otherwise we
7074  // just do a default mapping.
7075  MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
7076  CurSizes, CurMapTypes);
7077  if (CurBasePointers.empty())
7078  MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
7079  CurPointers, CurSizes, CurMapTypes);
7080  }
7081  // We expect to have at least an element of information for this capture.
7082  assert(!CurBasePointers.empty() && "Non-existing map pointer for capture!");
7083  assert(CurBasePointers.size() == CurPointers.size() &&
7084  CurBasePointers.size() == CurSizes.size() &&
7085  CurBasePointers.size() == CurMapTypes.size() &&
7086  "Inconsistent map information sizes!");
7087 
7088  // The kernel args are always the first elements of the base pointers
7089  // associated with a capture.
7090  KernelArgs.push_back(*CurBasePointers.front());
7091  // We need to append the results of this capture to what we already have.
7092  BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
7093  Pointers.append(CurPointers.begin(), CurPointers.end());
7094  Sizes.append(CurSizes.begin(), CurSizes.end());
7095  MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
7096  }
7097 
7098  // Fill up the pointer arrays and transfer execution to the device.
7099  auto &&ThenGen = [this, &BasePointers, &Pointers, &Sizes, &MapTypes, Device,
7100  OutlinedFn, OutlinedFnID, &D,
7101  &KernelArgs](CodeGenFunction &CGF, PrePostActionTy &) {
7102  auto &RT = CGF.CGM.getOpenMPRuntime();
7103  // Emit the offloading arrays.
7104  TargetDataInfo Info;
7105  emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
7107  Info.PointersArray, Info.SizesArray,
7108  Info.MapTypesArray, Info);
7109 
7110  // On top of the arrays that were filled up, the target offloading call
7111  // takes as arguments the device id as well as the host pointer. The host
7112  // pointer is used by the runtime library to identify the current target
7113  // region, so it only has to be unique and not necessarily point to
7114  // anything. It could be the pointer to the outlined function that
7115  // implements the target region, but we aren't using that so that the
7116  // compiler doesn't need to keep that, and could therefore inline the host
7117  // function if proven worthwhile during optimization.
7118 
7119  // From this point on, we need to have an ID of the target region defined.
7120  assert(OutlinedFnID && "Invalid outlined function ID!");
7121 
7122  // Emit device ID if any.
7123  llvm::Value *DeviceID;
7124  if (Device) {
7125  DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
7126  CGF.Int64Ty, /*isSigned=*/true);
7127  } else {
7128  DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
7129  }
7130 
7131  // Emit the number of elements in the offloading arrays.
7132  llvm::Value *PointerNum = CGF.Builder.getInt32(BasePointers.size());
7133 
7134  // Return value of the runtime offloading call.
7135  llvm::Value *Return;
7136 
7137  auto *NumTeams = emitNumTeamsForTargetDirective(RT, CGF, D);
7138  auto *NumThreads = emitNumThreadsForTargetDirective(RT, CGF, D);
7139 
7140  bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
7141  // The target region is an outlined function launched by the runtime
7142  // via calls __tgt_target() or __tgt_target_teams().
7143  //
7144  // __tgt_target() launches a target region with one team and one thread,
7145  // executing a serial region. This master thread may in turn launch
7146  // more threads within its team upon encountering a parallel region,
7147  // however, no additional teams can be launched on the device.
7148  //
7149  // __tgt_target_teams() launches a target region with one or more teams,
7150  // each with one or more threads. This call is required for target
7151  // constructs such as:
7152  // 'target teams'
7153  // 'target' / 'teams'
7154  // 'target teams distribute parallel for'
7155  // 'target parallel'
7156  // and so on.
7157  //
7158  // Note that on the host and CPU targets, the runtime implementation of
7159  // these calls simply call the outlined function without forking threads.
7160  // The outlined functions themselves have runtime calls to
7161  // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
7162  // the compiler in emitTeamsCall() and emitParallelCall().
7163  //
7164  // In contrast, on the NVPTX target, the implementation of
7165  // __tgt_target_teams() launches a GPU kernel with the requested number
7166  // of teams and threads so no additional calls to the runtime are required.
7167  if (NumTeams) {
7168  // If we have NumTeams defined this means that we have an enclosed teams
7169  // region. Therefore we also expect to have NumThreads defined. These two
7170  // values should be defined in the presence of a teams directive,
7171  // regardless of having any clauses associated. If the user is using teams
7172  // but no clauses, these two values will be the default that should be
7173  // passed to the runtime library - a 32-bit integer with the value zero.
7174  assert(NumThreads && "Thread limit expression should be available along "
7175  "with number of teams.");
7176  llvm::Value *OffloadingArgs[] = {
7177  DeviceID, OutlinedFnID,
7178  PointerNum, Info.BasePointersArray,
7179  Info.PointersArray, Info.SizesArray,
7180  Info.MapTypesArray, NumTeams,
7181  NumThreads};
7182  Return = CGF.EmitRuntimeCall(
7183  RT.createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
7185  OffloadingArgs);
7186  } else {
7187  llvm::Value *OffloadingArgs[] = {
7188  DeviceID, OutlinedFnID,
7189  PointerNum, Info.BasePointersArray,
7190  Info.PointersArray, Info.SizesArray,
7191  Info.MapTypesArray};
7192  Return = CGF.EmitRuntimeCall(
7193  RT.createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
7194  : OMPRTL__tgt_target),
7195  OffloadingArgs);
7196  }
7197 
7198  // Check the error code and execute the host version if required.
7199  llvm::BasicBlock *OffloadFailedBlock =
7200  CGF.createBasicBlock("omp_offload.failed");
7201  llvm::BasicBlock *OffloadContBlock =
7202  CGF.createBasicBlock("omp_offload.cont");
7203  llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
7204  CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
7205 
7206  CGF.EmitBlock(OffloadFailedBlock);
7207  emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, KernelArgs);
7208  CGF.EmitBranch(OffloadContBlock);
7209 
7210  CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
7211  };
7212 
7213  // Notify that the host version must be executed.
7214  auto &&ElseGen = [this, &D, OutlinedFn, &KernelArgs](CodeGenFunction &CGF,
7215  PrePostActionTy &) {
7216  emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn,
7217  KernelArgs);
7218  };
7219 
7220  // If we have a target function ID it means that we need to support
7221  // offloading, otherwise, just execute on the host. We need to execute on host
7222  // regardless of the conditional in the if clause if, e.g., the user do not
7223  // specify target triples.
7224  if (OutlinedFnID) {
7225  if (IfCond)
7226  emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
7227  else {
7228  RegionCodeGenTy ThenRCG(ThenGen);
7229  ThenRCG(CGF);
7230  }
7231  } else {
7232  RegionCodeGenTy ElseRCG(ElseGen);
7233  ElseRCG(CGF);
7234  }
7235 }
7236 
7238  StringRef ParentName) {
7239  if (!S)
7240  return;
7241 
7242  // Codegen OMP target directives that offload compute to the device.
7243  bool requiresDeviceCodegen =
7244  isa<OMPExecutableDirective>(S) &&
7246  cast<OMPExecutableDirective>(S)->getDirectiveKind());
7247 
7248  if (requiresDeviceCodegen) {
7249  auto &E = *cast<OMPExecutableDirective>(S);
7250  unsigned DeviceID;
7251  unsigned FileID;
7252  unsigned Line;
7253  getTargetEntryUniqueInfo(CGM.getContext(), E.getLocStart(), DeviceID,
7254  FileID, Line);
7255 
7256  // Is this a target region that should not be emitted as an entry point? If
7257  // so just signal we are done with this target region.
7259  ParentName, Line))
7260  return;
7261 
7262  switch (S->getStmtClass()) {
7263  case Stmt::OMPTargetDirectiveClass:
7265  CGM, ParentName, cast<OMPTargetDirective>(*S));
7266  break;
7267  case Stmt::OMPTargetParallelDirectiveClass:
7269  CGM, ParentName, cast<OMPTargetParallelDirective>(*S));
7270  break;
7271  case Stmt::OMPTargetTeamsDirectiveClass:
7273  CGM, ParentName, cast<OMPTargetTeamsDirective>(*S));
7274  break;
7275  case Stmt::OMPTargetTeamsDistributeDirectiveClass:
7277  CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(*S));
7278  break;
7279  case Stmt::OMPTargetTeamsDistributeSimdDirectiveClass:
7281  CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(*S));
7282  break;
7283  case Stmt::OMPTargetParallelForDirectiveClass:
7285  CGM, ParentName, cast<OMPTargetParallelForDirective>(*S));
7286  break;
7287  case Stmt::OMPTargetParallelForSimdDirectiveClass:
7289  CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(*S));
7290  break;
7291  case Stmt::OMPTargetSimdDirectiveClass:
7293  CGM, ParentName, cast<OMPTargetSimdDirective>(*S));
7294  break;
7295  default:
7296  llvm_unreachable("Unknown target directive for OpenMP device codegen.");
7297  }
7298  return;
7299  }
7300 
7301  if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) {
7302  if (!E->hasAssociatedStmt())
7303  return;
7304 
7306  cast<CapturedStmt>(E->getAssociatedStmt())->getCapturedStmt(),
7307  ParentName);
7308  return;
7309  }
7310 
7311  // If this is a lambda function, look into its body.
7312  if (auto *L = dyn_cast<LambdaExpr>(S))
7313  S = L->getBody();
7314 
7315  // Keep looking for target regions recursively.
7316  for (auto *II : S->children())
7317  scanForTargetRegionsFunctions(II, ParentName);
7318 }
7319 
7321  auto &FD = *cast<FunctionDecl>(GD.getDecl());
7322 
7323  // If emitting code for the host, we do not process FD here. Instead we do
7324  // the normal code generation.
7325  if (!CGM.getLangOpts().OpenMPIsDevice)
7326  return false;
7327 
7328  // Try to detect target regions in the function.
7329  scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD));
7330 
7331  // We should not emit any function other that the ones created during the
7332  // scanning. Therefore, we signal that this function is completely dealt
7333  // with.
7334  return true;
7335 }
7336 
7338  if (!CGM.getLangOpts().OpenMPIsDevice)
7339  return false;
7340 
7341  // Check if there are Ctors/Dtors in this declaration and look for target
7342  // regions in it. We use the complete variant to produce the kernel name
7343  // mangling.
7344  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
7345  if (auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
7346  for (auto *Ctor : RD->ctors()) {
7347  StringRef ParentName =
7349  scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
7350  }
7351  auto *Dtor = RD->getDestructor();
7352  if (Dtor) {
7353  StringRef ParentName =
7355  scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
7356  }
7357  }
7358 
7359  // If we are in target mode, we do not emit any global (declare target is not
7360  // implemented yet). Therefore we signal that GD was processed in this case.
7361  return true;
7362 }
7363 
7365  auto *VD = GD.getDecl();
7366  if (isa<FunctionDecl>(VD))
7367  return emitTargetFunctions(GD);
7368 
7369  return emitTargetGlobalVariable(GD);
7370 }
7371 
7373  // If we have offloading in the current module, we need to emit the entries
7374  // now and register the offloading descriptor.
7376 
7377  // Create and register the offloading binary descriptors. This is the main
7378  // entity that captures all the information about offloading in the current
7379  // compilation unit.
7381 }
7382 
7384  const OMPExecutableDirective &D,
7385  SourceLocation Loc,
7386  llvm::Value *OutlinedFn,
7387  ArrayRef<llvm::Value *> CapturedVars) {
7388  if (!CGF.HaveInsertPoint())
7389  return;
7390 
7391  auto *RTLoc = emitUpdateLocation(CGF, Loc);
7393 
7394  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
7395  llvm::Value *Args[] = {
7396  RTLoc,
7397  CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
7398  CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
7400  RealArgs.append(std::begin(Args), std::end(Args));
7401  RealArgs.append(CapturedVars.begin(), CapturedVars.end());
7402 
7404  CGF.EmitRuntimeCall(RTLFn, RealArgs);
7405 }
7406 
7408  const Expr *NumTeams,
7409  const Expr *ThreadLimit,
7410  SourceLocation Loc) {
7411  if (!CGF.HaveInsertPoint())
7412  return;
7413 
7414  auto *RTLoc = emitUpdateLocation(CGF, Loc);
7415 
7416  llvm::Value *NumTeamsVal =
7417  (NumTeams)
7418  ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
7419  CGF.CGM.Int32Ty, /* isSigned = */ true)
7420  : CGF.Builder.getInt32(0);
7421 
7422  llvm::Value *ThreadLimitVal =
7423  (ThreadLimit)
7424  ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
7425  CGF.CGM.Int32Ty, /* isSigned = */ true)
7426  : CGF.Builder.getInt32(0);
7427 
7428  // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
7429  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
7430  ThreadLimitVal};
7432  PushNumTeamsArgs);
7433 }
7434 
7436  CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
7437  const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
7438  if (!CGF.HaveInsertPoint())
7439  return;
7440 
7441  // Action used to replace the default codegen action and turn privatization
7442  // off.
7443  PrePostActionTy NoPrivAction;
7444 
7445  // Generate the code for the opening of the data environment. Capture all the
7446  // arguments of the runtime call by reference because they are used in the
7447  // closing of the region.
7448  auto &&BeginThenGen = [this, &D, Device, &Info,
7449  &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
7450  // Fill up the arrays with all the mapped variables.
7455 
7456  // Get map clause information.
7457  MappableExprsHandler MCHandler(D, CGF);
7458  MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
7459 
7460  // Fill up the arrays and create the arguments.
7461  emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
7462 
7463  llvm::Value *BasePointersArrayArg = nullptr;
7464  llvm::Value *PointersArrayArg = nullptr;
7465  llvm::Value *SizesArrayArg = nullptr;
7466  llvm::Value *MapTypesArrayArg = nullptr;
7467  emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
7468  SizesArrayArg, MapTypesArrayArg, Info);
7469 
7470  // Emit device ID if any.
7471  llvm::Value *DeviceID = nullptr;
7472  if (Device) {
7473  DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
7474  CGF.Int64Ty, /*isSigned=*/true);
7475  } else {
7476  DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
7477  }
7478 
7479  // Emit the number of elements in the offloading arrays.
7480  auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
7481 
7482  llvm::Value *OffloadingArgs[] = {
7483  DeviceID, PointerNum, BasePointersArrayArg,
7484  PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
7486  OffloadingArgs);
7487 
7488  // If device pointer privatization is required, emit the body of the region
7489  // here. It will have to be duplicated: with and without privatization.
7490  if (!Info.CaptureDeviceAddrMap.empty())
7491  CodeGen(CGF);
7492  };
7493 
7494  // Generate code for the closing of the data region.
7495  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
7496  PrePostActionTy &) {
7497  assert(Info.isValid() && "Invalid data environment closing arguments.");
7498 
7499  llvm::Value *BasePointersArrayArg = nullptr;
7500  llvm::Value *PointersArrayArg = nullptr;
7501  llvm::Value *SizesArrayArg = nullptr;
7502  llvm::Value *MapTypesArrayArg = nullptr;
7503  emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
7504  SizesArrayArg, MapTypesArrayArg, Info);
7505 
7506  // Emit device ID if any.
7507  llvm::Value *DeviceID = nullptr;
7508  if (Device) {
7509  DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
7510  CGF.Int64Ty, /*isSigned=*/true);
7511  } else {
7512  DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
7513  }
7514 
7515  // Emit the number of elements in the offloading arrays.
7516  auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
7517 
7518  llvm::Value *OffloadingArgs[] = {
7519  DeviceID, PointerNum, BasePointersArrayArg,
7520  PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
7522  OffloadingArgs);
7523  };
7524 
7525  // If we need device pointer privatization, we need to emit the body of the
7526  // region with no privatization in the 'else' branch of the conditional.
7527  // Otherwise, we don't have to do anything.
7528  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
7529  PrePostActionTy &) {
7530  if (!Info.CaptureDeviceAddrMap.empty()) {
7531  CodeGen.setAction(NoPrivAction);
7532  CodeGen(CGF);
7533  }
7534  };
7535 
7536  // We don't have to do anything to close the region if the if clause evaluates
7537  // to false.
7538  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
7539 
7540  if (IfCond) {
7541  emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
7542  } else {
7543  RegionCodeGenTy RCG(BeginThenGen);
7544  RCG(CGF);
7545  }
7546 
7547  // If we don't require privatization of device pointers, we emit the body in
7548  // between the runtime calls. This avoids duplicating the body code.
7549  if (Info.CaptureDeviceAddrMap.empty()) {
7550  CodeGen.setAction(NoPrivAction);
7551  CodeGen(CGF);
7552  }
7553 
7554  if (IfCond) {
7555  emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
7556  } else {
7557  RegionCodeGenTy RCG(EndThenGen);
7558  RCG(CGF);
7559  }
7560 }
7561 
7563  CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
7564  const Expr *Device) {
7565  if (!CGF.HaveInsertPoint())
7566  return;
7567 
7568  assert((isa<OMPTargetEnterDataDirective>(D) ||
7569  isa<OMPTargetExitDataDirective>(D) ||
7570  isa<OMPTargetUpdateDirective>(D)) &&
7571  "Expecting either target enter, exit data, or update directives.");
7572 
7574  llvm::Value *MapTypesArray = nullptr;
7575  // Generate the code for the opening of the data environment.
7576  auto &&ThenGen = [this, &D, Device, &InputInfo,
7577  &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
7578  // Emit device ID if any.
7579  llvm::Value *DeviceID = nullptr;
7580  if (Device) {
7581  DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
7582  CGF.Int64Ty, /*isSigned=*/true);
7583  } else {
7584  DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
7585  }
7586 
7587  // Emit the number of elements in the offloading arrays.
7588  llvm::Constant *PointerNum =
7589  CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
7590 
7591  llvm::Value *OffloadingArgs[] = {DeviceID,
7592  PointerNum,
7593  InputInfo.BasePointersArray.getPointer(),
7594  InputInfo.PointersArray.getPointer(),
7595  InputInfo.SizesArray.getPointer(),
7596  MapTypesArray};
7597 
7598  // Select the right runtime function call for each expected standalone
7599  // directive.
7600  const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
7601  OpenMPRTLFunction RTLFn;
7602  switch (D.getDirectiveKind()) {
7603  default:
7604  llvm_unreachable("Unexpected standalone target data directive.");
7605  break;
7606  case OMPD_target_enter_data:
7607  RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
7609  break;
7610  case OMPD_target_exit_data:
7611  RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
7613  break;
7614  case OMPD_target_update:
7615  RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
7617  break;
7618  }
7619  CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
7620  };
7621 
7622  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
7623  CodeGenFunction &CGF, PrePostActionTy &) {
7624  // Fill up the arrays with all the mapped variables.
7629 
7630  // Get map clause information.
7631  MappableExprsHandler MEHandler(D, CGF);
7632  MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
7633 
7634  TargetDataInfo Info;
7635  // Fill up the arrays and create the arguments.
7636  emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
7638  Info.PointersArray, Info.SizesArray,
7639  Info.MapTypesArray, Info);
7640  InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
7641  InputInfo.BasePointersArray =
7643  InputInfo.PointersArray =
7644  Address(Info.PointersArray, CGM.getPointerAlign());
7645  InputInfo.SizesArray =
7646  Address(Info.SizesArray, CGM.getPointerAlign());
7647  MapTypesArray = Info.MapTypesArray;
7648  if (D.hasClausesOfKind<OMPDependClause>())
7649  CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
7650  else
7651  emitInlinedDirective(CGF, OMPD_target_update, ThenGen);
7652  };
7653 
7654  if (IfCond)
7655  emitOMPIfClause(CGF, IfCond, TargetThenGen,
7656  [](CodeGenFunction &CGF, PrePostActionTy &) {});
7657  else {
7658  RegionCodeGenTy ThenRCG(TargetThenGen);
7659  ThenRCG(CGF);
7660  }
7661 }
7662 
7663 namespace {
7664  /// Kind of parameter in a function with 'declare simd' directive.
7665  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
7666  /// Attribute set of the parameter.
7667  struct ParamAttrTy {
7668  ParamKindTy Kind = Vector;
7669  llvm::APSInt StrideOrArg;
7670  llvm::APSInt Alignment;
7671  };
7672 } // namespace
7673 
7674 static unsigned evaluateCDTSize(const FunctionDecl *FD,
7675  ArrayRef<ParamAttrTy> ParamAttrs) {
7676  // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
7677  // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
7678  // of that clause. The VLEN value must be power of 2.
7679  // In other case the notion of the function`s "characteristic data type" (CDT)
7680  // is used to compute the vector length.
7681  // CDT is defined in the following order:
7682  // a) For non-void function, the CDT is the return type.
7683  // b) If the function has any non-uniform, non-linear parameters, then the
7684  // CDT is the type of the first such parameter.
7685  // c) If the CDT determined by a) or b) above is struct, union, or class
7686  // type which is pass-by-value (except for the type that maps to the
7687  // built-in complex data type), the characteristic data type is int.
7688  // d) If none of the above three cases is applicable, the CDT is int.
7689  // The VLEN is then determined based on the CDT and the size of vector
7690  // register of that ISA for which current vector version is generated. The
7691  // VLEN is computed using the formula below:
7692  // VLEN = sizeof(vector_register) / sizeof(CDT),
7693  // where vector register size specified in section 3.2.1 Registers and the
7694  // Stack Frame of original AMD64 ABI document.
7695  QualType RetType = FD->getReturnType();
7696  if (RetType.isNull())
7697  return 0;
7698  ASTContext &C = FD->getASTContext();
7699  QualType CDT;
7700  if (!RetType.isNull() && !RetType->isVoidType())
7701  CDT = RetType;
7702  else {
7703  unsigned Offset = 0;
7704  if (auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
7705  if (ParamAttrs[Offset].Kind == Vector)
7706  CDT = C.getPointerType(C.getRecordType(MD->getParent()));
7707  ++Offset;
7708  }
7709  if (CDT.isNull()) {
7710  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
7711  if (ParamAttrs[I + Offset].Kind == Vector) {
7712  CDT = FD->getParamDecl(I)->getType();
7713  break;
7714  }
7715  }
7716  }
7717  }
7718  if (CDT.isNull())
7719  CDT = C.IntTy;
7720  CDT = CDT->getCanonicalTypeUnqualified();
7721  if (CDT->isRecordType() || CDT->isUnionType())
7722  CDT = C.IntTy;
7723  return C.getTypeSize(CDT);
7724 }
7725 
7726 static void
7727 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
7728  const llvm::APSInt &VLENVal,
7729  ArrayRef<ParamAttrTy> ParamAttrs,
7730  OMPDeclareSimdDeclAttr::BranchStateTy State) {
7731  struct ISADataTy {
7732  char ISA;
7733  unsigned VecRegSize;
7734  };
7735  ISADataTy ISAData[] = {
7736  {
7737  'b', 128
7738  }, // SSE
7739  {
7740  'c', 256
7741  }, // AVX
7742  {
7743  'd', 256
7744  }, // AVX2
7745  {
7746  'e', 512
7747  }, // AVX512
7748  };
7750  switch (State) {
7751  case OMPDeclareSimdDeclAttr::BS_Undefined:
7752  Masked.push_back('N');
7753  Masked.push_back('M');
7754  break;
7755  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
7756  Masked.push_back('N');
7757  break;
7758  case OMPDeclareSimdDeclAttr::BS_Inbranch:
7759  Masked.push_back('M');
7760  break;
7761  }
7762  for (auto Mask : Masked) {
7763  for (auto &Data : ISAData) {
7764  SmallString<256> Buffer;
7765  llvm::raw_svector_ostream Out(Buffer);
7766  Out << "_ZGV" << Data.ISA << Mask;
7767  if (!VLENVal) {
7768  Out << llvm::APSInt::getUnsigned(Data.VecRegSize /
7769  evaluateCDTSize(FD, ParamAttrs));
7770  } else
7771  Out << VLENVal;
7772  for (auto &ParamAttr : ParamAttrs) {
7773  switch (ParamAttr.Kind){
7774  case LinearWithVarStride:
7775  Out << 's' << ParamAttr.StrideOrArg;
7776  break;
7777  case Linear:
7778  Out << 'l';
7779  if (!!ParamAttr.StrideOrArg)
7780  Out << ParamAttr.StrideOrArg;
7781  break;
7782  case Uniform:
7783  Out << 'u';
7784  break;
7785  case Vector:
7786  Out << 'v';
7787  break;
7788  }
7789  if (!!ParamAttr.Alignment)
7790  Out << 'a' << ParamAttr.Alignment;
7791  }
7792  Out << '_' << Fn->getName();
7793  Fn->addFnAttr(Out.str());
7794  }
7795  }
7796 }
7797 
7799  llvm::Function *Fn) {
7800  ASTContext &C = CGM.getContext();
7801  FD = FD->getCanonicalDecl();
7802  // Map params to their positions in function decl.
7803  llvm::DenseMap<const Decl *, unsigned> ParamPositions;
7804  if (isa<CXXMethodDecl>(FD))
7805  ParamPositions.insert({FD, 0});
7806  unsigned ParamPos = ParamPositions.size();
7807  for (auto *P : FD->parameters()) {
7808  ParamPositions.insert({P->getCanonicalDecl(), ParamPos});
7809  ++ParamPos;
7810  }
7811  for (auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
7812  llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
7813  // Mark uniform parameters.
7814  for (auto *E : Attr->uniforms()) {
7815  E = E->IgnoreParenImpCasts();
7816  unsigned Pos;
7817  if (isa<CXXThisExpr>(E))
7818  Pos = ParamPositions[FD];
7819  else {
7820  auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
7821  ->getCanonicalDecl();
7822  Pos = ParamPositions[PVD];
7823  }
7824  ParamAttrs[Pos].Kind = Uniform;
7825  }
7826  // Get alignment info.
7827  auto NI = Attr->alignments_begin();
7828  for (auto *E : Attr->aligneds()) {
7829  E = E->IgnoreParenImpCasts();
7830  unsigned Pos;
7831  QualType ParmTy;
7832  if (isa<CXXThisExpr>(E)) {
7833  Pos = ParamPositions[FD];
7834  ParmTy = E->getType();
7835  } else {
7836  auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
7837  ->getCanonicalDecl();
7838  Pos = ParamPositions[PVD];
7839  ParmTy = PVD->getType();
7840  }
7841  ParamAttrs[Pos].Alignment =
7842  (*NI) ? (*NI)->EvaluateKnownConstInt(C)
7843  : llvm::APSInt::getUnsigned(
7845  .getQuantity());
7846  ++NI;
7847  }
7848  // Mark linear parameters.
7849  auto SI = Attr->steps_begin();
7850  auto MI = Attr->modifiers_begin();
7851  for (auto *E : Attr->linears()) {
7852  E = E->IgnoreParenImpCasts();
7853  unsigned Pos;
7854  if (isa<CXXThisExpr>(E))
7855  Pos = ParamPositions[FD];
7856  else {
7857  auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
7858  ->getCanonicalDecl();
7859  Pos = ParamPositions[PVD];
7860  }
7861  auto &ParamAttr = ParamAttrs[Pos];
7862  ParamAttr.Kind = Linear;
7863  if (*SI) {
7864  if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C,
7866  if (auto *DRE = cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
7867  if (auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
7868  ParamAttr.Kind = LinearWithVarStride;
7869  ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
7870  ParamPositions[StridePVD->getCanonicalDecl()]);
7871  }
7872  }
7873  }
7874  }
7875  ++SI;
7876  ++MI;
7877  }
7878  llvm::APSInt VLENVal;
7879  if (const Expr *VLEN = Attr->getSimdlen())
7880  VLENVal = VLEN->EvaluateKnownConstInt(C);
7881  OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
7882  if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
7883  CGM.getTriple().getArch() == llvm::Triple::x86_64)
7884  emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
7885  }
7886 }
7887 
7888 namespace {
7889 /// Cleanup action for doacross support.
7890 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
7891 public:
7892  static const int DoacrossFinArgs = 2;
7893 
7894 private:
7895  llvm::Value *RTLFn;
7896  llvm::Value *Args[DoacrossFinArgs];
7897 
7898 public:
7899  DoacrossCleanupTy(llvm::Value *RTLFn, ArrayRef<llvm::Value *> CallArgs)
7900  : RTLFn(RTLFn) {
7901  assert(CallArgs.size() == DoacrossFinArgs);
7902  std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
7903  }
7904  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
7905  if (!CGF.HaveInsertPoint())
7906  return;
7907  CGF.EmitRuntimeCall(RTLFn, Args);
7908  }
7909 };
7910 } // namespace
7911 
7913  const OMPLoopDirective &D) {
7914  if (!CGF.HaveInsertPoint())
7915  return;
7916 
7917  ASTContext &C = CGM.getContext();
7918  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
7919  RecordDecl *RD;
7920  if (KmpDimTy.isNull()) {
7921  // Build struct kmp_dim { // loop bounds info casted to kmp_int64
7922  // kmp_int64 lo; // lower
7923  // kmp_int64 up; // upper
7924  // kmp_int64 st; // stride
7925  // };
7926  RD = C.buildImplicitRecord("kmp_dim");
7927  RD->startDefinition();
7928  addFieldToRecordDecl(C, RD, Int64Ty);
7929  addFieldToRecordDecl(C, RD, Int64Ty);
7930  addFieldToRecordDecl(C, RD, Int64Ty);
7931  RD->completeDefinition();
7932  KmpDimTy = C.getRecordType(RD);
7933  } else
7934  RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
7935 
7936  Address DimsAddr = CGF.CreateMemTemp(KmpDimTy, "dims");
7937  CGF.EmitNullInitialization(DimsAddr, KmpDimTy);
7938  enum { LowerFD = 0, UpperFD, StrideFD };
7939  // Fill dims with data.
7940  LValue DimsLVal = CGF.MakeAddrLValue(DimsAddr, KmpDimTy);
7941  // dims.upper = num_iterations;
7942  LValue UpperLVal =
7943  CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), UpperFD));
7944  llvm::Value *NumIterVal = CGF.EmitScalarConversion(
7946  Int64Ty, D.getNumIterations()->getExprLoc());
7947  CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
7948  // dims.stride = 1;
7949  LValue StrideLVal =
7950  CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), StrideFD));
7951  CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
7952  StrideLVal);
7953 
7954  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
7955  // kmp_int32 num_dims, struct kmp_dim * dims);
7956  llvm::Value *Args[] = {emitUpdateLocation(CGF, D.getLocStart()),
7957  getThreadID(CGF, D.getLocStart()),
7958  llvm::ConstantInt::getSigned(CGM.Int32Ty, 1),
7960  DimsAddr.getPointer(), CGM.VoidPtrTy)};
7961 
7963  CGF.EmitRuntimeCall(RTLFn, Args);
7964  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
7965  emitUpdateLocation(CGF, D.getLocEnd()), getThreadID(CGF, D.getLocEnd())};
7967  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
7968  llvm::makeArrayRef(FiniArgs));
7969 }
7970 
7972  const OMPDependClause *C) {
7973  QualType Int64Ty =
7974  CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
7975  const Expr *CounterVal = C->getCounterValue();
7976  assert(CounterVal);
7977  llvm::Value *CntVal = CGF.EmitScalarConversion(CGF.EmitScalarExpr(CounterVal),
7978  CounterVal->getType(), Int64Ty,
7979  CounterVal->getExprLoc());
7980  Address CntAddr = CGF.CreateMemTemp(Int64Ty, ".cnt.addr");
7981  CGF.EmitStoreOfScalar(CntVal, CntAddr, /*Volatile=*/false, Int64Ty);
7982  llvm::Value *Args[] = {emitUpdateLocation(CGF, C->getLocStart()),
7983  getThreadID(CGF, C->getLocStart()),
7984  CntAddr.getPointer()};
7985  llvm::Value *RTLFn;
7986  if (C->getDependencyKind() == OMPC_DEPEND_source)
7988  else {
7989  assert(C->getDependencyKind() == OMPC_DEPEND_sink);
7991  }
7992  CGF.EmitRuntimeCall(RTLFn, Args);
7993 }
7994 
7997  SourceLocation Loc) const {
7998  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
7999 
8000  if (auto *Fn = dyn_cast<llvm::Function>(Callee)) {
8001  if (Fn->doesNotThrow()) {
8002  CGF.EmitNounwindRuntimeCall(Fn, Args);
8003  return;
8004  }
8005  }
8006  CGF.EmitRuntimeCall(Callee, Args);
8007 }
8008 
8010  CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
8011  ArrayRef<llvm::Value *> Args) const {
8012  assert(Loc.isValid() && "Outlined function call location must be valid.");
8013  emitCall(CGF, OutlinedFn, Args, Loc);
8014 }
8015 
8017  const VarDecl *NativeParam,
8018  const VarDecl *TargetParam) const {
8019  return CGF.GetAddrOfLocalVar(NativeParam);
8020 }
8021 
8023  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
8024  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
8025  llvm_unreachable("Not supported in SIMD-only mode");
8026 }
8027 
8029  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
8030  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
8031  llvm_unreachable("Not supported in SIMD-only mode");
8032 }
8033 
8035  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
8036  const VarDecl *PartIDVar, const VarDecl *TaskTVar,
8037  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
8038  bool Tied, unsigned &NumberOfParts) {
8039  llvm_unreachable("Not supported in SIMD-only mode");
8040 }
8041 
8043  SourceLocation Loc,
8044  llvm::Value *OutlinedFn,
8045  ArrayRef<llvm::Value *> CapturedVars,
8046  const Expr *IfCond) {
8047  llvm_unreachable("Not supported in SIMD-only mode");
8048 }
8049 
8051  CodeGenFunction &CGF, StringRef CriticalName,
8052  const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
8053  const Expr *Hint) {
8054  llvm_unreachable("Not supported in SIMD-only mode");
8055 }
8056 
8058  const RegionCodeGenTy &MasterOpGen,
8059  SourceLocation Loc) {
8060  llvm_unreachable("Not supported in SIMD-only mode");
8061 }
8062 
8064  SourceLocation Loc) {
8065  llvm_unreachable("Not supported in SIMD-only mode");
8066 }
8067 
8069  CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
8070  SourceLocation Loc) {
8071  llvm_unreachable("Not supported in SIMD-only mode");
8072 }
8073 
8075  CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
8076  SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
8077  ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
8078  ArrayRef<const Expr *> AssignmentOps) {
8079  llvm_unreachable("Not supported in SIMD-only mode");
8080 }
8081 
8083  const RegionCodeGenTy &OrderedOpGen,
8084  SourceLocation Loc,
8085  bool IsThreads) {
8086  llvm_unreachable("Not supported in SIMD-only mode");
8087 }
8088 
8090  SourceLocation Loc,
8091  OpenMPDirectiveKind Kind,
8092  bool EmitChecks,
8093  bool ForceSimpleCall) {
8094  llvm_unreachable("Not supported in SIMD-only mode");
8095 }
8096 
8098  CodeGenFunction &CGF, SourceLocation Loc,
8099  const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
8100  bool Ordered, const DispatchRTInput &DispatchValues) {
8101  llvm_unreachable("Not supported in SIMD-only mode");
8102 }
8103 
8106  const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
8107  llvm_unreachable("Not supported in SIMD-only mode");
8108 }
8109 
8111  CodeGenFunction &CGF, SourceLocation Loc,
8112  OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
8113  llvm_unreachable("Not supported in SIMD-only mode");
8114 }
8115 
8117  SourceLocation Loc,
8118  unsigned IVSize,
8119  bool IVSigned) {
8120  llvm_unreachable("Not supported in SIMD-only mode");
8121 }
8122 
8124  SourceLocation Loc,
8125  OpenMPDirectiveKind DKind) {
8126  llvm_unreachable("Not supported in SIMD-only mode");
8127 }
8128 
8130  SourceLocation Loc,
8131  unsigned IVSize, bool IVSigned,
8132  Address IL, Address LB,
8133  Address UB, Address ST) {
8134  llvm_unreachable("Not supported in SIMD-only mode");
8135 }
8136 
8138  llvm::Value *NumThreads,
8139  SourceLocation Loc) {
8140  llvm_unreachable("Not supported in SIMD-only mode");
8141 }
8142 
8144  OpenMPProcBindClauseKind ProcBind,
8145  SourceLocation Loc) {
8146  llvm_unreachable("Not supported in SIMD-only mode");
8147 }
8148 
8150  const VarDecl *VD,
8151  Address VDAddr,
8152  SourceLocation Loc) {
8153  llvm_unreachable("Not supported in SIMD-only mode");
8154 }
8155 
8157  const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
8158  CodeGenFunction *CGF) {
8159  llvm_unreachable("Not supported in SIMD-only mode");
8160 }
8161 
8163  CodeGenFunction &CGF, QualType VarType, StringRef Name) {
8164  llvm_unreachable("Not supported in SIMD-only mode");
8165 }
8166 
8169  SourceLocation Loc) {
8170  llvm_unreachable("Not supported in SIMD-only mode");
8171 }
8172 
8174  const OMPExecutableDirective &D,
8175  llvm::Value *TaskFunction,
8176  QualType SharedsTy, Address Shareds,
8177  const Expr *IfCond,
8178  const OMPTaskDataTy &Data) {
8179  llvm_unreachable("Not supported in SIMD-only mode");
8180 }
8181 
8183  CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
8184  llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds,
8185  const Expr *IfCond, const OMPTaskDataTy &Data) {
8186  llvm_unreachable("Not supported in SIMD-only mode");
8187 }
8188 
8192  ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
8193  assert(Options.SimpleReduction && "Only simple reduction is expected.");
8194  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
8195  ReductionOps, Options);
8196 }
8197 
8200  ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
8201  llvm_unreachable("Not supported in SIMD-only mode");
8202 }
8203 
8205  SourceLocation Loc,
8206  ReductionCodeGen &RCG,
8207  unsigned N) {
8208  llvm_unreachable("Not supported in SIMD-only mode");
8209 }
8210 
8212  SourceLocation Loc,
8213  llvm::Value *ReductionsPtr,
8214  LValue SharedLVal) {
8215  llvm_unreachable("Not supported in SIMD-only mode");
8216 }
8217 
8219  SourceLocation Loc) {
8220  llvm_unreachable("Not supported in SIMD-only mode");
8221 }
8222 
8224  CodeGenFunction &CGF, SourceLocation Loc,
8225  OpenMPDirectiveKind CancelRegion) {
8226  llvm_unreachable("Not supported in SIMD-only mode");
8227 }
8228 
8230  SourceLocation Loc, const Expr *IfCond,
8231  OpenMPDirectiveKind CancelRegion) {
8232  llvm_unreachable("Not supported in SIMD-only mode");
8233 }
8234 
8236  const OMPExecutableDirective &D, StringRef ParentName,
8237  llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
8238  bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
8239  llvm_unreachable("Not supported in SIMD-only mode");
8240 }
8241 
8243  const OMPExecutableDirective &D,
8244  llvm::Value *OutlinedFn,
8245  llvm::Value *OutlinedFnID,
8246  const Expr *IfCond, const Expr *Device,
8247  ArrayRef<llvm::Value *> CapturedVars) {
8248  llvm_unreachable("Not supported in SIMD-only mode");
8249 }
8250 
8252  llvm_unreachable("Not supported in SIMD-only mode");
8253 }
8254 
8256  llvm_unreachable("Not supported in SIMD-only mode");
8257 }
8258 
8260  return false;
8261 }
8262 
8264  return nullptr;
8265 }
8266 
8268  const OMPExecutableDirective &D,
8269  SourceLocation Loc,
8270  llvm::Value *OutlinedFn,
8271  ArrayRef<llvm::Value *> CapturedVars) {
8272  llvm_unreachable("Not supported in SIMD-only mode");
8273 }
8274 
8276  const Expr *NumTeams,
8277  const Expr *ThreadLimit,
8278  SourceLocation Loc) {
8279  llvm_unreachable("Not supported in SIMD-only mode");
8280 }
8281 
8283  CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
8284  const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
8285  llvm_unreachable("Not supported in SIMD-only mode");
8286 }
8287 
8289  CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
8290  const Expr *Device) {
8291  llvm_unreachable("Not supported in SIMD-only mode");
8292 }
8293 
8295  const OMPLoopDirective &D) {
8296  llvm_unreachable("Not supported in SIMD-only mode");
8297 }
8298 
8300  const OMPDependClause *C) {
8301  llvm_unreachable("Not supported in SIMD-only mode");
8302 }
8303 
8304 const VarDecl *
8306  const VarDecl *NativeParam) const {
8307  llvm_unreachable("Not supported in SIMD-only mode");
8308 }
8309 
8310 Address
8312  const VarDecl *NativeParam,
8313  const VarDecl *TargetParam) const {
8314  llvm_unreachable("Not supported in SIMD-only mode");
8315 }
8316 
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition: CGCall.cpp:640
RecordDecl * buildImplicitRecord(StringRef Name, RecordDecl::TagKind TK=TTK_Struct) const
Create a new implicit TU-level CXXRecordDecl or RecordDecl declaration.
TBAAAccessInfo getTBAAInfoForSubobject(LValue Base, QualType AccessType)
getTBAAInfoForSubobject - Get TBAA information for an access with a given base lvalue.
void pushTerminate()
Push a terminate handler on the stack.
Definition: CGCleanup.cpp:259
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, llvm::Type *BaseLVType, CharUnits BaseLVAlignment, llvm::Value *Addr)
virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S)
Emit the captured statement body.
static llvm::Value * emitParallelOrTeamsOutlinedFunction(CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen)
This represents '#pragma omp task' directive.
Definition: StmtOpenMP.h:1717
static const Decl * getCanonicalDecl(const Decl *D)
An instance of this class is created to represent a function declaration or definition.
Definition: Decl.h:1697
llvm::IntegerType * IntTy
int
This represents 'thread_limit' clause in the '#pragma omp ...' directive.
External linkage, which indicates that the entity can be referred to from other translation units...
Definition: Linkage.h:61
Expr * getUpperBoundVariable() const
Definition: StmtOpenMP.h:790
Other implicit parameter.
Definition: Decl.h:1473
QualType TgtDeviceImageQTy
struct __tgt_device_image{ void *ImageStart; // Pointer to the target code start. ...
Complete object ctor.
Definition: ABI.h:26
unsigned getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it...
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition: Type.h:2283
QualType getPointeeType() const
Definition: Type.h:2296
CanQualType VoidPtrTy
Definition: ASTContext.h:1012
Scheduling data for loop-based OpenMP directives.
Definition: OpenMPKinds.h:124
Destroyer * getDestroyer(QualType::DestructionKind destructionKind)
Definition: CGDecl.cpp:1461
A (possibly-)qualified type.
Definition: Type.h:653
bool isArrayType() const
Definition: Type.h:5991
llvm::Type * ConvertTypeForMem(QualType T)
void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, llvm::BasicBlock *FalseBlock, uint64_t TrueCount)
EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g.
const CodeGenOptions & getCodeGenOpts() const
Parameters (TaskFunction type ends in: i32, captured_struct *). \param SharedsTy A type which contains references to the shared variables. \param Shareds Context with the list of shared variables from the \p TaskFunction. \param IfCond Not a nullptr if 'if' clause was specified, nullptr otherwise. \param Data Additional data for task generation like final state, list of privates etc. void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
bool HaveInsertPoint() const
HaveInsertPoint - True if an insertion point is defined.
static void EmitOMPTargetParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForSimdDirective &S)
Emit device code for the target parallel for simd directive.
void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)
Emits single reduction combiner.
llvm::SmallPtrSet< const VarDecl *, 4 > ThreadPrivateWithDefinition
Set of threadprivate variables with the generated initializer.
void EmitOMPAggregateAssign(Address DestAddr, Address SrcAddr, QualType OriginalType, const llvm::function_ref< void(Address, Address)> &CopyGen)
Perform element by element copying of arrays with type OriginalType from SrcAddr to DestAddr using co...
The standard implementation of ConstantInitBuilder used in Clang.
Stmt - This represents one statement.
Definition: Stmt.h:66
Expr * getLowerBoundVariable() const
Definition: StmtOpenMP.h:782
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee...
Definition: Type.cpp:456
virtual void emitUserDefinedReduction(CodeGenFunction *CGF, const OMPDeclareReductionDecl *D)
Emit code for the specified user defined reduction construct.
bool capturesThis() const
Determine whether this capture handles the C++ 'this' pointer.
Definition: Stmt.h:2096
VarDecl * getCapturedVar() const
Retrieve the declaration of the variable being captured.
Definition: Stmt.cpp:1007
bool emitTargetGlobal(GlobalDecl GD) override
Emit the global GD if it is meaningful for the target.
QualType getTgtBinaryDescriptorQTy()
Returns __tgt_bin_desc type.
SmallVector< std::pair< OpenMPDependClauseKind, const Expr * >, 4 > Dependences
bool isRecordType() const
Definition: Type.h:6015
void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues) override
This is used for non static scheduled types and when the ordered clause is present on the loop constr...
Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override
Gets the address of the native argument based on the address of the target-specific parameter...
SmallVector< const Expr *, 4 > LastprivateCopies
specific_attr_iterator - Iterates over a subrange of an AttrVec, only providing attributes that are o...
Definition: AttrIterator.h:54
llvm::Constant * getOrCreateInternalVariable(llvm::Type *Ty, const llvm::Twine &Name)
Gets (if variable with the given name already exists) or creates internal global variable with the spe...
virtual llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr *> LHSExprs, ArrayRef< const Expr *> RHSExprs, const OMPTaskDataTy &Data)
Emit a code for initialization of task reduction clause.
const RecordType * getAsStructureType() const
Definition: Type.cpp:472
static llvm::Value * emitCopyprivateCopyFunction(CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef< const Expr *> CopyprivateVars, ArrayRef< const Expr *> DestExprs, ArrayRef< const Expr *> SrcExprs, ArrayRef< const Expr *> AssignmentOps)
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
VarDecl * getDefinition(ASTContext &)
Get the real (not just tentative) definition for this declaration.
Definition: Decl.cpp:2083
static bool stable_sort_comparator(const PrivateDataTy P1, const PrivateDataTy P2)
static llvm::Value * emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, ArrayRef< const Expr *> PrivateVars, ArrayRef< const Expr *> FirstprivateVars, ArrayRef< const Expr *> LastprivateVars, QualType PrivatesQTy, ArrayRef< PrivateDataTy > Privates)
Emit a privates mapping function for correct handling of private and firstprivate variables...
virtual Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name)
Creates artificial threadprivate variable with name Name and type VarType.
StringRef P
static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr, IdentFieldIndex Field, const llvm::Twine &Name="")
Call to void __kmpc_threadprivate_register( ident_t *, void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);.
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...
Definition: Type.h:5891
ReductionCodeGen(ArrayRef< const Expr *> Shareds, ArrayRef< const Expr *> Privates, ArrayRef< const Expr *> ReductionOps)
static RecordDecl * createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef< PrivateDataTy > Privates)
bool isOpenMPTargetDataManagementDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target data offload directive.
The base class of the type hierarchy.
Definition: Type.h:1351
void EmitStoreThroughLValue(RValue Src, LValue Dst, bool isInit=false)
EmitStoreThroughLValue - Store the specified rvalue into the specified lvalue, where both are guarant...
Definition: CGExpr.cpp:1836
void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr *> Privates, ArrayRef< const Expr *> LHSExprs, ArrayRef< const Expr *> RHSExprs, ArrayRef< const Expr *> ReductionOps, ReductionOptionsTy Options) override
Emit a code for reduction clause.
void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device) override
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
const RecordDecl * getCapturedRecordDecl() const
Retrieve the record declaration for captured variables.
Definition: Stmt.h:2176
llvm::Value * PointersArray
The array of section pointers passed to the runtime library.
virtual void clear()
virtual void completeDefinition()
completeDefinition - Notes that the definition of this type is now complete.
Definition: Decl.cpp:3969
const TargetInfo & getTargetInfo() const
Definition: ASTContext.h:671
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
struct with the values to be passed to the dispatch runtime function
Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
Definition: CGExpr.cpp:2232
llvm::Value * emitReductionFunction(CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef< const Expr *> Privates, ArrayRef< const Expr *> LHSExprs, ArrayRef< const Expr *> RHSExprs, ArrayRef< const Expr *> ReductionOps)
Emits reduction function.
const Expr * getAnyInitializer() const
getAnyInitializer - Get the initializer for this variable, no matter which declaration it is attached...
Definition: Decl.h:1202
void scanForTargetRegionsFunctions(const Stmt *S, StringRef ParentName)
Start scanning from statement S and emit all target regions found along the way.
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
static CharUnits getOffsetOfIdentField(IdentFieldIndex Field)
static void EmitOMPAggregateReduction(CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, const VarDecl *RHSVar, const llvm::function_ref< void(CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *)> &RedOpGen, const Expr *XExpr=nullptr, const Expr *EExpr=nullptr, const Expr *UpExpr=nullptr)
Emit reduction operation for each element of array (required for array sections) LHS op = RHS...
llvm::Value * getCriticalRegionLock(StringRef CriticalName)
Returns corresponding lock object for the specified critical region name.
bool ConstantFoldsToSimpleInteger(const Expr *Cond, bool &Result, bool AllowLabels=false)
ConstantFoldsToSimpleInteger - If the specified expression does not fold to a constant, or if it does but contains a label, return false.
void EmitOMPCopy(QualType OriginalType, Address DestAddr, Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy)
Emit proper copying of data from one variable to another.
OpenMPSchedType
Schedule types for 'omp for' loops (these enumerators are taken from the enum sched_type in kmp...
SmallVector< const Expr *, 4 > ReductionCopies
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
VarDecl - An instance of this class is created to represent a variable declaration or definition...
Definition: Decl.h:806
Objects with "hidden" visibility are not seen by the dynamic linker.
Definition: Visibility.h:35
llvm::Value * getThreadID(CodeGenFunction &CGF, SourceLocation Loc)
Gets thread id value for the current thread.
QualType getReturnType() const
Definition: Decl.h:2207
This represents 'num_threads' clause in the '#pragma omp ...' directive.
Definition: OpenMPClause.h:382
virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, ArrayRef< llvm::Value *> Args=llvm::None) const
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
const T * getAs() const
Member-template getAs<specific type>.
Definition: Type.h:6305
The "union" keyword.
Definition: Type.h:4694
Extra information about a function prototype.
Definition: Type.h:3387
virtual llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST)
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
bool field_empty() const
Definition: Decl.h:3628
reference front() const
Definition: DeclBase.h:1230
void EmitVariablyModifiedType(QualType Ty)
EmitVariablyModifiedType - Capture all the sizes for the VLA expressions in the given variably-modified type and s...
llvm::Value * getPointer() const
Definition: Address.h:38
capture_iterator capture_begin()
Retrieve an iterator pointing to the first capture.
Definition: Stmt.h:2201
llvm::Type * ConvertTypeForMem(QualType T)
ConvertTypeForMem - Convert type T into a llvm::Type.
virtual void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags=0)
Creates offloading entry for the provided entry ID ID, address Addr, size Size, and flags Flags...
void createOffloadEntriesAndInfoMetadata()
Creates all the offload entries in the current compilation unit along with the associated metadata...
static unsigned evaluateCDTSize(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
static void EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelDirective &S)
A jump destination is an abstract label, branching to which may require a jump out through normal cle...
void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Value *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, const Expr *Device, ArrayRef< llvm::Value *> CapturedVars) override
Emit the target offloading code associated with D.
Struct that keeps all the relevant information that should be kept throughout a &#39;target data&#39; region...
QualType getTgtOffloadEntryQTy()
Returns __tgt_offload_entry type.
virtual void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device)
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
SmallVector< const Expr *, 4 > PrivateVars
RecordDecl - Represents a struct/union/class.
Definition: Decl.h:3488
llvm::DenseMap< const VarDecl *, FieldDecl * > LambdaCaptureFields
Source[4] in Fortran, do not use for C++.
static llvm::Value * emitDestructorsFunction(CodeGenModule &CGM, SourceLocation Loc, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy)
void emitDestroy(Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
emitDestroy - Immediately perform the destruction of the given object.
Definition: CGDecl.cpp:1537
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, LValue BaseLV)
Address getAddress() const
Definition: CGValue.h:324
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
virtual void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc)
Emits a master region.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:149
LineState State
virtual void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned)
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
void EmitExprAsInit(const Expr *init, const ValueDecl *D, LValue lvalue, bool capturedByInit)
EmitExprAsInit - Emits the code necessary to initialize a location in memory with the given initializ...
Definition: CGDecl.cpp:1326
Call to void *__kmpc_threadprivate_cached(ident_t *loc, kmp_int32 global_tid, void *data...
The scope used to remap some variables as private in the OpenMP loop body (or other captured region e...
SmallVector< const Expr *, 4 > LastprivateVars
virtual void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C)
Emit code for doacross ordered directive with 'depend' clause.
FieldDecl - An instance of this class is created by Sema::ActOnField to represent a member of a struc...
Definition: Decl.h:2467
CharUnits getAlignment() const
Definition: CGValue.h:313
const CapturedStmt * getCapturedStmt(OpenMPDirectiveKind RegionKind) const
Returns the captured statement associated with the component region within the (combined) directive...
Definition: StmtOpenMP.h:209
An RAII object to set (and then clear) a mapping for an OpaqueValueExpr.
might be used in Fortran
std::pair< llvm::Value *, llvm::Value * > getSizes(unsigned N) const
Returns the size of the reduction item (in chars and total number of elements in the item)...
LValue EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, bool IsLowerBound=true)
Definition: CGExpr.cpp:3440
static void emitOffloadingArrays(CodeGenFunction &CGF, MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, MappableExprsHandler::MapValuesArrayTy &Pointers, MappableExprsHandler::MapValuesArrayTy &Sizes, MappableExprsHandler::MapFlagsArrayTy &MapTypes, CGOpenMPRuntime::TargetDataInfo &Info)
Emit the arrays used to pass the captures and map information to the offloading runtime library...
llvm::CallInst * EmitRuntimeCall(llvm::Value *callee, const Twine &name="")
void startDefinition()
Starts the definition of this tag declaration.
Definition: Decl.cpp:3725
bool isReferenceType() const
Definition: Type.h:5954
This represents clause 'map' in the '#pragma omp ...' directives.
InitKind getInitializerKind() const
Get initializer kind.
Definition: DeclOpenMP.h:157
virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const
Check if the specified ScheduleKind is dynamic.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, unsigned Index, const VarDecl *Var)
Given an array of pointers to variables, project the address of a given variable. ...
This represents clause 'to' in the '#pragma omp ...' directives.
static CharUnits Zero()
Zero - Construct a CharUnits quantity of zero.
Definition: CharUnits.h:53
clang::CharUnits operator*(clang::CharUnits::QuantityType Scale, const clang::CharUnits &CU)
Definition: CharUnits.h:208
llvm::Type * getKmpc_MicroPointerTy()
Returns pointer to kmpc_micro type.
OpenMPDirectiveKind getDirectiveKind() const
Definition: StmtOpenMP.h:225
Expr * getCounterValue()
Get the loop counter value.
This is a common base class for loop directives ('omp simd', 'omp for', 'omp for simd' etc...
Definition: StmtOpenMP.h:317
void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc) override
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...
virtual bool emitTargetFunctions(GlobalDecl GD)
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
An r-value expression (a pr-value in the C++11 taxonomy) produces a temporary value.
Definition: Specifiers.h:107
CharUnits GetTargetTypeStoreSize(llvm::Type *Ty) const
Return the store size, in character units, of the given LLVM type.
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc)
EmitLoadOfComplex - Load a complex number from the specified l-value.
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:651
ArrayRef< ParmVarDecl * > parameters() const
Definition: Decl.h:2167
OpenMPDistScheduleClauseKind
OpenMP attributes for 'dist_schedule' clause.
Definition: OpenMPKinds.h:100
void emitProcBindClause(CodeGenFunction &CGF, OpenMPProcBindClauseKind ProcBind, SourceLocation Loc) override
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...
BinaryOperatorKind
Expr * getInitializer()
Get initializer expression (if specified) of the declare reduction construct.
Definition: DeclOpenMP.h:154
QualType TgtOffloadEntryQTy
Type struct __tgt_offload_entry{ void *addr; // Pointer to the offload entry info.
SourceLocation getLocEnd() const
Returns ending location of directive.
Definition: StmtOpenMP.h:170
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, QualType Type, bool EmitDeclareReductionInit, const Expr *Init, const OMPDeclareReductionDecl *DRD, Address SrcAddr=Address::invalid())
Emit initialization of arrays of complex types.
llvm::Function * emitRegistrationFunction() override
Creates the offloading descriptor in the event any target region was emitted in the current module an...
static CharUnits getIdentAlign(CodeGenModule &CGM)
OpenMPScheduleClauseKind Schedule
Definition: OpenMPKinds.h:125
Address CreateElementBitCast(Address Addr, llvm::Type *Ty, const llvm::Twine &Name="")
Cast the element type of the given address to a different type, preserving information like the align...
Definition: CGBuilder.h:157
CharUnits - This is an opaque type for sizes expressed in character units.
Definition: CharUnits.h:38
bool Ordered
true if loop is ordered, false otherwise.
void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned) override
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a teams-kind directive.
uint32_t Offset
Definition: CacheTokens.cpp:43
virtual void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info)
Emit the target data mapping code associated with D.
const ArrayType * getAsArrayTypeUnsafe() const
A variant of getAs<> for array types which silently discards qualifiers from the outermost type...
Definition: Type.h:6354
void emitCleanups(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Emits cleanup code for the reduction item.
RAII for correct setting/restoring of CapturedStmtInfo.
i32, captured_struct *. \param SharedsTy A type which contains references to the shared variables. \param Shareds Context with the list of shared variables from the \p TaskFunction. \param IfCond Not a nullptr if the 'if' clause was specified, nullptr otherwise. \param Data Additional data for task generation, like final state, list of privates, etc. virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
CharUnits getAlignment() const
Return the alignment of this pointer.
Definition: Address.h:67
void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc) override
Emits code for a taskyield directive.
bool empty() const
Return true if there are no entries defined.
child_range children()
Definition: Stmt.cpp:226
String describing the source location.
void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc) override
Emits a master region.
virtual std::pair< llvm::Function *, llvm::Function * > getUserDefinedReduction(const OMPDeclareReductionDecl *D)
Get combiner/initializer for the specified user-defined reduction, if any.
void actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action)
OpenMPScheduleClauseModifier M2
Definition: OpenMPKinds.h:127
bool needsEHCleanup(QualType::DestructionKind kind)
Determines whether an EH cleanup is required to destroy a type with the given destruction kind...
SmallVector< const Expr *, 4 > PrivateCopies
llvm::CallInst * CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:259
unsigned NumberOfPtrs
The total number of pointers passed to the runtime library.
void operator()(CodeGenFunction &CGF) const
CharUnits getDeclAlign(const Decl *D, bool ForAlignof=false) const
Return a conservative estimate of the alignment of the specified decl D.
void EmitAggregateAssign(Address DestPtr, Address SrcPtr, QualType EltTy)
EmitAggregateAssign - Emit an aggregate assignment.
Address adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Adjusts PrivateAddr for use instead of the original variable address in normal operations...
Expr * getSizeExpr() const
Definition: Type.h:2737
Scope - A scope is a transient data structure that is used while parsing the program.
Definition: Scope.h:39
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition: Type.h:5718
virtual void emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn)
Marks function Fn with properly mangled versions of vector functions.
field_iterator field_begin() const
Definition: Decl.cpp:3960
bool usesReductionInitializer(unsigned N) const
Returns true if the initialization of the reduction item uses initializer from declare reduction cons...
bool isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target code offload directive.
virtual void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Value *OutlinedFn, ArrayRef< llvm::Value *> CapturedVars)
Emits code for teams call of the OutlinedFn with variables captured in a record whose address is stor...
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
void EmitIgnoredExpr(const Expr *E)
EmitIgnoredExpr - Emit an expression in a context which ignores the result.
Definition: CGExpr.cpp:161
SmallVector< const Expr *, 4 > FirstprivateCopies
OpenMPDependClauseKind getDependencyKind() const
Get dependency type.
static void emitPrivatesInit(CodeGenFunction &CGF, const OMPExecutableDirective &D, Address KmpTaskSharedsPtr, LValue TDBase, const RecordDecl *KmpTaskTWithPrivatesQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool ForDup)
Emit initialization for private variables in task-based directives.
static int addMonoNonMonoModifier(OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2)
static llvm::Function * emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, const Expr *CombinerInitializer, const VarDecl *In, const VarDecl *Out, bool IsCombiner)
This represents clause 'reduction' in the '#pragma omp ...' directives.
static void emitOffloadingArraysArgument(CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info)
Emit the arguments to be passed to the runtime library based on the arrays of pointers, sizes and map types.
Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
static const OMPDeclareReductionDecl * getReductionInit(const Expr *ReductionOp)
Check if the combiner is a call to UDR combiner and if it is so return the UDR decl used for reductio...
llvm::Value * emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0)
Emits object of ident_t type with info for source location.
bool isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a worksharing directive.
virtual llvm::Value * emitTeamsOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP teams directive D.
SmallVector< const Expr *, 4 > ReductionOps
This represents clause 'is_device_ptr' in the '#pragma omp ...' directives.
llvm::AllocaInst * CreateTempAlloca(llvm::Type *Ty, const Twine &Name="tmp", llvm::Value *ArraySize=nullptr)
CreateTempAlloca - This creates an alloca and inserts it into the entry block if ArraySize is nullptr...
Definition: CGExpr.cpp:94
SmallVector< const Expr *, 4 > ReductionVars
virtual void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind)
Call the appropriate runtime routine to notify that we finished all the work with current loop...
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
Definition: DeclBase.h:869
This represents clause 'from' in the '#pragma omp ...' directives.
LValue EmitLValueForField(LValue Base, const FieldDecl *Field)
Definition: CGExpr.cpp:3747
const VarDecl * translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const override
Translates the native parameter of outlined function if this is required for target.
llvm::Constant * CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs=llvm::AttributeList(), bool Local=false)
Create a new runtime function with the specified type and name.
lookup_result lookup(DeclarationName Name) const
lookup - Find the declarations (if any) with the given Name in this context.
Definition: DeclBase.cpp:1536
SourceLocation getLocStart() const
Returns starting location of directive kind.
Definition: StmtOpenMP.h:168
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition: Decl.cpp:4210
OpenMP 4.0 [2.4, Array Sections].
Definition: ExprOpenMP.h:45
bool isValid() const
Definition: Address.h:36
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition: Type.cpp:1590
void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override
Emits a critical region.
llvm::CallInst * EmitNounwindRuntimeCall(llvm::Value *callee, const Twine &name="")
Describes the capture of either a variable, or 'this', or variable-length array type.
Definition: Stmt.h:2071
const CodeGen::CGBlockInfo * BlockInfo
virtual Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const
Gets the address of the native argument based on the address of the target-specific parameter...
llvm::Constant * createForStaticInitFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_for_static_init_* runtime function for the specified size IVSize and sign IVSigned...
CGBlockInfo - Information to generate a block literal.
Definition: CGBlocks.h:149
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
Definition: CGExpr.cpp:202
OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
LValueBaseInfo getBaseInfo() const
Definition: CGValue.h:316
RValue - This trivial value class is used to represent the result of an expression that is evaluated...
Definition: CGValue.h:39
bool addPrivate(const VarDecl *LocalVD, llvm::function_ref< Address()> PrivateGen)
Registers LocalVD variable as a private and apply PrivateGen function for it to generate correspondin...
bool needCleanups(unsigned N)
Returns true if the private copy requires cleanups.
Class intended to support codegen of all kind of the reduction clauses.
llvm::Constant * createDispatchFiniFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_fini_* runtime function for the specified size IVSize and sign IVSigned...
Expr * getCombiner()
Get combiner expression of the declare reduction construct.
Definition: DeclOpenMP.h:147
TypeSourceInfo * getTrivialTypeSourceInfo(QualType T, SourceLocation Loc=SourceLocation()) const
Allocate a TypeSourceInfo where all locations have been initialized to a given location, which defaults to the empty location.
llvm::CallingConv::ID getRuntimeCC() const
QualType getTgtDeviceImageQTy()
Returns __tgt_device_image type.
This represents implicit clause 'depend' for the '#pragma omp task' directive.
KmpTaskTFields
Indexes of fields for type kmp_task_t.
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
virtual void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Required to resolve existing problems in the runtime.
const Stmt * getAssociatedStmt() const
Returns statement associated with the directive.
Definition: StmtOpenMP.h:196
virtual void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr *> Vars, SourceLocation Loc)
Emit flush of the variables specified in 'omp flush' directive.
void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc) override
Emit code for 'taskwait' directive.
ValueDecl - Represent the declaration of a variable (in which case it is an lvalue) a function (in wh...
Definition: Decl.h:627
Expr - This represents one expression.
Definition: Expr.h:106
QualType getPointeeType() const
Definition: Type.h:2440
Allow any unmodeled side effect.
Definition: Expr.h:598
virtual llvm::Value * emitParallelOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP parallel directive D.
static Address invalid()
Definition: Address.h:35
void loadOffloadInfoMetadata()
Loads all the offload entries information from the host IR metadata.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF, const OMPDeclareReductionDecl *DRD, const Expr *InitOp, Address Private, Address Original, QualType Ty)
static llvm::Value * emitReduceCombFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N, const Expr *ReductionOp, const Expr *LHS, const Expr *RHS, const Expr *PrivateRef)
Emits reduction combiner function:
const AnnotatedLine * Line
Enters a new scope for capturing cleanups, all of which will be executed once the scope is exited...
const FunctionProtoType * T
static llvm::Value * emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, QualType SharedsPtrTy, llvm::Value *TaskFunction, llvm::Value *TaskPrivatesMap)
Emit a proxy function which accepts kmp_task_t as the second argument.
bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a parallel-kind directive.
const CGFunctionInfo & arrangeNullaryFunction()
A nullary function is a freestanding function of type 'void ()'.
Definition: CGCall.cpp:682
std::string OMPHostIRFile
Name of the IR file that contains the result of the OpenMP target host code generation.
Definition: LangOptions.h:151
static llvm::Function * createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name, const RegionCodeGenTy &Codegen)
Create a Ctor/Dtor-like function whose body is emitted through Codegen.
void emitKmpRoutineEntryT(QualType KmpInt32Ty)
Build type kmp_routine_entry_t (if not built yet).
const T * castAs() const
Member-template castAs<specific type>.
Definition: Type.h:6368
void SetInternalFunctionAttributes(const Decl *D, llvm::Function *F, const CGFunctionInfo &FI)
Set the attributes on the LLVM function for the given decl and function info.
unsigned getLine() const
Return the presumed line number of this location.
llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST) override
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
unsigned IVSize
Size of the iteration variable in bits.
static bool checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD)
Checks if destructor function is required to be generated.
void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr *> CopyprivateVars, ArrayRef< const Expr *> DestExprs, ArrayRef< const Expr *> SrcExprs, ArrayRef< const Expr *> AssignmentOps) override
Emits a single region.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion)
QualType SavedKmpTaskloopTQTy
Saved kmp_task_t for taskloop-based directive.
static RecordDecl * createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpRoutineEntryPointerQTy)
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition: Address.h:44
Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name) override
Creates artificial threadprivate variable with name Name and type VarType.
static llvm::iterator_range< specific_clause_iterator< SpecificClause > > getClausesOfKind(ArrayRef< OMPClause *> Clauses)
Definition: StmtOpenMP.h:130
void registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, llvm::Constant *Addr, llvm::Constant *ID, int32_t Flags)
Register target region entry.
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition: CharUnits.h:63
void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr *> Vars, SourceLocation Loc) override
Emit flush of the variables specified in 'omp flush' directive.
virtual void registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D, llvm::Constant *Dtor, llvm::Constant *Addr)=0
Emit code to force the execution of a destructor during global teardown.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, bool Chunked, bool Ordered)
Map the OpenMP loop schedule to the runtime enumeration.
QualType getType() const
Definition: Expr.h:128
TagDecl * getAsTagDecl() const
Retrieves the TagDecl that this type refers to, either because the type is a TagType or because it is...
Definition: Type.cpp:1594
SmallVector< const Expr *, 4 > FirstprivateVars
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, ArrayType::ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type...
TBAAAccessInfo getTBAAInfo() const
Definition: CGValue.h:305
CharUnits alignmentOfArrayElement(CharUnits elementSize) const
Given that this is the alignment of the first element of an array, return the minimum alignment of an...
Definition: CharUnits.h:197
virtual Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc)
Returns address of the threadprivate variable for the current thread.
LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T)
QualType getRecordType(const RecordDecl *Decl) const
Represents an unpacked "presumed" location which can be presented to the user.
void Emit(CodeGenFunction &CGF, Flags) override
Emit the cleanup.
virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Helper to emit outlined function for 'target' directive.
bool isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a taskloop directive.
llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr) override
Emit code for initialization of a threadprivate variable.
llvm::Value * EmitCastToVoidPtr(llvm::Value *value)
Emit a cast to void* in the appropriate address space.
Definition: CGExpr.cpp:50
const Type * getBaseElementTypeUnsafe() const
Get the base element type of this type, potentially discarding type qualifiers.
Definition: Type.h:6263
This represents clause 'firstprivate' in the '#pragma omp ...' directives.
CGOpenMPRuntime(CodeGenModule &CGM)
ValueDecl * getDecl()
Definition: Expr.h:1041
void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc) override
Emit a taskgroup region.
QualType KmpTaskTQTy
const Qualifiers & getQuals() const
Definition: CGValue.h:308
Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc) override
Returns address of the threadprivate variable for the current thread.
bool isUnionType() const
Definition: Type.cpp:432
const LangOptions & getLangOpts() const
ASTContext & getContext() const
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition: Type.h:719
OpenMPProcBindClauseKind
OpenMP attributes for 'proc_bind' clause.
Definition: OpenMPKinds.h:51
do v
Definition: arm_acle.h:78
llvm::Constant * createDispatchInitFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_init_* runtime function for the specified size IVSize and sign IVSigned...
const SourceManager & SM
Definition: Format.cpp:1337
virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr)
Emits a critical region.
VarDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition: Decl.cpp:1977
GlobalDecl - represents a global declaration.
Definition: GlobalDecl.h:35
bool hasClausesOfKind() const
Returns true if the current directive has one or more clauses of a specific kind. ...
Definition: StmtOpenMP.h:162
llvm::Value * emitParallelOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP parallel directive D.
virtual bool emitTargetGlobalVariable(GlobalDecl GD)
Emit the global variable if it is a valid device global variable.
AttrVec & getAttrs()
Definition: DeclBase.h:477
CanQualType getCanonicalTypeUnqualified() const
bool hasAttrs() const
Definition: DeclBase.h:471
QualType KmpDependInfoTy
Type typedef struct kmp_depend_info { kmp_intptr_t base_addr; size_t len; struct { bool in:1; bool ou...
static llvm::Value * emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime, CodeGenFunction &CGF, const OMPExecutableDirective &D)
Emit the number of threads for a target directive.
LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy)
Definition: CGExpr.cpp:2242
QualType TgtBinaryDescriptorQTy
struct __tgt_bin_desc{ int32_t NumDevices; // Number of devices supported.
The l-value was considered opaque, so the alignment was determined from a type.
RecordDecl * getDecl() const
Definition: Type.h:3986
const char * getFilename() const
Return the presumed filename of this location.
const SpecificClause * getSingleClause() const
Gets a single clause of the specified kind associated with the current directive iff there is only on...
Definition: StmtOpenMP.h:148
static void emitReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp)
Emit reduction combiner.
Expr * getStrideVariable() const
Definition: StmtOpenMP.h:798
This represents 'num_teams' clause in the '#pragma omp ...' directive.
OpaqueValueExpr - An expression referring to an opaque object of a fixed type and value class...
Definition: Expr.h:868
Address CreateBitCast(Address Addr, llvm::Type *Ty, const llvm::Twine &Name="")
Definition: CGBuilder.h:142
static llvm::Value * emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime, CodeGenFunction &CGF, const OMPExecutableDirective &D)
Emit the number of teams for a target directive.
#define false
Definition: stdbool.h:33
Kind
This captures a statement into a function.
Definition: Stmt.h:2058
QualType getCanonicalType() const
Definition: Type.h:5757
IdentFieldIndex
unsigned getColumn() const
Return the presumed column number of this location.
static with chunk adjustment (e.g., simd)
void pushDestroy(QualType::DestructionKind dtorKind, Address addr, QualType type)
pushDestroy - Push the standard destructor for the given type as at least a normal cleanup...
Definition: CGDecl.cpp:1486
void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, const RegionCodeGenTy &ThenGen, const RegionCodeGenTy &ElseGen)
Emits code for OpenMP 'if' clause using specified CodeGen function.
void functionFinished(CodeGenFunction &CGF)
Cleans up references to the objects in finished function.
void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancel' construct.
Encodes a location in the source.
llvm::Type * getIdentTyPointerTy()
Returns pointer to ident_t type.
llvm::Value * MapTypesArray
The array of map types passed to the runtime library.
This represents '#pragma omp declare reduction ...' directive.
Definition: DeclOpenMP.h:102
unsigned getOpenMPDefaultSimdAlign(QualType T) const
Get default simd alignment of the specified complete type in bits.
llvm::Value * EvaluateExprAsBool(const Expr *E)
EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...
Definition: CGExpr.cpp:143
llvm::PointerIntPair< llvm::Value *, 1, bool > Final
bool isVariablyModifiedType() const
Whether this type is a variably-modified type (C99 6.7.5).
Definition: Type.h:1874
virtual void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr *> CopyprivateVars, ArrayRef< const Expr *> DestExprs, ArrayRef< const Expr *> SrcExprs, ArrayRef< const Expr *> AssignmentOps)
Emits a single region.
virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams...
void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) override
Emit an implicit/explicit barrier for OpenMP threads.
This is a basic class for representing single OpenMP executable directive.
Definition: StmtOpenMP.h:33
bool emitTargetGlobalVariable(GlobalDecl GD) override
Emit the global variable if it is a valid device global variable.
llvm::Value * emitTeamsOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP teams directive D.
Lower bound for 'ordered' versions.
ASTContext & getASTContext() const LLVM_READONLY
Definition: DeclBase.cpp:378
const Decl * getDecl() const
Definition: GlobalDecl.h:64
virtual void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancellation point' construct.
OpenMPDirectiveKind
OpenMP directives.
Definition: OpenMPKinds.h:23
bool capturesVariable() const
Determine whether this capture handles a variable (by reference).
Definition: Stmt.h:2099
Set if the nonmonotonic schedule modifier was present.
virtual llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr)
Emit code for initialization of a threadprivate variable.
virtual void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Value *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, const Expr *Device, ArrayRef< llvm::Value *> CapturedVars)
Emit the target offloading code associated with D.
OpenMPLinearClauseKind Modifier
Modifier of 'linear' clause.
Definition: OpenMPClause.h:91
const ParmVarDecl * getParamDecl(unsigned i) const
Definition: Decl.h:2190
static void EmitOMPTargetSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S)
Emit device code for the target simd directive.
void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D) override
Emit initialization for doacross loop nesting support.
void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) override
virtual void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D)
Emit initialization for doacross loop nesting support.
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
bool isTrivialInitializer(const Expr *Init)
Determine whether the given initializer is trivial in the sense that it requires no code to be genera...
Definition: CGDecl.cpp:1197
CanQualType VoidTy
Definition: ASTContext.h:996
bool IVSigned
Sign of the iteration variable.
void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) override
Call the appropriate runtime routine to initialize it before start of loop.
virtual llvm::Value * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts)
Emits outlined function for the OpenMP task directive D.
void emitAggregateType(CodeGenFunction &CGF, unsigned N)
Emits the code for the variably-modified type, if required.
bool isAnyPointerType() const
Definition: Type.h:5946
unsigned size() const
Return number of entries defined so far.
virtual void Enter(CodeGenFunction &CGF)
An aligned address.
Definition: Address.h:25
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
Stmt * getCapturedStmt()
Retrieve the statement being captured.
Definition: Stmt.h:2159
void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) override
Emit the target data mapping code associated with D.
DestructionKind isDestructedType() const
Returns a nonzero value if objects of this type require non-trivial work to clean up after...
Definition: Type.h:1096
virtual void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, ArrayRef< llvm::Value *> CapturedVars, const Expr *IfCond)
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
void emitThreadPrivateVarInit(CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc)
Emits initialization code for the threadprivate variables.
Complete object dtor.
Definition: ABI.h:36
JumpDest getJumpDestInCurrentScope(llvm::BasicBlock *Target)
The given basic block lies in the current EH scope, but may be a target of a potentially scope-crossi...
virtual void emitProcBindClause(CodeGenFunction &CGF, OpenMPProcBindClauseKind ProcBind, SourceLocation Loc)
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...
virtual void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc)
Emits code for a taskyield directive.
QualType getType() const
Definition: CGValue.h:261
CharUnits getNaturalTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, bool forPointeeType=false)
bool hasTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum) const
Return true if a target region entry with the provided information exists.
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N) override
Required to resolve existing problems in the runtime.
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition: Expr.cpp:216
Struct with the values to be passed to the static runtime function.
ConstantAddress GetAddrOfConstantCString(const std::string &Str, const char *GlobalName=nullptr)
Returns a pointer to a character array containing the literal and a terminating '\0' character...
i32 captured_struct **param SharedsTy A type which contains references the shared variables *param Shareds Context with the list of shared variables from the p *TaskFunction *param Data Additional data for task generation like final list of privates etc *TaskResultTy emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, const OMPTaskDataTy &Data)
void emitCall(CodeGenFunction &CGF, llvm::Value *Callee, ArrayRef< llvm::Value *> Args=llvm::None, SourceLocation Loc=SourceLocation()) const
Emits Callee function call with arguments Args with location Loc.
static void EmitOMPTargetTeamsDistributeDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeDirective &S)
Emit device code for the target teams distribute directive.
A basic class for pre|post-action for advanced codegen sequence for OpenMP region.
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc)
Emits address of the word in a memory where current thread id is stored.
JumpDest getOMPCancelDestination(OpenMPDirectiveKind Kind)
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type, returning the result.
void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads) override
Emit an ordered region.
FunctionArgList - Type for representing both the decl and type of parameters to a function...
Definition: CGCall.h:276
i32 captured_struct **param SharedsTy A type which contains references the shared variables *param Shareds Context with the list of shared variables from the p *TaskFunction *param IfCond Not a nullptr if if clause was nullptr *otherwise *param Data Additional data for task generation like final list of privates etc *void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition: CGValue.h:59
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
static void emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, const llvm::APSInt &VLENVal, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State)
CanQualType CharTy
Definition: ASTContext.h:998
void setAction(PrePostActionTy &Action) const
This class organizes the cross-function state that is used while generating LLVM code.
QualType withRestrict() const
Definition: Type.h:834
CGOpenMPRuntime & getOpenMPRuntime()
Return a reference to the configured OpenMP runtime.
OpenMPScheduleClauseModifier
OpenMP modifiers for 'schedule' clause.
Definition: OpenMPKinds.h:67
LValue EmitOMPSharedLValue(const Expr *E)
Emits the lvalue for the expression with possibly captured variable.
Dataflow Directional Tag Classes.
static ApplyDebugLocation CreateDefaultArtificial(CodeGenFunction &CGF, SourceLocation TemporaryLocation)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:658
static void emitForStaticInitCall(CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, const CGOpenMPRuntime::StaticRTInput &Values)
Class provides a way to call simple version of codegen for OpenMP region, or an advanced with possibl...
LValue EmitLoadOfReferenceLValue(LValue RefLVal)
Definition: CGExpr.cpp:2223
static llvm::Value * emitReduceInitFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction initializer function:
bool isValid() const
Return true if this is a valid SourceLocation object.
bool emitTargetFunctions(GlobalDecl GD) override
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
llvm::Value * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts) override
Emits outlined function for the OpenMP task directive D.
DeclContext - This is used only as base class of specific decl types that can act as declaration cont...
Definition: DeclBase.h:1256
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition: CGValue.h:93
Address CreateStructGEP(Address Addr, unsigned Index, CharUnits Offset, const llvm::Twine &Name="")
Definition: CGBuilder.h:172
virtual bool emitTargetGlobal(GlobalDecl GD)
Emit the global GD if it is meaningful for the target.
virtual void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancel' construct.
std::vector< llvm::Triple > OMPTargetTriples
Triples of the OpenMP targets that the host code codegen should take into account in order to generat...
Definition: LangOptions.h:147
OpenMPScheduleClauseModifier M1
Definition: OpenMPKinds.h:126
llvm::Value * LB
Loop lower bound.
A basic class for pre|post-action for advanced codegen sequence for OpenMP region.
LValue getSharedLValue(unsigned N) const
Returns LValue for the reduction item.
const Expr * getInit() const
Definition: Decl.h:1212
llvm::Constant * getPointer() const
Definition: Address.h:84
i32 captured_struct **param SharedsTy A type which contains references the shared variables *param Shareds Context with the list of shared variables from the p *TaskFunction *param IfCond Not a nullptr if if clause was nullptr *otherwise *param Data Additional data for task generation like final list of privates etc *virtual void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
void initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, unsigned Order)
Initialize target region entry.
std::unique_ptr< DiagnosticConsumer > create(StringRef OutputFile, DiagnosticOptions *Diags, bool MergeChildRecords=false)
Returns a DiagnosticConsumer that serializes diagnostics to a bitcode file.
llvm::Function * createOffloadingBinaryDescriptorRegistration()
Creates and registers offloading binary descriptor for the current compilation unit.
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition: CGBuilder.h:70
StmtClass getStmtClass() const
Definition: Stmt.h:378
llvm::PointerIntPair< llvm::Value *, 1, bool > Priority
RTCancelKind
llvm::Constant * EmitNullConstant(QualType T)
Return the result of value-initializing the given type, i.e.
PresumedLoc getPresumedLoc(SourceLocation Loc, bool UseLineDirectives=true) const
Returns the "presumed" location that a SourceLocation specifies.
llvm::Value * UB
Loop upper bound.
llvm::Value * Chunk
Chunk size specified using 'schedule' clause (nullptr if chunk was not specified) ...
void EmitAggregateCopy(Address DestPtr, Address SrcPtr, QualType EltTy, bool isVolatile=false, bool isAssignment=false)
EmitAggregateCopy - Emit an aggregate copy.
Definition: CGExprAgg.cpp:1544
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition: CGBuilder.h:108
llvm::Module & getModule() const
llvm::Value * Chunk
Value of the chunk for the static_chunked scheduled loop.
Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...
Not really used in Fortran any more.
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Emit outlined function for 'target' directive.
Expr * IgnoreParenImpCasts() LLVM_READONLY
IgnoreParenImpCasts - Ignore parentheses and implicit casts.
Definition: Expr.cpp:2552
static CharUnits getIdentSize(CodeGenModule &CGM)
void emitInitialization(CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, llvm::function_ref< bool(CodeGenFunction &)> DefaultInit)
Performs initialization of the private copy for the reduction item.
JumpDest ReturnBlock
ReturnBlock - Unified return block.
OffloadEntriesInfoManagerTy OffloadEntriesInfoManager
virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static non-chunked.
bool capturesVariableByCopy() const
Determine whether this capture handles a variable by copy.
Definition: Stmt.h:2102
API for captured statement code generation.
virtual void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values)
bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a distribute directive.
virtual StringRef getOutlinedHelperName() const
Get the function name of an outlined region.
static bool classof(const OMPClause *T)
virtual void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads)
Emit an ordered region.
This file defines OpenMP AST classes for executable directives and clauses.
static FieldDecl * addFieldToRecordDecl(ASTContext &C, DeclContext *DC, QualType FieldTy)
StructBuilder beginStruct(llvm::StructType *structTy=nullptr)
Address CreateConstArrayGEP(Address Addr, uint64_t Index, CharUnits EltSize, const llvm::Twine &Name="")
Given addr = [n x T]* ...
Definition: CGBuilder.h:195
static void EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDirective &S)
Emit device code for the target teams directive.
CleanupTy(PrePostActionTy *Action)
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition: Address.h:52
static bool checkInitIsRequired(CodeGenFunction &CGF, ArrayRef< PrivateDataTy > Privates)
Check if duplication function is required for taskloops.
llvm::Value * EmitCheckedInBoundsGEP(llvm::Value *Ptr, ArrayRef< llvm::Value *> IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, unsigned &DeviceID, unsigned &FileID, unsigned &LineNum)
Obtain information that uniquely identifies a target entry.
Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal) override
Get the address of void * type of the private copy of the reduction item specified by the SharedLVal...
llvm::AssertingVH< llvm::Instruction > AllocaInsertPt
AllocaInsertPoint - This is an instruction in the entry block before which we prefer to insert alloca...
OpenMPLocationFlags
Values for bit flags used in the ident_t to describe the fields.
virtual void emitInlinedDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool HasCancel=false)
Emit code for the directive that does not require outlining.
OpenMPScheduleClauseKind
OpenMP attributes for 'schedule' clause.
Definition: OpenMPKinds.h:59
Expr * getNumIterations() const
Definition: StmtOpenMP.h:830
Base for LValueReferenceType and RValueReferenceType.
Definition: Type.h:2419
bool hasSameType(QualType T1, QualType T2) const
Determine whether the given types T1 and T2 are equivalent.
Definition: ASTContext.h:2190
StringRef getMangledName(GlobalDecl GD)
Internal linkage, which indicates that the entity can be referred to from within the translation unit...
Definition: Linkage.h:33
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
Definition: CGStmt.cpp:445
void addDecl(Decl *D)
Add the declaration D into this context.
Definition: DeclBase.cpp:1425
llvm::Constant * createDispatchNextFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_next_* runtime function for the specified size IVSize and sign IVSigned...
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
Definition: ASTContext.h:2007
virtual void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc)
Emit a taskgroup region.
SourceManager & getSourceManager()
Definition: ASTContext.h:643
virtual llvm::Function * emitRegistrationFunction()
Creates the offloading descriptor in the event any target region was emitted in the current module an...
llvm::ConstantInt * getSize(CharUnits numChars)
Emit the given number of characters as a value of type size_t.
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
Definition: DeclBase.h:517
X
Add a minimal nested name specifier fixit hint to allow lookup of a tag name from an outer enclosing ...
Definition: SemaDecl.cpp:13010
CharUnits toCharUnitsFromBits(int64_t BitSize) const
Convert a size in bits to a size in characters.
Lower bound for default (unordered) versions.
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, ArrayRef< llvm::Value *> CapturedVars, const Expr *IfCond) override
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
static std::string generateUniqueName(StringRef Prefix, SourceLocation Loc, unsigned N)
Generates unique name for artificial threadprivate variables.
void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams...
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr *> Privates, ArrayRef< const Expr *> LHSExprs, ArrayRef< const Expr *> RHSExprs, ArrayRef< const Expr *> ReductionOps, ReductionOptionsTy Options)
Emit code for a reduction clause.
This represents 'nowait' clause in the '#pragma omp ...' directive.
Definition: OpenMPClause.h:975
llvm::PointerIntPair< llvm::Value *, 1, bool > Schedule
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block, taking care to avoid creation of branches from dummy blocks.
Definition: CGStmt.cpp:465
llvm::Function * CreateGlobalInitOrDestructFunction(llvm::FunctionType *ty, const Twine &name, const CGFunctionInfo &FI, SourceLocation Loc=SourceLocation(), bool TLS=false)
Definition: CGDeclCXX.cpp:302
bool isVoidType() const
Definition: Type.h:6169
llvm::Value * BasePointersArray
The array of base pointers passed to the runtime library.
static QualType getBaseOriginalType(const Expr *Base)
Return original type of the base expression for array section.
Definition: Expr.cpp:4071
static llvm::Value * emitReduceFiniFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction finalizer function:
OpenMPOffloadingReservedDeviceIDs
llvm::Type * ConvertType(QualType T)
bool isTLSSupported() const
Whether the target supports thread-local storage.
Definition: TargetInfo.h:938
void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C) override
Emit code for doacross ordered directive with 'depend' clause.
Privates[]
Gets the list of initial values for linear variables.
Definition: OpenMPClause.h:140
OpenMPMapClauseKind
OpenMP mapping kind for 'map' clause.
Definition: OpenMPKinds.h:92
Qualifiers getQualifiers() const
Retrieve the set of qualifiers applied to this type.
Definition: Type.h:5745
static const Stmt * ignoreCompoundStmts(const Stmt *Body)
Discard all CompoundStmts intervening between two constructs.
LValue EmitLValue(const Expr *E)
EmitLValue - Emit code to compute a designator that specifies the location of the expression...
Definition: CGExpr.cpp:1170
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
void popTerminate()
Pops a terminate handler off the stack.
Definition: CGCleanup.h:583
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
OpenMPRTLFunction
capture_iterator capture_end() const
Retrieve an iterator pointing past the end of the sequence of captures.
Definition: Stmt.h:2206
virtual void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false)
Emit an implicit/explicit barrier for OpenMP threads.
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition: Stmt.cpp:265
void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) override
Emit outlined function for 'target' directive.
RValue EmitLoadOfLValue(LValue V, SourceLocation Loc)
EmitLoadOfLValue - Given an expression that represents a value lvalue, this method emits the address ...
Definition: CGExpr.cpp:1682
static llvm::Value * emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPExecutableDirective &D, QualType KmpTaskTWithPrivatesPtrQTy, const RecordDecl *KmpTaskTWithPrivatesQTyRD, const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool WithLastIter)
Emit task_dup function (for initialization of private/firstprivate/lastprivate vars and last_iter fla...
std::pair< llvm::Value *, QualType > getVLASize(const VariableArrayType *vla)
getVLASize - Returns an LLVM value that corresponds to the size, in non-variably-sized elements...
virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues)
Call the appropriate runtime routine to initialize it before start of loop.
static RecordDecl * createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, ArrayRef< PrivateDataTy > Privates)
CGCapturedStmtInfo * CapturedStmtInfo
llvm::Value * EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)
Emit a conversion from the specified type to the specified destination type, both of which are LLVM s...
CGCXXABI & getCXXABI() const
const VariableArrayType * getAsVariableArrayType(QualType T) const
Definition: ASTContext.h:2324
CanQualType IntTy
Definition: ASTContext.h:1004
capture_range captures()
Definition: Stmt.h:2193
A reference to a declared variable, function, enum, etc.
Definition: Expr.h:956
static RValue get(llvm::Value *V)
Definition: CGValue.h:86
virtual void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values)
Call the appropriate runtime routine to initialize it before start of loop.
llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr *> LHSExprs, ArrayRef< const Expr *> RHSExprs, const OMPTaskDataTy &Data) override
Emit code for initialization of a task reduction clause.
bool isPointerType() const
Definition: Type.h:5942
static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetDirective &S)
Emit device code for the target directive.
void EmitBranchThroughCleanup(JumpDest Dest)
EmitBranchThroughCleanup - Emit a branch from the current insert block through the normal cleanup han...
Definition: CGCleanup.cpp:1034
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
Definition: CGDebugInfo.h:668
ParamKindTy
Kind of parameter in a function with 'declare simd' directive.
static void EmitOMPTargetParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForDirective &S)
Emit device code for the target parallel for directive.
QualType KmpDimTy
struct kmp_dim { // loop bounds info casted to kmp_int64 kmp_int64 lo; // lower kmp_int64 up; // uppe...
void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind) override
Call the appropriate runtime routine to notify that we finished all the work with current loop...
QualType getType() const
Definition: Decl.h:638
An l-value expression is a reference to an object with independent storage.
Definition: Specifiers.h:111
static RValue getAggregate(Address addr, bool isVolatile=false)
Definition: CGValue.h:107
bool isOpenMPLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a directive with an associated loop construct.
LValue - This represents an lvalue reference.
Definition: CGValue.h:167
FunctionDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition: Decl.cpp:2845
Information for lazily generating a cleanup.
Definition: EHScopeStack.h:147
QualType SavedKmpTaskTQTy
Saved kmp_task_t for task directive.
virtual Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal)
Get the address of void * type of the private copy of the reduction item specified by the SharedLVal...
CanQualType BoolTy
Definition: ASTContext.h:997
void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Value *OutlinedFn, ArrayRef< llvm::Value *> CapturedVars) override
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
const LangOptions & getLangOpts() const
llvm::Constant * createRuntimeFunction(unsigned Function)
Returns specified OpenMP runtime function.
static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)
Definition: Decl.cpp:3631
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, const llvm::Twine &Name="")
Definition: CGBuilder.h:164
llvm::Value * emitArrayLength(const ArrayType *arrayType, QualType &baseType, Address &addr)
emitArrayLength - Compute the length of an array, even if it's a VLA, and drill down to the base elem...
unsigned getNumParams() const
Return the number of parameters this function must have based on its FunctionType.
Definition: Decl.cpp:2910
SourceLocation getLocStart() const LLVM_READONLY
Definition: Stmt.cpp:277
bool hasInit() const
Definition: Decl.cpp:2115
Address CreateMemTemp(QualType T, const Twine &Name="tmp", bool CastToDefaultAddrSpace=true)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment...
Definition: CGExpr.cpp:127
No in-class initializer.
Definition: Specifiers.h:227
llvm::Value * getPointer() const
Definition: CGValue.h:320
void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancellation point' construct.
virtual void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc)
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...
bool capturesVariableArrayType() const
Determine whether this capture handles a variable-length array type.
Definition: Stmt.h:2108
bool isLocalVarDeclOrParm() const
Similar to isLocalVarDecl but also includes parameters.
Definition: Decl.h:1104
Attr - This represents one attribute.
Definition: Attr.h:43
SmallVector< const Expr *, 4 > FirstprivateInits
SourceLocation getLocation() const
Definition: DeclBase.h:416
This represents clause 'use_device_ptr' in the '#pragma omp ...' directives.
static void EmitOMPTargetTeamsDistributeSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeSimdDirective &S)
Emit device code for the target teams distribute simd directive.
void EmitNullInitialization(Address DestPtr, QualType Ty)
EmitNullInitialization - Generate code to set a value of the given type to null. If the type contains...
llvm::Constant * getOrCreateThreadPrivateCache(const VarDecl *VD)
If the specified mangled name is not in the module, create and return threadprivate cache object...
llvm::Value * SizesArray
The array of sizes passed to the runtime library.
QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const
getIntTypeForBitwidth - sets integer QualTy according to specified details: bitwidth, signed/unsigned.
static OMPLinearClause * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, OpenMPLinearClauseKind Modifier, SourceLocation ModifierLoc, SourceLocation ColonLoc, SourceLocation EndLoc, ArrayRef< Expr *> VL, ArrayRef< Expr *> PL, ArrayRef< Expr *> IL, Expr *Step, Expr *CalcStep, Stmt *PreInit, Expr *PostUpdate)
Creates clause with a list of variables VL and a linear step Step.
bool Privatize()
Privatizes local variables previously registered as private.
CanQualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc)
Emit code for 'taskwait' directive.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for.
Definition: CGCall.cpp:1524
const llvm::Triple & getTriple() const