clang  10.0.0git
CGOpenMPRuntime.cpp
Go to the documentation of this file.
1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/OpenMPClause.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/AST/StmtVisitor.h"
25 #include "llvm/ADT/ArrayRef.h"
26 #include "llvm/ADT/SetOperations.h"
27 #include "llvm/Bitcode/BitcodeReader.h"
28 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
29 #include "llvm/IR/DerivedTypes.h"
30 #include "llvm/IR/GlobalValue.h"
31 #include "llvm/IR/Value.h"
32 #include "llvm/Support/Format.h"
33 #include "llvm/Support/raw_ostream.h"
34 #include <cassert>
35 
36 using namespace clang;
37 using namespace CodeGen;
38 using namespace llvm::omp;
39 
40 namespace {
41 /// Base class for handling code generation inside OpenMP regions.
42 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
43 public:
44  /// Kinds of OpenMP regions used in codegen.
45  enum CGOpenMPRegionKind {
46  /// Region with outlined function for standalone 'parallel'
47  /// directive.
48  ParallelOutlinedRegion,
49  /// Region with outlined function for standalone 'task' directive.
50  TaskOutlinedRegion,
51  /// Region for constructs that do not require function outlining,
52  /// like 'for', 'sections', 'atomic' etc. directives.
53  InlinedRegion,
54  /// Region with outlined function for standalone 'target' directive.
55  TargetRegion,
56  };
57 
58  CGOpenMPRegionInfo(const CapturedStmt &CS,
59  const CGOpenMPRegionKind RegionKind,
61  bool HasCancel)
62  : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
63  CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
64 
65  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
67  bool HasCancel)
68  : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
69  Kind(Kind), HasCancel(HasCancel) {}
70 
71  /// Get a variable or parameter for storing global thread id
72  /// inside OpenMP construct.
73  virtual const VarDecl *getThreadIDVariable() const = 0;
74 
75  /// Emit the captured statement body.
76  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
77 
78  /// Get an LValue for the current ThreadID variable.
79  /// \return LValue for thread id variable. This LValue always has type int32*.
80  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
81 
82  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
83 
84  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
85 
86  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
87 
88  bool hasCancel() const { return HasCancel; }
89 
90  static bool classof(const CGCapturedStmtInfo *Info) {
91  return Info->getKind() == CR_OpenMP;
92  }
93 
94  ~CGOpenMPRegionInfo() override = default;
95 
96 protected:
97  CGOpenMPRegionKind RegionKind;
98  RegionCodeGenTy CodeGen;
100  bool HasCancel;
101 };
102 
103 /// API for captured statement code generation in OpenMP constructs.
104 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
105 public:
106  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
107  const RegionCodeGenTy &CodeGen,
108  OpenMPDirectiveKind Kind, bool HasCancel,
109  StringRef HelperName)
110  : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
111  HasCancel),
112  ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
113  assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
114  }
115 
116  /// Get a variable or parameter for storing global thread id
117  /// inside OpenMP construct.
118  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
119 
120  /// Get the name of the capture helper.
121  StringRef getHelperName() const override { return HelperName; }
122 
123  static bool classof(const CGCapturedStmtInfo *Info) {
124  return CGOpenMPRegionInfo::classof(Info) &&
125  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
126  ParallelOutlinedRegion;
127  }
128 
129 private:
130  /// A variable or parameter storing global thread id for OpenMP
131  /// constructs.
132  const VarDecl *ThreadIDVar;
133  StringRef HelperName;
134 };
135 
136 /// API for captured statement code generation in OpenMP constructs.
137 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
138 public:
139  class UntiedTaskActionTy final : public PrePostActionTy {
140  bool Untied;
141  const VarDecl *PartIDVar;
142  const RegionCodeGenTy UntiedCodeGen;
143  llvm::SwitchInst *UntiedSwitch = nullptr;
144 
145  public:
146  UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
147  const RegionCodeGenTy &UntiedCodeGen)
148  : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
149  void Enter(CodeGenFunction &CGF) override {
150  if (Untied) {
151  // Emit task switching point.
152  LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
153  CGF.GetAddrOfLocalVar(PartIDVar),
154  PartIDVar->getType()->castAs<PointerType>());
155  llvm::Value *Res =
156  CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
157  llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
158  UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
159  CGF.EmitBlock(DoneBB);
161  CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
162  UntiedSwitch->addCase(CGF.Builder.getInt32(0),
163  CGF.Builder.GetInsertBlock());
164  emitUntiedSwitch(CGF);
165  }
166  }
167  void emitUntiedSwitch(CodeGenFunction &CGF) const {
168  if (Untied) {
169  LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
170  CGF.GetAddrOfLocalVar(PartIDVar),
171  PartIDVar->getType()->castAs<PointerType>());
172  CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
173  PartIdLVal);
174  UntiedCodeGen(CGF);
175  CodeGenFunction::JumpDest CurPoint =
176  CGF.getJumpDestInCurrentScope(".untied.next.");
178  CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
179  UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
180  CGF.Builder.GetInsertBlock());
181  CGF.EmitBranchThroughCleanup(CurPoint);
182  CGF.EmitBlock(CurPoint.getBlock());
183  }
184  }
185  unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
186  };
187  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
188  const VarDecl *ThreadIDVar,
189  const RegionCodeGenTy &CodeGen,
190  OpenMPDirectiveKind Kind, bool HasCancel,
191  const UntiedTaskActionTy &Action)
192  : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
193  ThreadIDVar(ThreadIDVar), Action(Action) {
194  assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
195  }
196 
197  /// Get a variable or parameter for storing global thread id
198  /// inside OpenMP construct.
199  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
200 
201  /// Get an LValue for the current ThreadID variable.
202  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
203 
204  /// Get the name of the capture helper.
205  StringRef getHelperName() const override { return ".omp_outlined."; }
206 
207  void emitUntiedSwitch(CodeGenFunction &CGF) override {
208  Action.emitUntiedSwitch(CGF);
209  }
210 
211  static bool classof(const CGCapturedStmtInfo *Info) {
212  return CGOpenMPRegionInfo::classof(Info) &&
213  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
214  TaskOutlinedRegion;
215  }
216 
217 private:
218  /// A variable or parameter storing global thread id for OpenMP
219  /// constructs.
220  const VarDecl *ThreadIDVar;
221  /// Action for emitting code for untied tasks.
222  const UntiedTaskActionTy &Action;
223 };
224 
225 /// API for inlined captured statement code generation in OpenMP
226 /// constructs.
227 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
228 public:
229  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
230  const RegionCodeGenTy &CodeGen,
231  OpenMPDirectiveKind Kind, bool HasCancel)
232  : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
233  OldCSI(OldCSI),
234  OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
235 
236  // Retrieve the value of the context parameter.
237  llvm::Value *getContextValue() const override {
238  if (OuterRegionInfo)
239  return OuterRegionInfo->getContextValue();
240  llvm_unreachable("No context value for inlined OpenMP region");
241  }
242 
243  void setContextValue(llvm::Value *V) override {
244  if (OuterRegionInfo) {
245  OuterRegionInfo->setContextValue(V);
246  return;
247  }
248  llvm_unreachable("No context value for inlined OpenMP region");
249  }
250 
251  /// Lookup the captured field decl for a variable.
252  const FieldDecl *lookup(const VarDecl *VD) const override {
253  if (OuterRegionInfo)
254  return OuterRegionInfo->lookup(VD);
255  // If there is no outer outlined region,no need to lookup in a list of
256  // captured variables, we can use the original one.
257  return nullptr;
258  }
259 
260  FieldDecl *getThisFieldDecl() const override {
261  if (OuterRegionInfo)
262  return OuterRegionInfo->getThisFieldDecl();
263  return nullptr;
264  }
265 
266  /// Get a variable or parameter for storing global thread id
267  /// inside OpenMP construct.
268  const VarDecl *getThreadIDVariable() const override {
269  if (OuterRegionInfo)
270  return OuterRegionInfo->getThreadIDVariable();
271  return nullptr;
272  }
273 
274  /// Get an LValue for the current ThreadID variable.
275  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
276  if (OuterRegionInfo)
277  return OuterRegionInfo->getThreadIDVariableLValue(CGF);
278  llvm_unreachable("No LValue for inlined OpenMP construct");
279  }
280 
281  /// Get the name of the capture helper.
282  StringRef getHelperName() const override {
283  if (auto *OuterRegionInfo = getOldCSI())
284  return OuterRegionInfo->getHelperName();
285  llvm_unreachable("No helper name for inlined OpenMP construct");
286  }
287 
288  void emitUntiedSwitch(CodeGenFunction &CGF) override {
289  if (OuterRegionInfo)
290  OuterRegionInfo->emitUntiedSwitch(CGF);
291  }
292 
293  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
294 
295  static bool classof(const CGCapturedStmtInfo *Info) {
296  return CGOpenMPRegionInfo::classof(Info) &&
297  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
298  }
299 
300  ~CGOpenMPInlinedRegionInfo() override = default;
301 
302 private:
303  /// CodeGen info about outer OpenMP region.
305  CGOpenMPRegionInfo *OuterRegionInfo;
306 };
307 
308 /// API for captured statement code generation in OpenMP target
309 /// constructs. For this captures, implicit parameters are used instead of the
310 /// captured fields. The name of the target region has to be unique in a given
311 /// application so it is provided by the client, because only the client has
312 /// the information to generate that.
313 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
314 public:
315  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
316  const RegionCodeGenTy &CodeGen, StringRef HelperName)
317  : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
318  /*HasCancel=*/false),
319  HelperName(HelperName) {}
320 
321  /// This is unused for target regions because each starts executing
322  /// with a single thread.
323  const VarDecl *getThreadIDVariable() const override { return nullptr; }
324 
325  /// Get the name of the capture helper.
326  StringRef getHelperName() const override { return HelperName; }
327 
328  static bool classof(const CGCapturedStmtInfo *Info) {
329  return CGOpenMPRegionInfo::classof(Info) &&
330  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
331  }
332 
333 private:
334  StringRef HelperName;
335 };
336 
337 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
338  llvm_unreachable("No codegen for expressions");
339 }
340 /// API for generation of expressions captured in a innermost OpenMP
341 /// region.
342 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
343 public:
344  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
345  : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
346  OMPD_unknown,
347  /*HasCancel=*/false),
348  PrivScope(CGF) {
349  // Make sure the globals captured in the provided statement are local by
350  // using the privatization logic. We assume the same variable is not
351  // captured more than once.
352  for (const auto &C : CS.captures()) {
353  if (!C.capturesVariable() && !C.capturesVariableByCopy())
354  continue;
355 
356  const VarDecl *VD = C.getCapturedVar();
357  if (VD->isLocalVarDeclOrParm())
358  continue;
359 
360  DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
361  /*RefersToEnclosingVariableOrCapture=*/false,
363  C.getLocation());
364  PrivScope.addPrivate(
365  VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
366  }
367  (void)PrivScope.Privatize();
368  }
369 
370  /// Lookup the captured field decl for a variable.
371  const FieldDecl *lookup(const VarDecl *VD) const override {
372  if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
373  return FD;
374  return nullptr;
375  }
376 
377  /// Emit the captured statement body.
378  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
379  llvm_unreachable("No body for expressions");
380  }
381 
382  /// Get a variable or parameter for storing global thread id
383  /// inside OpenMP construct.
384  const VarDecl *getThreadIDVariable() const override {
385  llvm_unreachable("No thread id for expressions");
386  }
387 
388  /// Get the name of the capture helper.
389  StringRef getHelperName() const override {
390  llvm_unreachable("No helper name for expressions");
391  }
392 
393  static bool classof(const CGCapturedStmtInfo *Info) { return false; }
394 
395 private:
396  /// Private scope to capture global variables.
398 };
399 
400 /// RAII for emitting code of OpenMP constructs.
401 class InlinedOpenMPRegionRAII {
402  CodeGenFunction &CGF;
403  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
404  FieldDecl *LambdaThisCaptureField = nullptr;
405  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
406 
407 public:
408  /// Constructs region for combined constructs.
409  /// \param CodeGen Code generation sequence for combined directives. Includes
410  /// a list of functions used for code generation of implicitly inlined
411  /// regions.
412  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
413  OpenMPDirectiveKind Kind, bool HasCancel)
414  : CGF(CGF) {
415  // Start emission for the construct.
416  CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
417  CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
418  std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
419  LambdaThisCaptureField = CGF.LambdaThisCaptureField;
420  CGF.LambdaThisCaptureField = nullptr;
421  BlockInfo = CGF.BlockInfo;
422  CGF.BlockInfo = nullptr;
423  }
424 
425  ~InlinedOpenMPRegionRAII() {
426  // Restore original CapturedStmtInfo only if we're done with code emission.
427  auto *OldCSI =
428  cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
429  delete CGF.CapturedStmtInfo;
430  CGF.CapturedStmtInfo = OldCSI;
431  std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
432  CGF.LambdaThisCaptureField = LambdaThisCaptureField;
433  CGF.BlockInfo = BlockInfo;
434  }
435 };
436 
437 /// Values for bit flags used in the ident_t to describe the fields.
438 /// All enumeric elements are named and described in accordance with the code
439 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
440 enum OpenMPLocationFlags : unsigned {
441  /// Use trampoline for internal microtask.
442  OMP_IDENT_IMD = 0x01,
443  /// Use c-style ident structure.
444  OMP_IDENT_KMPC = 0x02,
445  /// Atomic reduction option for kmpc_reduce.
446  OMP_ATOMIC_REDUCE = 0x10,
447  /// Explicit 'barrier' directive.
448  OMP_IDENT_BARRIER_EXPL = 0x20,
449  /// Implicit barrier in code.
450  OMP_IDENT_BARRIER_IMPL = 0x40,
451  /// Implicit barrier in 'for' directive.
452  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
453  /// Implicit barrier in 'sections' directive.
454  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
455  /// Implicit barrier in 'single' directive.
456  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
457  /// Call of __kmp_for_static_init for static loop.
458  OMP_IDENT_WORK_LOOP = 0x200,
459  /// Call of __kmp_for_static_init for sections.
460  OMP_IDENT_WORK_SECTIONS = 0x400,
461  /// Call of __kmp_for_static_init for distribute.
462  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
463  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
464 };
465 
466 namespace {
468 /// Values for bit flags for marking which requires clauses have been used.
470  /// flag undefined.
471  OMP_REQ_UNDEFINED = 0x000,
472  /// no requires clause present.
473  OMP_REQ_NONE = 0x001,
474  /// reverse_offload clause.
475  OMP_REQ_REVERSE_OFFLOAD = 0x002,
476  /// unified_address clause.
477  OMP_REQ_UNIFIED_ADDRESS = 0x004,
478  /// unified_shared_memory clause.
479  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
480  /// dynamic_allocators clause.
481  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
482  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
483 };
484 
486  /// Device ID if the device was not defined, runtime should get it
487  /// from environment variables in the spec.
488  OMP_DEVICEID_UNDEF = -1,
489 };
490 } // anonymous namespace
491 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
/// kmp_int32 reserved_1; /**< might be used in Fortran;
/// see above */
/// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags;
/// KMP_IDENT_KMPC identifies this union
/// member */
/// kmp_int32 reserved_2; /**< not really used in Fortran any more;
/// see above */
///#if USE_ITT_BUILD
/// /* but currently used for storing
/// region-specific ITT */
/// /* contextual information. */
///#endif /* USE_ITT_BUILD */
/// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for
/// C++ */
/// char const *psource; /**< String describing the source location.
/// The string is composed of semi-colon separated
/// fields which describe the source file,
/// the function and a pair of line numbers that
/// delimit the construct.
/// */
/// } ident_t;
// NOTE(review): the enum header and enumerator identifiers were dropped by
// extraction; restored here from the per-field comments. The order mirrors
// the ident_t field layout above, so the values index GEPs into ident_t.
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
532 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
// NOTE(review): the enum header, enumerator identifiers and values were
// dropped by extraction; reconstructed here from the sched_type enum in the
// OpenMP runtime's kmp.h — confirm against the original revision.
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
564 
// NOTE(review): the enum header and every enumerator identifier in this enum
// were dropped by extraction; they are reconstructed below from the per-entry
// signature comments (OMPRTL__<runtime entry name>) — confirm against the
// original revision.
/// IDs of the OpenMP runtime library (libomp/libomptarget) entry points that
/// this class may emit calls to.
enum OpenMPRTLFunction {
  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// Call to void __kmpc_threadprivate_register(ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  // global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_cancel_barrier,
  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_serialized_parallel,
  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  // Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  // int end_part);
  OMPRTL__kmpc_omp_taskyield,
  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
  // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
  // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
  // kmp_int64 device_id);
  OMPRTL__kmpc_omp_target_task_alloc,
  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  // new_task);
  OMPRTL__kmpc_omp_task,
  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  // kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  // *lck);
  OMPRTL__kmpc_reduce_nowait,
  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_omp_taskwait,
  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  // int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  // microtask, ...);
  OMPRTL__kmpc_fork_teams,
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  // num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_post,
  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_wait,
  // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  OMPRTL__kmpc_task_reduction_init,
  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  OMPRTL__kmpc_task_reduction_get_th_data,
  // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
  OMPRTL__kmpc_alloc,
  // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
  OMPRTL__kmpc_free,

  //
  // Offloading related calls
  //
  // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
  // size);
  OMPRTL__kmpc_push_target_tripcount,
  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target,
  // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_nowait,
  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
  // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams_nowait,
  // Call to void __tgt_register_requires(int64_t flags);
  OMPRTL__tgt_register_requires,
  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_end,
  // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_end_nowait,
  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_update,
  // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_update_nowait,
  // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
  OMPRTL__tgt_mapper_num_components,
  // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
  // *base, void *begin, int64_t size, int64_t type);
  OMPRTL__tgt_push_mapper_component,
};
763 
764 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
765 /// region.
766 class CleanupTy final : public EHScopeStack::Cleanup {
767  PrePostActionTy *Action;
768 
769 public:
770  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
771  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
772  if (!CGF.HaveInsertPoint())
773  return;
774  Action->Exit(CGF);
775  }
776 };
777 
778 } // anonymous namespace
779 
782  if (PrePostAction) {
783  CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
784  Callback(CodeGen, CGF, *PrePostAction);
785  } else {
786  PrePostActionTy Action;
787  Callback(CodeGen, CGF, Action);
788  }
789 }
790 
791 /// Check if the combiner is a call to UDR combiner and if it is so return the
792 /// UDR decl used for reduction.
793 static const OMPDeclareReductionDecl *
794 getReductionInit(const Expr *ReductionOp) {
795  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
796  if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
797  if (const auto *DRE =
798  dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
799  if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
800  return DRD;
801  return nullptr;
802 }
803 
805  const OMPDeclareReductionDecl *DRD,
806  const Expr *InitOp,
807  Address Private, Address Original,
808  QualType Ty) {
809  if (DRD->getInitializer()) {
810  std::pair<llvm::Function *, llvm::Function *> Reduction =
811  CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
812  const auto *CE = cast<CallExpr>(InitOp);
813  const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
814  const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
815  const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
816  const auto *LHSDRE =
817  cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
818  const auto *RHSDRE =
819  cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
820  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
821  PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
822  [=]() { return Private; });
823  PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
824  [=]() { return Original; });
825  (void)PrivateScope.Privatize();
826  RValue Func = RValue::get(Reduction.second);
827  CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
828  CGF.EmitIgnoredExpr(InitOp);
829  } else {
830  llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
831  std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
832  auto *GV = new llvm::GlobalVariable(
833  CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
834  llvm::GlobalValue::PrivateLinkage, Init, Name);
835  LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
836  RValue InitRVal;
837  switch (CGF.getEvaluationKind(Ty)) {
838  case TEK_Scalar:
839  InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
840  break;
841  case TEK_Complex:
842  InitRVal =
844  break;
845  case TEK_Aggregate:
846  InitRVal = RValue::getAggregate(LV.getAddress(CGF));
847  break;
848  }
849  OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
850  CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
851  CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
852  /*IsInitializer=*/false);
853  }
854 }
855 
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, each element is initialized with
/// the user-defined reduction initializer \p Init; otherwise \p Init is the
/// private variable's own initializer expression.
/// \param Init Initial expression of array.
/// \param DRD Matching declare-reduction declaration, or null if none.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // A declare-reduction initializer may read the original (shared) element,
  // so when DRD is present a source pointer is advanced in lockstep with the
  // destination pointer.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Scope the per-element cleanups so temporaries created by the
    // initializer are destroyed on each iteration.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
944 
/// Emit the lvalue for the shared (original) copy of a reduction item.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
948 
949 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
950  const Expr *E) {
951  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
952  return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
953  return LValue();
954 }
955 
956 void ReductionCodeGen::emitAggregateInitialization(
957  CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
958  const OMPDeclareReductionDecl *DRD) {
959  // Emit VarDecl with copy init for arrays.
960  // Get the address of the original variable captured in current
961  // captured region.
962  const auto *PrivateVD =
963  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
964  bool EmitDeclareReductionInit =
965  DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
966  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
967  EmitDeclareReductionInit,
968  EmitDeclareReductionInit ? ClausesData[N].ReductionOp
969  : PrivateVD->getInit(),
970  DRD, SharedLVal.getAddress(CGF));
971 }
972 
975  ArrayRef<const Expr *> ReductionOps) {
976  ClausesData.reserve(Shareds.size());
977  SharedAddresses.reserve(Shareds.size());
978  Sizes.reserve(Shareds.size());
979  BaseDecls.reserve(Shareds.size());
980  auto IPriv = Privates.begin();
981  auto IRed = ReductionOps.begin();
982  for (const Expr *Ref : Shareds) {
983  ClausesData.emplace_back(Ref, *IPriv, *IRed);
984  std::advance(IPriv, 1);
985  std::advance(IRed, 1);
986  }
987 }
988 
void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
  // Shared lvalues are generated in clause order; N must be the next slot.
  assert(SharedAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  // First is the lvalue of the item itself; Second is the upper bound when
  // the item is an array section (a default LValue otherwise).
  LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
  SharedAddresses.emplace_back(First, Second);
}
996 
998  const auto *PrivateVD =
999  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1000  QualType PrivateType = PrivateVD->getType();
1001  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
1002  if (!PrivateType->isVariablyModifiedType()) {
1003  Sizes.emplace_back(
1004  CGF.getTypeSize(
1005  SharedAddresses[N].first.getType().getNonReferenceType()),
1006  nullptr);
1007  return;
1008  }
1009  llvm::Value *Size;
1010  llvm::Value *SizeInChars;
1011  auto *ElemType = cast<llvm::PointerType>(
1012  SharedAddresses[N].first.getPointer(CGF)->getType())
1013  ->getElementType();
1014  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
1015  if (AsArraySection) {
1016  Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(CGF),
1017  SharedAddresses[N].first.getPointer(CGF));
1018  Size = CGF.Builder.CreateNUWAdd(
1019  Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
1020  SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
1021  } else {
1022  SizeInChars = CGF.getTypeSize(
1023  SharedAddresses[N].first.getType().getNonReferenceType());
1024  Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
1025  }
1026  Sizes.emplace_back(SizeInChars, Size);
1028  CGF,
1029  cast<OpaqueValueExpr>(
1030  CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1031  RValue::get(Size));
1032  CGF.EmitVariablyModifiedType(PrivateType);
1033 }
1034 
1036  llvm::Value *Size) {
1037  const auto *PrivateVD =
1038  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1039  QualType PrivateType = PrivateVD->getType();
1040  if (!PrivateType->isVariablyModifiedType()) {
1041  assert(!Size && !Sizes[N].second &&
1042  "Size should be nullptr for non-variably modified reduction "
1043  "items.");
1044  return;
1045  }
1047  CGF,
1048  cast<OpaqueValueExpr>(
1049  CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1050  RValue::get(Size));
1051  CGF.EmitVariablyModifiedType(PrivateType);
1052 }
1053 
1055  CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
1056  llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
1057  assert(SharedAddresses.size() > N && "No variable was generated");
1058  const auto *PrivateVD =
1059  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1060  const OMPDeclareReductionDecl *DRD =
1061  getReductionInit(ClausesData[N].ReductionOp);
1062  QualType PrivateType = PrivateVD->getType();
1063  PrivateAddr = CGF.Builder.CreateElementBitCast(
1064  PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1065  QualType SharedType = SharedAddresses[N].first.getType();
1066  SharedLVal = CGF.MakeAddrLValue(
1067  CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
1068  CGF.ConvertTypeForMem(SharedType)),
1069  SharedType, SharedAddresses[N].first.getBaseInfo(),
1070  CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
1071  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
1072  emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
1073  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
1074  emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
1075  PrivateAddr, SharedLVal.getAddress(CGF),
1076  SharedLVal.getType());
1077  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
1078  !CGF.isTrivialInitializer(PrivateVD->getInit())) {
1079  CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
1080  PrivateVD->getType().getQualifiers(),
1081  /*IsInitializer=*/false);
1082  }
1083 }
1084 
1086  const auto *PrivateVD =
1087  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1088  QualType PrivateType = PrivateVD->getType();
1089  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1090  return DTorKind != QualType::DK_none;
1091 }
1092 
1094  Address PrivateAddr) {
1095  const auto *PrivateVD =
1096  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1097  QualType PrivateType = PrivateVD->getType();
1098  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1099  if (needCleanups(N)) {
1100  PrivateAddr = CGF.Builder.CreateElementBitCast(
1101  PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1102  CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1103  }
1104 }
1105 
1107  LValue BaseLV) {
1108  BaseTy = BaseTy.getNonReferenceType();
1109  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1110  !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1111  if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
1112  BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
1113  } else {
1114  LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
1115  BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
1116  }
1117  BaseTy = BaseTy->getPointeeType();
1118  }
1119  return CGF.MakeAddrLValue(
1120  CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
1121  CGF.ConvertTypeForMem(ElTy)),
1122  BaseLV.getType(), BaseLV.getBaseInfo(),
1123  CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
1124 }
1125 
1127  llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
1128  llvm::Value *Addr) {
1129  Address Tmp = Address::invalid();
1130  Address TopTmp = Address::invalid();
1131  Address MostTopTmp = Address::invalid();
1132  BaseTy = BaseTy.getNonReferenceType();
1133  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1134  !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1135  Tmp = CGF.CreateMemTemp(BaseTy);
1136  if (TopTmp.isValid())
1137  CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
1138  else
1139  MostTopTmp = Tmp;
1140  TopTmp = Tmp;
1141  BaseTy = BaseTy->getPointeeType();
1142  }
1143  llvm::Type *Ty = BaseLVType;
1144  if (Tmp.isValid())
1145  Ty = Tmp.getElementType();
1146  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
1147  if (Tmp.isValid()) {
1148  CGF.Builder.CreateStore(Addr, Tmp);
1149  return MostTopTmp;
1150  }
1151  return Address(Addr, BaseLVAlignment);
1152 }
1153 
1154 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1155  const VarDecl *OrigVD = nullptr;
1156  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1157  const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1158  while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1159  Base = TempOASE->getBase()->IgnoreParenImpCasts();
1160  while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1161  Base = TempASE->getBase()->IgnoreParenImpCasts();
1162  DE = cast<DeclRefExpr>(Base);
1163  OrigVD = cast<VarDecl>(DE->getDecl());
1164  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1165  const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1166  while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1167  Base = TempASE->getBase()->IgnoreParenImpCasts();
1168  DE = cast<DeclRefExpr>(Base);
1169  OrigVD = cast<VarDecl>(DE->getDecl());
1170  }
1171  return OrigVD;
1172 }
1173 
1175  Address PrivateAddr) {
1176  const DeclRefExpr *DE;
1177  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1178  BaseDecls.emplace_back(OrigVD);
1179  LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1180  LValue BaseLValue =
1181  loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1182  OriginalBaseLValue);
1183  llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1184  BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
1185  llvm::Value *PrivatePointer =
1187  PrivateAddr.getPointer(),
1188  SharedAddresses[N].first.getAddress(CGF).getType());
1189  llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1190  return castToBase(CGF, OrigVD->getType(),
1191  SharedAddresses[N].first.getType(),
1192  OriginalBaseLValue.getAddress(CGF).getType(),
1193  OriginalBaseLValue.getAlignment(), Ptr);
1194  }
1195  BaseDecls.emplace_back(
1196  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1197  return PrivateAddr;
1198 }
1199 
1201  const OMPDeclareReductionDecl *DRD =
1202  getReductionInit(ClausesData[N].ReductionOp);
1203  return DRD && DRD->getInitializer();
1204 }
1205 
1206 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1207  return CGF.EmitLoadOfPointerLValue(
1208  CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1209  getThreadIDVariable()->getType()->castAs<PointerType>());
1210 }
1211 
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  // Run the stored codegen callback inside a terminate scope so an exception
  // cannot propagate out of the structured block.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1224 
1225 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1226  CodeGenFunction &CGF) {
1227  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1228  getThreadIDVariable()->getType(),
1230 }
1231 
1233  QualType FieldTy) {
1234  auto *Field = FieldDecl::Create(
1235  C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1237  /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1238  Field->setAccess(AS_public);
1239  DC->addDecl(Field);
1240  return Field;
1241 }
1242 
1243 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1244  StringRef Separator)
1245  : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1247  ASTContext &C = CGM.getContext();
1248  RecordDecl *RD = C.buildImplicitRecord("ident_t");
1249  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1250  RD->startDefinition();
1251  // reserved_1
1252  addFieldToRecordDecl(C, RD, KmpInt32Ty);
1253  // flags
1254  addFieldToRecordDecl(C, RD, KmpInt32Ty);
1255  // reserved_2
1256  addFieldToRecordDecl(C, RD, KmpInt32Ty);
1257  // reserved_3
1258  addFieldToRecordDecl(C, RD, KmpInt32Ty);
1259  // psource
1260  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1261  RD->completeDefinition();
1262  IdentQTy = C.getRecordType(RD);
1263  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
1264  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1265 
1267 }
1268 
1270  const GlobalDecl &OldGD,
1271  llvm::GlobalValue *OrigAddr,
1272  bool IsForDefinition) {
1273  // Emit at least a definition for the aliasee if the the address of the
1274  // original function is requested.
1275  if (IsForDefinition || OrigAddr)
1276  (void)CGM.GetAddrOfGlobal(NewGD);
1277  StringRef NewMangledName = CGM.getMangledName(NewGD);
1278  llvm::GlobalValue *Addr = CGM.GetGlobalValue(NewMangledName);
1279  if (Addr && !Addr->isDeclaration()) {
1280  const auto *D = cast<FunctionDecl>(OldGD.getDecl());
1281  const CGFunctionInfo &FI = CGM.getTypes().arrangeGlobalDeclaration(NewGD);
1283 
1284  // Create a reference to the named value. This ensures that it is emitted
1285  // if a deferred decl.
1286  llvm::GlobalValue::LinkageTypes LT = CGM.getFunctionLinkage(OldGD);
1287 
1288  // Create the new alias itself, but don't set a name yet.
1289  auto *GA =
1290  llvm::GlobalAlias::create(DeclTy, 0, LT, "", Addr, &CGM.getModule());
1291 
1292  if (OrigAddr) {
1293  assert(OrigAddr->isDeclaration() && "Expected declaration");
1294 
1295  GA->takeName(OrigAddr);
1296  OrigAddr->replaceAllUsesWith(
1297  llvm::ConstantExpr::getBitCast(GA, OrigAddr->getType()));
1298  OrigAddr->eraseFromParent();
1299  } else {
1300  GA->setName(CGM.getMangledName(OldGD));
1301  }
1302 
1303  // Set attributes which are particular to an alias; this is a
1304  // specialization of the attributes which may be set on a global function.
1305  if (D->hasAttr<WeakAttr>() || D->hasAttr<WeakRefAttr>() ||
1306  D->isWeakImported())
1307  GA->setLinkage(llvm::Function::WeakAnyLinkage);
1308 
1309  CGM.SetCommonAttributes(OldGD, GA);
1310  return true;
1311  }
1312  return false;
1313 }
1314 
1315 void CGOpenMPRuntime::clear() {
1316  InternalVars.clear();
1317  // Clean non-target variable declarations possibly used only in debug info.
1318  for (const auto &Data : EmittedNonTargetVariables) {
1319  if (!Data.getValue().pointsToAliveValue())
1320  continue;
1321  auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1322  if (!GV)
1323  continue;
1324  if (!GV->isDeclaration() || GV->getNumUses() > 0)
1325  continue;
1326  GV->eraseFromParent();
1327  }
1328  // Emit aliases for the deferred aliasees.
1329  for (const auto &Pair : DeferredVariantFunction) {
1330  StringRef MangledName = CGM.getMangledName(Pair.second.second);
1331  llvm::GlobalValue *Addr = CGM.GetGlobalValue(MangledName);
1332  // If not able to emit alias, just emit original declaration.
1333  (void)tryEmitDeclareVariant(Pair.second.first, Pair.second.second, Addr,
1334  /*IsForDefinition=*/false);
1335  }
1336 }
1337 
1338 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1339  SmallString<128> Buffer;
1340  llvm::raw_svector_ostream OS(Buffer);
1341  StringRef Sep = FirstSeparator;
1342  for (StringRef Part : Parts) {
1343  OS << Sep << Part;
1344  Sep = Separator;
1345  }
1346  return OS.str();
1347 }
1348 
1349 static llvm::Function *
1351  const Expr *CombinerInitializer, const VarDecl *In,
1352  const VarDecl *Out, bool IsCombiner) {
1353  // void .omp_combiner.(Ty *in, Ty *out);
1354  ASTContext &C = CGM.getContext();
1355  QualType PtrTy = C.getPointerType(Ty).withRestrict();
1356  FunctionArgList Args;
1357  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1358  /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1359  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1360  /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1361  Args.push_back(&OmpOutParm);
1362  Args.push_back(&OmpInParm);
1363  const CGFunctionInfo &FnInfo =
1365  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1366  std::string Name = CGM.getOpenMPRuntime().getName(
1367  {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
1369  Name, &CGM.getModule());
1370  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
1371  if (CGM.getLangOpts().Optimize) {
1372  Fn->removeFnAttr(llvm::Attribute::NoInline);
1373  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1374  Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1375  }
1376  CodeGenFunction CGF(CGM);
1377  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1378  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1379  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
1380  Out->getLocation());
1382  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1383  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
1384  return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1385  .getAddress(CGF);
1386  });
1387  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1388  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
1389  return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1390  .getAddress(CGF);
1391  });
1392  (void)Scope.Privatize();
1393  if (!IsCombiner && Out->hasInit() &&
1394  !CGF.isTrivialInitializer(Out->getInit())) {
1395  CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1396  Out->getType().getQualifiers(),
1397  /*IsInitializer=*/true);
1398  }
1399  if (CombinerInitializer)
1400  CGF.EmitIgnoredExpr(CombinerInitializer);
1401  Scope.ForceCleanup();
1402  CGF.FinishFunction();
1403  return Fn;
1404 }
1405 
1407  CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1408  if (UDRMap.count(D) > 0)
1409  return;
1410  llvm::Function *Combiner = emitCombinerOrInitializer(
1411  CGM, D->getType(), D->getCombiner(),
1412  cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1413  cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1414  /*IsCombiner=*/true);
1415  llvm::Function *Initializer = nullptr;
1416  if (const Expr *Init = D->getInitializer()) {
1417  Initializer = emitCombinerOrInitializer(
1418  CGM, D->getType(),
1420  : nullptr,
1421  cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1422  cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1423  /*IsCombiner=*/false);
1424  }
1425  UDRMap.try_emplace(D, Combiner, Initializer);
1426  if (CGF) {
1427  auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1428  Decls.second.push_back(D);
1429  }
1430 }
1431 
1432 std::pair<llvm::Function *, llvm::Function *>
1434  auto I = UDRMap.find(D);
1435  if (I != UDRMap.end())
1436  return I->second;
1437  emitUserDefinedReduction(/*CGF=*/nullptr, D);
1438  return UDRMap.lookup(D);
1439 }
1440 
1441 namespace {
1442 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1443 // Builder if one is present.
1444 struct PushAndPopStackRAII {
1445  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1446  bool HasCancel)
1447  : OMPBuilder(OMPBuilder) {
1448  if (!OMPBuilder)
1449  return;
1450 
1451  // The following callback is the crucial part of clangs cleanup process.
1452  //
1453  // NOTE:
1454  // Once the OpenMPIRBuilder is used to create parallel regions (and
1455  // similar), the cancellation destination (Dest below) is determined via
1456  // IP. That means if we have variables to finalize we split the block at IP,
1457  // use the new block (=BB) as destination to build a JumpDest (via
1458  // getJumpDestInCurrentScope(BB)) which then is fed to
1459  // EmitBranchThroughCleanup. Furthermore, there will not be the need
1460  // to push & pop an FinalizationInfo object.
1461  // The FiniCB will still be needed but at the point where the
1462  // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1463  auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1464  assert(IP.getBlock()->end() == IP.getPoint() &&
1465  "Clang CG should cause non-terminated block!");
1466  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1467  CGF.Builder.restoreIP(IP);
1469  CGF.getOMPCancelDestination(OMPD_parallel);
1470  CGF.EmitBranchThroughCleanup(Dest);
1471  };
1472 
1473  // TODO: Remove this once we emit parallel regions through the
1474  // OpenMPIRBuilder as it can do this setup internally.
1475  llvm::OpenMPIRBuilder::FinalizationInfo FI(
1476  {FiniCB, OMPD_parallel, HasCancel});
1477  OMPBuilder->pushFinalizationCB(std::move(FI));
1478  }
1479  ~PushAndPopStackRAII() {
1480  if (OMPBuilder)
1481  OMPBuilder->popFinalizationCB();
1482  }
1483  llvm::OpenMPIRBuilder *OMPBuilder;
1484 };
1485 } // namespace
1486 
1488  CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1489  const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1490  const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1491  assert(ThreadIDVar->getType()->isPointerType() &&
1492  "thread id variable must be of type kmp_int32 *");
1493  CodeGenFunction CGF(CGM, true);
1494  bool HasCancel = false;
1495  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1496  HasCancel = OPD->hasCancel();
1497  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1498  HasCancel = OPSD->hasCancel();
1499  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1500  HasCancel = OPFD->hasCancel();
1501  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1502  HasCancel = OPFD->hasCancel();
1503  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1504  HasCancel = OPFD->hasCancel();
1505  else if (const auto *OPFD =
1506  dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1507  HasCancel = OPFD->hasCancel();
1508  else if (const auto *OPFD =
1509  dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1510  HasCancel = OPFD->hasCancel();
1511 
1512  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1513  // parallel region to make cancellation barriers work properly.
1514  llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder();
1515  PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel);
1516  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1517  HasCancel, OutlinedHelperName);
1518  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1519  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
1520 }
1521 
1523  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1524  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1525  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1527  CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1528 }
1529 
1531  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1532  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1533  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1535  CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1536 }
1537 
1539  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1540  const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1541  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1542  bool Tied, unsigned &NumberOfParts) {
1543  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1544  PrePostActionTy &) {
1545  llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1546  llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1547  llvm::Value *TaskArgs[] = {
1548  UpLoc, ThreadID,
1549  CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1550  TaskTVar->getType()->castAs<PointerType>())
1551  .getPointer(CGF)};
1552  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
1553  };
1554  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1555  UntiedCodeGen);
1556  CodeGen.setAction(Action);
1557  assert(!ThreadIDVar->getType()->isPointerType() &&
1558  "thread id variable must be of type kmp_int32 for tasks");
1559  const OpenMPDirectiveKind Region =
1560  isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1561  : OMPD_task;
1562  const CapturedStmt *CS = D.getCapturedStmt(Region);
1563  const auto *TD = dyn_cast<OMPTaskDirective>(&D);
1564  CodeGenFunction CGF(CGM, true);
1565  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1566  InnermostKind,
1567  TD ? TD->hasCancel() : false, Action);
1568  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1569  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1570  if (!Tied)
1571  NumberOfParts = Action.getNumberOfParts();
1572  return Res;
1573 }
1574 
1576  const RecordDecl *RD, const CGRecordLayout &RL,
1577  ArrayRef<llvm::Constant *> Data) {
1578  llvm::StructType *StructTy = RL.getLLVMType();
1579  unsigned PrevIdx = 0;
1580  ConstantInitBuilder CIBuilder(CGM);
1581  auto DI = Data.begin();
1582  for (const FieldDecl *FD : RD->fields()) {
1583  unsigned Idx = RL.getLLVMFieldNo(FD);
1584  // Fill the alignment.
1585  for (unsigned I = PrevIdx; I < Idx; ++I)
1586  Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1587  PrevIdx = Idx + 1;
1588  Fields.add(*DI);
1589  ++DI;
1590  }
1591 }
1592 
1593 template <class... As>
1594 static llvm::GlobalVariable *
1596  ArrayRef<llvm::Constant *> Data, const Twine &Name,
1597  As &&... Args) {
1598  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1599  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1600  ConstantInitBuilder CIBuilder(CGM);
1601  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1602  buildStructValue(Fields, CGM, RD, RL, Data);
1603  return Fields.finishAndCreateGlobal(
1604  Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1605  std::forward<As>(Args)...);
1606 }
1607 
1608 template <typename T>
1609 static void
1611  ArrayRef<llvm::Constant *> Data,
1612  T &Parent) {
1613  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1614  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1615  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1616  buildStructValue(Fields, CGM, RD, RL, Data);
1617  Fields.finishAndAddTo(Parent);
1618 }
1619 
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  // Default ident_t globals are cached per (flags, reserved_2) pair and
  // created lazily on first request.
  FlagsTy FlagsKey(Flags, Reserved2Flags);
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }

    // Field order matches ident_t: reserved_1, flags, reserved_2,
    // reserved_3, psource.
    llvm::Constant *Data[] = {
        llvm::ConstantInt::getNullValue(CGM.Int32Ty),
        llvm::ConstantInt::get(CGM.Int32Ty, Flags),
        llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
        llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
    llvm::GlobalValue *DefaultOpenMPLocation =
        createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
                           llvm::GlobalValue::PrivateLinkage);
    // unnamed_addr: the address of this global is not significant, so
    // identical location structs may be merged by the linker/optimizer.
    DefaultOpenMPLocation->setUnnamedAddr(
        llvm::GlobalValue::UnnamedAddr::Global);

    OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}
1652 
1654  bool AtCurrentPoint) {
      // NOTE(review): the first line of this definition (function name and the
      // CodeGenFunction &CGF parameter) is missing from this extract — TODO
      // confirm against the full file. The visible body installs a per-function
      // "service" insertion point keyed by the current function.
1655  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
      // A function may carry at most one service insert point at a time.
1656  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1657 
      // The marker is a no-op bitcast of an undef i32 ("svcpt"); it only serves
      // as a stable position before which setup code can later be emitted.
1658  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1659  if (AtCurrentPoint) {
      // Place the marker in the builder's current block, at its current point.
1660  Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1661  Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1662  } else {
      // Otherwise create it detached and splice it right after the function's
      // alloca insertion point (i.e. near the top of the entry block).
1663  Elem.second.ServiceInsertPt =
1664  new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1665  Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1666  }
1667 }
1668 
      // NOTE(review): the signature line of this definition is missing from
      // this extract — TODO confirm against the full file. The visible body
      // removes the placeholder "service" insert-point instruction (if any)
      // recorded for the current function, and clears the cached pointer
      // before erasing so the map never holds a dangling instruction.
1670  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1671  if (Elem.second.ServiceInsertPt) {
1672  llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1673  Elem.second.ServiceInsertPt = nullptr;
1674  Ptr->eraseFromParent();
1675  }
1676 }
1677 
      // NOTE(review): the first line of this definition (return type, function
      // name, and the CodeGenFunction &CGF parameter) is missing from this
      // extract — TODO confirm against the full file. The visible body builds
      // (or reuses) an ident_t location object for the given source location
      // and returns it as a naked pointer suitable for runtime-call arguments.
1679  SourceLocation Loc,
1680  unsigned Flags) {
      // All locations emitted here are tagged as coming from the compiler.
1681  Flags |= OMP_IDENT_KMPC;
1682  // If no debug info is generated - return global default location.
1683  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1684  Loc.isInvalid())
1685  return getOrCreateDefaultLocation(Flags).getPointer();
1686 
1687  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1688 
1689  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1690  Address LocValue = Address::invalid();
      // Reuse the per-function ident_t temporary if one was already created.
1691  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1692  if (I != OpenMPLocThreadIDMap.end())
1693  LocValue = Address(I->second.DebugLoc, Align);
1694 
1695  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
1696  // GetOpenMPThreadID was called before this routine.
1697  if (!LocValue.isValid()) {
1698  // Generate "ident_t .kmpc_loc.addr;"
1699  Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
1700  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1701  Elem.second.DebugLoc = AI.getPointer();
1702  LocValue = AI;
1703 
1704  if (!Elem.second.ServiceInsertPt)
      // NOTE(review): one line is missing from this extract here (orig. 1705);
      // presumably it establishes the service insert point used just below —
      // TODO confirm against the full file.
      // Initialize the temporary from the default ident_t at the service
      // insert point, restoring the builder position afterwards via the guard.
1706  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1707  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1708  CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
1709  CGF.getTypeSize(IdentQTy));
1710  }
1711 
1712  // char **psource = &.kmpc_loc_<flags>.addr.psource;
1713  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
1714  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
1715  LValue PSource =
1716  CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource))
      // Debug-location strings are cached per raw source-location encoding so
      // each distinct location is materialized only once per module.
1718  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
1719  if (OMPDebugLoc == nullptr) {
1720  SmallString<128> Buffer2;
1721  llvm::raw_svector_ostream OS2(Buffer2);
1722  // Build debug location
      // NOTE(review): one line is missing from this extract here (orig. 1723);
      // presumably it declares the PresumedLoc `PLoc` used below — TODO
      // confirm against the full file.
1724  OS2 << ";" << PLoc.getFilename() << ";";
1725  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1726  OS2 << FD->getQualifiedNameAsString();
1727  OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1728  OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
1729  OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
1730  }
1731  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
1732  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);
1733 
1734  // Our callers always pass this to a runtime function, so for
1735  // convenience, go ahead and return a naked pointer.
1736  return LocValue.getPointer();
1737 }
1738 
      // NOTE(review): the first line of this definition (return type, function
      // name, and the CodeGenFunction &CGF parameter) is missing from this
      // extract — TODO confirm against the full file. The visible body obtains
      // the OpenMP thread id for the current function, caching it per function
      // where possible.
1740  SourceLocation Loc) {
1741  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1742 
1743  llvm::Value *ThreadID = nullptr;
1744  // Check whether we've already cached a load of the thread id in this
1745  // function.
1746  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1747  if (I != OpenMPLocThreadIDMap.end()) {
1748  ThreadID = I->second.ThreadID;
1749  if (ThreadID != nullptr)
1750  return ThreadID;
1751  }
1752  // If exceptions are enabled, do not use parameter to avoid possible crash.
1753  if (auto *OMPRegionInfo =
1754  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1755  if (OMPRegionInfo->getThreadIDVariable()) {
1756  // Check if this an outlined function with thread id passed as argument.
1757  LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1758  llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only load from the parameter when it is safe to do so: either no EH
      // landing pad is required, or the value lives in the entry block / the
      // current block (so the load cannot be skipped by unwinding).
1759  if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1760  !CGF.getLangOpts().CXXExceptions ||
1761  CGF.Builder.GetInsertBlock() == TopBlock ||
1762  !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1763  cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1764  TopBlock ||
1765  cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1766  CGF.Builder.GetInsertBlock()) {
1767  ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1768  // If value loaded in entry block, cache it and use it everywhere in
1769  // function.
1770  if (CGF.Builder.GetInsertBlock() == TopBlock) {
1771  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1772  Elem.second.ThreadID = ThreadID;
1773  }
1774  return ThreadID;
1775  }
1776  }
1777  }
1778 
1779  // This is not an outlined function region - need to call __kmpc_int32
1780  // kmpc_global_thread_num(ident_t *loc).
1781  // Generate thread id value and cache this value for use across the
1782  // function.
1783  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1784  if (!Elem.second.ServiceInsertPt)
      // NOTE(review): one line is missing from this extract here (orig. 1785);
      // presumably it establishes the service insert point used just below —
      // TODO confirm against the full file.
      // Emit the runtime call at the service insert point so the thread id is
      // computed once, early in the function; the guard restores the builder.
1786  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1787  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
      // NOTE(review): one line is missing from this extract here (orig. 1789);
      // presumably it supplies the callee (the __kmpc_global_thread_num
      // runtime function) for this call — TODO confirm against the full file.
1788  llvm::CallInst *Call = CGF.Builder.CreateCall(
1790  emitUpdateLocation(CGF, Loc));
1791  Call->setCallingConv(CGF.getRuntimeCC());
      // Cache the call result so subsequent queries in this function reuse it.
1792  Elem.second.ThreadID = Call;
1793  return Call;
1794 }
1795 
      // NOTE(review): the signature line of this definition is missing from
      // this extract — TODO confirm against the full file. The visible body
      // tears down all per-function caches (location/thread-id entries and any
      // user-defined reduction/mapper registrations) once codegen for the
      // function is complete.
1797  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1798  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
      // NOTE(review): one line is missing from this extract here (orig. 1799);
      // presumably it clears the function's service insert point before the
      // map entry is erased — TODO confirm against the full file.
1800  OpenMPLocThreadIDMap.erase(CGF.CurFn);
1801  }
      // Drop user-defined reductions registered while emitting this function.
1802  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1803  for(auto *D : FunctionUDRMap[CGF.CurFn])
1804  UDRMap.erase(D);
1805  FunctionUDRMap.erase(CGF.CurFn);
1806  }
      // Likewise drop user-defined mappers associated with this function.
1807  auto I = FunctionUDMMap.find(CGF.CurFn);
1808  if (I != FunctionUDMMap.end()) {
1809  for(auto *D : I->second)
1810  UDMMap.erase(D);
1811  FunctionUDMMap.erase(I);
1812  }
1813 }
1814 
      // NOTE(review): the signature line of this definition is missing from
      // this extract — TODO confirm against the full file. The visible body
      // returns the LLVM pointer type to the cached ident_t struct type,
      // used as the first parameter of most kmpc runtime entry points below.
1816  return IdentTy->getPointerTo();
1817 }
1818 
      // NOTE(review): the signature line of this definition is missing from
      // this extract — TODO confirm against the full file. The visible body
      // lazily builds and caches the kmpc_micro outlined-function type, then
      // returns a pointer type to it (used by fork_call/fork_teams).
1820  if (!Kmpc_MicroTy) {
1821  // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
      // The microtask receives the global and bound thread ids by pointer and
      // is variadic so captured arguments can be appended by the runtime.
1822  llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1823  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1824  Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1825  }
1826  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1827 }
1828 
1829 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1830  llvm::FunctionCallee RTLFn = nullptr;
1831  switch (static_cast<OpenMPRTLFunction>(Function)) {
1832  case OMPRTL__kmpc_fork_call: {
1833  // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1834  // microtask, ...);
1835  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1837  auto *FnTy =
1838  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1839  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1840  if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
1841  if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
1842  llvm::LLVMContext &Ctx = F->getContext();
1843  llvm::MDBuilder MDB(Ctx);
1844  // Annotate the callback behavior of the __kmpc_fork_call:
1845  // - The callback callee is argument number 2 (microtask).
1846  // - The first two arguments of the callback callee are unknown (-1).
1847  // - All variadic arguments to the __kmpc_fork_call are passed to the
1848  // callback callee.
1849  F->addMetadata(
1850  llvm::LLVMContext::MD_callback,
1851  *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
1852  2, {-1, -1},
1853  /* VarArgsArePassed */ true)}));
1854  }
1855  }
1856  break;
1857  }
1859  // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1860  llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1861  auto *FnTy =
1862  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1863  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1864  break;
1865  }
1867  // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1868  // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1869  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1871  CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1872  auto *FnTy =
1873  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1874  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1875  break;
1876  }
1877  case OMPRTL__kmpc_critical: {
1878  // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1879  // kmp_critical_name *crit);
1880  llvm::Type *TypeParams[] = {
1882  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1883  auto *FnTy =
1884  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1885  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1886  break;
1887  }
1889  // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1890  // kmp_critical_name *crit, uintptr_t hint);
1891  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1892  llvm::PointerType::getUnqual(KmpCriticalNameTy),
1893  CGM.IntPtrTy};
1894  auto *FnTy =
1895  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1896  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1897  break;
1898  }
1900  // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1901  // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1902  // typedef void *(*kmpc_ctor)(void *);
1903  auto *KmpcCtorTy =
1904  llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1905  /*isVarArg*/ false)->getPointerTo();
1906  // typedef void *(*kmpc_cctor)(void *, void *);
1907  llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1908  auto *KmpcCopyCtorTy =
1909  llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1910  /*isVarArg*/ false)
1911  ->getPointerTo();
1912  // typedef void (*kmpc_dtor)(void *);
1913  auto *KmpcDtorTy =
1914  llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1915  ->getPointerTo();
1916  llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1917  KmpcCopyCtorTy, KmpcDtorTy};
1918  auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1919  /*isVarArg*/ false);
1920  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1921  break;
1922  }
1924  // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1925  // kmp_critical_name *crit);
1926  llvm::Type *TypeParams[] = {
1928  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1929  auto *FnTy =
1930  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1931  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1932  break;
1933  }
1935  // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1936  // global_tid);
1937  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1938  auto *FnTy =
1939  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1940  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1941  break;
1942  }
1943  case OMPRTL__kmpc_barrier: {
1944  // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1945  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1946  auto *FnTy =
1947  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1948  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1949  break;
1950  }
1952  // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1953  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1954  auto *FnTy =
1955  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1956  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1957  break;
1958  }
1960  // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1961  // kmp_int32 num_threads)
1962  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1963  CGM.Int32Ty};
1964  auto *FnTy =
1965  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1966  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1967  break;
1968  }
1970  // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1971  // global_tid);
1972  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1973  auto *FnTy =
1974  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1975  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1976  break;
1977  }
1979  // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1980  // global_tid);
1981  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1982  auto *FnTy =
1983  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1984  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1985  break;
1986  }
1987  case OMPRTL__kmpc_flush: {
1988  // Build void __kmpc_flush(ident_t *loc);
1989  llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1990  auto *FnTy =
1991  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1992  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1993  break;
1994  }
1995  case OMPRTL__kmpc_master: {
1996  // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1997  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1998  auto *FnTy =
1999  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2000  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
2001  break;
2002  }
2003  case OMPRTL__kmpc_end_master: {
2004  // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
2005  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2006  auto *FnTy =
2007  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2008  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
2009  break;
2010  }
2012  // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
2013  // int end_part);
2014  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2015  auto *FnTy =
2016  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2017  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
2018  break;
2019  }
2020  case OMPRTL__kmpc_single: {
2021  // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
2022  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2023  auto *FnTy =
2024  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2025  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
2026  break;
2027  }
2028  case OMPRTL__kmpc_end_single: {
2029  // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
2030  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2031  auto *FnTy =
2032  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2033  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
2034  break;
2035  }
2037  // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
2038  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2039  // kmp_routine_entry_t *task_entry);
2040  assert(KmpRoutineEntryPtrTy != nullptr &&
2041  "Type kmp_routine_entry_t must be created.");
2042  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2043  CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
2044  // Return void * and then cast to particular kmp_task_t type.
2045  auto *FnTy =
2046  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2047  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
2048  break;
2049  }
2051  // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid,
2052  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2053  // kmp_routine_entry_t *task_entry, kmp_int64 device_id);
2054  assert(KmpRoutineEntryPtrTy != nullptr &&
2055  "Type kmp_routine_entry_t must be created.");
2056  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2057  CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy,
2058  CGM.Int64Ty};
2059  // Return void * and then cast to particular kmp_task_t type.
2060  auto *FnTy =
2061  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2062  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc");
2063  break;
2064  }
2065  case OMPRTL__kmpc_omp_task: {
2066  // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2067  // *new_task);
2068  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2069  CGM.VoidPtrTy};
2070  auto *FnTy =
2071  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2072  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
2073  break;
2074  }
2075  case OMPRTL__kmpc_copyprivate: {
2076  // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
2077  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
2078  // kmp_int32 didit);
2079  llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2080  auto *CpyFnTy =
2081  llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
2082  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
2083  CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
2084  CGM.Int32Ty};
2085  auto *FnTy =
2086  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2087  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
2088  break;
2089  }
2090  case OMPRTL__kmpc_reduce: {
2091  // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
2092  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
2093  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
2094  llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2095  auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
2096  /*isVarArg=*/false);
2097  llvm::Type *TypeParams[] = {
2099  CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
2100  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2101  auto *FnTy =
2102  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2103  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
2104  break;
2105  }
2107  // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
2108  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
2109  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
2110  // *lck);
2111  llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2112  auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
2113  /*isVarArg=*/false);
2114  llvm::Type *TypeParams[] = {
2116  CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
2117  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2118  auto *FnTy =
2119  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2120  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
2121  break;
2122  }
2123  case OMPRTL__kmpc_end_reduce: {
2124  // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
2125  // kmp_critical_name *lck);
2126  llvm::Type *TypeParams[] = {
2128  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2129  auto *FnTy =
2130  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2131  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
2132  break;
2133  }
2135  // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
2136  // kmp_critical_name *lck);
2137  llvm::Type *TypeParams[] = {
2139  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2140  auto *FnTy =
2141  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2142  RTLFn =
2143  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
2144  break;
2145  }
2147  // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2148  // *new_task);
2149  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2150  CGM.VoidPtrTy};
2151  auto *FnTy =
2152  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2153  RTLFn =
2154  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
2155  break;
2156  }
2158  // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2159  // *new_task);
2160  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2161  CGM.VoidPtrTy};
2162  auto *FnTy =
2163  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2164  RTLFn = CGM.CreateRuntimeFunction(FnTy,
2165  /*Name=*/"__kmpc_omp_task_complete_if0");
2166  break;
2167  }
2168  case OMPRTL__kmpc_ordered: {
2169  // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
2170  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2171  auto *FnTy =
2172  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2173  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
2174  break;
2175  }
2176  case OMPRTL__kmpc_end_ordered: {
2177  // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
2178  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2179  auto *FnTy =
2180  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2181  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
2182  break;
2183  }
2185  // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
2186  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2187  auto *FnTy =
2188  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2189  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
2190  break;
2191  }
2192  case OMPRTL__kmpc_taskgroup: {
2193  // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
2194  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2195  auto *FnTy =
2196  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2197  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
2198  break;
2199  }
2201  // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
2202  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2203  auto *FnTy =
2204  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2205  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
2206  break;
2207  }
2209  // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
2210  // int proc_bind)
2211  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2212  auto *FnTy =
2213  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2214  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
2215  break;
2216  }
2218  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2219  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2220  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
2221  llvm::Type *TypeParams[] = {
2224  auto *FnTy =
2225  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2226  RTLFn =
2227  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
2228  break;
2229  }
2231  // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2232  // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2233  // kmp_depend_info_t *noalias_dep_list);
2234  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2237  auto *FnTy =
2238  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2239  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2240  break;
2241  }
2243  // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2244  // global_tid, kmp_int32 cncl_kind)
2245  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2246  auto *FnTy =
2247  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2248  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2249  break;
2250  }
2251  case OMPRTL__kmpc_cancel: {
2252  // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2253  // kmp_int32 cncl_kind)
2254  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2255  auto *FnTy =
2256  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2257  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2258  break;
2259  }
2261  // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
2262  // kmp_int32 num_teams, kmp_int32 num_threads)
2263  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2264  CGM.Int32Ty};
2265  auto *FnTy =
2266  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2267  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2268  break;
2269  }
2270  case OMPRTL__kmpc_fork_teams: {
2271  // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2272  // microtask, ...);
2273  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2275  auto *FnTy =
2276  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2277  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2278  if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
2279  if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
2280  llvm::LLVMContext &Ctx = F->getContext();
2281  llvm::MDBuilder MDB(Ctx);
2282  // Annotate the callback behavior of the __kmpc_fork_teams:
2283  // - The callback callee is argument number 2 (microtask).
2284  // - The first two arguments of the callback callee are unknown (-1).
2285  // - All variadic arguments to the __kmpc_fork_teams are passed to the
2286  // callback callee.
2287  F->addMetadata(
2288  llvm::LLVMContext::MD_callback,
2289  *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
2290  2, {-1, -1},
2291  /* VarArgsArePassed */ true)}));
2292  }
2293  }
2294  break;
2295  }
2296  case OMPRTL__kmpc_taskloop: {
2297  // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2298  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2299  // sched, kmp_uint64 grainsize, void *task_dup);
2300  llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2301  CGM.IntTy,
2302  CGM.VoidPtrTy,
2303  CGM.IntTy,
2304  CGM.Int64Ty->getPointerTo(),
2305  CGM.Int64Ty->getPointerTo(),
2306  CGM.Int64Ty,
2307  CGM.IntTy,
2308  CGM.IntTy,
2309  CGM.Int64Ty,
2310  CGM.VoidPtrTy};
2311  auto *FnTy =
2312  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2313  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2314  break;
2315  }
2317  // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2318  // num_dims, struct kmp_dim *dims);
2319  llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2320  CGM.Int32Ty,
2321  CGM.Int32Ty,
2322  CGM.VoidPtrTy};
2323  auto *FnTy =
2324  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2325  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2326  break;
2327  }
2329  // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2330  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2331  auto *FnTy =
2332  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2333  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2334  break;
2335  }
2337  // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2338  // *vec);
2339  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2340  CGM.Int64Ty->getPointerTo()};
2341  auto *FnTy =
2342  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2343  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2344  break;
2345  }
2347  // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2348  // *vec);
2349  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2350  CGM.Int64Ty->getPointerTo()};
2351  auto *FnTy =
2352  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2353  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2354  break;
2355  }
2357  // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2358  // *data);
2359  llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2360  auto *FnTy =
2361  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2362  RTLFn =
2363  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2364  break;
2365  }
2367  // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2368  // *d);
2369  llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2370  auto *FnTy =
2371  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2372  RTLFn = CGM.CreateRuntimeFunction(
2373  FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2374  break;
2375  }
2376  case OMPRTL__kmpc_alloc: {
2377  // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
2378  // al); omp_allocator_handle_t type is void *.
2379  llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
2380  auto *FnTy =
2381  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2382  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
2383  break;
2384  }
2385  case OMPRTL__kmpc_free: {
2386  // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
2387  // al); omp_allocator_handle_t type is void *.
2388  llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2389  auto *FnTy =
2390  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2391  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
2392  break;
2393  }
2395  // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
2396  // size);
2397  llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
2398  llvm::FunctionType *FnTy =
2399  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2400  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
2401  break;
2402  }
2403  case OMPRTL__tgt_target: {
2404  // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2405  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2406  // *arg_types);
2407  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2408  CGM.VoidPtrTy,
2409  CGM.Int32Ty,
2410  CGM.VoidPtrPtrTy,
2411  CGM.VoidPtrPtrTy,
2412  CGM.Int64Ty->getPointerTo(),
2413  CGM.Int64Ty->getPointerTo()};
2414  auto *FnTy =
2415  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2416  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2417  break;
2418  }
2420  // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2421  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2422  // int64_t *arg_types);
2423  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2424  CGM.VoidPtrTy,
2425  CGM.Int32Ty,
2426  CGM.VoidPtrPtrTy,
2427  CGM.VoidPtrPtrTy,
2428  CGM.Int64Ty->getPointerTo(),
2429  CGM.Int64Ty->getPointerTo()};
2430  auto *FnTy =
2431  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2432  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2433  break;
2434  }
2435  case OMPRTL__tgt_target_teams: {
2436  // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2437  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2438  // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2439  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2440  CGM.VoidPtrTy,
2441  CGM.Int32Ty,
2442  CGM.VoidPtrPtrTy,
2443  CGM.VoidPtrPtrTy,
2444  CGM.Int64Ty->getPointerTo(),
2445  CGM.Int64Ty->getPointerTo(),
2446  CGM.Int32Ty,
2447  CGM.Int32Ty};
2448  auto *FnTy =
2449  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2450  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2451  break;
2452  }
2454  // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2455  // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
2456  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2457  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2458  CGM.VoidPtrTy,
2459  CGM.Int32Ty,
2460  CGM.VoidPtrPtrTy,
2461  CGM.VoidPtrPtrTy,
2462  CGM.Int64Ty->getPointerTo(),
2463  CGM.Int64Ty->getPointerTo(),
2464  CGM.Int32Ty,
2465  CGM.Int32Ty};
2466  auto *FnTy =
2467  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2468  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2469  break;
2470  }
2472  // Build void __tgt_register_requires(int64_t flags);
2473  llvm::Type *TypeParams[] = {CGM.Int64Ty};
2474  auto *FnTy =
2475  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2476  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
2477  break;
2478  }
2480  // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2481  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2482  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2483  CGM.Int32Ty,
2484  CGM.VoidPtrPtrTy,
2485  CGM.VoidPtrPtrTy,
2486  CGM.Int64Ty->getPointerTo(),
2487  CGM.Int64Ty->getPointerTo()};
2488  auto *FnTy =
2489  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2490  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2491  break;
2492  }
2494  // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2495  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2496  // *arg_types);
2497  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2498  CGM.Int32Ty,
2499  CGM.VoidPtrPtrTy,
2500  CGM.VoidPtrPtrTy,
2501  CGM.Int64Ty->getPointerTo(),
2502  CGM.Int64Ty->getPointerTo()};
2503  auto *FnTy =
2504  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2505  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2506  break;
2507  }
2509  // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2510  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2511  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2512  CGM.Int32Ty,
2513  CGM.VoidPtrPtrTy,
2514  CGM.VoidPtrPtrTy,
2515  CGM.Int64Ty->getPointerTo(),
2516  CGM.Int64Ty->getPointerTo()};
2517  auto *FnTy =
2518  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2519  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2520  break;
2521  }
2523  // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2524  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2525  // *arg_types);
2526  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2527  CGM.Int32Ty,
2528  CGM.VoidPtrPtrTy,
2529  CGM.VoidPtrPtrTy,
2530  CGM.Int64Ty->getPointerTo(),
2531  CGM.Int64Ty->getPointerTo()};
2532  auto *FnTy =
2533  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2534  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2535  break;
2536  }
2538  // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2539  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2540  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2541  CGM.Int32Ty,
2542  CGM.VoidPtrPtrTy,
2543  CGM.VoidPtrPtrTy,
2544  CGM.Int64Ty->getPointerTo(),
2545  CGM.Int64Ty->getPointerTo()};
2546  auto *FnTy =
2547  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2548  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2549  break;
2550  }
2552  // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2553  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2554  // *arg_types);
2555  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2556  CGM.Int32Ty,
2557  CGM.VoidPtrPtrTy,
2558  CGM.VoidPtrPtrTy,
2559  CGM.Int64Ty->getPointerTo(),
2560  CGM.Int64Ty->getPointerTo()};
2561  auto *FnTy =
2562  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2563  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2564  break;
2565  }
2567  // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
2568  llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
2569  auto *FnTy =
2570  llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
2571  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components");
2572  break;
2573  }
2575  // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void
2576  // *base, void *begin, int64_t size, int64_t type);
2577  llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy,
2578  CGM.Int64Ty, CGM.Int64Ty};
2579  auto *FnTy =
2580  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2581  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component");
2582  break;
2583  }
2584  }
2585  assert(RTLFn && "Unable to find OpenMP runtime function");
2586  return RTLFn;
2587 }
2588 
2589 llvm::FunctionCallee
2590 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
2591  assert((IVSize == 32 || IVSize == 64) &&
2592  "IV size is not compatible with the omp runtime");
2593  StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2594  : "__kmpc_for_static_init_4u")
2595  : (IVSigned ? "__kmpc_for_static_init_8"
2596  : "__kmpc_for_static_init_8u");
2597  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2598  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2599  llvm::Type *TypeParams[] = {
2600  getIdentTyPointerTy(), // loc
2601  CGM.Int32Ty, // tid
2602  CGM.Int32Ty, // schedtype
2603  llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2604  PtrTy, // p_lower
2605  PtrTy, // p_upper
2606  PtrTy, // p_stride
2607  ITy, // incr
2608  ITy // chunk
2609  };
2610  auto *FnTy =
2611  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2612  return CGM.CreateRuntimeFunction(FnTy, Name);
2613 }
2614 
2615 llvm::FunctionCallee
2616 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
2617  assert((IVSize == 32 || IVSize == 64) &&
2618  "IV size is not compatible with the omp runtime");
2619  StringRef Name =
2620  IVSize == 32
2621  ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2622  : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2623  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2624  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2625  CGM.Int32Ty, // tid
2626  CGM.Int32Ty, // schedtype
2627  ITy, // lower
2628  ITy, // upper
2629  ITy, // stride
2630  ITy // chunk
2631  };
2632  auto *FnTy =
2633  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2634  return CGM.CreateRuntimeFunction(FnTy, Name);
2635 }
2636 
2637 llvm::FunctionCallee
2638 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
2639  assert((IVSize == 32 || IVSize == 64) &&
2640  "IV size is not compatible with the omp runtime");
2641  StringRef Name =
2642  IVSize == 32
2643  ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2644  : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2645  llvm::Type *TypeParams[] = {
2646  getIdentTyPointerTy(), // loc
2647  CGM.Int32Ty, // tid
2648  };
2649  auto *FnTy =
2650  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2651  return CGM.CreateRuntimeFunction(FnTy, Name);
2652 }
2653 
2654 llvm::FunctionCallee
2655 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
2656  assert((IVSize == 32 || IVSize == 64) &&
2657  "IV size is not compatible with the omp runtime");
2658  StringRef Name =
2659  IVSize == 32
2660  ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2661  : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2662  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2663  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2664  llvm::Type *TypeParams[] = {
2665  getIdentTyPointerTy(), // loc
2666  CGM.Int32Ty, // tid
2667  llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2668  PtrTy, // p_lower
2669  PtrTy, // p_upper
2670  PtrTy // p_stride
2671  };
2672  auto *FnTy =
2673  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2674  return CGM.CreateRuntimeFunction(FnTy, Name);
2675 }
2676 
2677 /// Obtain information that uniquely identifies a target entry. This
2678 /// consists of the file and device IDs as well as line number associated with
2679 /// the relevant entry source location.
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {

  // The loc should be always valid and have a file ID (the user cannot use
  // #pragma directives in macros)

  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  // Identify the source file by its on-disk (device, inode) pair. A stat
  // failure is diagnosed but execution continues; the out-parameters are
  // still written from whatever getUniqueID left in ID.
  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
    SM.getDiagnostics().Report(diag::err_cannot_open_file)
        << PLoc.getFilename() << EC.message();

  // Return the (device, file, line) triple through the out-parameters.
  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}
2702 
  // Under -fopenmp-simd no device code is generated, so no indirection
  // pointer is needed.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // Only declare-target 'link' variables (and 'to' variables under the
  // condition continued here) are accessed through a reference pointer.
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
    // Build the reference pointer's name: <mangled-name>[_<fileid>]_decl_tgt_ref_ptr.
    // The FileID component is appended only for internal-linkage variables,
    // to keep the name unique across translation units.
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    // Create the pointer lazily on first request.
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host the pointer is statically initialized with the address
      // of the actual variable; on the device the runtime patches it in.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
2741 
llvm::Constant *
  // With -fopenmp-use-tls the variable is lowered to real TLS and no
  // runtime cache variable is expected (checked by the assertion).
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
  // Lookup the entry, lazily creating it if necessary. The cache is an
  // internal global named "<mangled-name>.cache." of type i8**.
  std::string Suffix = getName({"cache", ""});
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
}
2751 
                                           // Returns the address of the
                                           // calling thread's copy of a
                                           // threadprivate variable.
                                           const VarDecl *VD,
                                           Address VDAddr,
                                           SourceLocation Loc) {
  // When the variable is lowered to real TLS its own address is already
  // per-thread, so it can be used directly.
  if (CGM.getLangOpts().OpenMPUseTLS &&
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  // Otherwise ask the runtime for this thread's copy:
  // __kmpc_threadprivate_cached(&loc, gtid, &var, size, &cache).
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.Int8PtrTy),
  // The runtime returns an i8*; the caller-visible Address keeps the
  // original variable's alignment.
  return Address(CGF.EmitRuntimeCall(
                 VDAddr.getAlignment());
}
2770 
    // Registers constructor/copy-constructor/destructor for a threadprivate
    // variable with the OpenMP runtime.
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
}
2787 
    // Emits (at most once per mangled name) the ctor/dtor functions for a
    // threadprivate variable and registers them with the runtime. Returns a
    // module-level init function when CGF is null, nullptr otherwise.
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Real TLS lowering needs no runtime registration.
  if (CGM.getLangOpts().OpenMPUseTLS &&
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // ThreadPrivateWithDefinition guards against emitting the registration
  // twice for the same variable.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD. Signature:
      // void *__kmpc_global_ctor_(void *dst) — returns its argument.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      // Run the variable's initializer into the destination buffer.
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Return the destination pointer, as the runtime contract requires.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD. Signature: void __kmpc_global_dtor_(void *dst).
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // The register call takes typed function pointers, so missing ctor/dtor
    // slots are filled with typed null constants rather than plain null.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No active function: synthesize a standalone module-level init
      // function ("__omp_threadprivate_init_.") and return it so the caller
      // can schedule it with the global initializers.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise register directly in the current function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
2907 
                                   // Emits offload-entry ctor/dtor stubs for
                                   // a declare-target variable; returns true
                                   // when compiling for the device.
                                   llvm::GlobalVariable *Addr,
                                   bool PerformInit) {
  // Nothing to do unless this TU participates in offloading.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  // Emit the entries only once per mangled name.
  if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  // NOTE(review): Loc below is presumably the variable's begin location,
  // computed on a line outside this view — confirm against upstream source.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive on the device even though nothing references it
      // directly.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host only a placeholder byte is emitted; it serves as the
      // unique ID for the offload entry.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host-side placeholder byte used as the dtor entry's unique ID.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
3020 
                                    // Returns a per-thread address for a
                                    // compiler-generated ("artificial")
                                    // threadprivate value identified by Name.
                                    QualType VarType,
                                    StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  // The backing storage is an internal global "<Name>.artificial.".
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  // With -fopenmp-use-tls the global itself is marked thread_local and used
  // directly, bypassing the runtime.
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Otherwise go through __kmpc_threadprivate_cached with a dedicated
  // "<Name>.artificial..cache." cache variable.
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.EmitRuntimeCall(
      VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}
3049 
                  // Emits ThenGen when Cond is true and ElseGen otherwise,
                  // constant-folding the condition away when possible.
                  const RegionCodeGenTy &ThenGen,
                  const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}
3088 
                                     // Emits a '#pragma omp parallel' call:
                                     // a real fork via __kmpc_fork_call, or a
                                     // serialized execution when IfCond is
                                     // known/evaluates to false.
                                     llvm::Function *OutlinedFn,
                                     ArrayRef<llvm::Value *> CapturedVars,
                                     const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    // The captured variables are passed as the trailing varargs.
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
    // Serialized path: run the outlined body on the current thread between
    // __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
        EndArgs);
  };
  // With an if-clause, branch between the forked and serialized versions;
  // otherwise always fork.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
3148 
3149 // If we're inside an (outlined) parallel region, use the region info's
3150 // thread-ID variable (it is passed in a first argument of the outlined function
3151 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
3152 // regular serial code region, get thread ID by calling kmp_int32
3153 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
3154 // return the address of that temp.
                                   SourceLocation Loc) {
  // Inside an outlined OpenMP region, reuse the gtid parameter that was
  // passed to the outlined function.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  // Otherwise materialize the thread ID into a fresh i32 temporary and
  // return the temporary's address.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}
3171 
    // Returns the module-private global of type Ty registered under Name,
    // creating a zero-initialized common-linkage global on first use.
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  // Materialize the Twine into a stable string usable as a map key.
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    // Cached: callers must always request the same type for a given name.
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  // Common linkage + zero initializer lets identical variables from
  // different translation units be merged by the linker.
  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}
3191 
  // The lock variable for a named critical section is the internal global
  // "gomp_critical_user_<name>.var"; common linkage (via
  // getOrCreateInternalVariable) makes all TUs share one lock per name.
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}
3197 
3198 namespace {
3199 /// Common pre(post)-action for different OpenMP constructs.
3200 class CommonActionTy final : public PrePostActionTy {
3201  llvm::FunctionCallee EnterCallee;
3202  ArrayRef<llvm::Value *> EnterArgs;
3203  llvm::FunctionCallee ExitCallee;
3204  ArrayRef<llvm::Value *> ExitArgs;
3205  bool Conditional;
3206  llvm::BasicBlock *ContBlock = nullptr;
3207 
3208 public:
3209  CommonActionTy(llvm::FunctionCallee EnterCallee,
3210  ArrayRef<llvm::Value *> EnterArgs,
3211  llvm::FunctionCallee ExitCallee,
3212  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
3213  : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
3214  ExitArgs(ExitArgs), Conditional(Conditional) {}
3215  void Enter(CodeGenFunction &CGF) override {
3216  llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
3217  if (Conditional) {
3218  llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
3219  auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
3220  ContBlock = CGF.createBasicBlock("omp_if.end");
3221  // Generate the branch (If-stmt)
3222  CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
3223  CGF.EmitBlock(ThenBlock);
3224  }
3225  }
3226  void Done(CodeGenFunction &CGF) {
3227  // Emit the rest of blocks/branches
3228  CGF.EmitBranch(ContBlock);
3229  CGF.EmitBlock(ContBlock, true);
3230  }
3231  void Exit(CodeGenFunction &CGF) override {
3232  CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
3233  }
3234 };
3235 } // anonymous namespace
3236 
                                  // Emits a '#pragma omp critical' region,
                                  // optionally with a lock hint.
                                  StringRef CriticalName,
                                  const RegionCodeGenTy &CriticalOpGen,
                                  SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  // A hint clause switches the enter call to __kmpc_critical_with_hint and
  // appends the evaluated hint value.
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
  }
  // CommonActionTy brackets the region body with the enter/exit calls.
  CommonActionTy Action(
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}
3262 
                                // Emits a '#pragma omp master' region: the
                                // body runs only on the thread for which
                                // __kmpc_master returns nonzero.
                                const RegionCodeGenTy &MasterOpGen,
                                SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  // Close the conditional branch opened by the action's Enter().
  Action.Done(CGF);
}
3281 
                                 // Emits a '#pragma omp taskyield' call.
                                 SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
  // Inside an untied task region, emit the resume point the task can be
  // rescheduled to after yielding.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
3294 
3296  const RegionCodeGenTy &TaskgroupOpGen,
3297  SourceLocation Loc) {
3298  if (!CGF.HaveInsertPoint())
3299  return;
3300  // __kmpc_taskgroup(ident_t *, gtid);
3301  // TaskgroupOpGen();
3302  // __kmpc_end_taskgroup(ident_t *, gtid);
3303  // Prepare arguments and build a call to __kmpc_taskgroup
3304  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3305  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3307  Args);
3308  TaskgroupOpGen.setAction(Action);
3309  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3310 }
3311 
3312 /// Given an array of pointers to variables, project the address of a
3313 /// given variable.
3315  unsigned Index, const VarDecl *Var) {
3316  // Pull out the pointer to the variable.
3317  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
3318  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
3319 
3320  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3321  Addr = CGF.Builder.CreateElementBitCast(
3322  Addr, CGF.ConvertTypeForMem(Var->getType()));
3323  return Addr;
3324 }
3325 
3327  CodeGenModule &CGM, llvm::Type *ArgsType,
3328  ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
3329  ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
3330  SourceLocation Loc) {
3331  ASTContext &C = CGM.getContext();
3332  // void copy_func(void *LHSArg, void *RHSArg);
3333  FunctionArgList Args;
3334  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3336  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3338  Args.push_back(&LHSArg);
3339  Args.push_back(&RHSArg);
3340  const auto &CGFI =
3342  std::string Name =
3343  CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
3344  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
3346  &CGM.getModule());
3347  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
3348  Fn->setDoesNotRecurse();
3349  CodeGenFunction CGF(CGM);
3350  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
3351  // Dest = (void*[n])(LHSArg);
3352  // Src = (void*[n])(RHSArg);
3354  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
3355  ArgsType), CGF.getPointerAlign());
3357  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
3358  ArgsType), CGF.getPointerAlign());
3359  // *(Type0*)Dst[0] = *(Type0*)Src[0];
3360  // *(Type1*)Dst[1] = *(Type1*)Src[1];
3361  // ...
3362  // *(Typen*)Dst[n] = *(Typen*)Src[n];
3363  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
3364  const auto *DestVar =
3365  cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
3366  Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
3367 
3368  const auto *SrcVar =
3369  cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
3370  Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
3371 
3372  const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
3373  QualType Type = VD->getType();
3374  CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
3375  }
3376  CGF.FinishFunction();
3377  return Fn;
3378 }
3379 
3381  const RegionCodeGenTy &SingleOpGen,
3382  SourceLocation Loc,
3383  ArrayRef<const Expr *> CopyprivateVars,
3384  ArrayRef<const Expr *> SrcExprs,
3385  ArrayRef<const Expr *> DstExprs,
3386  ArrayRef<const Expr *> AssignmentOps) {
3387  if (!CGF.HaveInsertPoint())
3388  return;
3389  assert(CopyprivateVars.size() == SrcExprs.size() &&
3390  CopyprivateVars.size() == DstExprs.size() &&
3391  CopyprivateVars.size() == AssignmentOps.size());
3392  ASTContext &C = CGM.getContext();
3393  // int32 did_it = 0;
3394  // if(__kmpc_single(ident_t *, gtid)) {
3395  // SingleOpGen();
3396  // __kmpc_end_single(ident_t *, gtid);
3397  // did_it = 1;
3398  // }
3399  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3400  // <copy_func>, did_it);
3401 
3402  Address DidIt = Address::invalid();
3403  if (!CopyprivateVars.empty()) {
3404  // int32 did_it = 0;
3405  QualType KmpInt32Ty =
3406  C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3407  DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
3408  CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
3409  }
3410  // Prepare arguments and build a call to __kmpc_single
3411  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3412  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
3414  /*Conditional=*/true);
3415  SingleOpGen.setAction(Action);
3416  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
3417  if (DidIt.isValid()) {
3418  // did_it = 1;
3419  CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
3420  }
3421  Action.Done(CGF);
3422  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3423  // <copy_func>, did_it);
3424  if (DidIt.isValid()) {
3425  llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
3426  QualType CopyprivateArrayTy = C.getConstantArrayType(
3427  C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
3428  /*IndexTypeQuals=*/0);
3429  // Create a list of all private variables for copyprivate.
3430  Address CopyprivateList =
3431  CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
3432  for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
3433  Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
3434  CGF.Builder.CreateStore(
3436  CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
3437  CGF.VoidPtrTy),
3438  Elem);
3439  }
3440  // Build function that copies private values from single region to all other
3441  // threads in the corresponding parallel region.
3443  CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
3444  CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
3445  llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
3446  Address CL =
3447  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
3448  CGF.VoidPtrTy);
3449  llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
3450  llvm::Value *Args[] = {
3451  emitUpdateLocation(CGF, Loc), // ident_t *<loc>
3452  getThreadID(CGF, Loc), // i32 <gtid>
3453  BufSize, // size_t <buf_size>
3454  CL.getPointer(), // void *<copyprivate list>
3455  CpyFn, // void (*) (void *, void *) <copy_func>
3456  DidItVal // i32 did_it
3457  };
3459  }
3460 }
3461 
3463  const RegionCodeGenTy &OrderedOpGen,
3464  SourceLocation Loc, bool IsThreads) {
3465  if (!CGF.HaveInsertPoint())
3466  return;
3467  // __kmpc_ordered(ident_t *, gtid);
3468  // OrderedOpGen();
3469  // __kmpc_end_ordered(ident_t *, gtid);
3470  // Prepare arguments and build a call to __kmpc_ordered
3471  if (IsThreads) {
3472  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3473  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3475  Args);
3476  OrderedOpGen.setAction(Action);
3477  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3478  return;
3479  }
3480  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3481 }
3482 
3484  unsigned Flags;
3485  if (Kind == OMPD_for)
3486  Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3487  else if (Kind == OMPD_sections)
3488  Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3489  else if (Kind == OMPD_single)
3490  Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3491  else if (Kind == OMPD_barrier)
3492  Flags = OMP_IDENT_BARRIER_EXPL;
3493  else
3494  Flags = OMP_IDENT_BARRIER_IMPL;
3495  return Flags;
3496 }
3497 
3499  CodeGenFunction &CGF, const OMPLoopDirective &S,
3500  OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
3501  // Check if the loop directive is actually a doacross loop directive. In this
3502  // case choose static, 1 schedule.
3503  if (llvm::any_of(
3505  [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
3506  ScheduleKind = OMPC_SCHEDULE_static;
3507  // Chunk size is 1 in this case.
3508  llvm::APInt ChunkSize(32, 1);
3509  ChunkExpr = IntegerLiteral::Create(
3510  CGF.getContext(), ChunkSize,
3511  CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
3512  SourceLocation());
3513  }
3514 }
3515 
3517  OpenMPDirectiveKind Kind, bool EmitChecks,
3518  bool ForceSimpleCall) {
3519  // Check if we should use the OMPBuilder
3520  auto *OMPRegionInfo =
3521  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
3522  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
3523  if (OMPBuilder) {
3524  CGF.Builder.restoreIP(OMPBuilder->CreateBarrier(
3525  CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
3526  return;
3527  }
3528 
3529  if (!CGF.HaveInsertPoint())
3530  return;
3531  // Build call __kmpc_cancel_barrier(loc, thread_id);
3532  // Build call __kmpc_barrier(loc, thread_id);
3533  unsigned Flags = getDefaultFlagsForBarriers(Kind);
3534  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
3535  // thread_id);
3536  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
3537  getThreadID(CGF, Loc)};
3538  if (OMPRegionInfo) {
3539  if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
3540  llvm::Value *Result = CGF.EmitRuntimeCall(
3542  if (EmitChecks) {
3543  // if (__kmpc_cancel_barrier()) {
3544  // exit from construct;
3545  // }
3546  llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
3547  llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
3548  llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
3549  CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3550  CGF.EmitBlock(ExitBB);
3551  // exit from construct;
3552  CodeGenFunction::JumpDest CancelDestination =
3553  CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3554  CGF.EmitBranchThroughCleanup(CancelDestination);
3555  CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3556  }
3557  return;
3558  }
3559  }
3561 }
3562 
3563 /// Map the OpenMP loop schedule to the runtime enumeration.
3565  bool Chunked, bool Ordered) {
3566  switch (ScheduleKind) {
3567  case OMPC_SCHEDULE_static:
3568  return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3569  : (Ordered ? OMP_ord_static : OMP_sch_static);
3570  case OMPC_SCHEDULE_dynamic:
3572  case OMPC_SCHEDULE_guided:
3574  case OMPC_SCHEDULE_runtime:
3575  return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3576  case OMPC_SCHEDULE_auto:
3577  return Ordered ? OMP_ord_auto : OMP_sch_auto;
3578  case OMPC_SCHEDULE_unknown:
3579  assert(!Chunked && "chunk was specified but schedule kind not known");
3580  return Ordered ? OMP_ord_static : OMP_sch_static;
3581  }
3582  llvm_unreachable("Unexpected runtime schedule");
3583 }
3584 
3585 /// Map the OpenMP distribute schedule to the runtime enumeration.
3586 static OpenMPSchedType
3588  // only static is allowed for dist_schedule
3590 }
3591 
3593  bool Chunked) const {
3594  OpenMPSchedType Schedule =
3595  getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3596  return Schedule == OMP_sch_static;
3597 }
3598 
3600  OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3601  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3602  return Schedule == OMP_dist_sch_static;
3603 }
3604 
3606  bool Chunked) const {
3607  OpenMPSchedType Schedule =
3608  getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3609  return Schedule == OMP_sch_static_chunked;
3610 }
3611 
3613  OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3614  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3615  return Schedule == OMP_dist_sch_static_chunked;
3616 }
3617 
3619  OpenMPSchedType Schedule =
3620  getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3621  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3622  return Schedule != OMP_sch_static;
3623 }
3624 
3628  int Modifier = 0;
3629  switch (M1) {
3630  case OMPC_SCHEDULE_MODIFIER_monotonic:
3631  Modifier = OMP_sch_modifier_monotonic;
3632  break;
3633  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3634  Modifier = OMP_sch_modifier_nonmonotonic;
3635  break;
3636  case OMPC_SCHEDULE_MODIFIER_simd:
3637  if (Schedule == OMP_sch_static_chunked)
3639  break;
3642  break;
3643  }
3644  switch (M2) {
3645  case OMPC_SCHEDULE_MODIFIER_monotonic:
3646  Modifier = OMP_sch_modifier_monotonic;
3647  break;
3648  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3649  Modifier = OMP_sch_modifier_nonmonotonic;
3650  break;
3651  case OMPC_SCHEDULE_MODIFIER_simd:
3652  if (Schedule == OMP_sch_static_chunked)
3654  break;
3657  break;
3658  }
3659  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
3660  // If the static schedule kind is specified or if the ordered clause is
3661  // specified, and if the nonmonotonic modifier is not specified, the effect is
3662  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
3663  // modifier is specified, the effect is as if the nonmonotonic modifier is
3664  // specified.
3665  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
3666  if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
3667  Schedule == OMP_sch_static_balanced_chunked ||
3668  Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
3669  Schedule == OMP_dist_sch_static_chunked ||
3670  Schedule == OMP_dist_sch_static))
3671  Modifier = OMP_sch_modifier_nonmonotonic;
3672  }
3673  return Schedule | Modifier;
3674 }
3675 
3677  CodeGenFunction &CGF, SourceLocation Loc,
3678  const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
3679  bool Ordered, const DispatchRTInput &DispatchValues) {
3680  if (!CGF.HaveInsertPoint())
3681  return;
3683  ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
3684  assert(Ordered ||
3685  (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
3686  Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
3687  Schedule != OMP_sch_static_balanced_chunked));
3688  // Call __kmpc_dispatch_init(
3689  // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
3690  // kmp_int[32|64] lower, kmp_int[32|64] upper,
3691  // kmp_int[32|64] stride, kmp_int[32|64] chunk);
3692 
3693  // If the Chunk was not specified in the clause - use default value 1.
3694  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
3695  : CGF.Builder.getIntN(IVSize, 1);
3696  llvm::Value *Args[] = {
3697  emitUpdateLocation(CGF, Loc),
3698  getThreadID(CGF, Loc),
3699  CGF.Builder.getInt32(addMonoNonMonoModifier(
3700  CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
3701  DispatchValues.LB, // Lower
3702  DispatchValues.UB, // Upper
3703  CGF.Builder.getIntN(IVSize, 1), // Stride
3704  Chunk // Chunk
3705  };
3706  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
3707 }
3708 
3710  CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
3711  llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
3713  const CGOpenMPRuntime::StaticRTInput &Values) {
3714  if (!CGF.HaveInsertPoint())
3715  return;
3716 
3717  assert(!Values.Ordered);
3718  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
3719  Schedule == OMP_sch_static_balanced_chunked ||
3720  Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
3721  Schedule == OMP_dist_sch_static ||
3722  Schedule == OMP_dist_sch_static_chunked);
3723 
3724  // Call __kmpc_for_static_init(
3725  // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
3726  // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
3727  // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
3728  // kmp_int[32|64] incr, kmp_int[32|64] chunk);
3729  llvm::Value *Chunk = Values.Chunk;
3730  if (Chunk == nullptr) {
3731  assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
3732  Schedule == OMP_dist_sch_static) &&
3733  "expected static non-chunked schedule");
3734  // If the Chunk was not specified in the clause - use default value 1.
3735  Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
3736  } else {
3737  assert((Schedule == OMP_sch_static_chunked ||
3738  Schedule == OMP_sch_static_balanced_chunked ||
3739  Schedule == OMP_ord_static_chunked ||
3740  Schedule == OMP_dist_sch_static_chunked) &&
3741  "expected static chunked schedule");
3742  }
3743  llvm::Value *Args[] = {
3744  UpdateLocation,
3745  ThreadId,
3746  CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
3747  M2)), // Schedule type
3748  Values.IL.getPointer(), // &isLastIter
3749  Values.LB.getPointer(), // &LB
3750  Values.UB.getPointer(), // &UB
3751  Values.ST.getPointer(), // &Stride
3752  CGF.Builder.getIntN(Values.IVSize, 1), // Incr
3753  Chunk // Chunk
3754  };
3755  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
3756 }
3757 
3759  SourceLocation Loc,
3760  OpenMPDirectiveKind DKind,
3761  const OpenMPScheduleTy &ScheduleKind,
3762  const StaticRTInput &Values) {
3763  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3764  ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3765  assert(isOpenMPWorksharingDirective(DKind) &&
3766  "Expected loop-based or sections-based directive.");
3767  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3768  isOpenMPLoopDirective(DKind)
3769  ? OMP_IDENT_WORK_LOOP
3770  : OMP_IDENT_WORK_SECTIONS);
3771  llvm::Value *ThreadId = getThreadID(CGF, Loc);
3772  llvm::FunctionCallee StaticInitFunction =
3774  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3775  ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3776 }
3777 
3779  CodeGenFunction &CGF, SourceLocation Loc,
3780  OpenMPDistScheduleClauseKind SchedKind,
3781  const CGOpenMPRuntime::StaticRTInput &Values) {
3782  OpenMPSchedType ScheduleNum =
3783  getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3784  llvm::Value *UpdatedLocation =
3785  emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3786  llvm::Value *ThreadId = getThreadID(CGF, Loc);
3787  llvm::FunctionCallee StaticInitFunction =
3788  createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3789  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3790  ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3792 }
3793 
3795  SourceLocation Loc,
3796  OpenMPDirectiveKind DKind) {
3797  if (!CGF.HaveInsertPoint())
3798  return;
3799  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3800  llvm::Value *Args[] = {
3801  emitUpdateLocation(CGF, Loc,
3803  ? OMP_IDENT_WORK_DISTRIBUTE
3804  : isOpenMPLoopDirective(DKind)
3805  ? OMP_IDENT_WORK_LOOP
3806  : OMP_IDENT_WORK_SECTIONS),
3807  getThreadID(CGF, Loc)};
3809  Args);
3810 }
3811 
3813  SourceLocation Loc,
3814  unsigned IVSize,
3815  bool IVSigned) {
3816  if (!CGF.HaveInsertPoint())
3817  return;
3818  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3819  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3820  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3821 }
3822 
3824  SourceLocation Loc, unsigned IVSize,
3825  bool IVSigned, Address IL,
3826  Address LB, Address UB,
3827  Address ST) {
3828  // Call __kmpc_dispatch_next(
3829  // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3830  // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3831  // kmp_int[32|64] *p_stride);
3832  llvm::Value *Args[] = {
3833  emitUpdateLocation(CGF, Loc),
3834  getThreadID(CGF, Loc),
3835  IL.getPointer(), // &isLastIter
3836  LB.getPointer(), // &Lower
3837  UB.getPointer(), // &Upper
3838  ST.getPointer() // &Stride
3839  };
3840  llvm::Value *Call =
3841  CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3842  return CGF.EmitScalarConversion(
3843  Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
3844  CGF.getContext().BoolTy, Loc);
3845 }
3846 
3848  llvm::Value *NumThreads,
3849  SourceLocation Loc) {
3850  if (!CGF.HaveInsertPoint())
3851  return;
3852  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3853  llvm::Value *Args[] = {
3854  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3855  CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3857  Args);
3858 }
3859 
3861  ProcBindKind ProcBind,
3862  SourceLocation Loc) {
3863  if (!CGF.HaveInsertPoint())
3864  return;
3865  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
3866  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3867  llvm::Value *Args[] = {
3868  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3869  llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
3871 }
3872 
3873 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3874  SourceLocation Loc) {
3875  if (!CGF.HaveInsertPoint())
3876  return;
3877  // Build call void __kmpc_flush(ident_t *loc)
3879  emitUpdateLocation(CGF, Loc));
3880 }
3881 
3882 namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
3906 } // anonymous namespace
3907 
3908 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3909  return OffloadEntriesTargetRegion.empty() &&
3910  OffloadEntriesDeviceGlobalVar.empty();
3911 }
3912 
3913 /// Initialize target region entry.
3914 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3915  initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3916  StringRef ParentName, unsigned LineNum,
3917  unsigned Order) {
3918  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3919  "only required for the device "
3920  "code generation.");
3921  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3922  OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3923  OMPTargetRegionEntryTargetRegion);
3924  ++OffloadingEntriesNum;
3925 }
3926 
3927 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3928  registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3929  StringRef ParentName, unsigned LineNum,
3930  llvm::Constant *Addr, llvm::Constant *ID,
3931  OMPTargetRegionEntryKind Flags) {
3932  // If we are emitting code for a target, the entry is already initialized,
3933  // only has to be registered.
3934  if (CGM.getLangOpts().OpenMPIsDevice) {
3935  if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
3936  unsigned DiagID = CGM.getDiags().getCustomDiagID(
3938  "Unable to find target region on line '%0' in the device code.");
3939  CGM.getDiags().Report(DiagID) << LineNum;
3940  return;
3941  }
3942  auto &Entry =
3943  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3944  assert(Entry.isValid() && "Entry not initialized!");
3945  Entry.setAddress(Addr);
3946  Entry.setID(ID);
3947  Entry.setFlags(Flags);
3948  } else {
3949  OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3950  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3951  ++OffloadingEntriesNum;
3952  }
3953 }
3954 
3955 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3956  unsigned DeviceID, unsigned FileID, StringRef ParentName,
3957  unsigned LineNum) const {
3958  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3959  if (PerDevice == OffloadEntriesTargetRegion.end())
3960  return false;
3961  auto PerFile = PerDevice->second.find(FileID);
3962  if (PerFile == PerDevice->second.end())
3963  return false;
3964  auto PerParentName = PerFile->second.find(ParentName);
3965  if (PerParentName == PerFile->second.end())
3966  return false;
3967  auto PerLine = PerParentName->second.find(LineNum);
3968  if (PerLine == PerParentName->second.end())
3969  return false;
3970  // Fail if this entry is already registered.
3971  if (PerLine->second.getAddress() || PerLine->second.getID())
3972  return false;
3973  return true;
3974 }
3975 
3976 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3977  const OffloadTargetRegionEntryInfoActTy &Action) {
3978  // Scan all target region entries and perform the provided action.
3979  for (const auto &D : OffloadEntriesTargetRegion)
3980  for (const auto &F : D.second)
3981  for (const auto &P : F.second)
3982  for (const auto &L : P.second)
3983  Action(D.first, F.first, P.first(), L.first, L.second);
3984 }
3985 
3986 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3987  initializeDeviceGlobalVarEntryInfo(StringRef Name,
3988  OMPTargetGlobalVarEntryKind Flags,
3989  unsigned Order) {
3990  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3991  "only required for the device "
3992  "code generation.");
3993  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3994  ++OffloadingEntriesNum;
3995 }
3996 
3997 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3998  registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3999  CharUnits VarSize,
4000  OMPTargetGlobalVarEntryKind Flags,
4001  llvm::GlobalValue::LinkageTypes Linkage) {
4002  if (CGM.getLangOpts().OpenMPIsDevice) {
4003  auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
4004  assert(Entry.isValid() && Entry.getFlags() == Flags &&
4005  "Entry not initialized!");
4006  assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
4007  "Resetting with the new address.");
4008  if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
4009  if (Entry.getVarSize().isZero()) {
4010  Entry.setVarSize(VarSize);
4011  Entry.setLinkage(Linkage);
4012  }
4013  return;
4014  }
4015  Entry.setVarSize(VarSize);
4016  Entry.setLinkage(Linkage);
4017  Entry.setAddress(Addr);
4018  } else {
4019  if (hasDeviceGlobalVarEntryInfo(VarName)) {
4020  auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
4021  assert(Entry.isValid() && Entry.getFlags() == Flags &&
4022  "Entry not initialized!");
4023  assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
4024  "Resetting with the new address.");
4025  if (Entry.getVarSize().isZero()) {
4026  Entry.setVarSize(VarSize);
4027  Entry.setLinkage(Linkage);
4028  }
4029  return;
4030  }
4031  OffloadEntriesDeviceGlobalVar.try_emplace(
4032  VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
4033  ++OffloadingEntriesNum;
4034  }
4035 }
4036 
4037 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
4038  actOnDeviceGlobalVarEntriesInfo(
4039  const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
4040  // Scan all target region entries and perform the provided action.
4041  for (const auto &E : OffloadEntriesDeviceGlobalVar)
4042  Action(E.getKey(), E.getValue());
4043 }
4044 
4046  llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
4047  llvm::GlobalValue::LinkageTypes Linkage) {
4048  StringRef Name = Addr->getName();
4049  llvm::Module &M = CGM.getModule();
4050  llvm::LLVMContext &C = M.getContext();
4051 
4052  // Create constant string with the name.
4053  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
4054 
4055  std::string StringName = getName({"omp_offloading", "entry_name"});
4056  auto *Str = new llvm::GlobalVariable(
4057  M, StrPtrInit->getType(), /*isConstant=*/true,
4058  llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
4059  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
4060 
4061  llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
4062  llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
4063  llvm::ConstantInt::get(CGM.SizeTy, Size),
4064  llvm::ConstantInt::get(CGM.Int32Ty, Flags),
4065  llvm::ConstantInt::get(CGM.Int32Ty, 0)};
4066  std::string EntryName = getName({"omp_offloading", "entry", ""});
4067  llvm::GlobalVariable *Entry = createGlobalStruct(
4068  CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
4069  Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
4070 
4071  // The entry has to be created in the section the linker expects it to be.
4072  Entry->setSection("omp_offloading_entries");
4073 }
4074 
4076  // Emit the offloading entries and metadata so that the device codegen side
4077  // can easily figure out what to emit. The produced metadata looks like
4078  // this:
4079  //
4080  // !omp_offload.info = !{!1, ...}
4081  //
4082  // Right now we only generate metadata for function that contain target
4083  // regions.
4084 
4085  // If we are in simd mode or there are no entries, we don't need to do
4086  // anything.
4087  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
4088  return;
4089 
4090  llvm::Module &M = CGM.getModule();
4091  llvm::LLVMContext &C = M.getContext();
4093  SourceLocation, StringRef>,
4094  16>
4095  OrderedEntries(OffloadEntriesInfoManager.size());
4096  llvm::SmallVector<StringRef, 16> ParentFunctions(
4098 
4099  // Auxiliary methods to create metadata values and strings.
4100  auto &&GetMDInt = [this](unsigned V) {
4101  return llvm::ConstantAsMetadata::get(
4102  llvm::ConstantInt::get(CGM.Int32Ty, V));
4103  };
4104 
4105  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
4106 
4107  // Create the offloading info metadata node.
4108  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
4109 
4110  // Create function that emits metadata for each target region entry;
4111  auto &&TargetRegionMetadataEmitter =
4112  [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
4113  &GetMDString](
4114  unsigned DeviceID, unsigned FileID, StringRef ParentName,
4115  unsigned Line,
4117  // Generate metadata for target regions. Each entry of this metadata
4118  // contains:
4119  // - Entry 0 -> Kind of this type of metadata (0).
4120  // - Entry 1 -> Device ID of the file where the entry was identified.
4121  // - Entry 2 -> File ID of the file where the entry was identified.
4122  // - Entry 3 -> Mangled name of the function where the entry was
4123  // identified.
4124  // - Entry 4 -> Line in the file where the entry was identified.
4125  // - Entry 5 -> Order the entry was created.
4126  // The first element of the metadata node is the kind.
4127  llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
4128  GetMDInt(FileID), GetMDString(ParentName),
4129  GetMDInt(Line), GetMDInt(E.getOrder())};
4130 
4131  SourceLocation Loc;
4132  for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
4134  I != E; ++I) {
4135  if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
4136  I->getFirst()->getUniqueID().getFile() == FileID) {
4138  I->getFirst(), Line, 1);
4139  break;
4140  }
4141  }
4142  // Save this entry in the right position of the ordered entries array.
4143  OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
4144  ParentFunctions[E.getOrder()] = ParentName;
4145 
4146  // Add metadata to the named metadata node.
4147  MD->addOperand(llvm::MDNode::get(C, Ops));
4148  };
4149 
4151  TargetRegionMetadataEmitter);
4152 
4153  // Create function that emits metadata for each device global variable entry;
4154  auto &&DeviceGlobalVarMetadataEmitter =
4155  [&C, &OrderedEntries, &GetMDInt, &GetMDString,
4156  MD](StringRef MangledName,
4158  &E) {
4159  // Generate metadata for global variables. Each entry of this metadata
4160  // contains:
4161  // - Entry 0 -> Kind of this type of metadata (1).
4162  // - Entry 1 -> Mangled name of the variable.
4163  // - Entry 2 -> Declare target kind.
4164  // - Entry 3 -> Order the entry was created.
4165  // The first element of the metadata node is the kind.
4166  llvm::Metadata *Ops[] = {
4167  GetMDInt(E.getKind()), GetMDString(MangledName),
4168  GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
4169 
4170  // Save this entry in the right position of the ordered entries array.
4171  OrderedEntries[E.getOrder()] =
4172  std::make_tuple(&E, SourceLocation(), MangledName);
4173 
4174  // Add metadata to the named metadata node.
4175  MD->addOperand(llvm::MDNode::get(C, Ops));
4176  };
4177 
4179  DeviceGlobalVarMetadataEmitter);
4180 
4181  for (const auto &E : OrderedEntries) {
4182  assert(std::get<0>(E) && "All ordered entries must exist!");
4183  if (const auto *CE =
4184  dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
4185  std::get<0>(E))) {
4186  if (!CE->getID() || !CE->getAddress()) {
4187  // Do not blame the entry if the parent funtion is not emitted.
4188  StringRef FnName = ParentFunctions[CE->getOrder()];
4189  if (!CGM.GetGlobalValue(FnName))
4190  continue;
4191  unsigned DiagID = CGM.getDiags().getCustomDiagID(
4193  "Offloading entry for target region in %0 is incorrect: either the "
4194  "address or the ID is invalid.");
4195  CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
4196  continue;
4197  }
4198  createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
4199  CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
4200  } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
4201  OffloadEntryInfoDeviceGlobalVar>(
4202  std::get<0>(E))) {
4205  CE->getFlags());
4206  switch (Flags) {
4208  if (CGM.getLangOpts().OpenMPIsDevice &&
4209  CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
4210  continue;
4211  if (!CE->getAddress()) {
4212  unsigned DiagID = CGM.getDiags().getCustomDiagID(
4213  DiagnosticsEngine::Error, "Offloading entry for declare target "
4214  "variable %0 is incorrect: the "
4215  "address is invalid.");
4216  CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
4217  continue;
4218  }
4219  // The vaiable has no definition - no need to add the entry.
4220  if (CE->getVarSize().isZero())
4221  continue;
4222  break;
4223  }
4225  assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
4226  (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
4227  "Declaret target link address is set.");
4228  if (CGM.getLangOpts().OpenMPIsDevice)
4229  continue;
4230  if (!CE->getAddress()) {
4231  unsigned DiagID = CGM.getDiags().getCustomDiagID(
4233  "Offloading entry for declare target variable is incorrect: the "
4234  "address is invalid.");
4235  CGM.getDiags().Report(DiagID);
4236  continue;
4237  }
4238  break;
4239  }
4240  createOffloadEntry(CE->getAddress(), CE->getAddress(),
4241  CE->getVarSize().getQuantity(), Flags,
4242  CE->getLinkage());
4243  } else {
4244  llvm_unreachable("Unsupported entry kind.");
4245  }
4246  }
4247 }
4248 
4249 /// Loads all the offload entries information from the host IR
4250 /// metadata.
4252  // If we are in target mode, load the metadata from the host IR. This code has
4253  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().
4254 
  // The metadata is produced by the host compilation; when compiling the host
  // itself there is nothing to load.
4255  if (!CGM.getLangOpts().OpenMPIsDevice)
4256  return;
4257 
4258  if (CGM.getLangOpts().OMPHostIRFile.empty())
4259  return;
4260 
4261  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
4262  if (auto EC = Buf.getError()) {
4263  CGM.getDiags().Report(diag::err_cannot_open_file)
4264  << CGM.getLangOpts().OMPHostIRFile << EC.message();
4265  return;
4266  }
4267 
  // Parse the host IR into a throwaway context; only the named metadata is
  // inspected, nothing from the module is retained.
4268  llvm::LLVMContext C;
4269  auto ME = expectedToErrorOrAndEmitErrors(
4270  C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
4271 
4272  if (auto EC = ME.getError()) {
4273  unsigned DiagID = CGM.getDiags().getCustomDiagID(
4274  DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
4275  CGM.getDiags().Report(DiagID)
4276  << CGM.getLangOpts().OMPHostIRFile << EC.message();
4277  return;
4278  }
4279 
4280  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
4281  if (!MD)
4282  return;
4283 
  // Each operand is one offloading entry; operand 0 encodes the entry kind,
  // the remaining operands are decoded per kind below.
4284  for (llvm::MDNode *MN : MD->operands()) {
  // Reads operand Idx as an integer constant.
4285  auto &&GetMDInt = [MN](unsigned Idx) {
4286  auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
4287  return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
4288  };
4289 
  // Reads operand Idx as a string.
4290  auto &&GetMDString = [MN](unsigned Idx) {
4291  auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
4292  return V->getString();
4293  };
4294 
4295  switch (GetMDInt(0)) {
4296  default:
4297  llvm_unreachable("Unexpected metadata!");
4298  break;
4302  /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
4303  /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
4304  /*Order=*/GetMDInt(5));
4305  break;
4309  /*MangledName=*/GetMDString(1),
4310  static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4311  /*Flags=*/GetMDInt(2)),
4312  /*Order=*/GetMDInt(3));
4313  break;
4314  }
4315  }
4316 }
4317 
4319  if (!KmpRoutineEntryPtrTy) {
4320  // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
  // Lazily built on first use; both the AST form (KmpRoutineEntryPtrQTy) and
  // the converted LLVM IR type (KmpRoutineEntryPtrTy) are cached.
4321  ASTContext &C = CGM.getContext();
4322  QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4324  KmpRoutineEntryPtrQTy = C.getPointerType(
4325  C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4326  KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4327  }
4328 }
4329 
4331  // Make sure the type of the entry is already created. This is the type we
4332  // have to create:
4333  // struct __tgt_offload_entry{
4334  // void *addr; // Pointer to the offload entry info.
4335  // // (function or global)
4336  // char *name; // Name of the function or global.
4337  // size_t size; // Size of the entry info (0 if it is a function).
4338  // int32_t flags; // Flags associated with the entry, e.g. 'link'.
4339  // int32_t reserved; // Reserved, to use by the runtime library.
4340  // };
4341  if (TgtOffloadEntryQTy.isNull()) {
4342  ASTContext &C = CGM.getContext();
4343  RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4344  RD->startDefinition();
4345  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4347  addFieldToRecordDecl(C, RD, C.getSizeType());
4349  C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4351  C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4352  RD->completeDefinition();
  // Packed so the field offsets match the layout the offloading runtime
  // library expects for this struct.
4353  RD->addAttr(PackedAttr::CreateImplicit(C));
4355  }
4356  return TgtOffloadEntryQTy;
4358 
4359 namespace {
/// Groups a variable captured by a task-based directive with its task-private
/// copy and, for firstprivates, the helper variable used while emitting the
/// copy-initialization from the shared data.
4360 struct PrivateHelpersTy {
4361  PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
4362  const VarDecl *PrivateElemInit)
4363  : Original(Original), PrivateCopy(PrivateCopy),
4364  PrivateElemInit(PrivateElemInit) {}
  /// The variable as it appears in the construct.
4365  const VarDecl *Original;
  /// The task-local copy that codegen actually materializes.
4366  const VarDecl *PrivateCopy;
  /// Init helper for firstprivate; null for private/lastprivate entries.
4367  const VarDecl *PrivateElemInit;
4368 };
/// A private entry paired with its required alignment (entries are sorted by
/// alignment before the privates record is built).
4369 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
4370 } // anonymous namespace
4371 
/// Builds the implicit record type holding one field per task-private
/// variable. Returns null when the task has no privates at all.
4372 static RecordDecl *
4374  if (!Privates.empty()) {
4375  ASTContext &C = CGM.getContext();
4376  // Build struct .kmp_privates_t. {
4377  // /* private vars */
4378  // };
4379  RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4380  RD->startDefinition();
4381  for (const auto &Pair : Privates) {
4382  const VarDecl *VD = Pair.second.Original;
  // References decay to the referenced type: the record stores the value.
4383  QualType Type = VD->getType().getNonReferenceType();
4384  FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
4385  if (VD->hasAttrs()) {
  // Propagate alignment attributes so over-aligned privates keep their
  // required alignment inside the record.
4386  for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4387  E(VD->getAttrs().end());
4388  I != E; ++I)
4389  FD->addAttr(*I);
4390  }
4391  }
4392  RD->completeDefinition();
4393  return RD;
4394  }
  // No privates -> no record needed.
4395  return nullptr;
4396 }
4397 
/// Builds the AST record mirroring the runtime's kmp_task_t layout; for
/// taskloop-family directives the trailing loop-bound fields are appended.
4398 static RecordDecl *
4400  QualType KmpInt32Ty,
4401  QualType KmpRoutineEntryPointerQTy) {
4402  ASTContext &C = CGM.getContext();
4403  // Build struct kmp_task_t {
4404  // void * shareds;
4405  // kmp_routine_entry_t routine;
4406  // kmp_int32 part_id;
4407  // kmp_cmplrdata_t data1;
4408  // kmp_cmplrdata_t data2;
4409  // For taskloops additional fields:
4410  // kmp_uint64 lb;
4411  // kmp_uint64 ub;
4412  // kmp_int64 st;
4413  // kmp_int32 liter;
4414  // void * reductions;
4415  // };
  // kmp_cmplrdata_t is a union of an int (priority) slot and a routine
  // pointer slot, as sketched above.
4416  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4417  UD->startDefinition();
4418  addFieldToRecordDecl(C, UD, KmpInt32Ty);
4419  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4420  UD->completeDefinition();
4421  QualType KmpCmplrdataTy = C.getRecordType(UD);
4422  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4423  RD->startDefinition();
4424  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4425  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4426  addFieldToRecordDecl(C, RD, KmpInt32Ty);
4427  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4428  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  // Taskloop tasks additionally carry bounds, stride, last-iteration flag
  // and a reductions pointer.
4429  if (isOpenMPTaskLoopDirective(Kind)) {
4430  QualType KmpUInt64Ty =
4431  CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4432  QualType KmpInt64Ty =
4433  CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4434  addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4435  addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4436  addFieldToRecordDecl(C, RD, KmpInt64Ty);
4437  addFieldToRecordDecl(C, RD, KmpInt32Ty);
4438  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4439  }
4440  RD->completeDefinition();
4441  return RD;
4442 }
4443 
/// Wraps kmp_task_t together with the privates record (when there is one) so
/// that a single allocation covers both the runtime task header and the
/// task-private data.
4444 static RecordDecl *
4446  ArrayRef<PrivateDataTy> Privates) {
4447  ASTContext &C = CGM.getContext();
4448  // Build struct kmp_task_t_with_privates {
4449  // kmp_task_t task_data;
4450  // .kmp_privates_t. privates;
4451  // };
4452  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4453  RD->startDefinition();
4454  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  // The privates field is only present when the task has privates.
4455  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4456  addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4457  RD->completeDefinition();
4458  return RD;
4459 }
4460 
4461 /// Emit a proxy function which accepts kmp_task_t as the second
4462 /// argument.
4463 /// \code
4464 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
4465 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
4466 /// For taskloops:
4467 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
4468 /// tt->reductions, tt->shareds);
4469 /// return 0;
4470 /// }
4471 /// \endcode
4472 static llvm::Function *
4474  OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
4475  QualType KmpTaskTWithPrivatesPtrQTy,
4476  QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
4477  QualType SharedsPtrTy, llvm::Function *TaskFunction,
4478  llvm::Value *TaskPrivatesMap) {
4479  ASTContext &C = CGM.getContext();
  // Parameters: (kmp_int32 gtid, kmp_task_t_with_privates *restrict tt).
4480  FunctionArgList Args;
4481  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4483  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4484  KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4486  Args.push_back(&GtidArg);
4487  Args.push_back(&TaskTypeArg);
4488  const auto &TaskEntryFnInfo =
4489  CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4490  llvm::FunctionType *TaskEntryTy =
4491  CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
4492  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
4493  auto *TaskEntry = llvm::Function::Create(
4494  TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4495  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
4496  TaskEntry->setDoesNotRecurse();
4497  CodeGenFunction CGF(CGM);
4498  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
4499  Loc, Loc);
4500 
4501  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
4502  // tt,
4503  // For taskloops:
4504  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
4505  // tt->task_data.shareds);
4506  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
4507  CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
4508  LValue TDBase = CGF.EmitLoadOfPointerLValue(
4509  CGF.GetAddrOfLocalVar(&TaskTypeArg),
4510  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4511  const auto *KmpTaskTWithPrivatesQTyRD =
4512  cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Base is the embedded kmp_task_t (first field of the wrapper record).
4513  LValue Base =
4514  CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4515  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4516  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4517  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
4518  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
4519 
4520  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
4521  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
4523  CGF.EmitLoadOfScalar(SharedsLVal, Loc),
4524  CGF.ConvertTypeForMem(SharedsPtrTy));
4525 
  // Pass a pointer to the privates block, or null when the task has none
  // (i.e. the wrapper record only has the kmp_task_t field).
4526  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4527  llvm::Value *PrivatesParam;
4528  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
4529  LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
4530  PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4531  PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
4532  } else {
4533  PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4534  }
4535 
4536  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
4537  TaskPrivatesMap,
4538  CGF.Builder
4540  TDBase.getAddress(CGF), CGF.VoidPtrTy)
4541  .getPointer()};
4542  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
4543  std::end(CommonArgs));
  // Taskloop entries additionally receive lb/ub/st/liter/reductions loaded
  // from the task record.
4544  if (isOpenMPTaskLoopDirective(Kind)) {
4545  auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
4546  LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
4547  llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
4548  auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
4549  LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
4550  llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
4551  auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
4552  LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
4553  llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
4554  auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4555  LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4556  llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
4557  auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
4558  LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
4559  llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
4560  CallArgs.push_back(LBParam);
4561  CallArgs.push_back(UBParam);
4562  CallArgs.push_back(StParam);
4563  CallArgs.push_back(LIParam);
4564  CallArgs.push_back(RParam);
4565  }
4566  CallArgs.push_back(SharedsParam);
4567 
4568  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
4569  CallArgs);
  // The entry always returns 0, per the sketch in the header comment.
4570  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
4571  CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
4572  CGF.FinishFunction();
4573  return TaskEntry;
4574 }
4575 
4577  SourceLocation Loc,
4578  QualType KmpInt32Ty,
4579  QualType KmpTaskTWithPrivatesPtrQTy,
4580  QualType KmpTaskTWithPrivatesQTy) {
4581  ASTContext &C = CGM.getContext();
4582  FunctionArgList Args;
4583  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4585  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4586  KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4588  Args.push_back(&GtidArg);
4589  Args.push_back(&TaskTypeArg);
4590  const auto &DestructorFnInfo =
4591  CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4592  llvm::FunctionType *DestructorFnTy =
4593  CGM.getTypes().GetFunctionType(DestructorFnInfo);
4594  std::string Name =
4595  CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
4596  auto *DestructorFn =
4598  Name, &CGM.getModule());
4599  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
4600  DestructorFnInfo);
4601  DestructorFn->setDoesNotRecurse();
4602  CodeGenFunction CGF(CGM);
4603  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
4604  Args, Loc, Loc);
4605 
4607  CGF.GetAddrOfLocalVar(&TaskTypeArg),
4608  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4609  const auto *KmpTaskTWithPrivatesQTyRD =
4610  cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4611  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4612  Base = CGF.EmitLValueForField(Base, *FI);
4613  for (const auto *Field :
4614  cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
4615  if (QualType::DestructionKind DtorKind =
4616  Field->getType().isDestructedType()) {
4617  LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
4618  CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
4619  }
4620  }
4621  CGF.FinishFunction();
4622  return DestructorFn;
4623 }
4624 
4625 /// Emit a privates mapping function for correct handling of private and
4626 /// firstprivate variables.
4627 /// \code
4628 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4629 /// **noalias priv1,..., <tyn> **noalias privn) {
4630 /// *priv1 = &.privates.priv1;
4631 /// ...;
4632 /// *privn = &.privates.privn;
4633 /// }
4634 /// \endcode
4635 static llvm::Value *
4637  ArrayRef<const Expr *> PrivateVars,
4638  ArrayRef<const Expr *> FirstprivateVars,
4639  ArrayRef<const Expr *> LastprivateVars,
4640  QualType PrivatesQTy,
4641  ArrayRef<PrivateDataTy> Privates) {
4642  ASTContext &C = CGM.getContext();
4643  FunctionArgList Args;
4644  ImplicitParamDecl TaskPrivatesArg(
4645  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4646  C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4648  Args.push_back(&TaskPrivatesArg);
4649  llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
  // Args[0] is the privates record itself; the per-variable out-pointer
  // parameters start at position 1.
4650  unsigned Counter = 1;
4651  for (const Expr *E : PrivateVars) {
4652  Args.push_back(ImplicitParamDecl::Create(
4653  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4655  .withConst()
4656  .withRestrict(),
4658  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4659  PrivateVarsPos[VD] = Counter;
4660  ++Counter;
4661  }
4662  for (const Expr *E : FirstprivateVars) {
4663  Args.push_back(ImplicitParamDecl::Create(
4664  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4666  .withConst()
4667  .withRestrict(),
4669  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4670  PrivateVarsPos[VD] = Counter;
4671  ++Counter;
4672  }
4673  for (const Expr *E : LastprivateVars) {
4674  Args.push_back(ImplicitParamDecl::Create(
4675  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4677  .withConst()
4678  .withRestrict(),
4680  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4681  PrivateVarsPos[VD] = Counter;
4682  ++Counter;
4683  }
4684  const auto &TaskPrivatesMapFnInfo =
4686  llvm::FunctionType *TaskPrivatesMapTy =
4687  CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4688  std::string Name =
4689  CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4690  auto *TaskPrivatesMap = llvm::Function::Create(
4691  TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4692  &CGM.getModule());
4693  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4694  TaskPrivatesMapFnInfo);
  // The mapper is trivial glue; force-inline it in optimized builds.
4695  if (CGM.getLangOpts().Optimize) {
4696  TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4697  TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4698  TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4699  }
4700  CodeGenFunction CGF(CGM);
4701  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4702  TaskPrivatesMapFnInfo, Args, Loc, Loc);
4703 
4704  // *privi = &.privates.privi;
4706  CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4707  TaskPrivatesArg.getType()->castAs<PointerType>());
4708  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  // Walk record fields in declaration order; Privates is expected to be in
  // the same order, so field i corresponds to Privates[i].
4709  Counter = 0;
4710  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4711  LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4712  const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4713  LValue RefLVal =
4714  CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4715  LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4716  RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
4717  CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
4718  ++Counter;
4719  }
4720  CGF.FinishFunction();
4721  return TaskPrivatesMap;
4722 }
4723 
4724 /// Emit initialization for private variables in task-based directives.
/// When \p ForDup is true this runs inside the task_dup helper, where only
/// non-trivial constructor initializers need re-running (trivial data is
/// presumably copied bitwise by the runtime — confirm against the RTL).
4726  const OMPExecutableDirective &D,
4727  Address KmpTaskSharedsPtr, LValue TDBase,
4728  const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4729  QualType SharedsTy, QualType SharedsPtrTy,
4730  const OMPTaskDataTy &Data,
4731  ArrayRef<PrivateDataTy> Privates, bool ForDup) {
4732  ASTContext &C = CGF.getContext();
4733  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4734  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
4736  ? OMPD_taskloop
4737  : OMPD_task;
4738  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
4739  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
4740  LValue SrcBase;
4741  bool IsTargetTask =
4744  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
4745  // PointersArray and SizesArray. The original variables for these arrays are
4746  // not captured and we get their addresses explicitly.
4747  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
4748  (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
4749  SrcBase = CGF.MakeAddrLValue(
4751  KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
4752  SharedsTy);
4753  }
  // Iterate the privates record fields in lockstep with Privates.
4754  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
4755  for (const PrivateDataTy &Pair : Privates) {
4756  const VarDecl *VD = Pair.second.PrivateCopy;
4757  const Expr *Init = VD->getAnyInitializer();
4758  if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
4759  !CGF.isTrivialInitializer(Init)))) {
4760  LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
  // Non-null PrivateElemInit means firstprivate: copy from the shared
  // (or explicitly-passed) source instead of default-initializing.
4761  if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
4762  const VarDecl *OriginalVD = Pair.second.Original;
4763  // Check if the variable is the target-based BasePointersArray,
4764  // PointersArray or SizesArray.
4765  LValue SharedRefLValue;
4766  QualType Type = PrivateLValue.getType();
4767  const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
4768  if (IsTargetTask && !SharedField) {
4769  assert(isa<ImplicitParamDecl>(OriginalVD) &&
4770  isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
4771  cast<CapturedDecl>(OriginalVD->getDeclContext())
4772  ->getNumParams() == 0 &&
4773  isa<TranslationUnitDecl>(
4774  cast<CapturedDecl>(OriginalVD->getDeclContext())
4775  ->getDeclContext()) &&
4776  "Expected artificial target data variable.");
4777  SharedRefLValue =
4778  CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
4779  } else {
4780  SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
4781  SharedRefLValue = CGF.MakeAddrLValue(
4782  Address(SharedRefLValue.getPointer(CGF),
4783  C.getDeclAlign(OriginalVD)),
4784  SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
4785  SharedRefLValue.getTBAAInfo());
4786  }
4787  if (Type->isArrayType()) {
4788  // Initialize firstprivate array.
4789  if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
4790  // Perform simple memcpy.
4791  CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
4792  } else {
4793  // Initialize firstprivate array using element-by-element
4794  // initialization.
4796  PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
4797  Type,
4798  [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
4799  Address SrcElement) {
4800  // Clean up any temporaries needed by the initialization.
4801  CodeGenFunction::OMPPrivateScope InitScope(CGF);
4802  InitScope.addPrivate(
4803  Elem, [SrcElement]() -> Address { return SrcElement; });
4804  (void)InitScope.Privatize();
4805  // Emit initialization for single element.
4807  CGF, &CapturesInfo);
4808  CGF.EmitAnyExprToMem(Init, DestElement,
4809  Init->getType().getQualifiers(),
4810  /*IsInitializer=*/false);
4811  });
4812  }
4813  } else {
  // Scalar/class firstprivate: privatize the source element, then run
  // the initializer expression into the private field.
4814  CodeGenFunction::OMPPrivateScope InitScope(CGF);
4815  InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
4816  return SharedRefLValue.getAddress(CGF);
4817  });
4818  (void)InitScope.Privatize();
4819  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
4820  CGF.EmitExprAsInit(Init, VD, PrivateLValue,
4821  /*capturedByInit=*/false);
4822  }
4823  } else {
  // Plain private/lastprivate: just run the declared initializer.
4824  CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
4825  }
4826  }
4827  ++FI;
4828  }
4829 }
4830 
4831 /// Check if duplication function is required for taskloops.
/// Returns true iff some private copy has a non-trivial constructor
/// initializer, i.e. task_dup must re-run constructors for it.
4833  ArrayRef<PrivateDataTy> Privates) {
4834  bool InitRequired = false;
4835  for (const PrivateDataTy &Pair : Privates) {
4836  const VarDecl *VD = Pair.second.PrivateCopy;
4837  const Expr *Init = VD->getAnyInitializer();
4838  InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4839  !CGF.isTrivialInitializer(Init));
  // One non-trivial init is enough; stop scanning.
4840  if (InitRequired)
4841  break;
4842  }
4843  return InitRequired;
4844 }
4845 
4846 
4847 /// Emit task_dup function (for initialization of
4848 /// private/firstprivate/lastprivate vars and last_iter flag)
4849 /// \code
4850 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
4851 /// lastpriv) {
4852 /// // setup lastprivate flag
4853 /// task_dst->last = lastpriv;
4854 /// // could be constructor calls here...
4855 /// }
4856 /// \endcode
4857 static llvm::Value *
4859  const OMPExecutableDirective &D,
4860  QualType KmpTaskTWithPrivatesPtrQTy,
4861  const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4862  const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
4863  QualType SharedsPtrTy, const OMPTaskDataTy &Data,
4864  ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
4865  ASTContext &C = CGM.getContext();
  // Parameters: (task_dst, task_src, lastpriv) as sketched above.
4866  FunctionArgList Args;
4867  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4868  KmpTaskTWithPrivatesPtrQTy,
4870  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4871  KmpTaskTWithPrivatesPtrQTy,
4873  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
4875  Args.push_back(&DstArg);
4876  Args.push_back(&SrcArg);
4877  Args.push_back(&LastprivArg);
4878  const auto &TaskDupFnInfo =
4880  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4881  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
4882  auto *TaskDup = llvm::Function::Create(
4883  TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4884  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4885  TaskDup->setDoesNotRecurse();
4886  CodeGenFunction CGF(CGM);
4887  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4888  Loc);
4889 
4890  LValue TDBase = CGF.EmitLoadOfPointerLValue(
4891  CGF.GetAddrOfLocalVar(&DstArg),
4892  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4893  // task_dst->liter = lastpriv;
4894  if (WithLastIter) {
4895  auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4897  TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4898  LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4899  llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4900  CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4901  CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4902  }
4903 
4904  // Emit initial values for private copies (if any).
4905  assert(!Privates.empty());
4906  Address KmpTaskSharedsPtr = Address::invalid();
  // Firstprivates are (re)initialized from the *source* task's shareds.
4907  if (!Data.FirstprivateVars.empty()) {
4908  LValue TDBase = CGF.EmitLoadOfPointerLValue(
4909  CGF.GetAddrOfLocalVar(&SrcArg),
4910  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4912  TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4913  KmpTaskSharedsPtr = Address(
4915  Base, *std::next(KmpTaskTQTyRD->field_begin(),
4916  KmpTaskTShareds)),
4917  Loc),
4918  CGF.getNaturalTypeAlignment(SharedsTy));
4919  }
4920  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4921  SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4922  CGF.FinishFunction();
4923  return TaskDup;
4924 }
4925 
4926 /// Checks if destructor function is required to be generated.
4927 /// \return true if cleanups are required, false otherwise.
4928 static bool
4929 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4930  // The second field of kmp_task_t_with_privates is the privates record;
4931  // a destructor thunk is needed iff any of its fields is destructible.
4932  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4933  const auto *PrivatesRD = cast<RecordDecl>(PrivatesFI->getType()->getAsTagDecl());
4934  // Stop scanning at the first field that needs non-trivial destruction.
4935  for (const FieldDecl *FD : PrivatesRD->fields())
4936  if (FD->getType().isDestructedType())
4937  return true;
4938  return false;
4939 }
4940 
// NOTE(review): this is a doxygen rendering with dropped source lines (the
// signature line 4942, the declaration of `Privates` at 4947, and several
// runtime-call lines are missing). Presumably this is
// CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, ...)
// -- confirm against the upstream clang sources before editing.
//
// Emits the setup shared by all task-generating constructs: gathers the
// private/firstprivate/lastprivate copies, builds the kmp_task_t record with
// an attached privates struct, allocates the task through the runtime, copies
// the shared variables into it, runs private initializers, and fills the
// destructor/priority union fields. Returns the pieces (new task pointer,
// proxy entry function, typed base lvalue, task record decl) that callers
// need to emit the actual dispatch.
4941 CGOpenMPRuntime::TaskResultTy
4943  const OMPExecutableDirective &D,
4944  llvm::Function *TaskFunction, QualType SharedsTy,
4945  Address Shareds, const OMPTaskDataTy &Data) {
4946  ASTContext &C = CGM.getContext();
4948  // Aggregate privates and sort them by the alignment.
4949  auto I = Data.PrivateCopies.begin();
4950  for (const Expr *E : Data.PrivateVars) {
4951  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4952  Privates.emplace_back(
4953  C.getDeclAlign(VD),
4954  PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4955  /*PrivateElemInit=*/nullptr));
4956  ++I;
4957  }
4958  I = Data.FirstprivateCopies.begin();
4959  auto IElemInitRef = Data.FirstprivateInits.begin();
4960  for (const Expr *E : Data.FirstprivateVars) {
4961  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4962  Privates.emplace_back(
4963  C.getDeclAlign(VD),
4964  PrivateHelpersTy(
4965  VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
// Firstprivates additionally record the init variable so the copy/init can
// be emitted later (privates/lastprivates pass nullptr above and below).
4966  cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
4967  ++I;
4968  ++IElemInitRef;
4969  }
4970  I = Data.LastprivateCopies.begin();
4971  for (const Expr *E : Data.LastprivateVars) {
4972  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4973  Privates.emplace_back(
4974  C.getDeclAlign(VD),
4975  PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4976  /*PrivateElemInit=*/nullptr));
4977  ++I;
4978  }
// Sort descending by alignment so the generated privates record is packed
// without padding; stable_sort keeps a deterministic field order for
// equal alignments.
4979  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
4980  return L.first > R.first;
4981  });
4982  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4983  // Build type kmp_routine_entry_t (if not built yet).
4984  emitKmpRoutineEntryT(KmpInt32Ty);
4985  // Build type kmp_task_t (if not built yet).
// The taskloop variant of kmp_task_t carries extra lb/ub/st/liter/reductions
// fields; both cached types are built lazily on first use.
4987  if (SavedKmpTaskloopTQTy.isNull()) {
4989  CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4990  }
4992  } else {
4993  assert((D.getDirectiveKind() == OMPD_task ||
4996  "Expected taskloop, task or target directive");
4997  if (SavedKmpTaskTQTy.isNull()) {
4999  CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
5000  }
5002  }
5003  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
5004  // Build particular struct kmp_task_t for the given task.
5005  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
5007  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
5008  QualType KmpTaskTWithPrivatesPtrQTy =
5009  C.getPointerType(KmpTaskTWithPrivatesQTy);
5010  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
5011  llvm::Type *KmpTaskTWithPrivatesPtrTy =
5012  KmpTaskTWithPrivatesTy->getPointerTo();
5013  llvm::Value *KmpTaskTWithPrivatesTySize =
5014  CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
5015  QualType SharedsPtrTy = C.getPointerType(SharedsTy);
5016 
5017  // Emit initial values for private copies (if any).
5018  llvm::Value *TaskPrivatesMap = nullptr;
// The privates-map function type is taken from the 4th parameter of the
// outlined task function (arg index 3).
5019  llvm::Type *TaskPrivatesMapTy =
5020  std::next(TaskFunction->arg_begin(), 3)->getType();
5021  if (!Privates.empty()) {
5022  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
5023  TaskPrivatesMap = emitTaskPrivateMappingFunction(
5024  CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
5025  FI->getType(), Privates);
5026  TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5027  TaskPrivatesMap, TaskPrivatesMapTy);
5028  } else {
// No privates: pass a typed null pointer as the mapping function.
5029  TaskPrivatesMap = llvm::ConstantPointerNull::get(
5030  cast<llvm::PointerType>(TaskPrivatesMapTy));
5031  }
5032  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
5033  // kmp_task_t *tt);
5034  llvm::Function *TaskEntry = emitProxyTaskFunction(
5035  CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
5036  KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
5037  TaskPrivatesMap);
5038 
5039  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
5040  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
5041  // kmp_routine_entry_t *task_entry);
5042  // Task flags. Format is taken from
5043  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
5044  // description of kmp_tasking_flags struct.
5045  enum {
5046  TiedFlag = 0x1,
5047  FinalFlag = 0x2,
5048  DestructorsFlag = 0x8,
5049  PriorityFlag = 0x20
5050  };
5051  unsigned Flags = Data.Tied ? TiedFlag : 0;
5052  bool NeedsCleanup = false;
5053  if (!Privates.empty()) {
// Only request runtime destructor invocation if some private copy actually
// has a non-trivial destructor.
5054  NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
5055  if (NeedsCleanup)
5056  Flags = Flags | DestructorsFlag;
5057  }
5058  if (Data.Priority.getInt())
5059  Flags = Flags | PriorityFlag;
// The final() clause may be a runtime expression (pointer set) or a
// compile-time constant (int flag); select accordingly.
5060  llvm::Value *TaskFlags =
5061  Data.Final.getPointer()
5062  ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
5063  CGF.Builder.getInt32(FinalFlag),
5064  CGF.Builder.getInt32(/*C=*/0))
5065  : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
5066  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
5067  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
5068  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
5069  getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
5071  TaskEntry, KmpRoutineEntryPtrTy)};
5072  llvm::Value *NewTask;
// A nowait target task uses the target-specific allocation entry point with
// an extra device-id argument; otherwise the plain __kmpc_omp_task_alloc.
// NOTE(review): the actual runtime-function names were on dropped lines
// 5087/5090 -- confirm upstream.
5073  if (D.hasClausesOfKind<OMPNowaitClause>()) {
5074  // Check if we have any device clause associated with the directive.
5075  const Expr *Device = nullptr;
5076  if (auto *C = D.getSingleClause<OMPDeviceClause>())
5077  Device = C->getDevice();
5078  // Emit device ID if any otherwise use default value.
5079  llvm::Value *DeviceID;
5080  if (Device)
5081  DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
5082  CGF.Int64Ty, /*isSigned=*/true);
5083  else
5084  DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
5085  AllocArgs.push_back(DeviceID);
5086  NewTask = CGF.EmitRuntimeCall(
5088  } else {
5089  NewTask = CGF.EmitRuntimeCall(
5091  }
// Reinterpret the opaque kmp_task_t* result as the task-with-privates type
// so fields can be addressed below.
5092  llvm::Value *NewTaskNewTaskTTy =
5094  NewTask, KmpTaskTWithPrivatesPtrTy);
5095  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
5096  KmpTaskTWithPrivatesQTy);
5097  LValue TDBase =
5098  CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
5099  // Fill the data in the resulting kmp_task_t record.
5100  // Copy shareds if there are any.
5101  Address KmpTaskSharedsPtr = Address::invalid();
5102  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
5103  KmpTaskSharedsPtr =
5105  CGF.EmitLValueForField(
5106  TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
5107  KmpTaskTShareds)),
5108  Loc),
5109  CGF.getNaturalTypeAlignment(SharedsTy));
5110  LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
5111  LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
5112  CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
5113  }
5114  // Emit initial values for private copies (if any).
5115  TaskResultTy Result;
5116  if (!Privates.empty()) {
5117  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
5118  SharedsTy, SharedsPtrTy, Data, Privates,
5119  /*ForDup=*/false);
// A task-duplication function is only needed when firstprivate inits or
// lastprivates must be re-run for tasks the runtime clones (taskloop).
// NOTE(review): part of this condition was on dropped line 5120 -- confirm.
5121  (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
5122  Result.TaskDupFn = emitTaskDupFunction(
5123  CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
5124  KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
5125  /*WithLastIter=*/!Data.LastprivateVars.empty());
5126  }
5127  }
5128  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
5129  enum { Priority = 0, Destructors = 1 };
5130  // Provide pointer to function with destructors for privates.
5131  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
5132  const RecordDecl *KmpCmplrdataUD =
5133  (*FI)->getType()->getAsUnionType()->getDecl();
5134  if (NeedsCleanup) {
5135  llvm::Value *DestructorFn = emitDestructorsFunction(
5136  CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
5137  KmpTaskTWithPrivatesQTy);
5138  LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
5139  LValue DestructorsLV = CGF.EmitLValueForField(
5140  Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
5142  DestructorFn, KmpRoutineEntryPtrTy),
5143  DestructorsLV);
5144  }
5145  // Set priority.
5146  if (Data.Priority.getInt()) {
5147  LValue Data2LV = CGF.EmitLValueForField(
5148  TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
5149  LValue PriorityLV = CGF.EmitLValueForField(
5150  Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
5151  CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
5152  }
5153  Result.NewTask = NewTask;
5154  Result.TaskEntry = TaskEntry;
5155  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
5156  Result.TDBase = TDBase;
5157  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
5158  return Result;
5159 }
5160 
// NOTE(review): signature line 5161 is missing from this rendering;
// presumably CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF,
// SourceLocation Loc, ...) -- confirm upstream.
//
// Emits a '#pragma omp task' dispatch: initializes the task via
// emitTaskInit, materializes the depend-clause array (if any), then emits
// either a deferred enqueue (__kmpc_omp_task[_with_deps]) or, when an
// if-clause evaluates false, the undeferred path
// (__kmpc_omp_wait_deps + task_begin_if0 / entry call / task_complete_if0).
5162  const OMPExecutableDirective &D,
5163  llvm::Function *TaskFunction,
5164  QualType SharedsTy, Address Shareds,
5165  const Expr *IfCond,
5166  const OMPTaskDataTy &Data) {
5167  if (!CGF.HaveInsertPoint())
5168  return;
5169 
5170  TaskResultTy Result =
5171  emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5172  llvm::Value *NewTask = Result.NewTask;
5173  llvm::Function *TaskEntry = Result.TaskEntry;
5174  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5175  LValue TDBase = Result.TDBase;
5176  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5177  ASTContext &C = CGM.getContext();
5178  // Process list of dependences.
5179  Address DependenciesArray = Address::invalid();
5180  unsigned NumDependencies = Data.Dependences.size();
5181  if (NumDependencies) {
5182  // Dependence kind for RTL.
5183  enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 };
5184  enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
5185  RecordDecl *KmpDependInfoRD;
// The flags field is stored as an unsigned integer the same width as bool.
5186  QualType FlagsTy =
5187  C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
5188  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
// Lazily build the implicit record type kmp_depend_info
// { intptr_t base_addr; size_t len; flags } and cache it in KmpDependInfoTy.
5189  if (KmpDependInfoTy.isNull()) {
5190  KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
5191  KmpDependInfoRD->startDefinition();
5192  addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
5193  addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
5194  addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
5195  KmpDependInfoRD->completeDefinition();
5196  KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
5197  } else {
5198  KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5199  }
5200  // Define type kmp_depend_info[<Dependences.size()>];
5201  QualType KmpDependInfoArrayTy = C.getConstantArrayType(
5202  KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
5203  nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
5204  // kmp_depend_info[<Dependences.size()>] deps;
5205  DependenciesArray =
5206  CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
5207  for (unsigned I = 0; I < NumDependencies; ++I) {
5208  const Expr *E = Data.Dependences[I].second;
5209  LValue Addr = CGF.EmitLValue(E);
5210  llvm::Value *Size;
5211  QualType Ty = E->getType();
// For an array section the length is computed as (one-past-upper-bound
// address minus lower-bound address) in bytes; otherwise sizeof(type).
5212  if (const auto *ASE =
5213  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
5214  LValue UpAddrLVal =
5215  CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
5216  llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
5217  UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
5218  llvm::Value *LowIntPtr =
5219  CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGM.SizeTy);
5220  llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
5221  Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
5222  } else {
5223  Size = CGF.getTypeSize(Ty);
5224  }
5225  LValue Base = CGF.MakeAddrLValue(
5226  CGF.Builder.CreateConstArrayGEP(DependenciesArray, I),
5227  KmpDependInfoTy);
5228  // deps[i].base_addr = &<Dependences[i].second>;
5229  LValue BaseAddrLVal = CGF.EmitLValueForField(
5230  Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5231  CGF.EmitStoreOfScalar(
5232  CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGF.IntPtrTy),
5233  BaseAddrLVal);
5234  // deps[i].len = sizeof(<Dependences[i].second>);
5235  LValue LenLVal = CGF.EmitLValueForField(
5236  Base, *std::next(KmpDependInfoRD->field_begin(), Len));
5237  CGF.EmitStoreOfScalar(Size, LenLVal);
5238  // deps[i].flags = <Dependences[i].first>;
5239  RTLDependenceKindTy DepKind;
5240  switch (Data.Dependences[I].first) {
5241  case OMPC_DEPEND_in:
5242  DepKind = DepIn;
5243  break;
5244  // Out and InOut dependencies must use the same code.
5245  case OMPC_DEPEND_out:
5246  case OMPC_DEPEND_inout:
5247  DepKind = DepInOut;
5248  break;
5249  case OMPC_DEPEND_mutexinoutset:
5250  DepKind = DepMutexInOutSet;
5251  break;
5252  case OMPC_DEPEND_source:
5253  case OMPC_DEPEND_sink:
5254  case OMPC_DEPEND_unknown:
5255  llvm_unreachable("Unknown task dependence type");
5256  }
5257  LValue FlagsLVal = CGF.EmitLValueForField(
5258  Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5259  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5260  FlagsLVal);
5261  }
// Decay the array to a void* pointing at element 0 for the runtime calls.
5262  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5263  CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy);
5264  }
5265 
5266  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5267  // libcall.
5268  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5269  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5270  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5271  // list is not empty
5272  llvm::Value *ThreadID = getThreadID(CGF, Loc);
5273  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5274  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5275  llvm::Value *DepTaskArgs[7];
5276  if (NumDependencies) {
5277  DepTaskArgs[0] = UpLoc;
5278  DepTaskArgs[1] = ThreadID;
5279  DepTaskArgs[2] = NewTask;
5280  DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
5281  DepTaskArgs[4] = DependenciesArray.getPointer();
// No noalias dependence info is emitted: ndeps_noalias = 0, list = null.
5282  DepTaskArgs[5] = CGF.Builder.getInt32(0);
5283  DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5284  }
// Deferred path (if-clause true or absent): zero the part_id of an untied
// task, then enqueue via __kmpc_omp_task[_with_deps].
5285  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
5286  &TaskArgs,
5287  &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5288  if (!Data.Tied) {
5289  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5290  LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5291  CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5292  }
5293  if (NumDependencies) {
5294  CGF.EmitRuntimeCall(
5296  } else {
5297  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
5298  TaskArgs);
5299  }
5300  // Check if parent region is untied and build return for untied task;
5301  if (auto *Region =
5302  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5303  Region->emitUntiedSwitch(CGF);
5304  };
5305 
5306  llvm::Value *DepWaitTaskArgs[6];
5307  if (NumDependencies) {
5308  DepWaitTaskArgs[0] = UpLoc;
5309  DepWaitTaskArgs[1] = ThreadID;
5310  DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
5311  DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5312  DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5313  DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5314  }
// Undeferred path (if-clause false): wait for dependences, then execute the
// task body inline between task_begin_if0/task_complete_if0.
5315  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
5316  NumDependencies, &DepWaitTaskArgs,
5317  Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5318  CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5319  CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5320  // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5321  // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5322  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5323  // is specified.
5324  if (NumDependencies)
5325  CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
5326  DepWaitTaskArgs);
5327  // Call proxy_task_entry(gtid, new_task);
5328  auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5329  Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5330  Action.Enter(CGF);
5331  llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5332  CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5333  OutlinedFnArgs);
5334  };
5335 
5336  // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5337  // kmp_task_t *new_task);
5338  // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5339  // kmp_task_t *new_task);
5340  RegionCodeGenTy RCG(CodeGen);
5341  CommonActionTy Action(
5342  RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
5343  RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
5344  RCG.setAction(Action);
5345  RCG(CGF);
5346  };
5347 
5348  if (IfCond) {
5349  emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5350  } else {
5351  RegionCodeGenTy ThenRCG(ThenCodeGen);
5352  ThenRCG(CGF);
5353  }
5354 }
5355 
// NOTE(review): signature line 5356 is missing from this rendering;
// presumably CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF,
// SourceLocation Loc, ...) -- confirm upstream.
//
// Emits a '#pragma omp taskloop' dispatch: initializes the task via
// emitTaskInit, fills the taskloop-specific lb/ub/st/reductions fields of
// the task record, and calls the __kmpc_taskloop runtime entry (the call
// itself was on dropped line 5437).
5357  const OMPLoopDirective &D,
5358  llvm::Function *TaskFunction,
5359  QualType SharedsTy, Address Shareds,
5360  const Expr *IfCond,
5361  const OMPTaskDataTy &Data) {
5362  if (!CGF.HaveInsertPoint())
5363  return;
5364  TaskResultTy Result =
5365  emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5366  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5367  // libcall.
5368  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5369  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5370  // sched, kmp_uint64 grainsize, void *task_dup);
5371  llvm::Value *ThreadID = getThreadID(CGF, Loc);
5372  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5373  llvm::Value *IfVal;
// Unlike task, taskloop passes the if-clause result to the runtime as an
// integer instead of branching between two code paths.
5374  if (IfCond) {
5375  IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5376  /*isSigned=*/true);
5377  } else {
5378  IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5379  }
5380 
// Store the loop's lower bound, upper bound and stride into the dedicated
// kmp_task_t fields; the runtime reads them to split the iteration space.
5381  LValue LBLVal = CGF.EmitLValueForField(
5382  Result.TDBase,
5383  *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5384  const auto *LBVar =
5385  cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5386  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
5387  LBLVal.getQuals(),
5388  /*IsInitializer=*/true);
5389  LValue UBLVal = CGF.EmitLValueForField(
5390  Result.TDBase,
5391  *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5392  const auto *UBVar =
5393  cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5394  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
5395  UBLVal.getQuals(),
5396  /*IsInitializer=*/true);
5397  LValue StLVal = CGF.EmitLValueForField(
5398  Result.TDBase,
5399  *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5400  const auto *StVar =
5401  cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5402  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
5403  StLVal.getQuals(),
5404  /*IsInitializer=*/true);
5405  // Store reductions address.
5406  LValue RedLVal = CGF.EmitLValueForField(
5407  Result.TDBase,
5408  *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5409  if (Data.Reductions) {
5410  CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5411  } else {
// No task reductions: null out the field so the runtime skips them.
5412  CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5413  CGF.getContext().VoidPtrTy);
5414  }
5415  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5416  llvm::Value *TaskArgs[] = {
5417  UpLoc,
5418  ThreadID,
5419  Result.NewTask,
5420  IfVal,
5421  LBLVal.getPointer(CGF),
5422  UBLVal.getPointer(CGF),
5423  CGF.EmitLoadOfScalar(StLVal, Loc),
5424  llvm::ConstantInt::getSigned(
5425  CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
// Schedule encoding: grainsize vs num_tasks is distinguished by the int
// half of the PointerIntPair; absence of a pointer means no schedule.
5426  llvm::ConstantInt::getSigned(
5427  CGF.IntTy, Data.Schedule.getPointer()
5428  ? Data.Schedule.getInt() ? NumTasks : Grainsize
5429  : NoSchedule),
5430  Data.Schedule.getPointer()
5431  ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5432  /*isSigned=*/false)
5433  : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5435  Result.TaskDupFn, CGF.VoidPtrTy)
5436  : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5438 }
5439 
5440 /// Emit reduction operation for each element of array (required for
5441 /// array sections) LHS op = RHS.
5442 /// \param Type Type of array.
5443 /// \param LHSVar Variable on the left side of the reduction operation
5444 /// (references element of array in original variable).
5445 /// \param RHSVar Variable on the right side of the reduction operation
5446 /// (references element of array in original variable).
5447 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5448 /// RHSVar.
// NOTE(review): the signature line 5449 is missing from this rendering;
// presumably `static void EmitOMPAggregateReduction(` -- confirm upstream.
// The doc comment preceding this block (lines 5440-5448) describes the
// contract; the body below implements a classic begin/end pointer walk.
5450  CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5451  const VarDecl *RHSVar,
5452  const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5453  const Expr *, const Expr *)> &RedOpGen,
5454  const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5455  const Expr *UpExpr = nullptr) {
5456  // Perform element-by-element initialization.
5457  QualType ElementTy;
5458  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5459  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5460 
5461  // Drill down to the base element type on both arrays.
5462  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
// emitArrayLength also sets ElementTy to the innermost element type.
5463  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5464 
5465  llvm::Value *RHSBegin = RHSAddr.getPointer();
5466  llvm::Value *LHSBegin = LHSAddr.getPointer();
5467  // Cast from pointer to array type to pointer to single element.
5468  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
5469  // The basic structure here is a while-do loop.
5470  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5471  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
// Guard against zero-length arrays (e.g. VLA with runtime length 0): skip
// the loop entirely when begin == end.
5472  llvm::Value *IsEmpty =
5473  CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5474  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5475 
5476  // Enter the loop body, making that address the current address.
5477  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5478  CGF.EmitBlock(BodyBB);
5479 
5480  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5481 
// PHIs carry the current source/destination element pointers around the
// loop back-edge; the back-edge incomings are added after the body.
5482  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5483  RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5484  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5485  Address RHSElementCurrent =
5486  Address(RHSElementPHI,
5487  RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5488 
5489  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5490  LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5491  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5492  Address LHSElementCurrent =
5493  Address(LHSElementPHI,
5494  LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5495 
5496  // Emit copy.
// Temporarily remap LHSVar/RHSVar to the current element addresses so the
// reduction-op expression (written in terms of the whole variables)
// operates on a single element.
// NOTE(review): the scope declaration was on dropped line 5497 -- confirm.
5498  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5499  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5500  Scope.Privatize();
5501  RedOpGen(CGF, XExpr, EExpr, UpExpr);
5502  Scope.ForceCleanup();
5503 
5504  // Shift the address forward by one element.
5505  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5506  LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
5507  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5508  RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
5509  // Check whether we've reached the end.
5510  llvm::Value *Done =
5511  CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5512  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5513  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5514  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5515 
5516  // Done.
5517  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5518 }
5519 
5520 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5521 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5522 /// UDR combiner function.
// NOTE(review): the signature line 5523 is missing from this rendering;
// presumably `static void emitReductionCombiner(CodeGenFunction &CGF,`
// -- confirm upstream. The comment block above (lines 5520-5522) states the
// contract: plain combiners are emitted directly; a UDR combiner (a call
// through an OpaqueValueExpr whose source resolves to an
// OMPDeclareReductionDecl) is rebound to the user's combiner function
// before emission.
5524  const Expr *ReductionOp) {
5525  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5526  if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5527  if (const auto *DRE =
5528  dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5529  if (const auto *DRD =
5530  dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
// Reduction.first is the combiner; map the opaque callee to it so
// emitting ReductionOp calls the user-defined combiner.
5531  std::pair<llvm::Function *, llvm::Function *> Reduction =
5532  CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5533  RValue Func = RValue::get(Reduction.first);
5534  CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5535  CGF.EmitIgnoredExpr(ReductionOp);
5536  return;
5537  }
// Not a UDR call: the reduction op is an ordinary expression; emit as-is.
5538  CGF.EmitIgnoredExpr(ReductionOp);
5539 }
5540 
// NOTE(review): signature line 5541 is missing from this rendering;
// presumably `llvm::Function *CGOpenMPRuntime::emitReductionFunction(`
// -- confirm upstream.
//
// Synthesizes the `void reduce_func(void *lhs, void *rhs)` callback passed
// to __kmpc_reduce{_nowait}: both arguments are arrays of void* element
// pointers; each ReductionOps[i] is emitted with LHSExprs[i]/RHSExprs[i]
// remapped to the corresponding array slots. VLA-typed privates carry an
// extra array slot holding their runtime size.
5542  SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5543  ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5544  ArrayRef<const Expr *> ReductionOps) {
5545  ASTContext &C = CGM.getContext();
5546 
5547  // void reduction_func(void *LHSArg, void *RHSArg);
5548  FunctionArgList Args;
5549  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5551  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5553  Args.push_back(&LHSArg);
5554  Args.push_back(&RHSArg);
5555  const auto &CGFI =
5557  std::string Name = getName({"omp", "reduction", "reduction_func"});
5560  &CGM.getModule());
5562  Fn->setDoesNotRecurse();
5563  CodeGenFunction CGF(CGM);
5564  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5565 
5566  // Dst = (void*[n])(LHSArg);
5567  // Src = (void*[n])(RHSArg);
// NOTE(review): the LHS/RHS Address declarations were on dropped lines
// 5568/5571 -- the loads below recover the typed array pointers from the
// raw void* parameters.
5569  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5570  ArgsType), CGF.getPointerAlign());
5572  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5573  ArgsType), CGF.getPointerAlign());
5574 
5575  // ...
5576  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5577  // ...
5579  auto IPriv = Privates.begin();
5580  unsigned Idx = 0;
// Remap each LHS/RHS variable to its slot in the argument arrays; Idx is
// advanced separately from I because VLA entries consume an extra slot.
5581  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5582  const auto *RHSVar =
5583  cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5584  Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5585  return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5586  });
5587  const auto *LHSVar =
5588  cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5589  Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5590  return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5591  });
5592  QualType PrivTy = (*IPriv)->getType();
5593  if (PrivTy->isVariablyModifiedType()) {
5594  // Get array size and emit VLA type.
5595  ++Idx;
5596  Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5597  llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5598  const VariableArrayType *VLA =
5599  CGF.getContext().getAsVariableArrayType(PrivTy);
// The VLA size expression is an OpaqueValueExpr; bind it to the size
// value smuggled through the extra array slot, then re-emit the type so
// CGF records the VLA extent.
5600  const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5602  CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5603  CGF.EmitVariablyModifiedType(PrivTy);
5604  }
5605  }
5606  Scope.Privatize();
5607  IPriv = Privates.begin();
5608  auto ILHS = LHSExprs.begin();
5609  auto IRHS = RHSExprs.begin();
5610  for (const Expr *E : ReductionOps) {
5611  if ((*IPriv)->getType()->isArrayType()) {
5612  // Emit reduction for array section.
5613  const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5614  const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5616  CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5617  [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5618  emitReductionCombiner(CGF, E);
5619  });
5620  } else {
5621  // Emit reduction for array subscript or single variable.
5622  emitReductionCombiner(CGF, E);
5623  }
5624  ++IPriv;
5625  ++ILHS;
5626  ++IRHS;
5627  }
5628  Scope.ForceCleanup();
5629  CGF.FinishFunction();
5630  return Fn;
5631 }
5632 
// NOTE(review): signature line 5633 is missing from this rendering;
// presumably `void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,`
// -- confirm upstream.
//
// Emits one reduction combiner: array-typed privates go element-by-element
// through the aggregate helper (its call was on dropped line 5642);
// scalars and array subscripts emit the combiner expression directly.
5634  const Expr *ReductionOp,
5635  const Expr *PrivateRef,
5636  const DeclRefExpr *LHS,
5637  const DeclRefExpr *RHS) {
5638  if (PrivateRef->getType()->isArrayType()) {
5639  // Emit reduction for array section.
5640  const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5641  const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5643  CGF, PrivateRef->getType(), LHSVar, RHSVar,
5644  [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5645  emitReductionCombiner(CGF, ReductionOp);
5646  });
5647  } else {
5648  // Emit reduction for array subscript or single variable.
5649  emitReductionCombiner(CGF, ReductionOp);
5650  }
5651 }
5652 
5654  ArrayRef<const Expr *> Privates,
5655  ArrayRef<const Expr *> LHSExprs,
5656  ArrayRef<const Expr *> RHSExprs,
5657  ArrayRef<const Expr *> ReductionOps,
5658  ReductionOptionsTy Options) {
5659  if (!CGF.HaveInsertPoint())
5660  return;
5661 
5662  bool WithNowait = Options.WithNowait;
5663  bool SimpleReduction = Options.SimpleReduction;
5664 
5665  // Next code should be emitted for reduction:
5666  //
5667  // static kmp_critical_name lock = { 0 };
5668  //
5669  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5670  // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5671  // ...
5672  // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5673  // *(Type<n>-1*)rhs[<n>-1]);
5674  // }
5675  //
5676  // ...
5677  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5678  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5679  // RedList, reduce_func, &<lock>)) {
5680  // case 1:
5681  // ...
5682  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5683  // ...
5684  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5685  // break;
5686  // case 2:
5687  // ...
5688  // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5689  // ...
5690  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5691  // break;
5692  // default:;
5693  // }
5694  //
5695  // if SimpleReduction is true, only the next code is generated:
5696  // ...
5697  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5698  // ...
5699 
5700  ASTContext &C = CGM.getContext();
5701 
5702  if (SimpleReduction) {
5704  auto IPriv = Privates.begin();
5705  auto ILHS = LHSExprs.begin();
5706  auto IRHS = RHSExprs.begin();
5707  for (const Expr *E : ReductionOps) {
5708  emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5709  cast<DeclRefExpr>(*IRHS));
5710  ++IPriv;
5711  ++ILHS;
5712  ++IRHS;
5713  }
5714  return;
5715  }
5716 
5717  // 1. Build a list of reduction variables.
5718  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5719  auto Size = RHSExprs.size();
5720  for (const Expr *E : Privates) {
5721  if (E->getType()->isVariablyModifiedType())
5722  // Reserve place for array size.
5723  ++Size;
5724  }
5725  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5726  QualType ReductionArrayTy =
5727  C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
5728  /*IndexTypeQuals=*/0);
5729  Address ReductionList =
5730  CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5731  auto IPriv = Privates.begin();
5732  unsigned Idx = 0;
5733  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5734  Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5735  CGF.Builder.CreateStore(
5737  CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5738  Elem);
5739  if ((*IPriv)->getType()->isVariablyModifiedType()) {
5740  // Store array size.
5741  ++Idx;
5742  Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5743  llvm::Value *Size = CGF.Builder.CreateIntCast(
5744  CGF.getVLASize(
5745  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5746  .NumElts,
5747  CGF.SizeTy, /*isSigned=*/false);
5748  CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5749  Elem);
5750  }
5751  }
5752 
5753  // 2. Emit reduce_func().
5754  llvm::Function *ReductionFn = emitReductionFunction(
5755  Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
5756  LHSExprs, RHSExprs, ReductionOps);
5757 
5758  // 3. Create static kmp_critical_name lock = { 0 };
5759  std::string Name = getName({"reduction"});
5760  llvm::Value *Lock = getCriticalRegionLock(Name);
5761 
5762  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5763  // RedList, reduce_func, &<lock>);
5764  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5765  llvm::Value *ThreadId = getThreadID(CGF, Loc);
5766  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5768  ReductionList.getPointer(), CGF.VoidPtrTy);
5769  llvm::Value *Args[] = {
5770  IdentTLoc, // ident_t *<loc>
5771  ThreadId, // i32 <gtid>
5772  CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5773  ReductionArrayTySize, // size_type sizeof(RedList)
5774  RL, // void *RedList
5775  ReductionFn, // void (*) (void *, void *) <reduce_func>
5776  Lock // kmp_critical_name *&<lock>
5777  };
5778  llvm::Value *Res = CGF.EmitRuntimeCall(
5781  Args);
5782 
5783  // 5. Build switch(res)
5784  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5785  llvm::SwitchInst *SwInst =
5786  CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5787 
5788  // 6. Build case 1:
5789  // ...
5790  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5791  // ...
5792  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5793  // break;
5794  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5795  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5796  CGF.EmitBlock(Case1BB);
5797 
5798  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5799  llvm::Value *EndArgs[] = {
5800  IdentTLoc, // ident_t *<loc>
5801  ThreadId, // i32 <gtid>
5802  Lock // kmp_critical_name *&<lock>
5803  };
5804  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5805  CodeGenFunction &CGF, PrePostActionTy &Action) {
5806  CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5807  auto IPriv = Privates.begin();
5808  auto ILHS = LHSExprs.begin();
5809  auto IRHS = RHSExprs.begin();
5810  for (const Expr *E : ReductionOps) {
5811  RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5812  cast<DeclRefExpr>(*IRHS));
5813  ++IPriv;
5814  ++ILHS;
5815  ++IRHS;
5816  }
5817  };
5818  RegionCodeGenTy RCG(CodeGen);
5819  CommonActionTy Action(
5820  nullptr, llvm::None,
5823  EndArgs);
5824  RCG.setAction(Action);
5825  RCG(CGF);
5826 
5827  CGF.EmitBranch(DefaultBB);
5828 
5829  // 7. Build case 2:
5830  // ...
5831  // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5832  // ...
5833  // break;
5834  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5835  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5836  CGF.EmitBlock(Case2BB);
5837 
5838  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5839  CodeGenFunction &CGF, PrePostActionTy &Action) {
5840  auto ILHS = LHSExprs.begin();
5841  auto IRHS = RHSExprs.begin();
5842  auto IPriv = Privates.begin();
5843  for (const Expr *E : ReductionOps) {
5844  const Expr *XExpr = nullptr;
5845  const Expr *EExpr = nullptr;
5846  const Expr *UpExpr = nullptr;
5847  BinaryOperatorKind BO = BO_Comma;
5848  if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5849  if (BO->getOpcode() == BO_Assign) {
5850  XExpr = BO->getLHS();
5851  UpExpr = BO->getRHS();
5852  }
5853  }
5854  // Try to emit update expression as a simple atomic.
5855  const Expr *RHSExpr = UpExpr;
5856  if (RHSExpr) {
5857  // Analyze RHS part of the whole expression.
5858  if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5859  RHSExpr->IgnoreParenImpCasts())) {
5860  // If this is a conditional operator, analyze its condition for
5861  // min/max reduction operator.
5862  RHSExpr = ACO->getCond();
5863  }
5864  if (const auto *BORHS =
5865  dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5866  EExpr = BORHS->getRHS();
5867  BO = BORHS->getOpcode();
5868  }
5869  }
5870  if (XExpr) {
5871  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5872  auto &&AtomicRedGen = [BO, VD,
5873  Loc](CodeGenFunction &CGF, const Expr *XExpr,
5874  const Expr *EExpr, const Expr *UpExpr) {
5875  LValue X = CGF.EmitLValue(XExpr);
5876  RValue E;
5877  if (EExpr)
5878  E = CGF.EmitAnyExpr(EExpr);
5879  CGF.EmitOMPAtomicSimpleUpdateExpr(
5880  X, E, BO, /*IsXLHSInRHSPart=*/true,
5881  llvm::AtomicOrdering::Monotonic, Loc,
5882  [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5883  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5884  PrivateScope.addPrivate(
5885  VD, [&CGF, VD, XRValue, Loc]() {
5886  Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5887  CGF.emitOMPSimpleStore(
5888  CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5889  VD->getType().getNonReferenceType(), Loc);
5890  return LHSTemp;
5891  });
5892  (void)PrivateScope.Privatize();
5893  return CGF.EmitAnyExpr(UpExpr);
5894  });
5895  };
5896  if ((*IPriv)->getType()->isArrayType()) {
5897  // Emit atomic reduction for array section.
5898  const auto *RHSVar =
5899  cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5900  EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5901  AtomicRedGen, XExpr, EExpr, UpExpr);
5902  } else {
5903  // Emit atomic reduction for array subscript or single variable.
5904  AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5905  }
5906  } else {
5907  // Emit as a critical region.
5908  auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5909  const Expr *, const Expr *) {
5910  CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5911  std::string Name = RT.getName({"atomic_reduction"});
5912  RT.emitCriticalRegion(
5913  CGF, Name,
5914  [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5915  Action.Enter(CGF);
5916  emitReductionCombiner(CGF, E);
5917  },
5918  Loc);
5919  };
5920  if ((*IPriv)->getType()->isArrayType()) {
5921  const auto *LHSVar =
5922  cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5923  const auto *RHSVar =
5924  cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5925  EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5926  CritRedGen);
5927  } else {
5928  CritRedGen(CGF, nullptr, nullptr, nullptr);
5929  }
5930  }
5931  ++ILHS;
5932  ++IRHS;
5933  ++IPriv;
5934  }
5935  };
5936  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5937  if (!WithNowait) {
5938  // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5939  llvm::Value *EndArgs[] = {
5940  IdentTLoc, // ident_t *<loc>
5941  ThreadId, // i32 <gtid>
5942  Lock // kmp_critical_name *&<lock>
5943  };
5944  CommonActionTy Action(nullptr, llvm::None,
5945  createRuntimeFunction(OMPRTL__kmpc_end_reduce),
5946  EndArgs);
5947  AtomicRCG.setAction(Action);
5948  AtomicRCG(CGF);
5949  } else {
5950  AtomicRCG(CGF);
5951  }
5952 
5953  CGF.EmitBranch(DefaultBB);
5954  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5955 }
5956 
5957 /// Generates unique name for artificial threadprivate variables.
5958 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5959 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5960  const Expr *Ref) {
5961  SmallString<256> Buffer;
5962  llvm::raw_svector_ostream Out(Buffer);
5963  const clang::DeclRefExpr *DE;
5964  const VarDecl *D = ::getBaseDecl(Ref, DE);
5965  if (!D)
5966  D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5967  D = D->getCanonicalDecl();
5968  std::string Name = CGM.getOpenMPRuntime().getName(
5969  {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5970  Out << Prefix << Name << "_"
5971  << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5972  return Out.str();
5973 }
5974 
5975 /// Emits reduction initializer function:
5976 /// \code
5977 /// void @.red_init(void* %arg) {
5978 /// %0 = bitcast void* %arg to <type>*
5979 /// store <type> <init>, <type>* %0
5980 /// ret void
5981 /// }
5982 /// \endcode
5984  SourceLocation Loc,
5985  ReductionCodeGen &RCG, unsigned N) {
5986  ASTContext &C = CGM.getContext();
5987  FunctionArgList Args;
5988  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5990  Args.emplace_back(&Param);
5991  const auto &FnInfo =
5993  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5994  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5996  Name, &CGM.getModule());
5997  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5998  Fn->setDoesNotRecurse();
5999  CodeGenFunction CGF(CGM);
6000  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6001  Address PrivateAddr = CGF.EmitLoadOfPointer(
6002  CGF.GetAddrOfLocalVar(&Param),
6003  C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6004  llvm::Value *Size = nullptr;
6005  // If the size of the reduction item is non-constant, load it from global
6006  // threadprivate variable.
6007  if (RCG.getSizes(N).second) {
6008  Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6009  CGF, CGM.getContext().getSizeType(),
6010  generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6011  Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6012  CGM.getContext().getSizeType(), Loc);
6013  }
6014  RCG.emitAggregateType(CGF, N, Size);
6015  LValue SharedLVal;
6016  // If initializer uses initializer from declare reduction construct, emit a
6017  // pointer to the address of the original reduction item (required by reduction
6018  // initializer)
6019  if (RCG.usesReductionInitializer(N)) {
6020  Address SharedAddr =
6021  CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6022  CGF, CGM.getContext().VoidPtrTy,
6023  generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6024  SharedAddr = CGF.EmitLoadOfPointer(
6025  SharedAddr,
6026  CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
6027  SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
6028  } else {
6029  SharedLVal = CGF.MakeNaturalAlignAddrLValue(
6030  llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
6031  CGM.getContext().VoidPtrTy);
6032  }
6033  // Emit the initializer:
6034  // %0 = bitcast void* %arg to <type>*
6035  // store <type> <init>, <type>* %0
6036  RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
6037  [](CodeGenFunction &) { return false; });
6038  CGF.FinishFunction();
6039  return Fn;
6040 }
6041 
6042 /// Emits reduction combiner function:
6043 /// \code
6044 /// void @.red_comb(void* %arg0, void* %arg1) {
6045 /// %lhs = bitcast void* %arg0 to <type>*
6046 /// %rhs = bitcast void* %arg1 to <type>*
6047 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
6048 /// store <type> %2, <type>* %lhs
6049 /// ret void
6050 /// }
6051 /// \endcode
6053  SourceLocation Loc,
6054  ReductionCodeGen &RCG, unsigned N,
6055  const Expr *ReductionOp,
6056  const Expr *LHS, const Expr *RHS,
6057  const Expr *PrivateRef) {
6058  ASTContext &C = CGM.getContext();
6059  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
6060  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
6061  FunctionArgList Args;
6062  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
6064  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6066  Args.emplace_back(&ParamInOut);
6067  Args.emplace_back(&ParamIn);
6068  const auto &FnInfo =
6070  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6071  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
6073  Name, &CGM.getModule());
6074  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6075  Fn->setDoesNotRecurse();
6076  CodeGenFunction CGF(CGM);
6077  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6078  llvm::Value *Size = nullptr;
6079  // If the size of the reduction item is non-constant, load it from global
6080  // threadprivate variable.
6081  if (RCG.getSizes(N).second) {
6082  Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6083  CGF, CGM.getContext().getSizeType(),
6084  generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6085  Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6086  CGM.getContext().getSizeType(), Loc);
6087  }
6088  RCG.emitAggregateType(CGF, N, Size);
6089  // Remap lhs and rhs variables to the addresses of the function arguments.
6090  // %lhs = bitcast void* %arg0 to <type>*
6091  // %rhs = bitcast void* %arg1 to <type>*
6092  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6093  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
6094  // Pull out the pointer to the variable.
6095  Address PtrAddr = CGF.EmitLoadOfPointer(
6096  CGF.GetAddrOfLocalVar(&ParamInOut),
6097  C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6098  return CGF.Builder.CreateElementBitCast(
6099  PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
6100  });
6101  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
6102  // Pull out the pointer to the variable.
6103  Address PtrAddr = CGF.EmitLoadOfPointer(
6104  CGF.GetAddrOfLocalVar(&ParamIn),
6105  C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6106  return CGF.Builder.CreateElementBitCast(
6107  PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
6108  });
6109  PrivateScope.Privatize();
6110  // Emit the combiner body:
6111  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
6112  // store <type> %2, <type>* %lhs
6113  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
6114  CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
6115  cast<DeclRefExpr>(RHS));
6116  CGF.FinishFunction();
6117  return Fn;
6118 }
6119 
6120 /// Emits reduction finalizer function:
6121 /// \code
6122 /// void @.red_fini(void* %arg) {
6123 /// %0 = bitcast void* %arg to <type>*
6124 /// <destroy>(<type>* %0)
6125 /// ret void
6126 /// }
6127 /// \endcode
6129  SourceLocation Loc,
6130  ReductionCodeGen &RCG, unsigned N) {
6131  if (!RCG.needCleanups(N))
6132  return nullptr;
6133  ASTContext &C = CGM.getContext();
6134  FunctionArgList Args;
6135  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6137  Args.emplace_back(&Param);
6138  const auto &FnInfo =
6140  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6141  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
6143  Name, &CGM.getModule());
6144  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6145  Fn->setDoesNotRecurse();
6146  CodeGenFunction CGF(CGM);
6147  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6148  Address PrivateAddr = CGF.EmitLoadOfPointer(
6149  CGF.GetAddrOfLocalVar(&Param),
6150  C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6151  llvm::Value *Size = nullptr;
6152  // If the size of the reduction item is non-constant, load it from global
6153  // threadprivate variable.
6154  if (RCG.getSizes(N).second) {
6155  Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6156  CGF, CGM.getContext().getSizeType(),
6157  generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6158  Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6159  CGM.getContext().getSizeType(), Loc);
6160  }
6161  RCG.emitAggregateType(CGF, N, Size);
6162  // Emit the finalizer body:
6163  // <destroy>(<type>* %0)
6164  RCG.emitCleanups(CGF, N, PrivateAddr);
6165  CGF.FinishFunction(Loc);
6166  return Fn;
6167 }
6168 
6170  CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
6171  ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
6172  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
6173  return nullptr;
6174 
6175  // Build typedef struct:
6176  // kmp_task_red_input {
6177  // void *reduce_shar; // shared reduction item
6178  // size_t reduce_size; // size of data item
6179  // void *reduce_init; // data initialization routine
6180  // void *reduce_fini; // data finalization routine
6181  // void *reduce_comb; // data combiner routine
6182  // kmp_task_red_flags_t flags; // flags for additional info from compiler
6183  // } kmp_task_red_input_t;
6184  ASTContext &C = CGM.getContext();
6185  RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
6186  RD->startDefinition();
6187  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6188  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
6189  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6190  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6191  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6192  const FieldDecl *FlagsFD = addFieldToRecordDecl(
6193  C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
6194  RD->completeDefinition();
6195  QualType RDType = C.getRecordType(RD);
6196  unsigned Size = Data.ReductionVars.size();
6197  llvm::APInt ArraySize(/*numBits=*/64, Size);
6198  QualType ArrayRDType = C.getConstantArrayType(
6199  RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
6200  // kmp_task_red_input_t .rd_input.[Size];
6201  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
6203  Data.ReductionOps);
6204  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
6205  // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
6206  llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
6207  llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
6209  TaskRedInput.getPointer(), Idxs,
6210  /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
6211  ".rd_input.gep.");
6212  LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
6213  // ElemLVal.reduce_shar = &Shareds[Cnt];
6214  LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
6215  RCG.emitSharedLValue(CGF, Cnt);
6216  llvm::Value *CastedShared =
6217  CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
6218  CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
6219  RCG.emitAggregateType(CGF, Cnt);
6220  llvm::Value *SizeValInChars;
6221  llvm::Value *SizeVal;
6222  std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6223  // We use delayed creation/initialization for VLAs, array sections and
6224  // custom reduction initializations. It is required because runtime does not
6225  // provide the way to pass the sizes of VLAs/array sections to
6226  // initializer/combiner/finalizer functions and does not pass the pointer to
6227  // original reduction item to the initializer. Instead threadprivate global
6228  // variables are used to store these values and use them in the functions.
6229  bool DelayedCreation = !!SizeVal;
6230  SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6231  /*isSigned=*/false);
6232  LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6233  CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6234  // ElemLVal.reduce_init = init;
6235  LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6236  llvm::Value *InitAddr =
6237  CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6238  CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6239  DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
6240  // ElemLVal.reduce_fini = fini;
6241  LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6242  llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6243  llvm::Value *FiniAddr = Fini
6244  ? CGF.EmitCastToVoidPtr(Fini)
6245  : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6246  CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6247  // ElemLVal.reduce_comb = comb;
6248  LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6250  CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6251  RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6252  CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6253  // ElemLVal.flags = 0;
6254  LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6255  if (DelayedCreation) {
6256  CGF.EmitStoreOfScalar(
6257  llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
6258  FlagsLVal);
6259  } else
6260  CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
6261  FlagsLVal.getType());
6262  }
6263  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
6264  // *data);
6265  llvm::Value *Args[] = {
6266  CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6267  /*isSigned=*/true),
6268  llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6270  CGM.VoidPtrTy)};
6271  return CGF.EmitRuntimeCall(
6273 }
6274 
6276  SourceLocation Loc,
6277  ReductionCodeGen &RCG,
6278  unsigned N) {
6279  auto Sizes = RCG.getSizes(N);
6280  // Emit threadprivate global variable if the type is non-constant
6281  // (Sizes.second = nullptr).
6282  if (Sizes.second) {
6283  llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6284  /*isSigned=*/false);
6286  CGF, CGM.getContext().getSizeType(),
6287  generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6288  CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6289  }
6290  // Store address of the original reduction item if custom initializer is used.
6291  if (RCG.usesReductionInitializer(N)) {
6293  CGF, CGM.getContext().VoidPtrTy,
6294  generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6295  CGF.Builder.CreateStore(
6297  RCG.getSharedLValue(N).getPointer(CGF), CGM.VoidPtrTy),
6298  SharedAddr, /*IsVolatile=*/false);
6299  }
6300 }
6301 
6303  SourceLocation Loc,
6304  llvm::Value *ReductionsPtr,
6305  LValue SharedLVal) {
6306  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6307  // *d);
6308  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6309  CGM.IntTy,
6310  /*isSigned=*/true),
6311  ReductionsPtr,
6313  SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6314  return Address(
6315  CGF.EmitRuntimeCall(
6317  SharedLVal.getAlignment());
6318 }
6319 
6321  SourceLocation Loc) {
6322  if (!CGF.HaveInsertPoint())
6323  return;
6324  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6325  // global_tid);
6326  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6327  // Ignore return result until untied tasks are supported.
6329  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6330  Region->emitUntiedSwitch(CGF);
6331 }
6332 
6334  OpenMPDirectiveKind InnerKind,
6335  const RegionCodeGenTy &CodeGen,
6336  bool HasCancel) {
6337  if (!CGF.HaveInsertPoint())
6338  return;
6339  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6340  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6341 }
6342 
namespace {
/// Cancellation kind values passed to the __kmpc_cancel /
/// __kmpc_cancellationpoint runtime entry points. The numeric values are part
/// of the OpenMP runtime ABI and must not be changed.
/// NOTE(review): the `enum RTCancelKind {` opening line was dropped by the
/// extraction of this file; restored here to match the visible enumerators.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace
6352 
6354  RTCancelKind CancelKind = CancelNoreq;
6355  if (CancelRegion == OMPD_parallel)
6356  CancelKind = CancelParallel;
6357  else if (CancelRegion == OMPD_for)
6358  CancelKind = CancelLoop;
6359  else if (CancelRegion == OMPD_sections)
6360  CancelKind = CancelSections;
6361  else {
6362  assert(CancelRegion == OMPD_taskgroup);
6363  CancelKind = CancelTaskgroup;
6364  }
6365  return CancelKind;
6366 }
6367 
6369  CodeGenFunction &CGF, SourceLocation Loc,
6370  OpenMPDirectiveKind CancelRegion) {
6371  if (!CGF.HaveInsertPoint())
6372  return;
6373  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6374  // global_tid, kmp_int32 cncl_kind);
6375  if (auto *OMPRegionInfo =
6376  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6377  // For 'cancellation point taskgroup', the task region info may not have a
6378  // cancel. This may instead happen in another adjacent task.
6379  if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6380  llvm::Value *Args[] = {
6381  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6382  CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6383  // Ignore return result until untied tasks are supported.
6384  llvm::Value *Result = CGF.EmitRuntimeCall(
6386  // if (__kmpc_cancellationpoint()) {
6387  // exit from construct;
6388  // }
6389  llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6390  llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6391  llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6392  CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6393  CGF.EmitBlock(ExitBB);
6394  // exit from construct;
6395  CodeGenFunction::JumpDest CancelDest =
6396  CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6397  CGF.EmitBranchThroughCleanup(CancelDest);
6398  CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6399  }
6400  }
6401 }
6402 
6404  const Expr *IfCond,
6405  OpenMPDirectiveKind CancelRegion) {
6406  if (!CGF.HaveInsertPoint())
6407  return;
6408  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6409  // kmp_int32 cncl_kind);
6410  if (auto *OMPRegionInfo =
6411  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6412  auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
6413  PrePostActionTy &) {
6414  CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6415  llvm::Value *Args[] = {
6416  RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6417  CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6418  // Ignore return result until untied tasks are supported.
6419  llvm::Value *Result = CGF.EmitRuntimeCall(
6420  RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
6421  // if (__kmpc_cancel()) {
6422  // exit from construct;
6423  // }
6424  llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6425  llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6426  llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6427  CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6428  CGF.EmitBlock(ExitBB);
6429  // exit from construct;
6430  CodeGenFunction::JumpDest CancelDest =
6431  CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6432  CGF.EmitBranchThroughCleanup(CancelDest);
6433  CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6434  };
6435  if (IfCond) {
6436  emitIfClause(CGF, IfCond, ThenGen,
6437  [](CodeGenFunction &, PrePostActionTy &) {});
6438  } else {
6439  RegionCodeGenTy ThenRCG(ThenGen);
6440  ThenRCG(CGF);
6441  }
6442  }
6443 }
6444 
6446  const OMPExecutableDirective &D, StringRef ParentName,
6447  llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6448  bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6449  assert(!ParentName.empty() && "Invalid target region parent name!");
6450  HasEmittedTargetRegion = true;
6451  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6452  IsOffloadEntry, CodeGen);
6453 }
6454 
6456  const OMPExecutableDirective &D, StringRef ParentName,
6457  llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6458  bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6459  // Create a unique name for the entry function using the source location
6460  // information of the current target region. The name will be something like:
6461  //
6462  // __omp_offloading_DD_FFFF_PP_lBB
6463  //
6464  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6465  // mangled name of the function that encloses the target region and BB is the
6466  // line number of the target region.
6467 
6468  unsigned DeviceID;
6469  unsigned FileID;
6470  unsigned Line;
6471  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6472  Line);
6473  SmallString<64> EntryFnName;
6474  {
6475  llvm::raw_svector_ostream OS(EntryFnName);
6476  OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6477  << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6478  }
6479 
6480  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6481 
6482  CodeGenFunction CGF(CGM, true);
6483  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6484  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6485 
6486  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);
6487 
6488  // If this target outline function is not an offload entry, we don't need to
6489  // register it.
6490  if (!IsOffloadEntry)
6491  return;
6492 
6493  // The target region ID is used by the runtime library to identify the current
6494  // target region, so it only has to be unique and not necessarily point to
6495  // anything. It could be the pointer to the outlined function that implements
6496  // the target region, but we aren't using that so that the compiler doesn't
6497  // need to keep that, and could therefore inline the host function if proven
6498  // worthwhile during optimization. On the other hand, if emitting code for the
6499  // device, the ID has to be the function address so that it can be retrieved from
6500  // the offloading entry and launched by the runtime library. We also mark the
6501  // outlined function to have external linkage in case we are emitting code for
6502  // the device, because these functions will be entry points to the device.
6503 
6504  if (CGM.getLangOpts().OpenMPIsDevice) {
6505  OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6506  OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
6507  OutlinedFn->setDSOLocal(false);
6508  } else {
6509  std::string Name = getName({EntryFnName, "region_id"});
6510  OutlinedFnID = new llvm::GlobalVariable(
6511  CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6512  llvm::GlobalValue::WeakAnyLinkage,
6513  llvm::Constant::getNullValue(CGM.Int8Ty), Name);
6514  }
6515 
6516  // Register the information for the entry associated with this target region.
6518  DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
6520 }
6521 
6522 /// Checks if the expression is constant or does not have non-trivial function
6523 /// calls.
6524 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6525  // We can skip constant expressions.
6526  // We can skip expressions with trivial calls or simple expressions.
6528  !E->hasNonTrivialCall(Ctx)) &&
6529  !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6530 }
6531 
6533  const Stmt *Body) {
6534  const Stmt *Child = Body->IgnoreContainers();
6535  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6536  Child = nullptr;
6537  for (const Stmt *S : C->body()) {
6538  if (const auto *E = dyn_cast<Expr>(S)) {
6539  if (isTrivial(Ctx, E))
6540  continue;
6541  }
6542  // Some of the statements can be ignored.
6543  if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6544  isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6545  continue;
6546  // Analyze declarations.
6547  if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6548  if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
6549  if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6550  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6551  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6552  isa<UsingDirectiveDecl>(D) ||
6553  isa<OMPDeclareReductionDecl>(D) ||
6554  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6555  return true;
6556  const auto *VD = dyn_cast<VarDecl>(D);
6557  if (!VD)
6558  return false;
6559  return VD->isConstexpr() ||
6560  ((VD->getType().isTrivialType(Ctx) ||
6561  VD->getType()->isReferenceType()) &&
6562  (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
6563  }))
6564  continue;
6565  }
6566  // Found multiple children - cannot get the one child only.
6567  if (Child)
6568  return nullptr;
6569  Child = S;
6570  }
6571  if (Child)
6572  Child = Child->IgnoreContainers();
6573  }
6574  return Child;
6575 }
6576 
/// Emit the number of teams for a target directive. Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
///
/// NOTE(review): several declarator lines in this function (the line naming
/// the function, a `ChildStmt` initializer, and an `if` guard around the
/// num_teams emission for combined directives) were lost in extraction;
/// confirm against the full source before relying on the exact shape below.
static llvm::Value *
                               const OMPExecutableDirective &D) {
  // num_teams is a host-evaluated clause; the device receives the value via
  // the kernel launch, so this must only run for the host.
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': inspect the single nested directive (if any) to decide
    // the team count.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // Emit the clause expression in the captured-statement context so
          // captured variables resolve to the outlined copies.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Nested teams without num_teams: 0 lets the runtime choose.
        return Bld.getInt32(0);
      }
      // Nested parallel/simd executes in a single team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // Could not determine a single child: caller must handle nullptr.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
      // Combined target+teams: the clause (when present) hangs directly off D.
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // No teams construct at all: exactly one team.
    return Bld.getInt32(1);
  // The remaining directives are not target-based; the assert above rejects
  // them, so they simply fall through to the unreachable.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6703 
// Compute the number of threads for the innermost parallel region nested in
// the captured statement, combining the nested num_threads/if clauses with
// DefaultThreadLimitVal (the enclosing thread_limit, or null).
//
// NOTE(review): the leading lines of this helper were lost in extraction —
// it is named getNumThreads (see the calls below at the target cases), takes
// a CodeGenFunction &CGF and const CapturedStmt *CS before the parameter
// below, and `Child` is initialized from CS's single compound child. Several
// `AutoVarEmission Emission =` and `LexicalScope` declarator lines are also
// elided; confirm against the full source.
                               llvm::Value *DefaultThreadLimitVal) {
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Pick the if clause that applies to 'parallel' (either unmodified
        // or explicitly parallel-modified).
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Statically false: the parallel region runs with one thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            // Emit any pre-init declarations the condition depends on.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp by the enclosing thread_limit: min(thread_limit, num_threads).
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        // No num_threads: fall back to thread_limit, or 0 (= runtime default).
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A nested simd region executes with a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6795 
/// Emit the number of threads for a target directive. Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
///
/// NOTE(review): as in the helpers above, several declarator/guard lines
/// (the line naming this function, `Child` initializers, LexicalScope and
/// AutoVarEmission declarators, and `if (D.hasClausesOfKind<...>())` guards
/// around clause emission) were lost in extraction; confirm against the full
/// source.
static llvm::Value *
                               const OMPExecutableDirective &D) {
  // thread_limit/num_threads are host-evaluated clauses.
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': derive the value from whatever is nested inside.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit pre-init declarations the thread_limit expression depends on.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // Step through a nested teams (non-distribute) region to find what it
      // wraps.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Only a nested plain 'distribute' can still carry thread information.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Effective limit is min(num_threads, thread_limit) when both exist.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // simd regions execute with a single thread.
    return Bld.getInt32(1);
  // Non-target directives are rejected by the assert above.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7014 
7015 namespace {
7017 
7018 // Utility to handle information from clauses associated with a given
7019 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7020 // It provides a convenient interface to obtain the information and generate
7021 // code for that information.
7022 class MappableExprsHandler {
7023 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. These bits are combined (bitmask enum) and passed to the
  /// offloading runtime alongside each mapped entry.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class. See getFlagMemberOffset() for the bit position of this
    /// field.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7064 
7065  /// Get the offset of the OMP_MAP_MEMBER_OF field.
7066  static unsigned getFlagMemberOffset() {
7067  unsigned Offset = 0;
7068  for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7069  Remain = Remain >> 1)
7070  Offset++;
7071  return Offset;
7072  }
7073 
7074  /// Class that associates information with a base pointer to be passed to the
7075  /// runtime library.
7076  class BasePointerInfo {
7077  /// The base pointer.
7078  llvm::Value *Ptr = nullptr;
7079  /// The base declaration that refers to this device pointer, or null if
7080  /// there is none.
7081  const ValueDecl *DevPtrDecl = nullptr;
7082 
7083  public:
7084  BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7085  : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7086  llvm::Value *operator*() const { return Ptr; }
7087  const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7088  void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7089  };
7090 
  /// Convenience aliases for the per-entry arrays handed to the offloading
  /// runtime: base pointers, section pointers/sizes, and map-type flag sets.
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7094 
  /// Map between a struct and the lowest & highest of its elements which have
  /// been mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Lowest mapped field: (field index, address of that field).
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest mapped field: (field index, address of that field).
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    // NOTE(review): one member declaration appears to have been elided from
    // this chunk during extraction; confirm against the full source.
  };
7106 
7107 private:
  /// Kind that defines how a device pointer has to be returned.
  /// Bundles a mappable-expression component list with its map type,
  /// modifiers, and whether the runtime must return the device pointer.
  struct MapInfo {
    // NOTE(review): the declarations of the Components and MapType members
    // (referenced by the constructor below) were elided from this chunk
    // during extraction; confirm against the full source.
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// True if the runtime must return the device pointer for this entry
    /// (use_device_ptr).
    bool ReturnDevicePointer = false;
    /// True if the map was generated implicitly rather than written by the
    /// user.
    bool IsImplicit = false;

    MapInfo() = default;
    MapInfo(
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };
7125 
7126  /// If use_device_ptr is used on a pointer which is a struct member and there
7127  /// is no map information about it, then emission of that entry is deferred
7128  /// until the whole struct has been processed.
7129  struct DeferredDevicePtrEntryTy {
7130  const Expr *IE = nullptr;
7131  const ValueDecl *VD = nullptr;
7132 
7133  DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
7134  : IE(IE), VD(VD) {}
7135  };
7136 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  // NOTE(review): the mapped-type template argument line of this DenseMap was
  // elided from this chunk during extraction; confirm against the full source.
  llvm::DenseMap<
      const ValueDecl *,
      DevPointersMap;
7157 
  /// Return the size (in bytes, as an llvm::Value) of the storage mapped by
  /// expression \p E. Array sections are measured from their length/lower
  /// bound rather than from the expression's type.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      // NOTE(review): the declarator line introducing BaseTy (derived from the
      // section's base expression) was elided from this chunk during
      // extraction; confirm against the full source.
          OAE->getBase()->IgnoreParenImpCasts())
              .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLoc().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLoc().isInvalid())
        return ElemSize;

      // Explicit length: size = length * element size.
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLoc().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // Clamp to zero if the lower bound exceeds the base size.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
7218 
7219  /// Return the corresponding bits for a given map clause modifier. Add
7220  /// a flag marking the map as a pointer if requested. Add a flag marking the
7221  /// map as the first one of a series of maps that relate to the same map
7222  /// expression.
7223  OpenMPOffloadMappingFlags getMapTypeBits(
7224  OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7225  bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7226  OpenMPOffloadMappingFlags Bits =
7227  IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7228  switch (MapType) {
7229  case OMPC_MAP_alloc:
7230  case OMPC_MAP_release:
7231  // alloc and release is the default behavior in the runtime library, i.e.
7232  // if we don't pass any bits alloc/release that is what the runtime is
7233  // going to do. Therefore, we don't need to signal anything for these two
7234  // type modifiers.
7235  break;
7236  case OMPC_MAP_to:
7237  Bits |= OMP_MAP_TO;
7238  break;
7239  case OMPC_MAP_from:
7240  Bits |= OMP_MAP_FROM;
7241  break;
7242  case OMPC_MAP_tofrom:
7243  Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7244  break;
7245  case OMPC_MAP_delete:
7246  Bits |= OMP_MAP_DELETE;
7247  break;
7248  case OMPC_MAP_unknown:
7249  llvm_unreachable("Unexpected map type!");
7250  }
7251  if (AddPtrFlag)
7252  Bits |= OMP_MAP_PTR_AND_OBJ;
7253  if (AddIsTargetParamFlag)
7254  Bits |= OMP_MAP_TARGET_PARAM;
7255  if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7256  != MapModifiers.end())
7257  Bits |= OMP_MAP_ALWAYS;
7258  if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7259  != MapModifiers.end())
7260  Bits |= OMP_MAP_CLOSE;
7261  return Bits;
7262  }
7263 
  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLoc().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      // NOTE(review): the declarator line introducing BaseQTy (derived from
      // the section's base expression) was elided from this chunk during
      // extraction; confirm against the full source.
          OASE->getBase()->IgnoreParenImpCasts())
              .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's user fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have more than size 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }
7302 
7303  /// Generate the base pointers, section pointers, sizes and map type
7304  /// bits for the provided map type, map modifier, and expression components.
7305  /// \a IsFirstComponent should be set to true if the provided set of
7306  /// components is the first associated with a capture.
7307  void generateInfoForComponentList(
7308  OpenMPMapClauseKind MapType,
7309  ArrayRef<OpenMPMapModifierKind> MapModifiers,
7311  MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
7312  MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
7313  StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
7314  bool IsImplicit,
7315  ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7316  OverlappedElements = llvm::None) const {
7317  // The following summarizes what has to be generated for each map and the
7318  // types below. The generated information is expressed in this order:
7319  // base pointer, section pointer, size, flags
7320  // (to add to the ones that come from the map type and modifier).
7321  //
7322  // double d;
7323  // int i[100];
7324  // float *p;
7325  //
7326  // struct S1 {
7327  // int i;
7328  // float f[50];
7329  // }
7330  // struct S2 {
7331  // int i;
7332  // float f[50];
7333  // S1 s;
7334  // double *p;
7335  // struct S2 *ps;
7336  // }
7337  // S2 s;
7338  // S2 *ps;
7339  //
7340  // map(d)
7341  // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7342  //
7343  // map(i)
7344  // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7345  //
7346  // map(i[1:23])
7347  // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7348  //
7349  // map(p)
7350  // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7351  //
7352  // map(p[1:24])
7353  // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7354  //
7355  // map(s)
7356  // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7357  //
7358  // map(s.i)
7359  // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7360  //
7361  // map(s.s.f)
7362  // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7363  //
7364  // map(s.p)
7365  // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7366  //
7367  // map(to: s.p[:22])
7368  // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7369  // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7370  // &(s.p), &(s.p[0]), 22*sizeof(double),
7371  // MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7372  // (*) alloc space for struct members, only this is a target parameter
7373  // (**) map the pointer (nothing to be mapped in this example) (the compiler
7374  // optimizes this entry out, same in the examples below)
7375  // (***) map the pointee (map: to)
7376  //
7377  // map(s.ps)
7378  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7379  //
7380  // map(from: s.ps->s.i)
7381  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7382  // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7383  // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7384  //
7385  // map(to: s.ps->ps)
7386  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7387  // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7388  // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
7389  //
7390  // map(s.ps->ps->ps)
7391  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7392  // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7393  // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7394  // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7395  //
7396  // map(to: s.ps->ps->s.f[:22])
7397  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7398  // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7399  // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7400  // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7401  //
7402  // map(ps)
7403  // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7404  //
7405  // map(ps->i)
7406  // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7407  //
7408  // map(ps->s.f)
7409  // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7410  //
7411  // map(from: ps->p)
7412  // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7413  //
7414  // map(to: ps->p[:22])
7415  // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7416  // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7417  // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7418  //
7419  // map(ps->ps)
7420  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7421  //
7422  // map(from: ps->ps->s.i)
7423  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7424  // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7425  // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7426  //
7427  // map(from: ps->ps->ps)
7428  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7429  // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7430  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7431  //
7432  // map(ps->ps->ps->ps)
7433  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7434  // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7435  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7436  // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7437  //
7438  // map(to: ps->ps->ps->s.f[:22])
7439  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7440  // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7441  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7442  // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7443  //
7444  // map(to: s.f[:22]) map(from: s.p[:33])
7445  // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7446  // sizeof(double*) (**), TARGET_PARAM
7447  // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7448  // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7449  // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7450  // (*) allocate contiguous space needed to fit all mapped members even if
7451  // we allocate space for members not mapped (in this example,
7452  // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7453  // them as well because they fall between &s.f[0] and &s.p)
7454  //
7455  // map(from: s.f[:22]) map(to: ps->p[:33])
7456  // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7457  // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7458  // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7459  // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7460  // (*) the struct this entry pertains to is the 2nd element in the list of
7461  // arguments, hence MEMBER_OF(2)
7462  //
7463  // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7464  // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7465  // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7466  // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7467  // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7468  // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7469  // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7470  // (*) the struct this entry pertains to is the 4th element in the list
7471  // of arguments, hence MEMBER_OF(4)
7472 
7473  // Track if the map information being generated is the first for a capture.
7474  bool IsCaptureFirstInfo = IsFirstComponentList;
7475  // When the variable is on a declare target link or in a to clause with
7476  // unified memory, a reference is needed to hold the host/device address
7477  // of the variable.
7478  bool RequiresReference = false;
7479 
7480  // Scan the components from the base to the complete expression.
7481  auto CI = Components.rbegin();
7482  auto CE = Components.rend();
7483  auto I = CI;
7484 
7485  // Track if the map information being generated is the first for a list of
7486  // components.
7487  bool IsExpressionFirstInfo = true;
7488  Address BP = Address::invalid();
7489  const Expr *AssocExpr = I->getAssociatedExpression();
7490  const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7491  const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7492 
7493  if (isa<MemberExpr>(AssocExpr)) {
7494  // The base is the 'this' pointer. The content of the pointer is going
7495  // to be the base of the field being mapped.
7496  BP = CGF.LoadCXXThisAddress();
7497  } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7498  (OASE &&
7499  isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7500  BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7501  } else {
7502  // The base is the reference to the variable.
7503  // BP = &Var.
7504  BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7505  if (const auto *VD =
7506  dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7508  OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7509  if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7510  (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7511  CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7512  RequiresReference = true;
7513  BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7514  }
7515  }
7516  }
7517 
7518  // If the variable is a pointer and is being dereferenced (i.e. is not
7519  // the last component), the base has to be the pointer itself, not its
7520  // reference. References are ignored for mapping purposes.
7521  QualType Ty =
7522  I->getAssociatedDeclaration()->getType().getNonReferenceType();
7523  if (Ty->isAnyPointerType() && std::next(I) != CE) {
7524  BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7525 
7526  // We do not need to generate individual map information for the
7527  // pointer, it can be associated with the combined storage.
7528  ++I;
7529  }
7530  }
7531 
7532  // Track whether a component of the list should be marked as MEMBER_OF some
7533  // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7534  // in a component list should be marked as MEMBER_OF, all subsequent entries
7535  // do not belong to the base struct. E.g.
7536  // struct S2 s;
7537  // s.ps->ps->ps->f[:]
7538  // (1) (2) (3) (4)
7539  // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7540  // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7541  // is the pointee of ps(2) which is not member of struct s, so it should not
7542  // be marked as such (it is still PTR_AND_OBJ).
7543  // The variable is initialized to false so that PTR_AND_OBJ entries which
7544  // are not struct members are not considered (e.g. array of pointers to
7545  // data).
7546  bool ShouldBeMemberOf = false;
7547 
7548  // Variable keeping track of whether or not we have encountered a component
7549  // in the component list which is a member expression. Useful when we have a
7550  // pointer or a final array section, in which case it is the previous
7551  // component in the list which tells us whether we have a member expression.
7552  // E.g. X.f[:]
7553  // While processing the final array section "[:]" it is "f" which tells us
7554  // whether we are dealing with a member of a declared struct.
7555  const MemberExpr *EncounteredME = nullptr;
7556 
7557  for (; I != CE; ++I) {
7558  // If the current component is member of a struct (parent struct) mark it.
7559  if (!EncounteredME) {
7560  EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7561  // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7562  // as MEMBER_OF the parent struct.
7563  if (EncounteredME)
7564  ShouldBeMemberOf = true;
7565  }
7566 
7567  auto Next = std::next(I);
7568 
7569  // We need to generate the addresses and sizes if this is the last
7570  // component, if the component is a pointer or if it is an array section
7571  // whose length can't be proved to be one. If this is a pointer, it
7572  // becomes the base address for the following components.
7573 
7574  // A final array section, is one whose length can't be proved to be one.
7575  bool IsFinalArraySection =
7576  isFinalArraySectionExpression(I->getAssociatedExpression());
7577 
7578  // Get information on whether the element is a pointer. Have to do a
7579  // special treatment for array sections given that they are built-in
7580  // types.
7581  const auto *OASE =
7582  dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7583  bool IsPointer =
7585  .getCanonicalType()
7586  ->isAnyPointerType()) ||
7587  I->getAssociatedExpression()->getType()->isAnyPointerType();
7588 
7589  if (Next == CE || IsPointer || IsFinalArraySection) {
7590  // If this is not the last component, we expect the pointer to be
7591  // associated with an array expression or member expression.
7592  assert((Next == CE ||
7593  isa<MemberExpr>(Next->getAssociatedExpression()) ||
7594  isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7595  isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
7596  "Unexpected expression");
7597 
7598  Address LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7599  .getAddress(CGF);
7600 
7601  // If this component is a pointer inside the base struct then we don't
7602  // need to create any entry for it - it will be combined with the object
7603  // it is pointing to into a single PTR_AND_OBJ entry.
7604  bool IsMemberPointer =
7605  IsPointer && EncounteredME &&
7606  (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7607  EncounteredME);
7608  if (!OverlappedElements.empty()) {
7609  // Handle base element with the info for overlapped elements.
7610  assert(!PartialStruct.Base.isValid() && "The base element is set.");
7611  assert(Next == CE &&
7612  "Expected last element for the overlapped elements.");
7613  assert(!IsPointer &&
7614  "Unexpected base element with the pointer type.");
7615  // Mark the whole struct as the struct that requires allocation on the
7616  // device.
7617  PartialStruct.LowestElem = {0, LB};
7618  CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7619  I->getAssociatedExpression()->getType());
7620  Address HB = CGF.Builder.CreateConstGEP(
7622  CGF.VoidPtrTy),
7623  TypeSize.getQuantity() - 1);
7624  PartialStruct.HighestElem = {
7625  std::numeric_limits<decltype(
7626  PartialStruct.HighestElem.first)>::max(),
7627  HB};
7628  PartialStruct.Base = BP;
7629  // Emit data for non-overlapped data.
7630  OpenMPOffloadMappingFlags Flags =
7631  OMP_MAP_MEMBER_OF |
7632  getMapTypeBits(MapType, MapModifiers, IsImplicit,
7633  /*AddPtrFlag=*/false,
7634  /*AddIsTargetParamFlag=*/false);
7635  LB = BP;
7636  llvm::Value *Size = nullptr;
7637  // Do bitcopy of all non-overlapped structure elements.
7639  Component : OverlappedElements) {
7640  Address ComponentLB = Address::invalid();
7642  Component) {
7643  if (MC.getAssociatedDeclaration()) {
7644  ComponentLB =
7645  CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7646  .getAddress(CGF);
7647  Size = CGF.Builder.CreatePtrDiff(
7648  CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7649  CGF.EmitCastToVoidPtr(LB.getPointer()));
7650  break;
7651  }
7652  }
7653  BasePointers.push_back(BP.getPointer());
7654  Pointers.push_back(LB.getPointer());
7655  Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
7656  /*isSigned=*/true));
7657  Types.push_back(Flags);
7658  LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7659  }
7660  BasePointers.push_back(BP.getPointer());
7661  Pointers.push_back(LB.getPointer());
7662  Size = CGF.Builder.CreatePtrDiff(
7663  CGF.EmitCastToVoidPtr(
7664  CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
7665  CGF.EmitCastToVoidPtr(LB.getPointer()));
7666  Sizes.push_back(
7667  CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7668  Types.push_back(Flags);
7669  break;
7670  }
7671  llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7672  if (!IsMemberPointer) {
7673  BasePointers.push_back(BP.getPointer());
7674  Pointers.push_back(LB.getPointer());
7675  Sizes.push_back(
7676  CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7677 
7678  // We need to add a pointer flag for each map that comes from the
7679  // same expression except for the first one. We also need to signal
7680  // this map is the first one that relates with the current capture
7681  // (there is a set of entries for each capture).
7682  OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7683  MapType, MapModifiers, IsImplicit,
7684  !IsExpressionFirstInfo || RequiresReference,
7685  IsCaptureFirstInfo && !RequiresReference);
7686 
7687  if (!IsExpressionFirstInfo) {
7688  // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7689  // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7690  if (IsPointer)
7691  Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7692  OMP_MAP_DELETE | OMP_MAP_CLOSE);
7693 
7694  if (ShouldBeMemberOf) {
7695  // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7696  // should be later updated with the correct value of MEMBER_OF.
7697  Flags |= OMP_MAP_MEMBER_OF;
7698  // From now on, all subsequent PTR_AND_OBJ entries should not be
7699  // marked as MEMBER_OF.
7700  ShouldBeMemberOf = false;
7701  }
7702  }
7703 
7704  Types.push_back(Flags);
7705  }
7706 
7707  // If we have encountered a member expression so far, keep track of the
7708  // mapped member. If the parent is "*this", then the value declaration
7709  // is nullptr.
7710  if (EncounteredME) {
7711  const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl());
7712  unsigned FieldIndex = FD->getFieldIndex();
7713 
7714  // Update info about the lowest and highest elements for this struct
7715  if (!PartialStruct.Base.isValid()) {
7716  PartialStruct.LowestElem = {FieldIndex, LB};
7717  PartialStruct.HighestElem = {FieldIndex, LB};
7718  PartialStruct.Base = BP;
7719  } else if (FieldIndex < PartialStruct.LowestElem.first) {
7720  PartialStruct.LowestElem = {FieldIndex, LB};
7721  } else if (FieldIndex > PartialStruct.HighestElem.first) {
7722  PartialStruct.HighestElem = {FieldIndex, LB};
7723  }
7724  }
7725 
7726  // If we have a final array section, we are done with this expression.
7727  if (IsFinalArraySection)
7728  break;
7729 
7730  // The pointer becomes the base for the next element.
7731  if (Next != CE)
7732  BP = LB;
7733 
7734  IsExpressionFirstInfo = false;
7735  IsCaptureFirstInfo = false;
7736  }
7737  }
7738  }
7739 
7740  /// Return the adjusted map modifiers if the declaration a capture refers to
7741  /// appears in a first-private clause. This is expected to be used only with
7742  /// directives that start with 'target'.
7743  MappableExprsHandler::OpenMPOffloadMappingFlags
7744  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7745  assert(Cap.capturesVariable() && "Expected capture by reference only!");
7746 
7747  // A first private variable captured by reference will use only the
7748  // 'private ptr' and 'map to' flag. Return the right flags if the captured
7749  // declaration is known as first-private in this handler.
7750  if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7751  if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7753  return MappableExprsHandler::OMP_MAP_ALWAYS |
7754  MappableExprsHandler::OMP_MAP_TO;
7755  if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7756  return MappableExprsHandler::OMP_MAP_TO |
7757  MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7758  return MappableExprsHandler::OMP_MAP_PRIVATE |
7759  MappableExprsHandler::OMP_MAP_TO;
7760  }
7761  return MappableExprsHandler::OMP_MAP_TO |
7762  MappableExprsHandler::OMP_MAP_FROM;
7763  }
7764 
7765  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7766  // Rotate by getFlagMemberOffset() bits.
7767  return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7768  << getFlagMemberOffset());
7769  }
7770 
7771  static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7772  OpenMPOffloadMappingFlags MemberOfFlag) {
7773  // If the entry is PTR_AND_OBJ but has not been marked with the special
7774  // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7775  // marked as MEMBER_OF.
7776  if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7777  ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7778  return;
7779 
7780  // Reset the placeholder value to prepare the flag for the assignment of the
7781  // proper MEMBER_OF value.
7782  Flags &= ~OMP_MAP_MEMBER_OF;
7783  Flags |= MemberOfFlag;
7784  }
7785 
7786  void getPlainLayout(const CXXRecordDecl *RD,
7788  bool AsBase) const {
7789  const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7790 
7791  llvm::StructType *St =
7792  AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7793 
7794  unsigned NumElements = St->getNumElements();
7796  llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7797  RecordLayout(NumElements);
7798 
7799  // Fill bases.
7800  for (const auto &I : RD->bases()) {
7801  if (I.isVirtual())
7802  continue;
7803  const auto *Base = I.getType()->getAsCXXRecordDecl();
7804  // Ignore empty bases.
7805  if (Base->isEmpty() || CGF.getContext()
7808  .isZero())
7809  continue;
7810 
7811  unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7812  RecordLayout[FieldIndex] = Base;
7813  }
7814  // Fill in virtual bases.
7815  for (const auto &I : RD->vbases()) {
7816  const auto *Base = I.getType()->getAsCXXRecordDecl();
7817  // Ignore empty bases.
7818  if (Base->isEmpty())
7819  continue;
7820  unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7821  if (RecordLayout[FieldIndex])
7822  continue;
7823  RecordLayout[FieldIndex] = Base;
7824  }
7825  // Fill in all the fields.
7826  assert(!RD->isUnion() && "Unexpected union.");
7827  for (const auto *Field : RD->fields()) {
7828  // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7829  // will fill in later.)
7830  if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7831  unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7832  RecordLayout[FieldIndex] = Field;
7833  }
7834  }
7835  for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7836  &Data : RecordLayout) {
7837  if (Data.isNull())
7838  continue;
7839  if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7840  getPlainLayout(Base, Layout, /*AsBase=*/true);
7841  else
7842  Layout.push_back(Data.get<const FieldDecl *>());
7843  }
7844  }
7845 
7846 public:
7847  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
7848  : CurDir(&Dir), CGF(CGF) {
7849  // Extract firstprivate clause information.
7850  for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
7851  for (const auto *D : C->varlists())
7852  FirstPrivateDecls.try_emplace(
7853  cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
7854  // Extract device pointer clause information.
7855  for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7856  for (auto L : C->component_lists())
7857  DevPointersMap[L.first].push_back(L.second);
7858  }
7859 
  /// Constructor for the declare mapper directive. Only the directive is
  /// recorded; unlike the executable-directive constructor there are no
  /// firstprivate or is_device_ptr clauses to cache here.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
7863 
7864  /// Generate code for the combined entry if we have a partially mapped struct
7865  /// and take care of the mapping flags of the arguments corresponding to
7866  /// individual struct members.
7867  void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
7868  MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7869  MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
7870  const StructRangeInfoTy &PartialStruct) const {
7871  // Base is the base of the struct
7872  BasePointers.push_back(PartialStruct.Base.getPointer());
7873  // Pointer is the address of the lowest element
7874  llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
7875  Pointers.push_back(LB);
7876  // Size is (addr of {highest+1} element) - (addr of lowest element)
7877  llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
7878  llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
7879  llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
7880  llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
7881  llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
7882  llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
7883  /*isSigned=*/false);
7884  Sizes.push_back(Size);
7885  // Map type is always TARGET_PARAM
7886  Types.push_back(OMP_MAP_TARGET_PARAM);
7887  // Remove TARGET_PARAM flag from the first element
7888  (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
7889 
7890  // All other current entries will be MEMBER_OF the combined entry
7891  // (except for PTR_AND_OBJ entries which do not have a placeholder value
7892  // 0xFFFF in the MEMBER_OF field).
7893  OpenMPOffloadMappingFlags MemberOfFlag =
7894  getMemberOfFlag(BasePointers.size() - 1);
7895  for (auto &M : CurTypes)
7896  setCorrectMemberOfFlag(M, MemberOfFlag);
7897  }
7898 
7899  /// Generate all the base pointers, section pointers, sizes and map
7900  /// types for the extracted mappable expressions. Also, for each item that
7901  /// relates with a device pointer, a pair of the relevant declaration and
7902  /// index where it occurs is appended to the device pointers info array.
7903  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
7904  MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7905  MapFlagsArrayTy &Types) const {
7906  // We have to process the component lists that relate with the same
7907  // declaration in a single chunk so that we can generate the map flags
7908  // correctly. Therefore, we organize all lists in a map.
7909  llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
7910 
7911  // Helper function to fill the information map for the different supported
7912  // clauses.
7913  auto &&InfoGen = [&Info](
7914  const ValueDecl *D,
7916  OpenMPMapClauseKind MapType,
7917  ArrayRef<OpenMPMapModifierKind> MapModifiers,
7918  bool ReturnDevicePointer, bool IsImplicit) {
7919  const ValueDecl *VD =
7920  D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
7921  Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
7922  IsImplicit);
7923  };
7924 
7925  assert(CurDir.is<const OMPExecutableDirective *>() &&
7926  "Expect a executable directive");
7927  const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
7928  for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
7929  for (const auto L : C->component_lists()) {
7930  InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
7931  /*ReturnDevicePointer=*/false, C->isImplicit());
7932  }
7933  for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
7934  for (const auto L : C->component_lists()) {
7935  InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
7936  /*ReturnDevicePointer=*/false, C->isImplicit());
7937  }
7938  for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
7939  for (const auto L : C->component_lists()) {
7940  InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
7941  /*ReturnDevicePointer=*/false, C->isImplicit());
7942  }
7943 
7944  // Look at the use_device_ptr clause information and mark the existing map
7945  // entries as such. If there is no map information for an entry in the
7946  // use_device_ptr list, we create one with map type 'alloc' and zero size
7947  // section. It is the user fault if that was not mapped before. If there is
7948  // no map information and the pointer is a struct member, then we defer the
7949  // emission of that entry until the whole struct has been processed.
7950  llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
7951  DeferredInfo;
7952 
7953  for (const auto *C :
7954  CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
7955  for (const auto L : C->component_lists()) {
7956  assert(!L.second.empty() && "Not expecting empty list of components!");
7957  const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
7958  VD = cast<ValueDecl>(VD->getCanonicalDecl());
7959  const Expr *IE = L.second.back().getAssociatedExpression();
7960  // If the first component is a member expression, we have to look into
7961  // 'this', which maps to null in the map of map information. Otherwise
7962  // look directly for the information.
7963  auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
7964 
7965  // We potentially have map information for this declaration already.
7966  // Look for the first set of components that refer to it.
7967  if (It != Info.end()) {
7968  auto CI = std::find_if(
7969  It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
7970  return MI.Components.back().getAssociatedDeclaration() == VD;
7971  });
7972  // If we found a map entry, signal that the pointer has to be returned
7973  // and move on to the next declaration.
7974  if (CI != It->second.end()) {
7975  CI->ReturnDevicePointer = true;
7976  continue;
7977  }
7978  }
7979 
7980  // We didn't find any match in our map information - generate a zero
7981  // size array section - if the pointer is a struct member we defer this
7982  // action until the whole struct has been processed.
7983  if (isa<MemberExpr>(IE)) {
7984  // Insert the pointer into Info to be processed by
7985  // generateInfoForComponentList. Because it is a member pointer
7986  // without a pointee, no entry will be generated for it, therefore
7987  // we need to generate one after the whole struct has been processed.
7988  // Nonetheless, generateInfoForComponentList must be called to take
7989  // the pointer into account for the calculation of the range of the
7990  // partial struct.
7991  InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
7992  /*ReturnDevicePointer=*/false, C->isImplicit());
7993  DeferredInfo[nullptr].emplace_back(IE, VD);
7994  } else {
7995  llvm::Value *Ptr =
7996  CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
7997  BasePointers.emplace_back(Ptr, VD);
7998  Pointers.push_back(Ptr);
7999  Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8000  Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
8001  }
8002  }
8003  }
8004 
8005  for (const auto &M : Info) {
8006  // We need to know when we generate information for the first component
8007  // associated with a capture, because the mapping flags depend on it.
8008  bool IsFirstComponentList = true;
8009 
8010  // Temporary versions of arrays
8011  MapBaseValuesArrayTy CurBasePointers;
8012  MapValuesArrayTy CurPointers;
8013  MapValuesArrayTy CurSizes;
8014  MapFlagsArrayTy CurTypes;
8015  StructRangeInfoTy PartialStruct;
8016 
8017  for (const MapInfo &L : M.second) {
8018  assert(!L.Components.empty() &&
8019  "Not expecting declaration with no component lists.");
8020 
8021  // Remember the current base pointer index.
8022  unsigned CurrentBasePointersIdx = CurBasePointers.size();
8023  generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
8024  CurBasePointers, CurPointers, CurSizes,
8025  CurTypes, PartialStruct,
8026  IsFirstComponentList, L.IsImplicit);
8027 
8028  // If this entry relates with a device pointer, set the relevant
8029  // declaration and add the 'return pointer' flag.
8030  if (L.ReturnDevicePointer) {
8031  assert(CurBasePointers.size() > CurrentBasePointersIdx &&
8032  "Unexpected number of mapped base pointers.");
8033 
8034  const ValueDecl *RelevantVD =
8035  L.Components.back().getAssociatedDeclaration();
8036  assert(RelevantVD &&
8037  "No relevant declaration related with device pointer??");
8038 
8039  CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
8040  CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8041  }
8042  IsFirstComponentList = false;
8043  }
8044 
8045  // Append any pending zero-length pointers which are struct members and
8046  // used with use_device_ptr.
8047  auto CI = DeferredInfo.find(M.first);
8048  if (CI != DeferredInfo.end()) {
8049  for (const DeferredDevicePtrEntryTy &L : CI->second) {
8050  llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8051  llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
8052  this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
8053  CurBasePointers.emplace_back(BasePtr, L.VD);
8054  CurPointers.push_back(Ptr);
8055  CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
8056  // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
8057  // value MEMBER_OF=FFFF so that the entry is later updated with the
8058  // correct value of MEMBER_OF.
8059  CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8060  OMP_MAP_MEMBER_OF);
8061  }
8062  }
8063 
8064  // If there is an entry in PartialStruct it means we have a struct with
8065  // individual members mapped. Emit an extra combined entry.
8066  if (PartialStruct.Base.isValid())
8067  emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8068  PartialStruct);
8069 
8070  // We need to append the results of this capture to what we already have.
8071  BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8072  Pointers.append(CurPointers.begin(), CurPointers.end());
8073  Sizes.append(CurSizes.begin(), CurSizes.end());
8074  Types.append(CurTypes.begin(), CurTypes.end());
8075  }
8076  }
8077 
8078  /// Generate all the base pointers, section pointers, sizes and map types for
8079  /// the extracted map clauses of user-defined mapper.
8080  void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
8081  MapValuesArrayTy &Pointers,
8082  MapValuesArrayTy &Sizes,
8083  MapFlagsArrayTy &Types) const {
8084  assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8085  "Expect a declare mapper directive");
8086  const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8087  // We have to process the component lists that relate with the same
8088  // declaration in a single chunk so that we can generate the map flags
8089  // correctly. Therefore, we organize all lists in a map.
8090  llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8091 
8092  // Helper function to fill the information map for the different supported
8093  // clauses.
8094  auto &&InfoGen = [&Info](
8095  const ValueDecl *D,
8097  OpenMPMapClauseKind MapType,
8098  ArrayRef<OpenMPMapModifierKind> MapModifiers,
8099  bool ReturnDevicePointer, bool IsImplicit) {
8100  const ValueDecl *VD =
8101  D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
8102  Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
8103  IsImplicit);
8104  };
8105 
8106  for (const auto *C : CurMapperDir->clauselists()) {
8107  const auto *MC = cast<OMPMapClause>(C);
8108  for (const auto L : MC->component_lists()) {
8109  InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
8110  /*ReturnDevicePointer=*/false, MC->isImplicit());
8111  }
8112  }
8113 
8114  for (const auto &M : Info) {
8115  // We need to know when we generate information for the first component
8116  // associated with a capture, because the mapping flags depend on it.
8117  bool IsFirstComponentList = true;
8118 
8119  // Temporary versions of arrays
8120  MapBaseValuesArrayTy CurBasePointers;
8121  MapValuesArrayTy CurPointers;
8122  MapValuesArrayTy CurSizes;
8123  MapFlagsArrayTy CurTypes;
8124  StructRangeInfoTy PartialStruct;
8125 
8126  for (const MapInfo &L : M.second) {
8127  assert(!L.Components.empty() &&
8128  "Not expecting declaration with no component lists.");
8129  generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
8130  CurBasePointers, CurPointers, CurSizes,
8131  CurTypes, PartialStruct,
8132  IsFirstComponentList, L.IsImplicit);
8133  IsFirstComponentList = false;
8134  }
8135 
8136  // If there is an entry in PartialStruct it means we have a struct with
8137  // individual members mapped. Emit an extra combined entry.
8138  if (PartialStruct.Base.isValid())
8139  emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8140  PartialStruct);
8141 
8142  // We need to append the results of this capture to what we already have.
8143  BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8144  Pointers.append(CurPointers.begin(), CurPointers.end());
8145  Sizes.append(CurSizes.begin(), CurSizes.end());
8146  Types.append(CurTypes.begin(), CurTypes.end());
8147  }
8148  }
8149 
8150  /// Emit capture info for lambdas for variables captured by reference.
// For a captured variable VD of lambda type, pushes one map entry per
// interesting lambda field (the captured 'this' and each by-reference or
// pointer capture), each tagged PTR_AND_OBJ|LITERAL|MEMBER_OF|IMPLICIT.
// LambdaPointers records field-address -> lambda-base-address so that
// adjustMemberOfForLambdaCaptures can later patch the MEMBER_OF index.
8151  void generateInfoForLambdaCaptures(
8152  const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
8153  MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
8154  MapFlagsArrayTy &Types,
8155  llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8156  const auto *RD = VD->getType()
8157  .getCanonicalType()
// NOTE(review): original line 8158 (likely ".getNonReferenceType()") is
// missing from this scrape — confirm against upstream before relying on it.
8159  ->getAsCXXRecordDecl();
// Only lambda closures get this treatment; anything else maps normally.
8160  if (!RD || !RD->isLambda())
8161  return;
8162  Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8163  LValue VDLVal = CGF.MakeAddrLValue(
8164  VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8165  llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8166  FieldDecl *ThisCapture = nullptr;
8167  RD->getCaptureFields(Captures, ThisCapture);
// Map the captured 'this': base = address of the field inside the closure,
// pointer = the captured 'this' value, size = size of a void pointer.
8168  if (ThisCapture) {
8169  LValue ThisLVal =
8170  CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8171  LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8172  LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8173  VDLVal.getPointer(CGF));
8174  BasePointers.push_back(ThisLVal.getPointer(CGF));
8175  Pointers.push_back(ThisLValVal.getPointer(CGF));
8176  Sizes.push_back(
8177  CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8178  CGF.Int64Ty, /*isSigned=*/true));
8179  Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8180  OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8181  }
8182  for (const LambdaCapture &LC : RD->captures()) {
8183  if (!LC.capturesVariable())
8184  continue;
8185  const VarDecl *VD = LC.getCapturedVar();
// Only by-reference captures and by-value captures of pointer type need an
// extra entry; plain by-value captures travel inside the closure object.
8186  if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8187  continue;
8188  auto It = Captures.find(VD);
8189  assert(It != Captures.end() && "Found lambda capture without field.");
8190  LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8191  if (LC.getCaptureKind() == LCK_ByRef) {
8192  LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8193  LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8194  VDLVal.getPointer(CGF));
8195  BasePointers.push_back(VarLVal.getPointer(CGF));
8196  Pointers.push_back(VarLValVal.getPointer(CGF));
8197  Sizes.push_back(CGF.Builder.CreateIntCast(
8198  CGF.getTypeSize(
// NOTE(review): original line 8199 (the type argument to getTypeSize) is
// missing from this scrape — confirm against upstream.
8200  CGF.Int64Ty, /*isSigned=*/true));
8201  } else {
// By-value pointer capture: map the loaded pointer value itself, size 0.
8202  RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8203  LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8204  VDLVal.getPointer(CGF));
8205  BasePointers.push_back(VarLVal.getPointer(CGF));
8206  Pointers.push_back(VarRVal.getScalarVal());
8207  Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8208  }
8209  Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8210  OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8211  }
8212  }
8213 
8214  /// Set correct indices for lambdas captures.
// Second pass over the flag array: every entry produced by
// generateInfoForLambdaCaptures (identified by its exact flag combination)
// gets its MEMBER_OF field rewritten to point at the entry of the enclosing
// lambda object, found via the LambdaPointers side table.
8215  void adjustMemberOfForLambdaCaptures(
8216  const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8217  MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8218  MapFlagsArrayTy &Types) const {
8219  for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8220  // Set correct member_of idx for all implicit lambda captures.
// Exact-equality check: only untouched lambda-capture entries match; any
// entry whose flags were already adjusted is skipped.
8221  if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8222  OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8223  continue;
8224  llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8225  assert(BasePtr && "Unable to find base lambda address.");
8226  int TgtIdx = -1;
// Scan backwards from I for the entry whose pointer is the lambda base;
// captures are emitted after their parent lambda, so it must precede I.
8227  for (unsigned J = I; J > 0; --J) {
8228  unsigned Idx = J - 1;
8229  if (Pointers[Idx] != BasePtr)
8230  continue;
8231  TgtIdx = Idx;
8232  break;
8233  }
8234  assert(TgtIdx != -1 && "Unable to find parent lambda.");
8235  // All other current entries will be MEMBER_OF the combined entry
8236  // (except for PTR_AND_OBJ entries which do not have a placeholder value
8237  // 0xFFFF in the MEMBER_OF field).
8238  OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8239  setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8240  }
8241  }
8242 
8243  /// Generate the base pointers, section pointers, sizes and map types
8244  /// associated to a given capture.
// Entry point for mapping one captured variable of a target region. Handles,
// in order: is_device_ptr captures (pass pointer by value), collection of all
// map-clause component lists naming this declaration, detection and layout-
// ordered sorting of overlapping component lists, and finally emission via
// generateInfoForComponentList (overlapped lists first, then the rest).
8245  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8246  llvm::Value *Arg,
8247  MapBaseValuesArrayTy &BasePointers,
8248  MapValuesArrayTy &Pointers,
8249  MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
8250  StructRangeInfoTy &PartialStruct) const {
8251  assert(!Cap->capturesVariableArrayType() &&
8252  "Not expecting to generate map info for a variable array type!");
8253 
8254  // We need to know when we generating information for the first component
// A captured 'this' is represented as a null declaration here.
8255  const ValueDecl *VD = Cap->capturesThis()
8256  ? nullptr
8257  : Cap->getCapturedVar()->getCanonicalDecl();
8258 
8259  // If this declaration appears in a is_device_ptr clause we just have to
8260  // pass the pointer by value. If it is a reference to a declaration, we just
8261  // pass its value.
8262  if (DevPointersMap.count(VD)) {
8263  BasePointers.emplace_back(Arg, VD);
8264  Pointers.push_back(Arg);
8265  Sizes.push_back(
8266  CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8267  CGF.Int64Ty, /*isSigned=*/true));
8268  Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
8269  return;
8270  }
8271 
// MapData bundles one map clause's info for this declaration.
// NOTE(review): original line 8273 (the std::tuple<...> head of this alias)
// is missing from this scrape — confirm against upstream.
8272  using MapData =
8274  OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
8275  SmallVector<MapData, 4> DeclComponentLists;
8276  assert(CurDir.is<const OMPExecutableDirective *>() &&
8277  "Expect a executable directive");
8278  const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
// Gather every component list from every map clause that names VD.
8279  for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8280  for (const auto L : C->decl_component_lists(VD)) {
8281  assert(L.first == VD &&
8282  "We got information for the wrong declaration??");
8283  assert(!L.second.empty() &&
8284  "Not expecting declaration with no component lists.");
8285  DeclComponentLists.emplace_back(L.second, C->getMapType(),
8286  C->getMapTypeModifiers(),
8287  C->isImplicit());
8288  }
8289  }
8290 
8291  // Find overlapping elements (including the offset from the base element).
// NOTE(review): original lines 8294-8295 (the mapped-value type of this
// SmallDenseMap) are missing from this scrape — confirm against upstream.
8292  llvm::SmallDenseMap<
8293  const MapData *,
8296  4>
8297  OverlappedData;
8298  size_t Count = 0;
// Pairwise comparison of component lists, walking both from the tail
// (variable end) inwards; lists that agree on every compared component
// until one is exhausted describe overlapping storage.
8299  for (const MapData &L : DeclComponentLists) {
8300  OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8301  OpenMPMapClauseKind MapType;
8302  ArrayRef<OpenMPMapModifierKind> MapModifiers;
8303  bool IsImplicit;
8304  std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8305  ++Count;
8306  for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
8307  OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8308  std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
8309  auto CI = Components.rbegin();
8310  auto CE = Components.rend();
8311  auto SI = Components1.rbegin();
8312  auto SE = Components1.rend();
8313  for (; CI != CE && SI != SE; ++CI, ++SI) {
8314  if (CI->getAssociatedExpression()->getStmtClass() !=
8315  SI->getAssociatedExpression()->getStmtClass())
8316  break;
8317  // Are we dealing with different variables/fields?
8318  if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8319  break;
8320  }
8321  // Found overlapping if, at least for one component, reached the head of
8322  // the components list.
8323  if (CI == CE || SI == SE) {
8324  assert((CI != CE || SI != SE) &&
8325  "Unexpected full match of the mapping components.");
// The shorter (fully consumed) list is the base; the longer one is the
// sub-component recorded as overlapping it.
8326  const MapData &BaseData = CI == CE ? L : L1;
8327  OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8328  SI == SE ? Components : Components1;
8329  auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8330  OverlappedElements.getSecond().push_back(SubData);
8331  }
8332  }
8333  }
8334  // Sort the overlapped elements for each item.
// NOTE(review): original line 8335 (the declaration of 'Layout', presumably
// SmallVector<const FieldDecl *, ...>) is missing from this scrape.
8336  if (!OverlappedData.empty()) {
8337  if (const auto *CRD =
8338  VD->getType().getCanonicalType()->getAsCXXRecordDecl())
8339  getPlainLayout(CRD, Layout, /*AsBase=*/false);
8340  else {
8341  const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
8342  Layout.append(RD->field_begin(), RD->field_end());
8343  }
8344  }
// Order each base's overlapped sub-lists by declaration/layout order of the
// first differing field, so emission walks the record front-to-back.
8345  for (auto &Pair : OverlappedData) {
8346  llvm::sort(
8347  Pair.getSecond(),
8348  [&Layout](
8349  OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8350  OMPClauseMappableExprCommon::MappableExprComponentListRef
8351  Second) {
8352  auto CI = First.rbegin();
8353  auto CE = First.rend();
8354  auto SI = Second.rbegin();
8355  auto SE = Second.rend();
8356  for (; CI != CE && SI != SE; ++CI, ++SI) {
8357  if (CI->getAssociatedExpression()->getStmtClass() !=
8358  SI->getAssociatedExpression()->getStmtClass())
8359  break;
8360  // Are we dealing with different variables/fields?
8361  if (CI->getAssociatedDeclaration() !=
8362  SI->getAssociatedDeclaration())
8363  break;
8364  }
8365 
8366  // Lists contain the same elements.
8367  if (CI == CE && SI == SE)
8368  return false;
8369 
8370  // List with less elements is less than list with more elements.
8371  if (CI == CE || SI == SE)
8372  return CI == CE;
8373 
8374  const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8375  const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8376  if (FD1->getParent() == FD2->getParent())
8377  return FD1->getFieldIndex() < FD2->getFieldIndex();
// Fields of different (base) classes: whichever appears first in the
// flattened plain layout wins.
8378  const auto It =
8379  llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8380  return FD == FD1 || FD == FD2;
8381  });
8382  return *It == FD1;
8383  });
8384  }
8385 
8386  // Associated with a capture, because the mapping flags depend on it.
8387  // Go through all of the elements with the overlapped elements.
8388  for (const auto &Pair : OverlappedData) {
8389  const MapData &L = *Pair.getFirst();
8390  OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8391  OpenMPMapClauseKind MapType;
8392  ArrayRef<OpenMPMapModifierKind> MapModifiers;
8393  bool IsImplicit;
8394  std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8395  ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8396  OverlappedComponents = Pair.getSecond();
8397  bool IsFirstComponentList = true;
8398  generateInfoForComponentList(MapType, MapModifiers, Components,
8399  BasePointers, Pointers, Sizes, Types,
8400  PartialStruct, IsFirstComponentList,
8401  IsImplicit, OverlappedComponents);
8402  }
8403  // Go through other elements without overlapped elements.
// Only the very first emitted list is "first"; if overlapped lists were
// emitted above, none of the remaining ones can be.
8404  bool IsFirstComponentList = OverlappedData.empty();
8405  for (const MapData &L : DeclComponentLists) {
8406  OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8407  OpenMPMapClauseKind MapType;
8408  ArrayRef<OpenMPMapModifierKind> MapModifiers;
8409  bool IsImplicit;
8410  std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8411  auto It = OverlappedData.find(&L);
8412  if (It == OverlappedData.end())
8413  generateInfoForComponentList(MapType, MapModifiers, Components,
8414  BasePointers, Pointers, Sizes, Types,
8415  PartialStruct, IsFirstComponentList,
8416  IsImplicit);
8417  IsFirstComponentList = false;
8418  }
8419  }
8420 
8421  /// Generate the base pointers, section pointers, sizes and map types
8422  /// associated with the declare target link variables.
// Emits map entries for globals that appear in map clauses via
// 'declare target link' rather than as region captures. Skipped entirely
// when unified shared memory is required, since no copy is needed then.
8423  void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8424  MapValuesArrayTy &Pointers,
8425  MapValuesArrayTy &Sizes,
8426  MapFlagsArrayTy &Types) const {
8427  assert(CurDir.is<const OMPExecutableDirective *>() &&
8428  "Expect a executable directive");
8429  const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8430  // Map other list items in the map clause which are not captured variables
8431  // but "declare target link" global variables.
8432  for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8433  for (const auto L : C->component_lists()) {
8434  if (!L.first)
8435  continue;
8436  const auto *VD = dyn_cast<VarDecl>(L.first);
8437  if (!VD)
8438  continue;
// NOTE(review): original line 8439 (the declaration of 'Res', presumably
// llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =) is missing
// from this scrape — confirm against upstream.
8440  OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
// Only MT_Link variables (without unified shared memory) get entries here.
8441  if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8442  !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8443  continue;
8444  StructRangeInfoTy PartialStruct;
8445  generateInfoForComponentList(
8446  C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8447  Pointers, Sizes, Types, PartialStruct,
8448  /*IsFirstComponentList=*/true, C->isImplicit());
8449  assert(!PartialStruct.Base.isValid() &&
8450  "No partial structs for declare target link expected.");
8451  }
8452  }
8453  }
8454 
8455  /// Generate the default map information for a given capture \a CI,
8456  /// record field declaration \a RI and captured value \a CV.
// Used when a capture has no explicit map clause. Three cases: captured
// 'this' (tofrom pointee), by-copy capture (literal value, or zero-size
// pointer), and by-reference capture (size of pointee, default map type,
// with special handling for constant firstprivates and firstprivate
// pointers). Always ends by tagging the entry TARGET_PARAM and, when
// implicit, IMPLICIT.
8457  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8458  const FieldDecl &RI, llvm::Value *CV,
8459  MapBaseValuesArrayTy &CurBasePointers,
8460  MapValuesArrayTy &CurPointers,
8461  MapValuesArrayTy &CurSizes,
8462  MapFlagsArrayTy &CurMapTypes) const {
8463  bool IsImplicit = true;
8464  // Do the default mapping.
8465  if (CI.capturesThis()) {
8466  CurBasePointers.push_back(CV);
8467  CurPointers.push_back(CV);
8468  const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
// Size is the pointee ('*this') size, not the pointer's.
8469  CurSizes.push_back(
8470  CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8471  CGF.Int64Ty, /*isSigned=*/true));
8472  // Default map type.
8473  CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
8474  } else if (CI.capturesVariableByCopy()) {
8475  CurBasePointers.push_back(CV);
8476  CurPointers.push_back(CV);
8477  if (!RI.getType()->isAnyPointerType()) {
8478  // We have to signal to the runtime captures passed by value that are
8479  // not pointers.
8480  CurMapTypes.push_back(OMP_MAP_LITERAL);
8481  CurSizes.push_back(CGF.Builder.CreateIntCast(
8482  CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8483  } else {
8484  // Pointers are implicitly mapped with a zero size and no flags
8485  // (other than first map that is added for all implicit maps).
8486  CurMapTypes.push_back(OMP_MAP_NONE);
8487  CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8488  }
// A firstprivate clause overrides the implicit-ness of this capture.
8489  const VarDecl *VD = CI.getCapturedVar();
8490  auto I = FirstPrivateDecls.find(VD);
8491  if (I != FirstPrivateDecls.end())
8492  IsImplicit = I->getSecond();
8493  } else {
8494  assert(CI.capturesVariable() && "Expected captured reference.");
8495  const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8496  QualType ElementType = PtrTy->getPointeeType();
8497  CurSizes.push_back(CGF.Builder.CreateIntCast(
8498  CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8499  // The default map type for a scalar/complex type is 'to' because by
8500  // default the value doesn't have to be retrieved. For an aggregate
8501  // type, the default is 'tofrom'.
8502  CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
8503  const VarDecl *VD = CI.getCapturedVar();
8504  auto I = FirstPrivateDecls.find(VD);
// Constant firstprivate: materialize a global copy once and map that
// instead of the stack value, so the device sees the same storage.
8505  if (I != FirstPrivateDecls.end() &&
8506  VD->getType().isConstant(CGF.getContext())) {
8507  llvm::Constant *Addr =
8508  CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
8509  // Copy the value of the original variable to the new global copy.
8510  CGF.Builder.CreateMemCpy(
8511  CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
8512  Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
8513  CurSizes.back(), /*IsVolatile=*/false);
8514  // Use new global variable as the base pointers.
8515  CurBasePointers.push_back(Addr);
8516  CurPointers.push_back(Addr);
8517  } else {
8518  CurBasePointers.push_back(CV);
// Firstprivate pointer: map the pointer's current value (load through
// the reference), not the reference slot itself.
8519  if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8520  Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8521  CV, ElementType, CGF.getContext().getDeclAlign(VD),
// NOTE(review): original line 8522 (remaining MakeAddrLValue argument(s)
// and closing parens) is missing from this scrape — confirm upstream.
8523  CurPointers.push_back(PtrAddr.getPointer());
8524  } else {
8525  CurPointers.push_back(CV);
8526  }
8527  }
8528  if (I != FirstPrivateDecls.end())
8529  IsImplicit = I->getSecond();
8530  }
8531  // Every default map produces a single argument which is a target parameter.
8532  CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
8533 
8534  // Add flag stating this is an implicit map.
8535  if (IsImplicit)
8536  CurMapTypes.back() |= OMP_MAP_IMPLICIT;
8537  }
8538 };
8539 } // anonymous namespace
8540 
8541 /// Emit the arrays used to pass the captures and map information to the
8542 /// offloading runtime library. If there is no map or capture information,
8543 /// return nullptr by reference.
// Materializes four parallel arrays consumed by libomptarget:
// .offload_baseptrs / .offload_ptrs (stack temporaries filled per entry),
// .offload_sizes (constant global when all sizes are compile-time constants,
// otherwise a stack array filled at runtime), and .offload_maptypes (always
// a constant global). Results land in 'Info'.
8544 static void
// NOTE(review): original lines 8545-8549 (the function name, presumably
// emitOffloadingArrays, and its CGF/BasePointers/Pointers/Sizes/MapTypes
// parameters) are missing from this scrape — confirm against upstream.
8550  CGOpenMPRuntime::TargetDataInfo &Info) {
8551  CodeGenModule &CGM = CGF.CGM;
8552  ASTContext &Ctx = CGF.getContext();
8553 
8554  // Reset the array information.
8555  Info.clearArrayInfo();
8556  Info.NumberOfPtrs = BasePointers.size();
8557 
8558  if (Info.NumberOfPtrs) {
8559  // Detect if we have any capture size requiring runtime evaluation of the
8560  // size so that a constant array could be eventually used.
8561  bool hasRuntimeEvaluationCaptureSize = false;
8562  for (llvm::Value *S : Sizes)
8563  if (!isa<llvm::Constant>(S)) {
8564  hasRuntimeEvaluationCaptureSize = true;
8565  break;
8566  }
8567 
8568  llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
8569  QualType PointerArrayType = Ctx.getConstantArrayType(
8570  Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
8571  /*IndexTypeQuals=*/0);
8572 
8573  Info.BasePointersArray =
8574  CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
8575  Info.PointersArray =
8576  CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
8577 
8578  // If we don't have any VLA types or other types that require runtime
8579  // evaluation, we can use a constant array for the map sizes, otherwise we
8580  // need to fill up the arrays as we do for the pointers.
8581  QualType Int64Ty =
8582  Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
8583  if (hasRuntimeEvaluationCaptureSize) {
8584  QualType SizeArrayType = Ctx.getConstantArrayType(
8585  Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
8586  /*IndexTypeQuals=*/0);
8587  Info.SizesArray =
8588  CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
8589  } else {
8590  // We expect all the sizes to be constant, so we collect them to create
8591  // a constant array.
// NOTE(review): original line 8592 (declaration of 'ConstSizes', presumably
// SmallVector<llvm::Constant *, 16>) is missing from this scrape.
8593  for (llvm::Value *S : Sizes)
8594  ConstSizes.push_back(cast<llvm::Constant>(S));
8595 
8596  auto *SizesArrayInit = llvm::ConstantArray::get(
8597  llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
8598  std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
8599  auto *SizesArrayGbl = new llvm::GlobalVariable(
8600  CGM.getModule(), SizesArrayInit->getType(),
8601  /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8602  SizesArrayInit, Name);
8603  SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8604  Info.SizesArray = SizesArrayGbl;
8605  }
8606 
8607  // The map types are always constant so we don't need to generate code to
8608  // fill arrays. Instead, we create an array constant.
8609  SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
8610  llvm::copy(MapTypes, Mapping.begin());
8611  llvm::Constant *MapTypesArrayInit =
8612  llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
8613  std::string MaptypesName =
8614  CGM.getOpenMPRuntime().getName({"offload_maptypes"});
8615  auto *MapTypesArrayGbl = new llvm::GlobalVariable(
8616  CGM.getModule(), MapTypesArrayInit->getType(),
8617  /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8618  MapTypesArrayInit, MaptypesName);
8619  MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8620  Info.MapTypesArray = MapTypesArrayGbl;
8621 
// Per-entry stores into the stack arrays; also records device-pointer
// capture addresses when the caller asked for them.
8622  for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
8623  llvm::Value *BPVal = *BasePointers[I];
// NOTE(review): original lines 8624/8627 (declarations of 'BP' via
// CreateConstInBoundsGEP2_32 and a pointer bitcast) are missing here.
8625  llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8626  Info.BasePointersArray, 0, I);
8628  BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
8629  Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8630  CGF.Builder.CreateStore(BPVal, BPAddr);
8631 
8632  if (Info.requiresDevicePointerInfo())
8633  if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
8634  Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
8635 
8636  llvm::Value *PVal = Pointers[I];
// NOTE(review): original lines 8637/8640 (declaration of 'P', mirroring the
// BP sequence above) are missing from this scrape.
8638  llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8639  Info.PointersArray, 0, I);
8641  P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
8642  Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8643  CGF.Builder.CreateStore(PVal, PAddr);
8644 
8645  if (hasRuntimeEvaluationCaptureSize) {
// NOTE(review): original line 8646 (declaration of 'S' via
// CreateConstInBoundsGEP2_32) is missing from this scrape.
8647  llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8648  Info.SizesArray,
8649  /*Idx0=*/0,
8650  /*Idx1=*/I);
8651  Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
8652  CGF.Builder.CreateStore(
8653  CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
8654  SAddr);
8655  }
8656  }
8657  }
8658 }
8659 
8660 /// Emit the arguments to be passed to the runtime library based on the
8661 /// arrays of pointers, sizes and map types.
// Decays each offload array (built by the function above) to a pointer to
// its first element for the libomptarget call; when there are no entries,
// all four arguments are typed null pointers.
// NOTE(review): original line 8662 (the function name, presumably
// 'static void emitOffloadingArraysArgument(') is missing from this scrape.
8663  CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8664  llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8665  llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8666  CodeGenModule &CGM = CGF.CGM;
8667  if (Info.NumberOfPtrs) {
// GEP [N x i8*]* -> i8** (element 0) for each array.
8668  BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8669  llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8670  Info.BasePointersArray,
8671  /*Idx0=*/0, /*Idx1=*/0);
8672  PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8673  llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8674  Info.PointersArray,
8675  /*Idx0=*/0,
8676  /*Idx1=*/0);
8677  SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8678  llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
8679  /*Idx0=*/0, /*Idx1=*/0);
8680  MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8681  llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8682  Info.MapTypesArray,
8683  /*Idx0=*/0,
8684  /*Idx1=*/0);
8685  } else {
8686  BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8687  PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8688  SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8689  MapTypesArrayArg =
8690  llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8691  }
8692 }
8693 
8694 /// Check for inner distribute directive.
// Looks inside a target directive for a nested 'distribute' directive:
// directly under 'target' (or one level deeper, under a nested 'teams'),
// or directly under 'target teams'. Returns the nested directive when
// found, nullptr otherwise. Combined target+distribute forms and all
// non-target directives intentionally return nullptr / are unreachable.
8695 static const OMPExecutableDirective *
// NOTE(review): original line 8696 (the function name and parameters,
// presumably 'getNestedDistributeDirective(ASTContext &Ctx, const
// OMPExecutableDirective &D) {') is missing from this scrape.
8697  const auto *CS = D.getInnermostCapturedStmt();
8698  const auto *Body =
8699  CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8700  const Stmt *ChildStmt =
// NOTE(review): original line 8701 (the initializer, presumably
// 'CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);') is missing.
8702 
8703  if (const auto *NestedDir =
8704  dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8705  OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8706  switch (D.getDirectiveKind()) {
8707  case OMPD_target:
8708  if (isOpenMPDistributeDirective(DKind))
8709  return NestedDir;
// 'target' wrapping 'teams': look one level deeper for the distribute.
8710  if (DKind == OMPD_teams) {
8711  Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8712  /*IgnoreCaptured=*/true);
8713  if (!Body)
8714  return nullptr;
8715  ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8716  if (const auto *NND =
8717  dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8718  DKind = NND->getDirectiveKind();
8719  if (isOpenMPDistributeDirective(DKind))
8720  return NND;
8721  }
8722  }
8723  return nullptr;
8724  case OMPD_target_teams:
8725  if (isOpenMPDistributeDirective(DKind))
8726  return NestedDir;
8727  return nullptr;
// Other target forms either already combine distribute or cannot nest one.
8728  case OMPD_target_parallel:
8729  case OMPD_target_simd:
8730  case OMPD_target_parallel_for:
8731  case OMPD_target_parallel_for_simd:
8732  return nullptr;
8733  case OMPD_target_teams_distribute:
8734  case OMPD_target_teams_distribute_simd:
8735  case OMPD_target_teams_distribute_parallel_for:
8736  case OMPD_target_teams_distribute_parallel_for_simd:
8737  case OMPD_parallel:
8738  case OMPD_for:
8739  case OMPD_parallel_for:
8740  case OMPD_parallel_master:
8741  case OMPD_parallel_sections:
8742  case OMPD_for_simd:
8743  case OMPD_parallel_for_simd:
8744  case OMPD_cancel:
8745  case OMPD_cancellation_point:
8746  case OMPD_ordered:
8747  case OMPD_threadprivate:
8748  case OMPD_allocate:
8749  case OMPD_task:
8750  case OMPD_simd:
8751  case OMPD_sections:
8752  case OMPD_section:
8753  case OMPD_single:
8754  case OMPD_master:
8755  case OMPD_critical:
8756  case OMPD_taskyield:
8757  case OMPD_barrier:
8758  case OMPD_taskwait:
8759  case OMPD_taskgroup:
8760  case OMPD_atomic:
8761  case OMPD_flush:
8762  case OMPD_teams:
8763  case OMPD_target_data:
8764  case OMPD_target_exit_data:
8765  case OMPD_target_enter_data:
8766  case OMPD_distribute:
8767  case OMPD_distribute_simd:
8768  case OMPD_distribute_parallel_for:
8769  case OMPD_distribute_parallel_for_simd:
8770  case OMPD_teams_distribute:
8771  case OMPD_teams_distribute_simd:
8772  case OMPD_teams_distribute_parallel_for:
8773  case OMPD_teams_distribute_parallel_for_simd:
8774  case OMPD_target_update:
8775  case OMPD_declare_simd:
8776  case OMPD_declare_variant:
8777  case OMPD_declare_target:
8778  case OMPD_end_declare_target:
8779  case OMPD_declare_reduction:
8780  case OMPD_declare_mapper:
8781  case OMPD_taskloop:
8782  case OMPD_taskloop_simd:
8783  case OMPD_master_taskloop:
8784  case OMPD_master_taskloop_simd:
8785  case OMPD_parallel_master_taskloop:
8786  case OMPD_parallel_master_taskloop_simd:
8787  case OMPD_requires:
8788  case OMPD_unknown:
8789  llvm_unreachable("Unexpected directive.");
8790  }
8791  }
8792 
8793  return nullptr;
8794 }
8795 
8796 /// Emit the user-defined mapper function. The code generation follows the
8797 /// pattern in the example below.
8798 /// \code
8799 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
8800 /// void *base, void *begin,
8801 /// int64_t size, int64_t type) {
8802 /// // Allocate space for an array section first.
8803 /// if (size > 1 && !maptype.IsDelete)
8804 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8805 /// size*sizeof(Ty), clearToFrom(type));
8806 /// // Map members.
8807 /// for (unsigned i = 0; i < size; i++) {
8808 /// // For each component specified by this mapper:
8809 /// for (auto c : all_components) {
8810 /// if (c.hasMapper())
8811 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
8812 /// c.arg_type);
8813 /// else
8814 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
8815 /// c.arg_begin, c.arg_size, c.arg_type);
8816 /// }
8817 /// }
8818 /// // Delete the array section.
8819 /// if (size > 1 && maptype.IsDelete)
8820 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8821 /// size*sizeof(Ty), clearToFrom(type));
8822 /// }
8823 /// \endcode
8825  CodeGenFunction *CGF) {
8826  if (UDMMap.count(D) > 0)
8827  return;
8828  ASTContext &C = CGM.getContext();
8829  QualType Ty = D->getType();
8830  QualType PtrTy = C.getPointerType(Ty).withRestrict();
8831  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
8832  auto *MapperVarDecl =
8833  cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
8834  SourceLocation Loc = D->getLocation();
8835  CharUnits ElementSize = C.getTypeSizeInChars(Ty);
8836 
8837  // Prepare mapper function arguments and attributes.
8838  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
8840  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
8842  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
8844  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
8846  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
8848  FunctionArgList Args;
8849  Args.push_back(&HandleArg);
8850  Args.push_back(&BaseArg);
8851  Args.push_back(&BeginArg);
8852  Args.push_back(&SizeArg);
8853  Args.push_back(&TypeArg);
8854  const CGFunctionInfo &FnInfo =
8856  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
8857  SmallString<64> TyStr;
8858  llvm::raw_svector_ostream Out(TyStr);
8860  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
8862  Name, &CGM.getModule());
8864  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
8865  // Start the mapper function code generation.
8866  CodeGenFunction MapperCGF(CGM);
8867  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
8868  // Compute the starting and end addreses of array elements.
8869  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
8870  MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
8871  C.getPointerType(Int64Ty), Loc);
8872  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
8873  MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
8875  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
8876  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
8877  MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
8878  C.getPointerType(Int64Ty), Loc);
8879  // Prepare common arguments for array initiation and deletion.
8880  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
8881  MapperCGF.GetAddrOfLocalVar(&HandleArg),
8882  /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
8883  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
8884  MapperCGF.GetAddrOfLocalVar(&BaseArg),
8885  /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
8886  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
8887  MapperCGF.GetAddrOfLocalVar(&BeginArg),
8888  /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
8889 
8890  // Emit array initiation if this is an array section and \p MapType indicates
8891  // that memory allocation is required.
8892  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
8893  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
8894  ElementSize, HeadBB, /*IsInit=*/true);
8895 
8896  // Emit a for loop to iterate through SizeArg of elements and map all of them.
8897 
8898  // Emit the loop header block.
8899  MapperCGF.EmitBlock(HeadBB);
8900  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
8901  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
8902  // Evaluate whether the initial condition is satisfied.
8903  llvm::Value *IsEmpty =
8904  MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
8905  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
8906  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
8907 
8908  // Emit the loop body block.
8909  MapperCGF.EmitBlock(BodyBB);
8910  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
8911  PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
8912  PtrPHI->addIncoming(PtrBegin, EntryBB);
8913  Address PtrCurrent =
8914  Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
8915  .getAlignment()
8916  .alignmentOfArrayElement(ElementSize));
8917  // Privatize the declared variable of mapper to be the current array element.
8919  Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
8920  return MapperCGF
8921  .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
8922  .getAddress(MapperCGF);
8923  });
8924  (void)Scope.Privatize();
8925 
8926  // Get map clause information. Fill up the arrays with all mapped variables.
8931  MappableExprsHandler MEHandler(*D, MapperCGF);
8932  MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);
8933 
8934  // Call the runtime API __tgt_mapper_num_components to get the number of
8935  // pre-existing components.
8936  llvm::Value *OffloadingArgs[] = {Handle};
8937  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
8939  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
8940  PreviousSize,
8941  MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
8942 
8943  // Fill up the runtime mapper handle for all components.
8944  for (unsigned I = 0; I < BasePointers.size(); ++I) {
8945  llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
8946  *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
8947  llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
8948  Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
8949  llvm::Value *CurSizeArg = Sizes[I];
8950 
8951  // Extract the MEMBER_OF field from the map type.
8952  llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
8953  MapperCGF.EmitBlock(MemberBB);
8954  llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
8955  llvm::Value *Member = MapperCGF.Builder.CreateAnd(
8956  OriMapType,
8957  MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
8958  llvm::BasicBlock *MemberCombineBB =
8959  MapperCGF.createBasicBlock("omp.member.combine");
8960  llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
8961  llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
8962  MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
8963  // Add the number of pre-existing components to the MEMBER_OF field if it
8964  // is valid.
8965  MapperCGF.EmitBlock(MemberCombineBB);
8966  llvm::Value *CombinedMember =
8967  MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
8968  // Do nothing if it is not a member of previous components.
8969  MapperCGF.EmitBlock(TypeBB);
8970  llvm::PHINode *MemberMapType =
8971  MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
8972  MemberMapType->addIncoming(OriMapType, MemberBB);
8973  MemberMapType->addIncoming(CombinedMember, MemberCombineBB);
8974 
8975  // Combine the map type inherited from user-defined mapper with that
8976  // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
8977  // bits of the \a MapType, which is the input argument of the mapper
8978  // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
8979  // bits of MemberMapType.
8980  // [OpenMP 5.0], 1.2.6. map-type decay.
8981  // | alloc | to | from | tofrom | release | delete
8982  // ----------------------------------------------------------
8983  // alloc | alloc | alloc | alloc | alloc | release | delete
8984  // to | alloc | to | alloc | to | release | delete
8985  // from | alloc | alloc | from | from | release | delete
8986  // tofrom | alloc | to | from | tofrom | release | delete
8987  llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
8988  MapType,
8989  MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
8990  MappableExprsHandler::OMP_MAP_FROM));
8991  llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
8992  llvm::BasicBlock *AllocElseBB =
8993  MapperCGF.createBasicBlock("omp.type.alloc.else");
8994  llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
8995  llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
8996  llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
8997  llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
8998  llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
8999  MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9000  // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9001  MapperCGF.EmitBlock(AllocBB);
9002  llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9003  MemberMapType,
9004  MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9005  MappableExprsHandler::OMP_MAP_FROM)));
9006  MapperCGF.Builder.CreateBr(EndBB);
9007  MapperCGF.EmitBlock(AllocElseBB);
9008  llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9009  LeftToFrom,
9010  MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
9011  MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9012  // In case of to, clear OMP_MAP_FROM.
9013  MapperCGF.EmitBlock(ToBB);
9014  llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9015  MemberMapType,
9016  MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
9017  MapperCGF.Builder.CreateBr(EndBB);
9018  MapperCGF.EmitBlock(ToElseBB);
9019  llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9020  LeftToFrom,
9021  MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
9022  MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9023  // In case of from, clear OMP_MAP_TO.
9024  MapperCGF.EmitBlock(FromBB);
9025  llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9026  MemberMapType,
9027  MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
9028  // In case of tofrom, do nothing.
9029  MapperCGF.EmitBlock(EndBB);
9030  llvm::PHINode *CurMapType =
9031  MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9032  CurMapType->addIncoming(AllocMapType, AllocBB);
9033  CurMapType->addIncoming(ToMapType, ToBB);
9034  CurMapType->addIncoming(FromMapType, FromBB);
9035  CurMapType->addIncoming(MemberMapType, ToElseBB);
9036 
9037  // TODO: call the corresponding mapper function if a user-defined mapper is
9038  // associated with this map clause.
9039  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9040  // data structure.
9041  llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9042  CurSizeArg, CurMapType};
9043  MapperCGF.EmitRuntimeCall(
9045  OffloadingArgs);
9046  }
9047 
9048  // Update the pointer to point to the next element that needs to be mapped,
9049  // and check whether we have mapped all elements.
9050  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9051  PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9052  PtrPHI->addIncoming(PtrNext, BodyBB);
9053  llvm::Value *IsDone =
9054  MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9055  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9056  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9057 
9058  MapperCGF.EmitBlock(ExitBB);
9059  // Emit array deletion if this is an array section and \p MapType indicates
9060  // that deletion is required.
9061  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9062  ElementSize, DoneBB, /*IsInit=*/false);
9063 
9064  // Emit the function exit block.
9065  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9066  MapperCGF.FinishFunction();
9067  UDMMap.try_emplace(D, Fn);
9068  if (CGF) {
9069  auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9070  Decls.second.push_back(D);
9071  }
9072 }
9073 
9074 /// Emit the array initialization or deletion portion for user-defined mapper
9075 /// code generation. First, it evaluates whether an array section is mapped and
9076 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9077 /// true, and \a MapType indicates to not delete this array, array
9078 /// initialization code is generated. If \a IsInit is false, and \a MapType
9079 /// indicates to not this array, array deletion code is generated.
9081  CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9082  llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9083  CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
9084  StringRef Prefix = IsInit ? ".init" : ".del";
9085 
9086  // Evaluate if this is an array section.
9087  llvm::BasicBlock *IsDeleteBB =
9088  MapperCGF.createBasicBlock("omp.array" + Prefix + ".evaldelete");
9089  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.array" + Prefix);
9090  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
9091  Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9092  MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);
9093 
9094  // Evaluate if we are going to delete this section.
9095  MapperCGF.EmitBlock(IsDeleteBB);
9096  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9097  MapType,
9098  MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
9099  llvm::Value *DeleteCond;
9100  if (IsInit) {
9101  DeleteCond = MapperCGF.Builder.CreateIsNull(
9102  DeleteBit, "omp.array" + Prefix + ".delete");
9103  } else {
9104  DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9105  DeleteBit, "omp.array" + Prefix + ".delete");
9106  }
9107  MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);
9108 
9109  MapperCGF.EmitBlock(BodyBB);
9110  // Get the array size by multiplying element size and element number (i.e., \p
9111  // Size).
9112  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9113  Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9114  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9115  // memory allocation/deletion purpose only.
9116  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9117  MapType,
9118  MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9119  MappableExprsHandler::OMP_MAP_FROM)));
9120  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9121  // data structure.
9122  llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
9123  MapperCGF.EmitRuntimeCall(
9125 }
9126 
9128  CodeGenFunction &CGF, const OMPExecutableDirective &D,
9129  llvm::Value *DeviceID,
9130  llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9131  const OMPLoopDirective &D)>
9132  SizeEmitter) {
9134  const OMPExecutableDirective *TD = &D;
9135  // Get nested teams distribute kind directive, if any.
9138  if (!TD)
9139  return;
9140  const auto *LD = cast<OMPLoopDirective>(TD);
9141  auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
9142  PrePostActionTy &) {
9143  if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9144  llvm::Value *Args[] = {DeviceID, NumIterations};
9145  CGF.EmitRuntimeCall(
9147  }
9148  };
9149  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9150 }
9151 
// CGOpenMPRuntime::emitTargetCall — emit the host-side launch of a target
// region: generate the captured arguments, fill the offloading arrays, call
// the __tgt_target*() runtime entry points, and run the host version of the
// outlined function when offloading fails or no device binary is available.
// NOTE(review): this listing comes from a doxygen dump that dropped
// hyperlink-only lines (e.g. the 'CGOpenMPRuntime::emitTargetCall(' signature
// at doxygen line 9152); the numbered lines below are otherwise verbatim.
9153  CodeGenFunction &CGF, const OMPExecutableDirective &D,
9154  llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9155  const Expr *Device,
9156  llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9157  const OMPLoopDirective &D)>
9158  SizeEmitter) {
9159  if (!CGF.HaveInsertPoint())
9160  return;
9161 
9162  assert(OutlinedFn && "Invalid outlined function!");
9163 
9164  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
// NOTE(review): dropped line 9165 presumably declares the CapturedVars vector
// used below — confirm against the full source.
9166  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9167  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9168  PrePostActionTy &) {
9169  CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9170  };
9171  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9172 
// NOTE(review): dropped line 9173 presumably declares InputInfo (the
// offloading-arrays descriptor captured by the lambdas below) — confirm.
9174  llvm::Value *MapTypesArray = nullptr;
9175  // Fill up the pointer arrays and transfer execution to the device.
9176  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
9177  &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
9178  SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9179  // On top of the arrays that were filled up, the target offloading call
9180  // takes as arguments the device id as well as the host pointer. The host
9181  // pointer is used by the runtime library to identify the current target
9182  // region, so it only has to be unique and not necessarily point to
9183  // anything. It could be the pointer to the outlined function that
9184  // implements the target region, but we aren't using that so that the
9185  // compiler doesn't need to keep that, and could therefore inline the host
9186  // function if proven worthwhile during optimization.
9187 
9188  // From this point on, we need to have an ID of the target region defined.
9189  assert(OutlinedFnID && "Invalid outlined function ID!");
9190 
9191  // Emit device ID if any.
9192  llvm::Value *DeviceID;
9193  if (Device) {
9194  DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
9195  CGF.Int64Ty, /*isSigned=*/true);
9196  } else {
9197  DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9198  }
9199 
9200  // Emit the number of elements in the offloading arrays.
9201  llvm::Value *PointerNum =
9202  CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
9203 
9204  // Return value of the runtime offloading call.
9205  llvm::Value *Return;
9206 
9207  llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
9208  llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
9209 
9210  // Emit tripcount for the target loop-based directive.
9211  emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
9212 
9213  bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
9214  // The target region is an outlined function launched by the runtime
9215  // via calls __tgt_target() or __tgt_target_teams().
9216  //
9217  // __tgt_target() launches a target region with one team and one thread,
9218  // executing a serial region. This master thread may in turn launch
9219  // more threads within its team upon encountering a parallel region,
9220  // however, no additional teams can be launched on the device.
9221  //
9222  // __tgt_target_teams() launches a target region with one or more teams,
9223  // each with one or more threads. This call is required for target
9224  // constructs such as:
9225  // 'target teams'
9226  // 'target' / 'teams'
9227  // 'target teams distribute parallel for'
9228  // 'target parallel'
9229  // and so on.
9230  //
9231  // Note that on the host and CPU targets, the runtime implementation of
9232  // these calls simply call the outlined function without forking threads.
9233  // The outlined functions themselves have runtime calls to
9234  // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
9235  // the compiler in emitTeamsCall() and emitParallelCall().
9236  //
9237  // In contrast, on the NVPTX target, the implementation of
9238  // __tgt_target_teams() launches a GPU kernel with the requested number
9239  // of teams and threads so no additional calls to the runtime are required.
9240  if (NumTeams) {
9241  // If we have NumTeams defined this means that we have an enclosed teams
9242  // region. Therefore we also expect to have NumThreads defined. These two
9243  // values should be defined in the presence of a teams directive,
9244  // regardless of having any clauses associated. If the user is using teams
9245  // but no clauses, these two values will be the default that should be
9246  // passed to the runtime library - a 32-bit integer with the value zero.
9247  assert(NumThreads && "Thread limit expression should be available along "
9248  "with number of teams.");
9249  llvm::Value *OffloadingArgs[] = {DeviceID,
9250  OutlinedFnID,
9251  PointerNum,
9252  InputInfo.BasePointersArray.getPointer(),
9253  InputInfo.PointersArray.getPointer(),
9254  InputInfo.SizesArray.getPointer(),
9255  MapTypesArray,
9256  NumTeams,
9257  NumThreads};
// NOTE(review): dropped lines 9259-9260 presumably select between the nowait
// and regular __tgt_target_teams runtime entry based on HasNowait — confirm
// against the full source.
9258  Return = CGF.EmitRuntimeCall(
9261  OffloadingArgs);
9262  } else {
9263  llvm::Value *OffloadingArgs[] = {DeviceID,
9264  OutlinedFnID,
9265  PointerNum,
9266  InputInfo.BasePointersArray.getPointer(),
9267  InputInfo.PointersArray.getPointer(),
9268  InputInfo.SizesArray.getPointer(),
9269  MapTypesArray};
// NOTE(review): dropped line 9271 presumably selects the nowait variant of
// __tgt_target when HasNowait is set (the ': OMPRTL__tgt_target' on 9272 is
// the other arm of that conditional) — confirm against the full source.
9270  Return = CGF.EmitRuntimeCall(
9272  : OMPRTL__tgt_target),
9273  OffloadingArgs);
9274  }
9275 
9276  // Check the error code and execute the host version if required.
9277  llvm::BasicBlock *OffloadFailedBlock =
9278  CGF.createBasicBlock("omp_offload.failed");
9279  llvm::BasicBlock *OffloadContBlock =
9280  CGF.createBasicBlock("omp_offload.cont");
9281  llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
9282  CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
9283 
9284  CGF.EmitBlock(OffloadFailedBlock);
9285  if (RequiresOuterTask) {
9286  CapturedVars.clear();
9287  CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9288  }
9289  emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9290  CGF.EmitBranch(OffloadContBlock);
9291 
9292  CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
9293  };
9294 
9295  // Notify that the host version must be executed.
9296  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
9297  RequiresOuterTask](CodeGenFunction &CGF,
9298  PrePostActionTy &) {
9299  if (RequiresOuterTask) {
9300  CapturedVars.clear();
9301  CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9302  }
9303  emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9304  };
9305 
9306  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
9307  &CapturedVars, RequiresOuterTask,
9308  &CS](CodeGenFunction &CGF, PrePostActionTy &) {
9309  // Fill up the arrays with all the captured variables.
// NOTE(review): dropped lines 9310-9313 presumably declare the aggregate
// BasePointers/Pointers/Sizes/MapTypes containers appended to below — confirm.
9314 
9315  // Get mappable expression information.
9316  MappableExprsHandler MEHandler(D, CGF);
9317  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9318 
9319  auto RI = CS.getCapturedRecordDecl()->field_begin();
9320  auto CV = CapturedVars.begin();
9321  for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9322  CE = CS.capture_end();
9323  CI != CE; ++CI, ++RI, ++CV) {
// NOTE(review): dropped lines 9324-9327 presumably declare the per-capture
// CurBasePointers/CurPointers/CurSizes/CurMapTypes containers — confirm.
9328  MappableExprsHandler::StructRangeInfoTy PartialStruct;
9329 
9330  // VLA sizes are passed to the outlined region by copy and do not have map
9331  // information associated.
9332  if (CI->capturesVariableArrayType()) {
9333  CurBasePointers.push_back(*CV);
9334  CurPointers.push_back(*CV);
9335  CurSizes.push_back(CGF.Builder.CreateIntCast(
9336  CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
9337  // Copy to the device as an argument. No need to retrieve it.
9338  CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
9339  MappableExprsHandler::OMP_MAP_TARGET_PARAM |
9340  MappableExprsHandler::OMP_MAP_IMPLICIT);
9341  } else {
9342  // If we have any information in the map clause, we use it, otherwise we
9343  // just do a default mapping.
9344  MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
9345  CurSizes, CurMapTypes, PartialStruct);
9346  if (CurBasePointers.empty())
9347  MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
9348  CurPointers, CurSizes, CurMapTypes);
9349  // Generate correct mapping for variables captured by reference in
9350  // lambdas.
9351  if (CI->capturesVariable())
9352  MEHandler.generateInfoForLambdaCaptures(
9353  CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
9354  CurMapTypes, LambdaPointers);
9355  }
9356  // We expect to have at least an element of information for this capture.
9357  assert(!CurBasePointers.empty() &&
9358  "Non-existing map pointer for capture!");
9359  assert(CurBasePointers.size() == CurPointers.size() &&
9360  CurBasePointers.size() == CurSizes.size() &&
9361  CurBasePointers.size() == CurMapTypes.size() &&
9362  "Inconsistent map information sizes!");
9363 
9364  // If there is an entry in PartialStruct it means we have a struct with
9365  // individual members mapped. Emit an extra combined entry.
9366  if (PartialStruct.Base.isValid())
9367  MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
9368  CurMapTypes, PartialStruct);
9369 
9370  // We need to append the results of this capture to what we already have.
9371  BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
9372  Pointers.append(CurPointers.begin(), CurPointers.end());
9373  Sizes.append(CurSizes.begin(), CurSizes.end());
9374  MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
9375  }
9376  // Adjust MEMBER_OF flags for the lambdas captures.
9377  MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
9378  Pointers, MapTypes);
9379  // Map other list items in the map clause which are not captured variables
9380  // but "declare target link" global variables.
9381  MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
9382  MapTypes);
9383 
9384  TargetDataInfo Info;
9385  // Fill up the arrays and create the arguments.
9386  emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
9388  Info.PointersArray, Info.SizesArray,
9389  Info.MapTypesArray, Info);
9390  InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9391  InputInfo.BasePointersArray =
9393  InputInfo.PointersArray =
9395  InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
9396  MapTypesArray = Info.MapTypesArray;
9397  if (RequiresOuterTask)
9398  CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9399  else
9400  emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9401  };
9402 
9403  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
9404  CodeGenFunction &CGF, PrePostActionTy &) {
9405  if (RequiresOuterTask) {
9407  CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9408  } else {
9409  emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9410  }
9411  };
9412 
9413  // If we have a target function ID it means that we need to support
9414  // offloading, otherwise, just execute on the host. We need to execute on host
9415  // regardless of the conditional in the if clause if, e.g., the user do not
9416  // specify target triples.
9417  if (OutlinedFnID) {
9418  if (IfCond) {
9419  emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9420  } else {
9421  RegionCodeGenTy ThenRCG(TargetThenGen);
9422  ThenRCG(CGF);
9423  }
9424  } else {
9425  RegionCodeGenTy ElseRCG(TargetElseGen);
9426  ElseRCG(CGF);
9427  }
9428 }
9429 
// CGOpenMPRuntime::scanForTargetRegionsFunctions — recursively walk statement
// \p S looking for OpenMP directives that require device codegen, and emit a
// device entry point for each, using \p ParentName in kernel name mangling.
// NOTE(review): the doxygen dump dropped hyperlink-only lines (the signature
// at line 9430 and the Emit*DeviceFunction call lines inside each switch
// case); the numbered lines below are otherwise verbatim.
9431  StringRef ParentName) {
9432  if (!S)
9433  return;
9434 
9435  // Codegen OMP target directives that offload compute to the device.
9436  bool RequiresDeviceCodegen =
9437  isa<OMPExecutableDirective>(S) &&
9439  cast<OMPExecutableDirective>(S)->getDirectiveKind());
9440 
9441  if (RequiresDeviceCodegen) {
9442  const auto &E = *cast<OMPExecutableDirective>(S);
9443  unsigned DeviceID;
9444  unsigned FileID;
9445  unsigned Line;
9447  FileID, Line);
9448 
9449  // Is this a target region that should not be emitted as an entry point? If
9450  // so just signal we are done with this target region.
9452  ParentName, Line))
9453  return;
9454 
// NOTE(review): each case below presumably calls the matching
// CodeGenFunction::EmitOMPTarget*DeviceFunction(CGM, ParentName, ...) emitter
// on the dropped lines — confirm against the full source.
9455  switch (E.getDirectiveKind()) {
9456  case OMPD_target:
9458  cast<OMPTargetDirective>(E));
9459  break;
9460  case OMPD_target_parallel:
9462  CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9463  break;
9464  case OMPD_target_teams:
9466  CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9467  break;
9468  case OMPD_target_teams_distribute:
9470  CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9471  break;
9472  case OMPD_target_teams_distribute_simd:
9474  CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9475  break;
9476  case OMPD_target_parallel_for:
9478  CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9479  break;
9480  case OMPD_target_parallel_for_simd:
9482  CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9483  break;
9484  case OMPD_target_simd:
9486  CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9487  break;
9488  case OMPD_target_teams_distribute_parallel_for:
9490  CGM, ParentName,
9491  cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9492  break;
9493  case OMPD_target_teams_distribute_parallel_for_simd:
9496  CGM, ParentName,
9497  cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9498  break;
// All remaining directive kinds never require a device entry point here, so
// reaching this switch with one of them is a front-end invariant violation.
9499  case OMPD_parallel:
9500  case OMPD_for:
9501  case OMPD_parallel_for:
9502  case OMPD_parallel_master:
9503  case OMPD_parallel_sections:
9504  case OMPD_for_simd:
9505  case OMPD_parallel_for_simd:
9506  case OMPD_cancel:
9507  case OMPD_cancellation_point:
9508  case OMPD_ordered:
9509  case OMPD_threadprivate:
9510  case OMPD_allocate:
9511  case OMPD_task:
9512  case OMPD_simd:
9513  case OMPD_sections:
9514  case OMPD_section:
9515  case OMPD_single:
9516  case OMPD_master:
9517  case OMPD_critical:
9518  case OMPD_taskyield:
9519  case OMPD_barrier:
9520  case OMPD_taskwait:
9521  case OMPD_taskgroup:
9522  case OMPD_atomic:
9523  case OMPD_flush:
9524  case OMPD_teams:
9525  case OMPD_target_data:
9526  case OMPD_target_exit_data:
9527  case OMPD_target_enter_data:
9528  case OMPD_distribute:
9529  case OMPD_distribute_simd:
9530  case OMPD_distribute_parallel_for:
9531  case OMPD_distribute_parallel_for_simd:
9532  case OMPD_teams_distribute:
9533  case OMPD_teams_distribute_simd:
9534  case OMPD_teams_distribute_parallel_for:
9535  case OMPD_teams_distribute_parallel_for_simd:
9536  case OMPD_target_update:
9537  case OMPD_declare_simd:
9538  case OMPD_declare_variant:
9539  case OMPD_declare_target:
9540  case OMPD_end_declare_target:
9541  case OMPD_declare_reduction:
9542  case OMPD_declare_mapper:
9543  case OMPD_taskloop:
9544  case OMPD_taskloop_simd:
9545  case OMPD_master_taskloop:
9546  case OMPD_master_taskloop_simd:
9547  case OMPD_parallel_master_taskloop:
9548  case OMPD_parallel_master_taskloop_simd:
9549  case OMPD_requires:
9550  case OMPD_unknown:
9551  llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9552  }
9553  return;
9554  }
9555 
9556  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9557  if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9558  return;
9559 
// For other executable directives, only scan the statement they enclose.
9561  E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
9562  return;
9563  }
9564 
9565  // If this is a lambda function, look into its body.
9566  if (const auto *L = dyn_cast<LambdaExpr>(S))
9567  S = L->getBody();
9568 
9569  // Keep looking for target regions recursively.
9570  for (const Stmt *II : S->children())
9571  scanForTargetRegionsFunctions(II, ParentName);
9572 }
9573 
9575  // If emitting code for the host, we do not process FD here. Instead we do
9576  // the normal code generation.
9577  if (!CGM.getLangOpts().OpenMPIsDevice) {
9578  if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
9580  OMPDeclareTargetDeclAttr::getDeviceType(FD);
9581  // Do not emit device_type(nohost) functions for the host.
9582  if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9583  return true;
9584  }
9585  return false;
9586  }
9587 
9588  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9589  // Try to detect target regions in the function.
9590  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9591  StringRef Name = CGM.getMangledName(GD);
9592  scanForTargetRegionsFunctions(FD->getBody(), Name);
9594  OMPDeclareTargetDeclAttr::getDeviceType(FD);
9595  // Do not emit device_type(nohost) functions for the host.
9596  if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9597  return true;
9598  }
9599 
9600  // Do not to emit function if it is not marked as declare target.
9601  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9602  AlreadyEmittedTargetDecls.count(VD) == 0;
9603 }
9604 
9606  if (!CGM.getLangOpts().OpenMPIsDevice)
9607  return false;
9608 
9609  // Check if there are Ctors/Dtors in this declaration and look for target
9610  // regions in it. We use the complete variant to produce the kernel name
9611  // mangling.
9612  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9613  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9614  for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9615  StringRef ParentName =
9617  scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9618  }
9619  if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9620  StringRef ParentName =
9622  scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9623  }
9624  }
9625 
9626  // Do not to emit variable if it is not marked as declare target.
9628  OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9629  cast<VarDecl>(GD.getDecl()));
9630  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9631  (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9633  DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9634  return true;
9635  }
9636  return false;
9637 }
9638 
// CGOpenMPRuntime::registerTargetFirstprivateCopy (function name is on the
// dropped doxygen line 9640) — create and register a device-side copy of a
// constant firstprivate variable. The copy is named
// "__omp_offloading_firstprivate_<deviceid>_<fileid>_<var>_l<line>" so it is
// unique per target entry, added to compiler.used so it is not optimized
// away, and registered with the offload-entries table.
9639 llvm::Constant *
9641  const VarDecl *VD) {
9642  assert(VD->getType().isConstant(CGM.getContext()) &&
9643  "Expected constant variable.");
9644  StringRef VarName;
9645  llvm::Constant *Addr;
9646  llvm::GlobalValue::LinkageTypes Linkage;
9647  QualType Ty = VD->getType();
9648  SmallString<128> Buffer;
9649  {
// Scope limits the lifetime of the location info and the name stream; only
// VarName (backed by Buffer) survives.
9650  unsigned DeviceID;
9651  unsigned FileID;
9652  unsigned Line;
9654  FileID, Line);
9655  llvm::raw_svector_ostream OS(Buffer);
9656  OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9657  << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9658  VarName = OS.str();
9659  }
// NOTE(review): dropped lines 9660-9663 presumably set Linkage and create the
// internal global assigned to Addr — confirm against the full source.
9661  Addr =
9664  cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9665  CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9666  CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9668  VarName, Addr, VarSize,
9670  return Addr;
9671 }
9672 
// CGOpenMPRuntime::registerTargetGlobalVariable (signature is on the dropped
// doxygen line 9673) — register variable \p VD with the offload entries
// table. Non-declare-target variables emitted in device code are only
// recorded in EmittedNonTargetVariables; 'declare target to' variables are
// registered with their real size/linkage; 'link' variables (and 'to' under
// unified shared memory) are registered as pointer-sized entries with
// WeakAny linkage.
9674  llvm::Constant *Addr) {
9675  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9676  !CGM.getLangOpts().OpenMPIsDevice)
9677  return;
9679  OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9680  if (!Res) {
9681  if (CGM.getLangOpts().OpenMPIsDevice) {
9682  // Register non-target variables being emitted in device code (debug info
9683  // may cause this).
9684  StringRef VarName = CGM.getMangledName(VD);
9685  EmittedNonTargetVariables.try_emplace(VarName, Addr);
9686  }
9687  return;
9688  }
9689  // Register declare target variables.
// NOTE(review): dropped line 9690 presumably declares the entry-flags
// variable set in the branches below — confirm against the full source.
9691  StringRef VarName;
9692  CharUnits VarSize;
9693  llvm::GlobalValue::LinkageTypes Linkage;
9694 
9695  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
// NOTE(review): dropped lines 9696-9699 presumably complete this condition
// (no unified shared memory) and select the 'to' entry flag — confirm.
9698  VarName = CGM.getMangledName(VD);
9700  VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
9701  assert(!VarSize.isZero() && "Expected non-zero size of the variable");
9702  } else {
9703  VarSize = CharUnits::Zero();
9704  }
9705  Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
9706  // Temp solution to prevent optimizations of the internal variables.
9707  if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
// Create an extra constant "<var>_ref" global pointing at the variable and
// mark it compiler.used so internal declare-target variables survive.
9708  std::string RefName = getName({VarName, "ref"});
9709  if (!CGM.GetGlobalValue(RefName)) {
9710  llvm::Constant *AddrRef =
9711  getOrCreateInternalVariable(Addr->getType(), RefName);
9712  auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
9713  GVAddrRef->setConstant(/*Val=*/true);
9714  GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
9715  GVAddrRef->setInitializer(Addr);
9716  CGM.addCompilerUsedGlobal(GVAddrRef);
9717  }
9718  }
9719  } else {
9720  assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
9721  (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9723  "Declare target attribute must link or to with unified memory.");
9724  if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
// NOTE(review): dropped lines 9725/9727 presumably select the 'link' vs
// 'to' entry flag — confirm against the full source.
9726  else
9728 
9729  if (CGM.getLangOpts().OpenMPIsDevice) {
9730  VarName = Addr->getName();
9731  Addr = nullptr;
9732  } else {
9733  VarName = getAddrOfDeclareTargetVar(VD).getName();
9734  Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
9735  }
9736  VarSize = CGM.getPointerSize();
9737  Linkage = llvm::GlobalValue::WeakAnyLinkage;
9738  }
9739 
9741  VarName, Addr, VarSize, Flags, Linkage);
9742 }
9743 
9745  if (isa<FunctionDecl>(GD.getDecl()) ||
9746  isa<OMPDeclareReductionDecl>(GD.getDecl()))
9747  return emitTargetFunctions(GD);
9748 
9749  return emitTargetGlobalVariable(GD);
9750 }
9751 
9753  for (const VarDecl *VD : DeferredGlobalVariables) {
9755  OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9756  if (!Res)
9757  continue;
9758  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9760  CGM.EmitGlobal(VD);
9761  } else {
9762  assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9763  (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9765  "Expected link clause or to clause with unified memory.");
9766  (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
9767  }
9768  }
9769 }
9770 
9772  CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
9774  " Expected target-based directive.");
9775 }
9776 
9778  const OMPRequiresDecl *D) {
9779  for (const OMPClause *Clause : D->clauselists()) {
9780  if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
9782  break;
9783  }
9784  }
9785 }
9786 
9788  LangAS &AS) {
9789  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
9790  return false;
9791  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
9792  switch(A->getAllocatorType()) {
9793  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
9794  // Not supported, fallback to the default mem space.
9795  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
9796  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
9797  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
9798  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
9799  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
9800  case OMPAllocateDeclAttr::OMPConstMemAlloc:
9801  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
9802  AS = LangAS::Default;
9803  return true;
9804  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
9805  llvm_unreachable("Expected predefined allocator for the variables with the "
9806  "static storage.");
9807  }
9808  return false;
9809 }
9810 
9813 }
9814 
9816  CodeGenModule &CGM)
9817  : CGM(CGM) {
9818  if (CGM.getLangOpts().OpenMPIsDevice) {
9819  SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
9820  CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
9821  }
9822 }
9823 
9825  if (CGM.getLangOpts().OpenMPIsDevice)
9826  CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
9827 }
9828 
9830  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
9831  return true;
9832 
9833  const auto *D = cast<FunctionDecl>(GD.getDecl());
 9834  // Do not emit the function if it is marked as declare target, as it was
9835  // emitted.
9836  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
9837  if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
9838  if (auto *F = dyn_cast_or_null<llvm::Function>(
9839  CGM.GetGlobalValue(CGM.getMangledName(GD))))
9840  return !F->isDeclaration();
9841  return false;
9842  }
9843  return true;
9844  }
9845 
9846  return !AlreadyEmittedTargetDecls.insert(D).second;
9847 }
9848 
9850  // If we don't have entries or if we are emitting code for the device, we
9851  // don't need to do anything.
9852  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
9853  CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
9857  return nullptr;
9858 
9859  // Create and register the function that handles the requires directives.
9860  ASTContext &C = CGM.getContext();
9861 
9862  llvm::Function *RequiresRegFn;
9863  {
9864  CodeGenFunction CGF(CGM);
9865  const auto &FI = CGM.getTypes().arrangeNullaryFunction();
9866  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
9867  std::string ReqName = getName({"omp_offloading", "requires_reg"});
9868  RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
9869  CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
9870  OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
9871  // TODO: check for other requires clauses.
9872  // The requires directive takes effect only when a target region is
9873  // present in the compilation unit. Otherwise it is ignored and not
9874  // passed to the runtime. This avoids the runtime from throwing an error
9875  // for mismatching requires clauses across compilation units that don't
9876  // contain at least 1 target region.
9877  assert((HasEmittedTargetRegion ||
9880  "Target or declare target region expected.");
9882  Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
9884  llvm::ConstantInt::get(CGM.Int64Ty, Flags));
9885  CGF.FinishFunction();
9886  }
9887  return RequiresRegFn;
9888 }
9889 
9891  const OMPExecutableDirective &D,
9892  SourceLocation Loc,
9893  llvm::Function *OutlinedFn,
9894  ArrayRef<llvm::Value *> CapturedVars) {
9895  if (!CGF.HaveInsertPoint())
9896  return;
9897 
9898  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9900 
9901  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
9902  llvm::Value *Args[] = {
9903  RTLoc,
9904  CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
9905  CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
9907  RealArgs.append(std::begin(Args), std::end(Args));
9908  RealArgs.append(CapturedVars.begin(), CapturedVars.end());
9909 
9910  llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
9911  CGF.EmitRuntimeCall(RTLFn, RealArgs);
9912 }
9913 
9915  const Expr *NumTeams,
9916  const Expr *ThreadLimit,
9917  SourceLocation Loc) {
9918  if (!CGF.HaveInsertPoint())
9919  return;
9920 
9921  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9922 
9923  llvm::Value *NumTeamsVal =
9924  NumTeams
9925  ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
9926  CGF.CGM.Int32Ty, /* isSigned = */ true)
9927  : CGF.Builder.getInt32(0);
9928 
9929  llvm::Value *ThreadLimitVal =
9930  ThreadLimit
9931  ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
9932  CGF.CGM.Int32Ty, /* isSigned = */ true)
9933  : CGF.Builder.getInt32(0);
9934 
 9935  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
9936  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
9937  ThreadLimitVal};
9939  PushNumTeamsArgs);
9940 }
9941 
9943  CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
9944  const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
9945  if (!CGF.HaveInsertPoint())
9946  return;
9947 
9948  // Action used to replace the default codegen action and turn privatization
9949  // off.
9950  PrePostActionTy NoPrivAction;
9951 
9952  // Generate the code for the opening of the data environment. Capture all the
9953  // arguments of the runtime call by reference because they are used in the
9954  // closing of the region.
9955  auto &&BeginThenGen = [this, &D, Device, &Info,
9956  &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
9957  // Fill up the arrays with all the mapped variables.
9962 
9963  // Get map clause information.
9964  MappableExprsHandler MCHandler(D, CGF);
9965  MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
9966 
9967  // Fill up the arrays and create the arguments.
9968  emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
9969 
9970  llvm::Value *BasePointersArrayArg = nullptr;
9971  llvm::Value *PointersArrayArg = nullptr;
9972  llvm::Value *SizesArrayArg = nullptr;
9973  llvm::Value *MapTypesArrayArg = nullptr;
9974  emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
9975  SizesArrayArg, MapTypesArrayArg, Info);
9976 
9977  // Emit device ID if any.
9978  llvm::Value *DeviceID = nullptr;
9979  if (Device) {
9980  DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
9981  CGF.Int64Ty, /*isSigned=*/true);
9982  } else {
9983  DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9984  }
9985 
9986  // Emit the number of elements in the offloading arrays.
9987  llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
9988 
9989  llvm::Value *OffloadingArgs[] = {
9990  DeviceID, PointerNum, BasePointersArrayArg,
9991  PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
9993  OffloadingArgs);
9994 
9995  // If device pointer privatization is required, emit the body of the region
9996  // here. It will have to be duplicated: with and without privatization.
9997  if (!Info.CaptureDeviceAddrMap.empty())
9998  CodeGen(CGF);
9999  };
10000 
10001  // Generate code for the closing of the data region.
10002  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
10003  PrePostActionTy &) {
10004  assert(Info.isValid() && "Invalid data environment closing arguments.");
10005 
10006  llvm::Value *BasePointersArrayArg = nullptr;
10007  llvm::Value *PointersArrayArg = nullptr;
10008  llvm::Value *SizesArrayArg = nullptr;
10009  llvm::Value *MapTypesArrayArg = nullptr;
10010  emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
10011  SizesArrayArg, MapTypesArrayArg, Info);
10012 
10013  // Emit device ID if any.
10014  llvm::Value *DeviceID = nullptr;
10015  if (Device) {
10016  DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10017  CGF.Int64Ty, /*isSigned=*/true);
10018  } else {
10019  DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10020  }
10021 
10022  // Emit the number of elements in the offloading arrays.
10023  llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10024 
10025  llvm::Value *OffloadingArgs[] = {
10026  DeviceID, PointerNum, BasePointersArrayArg,
10027  PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
10029  OffloadingArgs);
10030  };
10031 
10032  // If we need device pointer privatization, we need to emit the body of the
10033  // region with no privatization in the 'else' branch of the conditional.
10034  // Otherwise, we don't have to do anything.
10035  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
10036  PrePostActionTy &) {
10037  if (!Info.CaptureDeviceAddrMap.empty()) {
10038  CodeGen.setAction(NoPrivAction);
10039  CodeGen(CGF);
10040  }
10041  };
10042 
10043  // We don't have to do anything to close the region if the if clause evaluates
10044  // to false.
10045  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
10046 
10047  if (IfCond) {
10048  emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
10049  } else {
10050  RegionCodeGenTy RCG(BeginThenGen);
10051  RCG(CGF);
10052  }
10053 
10054  // If we don't require privatization of device pointers, we emit the body in
10055  // between the runtime calls. This avoids duplicating the body code.
10056  if (Info.CaptureDeviceAddrMap.empty()) {
10057  CodeGen.setAction(NoPrivAction);
10058  CodeGen(CGF);
10059  }
10060 
10061  if (IfCond) {
10062  emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
10063  } else {
10064  RegionCodeGenTy RCG(EndThenGen);
10065  RCG(CGF);
10066  }
10067 }
10068 
10070  CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10071  const Expr *Device) {
10072  if (!CGF.HaveInsertPoint())
10073  return;
10074 
10075  assert((isa<OMPTargetEnterDataDirective>(D) ||
10076  isa<OMPTargetExitDataDirective>(D) ||
10077  isa<OMPTargetUpdateDirective>(D)) &&
10078  "Expecting either target enter, exit data, or update directives.");
10079 
10081  llvm::Value *MapTypesArray = nullptr;
10082  // Generate the code for the opening of the data environment.
10083  auto &&ThenGen = [this, &D, Device, &InputInfo,
10084  &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10085  // Emit device ID if any.
10086  llvm::Value *DeviceID = nullptr;
10087  if (Device) {
10088  DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10089  CGF.Int64Ty, /*isSigned=*/true);
10090  } else {
10091  DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10092  }
10093 
10094  // Emit the number of elements in the offloading arrays.
10095  llvm::Constant *PointerNum =
10096  CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10097 
10098  llvm::Value *OffloadingArgs[] = {DeviceID,
10099  PointerNum,
10100  InputInfo.BasePointersArray.getPointer(),
10101  InputInfo.PointersArray.getPointer(),
10102  InputInfo.SizesArray.getPointer(),
10103  MapTypesArray};
10104 
10105  // Select the right runtime function call for each expected standalone
10106  // directive.
10107  const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10108  OpenMPRTLFunction RTLFn;
10109  switch (D.getDirectiveKind()) {
10110  case OMPD_target_enter_data:
10111  RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
10113  break;
10114  case OMPD_target_exit_data:
10115  RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
10117  break;
10118  case OMPD_target_update:
10119  RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
10121  break;
10122  case OMPD_parallel:
10123  case OMPD_for:
10124  case OMPD_parallel_for:
10125  case OMPD_parallel_master:
10126  case OMPD_parallel_sections:
10127  case OMPD_for_simd:
10128  case OMPD_parallel_for_simd:
10129  case OMPD_cancel:
10130  case OMPD_cancellation_point:
10131  case OMPD_ordered:
10132  case OMPD_threadprivate:
10133  case OMPD_allocate:
10134  case OMPD_task:
10135  case OMPD_simd:
10136  case OMPD_sections:
10137  case OMPD_section:
10138  case OMPD_single:
10139  case OMPD_master:
10140  case OMPD_critical:
10141  case OMPD_taskyield:
10142  case OMPD_barrier:
10143  case OMPD_taskwait:
10144  case OMPD_taskgroup:
10145  case OMPD_atomic:
10146  case OMPD_flush:
10147  case OMPD_teams:
10148  case OMPD_target_data:
10149  case OMPD_distribute:
10150  case OMPD_distribute_simd:
10151  case OMPD_distribute_parallel_for:
10152  case OMPD_distribute_parallel_for_simd:
10153  case OMPD_teams_distribute:
10154  case OMPD_teams_distribute_simd:
10155  case OMPD_teams_distribute_parallel_for:
10156  case OMPD_teams_distribute_parallel_for_simd:
10157  case OMPD_declare_simd:
10158  case OMPD_declare_variant:
10159  case OMPD_declare_target:
10160  case OMPD_end_declare_target:
10161  case OMPD_declare_reduction:
10162  case OMPD_declare_mapper:
10163  case OMPD_taskloop:
10164  case OMPD_taskloop_simd:
10165  case OMPD_master_taskloop:
10166  case OMPD_master_taskloop_simd:
10167  case OMPD_parallel_master_taskloop:
10168  case OMPD_parallel_master_taskloop_simd:
10169  case OMPD_target:
10170  case OMPD_target_simd:
10171  case OMPD_target_teams_distribute:
10172  case OMPD_target_teams_distribute_simd:
10173  case OMPD_target_teams_distribute_parallel_for:
10174  case OMPD_target_teams_distribute_parallel_for_simd:
10175  case OMPD_target_teams:
10176  case OMPD_target_parallel:
10177  case OMPD_target_parallel_for:
10178  case OMPD_target_parallel_for_simd:
10179  case OMPD_requires:
10180  case OMPD_unknown:
10181  llvm_unreachable("Unexpected standalone target data directive.");
10182  break;
10183  }
10184  CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
10185  };
10186 
10187  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
10188  CodeGenFunction &CGF, PrePostActionTy &) {
10189  // Fill up the arrays with all the mapped variables.
10194 
10195  // Get map clause information.
10196  MappableExprsHandler MEHandler(D, CGF);
10197  MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
10198 
10199  TargetDataInfo Info;
10200  // Fill up the arrays and create the arguments.
10201  emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
10203  Info.PointersArray, Info.SizesArray,
10204  Info.MapTypesArray, Info);
10205  InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10206  InputInfo.BasePointersArray =
10208  InputInfo.PointersArray =
10209  Address(Info.PointersArray, CGM.getPointerAlign());
10210  InputInfo.SizesArray =
10211  Address(Info.SizesArray, CGM.getPointerAlign());
10212  MapTypesArray = Info.MapTypesArray;
10213  if (D.hasClausesOfKind<OMPDependClause>())
10214  CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10215  else
10216  emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10217  };
10218 
10219  if (IfCond) {
10220  emitIfClause(CGF, IfCond, TargetThenGen,
10221  [](CodeGenFunction &CGF, PrePostActionTy &) {});
10222  } else {
10223  RegionCodeGenTy ThenRCG(TargetThenGen);
10224  ThenRCG(CGF);
10225  }
10226 }
10227 
10228 namespace {
10229  /// Kind of parameter in a function with 'declare simd' directive.
10230  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
10231  /// Attribute set of the parameter.
10232  struct ParamAttrTy {
10233  ParamKindTy Kind = Vector;
10234  llvm::APSInt StrideOrArg;
10235  llvm::APSInt Alignment;
10236  };
10237 } // namespace
10238 
10239 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10240  ArrayRef<ParamAttrTy> ParamAttrs) {
10241  // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10242  // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10243  // of that clause. The VLEN value must be power of 2.
10244  // In other case the notion of the function`s "characteristic data type" (CDT)
10245  // is used to compute the vector length.
10246  // CDT is defined in the following order:
10247  // a) For non-void function, the CDT is the return type.
10248  // b) If the function has any non-uniform, non-linear parameters, then the
10249  // CDT is the type of the first such parameter.
10250  // c) If the CDT determined by a) or b) above is struct, union, or class
10251  // type which is pass-by-value (except for the type that maps to the
10252  // built-in complex data type), the characteristic data type is int.
10253  // d) If none of the above three cases is applicable, the CDT is int.
10254  // The VLEN is then determined based on the CDT and the size of vector
10255  // register of that ISA for which current vector version is generated. The
10256  // VLEN is computed using the formula below:
10257  // VLEN = sizeof(vector_register) / sizeof(CDT),
10258  // where vector register size specified in section 3.2.1 Registers and the
10259  // Stack Frame of original AMD64 ABI document.
10260  QualType RetType = FD->getReturnType();
10261  if (RetType.isNull())
10262  return 0;
10263  ASTContext &C = FD->getASTContext();
10264  QualType CDT;
10265  if (!RetType.isNull() && !RetType->isVoidType()) {
10266  CDT = RetType;
10267  } else {
10268  unsigned Offset = 0;
10269  if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10270  if (ParamAttrs[Offset].Kind == Vector)
10271  CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10272  ++Offset;
10273  }
10274  if (CDT.isNull()) {
10275  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10276  if (ParamAttrs[I + Offset].Kind == Vector) {
10277  CDT = FD->getParamDecl(I)->getType();
10278  break;
10279  }
10280  }
10281  }
10282  }
10283  if (CDT.isNull())
10284  CDT = C.IntTy;
10285  CDT = CDT->getCanonicalTypeUnqualified();
10286  if (CDT->isRecordType() || CDT->isUnionType())
10287  CDT = C.IntTy;
10288  return C.getTypeSize(CDT);
10289 }
10290 
10291 static void
10292 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10293  const llvm::APSInt &VLENVal,
10294  ArrayRef<ParamAttrTy> ParamAttrs,
10295  OMPDeclareSimdDeclAttr::BranchStateTy State) {
10296  struct ISADataTy {
10297  char ISA;
10298  unsigned VecRegSize;
10299  };
10300  ISADataTy ISAData[] = {
10301  {
10302  'b', 128
10303  }, // SSE
10304  {
10305  'c', 256
10306  }, // AVX
10307  {
10308  'd', 256
10309  }, // AVX2
10310  {
10311  'e', 512
10312  }, // AVX512
10313  };
10315  switch (State) {
10316  case OMPDeclareSimdDeclAttr::BS_Undefined:
10317  Masked.push_back('N');
10318  Masked.push_back('M');
10319  break;
10320  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10321  Masked.push_back('N');
10322  break;
10323  case OMPDeclareSimdDeclAttr::BS_Inbranch:
10324  Masked.push_back('M');
10325  break;
10326  }
10327  for (char Mask : Masked) {
10328  for (const ISADataTy &Data : ISAData) {
10329  SmallString<256> Buffer;
10330  llvm::raw_svector_ostream Out(Buffer);
10331  Out << "_ZGV" << Data.ISA << Mask;
10332  if (!VLENVal) {
10333  unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10334  assert(NumElts && "Non-zero simdlen/cdtsize expected");
10335  Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10336  } else {
10337  Out << VLENVal;
10338  }
10339  for (const ParamAttrTy &ParamAttr : ParamAttrs) {
10340  switch (ParamAttr.Kind){
10341  case LinearWithVarStride:
10342  Out << 's' << ParamAttr.StrideOrArg;
10343  break;
10344  case Linear:
10345  Out << 'l';
10346  if (!!ParamAttr.StrideOrArg)
10347  Out << ParamAttr.StrideOrArg;
10348  break;
10349  case Uniform:
10350  Out << 'u';
10351  break;
10352  case Vector:
10353  Out << 'v';
10354  break;
10355  }
10356  if (!!ParamAttr.Alignment)
10357  Out << 'a' << ParamAttr.Alignment;
10358  }
10359  Out << '_' << Fn->getName();
10360  Fn->addFnAttr(Out.str());
10361  }
10362  }
10363 }
10364 
10365 // These are the functions needed to mangle the names of the
10366 // vector functions generated by the compiler, according to the rules
10367 // defined in the "Vector Function ABI specifications for AArch64",
10368 // available at
10369 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10370 
10371 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
10372 ///
10373 /// TODO: Need to implement the behavior for reference marked with a
10374 /// var or no linear modifiers (1.b in the section). For this, we
10375 /// need to extend ParamKindTy to support the linear modifiers.
10376 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10377  QT = QT.getCanonicalType();
10378 
10379  if (QT->isVoidType())
10380  return false;
10381 
10382  if (Kind == ParamKindTy::Uniform)
10383  return false;
10384 
10385  if (Kind == ParamKindTy::Linear)
10386  return false;
10387 
10388  // TODO: Handle linear references with modifiers
10389 
10390  if (Kind == ParamKindTy::LinearWithVarStride)
10391  return false;
10392 
10393  return true;
10394 }
10395 
10396 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10397 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10398  QT = QT.getCanonicalType();
10399  unsigned Size = C.getTypeSize(QT);
10400 
10401  // Only scalars and complex within 16 bytes wide set PVB to true.
10402  if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10403  return false;
10404 
10405  if (QT->isFloatingType())
10406  return true;
10407 
10408  if (QT->isIntegerType())
10409  return true;
10410 
10411  if (QT->isPointerType())
10412  return true;
10413 
10414  // TODO: Add support for complex types (section 3.1.2, item 2).
10415 
10416  return false;
10417 }
10418 
10419 /// Computes the lane size (LS) of a return type or of an input parameter,
10420 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10421 /// TODO: Add support for references, section 3.2.1, item 1.
10422 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10423  if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10425  if (getAArch64PBV(PTy, C))
10426  return C.getTypeSize(PTy);
10427  }
10428  if (getAArch64PBV(QT, C))
10429  return C.getTypeSize(QT);
10430 
10431  return C.getTypeSize(C.getUIntPtrType());
10432 }
10433 
10434 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10435 // signature of the scalar function, as defined in 3.2.2 of the
10436 // AAVFABI.
10437 static std::tuple<unsigned, unsigned, bool>
10438 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10439  QualType RetType = FD->getReturnType().getCanonicalType();
10440 
10441  ASTContext &C = FD->getASTContext();
10442 
10443  bool OutputBecomesInput = false;
10444 
10446  if (!RetType->isVoidType()) {
10447  Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10448  if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10449  OutputBecomesInput = true;
10450  }
10451  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10452  QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10453  Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10454  }
10455 
10456  assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10457  // The LS of a function parameter / return value can only be a power
10458  // of 2, starting from 8 bits, up to 128.
10459  assert(std::all_of(Sizes.begin(), Sizes.end(),
10460  [](unsigned Size) {
10461  return Size == 8 || Size == 16 || Size == 32 ||
10462  Size == 64 || Size == 128;
10463  }) &&
10464  "Invalid size");
10465 
10466  return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10467  *std::max_element(std::begin(Sizes), std::end(Sizes)),
10468  OutputBecomesInput);
10469 }
10470 
10471 /// Mangle the parameter part of the vector function name according to
10472 /// their OpenMP classification. The mangling function is defined in
10473 /// section 3.5 of the AAVFABI.
10474 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10475  SmallString<256> Buffer;
10476  llvm::raw_svector_ostream Out(Buffer);
10477  for (const auto &ParamAttr : ParamAttrs) {
10478  switch (ParamAttr.Kind) {
10479  case LinearWithVarStride:
10480  Out << "ls" << ParamAttr.StrideOrArg;
10481  break;
10482  case Linear:
10483  Out << 'l';
10484  // Don't print the step value if it is not present or if it is
10485  // equal to 1.
10486  if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1)
10487  Out << ParamAttr.StrideOrArg;
10488  break;
10489  case Uniform:
10490  Out << 'u';
10491  break;
10492  case Vector:
10493  Out << 'v';
10494  break;
10495  }
10496 
10497  if (!!ParamAttr.Alignment)
10498  Out << 'a' << ParamAttr.Alignment;
10499  }
10500 
10501  return Out.str();
10502 }
10503 
10504 // Function used to add the attribute. The parameter `VLEN` is
10505 // templated to allow the use of "x" when targeting scalable functions
10506 // for SVE.
10507 template <typename T>
10508 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10509  char ISA, StringRef ParSeq,
10510  StringRef MangledName, bool OutputBecomesInput,
10511  llvm::Function *Fn) {
10512  SmallString<256> Buffer;
10513  llvm::raw_svector_ostream Out(Buffer);
10514  Out << Prefix << ISA << LMask << VLEN;
10515  if (OutputBecomesInput)
10516  Out << "v";
10517  Out << ParSeq << "_" << MangledName;
10518  Fn->addFnAttr(Out.str());
10519 }
10520 
10521 // Helper function to generate the Advanced SIMD names depending on
10522 // the value of the NDS when simdlen is not present.
10523 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10524  StringRef Prefix, char ISA,
10525  StringRef ParSeq, StringRef MangledName,
10526  bool OutputBecomesInput,
10527  llvm::Function *Fn) {
10528  switch (NDS) {
10529  case 8:
10530  addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10531  OutputBecomesInput, Fn);
10532  addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10533  OutputBecomesInput, Fn);
10534  break;
10535  case 16:
10536  addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10537  OutputBecomesInput, Fn);
10538  addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10539  OutputBecomesInput, Fn);
10540  break;
10541  case 32:
10542  addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10543  OutputBecomesInput, Fn);
10544  addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10545  OutputBecomesInput, Fn);
10546  break;
10547  case 64:
10548  case 128:
10549  addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10550  OutputBecomesInput, Fn);
10551  break;
10552  default:
10553  llvm_unreachable("Scalar type is too wide.");
10554  }
10555 }
10556 
10557 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10559  CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10560  ArrayRef<ParamAttrTy> ParamAttrs,
10561  OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10562  char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10563 
10564  // Get basic data for building the vector signature.
10565  const auto Data = getNDSWDS(FD, ParamAttrs);
10566  const unsigned NDS = std::get<0>(Data);
10567  const unsigned WDS = std::get<1>(Data);
10568  const bool OutputBecomesInput = std::get<2>(Data);
10569 
10570  // Check the values provided via `simdlen` by the user.
10571  // 1. A `simdlen(1)` doesn't produce vector signatures,
10572  if (UserVLEN == 1) {
10573  unsigned DiagID = CGM.getDiags().getCustomDiagID(
10575  "The clause simdlen(1) has no effect when targeting aarch64.");
10576  CGM.getDiags().Report(SLoc, DiagID);
10577  return;
10578  }
10579 
10580  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10581  // Advanced SIMD output.
10582  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10583  unsigned DiagID = CGM.getDiags().getCustomDiagID(
10584  DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10585  "power of 2 when targeting Advanced SIMD.");
10586  CGM.getDiags().Report(SLoc, DiagID);
10587  return;
10588  }
10589 
10590  // 3. Section 3.4.1. SVE fixed length must obey the architectural
10591  // limits.
10592  if (ISA == 's' && UserVLEN != 0) {
10593  if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10594  unsigned DiagID = CGM.getDiags().getCustomDiagID(
10595  DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10596  "lanes in the architectural constraints "
10597  "for SVE (min is 128-bit, max is "
10598  "2048-bit, by steps of 128-bit)");
10599  CGM.getDiags().Report(SLoc, DiagID) << WDS;
10600  return;
10601  }
10602  }
10603 
10604  // Sort out parameter sequence.
10605  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10606  StringRef Prefix = "_ZGV";
10607  // Generate simdlen from user input (if any).
10608  if (UserVLEN) {
10609  if (ISA == 's') {
10610  // SVE generates only a masked function.
10611  addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10612  OutputBecomesInput, Fn);
10613  } else {
10614  assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10615  // Advanced SIMD generates one or two functions, depending on
10616  // the `[not]inbranch` clause.
10617  switch (State) {
10618  case OMPDeclareSimdDeclAttr::BS_Undefined:
10619  addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10620  OutputBecomesInput, Fn);
10621  addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10622  OutputBecomesInput, Fn);
10623  break;
10624  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10625  addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10626  OutputBecomesInput, Fn);
10627  break;
10628  case OMPDeclareSimdDeclAttr::BS_Inbranch:
10629  addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10630  OutputBecomesInput, Fn);
10631  break;
10632  }
10633  }
10634  } else {
10635  // If no user simdlen is provided, follow the AAVFABI rules for
10636  // generating the vector length.
10637  if (ISA == 's') {
10638  // SVE, section 3.4.1, item 1.
10639  addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10640  OutputBecomesInput, Fn);
10641  } else {
10642  assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10643  // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10644  // two vector names depending on the use of the clause
10645  // `[not]inbranch`.
10646  switch (State) {
10647  case OMPDeclareSimdDeclAttr::BS_Undefined:
10648  addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10649  OutputBecomesInput, Fn);
10650  addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10651  OutputBecomesInput, Fn);
10652  break;
10653  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10654  addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10655  OutputBecomesInput, Fn);
10656  break;
10657  case OMPDeclareSimdDeclAttr::BS_Inbranch:
10658  addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10659  OutputBecomesInput, Fn);
10660  break;
10661  }
10662  }
10663  }
10664 }
10665 
// Emits the target-specific "vector variant" annotations for a function
// carrying 'omp declare simd' attributes (x86 'declare simd' mangling or
// AArch64 AdvSIMD/SVE vector-function names).
// NOTE(review): the extraction dropped the line preceding this one, which
// held the start of this definition (return type, qualified name and the
// FunctionDecl parameter 'FD') -- verify against the original file.
                                          llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  // Start from the most recent redeclaration; the loop below walks backwards
  // so attributes attached to ANY redeclaration are honored.
  FD = FD->getMostRecentDecl();
  // Map params to their positions in function decl.
  llvm::DenseMap<const Decl *, unsigned> ParamPositions;
  // For member functions, slot 0 is reserved for the implicit 'this'
  // parameter (keyed by the function declaration itself).
  if (isa<CXXMethodDecl>(FD))
    ParamPositions.try_emplace(FD, 0);
  unsigned ParamPos = ParamPositions.size();
  for (const ParmVarDecl *P : FD->parameters()) {
    ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
    ++ParamPos;
  }
  while (FD) {
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      // One attribute entry per parameter slot, default-initialized.
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
      auto NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
          ParmTy = PVD->getType();
        }
        // Use the explicit 'aligned(x : N)' value when present; otherwise a
        // default alignment is queried.
        // NOTE(review): the extraction dropped the line holding the
        // default-alignment expression inside getUnsigned(...) -- verify
        // against the original file.
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(C)
                : llvm::APSInt::getUnsigned(
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
      auto SI = Attr->steps_begin();
      auto MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        ParamAttr.Kind = Linear;
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            // Non-constant step: it is expected to name another parameter
            // whose runtime value supplies the stride.
            // NOTE(review): cast<> asserts on mismatch and never yields null,
            // so these 'if (const auto *... = cast<...>)' guards can never be
            // false; dyn_cast<> looks intended here -- confirm upstream.
            if (const auto *DRE =
                    cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.Kind = LinearWithVarStride;
                // Encode the position of the stride parameter, not a value.
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
                    ParamPositions[StridePVD->getCanonicalDecl()]);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        ++SI;
        // Modifiers are iterated in lock-step with steps but are otherwise
        // unused in this loop.
        ++MI;
      }
      // Optional simdlen(N) clause: evaluated to a constant VLEN.
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      // Dispatch to the target-specific emitter.
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        // On AArch64, both SVE ('s') and AdvSIMD ('n') variants may be
        // emitted depending on available target features.
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    // Continue with the previous redeclaration, if any.
    FD = FD->getPreviousDecl();
  }
}
10775 
10776 namespace {
10777 /// Cleanup action for doacross support.
10778 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
10779 public:
10780  static const int DoacrossFinArgs = 2;
10781 
10782 private:
10783  llvm::FunctionCallee RTLFn;
10784  llvm::Value *Args[DoacrossFinArgs];
10785 
10786 public:
10787  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
10788  ArrayRef<llvm::Value *> CallArgs)
10789  : RTLFn(RTLFn) {
10790  assert(CallArgs.size() == DoacrossFinArgs);
10791  std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10792  }
10793  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10794  if (!CGF.HaveInsertPoint())
10795  return;
10796  CGF.EmitRuntimeCall(RTLFn, Args);
10797  }
10798 };
10799 } // namespace
10800 
// Emits the doacross-loop initialization: builds an array of kmp_dim
// descriptors (lower/upper/stride per collapsed dimension), calls the
// doacross-init runtime entry, and registers a cleanup for finalization.
// NOTE(review): the extraction dropped the first line of this definition
// (return type, qualified name and the CodeGenFunction parameter 'CGF').
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // Lazily build and cache the kmp_dim record type the first time around.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim { // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  // One kmp_dim per collapsed loop dimension.
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // Zero-initialize so the lower bound ('lo') defaults to 0.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal =
        CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
                                 D.getNumIterations()->getType(), Int64Ty,
                                 D.getNumIterations()->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      // NOTE(review): the extraction dropped the line that begins the
      // pointer cast of the dims array to void* here.
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn =
      // NOTE(review): dropped line -- presumably the lookup of the
      // doacross-init runtime function.
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Register finalization so the matching "fini" call runs on scope exit.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn =
      // NOTE(review): dropped line -- presumably the lookup of the
      // doacross-fini runtime function.
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
10872 
// Emits a doacross 'ordered depend(source/sink: ...)' runtime call: stores
// the per-loop dependence vector into a temporary array and passes it to the
// appropriate runtime entry.
// NOTE(review): the extraction dropped the first line of this definition
// (return type, qualified name and the CodeGenFunction parameter 'CGF').
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  // kmp_int64 vector, one element per associated loop.
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    // Widen each loop counter to kmp_int64 before storing.
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  // 'source' posts the iteration; 'sink' waits for it.
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    // NOTE(review): dropped line -- presumably the lookup of the
    // doacross "post" runtime function assigned to RTLFn.
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    // NOTE(review): dropped line -- presumably the lookup of the
    // doacross "wait" runtime function assigned to RTLFn.
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}
10903 
// Emits a call to \p Callee with an artificial debug location; uses the
// nounwind call form when the callee is a function known not to throw.
// NOTE(review): the extraction dropped the first line of this definition
// (return type, qualified name and the CodeGenFunction/SourceLocation
// parameters).
                              llvm::FunctionCallee Callee,
                              ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  // Scoped: restores the previous debug location on return.
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      // Known-nothrow callee: emit without an invoke/landing pad.
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}
10918 
// Thin wrapper: calling an outlined function is the same as emitCall above.
// NOTE(review): the extraction dropped the first line of this definition
// (return type and qualified name).
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
10924 
// Fragment of a function-prolog hook: special handling applies only to
// function declarations marked 'declare target'.
// NOTE(review): the extraction dropped both this definition's signature line
// and the statement guarded by the inner 'if' -- verify against the
// original file.
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
}
10930 
// Default implementation: the native parameter's local address is used
// directly (no target-specific translation).
// NOTE(review): the extraction dropped the first line of this definition
// (return type, qualified name and the CodeGenFunction parameter).
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
10936 
10937 namespace {
10938 /// Cleanup action for allocate support.
10939 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
10940 public:
10941  static const int CleanupArgs = 3;
10942 
10943 private:
10944  llvm::FunctionCallee RTLFn;
10945  llvm::Value *Args[CleanupArgs];
10946 
10947 public:
10948  OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
10949  ArrayRef<llvm::Value *> CallArgs)
10950  : RTLFn(RTLFn) {
10951  assert(CallArgs.size() == CleanupArgs &&
10952  "Size of arguments does not match.");
10953  std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10954  }
10955  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10956  if (!CGF.HaveInsertPoint())
10957  return;
10958  CGF.EmitRuntimeCall(RTLFn, Args);
10959  }
10960 };
10961 } // namespace
10962 
// Returns the address of a local variable annotated with an
// 'omp allocate' attribute: computes the (aligned) allocation size, calls the
// allocate runtime entry with the requested allocator, and schedules the
// matching free call on scope exit. Returns Address::invalid() when the
// default allocation strategy should be used instead.
// NOTE(review): the extraction dropped the first line of this definition
// (return type, qualified name and the CodeGenFunction parameter 'CGF').
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return Address::invalid();
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
      !AA->getAllocator())
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    // Variably-modified type: the size is only known at runtime.
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    // Constant-size type: round up to the declared alignment at compile time.
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }
  llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is a enum (integer).
  // Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      // NOTE(review): dropped line -- presumably the runtime call performing
      // the allocation whose result initializes Addr.
                          CVD->getName() + ".void.addr");
  // Schedule the matching free for both the normal and EH exit paths.
  llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
                                                              Allocator};
  llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                                llvm::makeArrayRef(FiniArgs));
  // NOTE(review): dropped line -- presumably the pointer cast of Addr from
  // void* to the variable's pointer type.
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      CVD->getName() + ".addr");
  return Address(Addr, Align);
}
11016 
namespace {
// Per-selector data for an 'omp declare variant' context (selector set,
// selector kind, score and trait names -- see usage below).
using OMPContextSelectorData =
// NOTE(review): dropped line -- the aliased underlying type.
// A whole context: one entry per selector in the attribute.
using CompleteOMPContextSelectorData = SmallVector<OMPContextSelectorData, 4>;
} // anonymous namespace
11022 
/// Checks current context and returns true if it matches the context selector.
// NOTE(review): dropped line -- the first line of the template header (the
// non-type template parameters identifying the selector set and selector).
          typename... Arguments>
static bool checkContext(const OMPContextSelectorData &Data,
                         Arguments... Params) {
  assert(Data.CtxSet != OMP_CTX_SET_unknown && Data.Ctx != OMP_CTX_unknown &&
         "Unknown context selector or context selector set.");
  // Primary template: any selector without a dedicated specialization below
  // never matches.
  return false;
}
11032 
/// Checks for implementation={vendor(<vendor>)} context selector.
/// \returns true iff <vendor>="llvm", false otherwise.
template <>
// NOTE(review): dropped line -- the specialization's signature line
// (function name and template arguments).
    const OMPContextSelectorData &Data) {
  // compare_lower() == 0 means case-insensitive equality; every listed
  // vendor name must be "llvm".
  return llvm::all_of(Data.Names,
                      [](StringRef S) { return !S.compare_lower("llvm"); });
}
11041 
/// Checks for device={kind(<kind>)} context selector.
/// \returns true if <kind>="host" and compilation is for host.
/// true if <kind>="nohost" and compilation is for device.
/// true if <kind>="cpu" and compilation is for Arm, X86 or PPC CPU.
/// true if <kind>="gpu" and compilation is for NVPTX or AMDGCN.
/// false otherwise.
template <>
bool checkContext<OMP_CTX_SET_device, OMP_CTX_kind, CodeGenModule &>(
    const OMPContextSelectorData &Data, CodeGenModule &CGM) {
  // Every name listed in the selector must be satisfied (conjunction).
  for (StringRef Name : Data.Names) {
    // compare_lower() returns 0 on a case-insensitive match.
    if (!Name.compare_lower("host")) {
      if (CGM.getLangOpts().OpenMPIsDevice)
        return false;
      continue;
    }
    if (!Name.compare_lower("nohost")) {
      if (!CGM.getLangOpts().OpenMPIsDevice)
        return false;
      continue;
    }
    // Any other name is matched against the target architecture class.
    switch (CGM.getTriple().getArch()) {
    case llvm::Triple::arm:
    case llvm::Triple::armeb:
    case llvm::Triple::aarch64:
    case llvm::Triple::aarch64_be:
    case llvm::Triple::aarch64_32:
    case llvm::Triple::ppc:
    case llvm::Triple::ppc64:
    case llvm::Triple::ppc64le:
    case llvm::Triple::x86:
    case llvm::Triple::x86_64:
      // CPU-class architectures only satisfy "cpu".
      if (Name.compare_lower("cpu"))
        return false;
      break;
    case llvm::Triple::amdgcn:
    case llvm::Triple::nvptx:
    case llvm::Triple::nvptx64:
      // GPU-class architectures only satisfy "gpu".
      if (Name.compare_lower("gpu"))
        return false;
      break;
    case llvm::Triple::UnknownArch:
    case llvm::Triple::arc:
    case llvm::Triple::avr:
    case llvm::Triple::bpfel:
    case llvm::Triple::bpfeb:
    case llvm::Triple::hexagon:
    case llvm::Triple::mips:
    case llvm::Triple::mipsel:
    case llvm::Triple::mips64:
    case llvm::Triple::mips64el:
    case llvm::Triple::msp430:
    case llvm::Triple::r600:
    case llvm::Triple::riscv32:
    case llvm::Triple::riscv64:
    case llvm::Triple::sparc:
    case llvm::Triple::sparcv9:
    case llvm::Triple::sparcel:
    case llvm::Triple::systemz:
    case llvm::Triple::tce:
    case llvm::Triple::tcele:
    case llvm::Triple::thumb:
    case llvm::Triple::thumbeb:
    case llvm::Triple::xcore:
    case llvm::Triple::le32:
    case llvm::Triple::le64:
    case llvm::Triple::amdil:
    case llvm::Triple::amdil64:
    case llvm::Triple::hsail:
    case llvm::Triple::hsail64:
    case llvm::Triple::spir:
    case llvm::Triple::spir64:
    case llvm::Triple::kalimba:
    case llvm::Triple::shave:
    case llvm::Triple::lanai:
    case llvm::Triple::wasm32:
    case llvm::Triple::wasm64:
    case llvm::Triple::renderscript32:
    case llvm::Triple::renderscript64:
    case llvm::Triple::ve:
      // Architectures classified as neither CPU nor GPU here cannot match
      // any kind other than host/nohost (handled above).
      return false;
    }
  }
  return true;
}
11126 
// Returns true if every selector in \p ContextData matches the current
// compilation context (conjunction over all selectors).
// NOTE(review): dropped line -- the first line of this definition
// (presumably 'static bool matchesContext(CodeGenModule &CGM,').
    const CompleteOMPContextSelectorData &ContextData) {
  for (const OMPContextSelectorData &Data : ContextData) {
    switch (Data.Ctx) {
    case OMP_CTX_vendor:
      assert(Data.CtxSet == OMP_CTX_SET_implementation &&
             "Expected implementation context selector set.");
      // NOTE(review): dropped line -- the vendor checkContext<> call whose
      // negation guards this 'return false'.
        return false;
      break;
    case OMP_CTX_kind:
      assert(Data.CtxSet == OMP_CTX_SET_device &&
             "Expected device context selector set.");
      if (!checkContext<OMP_CTX_SET_device, OMP_CTX_kind, CodeGenModule &>(Data,
                                                                           CGM))
        return false;
      break;
    case OMP_CTX_unknown:
      llvm_unreachable("Unknown context selector kind.");
    }
  }
  return true;
}
11150 
// Extracts the full context-selector description (set, selector, score and
// trait names per entry) out of an 'omp declare variant' attribute.
static CompleteOMPContextSelectorData
// NOTE(review): dropped line -- the rest of the declarator (function name
// and the ASTContext parameter 'C').
                              const OMPDeclareVariantAttr *A) {
  CompleteOMPContextSelectorData Data;
  // The attribute stores parallel lists; index I addresses one selector.
  for (unsigned I = 0, E = A->scores_size(); I < E; ++I) {
    Data.emplace_back();
    auto CtxSet = static_cast<OpenMPContextSelectorSetKind>(
        *std::next(A->ctxSelectorSets_begin(), I));
    auto Ctx = static_cast<OpenMPContextSelectorKind>(
        *std::next(A->ctxSelectors_begin(), I));
    Data.back().CtxSet = CtxSet;
    Data.back().Ctx = Ctx;
    // Scores are integer constant expressions by construction.
    const Expr *Score = *std::next(A->scores_begin(), I);
    Data.back().Score = Score->EvaluateKnownConstInt(C);
    switch (Ctx) {
    case OMP_CTX_vendor:
      assert(CtxSet == OMP_CTX_SET_implementation &&
             "Expected implementation context selector set.");
      Data.back().Names =
          llvm::makeArrayRef(A->implVendors_begin(), A->implVendors_end());
      break;
    case OMP_CTX_kind:
      assert(CtxSet == OMP_CTX_SET_device &&
             "Expected device context selector set.");
      Data.back().Names =
          llvm::makeArrayRef(A->deviceKinds_begin(), A->deviceKinds_end());
      break;
    case OMP_CTX_unknown:
      llvm_unreachable("Unknown context selector kind.");
    }
  }
  return Data;
}
11184 
11185 static bool isStrictSubset(const CompleteOMPContextSelectorData &LHS,
11186  const CompleteOMPContextSelectorData &RHS) {
11187  llvm::SmallDenseMap<std::pair<int, int>, llvm::StringSet<>, 4> RHSData;
11188  for (const OMPContextSelectorData &D : RHS) {
11189  auto &Pair = RHSData.FindAndConstruct(std::make_pair(D.CtxSet, D.Ctx));
11190  Pair.getSecond().insert(D.Names.begin(), D.Names.end());
11191  }
11192  bool AllSetsAreEqual = true;
11193  for (const OMPContextSelectorData &D : LHS) {
11194  auto It = RHSData.find(std::make_pair(D.CtxSet, D.Ctx));
11195  if (It == RHSData.end())
11196  return false;
11197  if (D.Names.size() > It->getSecond().size())
11198  return false;
11199  if (llvm::set_union(It->getSecond(), D.Names))
11200  return false;
11201  AllSetsAreEqual =
11202  AllSetsAreEqual && (D.Names.size() == It->getSecond().size());
11203  }
11204 
11205  return LHS.size() != RHS.size() || !AllSetsAreEqual;
11206 }
11207 
11208 static bool greaterCtxScore(const CompleteOMPContextSelectorData &LHS,
11209  const CompleteOMPContextSelectorData &RHS) {
11210  // Score is calculated as sum of all scores + 1.
11211  llvm::APSInt LHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false);
11212  bool RHSIsSubsetOfLHS = isStrictSubset(RHS, LHS);
11213  if (RHSIsSubsetOfLHS) {
11214  LHSScore = llvm::APSInt::get(0);
11215  } else {
11216  for (const OMPContextSelectorData &Data : LHS) {
11217  if (Data.Score.getBitWidth() > LHSScore.getBitWidth()) {
11218  LHSScore = LHSScore.extend(Data.Score.getBitWidth()) + Data.Score;
11219  } else if (Data.Score.getBitWidth() < LHSScore.getBitWidth()) {
11220  LHSScore += Data.Score.extend(LHSScore.getBitWidth());
11221  } else {
11222  LHSScore += Data.Score;
11223  }
11224  }
11225  }
11226  llvm::APSInt RHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false);
11227  if (!RHSIsSubsetOfLHS && isStrictSubset(LHS, RHS)) {
11228  RHSScore = llvm::APSInt::get(0);
11229  } else {
11230  for (const OMPContextSelectorData &Data : RHS) {
11231  if (Data.Score.getBitWidth() > RHSScore.getBitWidth()) {
11232  RHSScore = RHSScore.extend(Data.Score.getBitWidth()) + Data.Score;
11233  } else if (Data.Score.getBitWidth() < RHSScore.getBitWidth()) {
11234  RHSScore += Data.Score.extend(RHSScore.getBitWidth());
11235  } else {
11236  RHSScore += Data.Score;
11237  }
11238  }
11239  }
11240  return llvm::APSInt::compareValues(LHSScore, RHSScore) >= 0;
11241 }
11242 
/// Finds the variant function that matches current context with its context
/// selector.
// NOTE(review): dropped line -- the first line of this definition
// (presumably the static declarator with the CodeGenModule parameter).
                                                  const FunctionDecl *FD) {
  // No variants declared: the original function is the only candidate.
  if (!FD->hasAttrs() || !FD->hasAttr<OMPDeclareVariantAttr>())
    return FD;
  // Iterate through all DeclareVariant attributes and check context selectors.
  const OMPDeclareVariantAttr *TopMostAttr = nullptr;
  CompleteOMPContextSelectorData TopMostData;
  for (const auto *A : FD->specific_attrs<OMPDeclareVariantAttr>()) {
    CompleteOMPContextSelectorData Data =
    // NOTE(review): dropped line -- the call extracting the attribute's
    // selector data (initializer of Data).
    if (!matchesContext(CGM, Data))
      continue;
    // If the attribute matches the context, find the attribute with the highest
    // score.
    if (!TopMostAttr || !greaterCtxScore(TopMostData, Data)) {
      TopMostAttr = A;
      TopMostData.swap(Data);
    }
  }
  if (!TopMostAttr)
    return FD;
  // The attribute's variant reference resolves to the function to call
  // instead of FD.
  return cast<FunctionDecl>(
      cast<DeclRefExpr>(TopMostAttr->getVariantFuncRef()->IgnoreParenImpCasts())
          ->getDecl());
}
11270 
11271 bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) {
11272  const auto *D = cast<FunctionDecl>(GD.getDecl());
11273  // If the original function is defined already, use its definition.
11274  StringRef MangledName = CGM.getMangledName(GD);
11275  llvm::GlobalValue *Orig = CGM.GetGlobalValue(MangledName);
11276  if (Orig && !Orig->isDeclaration())
11277  return false;
11278  const FunctionDecl *NewFD = getDeclareVariantFunction(CGM, D);
11279  // Emit original function if it does not have declare variant attribute or the
11280  // context does not match.
11281  if (NewFD == D)
11282  return false;
11283  GlobalDecl NewGD = GD.getWithDecl(NewFD);
11284  if (tryEmitDeclareVariant(NewGD, GD, Orig, IsForDefinition)) {
11285  DeferredVariantFunction.erase(D);
11286  return true;
11287  }
11288  DeferredVariantFunction.insert(std::make_pair(D, std::make_pair(NewGD, GD)));
11289  return true;
11290 }
11291 
// RAII that pushes the set of declarations named in the directive's
// 'nontemporal' clauses onto the runtime's stack; the destructor pops it.
// NOTE(review): dropped line -- the constructor's qualified-name line.
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  // Nothing to record if the directive has no nontemporal clauses.
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        // Otherwise the reference must be a member of the current class
        // (implicit or explicit 'this').
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}
11317 
// NOTE(review): dropped line -- the destructor's qualified-name line.
  // Pop only what the matching constructor pushed.
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}
11323 
// Returns true if \p VD was recorded as nontemporal by any active region.
// NOTE(review): dropped line -- the first line of this definition
// (return type, qualified name and the ValueDecl parameter 'VD').
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  // Search every pushed nontemporal-decls set, innermost or not.
  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
}
11331 
// RAII that, for directives with lastprivate(conditional:) clauses, pushes a
// record of the conditional variables (with unique global names) and the loop
// iteration variable onto the runtime's stack; the destructor pops it.
// NOTE(review): dropped line -- the constructor's qualified-name line.
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      NeedToPush(llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                              [](const OMPLastprivateClause *C) {
                                return C->getKind() ==
                                       OMPC_LASTPRIVATE_conditional;
                              })) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  // NOTE(review): dropped line -- the declaration of the 'Data' reference
  // bound to the newly pushed stack entry below.
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    // Give each conditional lastprivate variable a stable unique name used
    // for the backing global.
    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqeName.try_emplace(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          generateUniqueName(CGM, "pl_cond", Ref));
    }
  }
  Data.IVLVal = IVLVal;
  // In simd only mode or for simd directives no need to generate threadprivate
  // references for the loop iteration counter, we can use the original one
  // since outlining cannot happen in simd regions.
  // NOTE(review): dropped line -- the second half of this condition.
  if (CGF.getLangOpts().OpenMPSimd ||
    Data.UseOriginalIV = true;
    return;
  }
  // Otherwise derive a unique, file-identity-based name for the global IV.
  llvm::SmallString<16> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  PresumedLoc PLoc =
  // NOTE(review): dropped line -- the expression producing the presumed
  // location that initializes PLoc.
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << PLoc.getFilename() << EC.message();
  OS << "$pl_cond_" << ID.getDevice() << "_" << ID.getFile() << "_"
     << PLoc.getLine() << "_" << PLoc.getColumn() << "$iv";
  Data.IVName = OS.str();
}
11378 
// NOTE(review): dropped line -- the destructor's qualified-name line.
  // Pop only what the matching constructor pushed.
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
}
11384 
// Copies the current loop iteration variable into its artificial
// threadprivate "global" counterpart so inner parallel-for regions can see
// the last value; a no-op in simd-only mode or when the directive carries no
// lastprivate(conditional:) clause.
// NOTE(review): dropped line -- the first line of this definition
// (return type and qualified name).
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  if (CGM.getLangOpts().OpenMPSimd ||
      !llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                    [](const OMPLastprivateClause *C) {
                      return C->getKind() == OMPC_LASTPRIVATE_conditional;
                    }))
    return;
  // NOTE(review): dropped lines -- fetching the innermost entry ('Data')
  // from the lastprivate-conditional stack.
  if (Data.UseOriginalIV)
    return;
  // Global loop counter. Required to handle inner parallel-for regions.
  // global_iv = iv;
  Address GlobIVAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
      CGF, Data.IVLVal.getType(), Data.IVName);
  LValue GlobIVLVal = CGF.MakeAddrLValue(GlobIVAddr, Data.IVLVal.getType());
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(Data.IVLVal, S.getBeginLoc());
  CGF.EmitStoreOfScalar(IVVal, GlobIVLVal);
}
11405 
namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  CodeGenFunction &CGF;
  // NOTE(review): dropped line -- the declaration of the 'LPM' member (the
  // stack of lastprivate-conditional data this checker searches).
  // Results of a successful search (see getFoundData()):
  const Expr *FoundE = nullptr;   // referencing expression
  const Decl *FoundD = nullptr;   // canonical referenced declaration
  StringRef UniqueDeclName;       // unique global name for the variable
  LValue IVLVal;                  // iteration-variable lvalue
  StringRef IVName;               // name of the artificial global IV
  SourceLocation Loc;
  bool UseOriginalIV = false;     // true when the original IV can be used

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    // Search innermost-first for a region that registered this declaration.
    // NOTE(review): dropped line -- the range-for header iterating
    // llvm::reverse(LPM).
        llvm::reverse(LPM)) {
      auto It = D.DeclToUniqeName.find(E->getDecl());
      if (It == D.DeclToUniqeName.end())
        continue;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->getSecond();
      IVLVal = D.IVLVal;
      IVName = D.IVName;
      UseOriginalIV = D.UseOriginalIV;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    // Only members of the current class ('this'-based accesses) qualify.
    if (!CGF.IsWrappedCXXThis(E->getBase()))
      return false;
    // NOTE(review): dropped line -- the range-for header iterating
    // llvm::reverse(LPM), as above.
        llvm::reverse(LPM)) {
      auto It = D.DeclToUniqeName.find(E->getMemberDecl());
      if (It == D.DeclToUniqeName.end())
        continue;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->getSecond();
      IVLVal = D.IVLVal;
      IVName = D.IVName;
      UseOriginalIV = D.UseOriginalIV;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    // Generic traversal: visit glvalue sub-expressions until a match is hit.
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      CodeGenFunction &CGF,
      // NOTE(review): dropped line -- the 'LPM' constructor parameter.
      : CGF(CGF), LPM(LPM) {}
  std::tuple<const Expr *, const Decl *, StringRef, LValue, StringRef, bool>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, IVName,
                           UseOriginalIV);
  }
};
} // namespace
11478 
// If \p LHS references a lastprivate conditional variable, emits the OpenMP
// 5.0 "conditional update" protocol: under a critical region (or inline in
// simd-only mode), compares the stored last iteration number against the
// current one and, when not newer, stores the current IV and the variable's
// private value into internal globals.
// NOTE(review): dropped line -- the first line of this definition
// (return type, qualified name and the CodeGenFunction parameter 'CGF').
                                          const Expr *LHS) {
  // lastprivate(conditional:) exists only from OpenMP 5.0 on.
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  LastprivateConditionalRefChecker Checker(CGF, LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  StringRef IVName;
  bool UseOriginalIV;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, IVName, UseOriginalIV) =
      Checker.getFoundData();

  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, UniqueDeclName + "$iv");
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      LVal.getAddress(CGF).getElementType(), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // global_iv
  if (!UseOriginalIV) {
    Address IVAddr =
        getAddrOfArtificialThreadPrivate(CGF, IVLVal.getType(), IVName);
    IVLVal = CGF.MakeAddrLValue(IVAddr, IVLVal.getType());
  }
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, FoundE->getExprLoc());

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    FoundE](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal =
        CGF.EmitLoadOfScalar(LastIVLVal, FoundE->getExprLoc());
    // (last_iv <= global_iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    // Choose signed vs unsigned comparison from the IV's type.
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = global_iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, FoundE->getExprLoc());
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      // NOTE(review): dropped line -- the declaration of 'PrivVal'
      // initialized by the complex load below.
          CGF.EmitLoadOfComplex(LVal, FoundE->getExprLoc());
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    // NOTE(review): dropped line here -- presumably a scoped debug-location
    // helper suppressing the location for the branch.
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, FoundE->getExprLoc());
  }
}
11586 
// At region end, copies the recorded "last" value of a lastprivate
// conditional variable from its internal global back into the variable's
// private lvalue. Exits silently when the variable was never updated (no
// backing global was created).
// NOTE(review): dropped line -- the first line of this definition
// (return type and qualified name).
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  // lastprivate(conditional:) exists only from OpenMP 5.0 on.
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqeName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqeName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->getSecond();
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
11605 
11607  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
11608  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
11609  llvm_unreachable("Not supported in SIMD-only mode");
11610 }
11611 
11613  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
11614  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
11615  llvm_unreachable("Not supported in SIMD-only mode");
11616 }
11617 
11619  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
11620  const VarDecl *PartIDVar, const VarDecl *TaskTVar,
11621  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
11622  bool Tied, unsigned &NumberOfParts) {
11623  llvm_unreachable("Not supported in SIMD-only mode");
11624 }
11625 
11627  SourceLocation Loc,
11628  llvm::Function *OutlinedFn,
11629  ArrayRef<llvm::Value *> CapturedVars,
11630  const Expr *IfCond) {
11631  llvm_unreachable("Not supported in SIMD-only mode");
11632 }
11633 
11635  CodeGenFunction &CGF, StringRef CriticalName,
11636  const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
11637  const Expr *Hint) {
11638  llvm_unreachable("Not supported in SIMD-only mode");
11639 }
11640 
11642  const RegionCodeGenTy &MasterOpGen,
11643  SourceLocation Loc) {
11644  llvm_unreachable("Not supported in SIMD-only mode");
11645 }
11646 
11648  SourceLocation Loc) {
11649  llvm_unreachable("Not supported in SIMD-only mode");
11650 }
11651 
11653  CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
11654  SourceLocation Loc) {
11655  llvm_unreachable("Not supported in SIMD-only mode");
11656 }
11657 
11659  CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
11660  SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
11661  ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
11662  ArrayRef<const Expr *> AssignmentOps) {
11663  llvm_unreachable("Not supported in SIMD-only mode");
11664 }
11665 
11667  const RegionCodeGenTy &OrderedOpGen,
11668  SourceLocation Loc,
11669  bool IsThreads) {
11670  llvm_unreachable("Not supported in SIMD-only mode");
11671 }
11672 
11674  SourceLocation Loc,
11675  OpenMPDirectiveKind Kind,
11676  bool EmitChecks,
11677  bool ForceSimpleCall) {
11678  llvm_unreachable("Not supported in SIMD-only mode");
11679 }
11680 
11682  CodeGenFunction &CGF, SourceLocation Loc,
11683  const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
11684  bool Ordered, const DispatchRTInput &DispatchValues) {
11685  llvm_unreachable("Not supported in SIMD-only mode");
11686 }
11687 
11690  const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
11691  llvm_unreachable("Not supported in SIMD-only mode");
11692 }
11693 
11695  CodeGenFunction &CGF, SourceLocation Loc,
11696  OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
11697  llvm_unreachable("Not supported in SIMD-only mode");
11698 }
11699 
11701  SourceLocation Loc,
11702  unsigned IVSize,
11703  bool IVSigned) {
11704  llvm_unreachable("Not supported in SIMD-only mode");
11705 }
11706 
11708  SourceLocation Loc,
11709  OpenMPDirectiveKind DKind) {
11710  llvm_unreachable("Not supported in SIMD-only mode");
11711 }
11712 
11714  SourceLocation Loc,
11715  unsigned IVSize, bool IVSigned,
11716  Address IL, Address LB,
11717  Address UB, Address ST) {
11718  llvm_unreachable("Not supported in SIMD-only mode");
11719 }
11720 
11722  llvm::Value *NumThreads,
11723  SourceLocation Loc) {
11724  llvm_unreachable("Not supported in SIMD-only mode");
11725 }
11726 
11728  ProcBindKind ProcBind,
11729  SourceLocation Loc) {
11730  llvm_unreachable("Not supported in SIMD-only mode");
11731 }
11732 
11734  const VarDecl *VD,
11735  Address VDAddr,
11736  SourceLocation Loc) {
11737  llvm_unreachable("Not supported in SIMD-only mode");
11738 }
11739 
11741  const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
11742  CodeGenFunction *CGF) {
11743  llvm_unreachable("Not supported in SIMD-only mode");
11744 }
11745 
11747  CodeGenFunction &CGF, QualType VarType, StringRef Name) {
11748  llvm_unreachable("Not supported in SIMD-only mode");
11749 }
11750 
11753  SourceLocation Loc) {
11754  llvm_unreachable("Not supported in SIMD-only mode");
11755 }
11756 
11758  const OMPExecutableDirective &D,
11759  llvm::Function *TaskFunction,
11760  QualType SharedsTy, Address Shareds,
11761  const Expr *IfCond,
11762  const OMPTaskDataTy &Data) {
11763  llvm_unreachable("Not supported in SIMD-only mode");
11764 }
11765 
11767  CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
11768  llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
11769  const Expr *IfCond, const OMPTaskDataTy &Data) {
11770  llvm_unreachable("Not supported in SIMD-only mode");
11771 }
11772 
11776  ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
11777  assert(Options.SimpleReduction && "Only simple reduction is expected.");
11778  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
11779  ReductionOps, Options);
11780 }
11781 
11784  ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
11785  llvm_unreachable("Not supported in SIMD-only mode");
11786 }
11787 
11789  SourceLocation Loc,
11790  ReductionCodeGen &RCG,
11791  unsigned N) {
11792  llvm_unreachable("Not supported in SIMD-only mode");
11793 }
11794 
11796  SourceLocation Loc,
11797  llvm::Value *ReductionsPtr,
11798  LValue SharedLVal) {
11799  llvm_unreachable("Not supported in SIMD-only mode");
11800 }
11801 
11803  SourceLocation Loc) {
11804  llvm_unreachable("Not supported in SIMD-only mode");
11805 }
11806 
11808  CodeGenFunction &CGF, SourceLocation Loc,
11809  OpenMPDirectiveKind CancelRegion) {
11810  llvm_unreachable("Not supported in SIMD-only mode");
11811 }
11812 
11814  SourceLocation Loc, const Expr *IfCond,
11815  OpenMPDirectiveKind CancelRegion) {
11816  llvm_unreachable("Not supported in SIMD-only mode");
11817 }
11818 
11820  const OMPExecutableDirective &D, StringRef ParentName,
11821  llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
11822  bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
11823  llvm_unreachable("Not supported in SIMD-only mode");
11824 }
11825 
11827  CodeGenFunction &CGF, const OMPExecutableDirective &D,
11828  llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
11829  const Expr *Device,
11830  llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
11831  const OMPLoopDirective &D)>
11832  SizeEmitter) {
11833  llvm_unreachable("Not supported in SIMD-only mode");
11834 }
11835 
11837  llvm_unreachable("Not supported in SIMD-only mode");
11838 }
11839 
11841  llvm_unreachable("Not supported in SIMD-only mode");
11842 }
11843 
11845  return false;
11846 }
11847 
11849  const OMPExecutableDirective &D,
11850  SourceLocation Loc,
11851  llvm::Function *OutlinedFn,
11852  ArrayRef<llvm::Value *> CapturedVars) {
11853  llvm_unreachable("Not supported in SIMD-only mode");
11854 }
11855 
11857  const Expr *NumTeams,
11858  const Expr *ThreadLimit,
11859  SourceLocation Loc) {
11860  llvm_unreachable("Not supported in SIMD-only mode");
11861 }
11862 
11864  CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11865  const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
11866  llvm_unreachable("Not supported in SIMD-only mode");
11867 }
11868 
11870  CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11871  const Expr *Device) {
11872  llvm_unreachable("Not supported in SIMD-only mode");
11873 }
11874 
11876  const OMPLoopDirective &D,
11877  ArrayRef<Expr *> NumIterations) {
11878  llvm_unreachable("Not supported in SIMD-only mode");
11879 }
11880 
11882  const OMPDependClause *C) {
11883  llvm_unreachable("Not supported in SIMD-only mode");
11884 }
11885 
11886 const VarDecl *
11888  const VarDecl *NativeParam) const {
11889  llvm_unreachable("Not supported in SIMD-only mode");
11890 }
11891 
11892 Address
11894  const VarDecl *NativeParam,
11895  const VarDecl *TargetParam) const {
11896  llvm_unreachable("Not supported in SIMD-only mode");
11897 }
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition: CGCall.cpp:653
RecordDecl * buildImplicitRecord(StringRef Name, RecordDecl::TagKind TK=TTK_Struct) const
Create a new implicit TU-level CXXRecordDecl or RecordDecl declaration.
TBAAAccessInfo getTBAAInfoForSubobject(LValue Base, QualType AccessType)
getTBAAInfoForSubobject - Get TBAA information for an access with a given base lvalue.
void pushTerminate()
Push a terminate handler on the stack.
Definition: CGCleanup.cpp:252
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, llvm::Type *BaseLVType, CharUnits BaseLVAlignment, llvm::Value *Addr)
CharUnits alignTo(const CharUnits &Align) const
alignTo - Returns the next integer (mod 2**64) that is greater than or equal to this quantity and is ...
Definition: CharUnits.h:188
virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S)
Emit the captured statement body.
This represents the '#pragma omp task' directive.
Definition: StmtOpenMP.h:1986
static const Decl * getCanonicalDecl(const Decl *D)
Represents a function declaration or definition.
Definition: Decl.h:1783
llvm::IntegerType * IntTy
int
This represents the 'thread_limit' clause in the '#pragma omp ...' directive.
bool hasRequiresUnifiedSharedMemory() const
Return whether the unified_shared_memory has been specified.
virtual Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD)
Gets the OpenMP-specific address of the local variable.
llvm::SmallVector< LastprivateConditionalData, 4 > LastprivateConditionalStack
Stack for list of addresses of declarations in current context marked as lastprivate conditional...
bool checkContext< OMP_CTX_SET_implementation, OMP_CTX_vendor >(const OMPContextSelectorData &Data)
Checks for implementation={vendor(<vendor>)} context selector.
Expr * getUpperBoundVariable() const
Definition: StmtOpenMP.h:962
Other implicit parameter.
Definition: Decl.h:1555
i32 captured_struct **param SharedsTy A type which contains references the shared variables *param Shareds Context with the list of shared variables from the p *TaskFunction *param Data Additional data for task generation like final list of privates etc *TaskResultTy emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const OMPTaskDataTy &Data)
Complete object ctor.
Definition: ABI.h:25
unsigned getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it...
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition: Type.h:2614
unsigned llvm::PointerUnion< const Decl *, const Expr * > DeclTy
Definition: Descriptor.h:26
CanQualType VoidPtrTy
Definition: ASTContext.h:1044
Scheduling data for loop-based OpenMP directives.
Definition: OpenMPKinds.h:198
Struct to store the context selectors info.
Definition: OpenMPKinds.h:43
Destroyer * getDestroyer(QualType::DestructionKind destructionKind)
Definition: CGDecl.cpp:2028
A (possibly-)qualified type.
Definition: Type.h:654
base_class_range bases()
Definition: DeclCXX.h:587
llvm::Function * emitReductionFunction(SourceLocation Loc, llvm::Type *ArgsType, ArrayRef< const Expr *> Privates, ArrayRef< const Expr *> LHSExprs, ArrayRef< const Expr *> RHSExprs, ArrayRef< const Expr *> ReductionOps)
Emits reduction function.
bool isArrayType() const
Definition: Type.h:6570
ValueDecl * getMemberDecl() const
Retrieve the member declaration to which this expression refers.
Definition: Expr.h:2919
llvm::Type * ConvertTypeForMem(QualType T)
void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, llvm::BasicBlock *FalseBlock, uint64_t TrueCount)
EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g.
const CodeGenOptions & getCodeGenOpts() const
void EmitVarDecl(const VarDecl &D)
EmitVarDecl - Emit a local variable declaration.
Definition: CGDecl.cpp:168
GlobalDecl getWithDecl(const Decl *D)
Definition: GlobalDecl.h:117
Address CreateMemTemp(QualType T, const Twine &Name="tmp", Address *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment and cast...
Definition: CGExpr.cpp:139
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
Definition: StmtVisitor.h:193
CGRecordLayout - This class handles struct and union layout info while lowering AST types to LLVM typ...
bool HaveInsertPoint() const
HaveInsertPoint - True if an insertion point is defined.
const RecordDecl * KmpTaskTQTyRD
static void EmitOMPTargetParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForSimdDirective &S)
Emit device code for the target parallel for simd directive.
void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)
Emits single reduction combiner.
Specialize PointerLikeTypeTraits to allow LazyGenerationalUpdatePtr to be placed into a PointerUnion...
Definition: Dominators.h:30
void actOnDeviceGlobalVarEntriesInfo(const OffloadDeviceGlobalVarEntryInfoActTy &Action)
The standard implementation of ConstantInitBuilder used in Clang.
const ASTRecordLayout & getASTRecordLayout(const RecordDecl *D) const
Get or compute information about the layout of the specified record (struct/union/class) D...
Stmt - This represents one statement.
Definition: Stmt.h:66
Expr * getLowerBoundVariable() const
Definition: StmtOpenMP.h:954
OpenMPOffloadingRequiresDirFlags
Values for bit flags for marking which requires clauses have been used.
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee...
Definition: Type.cpp:557
virtual void emitUserDefinedReduction(CodeGenFunction *CGF, const OMPDeclareReductionDecl *D)
Emit code for the specified user defined reduction construct.
Expr * getLoopData(unsigned NumLoop)
Get the loop data.
bool capturesThis() const
Determine whether this capture handles the C++ &#39;this&#39; pointer.
Definition: Stmt.h:3414
VarDecl * getCapturedVar() const
Retrieve the declaration of the variable being captured.
Definition: Stmt.cpp:1201
bool emitTargetGlobal(GlobalDecl GD) override
Emit the global GD if it is meaningful for the target.
CharUnits getAlignOfGlobalVarInChars(QualType T) const
Return the alignment in characters that should be given to a global variable with type T...
bool hasNonTrivialCall(const ASTContext &Ctx) const
Determine whether this expression involves a call to any function that is not trivial.
Definition: Expr.cpp:3731
SmallVector< std::pair< OpenMPDependClauseKind, const Expr * >, 4 > Dependences
CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, StringRef Separator)
Constructor allowing to redefine the name separator for the variables.
bool isRecordType() const
Definition: Type.h:6594
Expr * getBase() const
Definition: Expr.h:2913
virtual void initLastprivateConditionalCounter(CodeGenFunction &CGF, const OMPExecutableDirective &S)
Initializes global counter for lastprivate conditional.
fileinfo_iterator fileinfo_end() const
void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues) override
This is used for non static scheduled types and when the ordered clause is present on the loop constr...
void clearLocThreadIdInsertPt(CodeGenFunction &CGF)
Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override
Gets the address of the native argument basing on the address of the target-specific parameter...
static CompleteOMPContextSelectorData translateAttrToContextSelectorData(ASTContext &C, const OMPDeclareVariantAttr *A)
SmallVector< const Expr *, 4 > LastprivateCopies
Decl - This represents one declaration (or definition), e.g.
Definition: DeclBase.h:88
specific_attr_iterator - Iterates over a subrange of an AttrVec, only providing attributes that are o...
Definition: AttrIterator.h:34
SourceLocation getBeginLoc() const
Returns starting location of directive kind.
Definition: StmtOpenMP.h:225
static llvm::Value * getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, llvm::Value *DefaultThreadLimitVal)
static llvm::Value * emitNumTeamsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
Emit the number of teams for a target directive.
virtual llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr *> LHSExprs, ArrayRef< const Expr *> RHSExprs, const OMPTaskDataTy &Data)
Emit a code for initialization of task reduction clause.
const RecordType * getAsStructureType() const
Definition: Type.cpp:573
llvm::SmallDenseSet< const VarDecl * > DeferredGlobalVariables
List of variables that can become declare target implicitly and, thus, must be emitted.
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
llvm::Function * emitTeamsOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP teams directive D.
VarDecl * getDefinition(ASTContext &)
Get the real (not just tentative) definition for this declaration.
Definition: Decl.cpp:2194
This represents the 'if' clause in the '#pragma omp ...' directive.
Definition: OpenMPClause.h:425
static llvm::Value * emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, ArrayRef< const Expr *> PrivateVars, ArrayRef< const Expr *> FirstprivateVars, ArrayRef< const Expr *> LastprivateVars, QualType PrivatesQTy, ArrayRef< PrivateDataTy > Privates)
Emit a privates mapping function for correct handling of private and firstprivate variables...
virtual Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name)
Creates artificial threadprivate variable with name Name and type VarType.
StringRef P
CapturedStmt * getInnermostCapturedStmt()
Get innermost captured statement for the construct.
Definition: StmtOpenMP.h:283
unsigned getFieldIndex() const
Returns the index of this field within its record, as appropriate for passing to ASTRecordLayout::get...
Definition: Decl.cpp:4074
Call to void __kmpc_threadprivate_register( ident_t *, void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);.
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...
Definition: Type.h:6452
ReductionCodeGen(ArrayRef< const Expr *> Shareds, ArrayRef< const Expr *> Privates, ArrayRef< const Expr *> ReductionOps)
static RecordDecl * createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef< PrivateDataTy > Privates)
bool isOpenMPTargetDataManagementDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target data offload directive.
The base class of the type hierarchy.
Definition: Type.h:1450
void EmitStoreThroughLValue(RValue Src, LValue Dst, bool isInit=false)
EmitStoreThroughLValue - Store the specified rvalue into the specified lvalue, where both are guarant...
Definition: CGExpr.cpp:1929
void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr *> Privates, ArrayRef< const Expr *> LHSExprs, ArrayRef< const Expr *> RHSExprs, ArrayRef< const Expr *> ReductionOps, ReductionOptionsTy Options) override
Emit a code for reduction clause.
void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device) override
Emit the data mapping/movement code associated with the directive D that should be of the form &#39;targe...
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
Definition: Diagnostic.h:1300
Represents an array type, per C99 6.7.5.2 - Array Declarators.
Definition: Type.h:2889
virtual const FieldDecl * lookup(const VarDecl *VD) const
Lookup the captured field decl for a variable.
llvm::Value * PointersArray
The array of section pointers passed to the runtime library.
virtual void clear()
virtual void completeDefinition()
Note that the definition of this type is now complete.
Definition: Decl.cpp:4434
bool isZero() const
isZero - Test whether the quantity equals zero.
Definition: CharUnits.h:116
QualType withConst() const
Definition: Type.h:826
bool markAsGlobalTarget(GlobalDecl GD)
Marks the declaration as already emitted for the device code and returns true, if it was marked alrea...
static const FunctionDecl * getDeclareVariantFunction(CodeGenModule &CGM, const FunctionDecl *FD)
Finds the variant function that matches current context with its context selector.
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2)
const TargetInfo & getTargetInfo() const
Definition: ASTContext.h:707
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
struct with the values to be passed to the dispatch runtime function
capture_const_range captures() const
Definition: DeclCXX.h:1019
Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
Definition: CGExpr.cpp:2345
void emitIfClause(CodeGenFunction &CGF, const Expr *Cond, const RegionCodeGenTy &ThenGen, const RegionCodeGenTy &ElseGen)
Emits code for OpenMP &#39;if&#39; clause using specified CodeGen function.
Expr * getCondition() const
Returns condition.
Definition: OpenMPClause.h:493
LValue EmitLValueForFieldInitialization(LValue Base, const FieldDecl *Field)
EmitLValueForFieldInitialization - Like EmitLValueForField, except that if the Field is a reference...
Definition: CGExpr.cpp:4212
const Expr * getAnyInitializer() const
Get the initializer for this variable, no matter which declaration it is attached to...
Definition: Decl.h:1219
virtual void checkArchForUnifiedAddressing(const OMPRequiresDecl *D)
Perform check on requires decl to ensure that target architecture supports unified addressing...
Describes the capture of a variable or of this, or of a C++1y init-capture.
Definition: LambdaCapture.h:25
void scanForTargetRegionsFunctions(const Stmt *S, StringRef ParentName)
Start scanning from statement S and emit all target regions found along the way.
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
Represents a C++ constructor within a class.
Definition: DeclCXX.h:2383
static llvm::Value * emitNumThreadsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
Emit the number of threads for a target directive.
static void EmitOMPAggregateReduction(CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, const VarDecl *RHSVar, const llvm::function_ref< void(CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *)> &RedOpGen, const Expr *XExpr=nullptr, const Expr *EExpr=nullptr, const Expr *UpExpr=nullptr)
Emit reduction operation for each element of array (required for array sections) LHS op = RHS...
bool ConstantFoldsToSimpleInteger(const Expr *Cond, bool &Result, bool AllowLabels=false)
ConstantFoldsToSimpleInteger - If the specified expression does not fold to a constant, or if it does but contains a label, return false.
void EmitOMPCopy(QualType OriginalType, Address DestAddr, Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy)
Emit proper copying of data from one variable to another.
OpenMPSchedType
Schedule types for &#39;omp for&#39; loops (these enumerators are taken from the enum sched_type in kmp...
bool isTrivialType(const ASTContext &Context) const
Return true if this is a trivial type per (C++0x [basic.types]p9)
Definition: Type.cpp:2241
SmallVector< const Expr *, 4 > ReductionCopies
SourceLocation getEndLoc() const
Returns ending location of directive.
Definition: StmtOpenMP.h:227
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
static const Stmt * getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body)
Checks if the Body is the CompoundStmt and returns its child statement iff there is only one that is ...
llvm::Function * GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S)
Represents a variable declaration or definition.
Definition: Decl.h:820
llvm::Value * getThreadID(CodeGenFunction &CGF, SourceLocation Loc)
Gets thread id value for the current thread.
QualType getReturnType() const
Definition: Decl.h:2445
This represents the 'num_threads' clause in the '#pragma omp ...' directive.
Definition: OpenMPClause.h:594
const T * getAs() const
Member-template getAs<specific type>.
Definition: Type.h:7002
LastprivateConditionalRAII(CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
The "union" keyword.
Definition: Type.h:5195
Extra information about a function prototype.
Definition: Type.h:3837
LangAS
Defines the address space values used by the address space qualifier of QualType. ...
Definition: AddressSpaces.h:25
llvm::GlobalVariable * finishAndCreateGlobal(As &&...args)
Given that this builder was created by beginning an array or struct directly on a ConstantInitBuilder...
virtual llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST)
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
bool field_empty() const
Definition: Decl.h:3971
DiagnosticsEngine & getDiags() const
void EmitVariablyModifiedType(QualType Ty)
EmitVLASize - Capture all the sizes for the VLA expressions in the given variably-modified type and s...
llvm::Value * getPointer() const
Definition: Address.h:37
llvm::Type * ConvertTypeForMem(QualType T)
ConvertTypeForMem - Convert type T into a llvm::Type.
Don&#39;t generate debug info.
static llvm::GlobalVariable * createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, ArrayRef< llvm::Constant *> Data, const Twine &Name, As &&... Args)
Represents a parameter to a function.
Definition: Decl.h:1595
Linkage
Describes the different kinds of linkage (C++ [basic.link], C99 6.2.2) that an entity may have...
Definition: Linkage.h:23
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, const Expr *Ref)
Generates unique name for artificial threadprivate variables.
void createOffloadEntriesAndInfoMetadata()
Creates all the offload entries in the current compilation unit along with the associated metadata...
llvm::FunctionCallee createForStaticInitFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_for_static_init_* runtime function for the specified size IVSize and sign IVSigned...
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static unsigned evaluateCDTSize(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
std::string getName(ArrayRef< StringRef > Parts) const
Get the platform-specific name separator.
static void EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelDirective &S)
A jump destination is an abstract label, branching to which may require a jump out through normal cle...
Struct that keeps all the relevant information that should be kept throughout a &#39;target data&#39; region...
QualType getTgtOffloadEntryQTy()
Returns __tgt_offload_entry type.
virtual void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device)
Emit the data mapping/movement code associated with the directive D that should be of the form &#39;targe...
SmallVector< const Expr *, 4 > PrivateVars
Represents a struct/union/class.
Definition: Decl.h:3748
clauselist_range clauselists()
Definition: DeclOpenMP.h:390
llvm::DenseMap< const VarDecl *, FieldDecl * > LambdaCaptureFields
Source[4] in Fortran, do not use for C++.
llvm::StringMap< llvm::WeakTrackingVH > EmittedNonTargetVariables
List of the global variables with their addresses that should not be emitted for the target...
static llvm::Value * emitDestructorsFunction(CodeGenModule &CGM, SourceLocation Loc, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy)
void emitDestroy(Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
emitDestroy - Immediately perform the destruction of the given object.
Definition: CGDecl.cpp:2103
bool HasEmittedTargetRegion
Flag for keeping track of whether a target region has been emitted.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, LValue BaseLV)
virtual llvm::Function * emitParallelOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP parallel directive D.
i32 captured_struct **param SharedsTy A type which contains references the shared variables *param Shareds Context with the list of shared variables from the p *TaskFunction *param IfCond Not a nullptr if if clause was nullptr *otherwise *param Data Additional data for task generation like final list of privates etc *virtual void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
virtual void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc)
Emits a master region.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:168
virtual void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, const Expr *Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Emit the target offloading code associated with D.
LineState State
virtual void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned)
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
void EmitExprAsInit(const Expr *init, const ValueDecl *D, LValue lvalue, bool capturedByInit)
EmitExprAsInit - Emits the code necessary to initialize a location in memory with the given initializ...
Definition: CGDecl.cpp:1870
Call to void *__kmpc_threadprivate_cached(ident_t *loc, kmp_int32 global_tid, void *data...
void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc) override
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...
The scope used to remap some variables as private in the OpenMP loop body (or other captured region e...
field_range fields() const
Definition: Decl.h:3963
SmallVector< const Expr *, 4 > LastprivateVars
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Stmt.cpp:275
Address EmitLoadOfReference(LValue RefLVal, LValueBaseInfo *PointeeBaseInfo=nullptr, TBAAAccessInfo *PointeeTBAAInfo=nullptr)
Definition: CGExpr.cpp:2323
virtual void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C)
Emit code for doacross ordered directive with 'depend' clause.
Represents a member of a struct/union/class.
Definition: Decl.h:2729
This represents clause 'lastprivate' in the '#pragma omp ...' directives.
CharUnits getAlignment() const
Definition: CGValue.h:316
SourceLocation translateFileLineCol(const FileEntry *SourceFile, unsigned Line, unsigned Col) const
Get the source location for the given file:line:col triplet.
llvm::SmallDenseMap< CanonicalDeclPtr< const Decl >, SmallString< 16 > > DeclToUniqeName
static std::string mangleVectorParameters(ArrayRef< ParamAttrTy > ParamAttrs)
Mangle the parameter part of the vector function name according to their OpenMP classification.
const CapturedStmt * getCapturedStmt(OpenMPDirectiveKind RegionKind) const
Returns the captured statement associated with the component region within the (combined) directive...
Definition: StmtOpenMP.h:266
An RAII object to set (and then clear) a mapping for an OpaqueValueExpr.
bool isNontemporalDecl(const ValueDecl *VD) const
Checks if the VD variable is marked as nontemporal declaration in current context.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C)
Computes the lane size (LS) of a return type or of an input parameter, as defined by LS(P) in 3...
OpenMPContextSelectorKind
OpenMP context selectors.
Definition: OpenMPKinds.h:30
might be used in Fortran
void setLocThreadIdInsertPt(CodeGenFunction &CGF, bool AtCurrentPoint=false)
std::pair< llvm::Value *, llvm::Value * > getSizes(unsigned N) const
Returns the size of the reduction item (in chars and total number of elements in the item)...
LValue EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, bool IsLowerBound=true)
Definition: CGExpr.cpp:3707
unsigned getNonVirtualBaseLLVMFieldNo(const CXXRecordDecl *RD) const
static void emitOffloadingArrays(CodeGenFunction &CGF, MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, MappableExprsHandler::MapValuesArrayTy &Pointers, MappableExprsHandler::MapValuesArrayTy &Sizes, MappableExprsHandler::MapFlagsArrayTy &MapTypes, CGOpenMPRuntime::TargetDataInfo &Info)
Emit the arrays used to pass the captures and map information to the offloading runtime library...
void startDefinition()
Starts the definition of this tag declaration.
Definition: Decl.cpp:4154
bool isReferenceType() const
Definition: Type.h:6516
This represents clause 'map' in the '#pragma omp ...' directives.
InitKind getInitializerKind() const
Get initializer kind.
Definition: DeclOpenMP.h:173
void EmitOMPAggregateAssign(Address DestAddr, Address SrcAddr, QualType OriginalType, const llvm::function_ref< void(Address, Address)> CopyGen)
Perform element by element copying of arrays with type OriginalType from SrcAddr to DestAddr using co...
OMPTargetGlobalVarEntryKind
Kind of the global variable entry.
virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const
Check if the specified ScheduleKind is dynamic.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, unsigned Index, const VarDecl *Var)
Given an array of pointers to variables, project the address of a given variable. ...
This represents clause 'to' in the '#pragma omp ...' directives.
static CharUnits Zero()
Zero - Construct a CharUnits quantity of zero.
Definition: CharUnits.h:53
clang::CharUnits operator*(clang::CharUnits::QuantityType Scale, const clang::CharUnits &CU)
Definition: CharUnits.h:212
static void emitAArch64DeclareSimdFunction(CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc)
Emit vector function attributes for AArch64, as defined in the AAVFABI.
llvm::Type * getKmpc_MicroPointerTy()
Returns pointer to kmpc_micro type.
OpenMPDirectiveKind getDirectiveKind() const
Definition: StmtOpenMP.h:301
__DEVICE__ int max(int __a, int __b)
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Decl.h:723
This is a common base class for loop directives ('omp simd', 'omp for', 'omp for simd' etc...
Definition: StmtOpenMP.h:420
void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc) override
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...
virtual bool emitTargetFunctions(GlobalDecl GD)
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
An r-value expression (a pr-value in the C++11 taxonomy) produces a temporary value.
Definition: Specifiers.h:125
CharUnits GetTargetTypeStoreSize(llvm::Type *Ty) const
Return the store size, in character units, of the given LLVM type.
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc)
EmitLoadOfComplex - Load a complex number from the specified l-value.
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:769
ArrayRef< ParmVarDecl * > parameters() const
Definition: Decl.h:2399
bool EvaluateAsBooleanCondition(bool &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsBooleanCondition - Return true if this is a constant which we can fold and convert to a boo...
virtual bool tryEmitDeclareVariant(const GlobalDecl &NewGD, const GlobalDecl &OldGD, llvm::GlobalValue *OrigAddr, bool IsForDefinition)
Tries to emit declare variant function for OldGD from NewGD.
bool isGLValue() const
Definition: Expr.h:261
OpenMPDistScheduleClauseKind
OpenMP attributes for 'dist_schedule' clause.
Definition: OpenMPKinds.h:151
Expr * getMapperVarRef()
Get the variable declared in the mapper.
Definition: DeclOpenMP.h:282
virtual void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr *> NumIterations)
Emit initialization for doacross loop nesting support.
BinaryOperatorKind
void emitUserDefinedMapper(const OMPDeclareMapperDecl *D, CodeGenFunction *CGF=nullptr)
Emit the function for the user defined mapper construct.
Expr * getInitializer()
Get initializer expression (if specified) of the declare reduction construct.
Definition: DeclOpenMP.h:170
QualType TgtOffloadEntryQTy
Type struct __tgt_offload_entry{ void *addr; // Pointer to the offload entry info.
static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, const RecordDecl *RD, const CGRecordLayout &RL, ArrayRef< llvm::Constant *> Data)
static llvm::Function * emitParallelOrTeamsOutlinedFunction(CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen)
OpenMPContextSelectorSetKind
OpenMP context selector sets.
Definition: OpenMPKinds.h:23
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, QualType Type, bool EmitDeclareReductionInit, const Expr *Init, const OMPDeclareReductionDecl *DRD, Address SrcAddr=Address::invalid())
Emit initialization of arrays of complex types.
static bool getAArch64MTV(QualType QT, ParamKindTy Kind)
Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
OpenMPScheduleClauseKind Schedule
Definition: OpenMPKinds.h:199
Address CreateElementBitCast(Address Addr, llvm::Type *Ty, const llvm::Twine &Name="")
Cast the element type of the given address to a different type, preserving information like the align...
Definition: CGBuilder.h:156
CharUnits - This is an opaque type for sizes expressed in character units.
Definition: CharUnits.h:38
bool Ordered
true if loop is ordered, false otherwise.
void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned) override
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a teams-kind directive.
APValue Val
Val - This is the value the expression can be folded to.
Definition: Expr.h:588
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...
Definition: CharUnits.h:183
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, const Expr *Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter) override
Emit the target offloading code associated with D.
virtual void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info)
Emit the target data mapping code associated with D.
const ArrayType * getAsArrayTypeUnsafe() const
A variant of getAs<> for array types which silently discards qualifiers from the outermost type...
Definition: Type.h:7053
void emitCleanups(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Emits cleanup code for the reduction item.
RAII for correct setting/restoring of CapturedStmtInfo.
const Stmt * getPreInitStmt() const
Get pre-initialization statement for the clause.
Definition: OpenMPClause.h:132
CharUnits getAlignment() const
Return the alignment of this pointer.
Definition: Address.h:66
void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc) override
Emits code for a taskyield directive.
llvm::StringSet ThreadPrivateWithDefinition
Set of threadprivate variables with the generated initializer.
bool empty() const
Return true if a there are no entries defined.
child_range children()
Definition: Stmt.cpp:224
String describing the source location.
void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc) override
Emits a master region.
virtual std::pair< llvm::Function *, llvm::Function * > getUserDefinedReduction(const OMPDeclareReductionDecl *D)
Get combiner/initializer for the specified user-defined reduction, if any.
void actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action)
OpenMPScheduleClauseModifier M2
Definition: OpenMPKinds.h:201
bool needsEHCleanup(QualType::DestructionKind kind)
Determines whether an EH cleanup is required to destroy a type with the given destruction kind...
SmallVector< const Expr *, 4 > PrivateCopies
llvm::CallInst * CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:274
llvm::StructType * getBaseSubobjectLLVMType() const
Return the "base subobject" LLVM type associated with this record.
unsigned NumberOfPtrs
The total number of pointers passed to the runtime library.
void operator()(CodeGenFunction &CGF) const
bool isConstexpr() const
Whether this variable is (C++11) constexpr.
Definition: Decl.h:1412
CharUnits getDeclAlign(const Decl *D, bool ForAlignof=false) const
Return a conservative estimate of the alignment of the specified decl D.
bool isLambda() const
Determine whether this class describes a lambda function object.
Definition: DeclCXX.h:960
Address adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Adjusts PrivateAddr to be used instead of the original variable address in normal operations...
Expr * getSizeExpr() const
Definition: Type.h:3058
Scope - A scope is a transient data structure that is used while parsing the program.
Definition: Scope.h:40
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition: Type.h:6256
virtual void emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn)
Marks function Fn with properly mangled versions of vector functions.
field_iterator field_begin() const
Definition: Decl.cpp:4425
bool usesReductionInitializer(unsigned N) const
Returns true if the initialization of the reduction item uses initializer from declare reduction cons...
static std::tuple< unsigned, unsigned, bool > getNDSWDS(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
bool isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target code offload directive.
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
ArrayRef< MappableComponent > MappableExprComponentListRef
void EmitIgnoredExpr(const Expr *E)
EmitIgnoredExpr - Emit an expression in a context which ignores the result.
Definition: CGExpr.cpp:182
void addCompilerUsedGlobal(llvm::GlobalValue *GV)
Add a global to a list to be added to the llvm.compiler.used metadata.
SmallVector< const Expr *, 4 > FirstprivateCopies
OpenMPDependClauseKind getDependencyKind() const
Get dependency type.
static void emitPrivatesInit(CodeGenFunction &CGF, const OMPExecutableDirective &D, Address KmpTaskSharedsPtr, LValue TDBase, const RecordDecl *KmpTaskTWithPrivatesQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool ForDup)
Emit initialization for private variables in task-based directives.
DiagnosticsEngine & getDiagnostics() const
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static llvm::Function * emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, const Expr *CombinerInitializer, const VarDecl *In, const VarDecl *Out, bool IsCombiner)
virtual void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D)
Emits OpenMP-specific function prolog.
static void emitOffloadingArraysArgument(CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info)
Emit the arguments to be passed to the runtime library based on the arrays of pointers, sizes and map types.
Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
static const OMPDeclareReductionDecl * getReductionInit(const Expr *ReductionOp)
Check if the combiner is a call to UDR combiner and if it is so return the UDR decl used for reductio...
llvm::Value * emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0)
Emits object of ident_t type with info for source location.
bool isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a worksharing directive.
static IntegerLiteral * Create(const ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l)
Returns a new integer literal with value 'V' and type 'type'.
Definition: Expr.cpp:928
SmallVector< const Expr *, 4 > ReductionOps
This represents clause 'is_device_ptr' in the '#pragma omp ...' directives.
llvm::omp::Directive OpenMPDirectiveKind
OpenMP directives.
Definition: OpenMPKinds.h:62
SmallVector< const Expr *, 4 > ReductionVars
virtual bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS)
Checks if the variable has associated OMPAllocateDeclAttr attribute with the predefined allocator and...
virtual void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind)
Call the appropriate runtime routine to notify that we finished all the work with current loop...
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
Definition: DeclBase.h:877
static bool greaterCtxScore(const CompleteOMPContextSelectorData &LHS, const CompleteOMPContextSelectorData &RHS)
const CGFunctionInfo & arrangeGlobalDeclaration(GlobalDecl GD)
Definition: CGCall.cpp:513
This represents clause 'from' in the '#pragma omp ...' directives.
LValue EmitLValueForField(LValue Base, const FieldDecl *Field)
Definition: CGExpr.cpp:4055
const VarDecl * translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const override
Translates the native parameter of outlined function if this is required for target.
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition: Decl.cpp:4685
virtual bool emitDeclareVariant(GlobalDecl GD, bool IsForDefinition)
Emits the definition of the declare variant function.
NodeId Parent
Definition: ASTDiff.cpp:191
OpenMP 4.0 [2.4, Array Sections].
Definition: ExprOpenMP.h:44
bool hasAttr() const
Definition: DeclBase.h:542
bool isValid() const
Definition: Address.h:35
std::pair< llvm::Value *, llvm::Value * > ComplexPairTy
AutoVarEmission EmitAutoVarAlloca(const VarDecl &var)
EmitAutoVarAlloca - Emit the alloca and debug information for a local variable.
Definition: CGDecl.cpp:1379
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition: Type.cpp:1690
void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override
Emits a critical region.
void emitTargetNumIterationsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Value *DeviceID, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Emit code that pushes the trip count of loops associated with constructs &#39;target teams distribute&#39; an...
Describes the capture of either a variable, or 'this', or variable-length array type.
Definition: Stmt.h:3389
const CodeGen::CGBlockInfo * BlockInfo
virtual Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const
Gets the address of the native argument basing on the address of the target-specific parameter...
CGBlockInfo - Information to generate a block literal.
Definition: CGBlocks.h:152
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
Definition: CGExpr.cpp:223
OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
LValueBaseInfo getBaseInfo() const
Definition: CGValue.h:319
RValue - This trivial value class is used to represent the result of an expression that is evaluated...
Definition: CGValue.h:39
static llvm::Function * emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, QualType SharedsPtrTy, llvm::Function *TaskFunction, llvm::Value *TaskPrivatesMap)
Emit a proxy function which accepts kmp_task_t as the second argument.
bool needCleanups(unsigned N)
Returns true if the private copy requires cleanups.
Class intended to support codegen of all kind of the reduction clauses.
llvm::FunctionCallee createRuntimeFunction(unsigned Function)
Returns specified OpenMP runtime function.
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition: CharUnits.h:179
Expr * getCombiner()
Get combiner expression of the declare reduction construct.
Definition: DeclOpenMP.h:152
TypeSourceInfo * getTrivialTypeSourceInfo(QualType T, SourceLocation Loc=SourceLocation()) const
Allocate a TypeSourceInfo where all locations have been initialized to a given location, which defaults to the empty location.
virtual bool isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static chunked.
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx, SmallVectorImpl< PartialDiagnosticAt > *Diag=nullptr) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
unsigned getNumLoops() const
Get number of loops associated with the clause.
This represents '#pragma omp requires...' directive.
Definition: DeclOpenMP.h:345
virtual void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, llvm::GlobalValue::LinkageTypes Linkage)
Creates offloading entry for the provided entry ID ID, address Addr, size Size, and flags Flags...
unsigned Offset
Definition: Format.cpp:1827
llvm::CallingConv::ID getRuntimeCC() const
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition: Expr.cpp:3401
This represents implicit clause 'depend' for the '#pragma omp task' directive.
virtual unsigned getDefaultFirstprivateAddressSpace() const
Returns default address space for the constant firstprivates, 0 by default.
KmpTaskTFields
Indexes of fields for type kmp_task_t.
void emitDeferredTargetDecls() const
Emit deferred declare target variables marked for deferred emission.
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
bool isValid() const
virtual void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Required to resolve existing problems in the runtime.
virtual void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr *> Vars, SourceLocation Loc)
Emit flush of the variables specified in 'omp flush' directive.
Emit flush of the variables specified in &#39;omp flush&#39; directive.
static void createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, ArrayRef< llvm::Constant *> Data, T &Parent)
bool addPrivate(const VarDecl *LocalVD, const llvm::function_ref< Address()> PrivateGen)
Registers LocalVD variable as a private and apply PrivateGen function for it to generate correspondin...
void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc) override
Emit code for 'taskwait' directive.
Represent the declaration of a variable (in which case it is an lvalue) a function (in which case it ...
Definition: Decl.h:619
This represents one expression.
Definition: Expr.h:108
Allow any unmodeled side effect.
Definition: Expr.h:615
static Address invalid()
Definition: Address.h:34
void loadOffloadInfoMetadata()
Loads all the offload entries information from the host IR metadata.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF, const OMPDeclareReductionDecl *DRD, const Expr *InitOp, Address Private, Address Original, QualType Ty)
Address getAddress(CodeGenFunction &CGF) const
Definition: CGValue.h:327
static llvm::Value * emitReduceCombFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N, const Expr *ReductionOp, const Expr *LHS, const Expr *RHS, const Expr *PrivateRef)
Emits reduction combiner function:
const AnnotatedLine * Line
Enters a new scope for capturing cleanups, all of which will be executed once the scope is exited...
Stmt * IgnoreContainers(bool IgnoreCaptured=false)
Skip no-op (attributed, compound) container stmts and skip captured stmt at the top, if IgnoreCaptured is true.
Definition: Stmt.cpp:134
bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a parallel-kind directive.
const CGFunctionInfo & arrangeNullaryFunction()
A nullary function is a freestanding function of type 'void ()'.
Definition: CGCall.cpp:695
std::string OMPHostIRFile
Name of the IR file that contains the result of the OpenMP target host code generation.
Definition: LangOptions.h:297
void emitKmpRoutineEntryT(QualType KmpInt32Ty)
Build type kmp_routine_entry_t (if not built yet).
const T * castAs() const
Member-template castAs<specific type>.
Definition: Type.h:7067
llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts) override
Emits outlined function for the OpenMP task directive D.
unsigned getLine() const
Return the presumed line number of this location.
llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST) override
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
unsigned IVSize
Size of the iteration variable in bits.
static bool checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD)
Checks if destructor function is required to be generated.
#define V(N, I)
Definition: ASTContext.h:2941
void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr *> CopyprivateVars, ArrayRef< const Expr *> DestExprs, ArrayRef< const Expr *> SrcExprs, ArrayRef< const Expr *> AssignmentOps) override
Emits a single region.
Represents a C++ destructor within a class.
Definition: DeclCXX.h:2649
VariableCaptureKind getCaptureKind() const
Determine the kind of capture.
Definition: Stmt.cpp:1197
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion)
QualType SavedKmpTaskloopTQTy
Saved kmp_task_t for taskloop-based directive.
static RecordDecl * createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpRoutineEntryPointerQTy)
VlaSizePair getVLASize(const VariableArrayType *vla)
Returns an LLVM value that corresponds to the size, in non-variably-sized elements, of a variable length array type, plus that largest non-variably-sized element type.
field_iterator field_end() const
Definition: Decl.h:3966
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition: Address.h:43
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name) override
Creates artificial threadprivate variable with name Name and type VarType.
DeclContext * getDeclContext()
Definition: DeclBase.h:438
static llvm::iterator_range< specific_clause_iterator< SpecificClause > > getClausesOfKind(ArrayRef< OMPClause *> Clauses)
Definition: StmtOpenMP.h:187
SourceLocation Begin
llvm::FunctionCallee createDispatchNextFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_next_* runtime function for the specified size IVSize and sign IVSigned...
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition: CharUnits.h:63
void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr *> Vars, SourceLocation Loc) override
Emit flush of the variables specified in 'omp flush' directive.
This represents 'ordered' clause in the '#pragma omp ...' directive.
llvm::SmallDenseSet< CanonicalDeclPtr< const Decl > > NontemporalDeclsSet
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, bool Chunked, bool Ordered)
Map the OpenMP loop schedule to the runtime enumeration.
QualType getType() const
Definition: Expr.h:137
llvm::GlobalValue::LinkageTypes getLLVMLinkageVarDefinition(const VarDecl *VD, bool IsConstant)
Returns LLVM linkage for a declarator.
DefinitionKind hasDefinition(ASTContext &) const
Check whether this variable is defined in this translation unit.
Definition: Decl.cpp:2203
TagDecl * getAsTagDecl() const
Retrieves the TagDecl that this type refers to, either because the type is a TagType or because it is...
Definition: Type.cpp:1698
Provides LLVM's BitmaskEnum facility to enumeration types declared in namespace clang.
SmallVector< const Expr *, 4 > FirstprivateVars
TBAAAccessInfo getTBAAInfo() const
Definition: CGValue.h:308
CharUnits alignmentOfArrayElement(CharUnits elementSize) const
Given that this is the alignment of the first element of an array, return the minimum alignment of an...
Definition: CharUnits.h:201
static llvm::Value * emitCopyprivateCopyFunction(CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef< const Expr *> CopyprivateVars, ArrayRef< const Expr *> DestExprs, ArrayRef< const Expr *> SrcExprs, ArrayRef< const Expr *> AssignmentOps, SourceLocation Loc)
virtual Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc)
Returns address of the threadprivate variable for the current thread.
LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T)
llvm::GlobalVariable::LinkageTypes getFunctionLinkage(GlobalDecl GD)
QualType getRecordType(const RecordDecl *Decl) const
Represents an unpacked "presumed" location which can be presented to the user.
void Emit(CodeGenFunction &CGF, Flags) override
Emit the cleanup.
virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Helper to emit outlined function for &#39;target&#39; directive.
void registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, llvm::Constant *Addr, llvm::Constant *ID, OMPTargetRegionEntryKind Flags)
Register target region entry.
bool isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a taskloop directive.
QualType getFunctionType(QualType ResultTy, ArrayRef< QualType > Args, const FunctionProtoType::ExtProtoInfo &EPI) const
Return a normal function type with a typed argument list.
Definition: ASTContext.h:1401
llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr) override
Emit a code for initialization of threadprivate variable.
llvm::Value * EmitCastToVoidPtr(llvm::Value *value)
Emit a cast to void* in the appropriate address space.
Definition: CGExpr.cpp:50
Allow UB that we can give a value, but not arbitrary unmodeled side effects.
Definition: Expr.h:613
const Type * getBaseElementTypeUnsafe() const
Get the base element type of this type, potentially discarding type qualifiers.
Definition: Type.h:6925
Expr * getInitPriv()
Get Priv variable of the initializer.
Definition: DeclOpenMP.h:180
const TargetInfo & getTarget() const
This represents clause 'firstprivate' in the '#pragma omp ...' directives.
ValueDecl * getDecl()
Definition: Expr.h:1247
void SetCommonAttributes(GlobalDecl GD, llvm::GlobalValue *GV)
Set attributes which are common to any form of a global definition (alias, Objective-C method...
void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc) override
Emit a taskgroup region.
*QualType KmpTaskTQTy
virtual void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc)
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...
const Qualifiers & getQuals() const
Definition: CGValue.h:311
fileinfo_iterator fileinfo_begin() const
Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc) override
Returns address of the threadprivate variable for the current thread.
bool isUnionType() const
Definition: Type.cpp:527
const LangOptions & getLangOpts() const
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
ASTContext & getContext() const
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition: Type.h:719
static bool getAArch64PBV(QualType QT, ASTContext &C)
Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
const SourceManager & SM
Definition: Format.cpp:1685
virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr)
Emits a critical region.
VarDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition: Decl.cpp:2088
GlobalDecl - represents a global declaration.
Definition: GlobalDecl.h:40
This file defines OpenMP AST classes for clauses.
bool hasClausesOfKind() const
Returns true if the current directive has one or more clauses of a specific kind. ...
Definition: StmtOpenMP.h:219
virtual bool emitTargetGlobalVariable(GlobalDecl GD)
Emit the global variable if it is a valid device global variable.
void finishAndAddTo(AggregateBuilderBase &parent)
Given that this builder was created by beginning an array or struct component on the given parent bui...
AttrVec & getAttrs()
Definition: DeclBase.h:490
CanQualType getCanonicalTypeUnqualified() const
virtual bool hasBody() const
Returns true if this Decl represents a declaration for a body of code, such as a function or method d...
Definition: DeclBase.h:992
bool hasAttrs() const
Definition: DeclBase.h:484
QualType KmpDependInfoTy
Type typedef struct kmp_depend_info { kmp_intptr_t base_addr; size_t len; struct { bool in:1; bool ou...
LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy)
Definition: CGExpr.cpp:2355
The l-value was considered opaque, so the alignment was determined from a type.
RecordDecl * getDecl() const
Definition: Type.h:4505
const char * getFilename() const
Return the presumed filename of this location.
Address CreateConstGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ...
Definition: CGBuilder.h:232
virtual Address getAddrOfDeclareTargetVar(const VarDecl *VD)
Returns the address of the variable marked as declare target with link clause OR as declare target wi...
const SpecificClause * getSingleClause() const
Gets a single clause of the specified kind associated with the current directive iff there is only on...
Definition: StmtOpenMP.h:205
static void emitReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp)
Emit reduction combiner.
Expr * getStrideVariable() const
Definition: StmtOpenMP.h:970
bool UseOriginalIV
True if original lvalue for loop counter can be used in codegen (simd region or simd only mode) and n...
This represents the 'num_teams' clause in the '#pragma omp ...' directive.
OpaqueValueExpr - An expression referring to an opaque object of a fixed type and value class...
Definition: Expr.h:1075
Address CreateBitCast(Address Addr, llvm::Type *Ty, const llvm::Twine &Name="")
Definition: CGBuilder.h:141
virtual void registerTargetGlobalVariable(const VarDecl *VD, llvm::Constant *Addr)
Checks if the provided global decl GD is a declare target variable and registers it when emitting cod...
#define false
Definition: stdbool.h:17
Kind
CanProxy< U > castAs() const
This captures a statement into a function.
Definition: Stmt.h:3376
QualType getCanonicalType() const
Definition: Type.h:6295
IdentFieldIndex
decl_type * getPreviousDecl()
Return the previous declaration of this declaration or NULL if this is the first declaration.
Definition: Redeclarable.h:203
llvm::Function * emitRequiresDirectiveRegFun()
Creates and returns a registration function for when at least one requires directives was used in the...
static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)
Returns default flags for the barriers depending on the directive, for which this barrier is going to ...
unsigned getColumn() const
Return the presumed column number of this location.
static with chunk adjustment (e.g., simd)
void pushDestroy(QualType::DestructionKind dtorKind, Address addr, QualType type)
pushDestroy - Push the standard destructor for the given type as at least a normal cleanup...
Definition: CGDecl.cpp:2055
void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion) override
Emit code for the 'cancel' construct.
Encodes a location in the source.
static const OMPExecutableDirective * getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D)
Check for inner distribute directive.
llvm::APSInt APSInt
llvm::FunctionCallee createDispatchFiniFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_fini_* runtime function for the specified size IVSize and sign IVSigned...
llvm::Type * getIdentTyPointerTy()
Returns pointer to ident_t type.
llvm::Value * MapTypesArray
The array of map types passed to the runtime library.
This represents the '#pragma omp declare reduction ...' directive.
Definition: DeclOpenMP.h:102
QualType getUIntPtrType() const
Return a type compatible with "uintptr_t" (C99 7.18.1.4), as defined by the target.
unsigned getOpenMPDefaultSimdAlign(QualType T) const
Get default simd alignment of the specified complete type in bits.
llvm::Value * EvaluateExprAsBool(const Expr *E)
EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...
Definition: CGExpr.cpp:164
llvm::PointerIntPair< llvm::Value *, 1, bool > Final
bool isVariablyModifiedType() const
Whether this type is a variably-modified type (C99 6.7.5).
Definition: Type.h:2166
virtual void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr *> CopyprivateVars, ArrayRef< const Expr *> DestExprs, ArrayRef< const Expr *> SrcExprs, ArrayRef< const Expr *> AssignmentOps)
Emits a single region.
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value *> CapturedVars, const Expr *IfCond) override
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
Address CreateConstInBoundsGEP2_32(Address Addr, unsigned Idx0, unsigned Idx1, const llvm::Twine &Name="")
Definition: CGBuilder.h:258
virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams...
llvm::OpenMPIRBuilder * getOpenMPIRBuilder()
Return a pointer to the configured OpenMPIRBuilder, if any.
void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) override
Emit an implicit/explicit barrier for OpenMP threads.
This is a basic class for representing single OpenMP executable directive.
Definition: StmtOpenMP.h:33
bool emitTargetGlobalVariable(GlobalDecl GD) override
Emit the global variable if it is a valid device global variable.
Lower bound for &#39;ordered&#39; versions.
ASTContext & getASTContext() const LLVM_READONLY
Definition: DeclBase.cpp:377
llvm::Value * getPointer(CodeGenFunction &CGF) const
Definition: CGValue.h:323
virtual void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion)
Emit code for the 'cancellation point' construct.
const Decl * getDecl() const
Definition: GlobalDecl.h:77
bool capturesVariable() const
Determine whether this capture handles a variable (by reference).
Definition: Stmt.h:3417
void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr *> NumIterations) override
Emit initialization for doacross loop nesting support.
Address CreateConstArrayGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = [n x T]* ...
Definition: CGBuilder.h:198
Set if the nonmonotonic schedule modifier was present.
virtual llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr)
Emit a code for initialization of threadprivate variable.
OpenMPLinearClauseKind Modifier
Modifier of &#39;linear&#39; clause.
Definition: OpenMPClause.h:101
const ParmVarDecl * getParamDecl(unsigned i) const
Definition: Decl.h:2422
static void EmitOMPTargetSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S)
Emit device code for the target simd directive.
void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) override
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
virtual void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value *> CapturedVars, const Expr *IfCond)
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
This is a basic class for representing single OpenMP clause.
Definition: OpenMPClause.h:51
i32 captured_struct **param SharedsTy A type which contains references the shared variables *param Shareds Context with the list of shared variables from the p *TaskFunction *param IfCond Not a nullptr if if clause was nullptr *otherwise *param Data Additional data for task generation like final list of privates etc *void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
bool isTrivialInitializer(const Expr *Init)
Determine whether the given initializer is trivial in the sense that it requires no code to be genera...
Definition: CGDecl.cpp:1650
CanQualType VoidTy
Definition: ASTContext.h:1016
bool IVSigned
Sign of the iteration variable.
void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) override
Call the appropriate runtime routine to initialize it before start of loop.
void emitAggregateType(CodeGenFunction &CGF, unsigned N)
Emits the code for the variable-modified type, if required.
static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForDirective &S)
virtual void emitLastprivateConditionalFinalUpdate(CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, SourceLocation Loc)
Gets the address of the global copy used for lastprivate conditional update, if any.
Maps the expression for the lastprivate variable to the global copy used to store new value because o...
bool isAnyPointerType() const
Definition: Type.h:6508
This declaration is only a declaration.
Definition: Decl.h:1156
unsigned size() const
Return number of entries defined so far.
virtual void Enter(CodeGenFunction &CGF)
An aligned address.
Definition: Address.h:24
llvm::FunctionCallee createDispatchInitFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_init_* runtime function for the specified size IVSize and sign IVSigned...
llvm::APInt APInt
Definition: Integral.h:27
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
Stmt * getCapturedStmt()
Retrieve the statement being captured.
Definition: Stmt.h:3477
void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) override
Emit the target data mapping code associated with D.
DestructionKind isDestructedType() const
Returns a nonzero value if objects of this type require non-trivial work to clean up after...
Definition: Type.h:1174
llvm::Value * getCriticalRegionLock(StringRef CriticalName)
Returns corresponding lock object for the specified critical region name.
unsigned getCustomDiagID(Level L, const char(&FormatString)[N])
Return an ID for a diagnostic with the specified format string and level.
Definition: Diagnostic.h:782
MangleContext & getMangleContext()
Gets the mangle context.
Definition: CGCXXABI.h:96
void emitThreadPrivateVarInit(CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc)
Emits initialization code for the threadprivate variables.
Complete object dtor.
Definition: ABI.h:35
JumpDest getJumpDestInCurrentScope(llvm::BasicBlock *Target)
The given basic block lies in the current EH scope, but may be a target of a potentially scope-crossi...
virtual void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc)
Emits code for a taskyield directive.
void EmitAggregateAssign(LValue Dest, LValue Src, QualType EltTy)
Emit an aggregate assignment.
QualType getType() const
Definition: CGValue.h:264
CharUnits getNaturalTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, bool forPointeeType=false)
bool hasTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum) const
Return true if a target region entry with the provided information exists.
virtual void functionFinished(CodeGenFunction &CGF)
Cleans up references to the objects in finished function.
virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value *> Args=llvm::None) const
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
const Expr * getRefExpr(unsigned N) const
Returns the base declaration of the reduction item.
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
virtual void mangleTypeName(QualType T, raw_ostream &)=0
Generates a unique string for an externally visible type for use with TBAA or type uniquing...
void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N) override
Required to resolve existing problems in the runtime.
Expr * getInitOrig()
Get Orig variable of the initializer.
Definition: DeclOpenMP.h:177
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition: Expr.cpp:224
void registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, CharUnits VarSize, OMPTargetGlobalVarEntryKind Flags, llvm::GlobalValue::LinkageTypes Linkage)
Register device global variable entry.
Struct with the values to be passed to the static runtime function.
ConstantAddress GetAddrOfConstantCString(const std::string &Str, const char *GlobalName=nullptr)
Returns a pointer to a character array containing the literal and a terminating &#39;\0&#39; character...
llvm::Constant * GetAddrOfGlobal(GlobalDecl GD, ForDefinition_t IsForDefinition=NotForDefinition)
static void EmitOMPTargetTeamsDistributeDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeDirective &S)
Emit device code for the target teams distribute directive.
A basic class for pre|post-action for advanced codegen sequence for OpenMP region.
JumpDest getOMPCancelDestination(OpenMPDirectiveKind Kind)
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type, returning the result.
void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads) override
Emit an ordered region.
void addUsedGlobal(llvm::GlobalValue *GV)
Add a global to a list to be added to the llvm.used metadata.
FunctionArgList - Type for representing both the decl and type of parameters to a function...
Definition: CGCall.h:355
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition: CGValue.h:59
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
static void emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, const llvm::APSInt &VLENVal, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State)
CanQualType CharTy
Definition: ASTContext.h:1018
void setAction(PrePostActionTy &Action) const
CGFunctionInfo - Class to encapsulate the information about a function definition.
This class organizes the cross-function state that is used while generating LLVM code.
QualType withRestrict() const
Definition: Type.h:842
CGOpenMPRuntime & getOpenMPRuntime()
Return a reference to the configured OpenMP runtime.
static const VarDecl * getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE)
StructBuilder beginStruct(llvm::StructType *ty=nullptr)
OpenMPScheduleClauseModifier
OpenMP modifiers for &#39;schedule&#39; clause.
Definition: OpenMPKinds.h:93
LValue EmitOMPSharedLValue(const Expr *E)
Emits the lvalue for the expression with possibly captured variable.
llvm::GlobalValue * GetGlobalValue(StringRef Ref)
Dataflow Directional Tag Classes.
static ApplyDebugLocation CreateDefaultArtificial(CodeGenFunction &CGF, SourceLocation TemporaryLocation)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:776
Class provides a way to call simple version of codegen for OpenMP region, or an advanced with possibl...
LValue EmitLoadOfReferenceLValue(LValue RefLVal)
Definition: CGExpr.cpp:2336
static llvm::Value * emitReduceInitFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction initializer function:
This represents the 'device' clause in the '#pragma omp ...' directive.
bool isValid() const
Return true if this is a valid SourceLocation object.
bool emitTargetFunctions(GlobalDecl GD) override
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
i32 captured_struct **param SharedsTy A type which contains references the shared variables *param Shareds Context with the list of shared variables from the p *TaskFunction *param IfCond Not a nullptr if if clause was nullptr *otherwise *param Data Additional data for task generation like final list of privates etc *virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
DeclContext - This is used only as base class of specific decl types that can act as declaration cont...
Definition: DeclBase.h:1271
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition: CGValue.h:93
EvalResult is a struct with detailed info about an evaluated expression.
Definition: Expr.h:586
NontemporalDeclsRAII(CodeGenModule &CGM, const OMPLoopDirective &S)
static std::string getName(const CallEvent &Call)
virtual bool emitTargetGlobal(GlobalDecl GD)
Emit the global GD if it is meaningful for the target.
virtual void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion)
Emit code for the 'cancel' construct.
std::vector< llvm::Triple > OMPTargetTriples
Triples of the OpenMP targets that the host code codegen should take into account in order to generat...
Definition: LangOptions.h:293
OpenMPScheduleClauseModifier M1
Definition: OpenMPKinds.h:200
llvm::Value * LB
Loop lower bound.
A basic class for pre|post-action for advanced codegen sequence for OpenMP region.
LValue getSharedLValue(unsigned N) const
Returns LValue for the reduction item.
const Expr * getInit() const
Definition: Decl.h:1229
llvm::Constant * getPointer() const
Definition: Address.h:83
void initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, unsigned Order)
Initialize target region entry.
std::unique_ptr< DiagnosticConsumer > create(StringRef OutputFile, DiagnosticOptions *Diags, bool MergeChildRecords=false)
Returns a DiagnosticConsumer that serializes diagnostics to a bitcode file.
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition: CGBuilder.h:69
bool isOpenMPSimdDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a simd directive.
llvm::PointerIntPair< llvm::Value *, 1, bool > Priority
RTCancelKind
llvm::Constant * EmitNullConstant(QualType T)
Return the result of value-initializing the given type, i.e.
PresumedLoc getPresumedLoc(SourceLocation Loc, bool UseLineDirectives=true) const
Returns the "presumed" location that a SourceLocation specifies.
llvm::FunctionCallee CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs=llvm::AttributeList(), bool Local=false, bool AssumeConvergent=false)
Create or return a runtime function declaration with the specified type and name. ...
llvm::Value * UB
Loop upper bound.
llvm::Value * Chunk
Chunk size specified using &#39;schedule&#39; clause (nullptr if chunk was not specified) ...
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition: CGBuilder.h:107
llvm::Module & getModule() const
llvm::Value * Chunk
Value of the chunk for the static_chunked scheduled loop.
void emitUDMapperArrayInitOrDel(CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *BasePtr, llvm::Value *Ptr, llvm::Value *Size, llvm::Value *MapType, CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit)
Emit the array initialization or deletion portion for user-defined mapper code generation.
Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...
Not really used in Fortran any more.
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Emit the outlined function for the 'target' directive.
llvm::StructType * ConvertRecordDeclType(const RecordDecl *TD)
ConvertRecordDeclType - Lay out a tagged decl type like struct or union.
virtual bool isDefaultLocationConstant() const
Check if the default location must be constant.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition: Expr.cpp:2995
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc)
Emits address of the word in a memory where current thread id is stored.
void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value *> CapturedVars) override
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
void emitInitialization(CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, llvm::function_ref< bool(CodeGenFunction &)> DefaultInit)
Performs initialization of the private copy for the reduction item.
JumpDest ReturnBlock
ReturnBlock - Unified return block.
OffloadEntriesInfoManagerTy OffloadEntriesInfoManager
virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static non-chunked.
static bool isStrictSubset(const CompleteOMPContextSelectorData &LHS, const CompleteOMPContextSelectorData &RHS)
bool capturesVariableByCopy() const
Determine whether this capture handles a variable by copy.
Definition: Stmt.h:3420
Class that represents a component of a mappable expression.
static void emitForStaticInitCall(CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, const CGOpenMPRuntime::StaticRTInput &Values)
void EmitAutoVarCleanups(const AutoVarEmission &emission)
Definition: CGDecl.cpp:1980
llvm::DenseSet< CanonicalDeclPtr< const Decl > > AlreadyEmittedTargetDecls
List of the emitted declarations.
API for captured statement code generation.
virtual bool emitDeclareTargetVarDefinition(const VarDecl *VD, llvm::GlobalVariable *Addr, bool PerformInit)
Emit a code for initialization of declare target variable.
virtual void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value *> CapturedVars)
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
virtual void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values)
bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a distribute directive.
virtual StringRef getOutlinedHelperName() const
Get the function name of an outlined region.
static bool classof(const OMPClause *T)
static bool matchesContext(CodeGenModule &CGM, const CompleteOMPContextSelectorData &ContextData)
bool HasEmittedDeclareTargetRegion
Flag for keeping track of whether a device routine has been emitted.
ArraySubscriptExpr - [C99 6.5.2.1] Array Subscripting.
Definition: Expr.h:2462
virtual void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads)
Emit an ordered region.
This file defines OpenMP AST classes for executable directives and clauses.
static FieldDecl * addFieldToRecordDecl(ASTContext &C, DeclContext *DC, QualType FieldTy)
CodeGenTypes & getTypes() const
StructBuilder beginStruct(llvm::StructType *structTy=nullptr)
virtual void checkAndEmitLastprivateConditional(CodeGenFunction &CGF, const Expr *LHS)
Checks if the provided LVal is lastprivate conditional and emits the code to update the value of the ...
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition: Type.h:6811
static void EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDirective &S)
Emit device code for the target teams directive.
CleanupTy(PrePostActionTy *Action)
T * getAttr() const
Definition: DeclBase.h:538
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition: Address.h:51
void initializeDeviceGlobalVarEntryInfo(StringRef Name, OMPTargetGlobalVarEntryKind Flags, unsigned Order)
Initialize device global variable entry.
static bool checkInitIsRequired(CodeGenFunction &CGF, ArrayRef< PrivateDataTy > Privates)
Check if duplication function is required for taskloops.
llvm::Value * EmitCheckedInBoundsGEP(llvm::Value *Ptr, ArrayRef< llvm::Value *> IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, unsigned &DeviceID, unsigned &FileID, unsigned &LineNum)
Obtain information that uniquely identifies a target entry.
Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal) override
Get the address of the void * typed private copy of the reduction item specified by the SharedLVal...
llvm::AssertingVH< llvm::Instruction > AllocaInsertPt
AllocaInsertPoint - This is an instruction in the entry block before which we prefer to insert alloca...
OpenMPLocationFlags
Values for bit flags used in the ident_t to describe the fields.
virtual void emitInlinedDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool HasCancel=false)
Emit code for the directive that does not require outlining.
OpenMPScheduleClauseKind
OpenMP attributes for &#39;schedule&#39; clause.
Definition: OpenMPKinds.h:85
Expr * getNumIterations() const
Definition: StmtOpenMP.h:1002
llvm::StringRef getName() const
Return the IR name of the pointer value.
Definition: Address.h:61
llvm::StructType * getLLVMType() const
Return the "complete object" LLVM type associated with this record.
Base for LValueReferenceType and RValueReferenceType.
Definition: Type.h:2750
bool hasSameType(QualType T1, QualType T2) const
Determine whether the given types T1 and T2 are equivalent.
Definition: ASTContext.h:2321
Entity that registers the offloading constants that were emitted so far.
void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI)
Set the attributes on the LLVM function for the given decl and function info.
void emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee Callee, ArrayRef< llvm::Value *> Args=llvm::None) const
Emits Callee function call with arguments Args with location Loc.
StringRef getMangledName(GlobalDecl GD)
llvm::Constant * getOrCreateInternalVariable(llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace=0)
Gets (if variable with the given name already exist) or creates internal global variable with the spe...
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, const Expr *SizeExpr, ArrayType::ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type...
Internal linkage, which indicates that the entity can be referred to from within the translation unit...
Definition: Linkage.h:31
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
Definition: CGStmt.cpp:473
void addDecl(Decl *D)
Add the declaration D into this context.
Definition: DeclBase.cpp:1524
llvm::Constant * registerTargetFirstprivateCopy(CodeGenFunction &CGF, const VarDecl *VD)
Registers provided target firstprivate variable as global on the target.
void getCaptureFields(llvm::DenseMap< const VarDecl *, FieldDecl *> &Captures, FieldDecl *&ThisCapture) const
For a closure type, retrieve the mapping from captured variables and this to the non-static data memb...
Definition: DeclCXX.cpp:1454
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
Definition: ASTContext.h:2104
virtual void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc)
Emit a taskgroup region.
SourceManager & getSourceManager()
Definition: ASTContext.h:679
llvm::ConstantInt * getSize(CharUnits numChars)
Emit the given number of characters as a value of type size_t.
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
Definition: DeclBase.h:524
virtual void adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, const OMPExecutableDirective &D) const
Adjust some parameters for the target-based directives, like addresses of the variables captured by r...
X
Add a minimal nested name specifier fixit hint to allow lookup of a tag name from an outer enclosing ...
Definition: SemaDecl.cpp:14781
CharUnits toCharUnitsFromBits(int64_t BitSize) const
Convert a size in bits to a size in characters.
Lower bound for default (unordered) versions.
void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams...
bool EvaluateAsInt(EvalResult &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsInt - Return true if this is a constant which we can fold and convert to an integer...
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
Definition: Expr.h:2836
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr *> Privates, ArrayRef< const Expr *> LHSExprs, ArrayRef< const Expr *> RHSExprs, ArrayRef< const Expr *> ReductionOps, ReductionOptionsTy Options)
Emit a code for reduction clause.
This represents 'nowait' clause in the '#pragma omp ...' directive.
static bool isTrivial(ASTContext &Ctx, const Expr *E)
Checks if the expression is constant or does not have non-trivial function calls. ...
llvm::PointerIntPair< llvm::Value *, 1, bool > Schedule
Represents a C++ struct/union/class.
Definition: DeclCXX.h:253
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block, taking care to avoid creation of branches from dummy blocks.
Definition: CGStmt.cpp:493
llvm::Function * CreateGlobalInitOrDestructFunction(llvm::FunctionType *ty, const Twine &name, const CGFunctionInfo &FI, SourceLocation Loc=SourceLocation(), bool TLS=false)
Definition: CGDeclCXX.cpp:336
bool isVoidType() const
Definition: Type.h:6777
llvm::Value * BasePointersArray
The array of base pointer passed to the runtime library.
static QualType getBaseOriginalType(const Expr *Base)
Return original type of the base expression for array section.
Definition: Expr.cpp:4712
static llvm::Value * emitReduceFiniFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction finalizer function:
OpenMPOffloadingReservedDeviceIDs
llvm::Type * ConvertType(QualType T)
bool isTLSSupported() const
Whether the target supports thread-local storage.
Definition: TargetInfo.h:1196
void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C) override
Emit code for doacross ordered directive with 'depend' clause.
int Priority
Definition: Format.cpp:1829
Privates[]
Gets the list of initial values for linear variables.
Definition: OpenMPClause.h:150
OpenMPMapClauseKind
OpenMP mapping kind for &#39;map&#39; clause.
Definition: OpenMPKinds.h:118
Qualifiers getQualifiers() const
Retrieve the set of qualifiers applied to this type.
Definition: Type.h:6283
Capturing by reference.
Definition: Lambda.h:37
virtual llvm::Function * emitTeamsOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP teams directive D.
LValue EmitLValue(const Expr *E)
EmitLValue - Emit code to compute a designator that specifies the location of the expression...
Definition: CGExpr.cpp:1246
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
static bool checkContext(const OMPContextSelectorData &Data, Arguments... Params)
Checks current context and returns true if it matches the context selector.
A helper class of ConstantInitBuilder, used for building constant struct initializers.
void popTerminate()
Pops a terminate handler off the stack.
Definition: CGCleanup.h:582
ASTImporterLookupTable & LT
CharUnits getNonVirtualSize() const
getNonVirtualSize - Get the non-virtual size (in chars) of an object, which is the size of the object...
Definition: RecordLayout.h:202
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
OpenMPRTLFunction
void addAttr(Attr *A)
Definition: DeclBase.cpp:832
unsigned getVirtualBaseIndex(const CXXRecordDecl *base) const
Return the LLVM field index corresponding to the given virtual base.
This represents '#pragma omp declare mapper ...' directive.
Definition: DeclOpenMP.h:217
virtual void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false)
Emit an implicit/explicit barrier for OpenMP threads.
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition: Stmt.cpp:263
void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) override
Emit outlined function for 'target' directive.
RValue EmitLoadOfLValue(LValue V, SourceLocation Loc)
EmitLoadOfLValue - Given an expression that represents a value lvalue, this method emits the address ...
Definition: CGExpr.cpp:1777
static llvm::Value * emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPExecutableDirective &D, QualType KmpTaskTWithPrivatesPtrQTy, const RecordDecl *KmpTaskTWithPrivatesQTyRD, const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool WithLastIter)
Emit task_dup function (for initialization of private/firstprivate/lastprivate vars and last_iter fla...
virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues)
Call the appropriate runtime routine to initialize it before start of loop.
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition: Decl.h:250
static RecordDecl * createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, ArrayRef< PrivateDataTy > Privates)
llvm::Function * emitParallelOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP parallel directive D.
llvm::MapVector< CanonicalDeclPtr< const FunctionDecl >, std::pair< GlobalDecl, GlobalDecl > > DeferredVariantFunction
Mapping of the original functions to their variants and original global decl.
CGCapturedStmtInfo * CapturedStmtInfo
llvm::Value * EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)
Emit a conversion from the specified type to the specified destination type, both of which are LLVM s...
CGCXXABI & getCXXABI() const
Expr * getCombinerIn()
Get In variable of the combiner.
Definition: DeclOpenMP.h:155
const VariableArrayType * getAsVariableArrayType(QualType T) const
Definition: ASTContext.h:2469
CanQualType IntTy
Definition: ASTContext.h:1025
decl_type * getMostRecentDecl()
Returns the most recent (re)declaration of this declaration.
Definition: Redeclarable.h:225
bool isEvaluatable(const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects) const
isEvaluatable - Call EvaluateAsRValue to see if this expression can be constant folded without side-e...
void EmitAggregateCopy(LValue Dest, LValue Src, QualType EltTy, AggValueSlot::Overlap_t MayOverlap, bool isVolatile=false)
EmitAggregateCopy - Emit an aggregate copy.
Definition: CGExprAgg.cpp:1902
capture_range captures()
Definition: Stmt.h:3511
A reference to a declared variable, function, enum, etc.
Definition: Expr.h:1171
QualType getIntPtrType() const
Return a type compatible with "intptr_t" (C99 7.18.1.4), as defined by the target.
static RValue get(llvm::Value *V)
Definition: CGValue.h:86
bool isUnion() const
Definition: Decl.h:3407
virtual void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values)
Call the appropriate runtime routine to initialize it before start of loop.
llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr *> LHSExprs, ArrayRef< const Expr *> RHSExprs, const OMPTaskDataTy &Data) override
Emit a code for initialization of task reduction clause.
bool isPointerType() const
Definition: Type.h:6504
static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetDirective &S)
Emit device code for the target directive.
void EmitBranchThroughCleanup(JumpDest Dest)
EmitBranchThroughCleanup - Emit a branch from the current insert block through the normal cleanup han...
Definition: CGCleanup.cpp:1044
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
Definition: CGDebugInfo.h:786
ParamKindTy
Kind of parameter in a function with 'declare simd' directive.
bool HasRequiresUnifiedSharedMemory
Flag for keeping track of whether a requires unified_shared_memory directive is present.
static void EmitOMPTargetParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForDirective &S)
Emit device code for the target parallel for directive.
QualType KmpDimTy
struct kmp_dim { // loop bounds info casted to kmp_int64 kmp_int64 lo; // lower kmp_int64 up; // uppe...
void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind) override
Call the appropriate runtime routine to notify that we finished all the work with current loop...
QualType getType() const
Definition: Decl.h:630
An l-value expression is a reference to an object with independent storage.
Definition: Specifiers.h:129
bool isFloatingType() const
Definition: Type.cpp:2005
static RValue getAggregate(Address addr, bool isVolatile=false)
Definition: CGValue.h:107
bool isOpenMPLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a directive with an associated loop construct.
i32 captured_struct **param SharedsTy A type which contains references the shared variables *param Shareds Context with the list of shared variables from the p *TaskFunction *param IfCond Not a nullptr if if clause was nullptr *otherwise *param Data Additional data for task generation like final list of privates etc *void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
LValue - This represents an lvalue references.
Definition: CGValue.h:167
virtual unsigned getDefaultLocationReserved2Flags() const
Returns additional flags that can be stored in reserved_2 field of the default location.
Information for lazily generating a cleanup.
Definition: EHScopeStack.h:146
QualType SavedKmpTaskTQTy
Saved kmp_task_t for task directive.
virtual Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal)
Get the address of void * type of the private copy of the reduction item specified by the SharedLVal...
Represents a C array with a specified size that is not an integer-constant-expression.
Definition: Type.h:3039
CanQualType BoolTy
Definition: ASTContext.h:1017
bool isConstant(const ASTContext &Ctx) const
Definition: Type.h:789
APSInt & getInt()
Definition: APValue.h:380
virtual void getDefaultScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const
Choose default schedule type and chunk value for the schedule clause.
const LangOptions & getLangOpts() const
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::StringSet DeclareTargetWithDefinition
Set of declare target variables with the generated initializer.
static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)
Definition: Decl.cpp:4006
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, const llvm::Twine &Name="")
Definition: CGBuilder.h:163
llvm::Value * emitArrayLength(const ArrayType *arrayType, QualType &baseType, Address &addr)
emitArrayLength - Compute the length of an array, even if it's a VLA, and drill down to the base elem...
virtual llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts)
Emits outlined function for the OpenMP task directive D.
Expr * getCombinerOut()
Get Out variable of the combiner.
Definition: DeclOpenMP.h:158
unsigned getNumParams() const
Return the number of parameters this function must have based on its FunctionType.
Definition: Decl.cpp:3242
const CGRecordLayout & getCGRecordLayout(const RecordDecl *)
getCGRecordLayout - Return record layout info for the given record decl.
bool hasInit() const
Definition: Decl.cpp:2226
This represents clause 'nontemporal' in the '#pragma omp ...' directives.
No in-class initializer.
Definition: Specifiers.h:259
base_class_range vbases()
Definition: DeclCXX.h:604
This class handles loading and caching of source files into memory.
void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion) override
Emit code for &#39;cancellation point&#39; construct.
void EmitGlobal(GlobalDecl D)
Emit code for a single global function or var decl.
virtual void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc)
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...
bool capturesVariableArrayType() const
Determine whether this capture handles a variable-length array type.
Definition: Stmt.h:3426
bool isLocalVarDeclOrParm() const
Similar to isLocalVarDecl but also includes parameters.
Definition: Decl.h:1123
Attr - This represents one attribute.
Definition: Attr.h:45
SmallVector< const Expr *, 4 > FirstprivateInits
SourceLocation getLocation() const
Definition: DeclBase.h:429
This represents clause 'use_device_ptr' in the '#pragma omp ...' directives.
static void EmitOMPTargetTeamsDistributeSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeSimdDirective &S)
Emit device code for the target teams distribute simd directive.
void EmitNullInitialization(Address DestPtr, QualType Ty)
EmitNullInitialization - Generate code to set a value of the given type to null, If the type contains...
bool isExternallyVisible() const
Definition: Decl.h:362
llvm::Constant * getOrCreateThreadPrivateCache(const VarDecl *VD)
If the specified mangled name is not in the module, create and return threadprivate cache object...
llvm::Value * SizesArray
The array of sizes passed to the runtime library.
QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const
getIntTypeForBitwidth - sets integer QualTy according to specified details: bitwidth, signed/unsigned.
static OMPLinearClause * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, OpenMPLinearClauseKind Modifier, SourceLocation ModifierLoc, SourceLocation ColonLoc, SourceLocation EndLoc, ArrayRef< Expr *> VL, ArrayRef< Expr *> PL, ArrayRef< Expr *> IL, Expr *Step, Expr *CalcStep, Stmt *PreInit, Expr *PostUpdate)
Creates clause with a list of variables VL and a linear step Step.
bool Privatize()
Privatizes local variables previously registered as private.
static void EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForSimdDirective &S)
Emit device code for the target teams distribute parallel for simd directive.
CanQualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc)
Emit code for &#39;taskwait&#39; directive.
unsigned getLLVMFieldNo(const FieldDecl *FD) const
Return llvm::StructType element number that corresponds to the field FD.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for.
Definition: CGCall.cpp:1556