clang  8.0.0
AMDGPU.cpp
Go to the documentation of this file.
1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements AMDGPU TargetInfo objects.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "clang/Basic/Builtins.h"
20 #include "llvm/ADT/StringSwitch.h"
21 
22 using namespace clang;
23 using namespace clang::targets;
24 
25 namespace clang {
26 namespace targets {
27 
28 // If you edit the description strings, make sure you update
29 // getPointerWidthV().
30 
31 static const char *const DataLayoutStringR600 =
32  "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
33  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
34 
// Data layout description string for the AMDGCN flavour of the target. It
// differs from the R600 string only in its pointer section, which lists one
// entry for the default address space plus explicit entries p1..p6.
// NOTE(review): component meanings follow LLVM's data layout syntax; the
// trailing "A5" presumably names the alloca address space that the
// constructor asserts to be Private -- confirm against the LLVM LangRef.
static const char *const DataLayoutStringAMDGCN =
    "e"
    "-p:64:64-p1:64:64-p2:32:32-p3:32:32"
    "-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256"
    "-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64-S32-A5";
39 
40 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
41  Generic, // Default
42  Global, // opencl_global
43  Local, // opencl_local
44  Constant, // opencl_constant
45  Private, // opencl_private
46  Generic, // opencl_generic
47  Global, // cuda_device
48  Constant, // cuda_constant
49  Local // cuda_shared
50 };
51 
52 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
53  Private, // Default
54  Global, // opencl_global
55  Local, // opencl_local
56  Constant, // opencl_constant
57  Private, // opencl_private
58  Generic, // opencl_generic
59  Global, // cuda_device
60  Constant, // cuda_constant
61  Local // cuda_shared
62 };
63 } // namespace targets
64 } // namespace clang
65 
66 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
67 #define BUILTIN(ID, TYPE, ATTRS) \
68  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
69 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
70  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
71 #include "clang/Basic/BuiltinsAMDGPU.def"
72 };
73 
74 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
75  "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
76  "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
77  "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
78  "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
79  "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
80  "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
81  "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
82  "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
83  "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
84  "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
85  "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
86  "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
87  "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
88  "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
89  "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
90  "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
91  "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
92  "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
93  "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
94  "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
95  "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
96  "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
97  "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
98  "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
99  "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
100  "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
101  "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
102  "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
103  "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
104  "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
105  "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
106  "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
107  "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
108  "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
109  "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
110  "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
111  "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
112  "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
113  "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
114  "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
115  "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
116  "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
117  "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
118  "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
119  "flat_scratch_lo", "flat_scratch_hi"
120 };
121 
123  return llvm::makeArrayRef(GCCRegNames);
124 }
125 
127  llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
128  const std::vector<std::string> &FeatureVec) const {
129 
130  using namespace llvm::AMDGPU;
131 
132  // XXX - What does the member GPU mean if device name string passed here?
133  if (isAMDGCN(getTriple())) {
134  if (CPU.empty())
135  CPU = "gfx600";
136 
137  switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
138  case GK_GFX906:
139  Features["dl-insts"] = true;
140  Features["dot-insts"] = true;
141  LLVM_FALLTHROUGH;
142  case GK_GFX909:
143  case GK_GFX904:
144  case GK_GFX902:
145  case GK_GFX900:
146  Features["gfx9-insts"] = true;
147  LLVM_FALLTHROUGH;
148  case GK_GFX810:
149  case GK_GFX803:
150  case GK_GFX802:
151  case GK_GFX801:
152  Features["vi-insts"] = true;
153  Features["16-bit-insts"] = true;
154  Features["dpp"] = true;
155  Features["s-memrealtime"] = true;
156  LLVM_FALLTHROUGH;
157  case GK_GFX704:
158  case GK_GFX703:
159  case GK_GFX702:
160  case GK_GFX701:
161  case GK_GFX700:
162  Features["ci-insts"] = true;
163  LLVM_FALLTHROUGH;
164  case GK_GFX601:
165  case GK_GFX600:
166  break;
167  case GK_NONE:
168  return false;
169  default:
170  llvm_unreachable("Unhandled GPU!");
171  }
172  } else {
173  if (CPU.empty())
174  CPU = "r600";
175 
176  switch (llvm::AMDGPU::parseArchR600(CPU)) {
177  case GK_CAYMAN:
178  case GK_CYPRESS:
179  case GK_RV770:
180  case GK_RV670:
181  // TODO: Add fp64 when implemented.
182  break;
183  case GK_TURKS:
184  case GK_CAICOS:
185  case GK_BARTS:
186  case GK_SUMO:
187  case GK_REDWOOD:
188  case GK_JUNIPER:
189  case GK_CEDAR:
190  case GK_RV730:
191  case GK_RV710:
192  case GK_RS880:
193  case GK_R630:
194  case GK_R600:
195  break;
196  default:
197  llvm_unreachable("Unhandled GPU!");
198  }
199  }
200 
201  return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
202 }
203 
205  TargetOptions &TargetOpts) const {
206  bool hasFP32Denormals = false;
207  bool hasFP64Denormals = false;
208 
209  for (auto &I : TargetOpts.FeaturesAsWritten) {
210  if (I == "+fp32-denormals" || I == "-fp32-denormals")
211  hasFP32Denormals = true;
212  if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
213  hasFP64Denormals = true;
214  }
215  if (!hasFP32Denormals)
216  TargetOpts.Features.push_back(
217  (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm
218  ? '+' : '-') + Twine("fp32-denormals"))
219  .str());
220  // Always do not flush fp64 or fp16 denorms.
221  if (!hasFP64Denormals && hasFP64())
222  TargetOpts.Features.push_back("+fp64-fp16-denormals");
223 }
224 
226  SmallVectorImpl<StringRef> &Values) const {
227  if (isAMDGCN(getTriple()))
228  llvm::AMDGPU::fillValidArchListAMDGCN(Values);
229  else
230  llvm::AMDGPU::fillValidArchListR600(Values);
231 }
232 
233 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
234  AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
235 }
236 
237 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
238  const TargetOptions &Opts)
239  : TargetInfo(Triple),
240  GPUKind(isAMDGCN(Triple) ?
241  llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
242  llvm::AMDGPU::parseArchR600(Opts.CPU)),
243  GPUFeatures(isAMDGCN(Triple) ?
244  llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
245  llvm::AMDGPU::getArchAttrR600(GPUKind)) {
248  assert(DataLayout->getAllocaAddrSpace() == Private);
249 
250  setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
251  !isAMDGCN(Triple));
253 
254  // Set pointer width and alignment for target address space 0.
255  PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
256  if (getMaxPointerWidth() == 64) {
257  LongWidth = LongAlign = 64;
261  }
262 
264 }
265 
267  TargetInfo::adjust(Opts);
268  // ToDo: There are still a few places using default address space as private
269  // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
270  // can be removed from the following line.
271  setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
272  !isAMDGCN(getTriple()));
273 }
274 
276  return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
278 }
279 
281  MacroBuilder &Builder) const {
282  Builder.defineMacro("__AMD__");
283  Builder.defineMacro("__AMDGPU__");
284 
285  if (isAMDGCN(getTriple()))
286  Builder.defineMacro("__AMDGCN__");
287  else
288  Builder.defineMacro("__R600__");
289 
290  if (GPUKind != llvm::AMDGPU::GK_NONE) {
291  StringRef CanonName = isAMDGCN(getTriple()) ?
292  getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
293  Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
294  }
295 
296  // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
297  // removed in the near future.
298  if (hasFMAF())
299  Builder.defineMacro("__HAS_FMAF__");
300  if (hasFastFMAF())
301  Builder.defineMacro("FP_FAST_FMAF");
302  if (hasLDEXPF())
303  Builder.defineMacro("__HAS_LDEXPF__");
304  if (hasFP64())
305  Builder.defineMacro("__HAS_FP64__");
306  if (hasFastFMA())
307  Builder.defineMacro("FP_FAST_FMA");
308 }
bool initFeatureMap(llvm::StringMap< bool > &Features, DiagnosticsEngine &Diags, StringRef CPU, const std::vector< std::string > &FeatureVec) const override
Initialize the map with the default set of target features for the CPU; this should include all legal ...
Definition: AMDGPU.cpp:126
Defines the clang::MacroBuilder utility class.
virtual void adjust(LangOptions &Opts)
Set forced language options.
Definition: TargetInfo.cpp:325
IntType IntPtrType
Definition: TargetInfo.h:222
DominatorTree GraphTraits specialization so the DominatorTree can be iterable by generic graph iterat...
Definition: Dominators.h:30
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
Definition: TargetInfo.h:955
Options for controlling the target.
Definition: TargetOptions.h:27
void getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const override
===---- Other target property query methods ----===//
Definition: AMDGPU.cpp:280
void fillValidCPUList(SmallVectorImpl< StringRef > &Values) const override
Fill a SmallVectorImpl with the valid values to setCPU.
Definition: AMDGPU.cpp:225
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:50
unsigned char MaxAtomicPromoteWidth
Definition: TargetInfo.h:108
unsigned char PointerWidth
Definition: TargetInfo.h:68
Concrete class used by the front-end to report problems and issues.
Definition: Diagnostic.h:149
ArrayRef< const char * > getGCCRegNames() const override
Definition: AMDGPU.cpp:122
unsigned char LongWidth
Definition: TargetInfo.h:76
void setAddressSpaceMap(bool DefaultIsPrivate)
Definition: AMDGPU.cpp:233
static const char *const GCCRegNames[]
Definition: X86.cpp:44
return Out str()
Exposes information about the current target.
Definition: TargetInfo.h:54
virtual bool initFeatureMap(llvm::StringMap< bool > &Features, DiagnosticsEngine &Diags, StringRef CPU, const std::vector< std::string > &FeatureVec) const
Initialize the map with the default set of target features for the CPU; this should include all legal ...
Definition: TargetInfo.cpp:386
Defines the clang::LangOptions interface.
void adjustTargetOptions(const CodeGenOptions &CGOpts, TargetOptions &TargetOpts) const override
Adjust target options based on codegen options.
Definition: AMDGPU.cpp:204
void resetDataLayout(StringRef DL)
Definition: TargetInfo.h:135
IntType PtrDiffType
Definition: TargetInfo.h:222
Enumerates target-specific builtins in their own namespaces within namespace clang.
std::vector< std::string > Features
The list of target specific features to enable or disable – this should be a list of strings startin...
Definition: TargetOptions.h:56
static const char *const DataLayoutStringR600
Definition: AMDGPU.cpp:31
std::vector< std::string > FeaturesAsWritten
The list of target specific features to enable or disable, as written on the command line...
Definition: TargetOptions.h:52
unsigned char PointerAlign
Definition: TargetInfo.h:68
AMDGPUTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
Definition: AMDGPU.cpp:237
Dataflow Directional Tag Classes.
static const char *const DataLayoutStringAMDGCN
Definition: AMDGPU.cpp:35
unsigned[(unsigned) LangAS::FirstTargetAddressSpace] LangASMap
The type of a lookup table which maps from language-specific address spaces to target-specific ones...
Definition: AddressSpaces.h:54
unsigned char LongAlign
Definition: TargetInfo.h:76
ArrayRef< Builtin::Info > getTargetBuiltins() const override
Return information about target-specific builtins for the current primary target, and info about whic...
Definition: AMDGPU.cpp:275
CodeGenOptions - Track various options which control how the code is optimized and passed to the back...
void adjust(LangOptions &Opts) override
Set forced language options.
Definition: AMDGPU.cpp:266
unsigned char MaxAtomicInlineWidth
Definition: TargetInfo.h:108
void defineMacro(const Twine &Name, const Twine &Value="1")
Append a #define line for macro of the form "\#define Name Value\n".
Definition: MacroBuilder.h:30
std::unique_ptr< llvm::DataLayout > DataLayout
Definition: TargetInfo.h:113
Defines enum values for all the target-independent builtin functions.
uint64_t getMaxPointerWidth() const override
Return the maximum width of pointers on this target.
Definition: AMDGPU.h:104
bool UseAddrSpaceMapMangling
Specify if mangling based on address space map should be used or not for language specific address sp...
Definition: TargetInfo.h:256