clang  10.0.0git
AMDGPU.cpp
Go to the documentation of this file.
1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/IR/DataLayout.h"
21 
22 using namespace clang;
23 using namespace clang::targets;
24 
25 namespace clang {
26 namespace targets {
27 
28 // If you edit the description strings, make sure you update
29 // getPointerWidthV().
30 
// Data layout description for R600 targets, written in LLVM data-layout
// string syntax: little-endian ("e"), 32-bit pointers with 32-bit alignment
// ("p:32:32"), and allocas placed in address space 5 ("A5").
31 static const char *const DataLayoutStringR600 =
32  "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
33  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
34 
// Data layout description for AMDGCN targets, written in LLVM data-layout
// string syntax. Unlike the R600 string it gives a pointer size per address
// space: 64-bit for the default space and for spaces 1 and 4, 32-bit for
// spaces 2, 3, 5 and 6. It also marks address space 7 as non-integral
// ("ni:7") and, like R600, places allocas in address space 5 ("A5").
35 static const char *const DataLayoutStringAMDGCN =
36  "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
37  "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
38  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
39  "-ni:7";
40 
// Language-to-target address space table in which the Default language
// address space maps to Generic. Entries are positional: the trailing comment
// on each row names the language address space that row covers.
// setAddressSpaceMap() selects this table when DefaultIsPrivate is false.
41 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
42  Generic, // Default
43  Global, // opencl_global
44  Local, // opencl_local
45  Constant, // opencl_constant
46  Private, // opencl_private
47  Generic, // opencl_generic
48  Global, // cuda_device
49  Constant, // cuda_constant
50  Local, // cuda_shared
51  Generic, // ptr32_sptr
52  Generic, // ptr32_uptr
53  Generic // ptr64
54 };
55 
// Language-to-target address space table in which the Default language
// address space maps to Private. Every other row matches AMDGPUDefIsGenMap;
// only the first (Default) entry differs.
// setAddressSpaceMap() selects this table when DefaultIsPrivate is true.
56 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
57  Private, // Default
58  Global, // opencl_global
59  Local, // opencl_local
60  Constant, // opencl_constant
61  Private, // opencl_private
62  Generic, // opencl_generic
63  Global, // cuda_device
64  Constant, // cuda_constant
65  Local, // cuda_shared
66  Generic, // ptr32_sptr
67  Generic, // ptr32_uptr
68  Generic // ptr64
69 
70 };
71 } // namespace targets
72 } // namespace clang
73 
// Table of AMDGPU builtins, produced by expanding every entry of
// BuiltinsAMDGPU.def through the two macros defined below. Both kinds of
// entry are tagged ALL_LANGUAGES; they differ only in the last field, which
// is nullptr for BUILTIN and the supplied FEATURE for TARGET_BUILTIN.
74 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
75 #define BUILTIN(ID, TYPE, ATTRS) \
76  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
77 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
78  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
79 #include "clang/Basic/BuiltinsAMDGPU.def"
80 };
81 
// Register names exposed through getGCCRegNames(). The order is v0-v255,
// then s0-s127, then the named registers exec, vcc, scc, m0 and flat_scratch,
// followed by the _lo/_hi variants of exec, vcc and flat_scratch.
82 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
83  "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
84  "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
85  "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
86  "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
87  "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
88  "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
89  "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
90  "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
91  "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
92  "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
93  "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
94  "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
95  "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
96  "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
97  "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
98  "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
99  "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
100  "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
101  "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
102  "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
103  "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
104  "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
105  "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
106  "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
107  "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
108  "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
109  "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
110  "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
111  "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
112  "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
113  "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
114  "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
115  "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
116  "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
117  "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
118  "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
119  "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
120  "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
121  "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
122  "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
123  "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
124  "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
125  "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
126  "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
127  "flat_scratch_lo", "flat_scratch_hi"
128 };
129 
131  return llvm::makeArrayRef(GCCRegNames);
132 }
133 
135  llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
136  const std::vector<std::string> &FeatureVec) const {
137 
138  using namespace llvm::AMDGPU;
139 
140  // XXX - What does the member GPU mean if device name string passed here?
141  if (isAMDGCN(getTriple())) {
142  switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
143  case GK_GFX1012:
144  case GK_GFX1011:
145  Features["dot1-insts"] = true;
146  Features["dot2-insts"] = true;
147  Features["dot5-insts"] = true;
148  Features["dot6-insts"] = true;
149  LLVM_FALLTHROUGH;
150  case GK_GFX1010:
151  Features["dl-insts"] = true;
152  Features["ci-insts"] = true;
153  Features["flat-address-space"] = true;
154  Features["16-bit-insts"] = true;
155  Features["dpp"] = true;
156  Features["gfx8-insts"] = true;
157  Features["gfx9-insts"] = true;
158  Features["gfx10-insts"] = true;
159  Features["s-memrealtime"] = true;
160  break;
161  case GK_GFX908:
162  Features["dot3-insts"] = true;
163  Features["dot4-insts"] = true;
164  Features["dot5-insts"] = true;
165  Features["dot6-insts"] = true;
166  LLVM_FALLTHROUGH;
167  case GK_GFX906:
168  Features["dl-insts"] = true;
169  Features["dot1-insts"] = true;
170  Features["dot2-insts"] = true;
171  LLVM_FALLTHROUGH;
172  case GK_GFX909:
173  case GK_GFX904:
174  case GK_GFX902:
175  case GK_GFX900:
176  Features["gfx9-insts"] = true;
177  LLVM_FALLTHROUGH;
178  case GK_GFX810:
179  case GK_GFX803:
180  case GK_GFX802:
181  case GK_GFX801:
182  Features["gfx8-insts"] = true;
183  Features["16-bit-insts"] = true;
184  Features["dpp"] = true;
185  Features["s-memrealtime"] = true;
186  LLVM_FALLTHROUGH;
187  case GK_GFX704:
188  case GK_GFX703:
189  case GK_GFX702:
190  case GK_GFX701:
191  case GK_GFX700:
192  Features["ci-insts"] = true;
193  Features["flat-address-space"] = true;
194  LLVM_FALLTHROUGH;
195  case GK_GFX601:
196  case GK_GFX600:
197  break;
198  case GK_NONE:
199  break;
200  default:
201  llvm_unreachable("Unhandled GPU!");
202  }
203  } else {
204  if (CPU.empty())
205  CPU = "r600";
206 
207  switch (llvm::AMDGPU::parseArchR600(CPU)) {
208  case GK_CAYMAN:
209  case GK_CYPRESS:
210  case GK_RV770:
211  case GK_RV670:
212  // TODO: Add fp64 when implemented.
213  break;
214  case GK_TURKS:
215  case GK_CAICOS:
216  case GK_BARTS:
217  case GK_SUMO:
218  case GK_REDWOOD:
219  case GK_JUNIPER:
220  case GK_CEDAR:
221  case GK_RV730:
222  case GK_RV710:
223  case GK_RS880:
224  case GK_R630:
225  case GK_R600:
226  break;
227  default:
228  llvm_unreachable("Unhandled GPU!");
229  }
230  }
231 
232  return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
233 }
234 
236  TargetOptions &TargetOpts) const {
237  bool hasFP32Denormals = false;
238  bool hasFP64Denormals = false;
239 
240  for (auto &I : TargetOpts.FeaturesAsWritten) {
241  if (I == "+fp32-denormals" || I == "-fp32-denormals")
242  hasFP32Denormals = true;
243  if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
244  hasFP64Denormals = true;
245  }
246  if (!hasFP32Denormals)
247  TargetOpts.Features.push_back(
248  (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm
249  ? '+' : '-') + Twine("fp32-denormals"))
250  .str());
251  // Never flush fp64 or fp16 denorms.
252  if (!hasFP64Denormals && hasFP64())
253  TargetOpts.Features.push_back("+fp64-fp16-denormals");
254 }
255 
257  SmallVectorImpl<StringRef> &Values) const {
258  if (isAMDGCN(getTriple()))
259  llvm::AMDGPU::fillValidArchListAMDGCN(Values);
260  else
261  llvm::AMDGPU::fillValidArchListR600(Values);
262 }
263 
// Points AddrSpaceMap at one of the two static address space tables:
// AMDGPUDefIsPrivMap when DefaultIsPrivate is true, AMDGPUDefIsGenMap
// otherwise. The two tables differ only in what the Default language address
// space maps to (Private versus Generic).
264 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
265  AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
266 }
267 
// Builds the AMDGPU target description from the triple and target options.
// GPUKind is parsed from Opts.CPU with the AMDGCN or the R600 parser,
// depending on the triple, and GPUFeatures is then looked up from that kind.
// NOTE(review): the embedded listing numbers skip 277-278, 283, 292-294 and
// 297, so some statements of this body are not visible here.
268 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
269  const TargetOptions &Opts)
270  : TargetInfo(Triple),
271  GPUKind(isAMDGCN(Triple) ?
272  llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
273  llvm::AMDGPU::parseArchR600(Opts.CPU)),
274  GPUFeatures(isAMDGCN(Triple) ?
275  llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
276  llvm::AMDGPU::getArchAttrR600(GPUKind)) {
 // The data layout in effect must place allocas in the Private address space.
279  assert(DataLayout->getAllocaAddrSpace() == Private);
280 
 // The Default language address space is Private on Mesa3D and on any
 // non-AMDGCN triple; otherwise it is Generic.
281  setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
282  !isAMDGCN(Triple));
284 
285  HasLegalHalfType = true;
286  HasFloat16 = true;
287 
288  // Set pointer width and alignment for target address space 0.
289  PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
 // When the maximum pointer width is 64 bits, long is 64 bits wide and
 // 64-bit aligned.
290  if (getMaxPointerWidth() == 64) {
291  LongWidth = LongAlign = 64;
295  }
296 
298 }
299 
301  TargetInfo::adjust(Opts);
302  // ToDo: There are still a few places using default address space as private
303  // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
304  // can be removed from the following line.
305  setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
306  !isAMDGCN(getTriple()));
307 }
308 
310  return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
312 }
313 
315  MacroBuilder &Builder) const {
316  Builder.defineMacro("__AMD__");
317  Builder.defineMacro("__AMDGPU__");
318 
319  if (isAMDGCN(getTriple()))
320  Builder.defineMacro("__AMDGCN__");
321  else
322  Builder.defineMacro("__R600__");
323 
324  if (GPUKind != llvm::AMDGPU::GK_NONE) {
325  StringRef CanonName = isAMDGCN(getTriple()) ?
326  getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
327  Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
328  }
329 
330  // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
331  // removed in the near future.
332  if (hasFMAF())
333  Builder.defineMacro("__HAS_FMAF__");
334  if (hasFastFMAF())
335  Builder.defineMacro("FP_FAST_FMAF");
336  if (hasLDEXPF())
337  Builder.defineMacro("__HAS_LDEXPF__");
338  if (hasFP64())
339  Builder.defineMacro("__HAS_FP64__");
340  if (hasFastFMA())
341  Builder.defineMacro("FP_FAST_FMA");
342 }
343 
345  assert(HalfFormat == Aux->HalfFormat);
346  assert(FloatFormat == Aux->FloatFormat);
347  assert(DoubleFormat == Aux->DoubleFormat);
348 
349  // On x86_64 long double is 80-bit extended precision format, which is
350  // not supported by AMDGPU. 128-bit floating point format is also not
351  // supported by AMDGPU. Therefore keep its own format for these two types.
352  auto SaveLongDoubleFormat = LongDoubleFormat;
353  auto SaveFloat128Format = Float128Format;
354  copyAuxTarget(Aux);
355  LongDoubleFormat = SaveLongDoubleFormat;
356  Float128Format = SaveFloat128Format;
357 }
bool initFeatureMap(llvm::StringMap< bool > &Features, DiagnosticsEngine &Diags, StringRef CPU, const std::vector< std::string > &FeatureVec) const override
Initialize the map with the default set of target features for the CPU; this should include all legal ...
Definition: AMDGPU.cpp:134
Defines the clang::MacroBuilder utility class.
virtual void adjust(LangOptions &Opts)
Set forced language options.
Definition: TargetInfo.cpp:330
const llvm::fltSemantics * FloatFormat
Definition: TargetInfo.h:103
Specialize PointerLikeTypeTraits to allow LazyGenerationalUpdatePtr to be placed into a PointerUnion...
Definition: Dominators.h:30
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
Definition: TargetInfo.h:994
Options for controlling the target.
Definition: TargetOptions.h:26
void getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const override
===---- Other target property query methods --------------------------===//
Definition: AMDGPU.cpp:314
void fillValidCPUList(SmallVectorImpl< StringRef > &Values) const override
Fill a SmallVectorImpl with the valid values to setCPU.
Definition: AMDGPU.cpp:256
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:53
unsigned char MaxAtomicPromoteWidth
Definition: TargetInfo.h:180
const llvm::fltSemantics * HalfFormat
Definition: TargetInfo.h:103
const llvm::fltSemantics * Float128Format
Definition: TargetInfo.h:103
Concrete class used by the front-end to report problems and issues.
Definition: Diagnostic.h:149
ArrayRef< const char * > getGCCRegNames() const override
Definition: AMDGPU.cpp:130
void setAddressSpaceMap(bool DefaultIsPrivate)
Definition: AMDGPU.cpp:264
static const char *const GCCRegNames[]
Definition: X86.cpp:43
Exposes information about the current target.
Definition: TargetInfo.h:164
virtual bool initFeatureMap(llvm::StringMap< bool > &Features, DiagnosticsEngine &Diags, StringRef CPU, const std::vector< std::string > &FeatureVec) const
Initialize the map with the default set of target features for the CPU; this should include all legal ...
Definition: TargetInfo.cpp:402
Defines the clang::LangOptions interface.
void adjustTargetOptions(const CodeGenOptions &CGOpts, TargetOptions &TargetOpts) const override
Adjust target options based on codegen options.
Definition: AMDGPU.cpp:235
void setAuxTarget(const TargetInfo *Aux) override
Definition: AMDGPU.cpp:344
const llvm::fltSemantics * LongDoubleFormat
Definition: TargetInfo.h:103
Enumerates target-specific builtins in their own namespaces within namespace clang.
std::vector< std::string > Features
The list of target specific features to enable or disable – this should be a list of strings startin...
Definition: TargetOptions.h:55
static const char *const DataLayoutStringR600
Definition: AMDGPU.cpp:31
std::vector< std::string > FeaturesAsWritten
The list of target specific features to enable or disable, as written on the command line...
Definition: TargetOptions.h:51
AMDGPUTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
Definition: AMDGPU.cpp:268
Dataflow Directional Tag Classes.
static const char *const DataLayoutStringAMDGCN
Definition: AMDGPU.cpp:35
unsigned[(unsigned) LangAS::FirstTargetAddressSpace] LangASMap
The type of a lookup table which maps from language-specific address spaces to target-specific ones...
Definition: AddressSpaces.h:58
ArrayRef< Builtin::Info > getTargetBuiltins() const override
Return information about target-specific builtins for the current primary target, and info about whic...
Definition: AMDGPU.cpp:309
CodeGenOptions - Track various options which control how the code is optimized and passed to the back...
void copyAuxTarget(const TargetInfo *Aux)
Copy type and layout related info.
Definition: TargetInfo.cpp:817
void adjust(LangOptions &Opts) override
Set forced language options.
Definition: AMDGPU.cpp:300
const llvm::fltSemantics * DoubleFormat
Definition: TargetInfo.h:103
unsigned char MaxAtomicInlineWidth
Definition: TargetInfo.h:180
void defineMacro(const Twine &Name, const Twine &Value="1")
Append a #define line for macro of the form "\#define Name Value\n".
Definition: MacroBuilder.h:29
std::unique_ptr< llvm::DataLayout > DataLayout
Definition: TargetInfo.h:182
void resetDataLayout(StringRef DL)
Definition: TargetInfo.cpp:140
Defines enum values for all the target-independent builtin functions.
uint64_t getMaxPointerWidth() const override
Return the maximum width of pointers on this target.
Definition: AMDGPU.h:102
bool UseAddrSpaceMapMangling
Specify if mangling based on address space map should be used or not for language specific address sp...
Definition: TargetInfo.h:268