14 #include "clang/Config/config.h" 21 #include "llvm/Option/ArgList.h" 22 #include "llvm/Support/Path.h" 23 #include <system_error> 28 using namespace clang;
34 if (!V.startswith(
"CUDA Version "))
36 V = V.substr(strlen(
"CUDA Version "));
37 int Major = -1, Minor = -1;
38 auto First = V.split(
'.');
39 auto Second = First.second.split(
'.');
40 if (First.first.getAsInteger(10, Major) ||
41 Second.first.getAsInteger(10, Minor))
44 if (Major == 7 && Minor == 0) {
49 if (Major == 7 && Minor == 5)
51 if (Major == 8 && Minor == 0)
53 if (Major == 9 && Minor == 0)
59 const Driver &D,
const llvm::Triple &HostTriple,
60 const llvm::opt::ArgList &Args)
65 std::initializer_list<const char *> Versions = {
"8.0",
"7.5",
"7.0"};
67 if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
68 CudaPathCandidates.push_back(
69 Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ));
70 }
else if (HostTriple.isOSWindows()) {
71 for (
const char *Ver : Versions)
72 CudaPathCandidates.push_back(
73 D.
SysRoot +
"/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
76 CudaPathCandidates.push_back(D.
SysRoot +
"/usr/local/cuda");
77 for (
const char *Ver : Versions)
78 CudaPathCandidates.push_back(D.
SysRoot +
"/usr/local/cuda-" + Ver);
83 CudaPathCandidates.push_back(D.
SysRoot +
"/usr/lib/cuda");
86 for (
const auto &CudaPath : CudaPathCandidates) {
90 InstallPath = CudaPath;
91 BinPath = CudaPath +
"/bin";
92 IncludePath = InstallPath +
"/include";
93 LibDevicePath = InstallPath +
"/nvvm/libdevice";
96 if (!(FS.exists(IncludePath) && FS.exists(BinPath)))
105 if (HostTriple.isArch64Bit() && FS.exists(InstallPath +
"/lib64"))
106 LibPath = InstallPath +
"/lib64";
107 else if (FS.exists(InstallPath +
"/lib"))
108 LibPath = InstallPath +
"/lib";
112 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
113 FS.getBufferForFile(InstallPath +
"/version.txt");
124 std::string FilePath = LibDevicePath +
"/libdevice.10.bc";
125 if (FS.exists(FilePath)) {
126 for (
const char *GpuArch :
127 {
"sm_20",
"sm_30",
"sm_32",
"sm_35",
"sm_50",
"sm_52",
"sm_53",
128 "sm_60",
"sm_61",
"sm_62",
"sm_70"})
129 LibDeviceMap[GpuArch] = FilePath;
133 for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
134 !EC && LI != LE; LI = LI.increment(EC)) {
135 StringRef FilePath = LI->path();
136 StringRef FileName = llvm::sys::path::filename(FilePath);
139 const StringRef LibDeviceName =
"libdevice.";
140 if (!(FileName.startswith(LibDeviceName) && FileName.endswith(
".bc")))
142 StringRef GpuArch = FileName.slice(
143 LibDeviceName.size(), FileName.find(
'.', LibDeviceName.size()));
144 LibDeviceMap[GpuArch] = FilePath.str();
148 if (GpuArch ==
"compute_20") {
149 LibDeviceMap[
"sm_20"] = FilePath;
150 LibDeviceMap[
"sm_21"] = FilePath;
151 LibDeviceMap[
"sm_32"] = FilePath;
152 }
else if (GpuArch ==
"compute_30") {
153 LibDeviceMap[
"sm_30"] = FilePath;
155 LibDeviceMap[
"sm_50"] = FilePath;
156 LibDeviceMap[
"sm_52"] = FilePath;
157 LibDeviceMap[
"sm_53"] = FilePath;
159 LibDeviceMap[
"sm_60"] = FilePath;
160 LibDeviceMap[
"sm_61"] = FilePath;
161 LibDeviceMap[
"sm_62"] = FilePath;
162 }
else if (GpuArch ==
"compute_35") {
163 LibDeviceMap[
"sm_35"] = FilePath;
164 LibDeviceMap[
"sm_37"] = FilePath;
165 }
else if (GpuArch ==
"compute_50") {
167 LibDeviceMap[
"sm_50"] = FilePath;
168 LibDeviceMap[
"sm_52"] = FilePath;
169 LibDeviceMap[
"sm_53"] = FilePath;
177 if (LibDeviceMap.empty() && !Args.hasArg(options::OPT_nocudalib))
186 const ArgList &DriverArgs, ArgStringList &CC1Args)
const {
187 if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
191 llvm::sys::path::append(P,
"include");
192 llvm::sys::path::append(P,
"cuda_wrappers");
193 CC1Args.push_back(
"-internal-isystem");
194 CC1Args.push_back(DriverArgs.MakeArgString(P));
197 if (DriverArgs.hasArg(options::OPT_nocudainc))
201 D.
Diag(diag::err_drv_no_cuda_installation);
205 CC1Args.push_back(
"-internal-isystem");
207 CC1Args.push_back(
"-include");
208 CC1Args.push_back(
"__clang_cuda_runtime_wrapper.h");
214 ArchsWithBadVersion.count(Arch) > 0)
219 if (Version < MinVersion || Version > MaxVersion) {
220 ArchsWithBadVersion.insert(Arch);
221 D.
Diag(diag::err_drv_cuda_version_unsupported)
230 OS <<
"Found CUDA installation: " << InstallPath <<
", version " 238 const char *LinkingOutput)
const {
241 assert(TC.getTriple().isNVPTX() &&
"Wrong platform");
243 StringRef GPUArchName;
248 GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
249 assert(!GPUArchName.empty() &&
"Must have an architecture passed in.");
256 "Device action expected to have an architecture.");
259 if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
260 TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);
263 ArgStringList CmdArgs;
264 CmdArgs.push_back(TC.getTriple().isArch64Bit() ?
"-m64" :
"-m32");
265 if (Args.hasFlag(options::OPT_cuda_noopt_device_debug,
266 options::OPT_no_cuda_noopt_device_debug,
false)) {
269 CmdArgs.push_back(
"-g");
270 CmdArgs.push_back(
"--dont-merge-basicblocks");
271 CmdArgs.push_back(
"--return-at-end");
272 }
else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
280 StringRef OOpt =
"3";
281 if (A->getOption().matches(options::OPT_O4) ||
282 A->getOption().matches(options::OPT_Ofast))
284 else if (A->getOption().matches(options::OPT_O0))
286 else if (A->getOption().matches(options::OPT_O)) {
288 OOpt = llvm::StringSwitch<const char *>(A->getValue())
296 CmdArgs.push_back(Args.MakeArgString(llvm::Twine(
"-O") + OOpt));
300 CmdArgs.push_back(
"-O0");
304 if (Args.hasArg(options::OPT_v))
305 CmdArgs.push_back(
"-v");
307 CmdArgs.push_back(
"--gpu-name");
309 CmdArgs.push_back(
"--output-file");
310 CmdArgs.push_back(Args.MakeArgString(TC.getInputFilename(Output)));
311 for (
const auto& II : Inputs)
312 CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
314 for (
const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
315 CmdArgs.push_back(Args.MakeArgString(A));
319 Args.hasFlag(options::OPT_fopenmp_relocatable_target,
320 options::OPT_fnoopenmp_relocatable_target,
322 CmdArgs.push_back(
"-c");
325 if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ))
326 Exec = A->getValue();
328 Exec = Args.MakeArgString(TC.GetProgramPath(
"ptxas"));
329 C.
addCommand(llvm::make_unique<Command>(JA, *
this, Exec, CmdArgs, Inputs));
339 const char *LinkingOutput)
const {
342 assert(TC.getTriple().isNVPTX() &&
"Wrong platform");
344 ArgStringList CmdArgs;
345 CmdArgs.push_back(
"--cuda");
346 CmdArgs.push_back(TC.getTriple().isArch64Bit() ?
"-64" :
"-32");
347 CmdArgs.push_back(Args.MakeArgString(
"--create"));
348 CmdArgs.push_back(Args.MakeArgString(Output.
getFilename()));
350 for (
const auto& II : Inputs) {
351 auto *A = II.getAction();
352 assert(A->getInputs().size() == 1 &&
353 "Device offload action is expected to have a single input");
354 const char *gpu_arch_str = A->getOffloadingArch();
355 assert(gpu_arch_str &&
356 "Device action expected to have associated a GPU architecture!");
362 (II.getType() == types::TY_PP_Asm)
365 CmdArgs.push_back(Args.MakeArgString(llvm::Twine(
"--image=profile=") +
366 Arch +
",file=" + II.getFilename()));
369 for (
const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
370 CmdArgs.push_back(Args.MakeArgString(A));
372 const char *Exec = Args.MakeArgString(TC.GetProgramPath(
"fatbinary"));
373 C.
addCommand(llvm::make_unique<Command>(JA, *
this, Exec, CmdArgs, Inputs));
380 const char *LinkingOutput)
const {
383 assert(TC.getTriple().isNVPTX() &&
"Wrong platform");
385 ArgStringList CmdArgs;
390 "CUDA toolchain not expected for an OpenMP host device.");
393 CmdArgs.push_back(
"-o");
396 assert(Output.
isNothing() &&
"Invalid output.");
397 if (Args.hasArg(options::OPT_g_Flag))
398 CmdArgs.push_back(
"-g");
400 if (Args.hasArg(options::OPT_v))
401 CmdArgs.push_back(
"-v");
404 Args.getLastArgValue(options::OPT_march_EQ);
405 assert(!GPUArch.empty() &&
"At least one GPU Arch required for ptxas.");
407 CmdArgs.push_back(
"-arch");
408 CmdArgs.push_back(Args.MakeArgString(GPUArch));
415 llvm::sys::path::parent_path(TC.getDriver().Dir);
416 llvm::sys::path::append(DefaultLibPath,
"lib" CLANG_LIBDIR_SUFFIX);
417 CmdArgs.push_back(Args.MakeArgString(Twine(
"-L") + DefaultLibPath));
420 CmdArgs.push_back(
"-lomptarget-nvptx");
422 for (
const auto &II : Inputs) {
423 if (II.getType() == types::TY_LLVM_IR ||
424 II.getType() == types::TY_LTO_IR ||
425 II.getType() == types::TY_LTO_BC ||
426 II.getType() == types::TY_LLVM_BC) {
428 << getToolChain().getTripleString();
434 if (!II.isFilename())
438 C.
getArgs().MakeArgString(getToolChain().getInputFilename(II)));
440 CmdArgs.push_back(CubinF);
446 Args.MakeArgString(getToolChain().GetProgramPath(
"nvlink"));
447 C.
addCommand(llvm::make_unique<Command>(JA, *
this, Exec, CmdArgs, Inputs));
454 CudaToolChain::CudaToolChain(
const Driver &D,
const llvm::Triple &Triple,
455 const ToolChain &HostTC,
const ArgList &Args,
457 :
ToolChain(D, Triple, Args), HostTC(HostTC),
458 CudaInstallation(D, HostTC.getTriple(), Args), OK(OK) {
476 llvm::sys::path::replace_extension(Filename,
"cubin");
477 return Filename.str();
481 const llvm::opt::ArgList &DriverArgs,
482 llvm::opt::ArgStringList &CC1Args,
486 StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
487 assert(!GpuArch.empty() &&
"Must have an explicit GPU arch.");
490 "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
493 CC1Args.push_back(
"-fcuda-is-device");
495 if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
496 options::OPT_fno_cuda_flush_denormals_to_zero,
false))
497 CC1Args.push_back(
"-fcuda-flush-denormals-to-zero");
499 if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
500 options::OPT_fno_cuda_approx_transcendentals,
false))
501 CC1Args.push_back(
"-fcuda-approx-transcendentals");
504 if (DriverArgs.hasArg(options::OPT_nocudalib))
509 if (LibDeviceFile.empty()) {
511 DriverArgs.hasArg(options::OPT_S))
514 getDriver().
Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
518 CC1Args.push_back(
"-mlink-cuda-bitcode");
519 CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
523 CC1Args.push_back(
"-target-feature");
524 CC1Args.push_back(
"+ptx60");
529 CC1Args.push_back(
"-target-feature");
530 CC1Args.push_back(
"+ptx42");
535 ArgStringList &CC1Args)
const {
537 if (!DriverArgs.hasArg(options::OPT_nocudainc) &&
538 !DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
539 StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
540 assert(!Arch.empty() &&
"Must have an explicit GPU arch.");
546 llvm::opt::DerivedArgList *
550 DerivedArgList *DAL =
553 DAL =
new DerivedArgList(Args.getBaseArgs());
561 for (Arg *A : Args) {
562 bool IsDuplicate =
false;
563 for (Arg *DALArg : *DAL) {
573 StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
575 DAL->AddJoinedArg(
nullptr, Opts.getOption(options::OPT_march_EQ),
576 CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
581 for (Arg *A : Args) {
582 if (A->getOption().matches(options::OPT_Xarch__)) {
584 if (BoundArch.empty() || A->getValue(0) != BoundArch)
587 unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
588 unsigned Prev = Index;
589 std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index));
599 if (!XarchArg || Index > Prev + 1) {
600 getDriver().
Diag(diag::err_drv_invalid_Xarch_argument_with_args)
601 << A->getAsString(Args);
604 getDriver().
Diag(diag::err_drv_invalid_Xarch_argument_isdriver)
605 << A->getAsString(Args);
608 XarchArg->setBaseArg(A);
609 A = XarchArg.release();
610 DAL->AddSynthesizedArg(A);
615 if (!BoundArch.empty()) {
616 DAL->eraseArg(options::OPT_march_EQ);
617 DAL->AddJoinedArg(
nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
642 ArgStringList &CC1Args)
const {
647 ArgStringList &CC1Args)
const {
652 ArgStringList &CC1Args)
const {
670 const ArgList &Args)
const {
Represents a version number in the form major[.minor[.subminor[.build]]].
const char * CudaArchToString(CudaArch A)
bool isHostOffloading(OffloadKind OKind) const
Check if this action has any offload kinds.
DiagnosticBuilder Diag(unsigned DiagID) const
CudaArch StringToCudaArch(llvm::StringRef S)
CudaInstallationDetector(const Driver &D, const llvm::Triple &HostTriple, const llvm::opt::ArgList &Args)
void print(raw_ostream &OS) const
Print information about the detected CUDA installation.
Distro - Helper class for detecting and classifying Linux distributions.
bool isOffloading(OffloadKind OKind) const
void CheckCudaVersionSupportsArch(CudaArch Arch) const
Emit an error if Version does not support the given Arch.
bool isDeviceOffloading(OffloadKind OKind) const
std::string getLibDeviceFile(StringRef Gpu) const
Get libdevice file for given architecture.
Driver - Encapsulate logic for constructing compilation processes from a set of gcc-driver-like command line arguments.
const char * CudaVersionToString(CudaVersion V)
void addCommand(std::unique_ptr< Command > C)
const char * CudaVirtualArchToString(CudaVirtualArch A)
void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const
CudaVersion version() const
Get the detected Cuda install's version.
const llvm::opt::DerivedArgList & getArgs() const
CudaVersion MaxVersionForCudaArch(CudaArch A)
Get the latest CudaVersion that supports the given CudaArch.
vfs::FileSystem & getVFS() const
bool isValid() const
Check whether we detected a valid Cuda install.
StringRef getIncludePath() const
Get the detected Cuda Include path.
CudaVersion MinVersionForCudaArch(CudaArch A)
Get the earliest CudaVersion that supports the given CudaArch.
Dataflow Directional Tag Classes.
std::string SysRoot
sysroot, if present
Defines the virtual file system interface vfs::FileSystem.
CudaVirtualArch VirtualArchForCudaArch(CudaArch A)
Get the compute_xx corresponding to an sm_yy.
bool exists(const Twine &Path)
Check whether a file exists. Provided for convenience.
Compilation - A set of tasks to perform for a single driver invocation.
const Driver & getDriver() const
const llvm::opt::OptTable & getOpts() const
const char * addTempFile(const char *Name)
addTempFile - Add a file to remove on exit, and return its argument.
StringRef getBinPath() const
Get the detected path to Cuda's bin directory.
const char * getOffloadingArch() const
std::string ResourceDir
The path to the compiler resource directory.