13 #include "clang/Config/config.h" 19 #include "llvm/Option/ArgList.h" 20 #include "llvm/Support/FileSystem.h" 21 #include "llvm/Support/Path.h" 22 #include "llvm/Support/Process.h" 23 #include "llvm/Support/Program.h" 24 #include "llvm/Support/VirtualFileSystem.h" 25 #include <system_error> 30 using namespace clang;
36 if (!V.startswith(
"CUDA Version "))
38 V = V.substr(strlen(
"CUDA Version "));
40 V.split(VersionParts,
'.');
41 if (VersionParts.size() < 2)
43 std::string MajorMinor = join_items(
".", VersionParts[0], VersionParts[1]);
50 D.
Diag(diag::warn_drv_unknown_cuda_version)
56 const Driver &D,
const llvm::Triple &HostTriple,
57 const llvm::opt::ArgList &Args)
63 Candidate(std::string Path,
bool StrictChecking =
false)
64 : Path(Path), StrictChecking(StrictChecking) {}
69 std::initializer_list<const char *> Versions = {
"8.0",
"7.5",
"7.0"};
71 if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
72 Candidates.emplace_back(
73 Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ).str());
74 }
else if (HostTriple.isOSWindows()) {
75 for (
const char *Ver : Versions)
76 Candidates.emplace_back(
77 D.
SysRoot +
"/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
80 if (!Args.hasArg(clang::driver::options::OPT_cuda_path_ignore_env)) {
89 if (llvm::ErrorOr<std::string> ptxas =
90 llvm::sys::findProgramByName(
"ptxas")) {
92 llvm::sys::fs::real_path(*ptxas, ptxasAbsolutePath);
94 StringRef ptxasDir = llvm::sys::path::parent_path(ptxasAbsolutePath);
95 if (llvm::sys::path::filename(ptxasDir) ==
"bin")
96 Candidates.emplace_back(llvm::sys::path::parent_path(ptxasDir),
101 Candidates.emplace_back(D.
SysRoot +
"/usr/local/cuda");
102 for (
const char *Ver : Versions)
103 Candidates.emplace_back(D.
SysRoot +
"/usr/local/cuda-" + Ver);
105 Distro Dist(D.
getVFS(), llvm::Triple(llvm::sys::getProcessTriple()));
106 if (Dist.IsDebian() || Dist.IsUbuntu())
109 Candidates.emplace_back(D.
SysRoot +
"/usr/lib/cuda");
112 bool NoCudaLib = Args.hasArg(options::OPT_nogpulib);
114 for (
const auto &Candidate : Candidates) {
115 InstallPath = Candidate.Path;
116 if (InstallPath.empty() || !D.
getVFS().exists(InstallPath))
119 BinPath = InstallPath +
"/bin";
120 IncludePath = InstallPath +
"/include";
121 LibDevicePath = InstallPath +
"/nvvm/libdevice";
124 if (!(FS.exists(IncludePath) && FS.exists(BinPath)))
126 bool CheckLibDevice = (!NoCudaLib || Candidate.StrictChecking);
127 if (CheckLibDevice && !FS.exists(LibDevicePath))
136 if (HostTriple.isArch64Bit() && FS.exists(InstallPath +
"/lib64"))
137 LibPath = InstallPath +
"/lib64";
138 else if (FS.exists(InstallPath +
"/lib"))
139 LibPath = InstallPath +
"/lib";
143 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
144 FS.getBufferForFile(InstallPath +
"/version.txt");
155 std::string FilePath = LibDevicePath +
"/libdevice.10.bc";
156 if (FS.exists(FilePath)) {
157 for (
const char *GpuArchName :
158 {
"sm_30",
"sm_32",
"sm_35",
"sm_37",
"sm_50",
"sm_52",
"sm_53",
159 "sm_60",
"sm_61",
"sm_62",
"sm_70",
"sm_72",
"sm_75"}) {
163 LibDeviceMap[GpuArchName] = FilePath;
168 for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC),
LE;
169 !EC && LI !=
LE; LI = LI.increment(EC)) {
170 StringRef FilePath = LI->path();
171 StringRef FileName = llvm::sys::path::filename(FilePath);
174 const StringRef LibDeviceName =
"libdevice.";
175 if (!(FileName.startswith(LibDeviceName) && FileName.endswith(
".bc")))
177 StringRef GpuArch = FileName.slice(
178 LibDeviceName.size(), FileName.find(
'.', LibDeviceName.size()));
179 LibDeviceMap[GpuArch] = FilePath.str();
183 if (GpuArch ==
"compute_20") {
184 LibDeviceMap[
"sm_20"] = FilePath;
185 LibDeviceMap[
"sm_21"] = FilePath;
186 LibDeviceMap[
"sm_32"] = FilePath;
187 }
else if (GpuArch ==
"compute_30") {
188 LibDeviceMap[
"sm_30"] = FilePath;
190 LibDeviceMap[
"sm_50"] = FilePath;
191 LibDeviceMap[
"sm_52"] = FilePath;
192 LibDeviceMap[
"sm_53"] = FilePath;
194 LibDeviceMap[
"sm_60"] = FilePath;
195 LibDeviceMap[
"sm_61"] = FilePath;
196 LibDeviceMap[
"sm_62"] = FilePath;
197 }
else if (GpuArch ==
"compute_35") {
198 LibDeviceMap[
"sm_35"] = FilePath;
199 LibDeviceMap[
"sm_37"] = FilePath;
200 }
else if (GpuArch ==
"compute_50") {
202 LibDeviceMap[
"sm_50"] = FilePath;
203 LibDeviceMap[
"sm_52"] = FilePath;
204 LibDeviceMap[
"sm_53"] = FilePath;
212 if (LibDeviceMap.empty() && !NoCudaLib)
221 const ArgList &DriverArgs, ArgStringList &CC1Args)
const {
222 if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
226 llvm::sys::path::append(P,
"include");
227 llvm::sys::path::append(P,
"cuda_wrappers");
228 CC1Args.push_back(
"-internal-isystem");
229 CC1Args.push_back(DriverArgs.MakeArgString(P));
232 if (DriverArgs.hasArg(options::OPT_nocudainc))
236 D.Diag(diag::err_drv_no_cuda_installation);
240 CC1Args.push_back(
"-internal-isystem");
242 CC1Args.push_back(
"-include");
243 CC1Args.push_back(
"__clang_cuda_runtime_wrapper.h");
249 ArchsWithBadVersion.count(Arch) > 0)
254 if (Version < MinVersion || Version > MaxVersion) {
255 ArchsWithBadVersion.insert(Arch);
256 D.Diag(diag::err_drv_cuda_version_unsupported)
265 OS <<
"Found CUDA installation: " << InstallPath <<
", version " 278 EmitSameDebugInfoAsHost,
292 const Arg *A = Args.getLastArg(options::OPT_O_Group);
293 bool IsDebugEnabled = !A || A->getOption().matches(options::OPT_O0) ||
294 Args.hasFlag(options::OPT_cuda_noopt_device_debug,
295 options::OPT_no_cuda_noopt_device_debug,
297 if (
const Arg *A = Args.getLastArg(options::OPT_g_Group)) {
298 const Option &Opt = A->getOption();
299 if (Opt.matches(options::OPT_gN_Group)) {
300 if (Opt.matches(options::OPT_g0) || Opt.matches(options::OPT_ggdb0))
301 return DisableDebugInfo;
302 if (Opt.matches(options::OPT_gline_directives_only))
307 return DisableDebugInfo;
314 const char *LinkingOutput)
const {
317 assert(TC.getTriple().isNVPTX() &&
"Wrong platform");
319 StringRef GPUArchName;
324 GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
325 assert(!GPUArchName.empty() &&
"Must have an architecture passed in.");
332 "Device action expected to have an architecture.");
335 if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
336 TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);
339 ArgStringList CmdArgs;
340 CmdArgs.push_back(TC.getTriple().isArch64Bit() ?
"-m64" :
"-m32");
342 if (DIKind == EmitSameDebugInfoAsHost) {
345 CmdArgs.push_back(
"-g");
346 CmdArgs.push_back(
"--dont-merge-basicblocks");
347 CmdArgs.push_back(
"--return-at-end");
348 }
else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
356 StringRef OOpt =
"3";
357 if (A->getOption().matches(options::OPT_O4) ||
358 A->getOption().matches(options::OPT_Ofast))
360 else if (A->getOption().matches(options::OPT_O0))
362 else if (A->getOption().matches(options::OPT_O)) {
364 OOpt = llvm::StringSwitch<const char *>(A->getValue())
372 CmdArgs.push_back(Args.MakeArgString(llvm::Twine(
"-O") + OOpt));
376 CmdArgs.push_back(
"-O0");
379 CmdArgs.push_back(
"-lineinfo");
382 if (Args.hasArg(options::OPT_v))
383 CmdArgs.push_back(
"-v");
385 CmdArgs.push_back(
"--gpu-name");
387 CmdArgs.push_back(
"--output-file");
388 CmdArgs.push_back(Args.MakeArgString(TC.getInputFilename(Output)));
389 for (
const auto& II : Inputs)
390 CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
392 for (
const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
393 CmdArgs.push_back(Args.MakeArgString(A));
395 bool Relocatable =
false;
398 Relocatable = Args.hasFlag(options::OPT_fopenmp_relocatable_target,
399 options::OPT_fnoopenmp_relocatable_target,
402 Relocatable = Args.hasFlag(options::OPT_fgpu_rdc,
403 options::OPT_fno_gpu_rdc,
false);
406 CmdArgs.push_back(
"-c");
409 if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ))
410 Exec = A->getValue();
412 Exec = Args.MakeArgString(TC.GetProgramPath(
"ptxas"));
413 C.
addCommand(std::make_unique<Command>(JA, *
this, Exec, CmdArgs, Inputs));
417 bool includePTX =
true;
418 for (Arg *A : Args) {
419 if (!(A->getOption().matches(options::OPT_cuda_include_ptx_EQ) ||
420 A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ)))
423 const StringRef ArchStr = A->getValue();
424 if (ArchStr ==
"all" || ArchStr == gpu_arch) {
425 includePTX = A->getOption().matches(options::OPT_cuda_include_ptx_EQ);
439 const char *LinkingOutput)
const {
442 assert(TC.getTriple().isNVPTX() &&
"Wrong platform");
444 ArgStringList CmdArgs;
446 CmdArgs.push_back(
"--cuda");
447 CmdArgs.push_back(TC.getTriple().isArch64Bit() ?
"-64" :
"-32");
448 CmdArgs.push_back(Args.MakeArgString(
"--create"));
449 CmdArgs.push_back(Args.MakeArgString(Output.
getFilename()));
451 CmdArgs.push_back(
"-g");
453 for (
const auto& II : Inputs) {
454 auto *A = II.getAction();
455 assert(A->getInputs().size() == 1 &&
456 "Device offload action is expected to have a single input");
457 const char *gpu_arch_str = A->getOffloadingArch();
458 assert(gpu_arch_str &&
459 "Device action expected to have associated a GPU architecture!");
462 if (II.getType() == types::TY_PP_Asm &&
468 (II.getType() == types::TY_PP_Asm)
471 CmdArgs.push_back(Args.MakeArgString(llvm::Twine(
"--image=profile=") +
472 Arch +
",file=" + II.getFilename()));
475 for (
const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
476 CmdArgs.push_back(Args.MakeArgString(A));
478 const char *Exec = Args.MakeArgString(TC.GetProgramPath(
"fatbinary"));
479 C.
addCommand(std::make_unique<Command>(JA, *
this, Exec, CmdArgs, Inputs));
486 const char *LinkingOutput)
const {
489 assert(TC.getTriple().isNVPTX() &&
"Wrong platform");
491 ArgStringList CmdArgs;
496 "CUDA toolchain not expected for an OpenMP host device.");
499 CmdArgs.push_back(
"-o");
502 assert(Output.
isNothing() &&
"Invalid output.");
504 CmdArgs.push_back(
"-g");
506 if (Args.hasArg(options::OPT_v))
507 CmdArgs.push_back(
"-v");
510 Args.getLastArgValue(options::OPT_march_EQ);
511 assert(!GPUArch.empty() &&
"At least one GPU Arch required for ptxas.");
513 CmdArgs.push_back(
"-arch");
514 CmdArgs.push_back(Args.MakeArgString(GPUArch));
518 if (
const Arg *A = Args.getLastArg(options::OPT_libomptarget_nvptx_path_EQ))
519 CmdArgs.push_back(Args.MakeArgString(Twine(
"-L") + A->getValue()));
526 llvm::sys::path::parent_path(TC.getDriver().Dir);
527 llvm::sys::path::append(DefaultLibPath,
"lib" CLANG_LIBDIR_SUFFIX);
528 CmdArgs.push_back(Args.MakeArgString(Twine(
"-L") + DefaultLibPath));
531 CmdArgs.push_back(
"-lomptarget-nvptx");
533 for (
const auto &II : Inputs) {
534 if (II.getType() == types::TY_LLVM_IR ||
535 II.getType() == types::TY_LTO_IR ||
536 II.getType() == types::TY_LTO_BC ||
537 II.getType() == types::TY_LLVM_BC) {
539 << getToolChain().getTripleString();
545 if (!II.isFilename())
549 C.
getArgs().MakeArgString(getToolChain().getInputFilename(II)));
551 CmdArgs.push_back(CubinF);
555 Args.MakeArgString(getToolChain().GetProgramPath(
"nvlink"));
556 C.
addCommand(std::make_unique<Command>(JA, *
this, Exec, CmdArgs, Inputs));
563 CudaToolChain::CudaToolChain(
const Driver &D,
const llvm::Triple &Triple,
564 const ToolChain &HostTC,
const ArgList &Args,
566 :
ToolChain(D, Triple, Args), HostTC(HostTC),
567 CudaInstallation(D, HostTC.getTriple(), Args), OK(OK) {
585 llvm::sys::path::replace_extension(Filename,
"cubin");
586 return Filename.str();
590 const llvm::opt::ArgList &DriverArgs,
591 llvm::opt::ArgStringList &CC1Args,
595 StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
596 assert(!GpuArch.empty() &&
"Must have an explicit GPU arch.");
599 "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
602 CC1Args.push_back(
"-fcuda-is-device");
604 if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
605 options::OPT_fno_cuda_flush_denormals_to_zero,
false))
606 CC1Args.push_back(
"-fcuda-flush-denormals-to-zero");
608 if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
609 options::OPT_fno_cuda_approx_transcendentals,
false))
610 CC1Args.push_back(
"-fcuda-approx-transcendentals");
612 if (DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
614 CC1Args.push_back(
"-fgpu-rdc");
617 if (DriverArgs.hasArg(options::OPT_nogpulib))
622 if (LibDeviceFile.empty()) {
624 DriverArgs.hasArg(options::OPT_S))
627 getDriver().
Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
631 CC1Args.push_back(
"-mlink-builtin-bitcode");
632 CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
637 const char *PtxFeature =
nullptr;
640 PtxFeature =
"+ptx64";
643 PtxFeature =
"+ptx63";
646 PtxFeature =
"+ptx61";
649 PtxFeature =
"+ptx61";
652 PtxFeature =
"+ptx60";
655 PtxFeature =
"+ptx42";
657 CC1Args.append({
"-target-feature", PtxFeature});
658 if (DriverArgs.hasFlag(options::OPT_fcuda_short_ptr,
659 options::OPT_fno_cuda_short_ptr,
false))
660 CC1Args.append({
"-mllvm",
"--nvptx-short-ptr"});
663 CC1Args.push_back(DriverArgs.MakeArgString(
664 Twine(
"-target-sdk-version=") +
669 if (
const Arg *A = DriverArgs.getLastArg(options::OPT_libomptarget_nvptx_path_EQ))
670 LibraryPaths.push_back(A->getValue());
674 llvm::sys::Process::GetEnv(
"LIBRARY_PATH");
677 const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator,
'\0'};
678 llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr);
679 for (StringRef Path : Frags)
680 LibraryPaths.emplace_back(Path.trim());
685 llvm::sys::path::parent_path(
getDriver().Dir);
686 llvm::sys::path::append(DefaultLibPath, Twine(
"lib") + CLANG_LIBDIR_SUFFIX);
687 LibraryPaths.emplace_back(DefaultLibPath.c_str());
689 std::string LibOmpTargetName =
690 "libomptarget-nvptx-" + GpuArch.str() +
".bc";
691 bool FoundBCLibrary =
false;
692 for (StringRef LibraryPath : LibraryPaths) {
694 llvm::sys::path::append(LibOmpTargetFile, LibOmpTargetName);
695 if (llvm::sys::fs::exists(LibOmpTargetFile)) {
696 CC1Args.push_back(
"-mlink-builtin-bitcode");
697 CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetFile));
698 FoundBCLibrary =
true;
703 getDriver().
Diag(diag::warn_drv_omp_offload_target_missingbcruntime)
709 const Option &O = A->getOption();
710 return (O.matches(options::OPT_gN_Group) &&
711 !O.matches(options::OPT_gmodules)) ||
712 O.matches(options::OPT_g_Flag) ||
713 O.matches(options::OPT_ggdbN_Group) || O.matches(options::OPT_ggdb) ||
714 O.matches(options::OPT_gdwarf) || O.matches(options::OPT_gdwarf_2) ||
715 O.matches(options::OPT_gdwarf_3) || O.matches(options::OPT_gdwarf_4) ||
716 O.matches(options::OPT_gdwarf_5) ||
717 O.matches(options::OPT_gcolumn_info);
723 case DisableDebugInfo:
729 case EmitSameDebugInfoAsHost:
736 ArgStringList &CC1Args)
const {
738 if (!DriverArgs.hasArg(options::OPT_nocudainc) &&
739 !DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
740 StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
741 assert(!Arch.empty() &&
"Must have an explicit GPU arch.");
747 llvm::opt::DerivedArgList *
751 DerivedArgList *DAL =
754 DAL =
new DerivedArgList(Args.getBaseArgs());
762 for (Arg *A : Args) {
763 bool IsDuplicate =
false;
764 for (Arg *DALArg : *DAL) {
774 StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
776 DAL->AddJoinedArg(
nullptr, Opts.getOption(options::OPT_march_EQ),
777 CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
782 for (Arg *A : Args) {
783 if (A->getOption().matches(options::OPT_Xarch__)) {
785 if (BoundArch.empty() || A->getValue(0) != BoundArch)
788 unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
789 unsigned Prev = Index;
790 std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index));
800 if (!XarchArg || Index > Prev + 1) {
801 getDriver().
Diag(diag::err_drv_invalid_Xarch_argument_with_args)
802 << A->getAsString(Args);
805 getDriver().
Diag(diag::err_drv_invalid_Xarch_argument_isdriver)
806 << A->getAsString(Args);
809 XarchArg->setBaseArg(A);
810 A = XarchArg.release();
811 DAL->AddSynthesizedArg(A);
816 if (!BoundArch.empty()) {
817 DAL->eraseArg(options::OPT_march_EQ);
818 DAL->AddJoinedArg(
nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
843 ArgStringList &CC1Args)
const {
848 ArgStringList &CC1Args)
const {
853 ArgStringList &CC1Args)
const {
871 const ArgList &Args)
const {
CudaVersion CudaStringToVersion(const llvm::Twine &S)
const char * CudaArchToString(CudaArch A)
bool isHostOffloading(OffloadKind OKind) const
Check if this action have any offload kinds.
DiagnosticBuilder Diag(unsigned DiagID) const
CudaArch StringToCudaArch(llvm::StringRef S)
Don't generate debug info.
CudaInstallationDetector(const Driver &D, const llvm::Triple &HostTriple, const llvm::opt::ArgList &Args)
void print(raw_ostream &OS) const
Print information about the detected CUDA installation.
Distro - Helper class for detecting and classifying Linux distributions.
bool isOffloading(OffloadKind OKind) const
void CheckCudaVersionSupportsArch(CudaArch Arch) const
Emit an error if Version does not support the given Arch.
Emit only debug directives with the line numbers data.
bool isDeviceOffloading(OffloadKind OKind) const
std::string getLibDeviceFile(StringRef Gpu) const
Get libdevice file for given architecture.
Driver - Encapsulate logic for constructing compilation processes from a set of gcc-driver-like comma...
llvm::vfs::FileSystem & getVFS() const
const char * CudaVersionToString(CudaVersion V)
void addCommand(std::unique_ptr< Command > C)
const char * CudaVirtualArchToString(CudaVirtualArch A)
void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const
CudaVersion version() const
Get the detected Cuda install's version.
const llvm::opt::DerivedArgList & getArgs() const
CudaVersion MaxVersionForCudaArch(CudaArch A)
Get the latest CudaVersion that supports the given CudaArch.
bool isValid() const
Check whether we detected a valid Cuda install.
StringRef getIncludePath() const
Get the detected Cuda Include path.
CudaVersion MinVersionForCudaArch(CudaArch A)
Get the earliest CudaVersion that supports the given CudaArch.
Dataflow Directional Tag Classes.
std::string SysRoot
sysroot, if present
CudaVirtualArch VirtualArchForCudaArch(CudaArch A)
Get the compute_xx corresponding to an sm_yy.
Compilation - A set of tasks to perform for a single driver invocation.
const Driver & getDriver() const
bool LE(InterpState &S, CodePtr OpPC)
const llvm::opt::OptTable & getOpts() const
const char * addTempFile(const char *Name)
addTempFile - Add a file to remove on exit, and returns its argument.
StringRef getBinPath() const
Get the detected path to Cuda's bin directory.
const char * getOffloadingArch() const