[PATCH] D122089: [CUDA] Add getTargetFeatures for the NVPTX toolchain
zixuan-wu added inline comments. Comment at: clang/lib/Driver/ToolChains/Cuda.cpp:648 +#define CASE_CUDA_VERSION(CUDA_VER, PTX_VER) \ + case CudaVersion::CUDA_##CUDA_VER: \ +PtxFeature = "+ptx" #PTX_VER; \ jhuber6 wrote: > zixuan-wu wrote: > > It's same name as `CudaVersion` variable above, and it cause compile error. > > @jhuber6 > > Maybe the error depends on host compiler version so that it does not report > > immediately. > Fixed in rGd91223274145. Thanks for pointing that out, it worked fined > locally. Thank you for your quick action. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D122089/new/ https://reviews.llvm.org/D122089 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D122089: [CUDA] Add getTargetFeatures for the NVPTX toolchain
jhuber6 added inline comments. Comment at: clang/lib/Driver/ToolChains/Cuda.cpp:648 +#define CASE_CUDA_VERSION(CUDA_VER, PTX_VER) \ + case CudaVersion::CUDA_##CUDA_VER: \ +PtxFeature = "+ptx" #PTX_VER; \ zixuan-wu wrote: > It's same name as `CudaVersion` variable above, and it cause compile error. > @jhuber6 > Maybe the error depends on host compiler version so that it does not report > immediately. Fixed in rGd91223274145. Thanks for pointing that out, it worked fined locally. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D122089/new/ https://reviews.llvm.org/D122089 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D122089: [CUDA] Add getTargetFeatures for the NVPTX toolchain
zixuan-wu added inline comments. Comment at: clang/lib/Driver/ToolChains/Cuda.cpp:648 +#define CASE_CUDA_VERSION(CUDA_VER, PTX_VER) \ + case CudaVersion::CUDA_##CUDA_VER: \ +PtxFeature = "+ptx" #PTX_VER; \ It's same name as `CudaVersion` variable above, and it cause compile error. @jhuber6 Maybe the error depends on host compiler version so that it does not report immediately. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D122089/new/ https://reviews.llvm.org/D122089 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D122089: [CUDA] Add getTargetFeatures for the NVPTX toolchain
This revision was automatically updated to reflect the committed changes. Closed by commit rGa3248e4b28ce: [CUDA] Add getTargetFeatures for the NVPTX toolchain (authored by jhuber6). Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D122089/new/ https://reviews.llvm.org/D122089 Files: clang/lib/Driver/ToolChains/Clang.cpp clang/lib/Driver/ToolChains/Cuda.cpp clang/lib/Driver/ToolChains/Cuda.h Index: clang/lib/Driver/ToolChains/Cuda.h === --- clang/lib/Driver/ToolChains/Cuda.h +++ clang/lib/Driver/ToolChains/Cuda.h @@ -124,6 +124,11 @@ const char *LinkingOutput) const override; }; +void getNVPTXTargetFeatures(const Driver , const llvm::Triple , +const llvm::opt::ArgList , +std::vector , +Optional CudaVersion = None); + } // end namespace NVPTX } // end namespace tools Index: clang/lib/Driver/ToolChains/Cuda.cpp === --- clang/lib/Driver/ToolChains/Cuda.cpp +++ clang/lib/Driver/ToolChains/Cuda.cpp @@ -630,6 +630,43 @@ Exec, CmdArgs, Inputs, Output)); } +void NVPTX::getNVPTXTargetFeatures(const Driver , const llvm::Triple , + const llvm::opt::ArgList , + std::vector , + Optional CudaVersion) { + if (!CudaVersion) { +CudaInstallationDetector CudaInstallation(D, Triple, Args); +CudaVersion = CudaInstallation.version(); + } + + // New CUDA versions often introduce new instructions that are only supported + // by new PTX version, so we need to raise PTX level to enable them in NVPTX + // back-end. + const char *PtxFeature = nullptr; + switch (*CudaVersion) { +#define CASE_CUDA_VERSION(CUDA_VER, PTX_VER) \ + case CudaVersion::CUDA_##CUDA_VER: \ +PtxFeature = "+ptx" #PTX_VER; \ +break; +CASE_CUDA_VERSION(115, 75); +CASE_CUDA_VERSION(114, 74); +CASE_CUDA_VERSION(113, 73); +CASE_CUDA_VERSION(112, 72); +CASE_CUDA_VERSION(111, 71); +CASE_CUDA_VERSION(110, 70); +CASE_CUDA_VERSION(102, 65); +CASE_CUDA_VERSION(101, 64); +CASE_CUDA_VERSION(100, 63); +CASE_CUDA_VERSION(92, 61); +CASE_CUDA_VERSION(91, 61); +CASE_CUDA_VERSION(90, 60); +#undef CASE_CUDA_VERSION + default: +PtxFeature = "+ptx42"; + } + Features.push_back(PtxFeature); +} + /// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary, /// which isn't properly a linker but nonetheless performs the step of stitching /// together object files from the assembler into a single blob. @@ -701,32 +738,11 @@ clang::CudaVersion CudaInstallationVersion = CudaInstallation.version(); - // New CUDA versions often introduce new instructions that are only supported - // by new PTX version, so we need to raise PTX level to enable them in NVPTX - // back-end. - const char *PtxFeature = nullptr; - switch (CudaInstallationVersion) { -#define CASE_CUDA_VERSION(CUDA_VER, PTX_VER) \ - case CudaVersion::CUDA_##CUDA_VER: \ -PtxFeature = "+ptx" #PTX_VER; \ -break; -CASE_CUDA_VERSION(115, 75); -CASE_CUDA_VERSION(114, 74); -CASE_CUDA_VERSION(113, 73); -CASE_CUDA_VERSION(112, 72); -CASE_CUDA_VERSION(111, 71); -CASE_CUDA_VERSION(110, 70); -CASE_CUDA_VERSION(102, 65); -CASE_CUDA_VERSION(101, 64); -CASE_CUDA_VERSION(100, 63); -CASE_CUDA_VERSION(92, 61); -CASE_CUDA_VERSION(91, 61); -CASE_CUDA_VERSION(90, 60); -#undef CASE_CUDA_VERSION - default: -PtxFeature = "+ptx42"; - } - CC1Args.append({"-target-feature", PtxFeature}); + std::vector Features; + NVPTX::getNVPTXTargetFeatures(getDriver(), getTriple(), DriverArgs, Features, +CudaInstallationVersion); + for (StringRef PtxFeature : Features) +CC1Args.append({"-target-feature", DriverArgs.MakeArgString(PtxFeature)}); if (DriverArgs.hasFlag(options::OPT_fcuda_short_ptr, options::OPT_fno_cuda_short_ptr, false)) CC1Args.append({"-mllvm", "--nvptx-short-ptr"}); Index: clang/lib/Driver/ToolChains/Clang.cpp === --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -369,6 +369,10 @@ case llvm::Triple::amdgcn: amdgpu::getAMDGPUTargetFeatures(D, Triple, Args, Features); break; + case llvm::Triple::nvptx: + case llvm::Triple::nvptx64: +NVPTX::getNVPTXTargetFeatures(D, Triple, Args, Features); +break; case llvm::Triple::m68k: m68k::getM68kTargetFeatures(D,
[PATCH] D122089: [CUDA] Add getTargetFeatures for the NVPTX toolchain
jdoerfert accepted this revision. jdoerfert added a comment. This revision is now accepted and ready to land. LG Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D122089/new/ https://reviews.llvm.org/D122089 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D122089: [CUDA] Add getTargetFeatures for the NVPTX toolchain
jhuber6 added inline comments. Comment at: clang/lib/Driver/ToolChains/Cuda.cpp:635 + const llvm::opt::ArgList , + std::vector , + Optional CudaVersion) { jdoerfert wrote: > Why std::vector. looks like this only returns 1 anyway, no? That's how the rest of these functions are used in `getTargetFeatures` above. If I had a different method it wouldn't be easy to insert there. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D122089/new/ https://reviews.llvm.org/D122089 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D122089: [CUDA] Add getTargetFeatures for the NVPTX toolchain
jdoerfert added inline comments. Comment at: clang/lib/Driver/ToolChains/Cuda.cpp:635 + const llvm::opt::ArgList , + std::vector , + Optional CudaVersion) { Why std::vector. looks like this only returns 1 anyway, no? Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D122089/new/ https://reviews.llvm.org/D122089 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D122089: [CUDA] Add getTargetFeatures for the NVPTX toolchain
jhuber6 created this revision. jhuber6 added reviewers: jdoerfert, tra, tianshilei1992, yaxunl. Herald added a subscriber: asavonic. Herald added a project: All. jhuber6 requested review of this revision. Herald added subscribers: cfe-commits, sstefan1. Herald added a project: clang. The NVPTX toolchain uses target features to determine the PTX version to use. However this isn't exposed externally like most other toolchain specific target features are. Add this functionaliy in preparation for using it in for OpenMP offloading. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D122089 Files: clang/lib/Driver/ToolChains/Clang.cpp clang/lib/Driver/ToolChains/Cuda.cpp clang/lib/Driver/ToolChains/Cuda.h Index: clang/lib/Driver/ToolChains/Cuda.h === --- clang/lib/Driver/ToolChains/Cuda.h +++ clang/lib/Driver/ToolChains/Cuda.h @@ -124,6 +124,11 @@ const char *LinkingOutput) const override; }; +void getNVPTXTargetFeatures(const Driver , const llvm::Triple , +const llvm::opt::ArgList , +std::vector , +Optional CudaVersion = None); + } // end namespace NVPTX } // end namespace tools Index: clang/lib/Driver/ToolChains/Cuda.cpp === --- clang/lib/Driver/ToolChains/Cuda.cpp +++ clang/lib/Driver/ToolChains/Cuda.cpp @@ -630,6 +630,43 @@ Exec, CmdArgs, Inputs, Output)); } +void NVPTX::getNVPTXTargetFeatures(const Driver , const llvm::Triple , + const llvm::opt::ArgList , + std::vector , + Optional CudaVersion) { + if (!CudaVersion) { +CudaInstallationDetector CudaInstallation(D, Triple, Args); +CudaVersion = CudaInstallation.version(); + } + + // New CUDA versions often introduce new instructions that are only supported + // by new PTX version, so we need to raise PTX level to enable them in NVPTX + // back-end. + const char *PtxFeature = nullptr; + switch (*CudaVersion) { +#define CASE_CUDA_VERSION(CUDA_VER, PTX_VER) \ + case CudaVersion::CUDA_##CUDA_VER: \ +PtxFeature = "+ptx" #PTX_VER; \ +break; +CASE_CUDA_VERSION(115, 75); +CASE_CUDA_VERSION(114, 74); +CASE_CUDA_VERSION(113, 73); +CASE_CUDA_VERSION(112, 72); +CASE_CUDA_VERSION(111, 71); +CASE_CUDA_VERSION(110, 70); +CASE_CUDA_VERSION(102, 65); +CASE_CUDA_VERSION(101, 64); +CASE_CUDA_VERSION(100, 63); +CASE_CUDA_VERSION(92, 61); +CASE_CUDA_VERSION(91, 61); +CASE_CUDA_VERSION(90, 60); +#undef CASE_CUDA_VERSION + default: +PtxFeature = "+ptx42"; + } + Features.push_back(PtxFeature); +} + /// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary, /// which isn't properly a linker but nonetheless performs the step of stitching /// together object files from the assembler into a single blob. @@ -701,32 +738,11 @@ clang::CudaVersion CudaInstallationVersion = CudaInstallation.version(); - // New CUDA versions often introduce new instructions that are only supported - // by new PTX version, so we need to raise PTX level to enable them in NVPTX - // back-end. - const char *PtxFeature = nullptr; - switch (CudaInstallationVersion) { -#define CASE_CUDA_VERSION(CUDA_VER, PTX_VER) \ - case CudaVersion::CUDA_##CUDA_VER: \ -PtxFeature = "+ptx" #PTX_VER; \ -break; -CASE_CUDA_VERSION(115, 75); -CASE_CUDA_VERSION(114, 74); -CASE_CUDA_VERSION(113, 73); -CASE_CUDA_VERSION(112, 72); -CASE_CUDA_VERSION(111, 71); -CASE_CUDA_VERSION(110, 70); -CASE_CUDA_VERSION(102, 65); -CASE_CUDA_VERSION(101, 64); -CASE_CUDA_VERSION(100, 63); -CASE_CUDA_VERSION(92, 61); -CASE_CUDA_VERSION(91, 61); -CASE_CUDA_VERSION(90, 60); -#undef CASE_CUDA_VERSION - default: -PtxFeature = "+ptx42"; - } - CC1Args.append({"-target-feature", PtxFeature}); + std::vector Features; + NVPTX::getNVPTXTargetFeatures(getDriver(), getTriple(), DriverArgs, Features, +CudaInstallationVersion); + for (StringRef PtxFeature : Features) +CC1Args.append({"-target-feature", DriverArgs.MakeArgString(PtxFeature)}); if (DriverArgs.hasFlag(options::OPT_fcuda_short_ptr, options::OPT_fno_cuda_short_ptr, false)) CC1Args.append({"-mllvm", "--nvptx-short-ptr"}); Index: clang/lib/Driver/ToolChains/Clang.cpp === --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -369,6 +369,10 @@ case