[PATCH] D155775: [Clang][Driver][RFC] Add driver support for C++ Parallel Algorithm Offload
AlexVlx updated this revision to Diff 547570. CHANGES SINCE LAST ACTION https://reviews.llvm.org/D155775/new/ https://reviews.llvm.org/D155775 Files: clang/include/clang/Basic/DiagnosticDriverKinds.td clang/include/clang/Basic/LangOptions.def clang/include/clang/Driver/Options.td clang/lib/Driver/Driver.cpp clang/lib/Driver/ToolChains/AMDGPU.cpp clang/lib/Driver/ToolChains/Clang.cpp clang/lib/Driver/ToolChains/HIPAMD.cpp clang/lib/Driver/ToolChains/ROCm.h clang/test/Driver/Inputs/stdpar/stdpar_lib.hpp clang/test/Driver/stdpar.c Index: clang/test/Driver/stdpar.c === --- /dev/null +++ clang/test/Driver/stdpar.c @@ -0,0 +1,18 @@ +// RUN: %clang -### -stdpar --compile %s 2>&1 | \ +// RUN: FileCheck --check-prefix=STDPAR-MISSING-LIB %s +// STDPAR-MISSING-LIB: error: cannot find HIP Standard Parallelism Acceleration library; provide it via '--stdpar-path' + +// RUN: %clang -### --stdpar --stdpar-path=%S/Inputs/stdpar \ +// RUN: --stdpar-thrust-path=%S/Inputs/stdpar/thrust \ +// RUN: --stdpar-prim-path=%S/Inputs/stdpar/prim --compile %s 2>&1 | \ +// RUN: FileCheck --check-prefix=STDPAR-COMPILE %s +// STDPAR-COMPILE: "-x" "hip" +// STDPAR-COMPILE: "-idirafter" "{{.*/thrust}}" +// STDPAR-COMPILE: "-idirafter" "{{.*/prim}}" +// STDPAR-COMPILE: "-idirafter" "{{.*/Inputs/stdpar}}" +// STDPAR-COMPILE: "-include" "stdpar_lib.hpp" + +// RUN: touch %t.o +// RUN: %clang -### -stdpar %t.o 2>&1 | FileCheck --check-prefix=STDPAR-LINK %s +// STDPAR-LINK: "-rpath" +// STDPAR-LINK: "-l{{.*hip.*}}" Index: clang/lib/Driver/ToolChains/ROCm.h === --- clang/lib/Driver/ToolChains/ROCm.h +++ clang/lib/Driver/ToolChains/ROCm.h @@ -77,6 +77,9 @@ const Driver bool HasHIPRuntime = false; bool HasDeviceLibrary = false; + bool HasHIPStdParLibrary = false; + bool HasRocThrustLibrary = false; + bool HasRocPrimLibrary = false; // Default version if not detected or specified. 
const unsigned DefaultVersionMajor = 3; @@ -96,6 +99,13 @@ std::vector RocmDeviceLibPathArg; // HIP runtime path specified by --hip-path. StringRef HIPPathArg; + // HIP Standard Parallel Algorithm acceleration library specified by + // --stdpar-path + StringRef HIPStdParPathArg; + // rocThrust algorithm library specified by --stdpar-thrust-path + StringRef HIPRocThrustPathArg; + // rocPrim algorithm library specified by --stdpar-prim-path + StringRef HIPRocPrimPathArg; // HIP version specified by --hip-version. StringRef HIPVersionArg; // Wheter -nogpulib is specified. @@ -180,6 +190,9 @@ /// Check whether we detected a valid ROCm device library. bool hasDeviceLibrary() const { return HasDeviceLibrary; } + /// Check whether we detected a valid HIP STDPAR Acceleration library. + bool hasHIPStdParLibrary() const { return HasHIPStdParLibrary; } + /// Print information about the detected ROCm installation. void print(raw_ostream ) const; Index: clang/lib/Driver/ToolChains/HIPAMD.cpp === --- clang/lib/Driver/ToolChains/HIPAMD.cpp +++ clang/lib/Driver/ToolChains/HIPAMD.cpp @@ -115,6 +115,8 @@ "--no-undefined", "-shared", "-plugin-opt=-amdgpu-internalize-symbols"}; + if (Args.hasArg(options::OPT_stdpar)) +LldArgs.push_back("-plugin-opt=-amdgpu-enable-stdpar"); auto = getToolChain(); auto = TC.getDriver(); @@ -246,6 +248,8 @@ if (!DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false)) CC1Args.append({"-mllvm", "-amdgpu-internalize-symbols"}); + if (DriverArgs.hasArgNoClaim(options::OPT_stdpar)) +CC1Args.append({"-mllvm", "-amdgpu-enable-stdpar"}); StringRef MaxThreadsPerBlock = DriverArgs.getLastArgValue(options::OPT_gpu_max_threads_per_block_EQ); Index: clang/lib/Driver/ToolChains/Clang.cpp === --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -6533,6 +6533,12 @@ if (Args.hasFlag(options::OPT_fgpu_allow_device_init, options::OPT_fno_gpu_allow_device_init, false)) CmdArgs.push_back("-fgpu-allow-device-init"); +if 
(Args.hasArg(options::OPT_stdpar)) { + CmdArgs.push_back("-stdpar"); + + if (Args.hasArg(options::OPT_stdpar_interpose_alloc)) +CmdArgs.push_back("-stdpar-interpose-alloc"); +} Args.addOptInFlag(CmdArgs, options::OPT_fhip_kernel_arg_name, options::OPT_fno_hip_kernel_arg_name); } Index: clang/lib/Driver/ToolChains/AMDGPU.cpp === --- clang/lib/Driver/ToolChains/AMDGPU.cpp +++ clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -329,6 +329,19 @@ RocmDeviceLibPathArg =
[PATCH] D155775: [Clang][Driver][RFC] Add driver support for C++ Parallel Algorithm Offload
AlexVlx updated this revision to Diff 544974. AlexVlx added a comment. Exploit the fact that ROCm/AMDGPU does LTCG at the moment and for the foreseeable future by moving the accelerator code selection pass to later. CHANGES SINCE LAST ACTION https://reviews.llvm.org/D155775/new/ https://reviews.llvm.org/D155775 Files: clang/include/clang/Basic/DiagnosticDriverKinds.td clang/include/clang/Basic/LangOptions.def clang/include/clang/Driver/Options.td clang/lib/Driver/Driver.cpp clang/lib/Driver/ToolChains/AMDGPU.cpp clang/lib/Driver/ToolChains/Clang.cpp clang/lib/Driver/ToolChains/HIPAMD.cpp clang/lib/Driver/ToolChains/ROCm.h clang/test/Driver/Inputs/stdpar/stdpar_lib.hpp clang/test/Driver/stdpar.c Index: clang/test/Driver/stdpar.c === --- /dev/null +++ clang/test/Driver/stdpar.c @@ -0,0 +1,18 @@ +// RUN: %clang -### -stdpar --compile %s 2>&1 | \ +// RUN: FileCheck --check-prefix=STDPAR-MISSING-LIB %s +// STDPAR-MISSING-LIB: error: cannot find HIP Standard Parallelism Acceleration library; provide it via '--stdpar-path' + +// RUN: %clang -### --stdpar --stdpar-path=%S/Inputs/stdpar \ +// RUN: --stdpar-thrust-path=%S/Inputs/stdpar/thrust \ +// RUN: --stdpar-prim-path=%S/Inputs/stdpar/prim --compile %s 2>&1 | \ +// RUN: FileCheck --check-prefix=STDPAR-COMPILE %s +// STDPAR-COMPILE: "-x" "hip" +// STDPAR-COMPILE: "-idirafter" "{{.*/thrust}}" +// STDPAR-COMPILE: "-idirafter" "{{.*/prim}}" +// STDPAR-COMPILE: "-idirafter" "{{.*/Inputs/stdpar}}" +// STDPAR-COMPILE: "-include" "stdpar_lib.hpp" + +// RUN: touch %t.o +// RUN: %clang -### -stdpar %t.o 2>&1 | FileCheck --check-prefix=STDPAR-LINK %s +// STDPAR-LINK: "-rpath" +// STDPAR-LINK: "-l{{.*hip.*}}" Index: clang/lib/Driver/ToolChains/ROCm.h === --- clang/lib/Driver/ToolChains/ROCm.h +++ clang/lib/Driver/ToolChains/ROCm.h @@ -77,6 +77,9 @@ const Driver bool HasHIPRuntime = false; bool HasDeviceLibrary = false; + bool HasHIPStdParLibrary = false; + bool HasRocThrustLibrary = false; + bool HasRocPrimLibrary = false; // 
Default version if not detected or specified. const unsigned DefaultVersionMajor = 3; @@ -96,6 +99,13 @@ std::vector RocmDeviceLibPathArg; // HIP runtime path specified by --hip-path. StringRef HIPPathArg; + // HIP Standard Parallel Algorithm acceleration library specified by + // --stdpar-path + StringRef HIPStdParPathArg; + // rocThrust algorithm library specified by --stdpar-thrust-path + StringRef HIPRocThrustPathArg; + // rocPrim algorithm library specified by --stdpar-prim-path + StringRef HIPRocPrimPathArg; // HIP version specified by --hip-version. StringRef HIPVersionArg; // Wheter -nogpulib is specified. @@ -180,6 +190,9 @@ /// Check whether we detected a valid ROCm device library. bool hasDeviceLibrary() const { return HasDeviceLibrary; } + /// Check whether we detected a valid HIP STDPAR Acceleration library. + bool hasHIPStdParLibrary() const { return HasHIPStdParLibrary; } + /// Print information about the detected ROCm installation. void print(raw_ostream ) const; Index: clang/lib/Driver/ToolChains/HIPAMD.cpp === --- clang/lib/Driver/ToolChains/HIPAMD.cpp +++ clang/lib/Driver/ToolChains/HIPAMD.cpp @@ -115,6 +115,8 @@ "--no-undefined", "-shared", "-plugin-opt=-amdgpu-internalize-symbols"}; + if (Args.hasArg(options::OPT_stdpar)) +LldArgs.push_back("-plugin-opt=-amdgpu-enable-stdpar"); auto = getToolChain(); auto = TC.getDriver(); @@ -246,6 +248,8 @@ if (!DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false)) CC1Args.append({"-mllvm", "-amdgpu-internalize-symbols"}); + if (DriverArgs.hasArgNoClaim(options::OPT_stdpar)) +CC1Args.append({"-mllvm", "-amdgpu-enable-stdpar"}); StringRef MaxThreadsPerBlock = DriverArgs.getLastArgValue(options::OPT_gpu_max_threads_per_block_EQ); Index: clang/lib/Driver/ToolChains/Clang.cpp === --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -6543,6 +6543,12 @@ if (Args.hasFlag(options::OPT_fgpu_allow_device_init, options::OPT_fno_gpu_allow_device_init, false)) 
CmdArgs.push_back("-fgpu-allow-device-init"); +if (Args.hasArg(options::OPT_stdpar)) { + CmdArgs.push_back("-stdpar"); + + if (Args.hasArg(options::OPT_stdpar_interpose_alloc)) +CmdArgs.push_back("-stdpar-interpose-alloc"); +} Args.addOptInFlag(CmdArgs, options::OPT_fhip_kernel_arg_name, options::OPT_fno_hip_kernel_arg_name); } Index: clang/lib/Driver/ToolChains/AMDGPU.cpp
[PATCH] D155775: [Clang][Driver][RFC] Add driver support for C++ Parallel Algorithm Offload
AlexVlx updated this revision to Diff 542287. AlexVlx added a comment. Removed some accidental noise. CHANGES SINCE LAST ACTION https://reviews.llvm.org/D155775/new/ https://reviews.llvm.org/D155775 Files: clang/include/clang/Basic/DiagnosticDriverKinds.td clang/include/clang/Basic/LangOptions.def clang/include/clang/Driver/Options.td clang/lib/Driver/Driver.cpp clang/lib/Driver/ToolChains/AMDGPU.cpp clang/lib/Driver/ToolChains/Clang.cpp clang/lib/Driver/ToolChains/ROCm.h clang/test/Driver/Inputs/stdpar/stdpar_lib.hpp clang/test/Driver/stdpar.c Index: clang/test/Driver/stdpar.c === --- /dev/null +++ clang/test/Driver/stdpar.c @@ -0,0 +1,18 @@ +// RUN: %clang -### -stdpar --compile %s 2>&1 | \ +// RUN: FileCheck --check-prefix=STDPAR-MISSING-LIB %s +// STDPAR-MISSING-LIB: error: cannot find HIP Standard Parallelism Acceleration library; provide it via '--stdpar-path' + +// RUN: %clang -### --stdpar --stdpar-path=%S/Inputs/stdpar \ +// RUN: --stdpar-thrust-path=%S/Inputs/stdpar/thrust \ +// RUN: --stdpar-prim-path=%S/Inputs/stdpar/prim --compile %s 2>&1 | \ +// RUN: FileCheck --check-prefix=STDPAR-COMPILE %s +// STDPAR-COMPILE: "-x" "hip" +// STDPAR-COMPILE: "-idirafter" "{{.*/Inputs/stdpar/thrust}}" +// STDPAR-COMPILE: "-idirafter" "{{.*/Inputs/stdpar/prim}}" +// STDPAR-COMPILE: "-idirafter" "{{.*/Inputs/stdpar}}" +// STDPAR-COMPILE: "-include" "stdpar_lib.hpp" + +// RUN: touch %t.o +// RUN: %clang -### -stdpar %t.o 2>&1 | FileCheck --check-prefix=STDPAR-LINK %s +// STDPAR-LINK: "-rpath" +// STDPAR-LINK: "-l{{.*hip.*}}" Index: clang/lib/Driver/ToolChains/ROCm.h === --- clang/lib/Driver/ToolChains/ROCm.h +++ clang/lib/Driver/ToolChains/ROCm.h @@ -77,6 +77,9 @@ const Driver bool HasHIPRuntime = false; bool HasDeviceLibrary = false; + bool HasHIPStdParLibrary = false; + bool HasRocThrustLibrary = false; + bool HasRocPrimLibrary = false; // Default version if not detected or specified. 
const unsigned DefaultVersionMajor = 3; @@ -96,6 +99,13 @@ std::vector RocmDeviceLibPathArg; // HIP runtime path specified by --hip-path. StringRef HIPPathArg; + // HIP Standard Parallel Algorithm acceleration library specified by + // --stdpar-path + StringRef HIPStdParPathArg; + // rocThrust algorithm library specified by --stdpar-thrust-path + StringRef HIPRocThrustPathArg; + // rocPrim algorithm library specified by --stdpar-prim-path + StringRef HIPRocPrimPathArg; // HIP version specified by --hip-version. StringRef HIPVersionArg; // Wheter -nogpulib is specified. @@ -180,6 +190,9 @@ /// Check whether we detected a valid ROCm device library. bool hasDeviceLibrary() const { return HasDeviceLibrary; } + /// Check whether we detected a valid HIP STDPAR Acceleration library. + bool hasHIPStdParLibrary() const { return HasHIPStdParLibrary; } + /// Print information about the detected ROCm installation. void print(raw_ostream ) const; Index: clang/lib/Driver/ToolChains/Clang.cpp === --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -6527,6 +6527,12 @@ if (Args.hasFlag(options::OPT_fgpu_allow_device_init, options::OPT_fno_gpu_allow_device_init, false)) CmdArgs.push_back("-fgpu-allow-device-init"); +if (Args.hasArg(options::OPT_stdpar)) { + CmdArgs.push_back("-stdpar"); + + if (Args.hasArg(options::OPT_stdpar_interpose_alloc)) +CmdArgs.push_back("-stdpar-interpose-alloc"); +} Args.addOptInFlag(CmdArgs, options::OPT_fhip_kernel_arg_name, options::OPT_fno_hip_kernel_arg_name); } Index: clang/lib/Driver/ToolChains/AMDGPU.cpp === --- clang/lib/Driver/ToolChains/AMDGPU.cpp +++ clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -329,6 +329,19 @@ RocmDeviceLibPathArg = Args.getAllArgValues(clang::driver::options::OPT_rocm_device_lib_path_EQ); HIPPathArg = Args.getLastArgValue(clang::driver::options::OPT_hip_path_EQ); + HIPStdParPathArg = +Args.getLastArgValue(clang::driver::options::OPT_stdpar_path_EQ); + HasHIPStdParLibrary = 
!HIPStdParPathArg.empty() && +D.getVFS().exists(HIPStdParPathArg + "/stdpar_lib.hpp"); + HIPRocThrustPathArg = +Args.getLastArgValue(clang::driver::options::OPT_stdpar_thrust_path_EQ); + HasRocThrustLibrary = !HIPRocThrustPathArg.empty() && +D.getVFS().exists(HIPRocThrustPathArg + "/thrust"); + HIPRocPrimPathArg = +Args.getLastArgValue(clang::driver::options::OPT_stdpar_prim_path_EQ); + HasRocPrimLibrary = !HIPRocPrimPathArg.empty() && + D.getVFS().exists(HIPRocPrimPathArg + "/rocprim"); + if (auto *A =
[PATCH] D155775: [Clang][Driver][RFC] Add driver support for C++ Parallel Algorithm Offload
AlexVlx created this revision. AlexVlx added reviewers: jansvoboda11, arsenm, yaxunl, MaskRay. AlexVlx added a project: clang. Herald added subscribers: cmtice, kerbowa, ormris, hiraditya, tpr, jvesely. Herald added a reviewer: jhenderson. Herald added a project: All. AlexVlx requested review of this revision. Herald added subscribers: llvm-commits, cfe-commits, wdng. Herald added a project: LLVM. This patch adds the Driver changes needed by the C++ parallel algorithm offload feature being proposed here: <https://discourse.llvm.org/t/rfc-adding-c-parallel-algorithm-offload-support-to-clang-llvm/72159/1>. The verbose documentation is included in its parent patch. What this change does can be summed up as follows: 1. add two flags: one enabling `stdpar` compilation (`-stdpar`), the other enabling the optional allocation interposition mode (`-stdpar-interpose-alloc`); 2. the flags correspond to new LangOpt members; 3. if we are compiling or linking with `-stdpar`, we enable HIP; in the compilation case C and C++ inputs are treated as HIP inputs; 4. the ROCm / AMDGPU driver is augmented to look for and include an implementation-detail forwarding header (located via `--stdpar-path`), which is provided here: <https://github.com/ROCmSoftwarePlatform/roc-stdpar/blob/main/include/stdpar_lib.hpp>; we error out if the user requested `stdpar` but the header or its dependencies (rocThrust and rocPrim, located via `--stdpar-thrust-path` and `--stdpar-prim-path`) cannot be found (it is plausible that in the future we'll move the check for the dependencies to the header itself in order to reduce the compiler footprint). Tests for the behaviour described above are also added. 
Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D155775 Files: clang/include/clang/Basic/DiagnosticDriverKinds.td clang/include/clang/Basic/LangOptions.def clang/include/clang/Driver/Options.td clang/lib/Driver/Driver.cpp clang/lib/Driver/ToolChains/AMDGPU.cpp clang/lib/Driver/ToolChains/Clang.cpp clang/lib/Driver/ToolChains/ROCm.h clang/test/Driver/Inputs/stdpar/stdpar_lib.hpp clang/test/Driver/stdpar.c llvm/include/llvm/CodeGen/AccelTable.h llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp llvm/test/DebugInfo/Generic/accel-table-hash-collisions.ll llvm/test/DebugInfo/Generic/apple-names-hash-collisions.ll llvm/test/DebugInfo/Generic/debug-names-hash-collisions.ll llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn Index: llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn === --- llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn +++ llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn @@ -238,7 +238,6 @@ "sha512intrin.h", "shaintrin.h", "sifive_vector.h", -"sm3intrin.h", "smmintrin.h", "stdalign.h", "stdarg.h", Index: llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp === --- llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp +++ llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp @@ -11,7 +11,6 @@ //===--===// #include "llvm-dwarfdump.h" -#include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringSet.h" @@ -464,7 +463,7 @@ static void findAllApple( DWARFContext , raw_ostream , std::function GetNameForDWARFReg) { - MapVector> NameToDies; + StringMap> NameToDies; auto PushDIEs = [&](const AppleAcceleratorTable ) { for (const auto : Accel.entries()) { Index: llvm/test/DebugInfo/Generic/debug-names-hash-collisions.ll === --- llvm/test/DebugInfo/Generic/debug-names-hash-collisions.ll +++ llvm/test/DebugInfo/Generic/debug-names-hash-collisions.ll @@ -29,21 +29,21 @@ ; Check that all the names are present in the output ; CHECK: Bucket 0 ; CHECK: Hash: 0xF8CF70D -; 
CHECK-NEXT:String: 0x{{[0-9a-f]*}} "_ZN4lldb7SBBlockaSERKS0_" -; CHECK: Hash: 0xF8CF70D ; CHECK-NEXT:String: 0x{{[0-9a-f]*}} "_ZN4lldb7SBBlockC1ERKS0_" -; CHECK: Hash: 0x135A482C -; CHECK-NEXT:String: 0x{{[0-9a-f]*}} "_ZN4lldb7SBErroraSERKS0_" +; CHECK: Hash: 0xF8CF70D +; CHECK-NEXT:String: 0x{{[0-9a-f]*}} "_ZN4lldb7SBBlockaSERKS0_" ; CHECK: Hash: 0x135A482C ; CHECK-NEXT:String: 0x{{[0-9a-f]*}} "_ZN4lldb7SBErrorC1ERKS0_" +; CHECK: Hash: 0x135A482C +; CHECK-NEXT:String: 0x{{[0-9a-f]*}} "_ZN4lldb7SBErroraSERKS0_" ; CHECK-NOT: String: ; CHECK: Bucket 1 ; CHECK-NEXT: EMPTY ; CHECK: Bucket 2 ; CHECK: Hash: 0x2841B989 -; CHECK-NEXT:String: 0x{{[0-9a-f]*}} "_ZL11numCommutes" -; CHECK: Hash: 0x2841B989 ; CHECK-NEXT:String: 0x{{[0-9a-f]*}} "_ZL11NumCommutes" +; CHECK: Hash: 0x2841B989 +; CHECK-NEXT:String: 0x{{[0-9a-f]*}} "_ZL11numCommutes" ; CHECK: Hash: 0x3E190F5F ; CHECK-NEXT:String: 0x{{[0-9a-f]*}} "_ZL9NumRemats" ; CHECK: Hash: 0x3E190F5F Index: llvm/test/DebugInfo/Generic/apple-names-hash-collisions.ll