https://github.com/jplehr updated https://github.com/llvm/llvm-project/pull/75468
>From 9809ba1ec31cb1a4a066f709ae8bd3e7777965e1 Mon Sep 17 00:00:00 2001 From: JP Lehr <janpatrick.l...@amd.com> Date: Thu, 6 Jul 2023 16:47:21 -0400 Subject: [PATCH] [OpenMP] Introduce -fopenmp-force-usm flag The new flag implements logic to include #pragma omp requires unified_shared_memory in every translation unit. This enables a straightforward way to enable USM for an application without the need to modify sources. --- clang/include/clang/Driver/Options.td | 2 ++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp | 16 ++++++++++++++++ clang/lib/Headers/CMakeLists.txt | 1 + .../lib/Headers/openmp_wrappers/usm/force_usm.h | 6 ++++++ 4 files changed, 25 insertions(+) create mode 100644 clang/lib/Headers/openmp_wrappers/usm/force_usm.h diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 1b02087425b751..b9cd3043a13a9a 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3381,6 +3381,8 @@ def fopenmp_cuda_blocks_per_sm_EQ : Joined<["-"], "fopenmp-cuda-blocks-per-sm="> Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>; def fopenmp_cuda_teams_reduction_recs_num_EQ : Joined<["-"], "fopenmp-cuda-teams-reduction-recs-num=">, Group<f_Group>, Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>; +def fopenmp_force_usm : Flag<["-"], "fopenmp-force-usm">, Group<f_Group>, + Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[CC1Option]>; //===----------------------------------------------------------------------===// // Shared cc1 + fc1 OpenMP Target Options diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp index b012b7cb729378..a077f2f06d7728 100644 --- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp @@ -129,6 +129,22 @@ AMDGPUOpenMPToolChain::GetCXXStdlibType(const ArgList &Args) const { void AMDGPUOpenMPToolChain::AddClangSystemIncludeArgs( const ArgList &DriverArgs, ArgStringList &CC1Args) const { HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args); + + CC1Args.push_back("-internal-isystem"); + SmallString<128> P(HostTC.getDriver().ResourceDir); + llvm::sys::path::append(P, "include/cuda_wrappers"); + CC1Args.push_back(DriverArgs.MakeArgString(P)); + + // Force USM mode will forcefully include #pragma omp requires + // unified_shared_memory via the force_usm header + // XXX This may result in a compilation error if the source + // file already includes that pragma. + if (DriverArgs.hasArg(options::OPT_fopenmp_force_usm)) { + CC1Args.push_back("-include"); + CC1Args.push_back( + DriverArgs.MakeArgString(HostTC.getDriver().ResourceDir + + "/include/openmp_wrappers/force_usm.h")); + } } void AMDGPUOpenMPToolChain::AddIAMCUIncludeArgs(const ArgList &Args, diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index f8fdd402777e48..aac232fa8b4405 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -319,6 +319,7 @@ set(openmp_wrapper_files openmp_wrappers/__clang_openmp_device_functions.h openmp_wrappers/complex_cmath.h openmp_wrappers/new + openmp_wrappers/usm/force_usm.h ) set(llvm_libc_wrapper_files diff --git a/clang/lib/Headers/openmp_wrappers/usm/force_usm.h b/clang/lib/Headers/openmp_wrappers/usm/force_usm.h new file mode 100644 index 00000000000000..15c394e27ce9c2 --- /dev/null +++ b/clang/lib/Headers/openmp_wrappers/usm/force_usm.h @@ -0,0 +1,6 @@ +#ifndef __CLANG_FORCE_OPENMP_USM +#define __CLANG_FORCE_OPENMP_USM + +#pragma omp requires unified_shared_memory + +#endif _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits