from:"Saiyedul Islam via Phabricator via cfe-commits"

[PATCH] D139730: [OpenMP][DeviceRTL][AMDGPU] Support code object version 5

2023-08-29 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam added inline comments.



Comment at: clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu:12
+// RUN: llvm-link %t_0 %t_5 -o -| llvm-dis -o - | FileCheck 
-check-prefix=LINKED5 %s
+
+#include "Inputs/cuda.h"

yaxunl wrote:
> saiislam wrote:
> > yaxunl wrote:
> > > need to test using clang -cc1 with -O3 and -mlink-builtin-bitcode to link 
> > > the device lib and verify the load of llvm.amdgcn.abi.version being 
> > > eliminated after optimization.
> > > 
> > > I think currently it cannot do that since llvm.amdgcn.abi.version is not 
> > > internalized by the internalization pass. This can cause some significant 
> > > perf drops since loading is expensive. Need to tweak the function 
> > > controlling what variables can be internalized for amdgpu so that this 
> > > variable gets internalized, or having a generic way to tell that function 
> > > which variables should be internalized, e.g. by adding a metadata 
> > > amdgcn.internalize
> > load of llvm.amdgcn.abi.version is being eliminated with cc1, -O3, and 
> > mlink-builtin-bitcode of device lib.
> It seems to be eliminated by IPSCCP. It makes sense since it is constant 
> weak_odr without externally_initialized. Either changing it to weak or adding 
> externally_initialized will keep the load. Normal `__constant__` var in 
> device code may be changed by host code, therefore they are emitted with 
> externally_initialized and do not have the load eliminated.
Thank you @yaxunl !
I have added these observations as comments in the code at load emit and global 
emit locations.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139730/new/

https://reviews.llvm.org/D139730

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D139730: [OpenMP][DeviceRTL][AMDGPU] Support code object version 5

2023-08-29 Thread Saiyedul Islam via Phabricator via cfe-commits

This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGf616c3eeb43f: [OpenMP][DeviceRTL][AMDGPU] Support code 
object version 5 (authored by saiislam).

Changed prior to commit:
  https://reviews.llvm.org/D139730?vs=553991&id=554262#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139730/new/

https://reviews.llvm.org/D139730

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/lib/CodeGen/CodeGenModule.h
  clang/lib/CodeGen/TargetInfo.h
  clang/lib/CodeGen/Targets/AMDGPU.cpp
  clang/lib/Driver/ToolChain.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu
  clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
  clang/test/CodeGenOpenCL/opencl_types.cl
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
  openmp/libomptarget/DeviceRTL/CMakeLists.txt
  openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
  openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h

Index: openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
===
--- openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
+++ openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
@@ -25,6 +25,7 @@
 #include "llvm/Support/MemoryBufferRef.h"
 
 #include "llvm/Support/YAMLTraits.h"
+using namespace llvm::ELF;
 
 namespace llvm {
 namespace omp {
@@ -32,19 +33,29 @@
 namespace plugin {
 namespace utils {
 
-// The implicit arguments of AMDGPU kernels.
+// The implicit arguments of COV5 AMDGPU kernels.
 struct AMDGPUImplicitArgsTy {
-  uint64_t OffsetX;
-  uint64_t OffsetY;
-  uint64_t OffsetZ;
-  uint64_t HostcallPtr;
-  uint64_t Unused0;
-  uint64_t Unused1;
-  uint64_t Unused2;
+  uint32_t BlockCountX;
+  uint32_t BlockCountY;
+  uint32_t BlockCountZ;
+  uint16_t GroupSizeX;
+  uint16_t GroupSizeY;
+  uint16_t GroupSizeZ;
+  uint8_t Unused0[46]; // 46 byte offset.
+  uint16_t GridDims;
+  uint8_t Unused1[190]; // 190 byte offset.
 };
 
-static_assert(sizeof(AMDGPUImplicitArgsTy) == 56,
-  "Unexpected size of implicit arguments");
+// Dummy struct for COV4 implicitargs.
+struct AMDGPUImplicitArgsTyCOV4 {
+  uint8_t Unused[56];
+};
+
+uint32_t getImplicitArgsSize(uint16_t Version) {
+  return Version < ELF::ELFABIVERSION_AMDGPU_HSA_V5
+ ? sizeof(AMDGPUImplicitArgsTyCOV4)
+ : sizeof(AMDGPUImplicitArgsTy);
+}
 
 /// Parse a TargetID to get processor arch and feature map.
 /// Returns processor subarch.
@@ -295,7 +306,8 @@
 /// Reads the AMDGPU specific metadata from the ELF file and propagates the
 /// KernelInfoMap
 Error readAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
-  StringMap ) {
+  StringMap ,
+  uint16_t ) {
   Error Err = Error::success(); // Used later as out-parameter
 
   auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
@@ -305,6 +317,12 @@
   const object::ELF64LEFile ELFObj = ELFOrError.get();
   ArrayRef Sections = cantFail(ELFObj.sections());
   KernelInfoReader Reader(KernelInfoMap);
+
+  // Read the code object version from ELF image header
+  auto Header = ELFObj.getHeader();
+  ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
+  DP("ELFABIVERSION Version: %u\n", ELFABIVersion);
+
   for (const auto  : Sections) {
 if (S.sh_type != ELF::SHT_NOTE)
   continue;
Index: openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
===
--- openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -381,6 +381,9 @@
   /// Get the executable.
   hsa_executable_t getExecutable() const { return Executable; }
 
+  /// Get to Code Object Version of the ELF
+  uint16_t getELFABIVersion() const { return ELFABIVersion; }
+
   /// Find an HSA device symbol by its name on the executable.
   Expected
   findDeviceSymbol(GenericDeviceTy , StringRef SymbolName) const;
@@ -401,6 +404,7 @@
   hsa_executable_t Executable;
   hsa_code_object_t CodeObject;
   StringMap KernelInfoMap;
+  uint16_t ELFABIVersion;
 };
 
 /// Class implementing the AMDGPU kernel functionalities which derives from the
@@ -408,8 +412,7 @@
 struct AMDGPUKernelTy : public GenericKernelTy {
   /// Create an AMDGPU kernel with a name and an execution mode.
   AMDGPUKernelTy(const char *Name, OMPTgtExecModeFlags ExecutionMode)
-  : GenericKernelTy(Name, ExecutionMode),
-ImplicitArgsSize(sizeof(utils::AMDGPUImplicitArgsTy)) {}
+  : GenericKernelTy(Name, ExecutionMode) {}
 
   /// Initialize the AMDGPU kernel.
   Error initImpl(GenericDeviceTy , DeviceImageTy ) override {
@@ -450,6 +453,9 @@
 // TODO: Read the kernel descriptor for the max threads

[PATCH] D139730: [OpenMP][DeviceRTL][AMDGPU] Support code object version 5

2023-08-28 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam updated this revision to Diff 553991.
saiislam marked 3 inline comments as done.
saiislam added a comment.

Minor fixes addressing reviewer's comment.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139730/new/

https://reviews.llvm.org/D139730

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/lib/CodeGen/CodeGenModule.h
  clang/lib/CodeGen/TargetInfo.h
  clang/lib/CodeGen/Targets/AMDGPU.cpp
  clang/lib/Driver/ToolChain.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu
  clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
  clang/test/CodeGenOpenCL/opencl_types.cl
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
  openmp/libomptarget/DeviceRTL/CMakeLists.txt
  openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
  openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h

Index: openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
===
--- openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
+++ openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
@@ -25,6 +25,7 @@
 #include "llvm/Support/MemoryBufferRef.h"
 
 #include "llvm/Support/YAMLTraits.h"
+using namespace llvm::ELF;
 
 namespace llvm {
 namespace omp {
@@ -32,19 +33,29 @@
 namespace plugin {
 namespace utils {
 
-// The implicit arguments of AMDGPU kernels.
+// The implicit arguments of COV5 AMDGPU kernels.
 struct AMDGPUImplicitArgsTy {
-  uint64_t OffsetX;
-  uint64_t OffsetY;
-  uint64_t OffsetZ;
-  uint64_t HostcallPtr;
-  uint64_t Unused0;
-  uint64_t Unused1;
-  uint64_t Unused2;
+  uint32_t BlockCountX;
+  uint32_t BlockCountY;
+  uint32_t BlockCountZ;
+  uint16_t GroupSizeX;
+  uint16_t GroupSizeY;
+  uint16_t GroupSizeZ;
+  uint8_t Unused0[46]; // 46 byte offset.
+  uint16_t GridDims;
+  uint8_t Unused1[190]; // 190 byte offset.
 };
 
-static_assert(sizeof(AMDGPUImplicitArgsTy) == 56,
-  "Unexpected size of implicit arguments");
+// Dummy struct for COV4 implicitargs.
+struct AMDGPUImplicitArgsTyCOV4 {
+  uint8_t Unused[56];
+};
+
+uint32_t getImplicitArgsSize(uint16_t Version) {
+  return Version < ELF::ELFABIVERSION_AMDGPU_HSA_V5
+ ? sizeof(AMDGPUImplicitArgsTyCOV4)
+ : sizeof(AMDGPUImplicitArgsTy);
+}
 
 /// Parse a TargetID to get processor arch and feature map.
 /// Returns processor subarch.
@@ -295,7 +306,8 @@
 /// Reads the AMDGPU specific metadata from the ELF file and propagates the
 /// KernelInfoMap
 Error readAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
-  StringMap ) {
+  StringMap ,
+  uint16_t ) {
   Error Err = Error::success(); // Used later as out-parameter
 
   auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
@@ -305,6 +317,12 @@
   const object::ELF64LEFile ELFObj = ELFOrError.get();
   ArrayRef Sections = cantFail(ELFObj.sections());
   KernelInfoReader Reader(KernelInfoMap);
+
+  // Read the code object version from ELF image header
+  auto Header = ELFObj.getHeader();
+  ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
+  DP("ELFABIVERSION Version: %u\n", ELFABIVersion);
+
   for (const auto  : Sections) {
 if (S.sh_type != ELF::SHT_NOTE)
   continue;
Index: openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
===
--- openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -381,6 +381,9 @@
   /// Get the executable.
   hsa_executable_t getExecutable() const { return Executable; }
 
+  /// Get to Code Object Version of the ELF
+  uint16_t getELFABIVersion() const { return ELFABIVersion; }
+
   /// Find an HSA device symbol by its name on the executable.
   Expected
   findDeviceSymbol(GenericDeviceTy , StringRef SymbolName) const;
@@ -401,6 +404,7 @@
   hsa_executable_t Executable;
   hsa_code_object_t CodeObject;
   StringMap KernelInfoMap;
+  uint16_t ELFABIVersion;
 };
 
 /// Class implementing the AMDGPU kernel functionalities which derives from the
@@ -408,8 +412,7 @@
 struct AMDGPUKernelTy : public GenericKernelTy {
   /// Create an AMDGPU kernel with a name and an execution mode.
   AMDGPUKernelTy(const char *Name, OMPTgtExecModeFlags ExecutionMode)
-  : GenericKernelTy(Name, ExecutionMode),
-ImplicitArgsSize(sizeof(utils::AMDGPUImplicitArgsTy)) {}
+  : GenericKernelTy(Name, ExecutionMode) {}
 
   /// Initialize the AMDGPU kernel.
   Error initImpl(GenericDeviceTy , DeviceImageTy ) override {
@@ -450,6 +453,9 @@
 // TODO: Read the kernel descriptor for the max threads per block. May be
 // read from the image.
 
+ImplicitArgsSize = utils::getImplicitArgsSize(AMDImage.getELFABIVersion());
+DP("ELFABIVersion: %d\n",

[PATCH] D139730: [OpenMP][DeviceRTL][AMDGPU] Support code object version 5

2023-08-25 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam updated this revision to Diff 553413.
saiislam marked 2 inline comments as done.
saiislam added a comment.

Changed getImplicitArgsSize to use sizeof.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139730/new/

https://reviews.llvm.org/D139730

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/lib/CodeGen/CodeGenModule.h
  clang/lib/CodeGen/TargetInfo.h
  clang/lib/CodeGen/Targets/AMDGPU.cpp
  clang/lib/Driver/ToolChain.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu
  clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
  clang/test/CodeGenOpenCL/opencl_types.cl
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
  openmp/libomptarget/DeviceRTL/CMakeLists.txt
  openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
  openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h

Index: openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
===
--- openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
+++ openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
@@ -25,6 +25,7 @@
 #include "llvm/Support/MemoryBufferRef.h"
 
 #include "llvm/Support/YAMLTraits.h"
+using namespace llvm::ELF;
 
 namespace llvm {
 namespace omp {
@@ -32,19 +33,29 @@
 namespace plugin {
 namespace utils {
 
-// The implicit arguments of AMDGPU kernels.
+// The implicit arguments of COV5 AMDGPU kernels.
 struct AMDGPUImplicitArgsTy {
-  uint64_t OffsetX;
-  uint64_t OffsetY;
-  uint64_t OffsetZ;
-  uint64_t HostcallPtr;
-  uint64_t Unused0;
-  uint64_t Unused1;
-  uint64_t Unused2;
+  uint32_t BlockCountX;
+  uint32_t BlockCountY;
+  uint32_t BlockCountZ;
+  uint16_t GroupSizeX;
+  uint16_t GroupSizeY;
+  uint16_t GroupSizeZ;
+  uint8_t Unused0[46]; // 46 byte offset.
+  uint16_t GridDims;
+  uint8_t Unused1[190]; // 190 byte offset.
 };
 
-static_assert(sizeof(AMDGPUImplicitArgsTy) == 56,
-  "Unexpected size of implicit arguments");
+// Dummy struct for COV4 implicitargs.
+struct AMDGPUImplicitArgsTyCOV4 {
+  uint8_t Unused[56];
+};
+
+uint16_t getImplicitArgsSize(uint16_t Version) {
+  return Version < ELF::ELFABIVERSION_AMDGPU_HSA_V5
+ ? sizeof(AMDGPUImplicitArgsTyCOV4)
+ : sizeof(AMDGPUImplicitArgsTy);
+}
 
 /// Parse a TargetID to get processor arch and feature map.
 /// Returns processor subarch.
@@ -295,7 +306,8 @@
 /// Reads the AMDGPU specific metadata from the ELF file and propagates the
 /// KernelInfoMap
 Error readAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
-  StringMap ) {
+  StringMap ,
+  uint16_t ) {
   Error Err = Error::success(); // Used later as out-parameter
 
   auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
@@ -305,6 +317,12 @@
   const object::ELF64LEFile ELFObj = ELFOrError.get();
   ArrayRef Sections = cantFail(ELFObj.sections());
   KernelInfoReader Reader(KernelInfoMap);
+
+  // Read the code object version from ELF image header
+  auto Header = ELFObj.getHeader();
+  ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
+  DP("ELFABIVERSION Version: %u\n", ELFABIVersion);
+
   for (const auto  : Sections) {
 if (S.sh_type != ELF::SHT_NOTE)
   continue;
Index: openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
===
--- openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -381,6 +381,9 @@
   /// Get the executable.
   hsa_executable_t getExecutable() const { return Executable; }
 
+  /// Get to Code Object Version of the ELF
+  uint16_t getELFABIVersion() const { return ELFABIVersion; }
+
   /// Find an HSA device symbol by its name on the executable.
   Expected
   findDeviceSymbol(GenericDeviceTy , StringRef SymbolName) const;
@@ -401,6 +404,7 @@
   hsa_executable_t Executable;
   hsa_code_object_t CodeObject;
   StringMap KernelInfoMap;
+  uint16_t ELFABIVersion;
 };
 
 /// Class implementing the AMDGPU kernel functionalities which derives from the
@@ -408,8 +412,7 @@
 struct AMDGPUKernelTy : public GenericKernelTy {
   /// Create an AMDGPU kernel with a name and an execution mode.
   AMDGPUKernelTy(const char *Name, OMPTgtExecModeFlags ExecutionMode)
-  : GenericKernelTy(Name, ExecutionMode),
-ImplicitArgsSize(sizeof(utils::AMDGPUImplicitArgsTy)) {}
+  : GenericKernelTy(Name, ExecutionMode) {}
 
   /// Initialize the AMDGPU kernel.
   Error initImpl(GenericDeviceTy , DeviceImageTy ) override {
@@ -450,6 +453,9 @@
 // TODO: Read the kernel descriptor for the max threads per block. May be
 // read from the image.
 
+ImplicitArgsSize = utils::getImplicitArgsSize(AMDImage.getELFABIVersion());
+DP("ELFABIVersion: %d\n",

[PATCH] D139730: [OpenMP][DeviceRTL][AMDGPU] Support code object version 5

2023-08-24 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam marked an inline comment as done.
saiislam added inline comments.



Comment at: openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h:49
 
-static_assert(sizeof(AMDGPUImplicitArgsTy) == 56,
-  "Unexpected size of implicit arguments");
+enum IMPLICITARGS : uint32_t {
+  COV4_SIZE = 56,

jhuber6 wrote:
> We should probably be using `sizeof` now that it's back to being a struct and 
> keep the old struct definition.
AMDGPU plugin doesn't use any implicitarg for COV4, but it does so for COV5. 
So, we are not keeping two separate structures for implicitargs of COV4 and 
COV5.
If we use sizeof then it will always return 256 corresponding to COV5 (even for 
cov4, which should be 56). That's why we need this function.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139730/new/

https://reviews.llvm.org/D139730

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D139730: [OpenMP][DeviceRTL][AMDGPU] Support code object version 5

2023-08-24 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam added inline comments.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:8649-8650
 
+  // code-object-version=X needs to be passed to clang-linker-wrapper to ensure
+  // that it is used by lld.
+  if (const Arg *A = Args.getLastArg(options::OPT_mcode_object_version_EQ)) {

arsenm wrote:
> so device rtl is linked once as a normal library?
No, this is command generation for clang-linker-wrapper. Since devicertl is 
compiled only to produce a bitcode file (-c), it is never called.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:8653-8654
+CmdArgs.push_back(Args.MakeArgString("-mllvm"));
+CmdArgs.push_back(Args.MakeArgString(
+Twine("--amdhsa-code-object-version=") + A->getValue()));
+  }

arsenm wrote:
> Why do you need this? The code object version is supposed to come from a 
> module flag. We should be getting rid of the command line argument for it
During command generation for clang-linker-wrapper, it is required to check 
user's provided `mcode-object-version=X` so that `amdhsa-code-object-version=X` 
can be passed to the clang/lto backend.

`getAmdhsaCodeObjectVersion()` and `getHsaAbiVersion()` both still use the 
above command line argument to override user's choice of COV, instead of the 
module flag.



Comment at: clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu:12
+// RUN: llvm-link %t_0 %t_5 -o -| llvm-dis -o - | FileCheck 
-check-prefix=LINKED5 %s
+
+#include "Inputs/cuda.h"

yaxunl wrote:
> need to test using clang -cc1 with -O3 and -mlink-builtin-bitcode to link the 
> device lib and verify the load of llvm.amdgcn.abi.version being eliminated 
> after optimization.
> 
> I think currently it cannot do that since llvm.amdgcn.abi.version is not 
> internalized by the internalization pass. This can cause some significant 
> perf drops since loading is expensive. Need to tweak the function controlling 
> what variables can be internalized for amdgpu so that this variable gets 
> internalized, or having a generic way to tell that function which variables 
> should be internalized, e.g. by adding a metadata amdgcn.internalize
load of llvm.amdgcn.abi.version is being eliminated with cc1, -O3, and 
mlink-builtin-bitcode of device lib.



Comment at: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp:406-410
+  // pass on -mllvm options to the clang
+  for (const opt::Arg *Arg : Args.filtered(OPT_mllvm)) {
+CmdArgs.push_back("-mllvm");
+CmdArgs.push_back(Arg->getValue());
+  }

arsenm wrote:
> Shouldn't need this?
It is required so that when clang pass (not the lto backend) is called from 
clang-linker-wrapper due to `-save-temps`, user provided COV is correctly 
propagated.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139730/new/

https://reviews.llvm.org/D139730

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D139730: [OpenMP][DeviceRTL][AMDGPU] Support code object version 5

2023-08-24 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam updated this revision to Diff 553179.
saiislam marked 7 inline comments as done.
saiislam added a comment.

Updated test case to check internalization of newly inserted global variable.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139730/new/

https://reviews.llvm.org/D139730

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/lib/CodeGen/CodeGenModule.h
  clang/lib/CodeGen/TargetInfo.h
  clang/lib/CodeGen/Targets/AMDGPU.cpp
  clang/lib/Driver/ToolChain.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu
  clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
  clang/test/CodeGenOpenCL/opencl_types.cl
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
  openmp/libomptarget/DeviceRTL/CMakeLists.txt
  openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
  openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h

Index: openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
===
--- openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
+++ openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
@@ -25,6 +25,7 @@
 #include "llvm/Support/MemoryBufferRef.h"
 
 #include "llvm/Support/YAMLTraits.h"
+using namespace llvm::ELF;
 
 namespace llvm {
 namespace omp {
@@ -34,17 +35,25 @@
 
 // The implicit arguments of AMDGPU kernels.
 struct AMDGPUImplicitArgsTy {
-  uint64_t OffsetX;
-  uint64_t OffsetY;
-  uint64_t OffsetZ;
-  uint64_t HostcallPtr;
-  uint64_t Unused0;
-  uint64_t Unused1;
-  uint64_t Unused2;
+  uint32_t BlockCountX;
+  uint32_t BlockCountY;
+  uint32_t BlockCountZ;
+  uint16_t GroupSizeX;
+  uint16_t GroupSizeY;
+  uint16_t GroupSizeZ;
+  uint8_t Unused0[46]; // 46 byte offset.
+  uint16_t GridDims;
+  uint8_t Unused1[190]; // 190 byte offset.
 };
 
-static_assert(sizeof(AMDGPUImplicitArgsTy) == 56,
-  "Unexpected size of implicit arguments");
+enum IMPLICITARGS : uint32_t {
+  COV4_SIZE = 56,
+  COV5_SIZE = 256,
+};
+
+uint16_t getImplicitArgsSize(uint16_t Version) {
+  return Version < ELF::ELFABIVERSION_AMDGPU_HSA_V5 ? COV4_SIZE : COV5_SIZE;
+}
 
 /// Parse a TargetID to get processor arch and feature map.
 /// Returns processor subarch.
@@ -295,7 +304,8 @@
 /// Reads the AMDGPU specific metadata from the ELF file and propagates the
 /// KernelInfoMap
 Error readAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
-  StringMap ) {
+  StringMap ,
+  uint16_t ) {
   Error Err = Error::success(); // Used later as out-parameter
 
   auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
@@ -305,6 +315,12 @@
   const object::ELF64LEFile ELFObj = ELFOrError.get();
   ArrayRef Sections = cantFail(ELFObj.sections());
   KernelInfoReader Reader(KernelInfoMap);
+
+  // Read the code object version from ELF image header
+  auto Header = ELFObj.getHeader();
+  ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
+  DP("ELFABIVERSION Version: %u\n", ELFABIVersion);
+
   for (const auto  : Sections) {
 if (S.sh_type != ELF::SHT_NOTE)
   continue;
Index: openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
===
--- openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -381,6 +381,9 @@
   /// Get the executable.
   hsa_executable_t getExecutable() const { return Executable; }
 
+  /// Get to Code Object Version of the ELF
+  uint16_t getELFABIVersion() const { return ELFABIVersion; }
+
   /// Find an HSA device symbol by its name on the executable.
   Expected
   findDeviceSymbol(GenericDeviceTy , StringRef SymbolName) const;
@@ -401,6 +404,7 @@
   hsa_executable_t Executable;
   hsa_code_object_t CodeObject;
   StringMap KernelInfoMap;
+  uint16_t ELFABIVersion;
 };
 
 /// Class implementing the AMDGPU kernel functionalities which derives from the
@@ -408,8 +412,7 @@
 struct AMDGPUKernelTy : public GenericKernelTy {
   /// Create an AMDGPU kernel with a name and an execution mode.
   AMDGPUKernelTy(const char *Name, OMPTgtExecModeFlags ExecutionMode)
-  : GenericKernelTy(Name, ExecutionMode),
-ImplicitArgsSize(sizeof(utils::AMDGPUImplicitArgsTy)) {}
+  : GenericKernelTy(Name, ExecutionMode) {}
 
   /// Initialize the AMDGPU kernel.
   Error initImpl(GenericDeviceTy , DeviceImageTy ) override {
@@ -450,6 +453,9 @@
 // TODO: Read the kernel descriptor for the max threads per block. May be
 // read from the image.
 
+ImplicitArgsSize = utils::getImplicitArgsSize(AMDImage.getELFABIVersion());
+DP("ELFABIVersion: %d\n", AMDImage.getELFABIVersion());
+
 // Get additional kernel info read from image
 KernelInfo = AMDImage.getKernelInfo(getName());
 if

[PATCH] D139730: [OpenMP][DeviceRTL][AMDGPU] Support code object version 5

2023-08-22 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam updated this revision to Diff 552344.
saiislam marked 5 inline comments as done.
saiislam added a comment.

Used CreateConstInBoundsGEP1_32 for emitting GEP statements. Changed the lambda 
function to a simple function body for defining the global variable.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139730/new/

https://reviews.llvm.org/D139730

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/lib/CodeGen/CodeGenModule.h
  clang/lib/CodeGen/TargetInfo.h
  clang/lib/CodeGen/Targets/AMDGPU.cpp
  clang/lib/Driver/ToolChain.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu
  clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
  clang/test/CodeGenOpenCL/opencl_types.cl
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
  openmp/libomptarget/DeviceRTL/CMakeLists.txt
  openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
  openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h

Index: openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
===
--- openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
+++ openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
@@ -25,6 +25,7 @@
 #include "llvm/Support/MemoryBufferRef.h"
 
 #include "llvm/Support/YAMLTraits.h"
+using namespace llvm::ELF;
 
 namespace llvm {
 namespace omp {
@@ -34,17 +35,25 @@
 
 // The implicit arguments of AMDGPU kernels.
 struct AMDGPUImplicitArgsTy {
-  uint64_t OffsetX;
-  uint64_t OffsetY;
-  uint64_t OffsetZ;
-  uint64_t HostcallPtr;
-  uint64_t Unused0;
-  uint64_t Unused1;
-  uint64_t Unused2;
+  uint32_t BlockCountX;
+  uint32_t BlockCountY;
+  uint32_t BlockCountZ;
+  uint16_t GroupSizeX;
+  uint16_t GroupSizeY;
+  uint16_t GroupSizeZ;
+  uint8_t Unused0[46]; // 46 byte offset.
+  uint16_t GridDims;
+  uint8_t Unused1[190]; // 190 byte offset.
 };
 
-static_assert(sizeof(AMDGPUImplicitArgsTy) == 56,
-  "Unexpected size of implicit arguments");
+enum IMPLICITARGS : uint32_t {
+  COV4_SIZE = 56,
+  COV5_SIZE = 256,
+};
+
+uint16_t getImplicitArgsSize(uint16_t Version) {
+  return Version < ELF::ELFABIVERSION_AMDGPU_HSA_V5 ? COV4_SIZE : COV5_SIZE;
+}
 
 /// Parse a TargetID to get processor arch and feature map.
 /// Returns processor subarch.
@@ -295,7 +304,8 @@
 /// Reads the AMDGPU specific metadata from the ELF file and propagates the
 /// KernelInfoMap
 Error readAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
-  StringMap ) {
+  StringMap ,
+  uint16_t ) {
   Error Err = Error::success(); // Used later as out-parameter
 
   auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
@@ -305,6 +315,12 @@
   const object::ELF64LEFile ELFObj = ELFOrError.get();
   ArrayRef Sections = cantFail(ELFObj.sections());
   KernelInfoReader Reader(KernelInfoMap);
+
+  // Read the code object version from ELF image header
+  auto Header = ELFObj.getHeader();
+  ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
+  DP("ELFABIVERSION Version: %u\n", ELFABIVersion);
+
   for (const auto  : Sections) {
 if (S.sh_type != ELF::SHT_NOTE)
   continue;
Index: openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
===
--- openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -381,6 +381,9 @@
   /// Get the executable.
   hsa_executable_t getExecutable() const { return Executable; }
 
+  /// Get to Code Object Version of the ELF
+  uint16_t getELFABIVersion() const { return ELFABIVersion; }
+
   /// Find an HSA device symbol by its name on the executable.
   Expected
   findDeviceSymbol(GenericDeviceTy , StringRef SymbolName) const;
@@ -401,6 +404,7 @@
   hsa_executable_t Executable;
   hsa_code_object_t CodeObject;
   StringMap KernelInfoMap;
+  uint16_t ELFABIVersion;
 };
 
 /// Class implementing the AMDGPU kernel functionalities which derives from the
@@ -408,8 +412,7 @@
 struct AMDGPUKernelTy : public GenericKernelTy {
   /// Create an AMDGPU kernel with a name and an execution mode.
   AMDGPUKernelTy(const char *Name, OMPTgtExecModeFlags ExecutionMode)
-  : GenericKernelTy(Name, ExecutionMode),
-ImplicitArgsSize(sizeof(utils::AMDGPUImplicitArgsTy)) {}
+  : GenericKernelTy(Name, ExecutionMode) {}
 
   /// Initialize the AMDGPU kernel.
   Error initImpl(GenericDeviceTy , DeviceImageTy ) override {
@@ -450,6 +453,9 @@
 // TODO: Read the kernel descriptor for the max threads per block. May be
 // read from the image.
 
+ImplicitArgsSize = utils::getImplicitArgsSize(AMDImage.getELFABIVersion());
+DP("ELFABIVersion: %d\n", AMDImage.getELFABIVersion());
+
 // Get additional kernel info read from image

[PATCH] D139730: [OpenMP][DeviceRTL][AMDGPU] Support code object version 5

2023-08-21 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam updated this revision to Diff 552085.
saiislam marked an inline comment as done.
saiislam added a comment.

Addressed reviewer's comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139730/new/

https://reviews.llvm.org/D139730

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/lib/CodeGen/CodeGenModule.h
  clang/lib/CodeGen/TargetInfo.h
  clang/lib/CodeGen/Targets/AMDGPU.cpp
  clang/lib/Driver/ToolChain.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu
  clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
  clang/test/CodeGenOpenCL/opencl_types.cl
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
  openmp/libomptarget/DeviceRTL/CMakeLists.txt
  openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
  openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h

Index: openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
===
--- openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
+++ openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
@@ -25,6 +25,7 @@
 #include "llvm/Support/MemoryBufferRef.h"
 
 #include "llvm/Support/YAMLTraits.h"
+using namespace llvm::ELF;
 
 namespace llvm {
 namespace omp {
@@ -34,17 +35,25 @@
 
 // The implicit arguments of AMDGPU kernels.
 struct AMDGPUImplicitArgsTy {
-  uint64_t OffsetX;
-  uint64_t OffsetY;
-  uint64_t OffsetZ;
-  uint64_t HostcallPtr;
-  uint64_t Unused0;
-  uint64_t Unused1;
-  uint64_t Unused2;
+  uint32_t BlockCountX;
+  uint32_t BlockCountY;
+  uint32_t BlockCountZ;
+  uint16_t GroupSizeX;
+  uint16_t GroupSizeY;
+  uint16_t GroupSizeZ;
+  uint8_t Unused0[46]; // 46 byte offset.
+  uint16_t GridDims;
+  uint8_t Unused1[190]; // 190 byte offset.
 };
 
-static_assert(sizeof(AMDGPUImplicitArgsTy) == 56,
-  "Unexpected size of implicit arguments");
+enum IMPLICITARGS : uint32_t {
+  COV4_SIZE = 56,
+  COV5_SIZE = 256,
+};
+
+uint16_t getImplicitArgsSize(uint16_t Version) {
+  return Version < ELF::ELFABIVERSION_AMDGPU_HSA_V5 ? COV4_SIZE : COV5_SIZE;
+}
 
 /// Parse a TargetID to get processor arch and feature map.
 /// Returns processor subarch.
@@ -295,7 +304,8 @@
 /// Reads the AMDGPU specific metadata from the ELF file and propagates the
 /// KernelInfoMap
 Error readAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
-  StringMap ) {
+  StringMap ,
+  uint16_t ) {
   Error Err = Error::success(); // Used later as out-parameter
 
   auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
@@ -305,6 +315,12 @@
   const object::ELF64LEFile ELFObj = ELFOrError.get();
   ArrayRef Sections = cantFail(ELFObj.sections());
   KernelInfoReader Reader(KernelInfoMap);
+
+  // Read the code object version from ELF image header
+  auto Header = ELFObj.getHeader();
+  ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
+  DP("ELFABIVERSION Version: %u\n", ELFABIVersion);
+
   for (const auto  : Sections) {
 if (S.sh_type != ELF::SHT_NOTE)
   continue;
Index: openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
===
--- openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -381,6 +381,9 @@
   /// Get the executable.
   hsa_executable_t getExecutable() const { return Executable; }
 
+  /// Get to Code Object Version of the ELF
+  uint16_t getELFABIVersion() const { return ELFABIVersion; }
+
   /// Find an HSA device symbol by its name on the executable.
   Expected
   findDeviceSymbol(GenericDeviceTy , StringRef SymbolName) const;
@@ -401,6 +404,7 @@
   hsa_executable_t Executable;
   hsa_code_object_t CodeObject;
   StringMap KernelInfoMap;
+  uint16_t ELFABIVersion;
 };
 
 /// Class implementing the AMDGPU kernel functionalities which derives from the
@@ -408,8 +412,7 @@
 struct AMDGPUKernelTy : public GenericKernelTy {
   /// Create an AMDGPU kernel with a name and an execution mode.
   AMDGPUKernelTy(const char *Name, OMPTgtExecModeFlags ExecutionMode)
-  : GenericKernelTy(Name, ExecutionMode),
-ImplicitArgsSize(sizeof(utils::AMDGPUImplicitArgsTy)) {}
+  : GenericKernelTy(Name, ExecutionMode) {}
 
   /// Initialize the AMDGPU kernel.
   Error initImpl(GenericDeviceTy , DeviceImageTy ) override {
@@ -450,6 +453,9 @@
 // TODO: Read the kernel descriptor for the max threads per block. May be
 // read from the image.
 
+ImplicitArgsSize = utils::getImplicitArgsSize(AMDImage.getELFABIVersion());
+DP("ELFABIVersion: %d\n", AMDImage.getELFABIVersion());
+
 // Get additional kernel info read from image
 KernelInfo = AMDImage.getKernelInfo(getName());
 if (!KernelInfo.has_value())
@@ -476,6 +482,10 @@
   /// Get the

[PATCH] D139730: [OpenMP][DeviceRTL][AMDGPU] Support code object version 5

2023-08-21 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam marked 4 inline comments as done.
saiislam added inline comments.



Comment at: clang/lib/CodeGen/Targets/AMDGPU.cpp:383
+CGM.getTarget().getTargetOpts().CodeObjectVersion, /*Size=*/32,
+llvm::GlobalValue::WeakODRLinkage);
+}

yaxunl wrote:
> 
> I am not sure weak_odr linkage will work when code object version is none. 
> This will cause conflict when a module emitted with cov none is linked with a 
> module emitted with cov4 or cov5. Also, when all modules are emitted with cov 
> none, we end up with a linked module with cov none and the work group size 
> code will not work.
> 
> Probably we need to emit llvm.amdgcn.abi.version with external linkage for 
> cov none.
> 
> Another issue is that llvm.amdgcn.abi.version is not internalized. It is 
> always loaded from memory even though it is in constant address space. This 
> will cause bad performance. Considering device libs may use clang builtin for 
> workgroup size. The performance impact may be significant. To avoid 
> performance degradation, we need to internalize it as early as possible in 
> the optimization pipeline.
I tried external linkage but it didn't work. Only weak_odr is working fine.



Comment at: clang/lib/CodeGen/Targets/AMDGPU.cpp:369-386
+if (CGM.getModule().getNamedGlobal(Name))
+  return;
+
+auto *Type =
+llvm::IntegerType::getIntNTy(CGM.getModule().getContext(), Size);
+auto *GV = new llvm::GlobalVariable(
+CGM.getModule(), Type, true, Linkage,

arsenm wrote:
> You moved GetOrCreateLLVMGlobal but don't use it? 
> 
> The lamdba is unnecessary for a single local use
I am using GetOrCreateLLVMGlobal in CGBuiltin.cpp while emitting code for 
amdgpu_workgroup_size.



Comment at: clang/lib/CodeGen/Targets/AMDGPU.cpp:369-386
+if (CGM.getModule().getNamedGlobal(Name))
+  return;
+
+auto *Type =
+llvm::IntegerType::getIntNTy(CGM.getModule().getContext(), Size);
+auto *GV = new llvm::GlobalVariable(
+CGM.getModule(), Type, true, Linkage,

saiislam wrote:
> arsenm wrote:
> > You moved GetOrCreateLLVMGlobal but don't use it? 
> > 
> > The lamdba is unnecessary for a single local use
> I am using GetOrCreateLLVMGlobal in CGBuiltin.cpp while emitting code for 
> amdgpu_workgroup_size.
I was hoping that this patch would pave the way for D130096, so that it can 
generate the rest of the control constants using the same lambda.
I can remove this and simplify the code if you want.



Comment at: clang/lib/Driver/ToolChain.cpp:1372
+  if (SameTripleAsHost ||
+  A->getOption().matches(options::OPT_mcode_object_version_EQ))
 DAL->append(A);

arsenm wrote:
> Don't understand why this is necessary
This function creates a derived argument list for OpenMP target specific flags.
`mcode-object-version` remains unset for device compilation step if we don't 
pass it here.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139730/new/

https://reviews.llvm.org/D139730

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D139730: [OpenMP][DeviceRTL][AMDGPU] Support code object version 5

2023-08-18 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam added a comment.

In D139730#4597504 , @jhuber6 wrote:

> Some nits. I'm assuming we're getting the code object in the backend now? 
> We'll need to make sure that `-Wl,--amdhsa-code-object-version` is passed to 
> the clang invocation inside of the `clang-linker-wrapper` to handle 
> `-save-temps` mode.

Clang-linker-wrapper was not passing the `-mllvm` option to the clang backend.




Comment at: openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h:36
 
-// The implicit arguments of AMDGPU kernels.
-struct AMDGPUImplicitArgsTy {
-  uint64_t OffsetX;
-  uint64_t OffsetY;
-  uint64_t OffsetZ;
-  uint64_t HostcallPtr;
-  uint64_t Unused0;
-  uint64_t Unused1;
-  uint64_t Unused2;
+enum IMPLICITARGS : uint32_t {
+  COV4_SIZE = 56,

jhuber6 wrote:
> I'm still not a fan of replacing the struct. The mnemonic of having a struct 
> is much more user friendly.
> ```
> ImplicitArgsTy Args{};
> std::memset(, sizeof(ImplicitArgsTy), 0);
> ...
> ```
> If we don't use something, just make it some random bytes, e.g.
> ```
> struct ImplicitArgsTy {
>   uint64_t OffsetX;
>   uint8_t Unused[64]; // 64 byte offset.
> };
> ```
Replaced.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139730/new/

https://reviews.llvm.org/D139730

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D139730: [OpenMP][DeviceRTL][AMDGPU] Support code object version 5

2023-08-18 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam updated this revision to Diff 551597.
saiislam marked 4 inline comments as done.
saiislam added a comment.

Changed the ImplicitArgs implementation to use a struct.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139730/new/

https://reviews.llvm.org/D139730

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/lib/CodeGen/CodeGenModule.h
  clang/lib/CodeGen/TargetInfo.h
  clang/lib/CodeGen/Targets/AMDGPU.cpp
  clang/lib/Driver/ToolChain.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu
  clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
  openmp/libomptarget/DeviceRTL/CMakeLists.txt
  openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
  openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h

Index: openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
===
--- openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
+++ openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
@@ -25,6 +25,7 @@
 #include "llvm/Support/MemoryBufferRef.h"
 
 #include "llvm/Support/YAMLTraits.h"
+using namespace llvm::ELF;
 
 namespace llvm {
 namespace omp {
@@ -34,17 +35,25 @@
 
 // The implicit arguments of AMDGPU kernels.
 struct AMDGPUImplicitArgsTy {
-  uint64_t OffsetX;
-  uint64_t OffsetY;
-  uint64_t OffsetZ;
-  uint64_t HostcallPtr;
-  uint64_t Unused0;
-  uint64_t Unused1;
-  uint64_t Unused2;
+  uint32_t BlockCountX;
+  uint32_t BlockCountY;
+  uint32_t BlockCountZ;
+  uint16_t GroupSizeX;
+  uint16_t GroupSizeY;
+  uint16_t GroupSizeZ;
+  uint8_t Unused0[46]; // 46 byte offset.
+  uint16_t GridDims;
+  uint8_t Unused1[190]; // 190 byte offset.
 };
 
-static_assert(sizeof(AMDGPUImplicitArgsTy) == 56,
-  "Unexpected size of implicit arguments");
+enum IMPLICITARGS : uint32_t {
+  COV4_SIZE = 56,
+  COV5_SIZE = 256,
+};
+
+uint16_t getImplicitArgsSize(uint16_t Version) {
+  return Version < ELF::ELFABIVERSION_AMDGPU_HSA_V5 ? COV4_SIZE : COV5_SIZE;
+}
 
 /// Parse a TargetID to get processor arch and feature map.
 /// Returns processor subarch.
@@ -295,7 +304,8 @@
 /// Reads the AMDGPU specific metadata from the ELF file and propagates the
 /// KernelInfoMap
 Error readAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
-  StringMap ) {
+  StringMap ,
+  uint16_t ) {
   Error Err = Error::success(); // Used later as out-parameter
 
   auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
@@ -305,6 +315,12 @@
   const object::ELF64LEFile ELFObj = ELFOrError.get();
   ArrayRef Sections = cantFail(ELFObj.sections());
   KernelInfoReader Reader(KernelInfoMap);
+
+  // Read the code object version from ELF image header
+  auto Header = ELFObj.getHeader();
+  ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
+  DP("ELFABIVERSION Version: %u\n", ELFABIVersion);
+
   for (const auto  : Sections) {
 if (S.sh_type != ELF::SHT_NOTE)
   continue;
Index: openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
===
--- openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -381,6 +381,9 @@
   /// Get the executable.
   hsa_executable_t getExecutable() const { return Executable; }
 
+  /// Get to Code Object Version of the ELF
+  uint16_t getELFABIVersion() const { return ELFABIVersion; }
+
   /// Find an HSA device symbol by its name on the executable.
   Expected
   findDeviceSymbol(GenericDeviceTy , StringRef SymbolName) const;
@@ -401,6 +404,7 @@
   hsa_executable_t Executable;
   hsa_code_object_t CodeObject;
   StringMap KernelInfoMap;
+  uint16_t ELFABIVersion;
 };
 
 /// Class implementing the AMDGPU kernel functionalities which derives from the
@@ -408,8 +412,7 @@
 struct AMDGPUKernelTy : public GenericKernelTy {
   /// Create an AMDGPU kernel with a name and an execution mode.
   AMDGPUKernelTy(const char *Name, OMPTgtExecModeFlags ExecutionMode)
-  : GenericKernelTy(Name, ExecutionMode),
-ImplicitArgsSize(sizeof(utils::AMDGPUImplicitArgsTy)) {}
+  : GenericKernelTy(Name, ExecutionMode) {}
 
   /// Initialize the AMDGPU kernel.
   Error initImpl(GenericDeviceTy , DeviceImageTy ) override {
@@ -450,6 +453,12 @@
 // TODO: Read the kernel descriptor for the max threads per block. May be
 // read from the image.
 
+ImplicitArgsSize =
+(AMDImage.getELFABIVersion() < llvm::ELF::ELFABIVERSION_AMDGPU_HSA_V5)
+? utils::COV4_SIZE
+: utils::COV5_SIZE;
+DP("ELFABIVersion: %d\n", AMDImage.getELFABIVersion());
+
 // Get additional kernel info read from image
 KernelInfo = AMDImage.getKernelInfo(getName());

[PATCH] D139730: [OpenMP][DeviceRTL][AMDGPU] Support code object version 5

2023-08-17 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam added inline comments.



Comment at: clang/lib/CodeGen/CGBuiltin.cpp:17143-17145
+  llvm::LoadInst *LD;
+  Constant *Offset, *Offset1;
+  Value *DP, *DP1;

arsenm wrote:
> Move down to define and initialize
There are multiple uses of the same identifier. Defining them four times looks 
odd.



Comment at: openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp:2550-2551
+Error Err = retrieveAllMemoryPools();
+if (Err)
+  return Plugin::error("Unable to retieve all memmory pools");
+

jhuber6 wrote:
> This and below isn't correct. You can't discard an `llvm::Error` value like 
> this without either doing `consumeError(std::move(Err))` or 
> `toString(std::move(Err))`. However, you don't need to consume these in the 
> first place, they already contain the error message from the callee and 
> should just be forwarded.
Removed the logic for preallocatedheap.



Comment at: openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp:1752
+// if (auto Err = preAllocateDeviceMemoryPool())
+//   return Err;
+

jhuber6 wrote:
> saiislam wrote:
> > jhuber6 wrote:
> > > Leftoever?
> > No, it is not a left over.
> > One of the fields in cov5 implicitikernarg is heap_v1 ptr. It should point 
> > to a 128KB zero-initialized block of coarse-grained memory on each device 
> > before launching the kernel. This code was working a while ago, but right 
> > now it is failing most likely due to some latest change in devicertl memory 
> > handling mechanism.
> > I need to debug it with this patch, otherwise it will cause all target 
> > region code calling device-malloc to fail.
> > I will try to fix it before the next revision.
> Do we really need that? We only use a fraction of the existing implicit 
> arguments. My understanding is that most of these are more for runtime 
> handling for HIP and OpenCL while we would most likely want our own solution. 
> I'm assuming that the 128KB is not required for anything we use?
I have removed the preallocatedheap work from this patch.



Comment at: openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h:36
 
-// The implicit arguments of AMDGPU kernels.
-struct AMDGPUImplicitArgsTy {
-  uint64_t OffsetX;
-  uint64_t OffsetY;
-  uint64_t OffsetZ;
-  uint64_t HostcallPtr;
-  uint64_t Unused0;
-  uint64_t Unused1;
-  uint64_t Unused2;
+enum IMPLICITARGS : uint32_t {
+  COV4_SIZE = 56,

jhuber6 wrote:
> saiislam wrote:
> > jhuber6 wrote:
> > > arsenm wrote:
> > > > This is getting duplicated a few places, should it move to a support 
> > > > header?
> > > > 
> > > > I don't love the existing APIs for this, I think a struct definition 
> > > > makes more sense
> > > The other user here is my custom loader, @JonChesterfield has talked 
> > > about wanting a common HSA helper header for awhile now.
> > > 
> > > I agree that the struct definition is much better. Being able to simply 
> > > allocate this size and then zero fill it is much cleaner.
> > Defining a struct for whole 256 byte of implicitargs in cov5 was becoming a 
> > little difficult due to different sizes of various fields (2, 4, 6, 8, 48, 
> > 72 bytes) along with multiple reserved fields in between. It made sense for 
> > cov4 because it only had 7 fields of 8 bytes each, where we needed only 4th 
> > field in OpenMP runtime (for hostcall_buffer).
> > 
> > Offset based lookups like the following allows handling/exposing only 
> > required fields across generations of ABI.
> If we don't use it, just put it as `unused`. It's really hard to read as-is 
> and it makes it more difficult to just zero fill.
I have reduced the fields to bare minimum required for OpenMP.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139730/new/

https://reviews.llvm.org/D139730

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D139730: [OpenMP][DeviceRTL][AMDGPU] Support code object version 5

2023-08-17 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam updated this revision to Diff 551266.
saiislam marked 6 inline comments as done.
saiislam added a comment.

Updated the patch as per reviewers' comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139730/new/

https://reviews.llvm.org/D139730

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/lib/CodeGen/CodeGenModule.h
  clang/lib/CodeGen/TargetInfo.h
  clang/lib/CodeGen/Targets/AMDGPU.cpp
  clang/lib/Driver/ToolChain.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu
  clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
  openmp/libomptarget/DeviceRTL/CMakeLists.txt
  openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
  openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h

Index: openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
===
--- openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
+++ openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
@@ -25,6 +25,7 @@
 #include "llvm/Support/MemoryBufferRef.h"
 
 #include "llvm/Support/YAMLTraits.h"
+using namespace llvm::ELF;
 
 namespace llvm {
 namespace omp {
@@ -32,19 +33,29 @@
 namespace plugin {
 namespace utils {
 
-// The implicit arguments of AMDGPU kernels.
-struct AMDGPUImplicitArgsTy {
-  uint64_t OffsetX;
-  uint64_t OffsetY;
-  uint64_t OffsetZ;
-  uint64_t HostcallPtr;
-  uint64_t Unused0;
-  uint64_t Unused1;
-  uint64_t Unused2;
+enum IMPLICITARGS : uint32_t {
+  COV4_SIZE = 56,
+  COV5_SIZE = 256,
+
+  COV5_BLOCK_COUNT_X_OFFSET = 0,
+  COV5_BLOCK_COUNT_X_SIZE = 4,
+
+  COV5_GROUP_SIZE_X_OFFSET = 12,
+  COV5_GROUP_SIZE_X_SIZE = 2,
+
+  COV5_GROUP_SIZE_Y_OFFSET = 14,
+  COV5_GROUP_SIZE_Y_SIZE = 2,
+
+  COV5_GROUP_SIZE_Z_OFFSET = 16,
+  COV5_GROUP_SIZE_Z_SIZE = 2,
+
+  COV5_GRID_DIMS_OFFSET = 64,
+  COV5_GRID_DIMS_SIZE = 2,
 };
 
-static_assert(sizeof(AMDGPUImplicitArgsTy) == 56,
-  "Unexpected size of implicit arguments");
+uint16_t getImplicitArgsSize(uint16_t Version) {
+  return Version < ELF::ELFABIVERSION_AMDGPU_HSA_V5 ? 56 : 256;
+}
 
 /// Parse a TargetID to get processor arch and feature map.
 /// Returns processor subarch.
@@ -295,7 +306,8 @@
 /// Reads the AMDGPU specific metadata from the ELF file and propagates the
 /// KernelInfoMap
 Error readAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
-  StringMap ) {
+  StringMap ,
+  uint16_t ) {
   Error Err = Error::success(); // Used later as out-parameter
 
   auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
@@ -305,6 +317,12 @@
   const object::ELF64LEFile ELFObj = ELFOrError.get();
   ArrayRef Sections = cantFail(ELFObj.sections());
   KernelInfoReader Reader(KernelInfoMap);
+
+  // Read the code object version from ELF image header
+  auto Header = ELFObj.getHeader();
+  ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
+  DP("ELFABIVERSION Version: %u\n", ELFABIVersion);
+
   for (const auto  : Sections) {
 if (S.sh_type != ELF::SHT_NOTE)
   continue;
Index: openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
===
--- openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -253,6 +253,13 @@
 return Plugin::check(Status, "Error in hsa_amd_agents_allow_access: %s");
   }
 
+  Error zeroInitializeMemory(void *Ptr, size_t Size) {
+uint64_t Rounded = sizeof(uint32_t) * ((Size + 3) / sizeof(uint32_t));
+hsa_status_t Status =
+hsa_amd_memory_fill(Ptr, 0, Rounded / sizeof(uint32_t));
+return Plugin::check(Status, "Error in hsa_amd_memory_fill: %s");
+  }
+
   /// Get attribute from the memory pool.
   template 
   Error getAttr(hsa_amd_memory_pool_info_t Kind, Ty ) const {
@@ -381,6 +388,9 @@
   /// Get the executable.
   hsa_executable_t getExecutable() const { return Executable; }
 
+  /// Get to Code Object Version of the ELF
+  uint16_t getELFABIVersion() const { return ELFABIVersion; }
+
   /// Find an HSA device symbol by its name on the executable.
   Expected
   findDeviceSymbol(GenericDeviceTy , StringRef SymbolName) const;
@@ -401,6 +411,7 @@
   hsa_executable_t Executable;
   hsa_code_object_t CodeObject;
   StringMap KernelInfoMap;
+  uint16_t ELFABIVersion;
 };
 
 /// Class implementing the AMDGPU kernel functionalities which derives from the
@@ -408,8 +419,7 @@
 struct AMDGPUKernelTy : public GenericKernelTy {
   /// Create an AMDGPU kernel with a name and an execution mode.
   AMDGPUKernelTy(const char *Name, OMPTgtExecModeFlags ExecutionMode)
-  : GenericKernelTy(Name, ExecutionMode),
-ImplicitArgsSize(sizeof(utils::AMDGPUImplicitArgsTy)) {}
+  : GenericKernelTy(Name, ExecutionMode) {}
 
   ///

[PATCH] D156928: [Clang][AMDGPU] Fix handling of -mcode-object-version=none arg

2023-08-07 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam added a comment.

In D156928#4565506 , @jhuber6 wrote:

> In D156928#4562239 , 
> @JonChesterfield wrote:
>
>> Or, the front end could define those objects directly, without importing IR 
>> files that define the objects with the content clang used to choose the 
>> object file. E.g. instead of the argument daz=off (spelled differently) 
>> finding a file called daz.off.ll that defines variable called daz with a 
>> value 0, that argument could define that variable. I think @jhuber6 has a 
>> partial patch trying to do that.
>>
>> If we were more ambitious, we could use intrinsics that are folded reliably 
>> at O0 instead of magic variables that hopefully get constant folded. That 
>> would kill a bunch of O0 bugs.
>>
>> In general though, splicing magic variables in the front end seems unlikely 
>> to be performance critical relative to splicing them in at the start of the 
>> backend.
>
> I think @saiislam is working on a patch that will handle that. We'll have 
> `clang` emit some global that OpenMP uses.

Thanks Joseph.
Yes, I have abandoned this patch and am using the `-Xclang 
-mcode-object-version=none` option in the patch to enable cov5 support for 
OpenMP. 



Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D156928/new/

https://reviews.llvm.org/D156928

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D139730: [OpenMP][DeviceRTL][AMDGPU] Support code object version 5

2023-08-07 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam added a comment.

In D139730#4561622 , @arsenm wrote:

> In D139730#4561619 , @arsenm wrote:
>
>> In D139730#4561575 , @jhuber6 
>> wrote:
>>
>>> In D139730#4561573 , @arsenm 
>>> wrote:
>>>
>>>> In D139730#4561540 , @jhuber6 
>>>> wrote:
>>>>
>>>>> Could you explain briefly what the approach here is? I'm confused as to 
>>>>> what's actually changed and how we're handling this difference. I thought 
>>>>> if this was just the definition of some builtin function we could just 
>>>>> rely on the backend to figure it out. Why do we need to know the code 
>>>>> object version inside the device RTL?
>>>>
>>>> The build is called in the device rtl, so the device RTL needs to contain 
>>>> both implementations. The "backend figuring it out" is dead code 
>>>> elimination
>>>
>>> Okay, do we expect to re-use this interface anywhere? If it's just for 
>>> OpenMP then we should probably copy the approach taken for 
>>> `__omp_rtl_debug_kind`, which is a global created on the GPU by 
>>> `CGOpenMPRuntimeGPU`'s constructor and does more or less the same thing.
>>
>> device libs replicates the same scheme using its own copy of an equivalent 
>> variable. Trying to merge those two together
>
> Although I guess that doesn't really need the builtin changes?

This builtin was already aware of cov4 and cov5. All this patch changes is 
making it aware of the possibility that both need to be present.
It is already used by device-libs, deviceRTL, and libc-gpu.
Also, encapsulating ABI-related changes in the implementation of the builtin 
allows other runtime developers to be agnostic to these lower-level changes.




Comment at: clang/lib/CodeGen/CGBuiltin.cpp:17187-17188
+Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2)));
+  } else {
+if (Cov == clang::TargetOptions::COV_5) {
+  // Indexing the implicit kernarg segment.

jhuber6 wrote:
> nit.
There are a couple of common lines after the inner if-else, in the outer else 
section.



Comment at: openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp:1752
+// if (auto Err = preAllocateDeviceMemoryPool())
+//   return Err;
+

jhuber6 wrote:
> Leftoever?
No, it is not a left over.
One of the fields in the cov5 implicit kernarg is the heap_v1 ptr. It should 
point to a 128KB zero-initialized block of coarse-grained memory on each device 
before launching the kernel. This code was working a while ago, but right now 
it is failing, most likely due to a recent change in the DeviceRTL memory 
handling mechanism.
I need to debug it with this patch, otherwise it will cause all target region 
code calling device-malloc to fail.
I will try to fix it before the next revision.



Comment at: openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp:2542
+  /// Get the address of pointer to the preallocated device memory pool.
+  void **getPreAllocatedDeviceMemoryPool() {
+return 

jhuber6 wrote:
> Why do we need this? The current method shouldn't need to change if all we're 
> doing is allocating memory of greater size.
`PreAllocatedDeviceMemoryPool` is the pointer which stores the intermediate 
value before it is written to the heap_v1_ptr field of the cov5 implicit kernarg.



Comment at: openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp:3036
 
+  if (getImplicitArgsSize() < utils::COV5_SIZE) {
+DP("Setting fields of ImplicitArgs for COV4\n");

jhuber6 wrote:
> So we're required to emit some new arguments? I don't have any idea 
> what'schanged between this COV4 and COV5 stuff.
In cov5, we need to set certain fields of the implicit kernel arguments before 
launching the kernel.
Please see [[ 
https://llvm.org/docs/AMDGPUUsage.html#amdgpu-amdhsa-code-object-kernel-argument-metadata-map-table-v5
 | AMDHSA Code Object V5 Kernel Argument Metadata Map Additions and Changes]] 
for more details.

Only NumBlocks, NumThreads(XYZ), GridDims, and Heap_V1_ptr are relevant for us, 
so I have simplified code further.



Comment at: openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp:3037
+  if (getImplicitArgsSize() < utils::COV5_SIZE) {
+DP("Setting fields of ImplicitArgs for COV4\n");
+  } else {

arsenm wrote:
> This isn't doing anything?
Earlier we used to set hostcall_buffer here, but not anymore.
I have left the message in DP just for debug help.



Comment at: openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h:36
 
-// The implicit arguments of AMDGPU kernels.
-struct AMDGPUImplicitArgsTy {
-  uint64_t OffsetX;
-  uint64_t OffsetY;
-  uint64_t OffsetZ;
-  uint64_t HostcallPtr;
-  uint64_t Unused0;
-  uint64_t Unused1;
-  uint64_t Unused2;
+enum IMPLICITARGS :

[PATCH] D139730: [OpenMP][DeviceRTL][AMDGPU] Support code object version 5

2023-08-07 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam updated this revision to Diff 547751.
saiislam marked 5 inline comments as done.
saiislam added a comment.

Removed unused cov5 implicitargs fields.
Added comments about EmitAMDGPUWorkGroupSize and ABI-agnostic code emission.
Addressed reviewers' comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139730/new/

https://reviews.llvm.org/D139730

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/lib/CodeGen/CodeGenModule.h
  clang/lib/CodeGen/TargetInfo.h
  clang/lib/CodeGen/Targets/AMDGPU.cpp
  clang/lib/Driver/ToolChain.cpp
  openmp/libomptarget/DeviceRTL/CMakeLists.txt
  openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
  openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h

Index: openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
===
--- openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
+++ openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
@@ -25,6 +25,7 @@
 #include "llvm/Support/MemoryBufferRef.h"
 
 #include "llvm/Support/YAMLTraits.h"
+using namespace llvm::ELF;
 
 namespace llvm {
 namespace omp {
@@ -32,19 +33,35 @@
 namespace plugin {
 namespace utils {
 
-// The implicit arguments of AMDGPU kernels.
-struct AMDGPUImplicitArgsTy {
-  uint64_t OffsetX;
-  uint64_t OffsetY;
-  uint64_t OffsetZ;
-  uint64_t HostcallPtr;
-  uint64_t Unused0;
-  uint64_t Unused1;
-  uint64_t Unused2;
+enum IMPLICITARGS : uint32_t {
+  COV4_SIZE = 56,
+  COV5_SIZE = 256,
+
+  COV5_BLOCK_COUNT_X_OFFSET = 0,
+  COV5_BLOCK_COUNT_X_SIZE = 4,
+
+  COV5_GROUP_SIZE_X_OFFSET = 12,
+  COV5_GROUP_SIZE_X_SIZE = 2,
+
+  COV5_GROUP_SIZE_Y_OFFSET = 14,
+  COV5_GROUP_SIZE_Y_SIZE = 2,
+
+  COV5_GROUP_SIZE_Z_OFFSET = 16,
+  COV5_GROUP_SIZE_Z_SIZE = 2,
+
+  COV5_GRID_DIMS_OFFSET = 64,
+  COV5_GRID_DIMS_SIZE = 2,
+
+  COV5_HEAPV1_PTR_OFFSET = 96,
+  COV5_HEAPV1_PTR_SIZE = 8,
+
+  // 128 KB
+  PER_DEVICE_PREALLOC_SIZE = 131072
 };
 
-static_assert(sizeof(AMDGPUImplicitArgsTy) == 56,
-  "Unexpected size of implicit arguments");
+uint16_t getImplicitArgsSize(uint16_t Version) {
+  return Version < ELF::ELFABIVERSION_AMDGPU_HSA_V5 ? 56 : 256;
+}
 
 /// Parse a TargetID to get processor arch and feature map.
 /// Returns processor subarch.
@@ -295,7 +312,8 @@
 /// Reads the AMDGPU specific metadata from the ELF file and propagates the
 /// KernelInfoMap
 Error readAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
-  StringMap ) {
+  StringMap ,
+  uint16_t ) {
   Error Err = Error::success(); // Used later as out-parameter
 
   auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
@@ -305,6 +323,12 @@
   const object::ELF64LEFile ELFObj = ELFOrError.get();
   ArrayRef Sections = cantFail(ELFObj.sections());
   KernelInfoReader Reader(KernelInfoMap);
+
+  // Read the code object version from ELF image header
+  auto Header = ELFObj.getHeader();
+  ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
+  DP("ELFABIVERSION Version: %u\n", ELFABIVersion);
+
   for (const auto  : Sections) {
 if (S.sh_type != ELF::SHT_NOTE)
   continue;
Index: openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
===
--- openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -253,6 +253,13 @@
 return Plugin::check(Status, "Error in hsa_amd_agents_allow_access: %s");
   }
 
+  Error zeroInitializeMemory(void *Ptr, size_t Size) {
+uint64_t Rounded = sizeof(uint32_t) * ((Size + 3) / sizeof(uint32_t));
+hsa_status_t Status =
+hsa_amd_memory_fill(Ptr, 0, Rounded / sizeof(uint32_t));
+return Plugin::check(Status, "Error in hsa_amd_memory_fill: %s");
+  }
+
   /// Get attribute from the memory pool.
   template 
   Error getAttr(hsa_amd_memory_pool_info_t Kind, Ty ) const {
@@ -381,6 +388,9 @@
   /// Get the executable.
   hsa_executable_t getExecutable() const { return Executable; }
 
+  /// Get the Code Object Version of the ELF
+  uint16_t getELFABIVersion() const { return ELFABIVersion; }
+
   /// Find an HSA device symbol by its name on the executable.
   Expected
   findDeviceSymbol(GenericDeviceTy , StringRef SymbolName) const;
@@ -401,6 +411,7 @@
   hsa_executable_t Executable;
   hsa_code_object_t CodeObject;
   StringMap KernelInfoMap;
+  uint16_t ELFABIVersion;
 };
 
 /// Class implementing the AMDGPU kernel functionalities which derives from the
@@ -408,8 +419,7 @@
 struct AMDGPUKernelTy : public GenericKernelTy {
   /// Create an AMDGPU kernel with a name and an execution mode.
   AMDGPUKernelTy(const char *Name, OMPTgtExecModeFlags ExecutionMode)
-  : GenericKernelTy(Name, ExecutionMode),
-ImplicitArgsSize(sizeof(utils::AMDGPUImplicitArgsTy))

[PATCH] D139730: [OpenMP][DeviceRTL][AMDGPU] Support code object version 5

2023-08-04 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam updated this revision to Diff 547297.
saiislam added a comment.

Another attempt at cov5 support by using CodeGen for 
builtin_amdgpu_workgroup_size.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139730/new/

https://reviews.llvm.org/D139730

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/lib/CodeGen/CodeGenModule.h
  clang/lib/CodeGen/TargetInfo.h
  clang/lib/CodeGen/Targets/AMDGPU.cpp
  clang/lib/Driver/ToolChain.cpp
  openmp/libomptarget/DeviceRTL/CMakeLists.txt
  openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
  openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h

Index: openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
===
--- openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
+++ openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
@@ -25,6 +25,7 @@
 #include "llvm/Support/MemoryBufferRef.h"
 
 #include "llvm/Support/YAMLTraits.h"
+using namespace llvm::ELF;
 
 namespace llvm {
 namespace omp {
@@ -32,19 +33,55 @@
 namespace plugin {
 namespace utils {
 
-// The implicit arguments of AMDGPU kernels.
-struct AMDGPUImplicitArgsTy {
-  uint64_t OffsetX;
-  uint64_t OffsetY;
-  uint64_t OffsetZ;
-  uint64_t HostcallPtr;
-  uint64_t Unused0;
-  uint64_t Unused1;
-  uint64_t Unused2;
+enum IMPLICITARGS : uint32_t {
+  COV4_SIZE = 56,
+  COV4_HOSTCALL_PTR_OFFSET = 24,
+  HOSTCALL_PTR_SIZE = 8,
+
+  COV5_SIZE = 256,
+
+  COV5_BLOCK_COUNT_X_OFFSET = 0,
+  COV5_BLOCK_COUNT_X_SIZE = 4,
+
+  COV5_BLOCK_COUNT_Y_OFFSET = 4,
+  COV5_BLOCK_COUNT_Y_SIZE = 4,
+
+  COV5_BLOCK_COUNT_Z_OFFSET = 8,
+  COV5_BLOCK_COUNT_Z_SIZE = 4,
+
+  COV5_GROUP_SIZE_X_OFFSET = 12,
+  COV5_GROUP_SIZE_X_SIZE = 2,
+
+  COV5_GROUP_SIZE_Y_OFFSET = 14,
+  COV5_GROUP_SIZE_Y_SIZE = 2,
+
+  COV5_GROUP_SIZE_Z_OFFSET = 16,
+  COV5_GROUP_SIZE_Z_SIZE = 2,
+
+  COV5_REMAINDER_X_OFFSET = 18,
+  COV5_REMAINDER_X_SIZE = 2,
+
+  COV5_REMAINDER_Y_OFFSET = 20,
+  COV5_REMAINDER_Y_SIZE = 2,
+
+  COV5_REMAINDER_Z_OFFSET = 22,
+  COV5_REMAINDER_Z_SIZE = 2,
+
+  COV5_GRID_DIMS_OFFSET = 64,
+  COV5_GRID_DIMS_SIZE = 2,
+
+  COV5_HOSTCALL_PTR_OFFSET = 80,
+
+  COV5_HEAPV1_PTR_OFFSET = 96,
+  COV5_HEAPV1_PTR_SIZE = 8,
+
+  // 128 KB
+  PER_DEVICE_PREALLOC_SIZE = 131072
 };
 
-static_assert(sizeof(AMDGPUImplicitArgsTy) == 56,
-  "Unexpected size of implicit arguments");
+uint16_t getImplicitArgsSize(uint16_t Version) {
+  return Version < ELF::ELFABIVERSION_AMDGPU_HSA_V5 ? 56 : 256;
+}
 
 /// Parse a TargetID to get processor arch and feature map.
 /// Returns processor subarch.
@@ -295,7 +332,8 @@
 /// Reads the AMDGPU specific metadata from the ELF file and propagates the
 /// KernelInfoMap
 Error readAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
-  StringMap ) {
+  StringMap ,
+  uint16_t ) {
   Error Err = Error::success(); // Used later as out-parameter
 
   auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
@@ -305,6 +343,12 @@
   const object::ELF64LEFile ELFObj = ELFOrError.get();
   ArrayRef Sections = cantFail(ELFObj.sections());
   KernelInfoReader Reader(KernelInfoMap);
+
+  // Read the code object version from ELF image header
+  auto Header = ELFObj.getHeader();
+  ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
+  DP("ELFABIVERSION Version: %u\n", ELFABIVersion);
+
   for (const auto  : Sections) {
 if (S.sh_type != ELF::SHT_NOTE)
   continue;
Index: openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
===
--- openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -253,6 +253,13 @@
 return Plugin::check(Status, "Error in hsa_amd_agents_allow_access: %s");
   }
 
+  Error zeroInitializeMemory(void *Ptr, size_t Size) {
+uint64_t Rounded = sizeof(uint32_t) * ((Size + 3) / sizeof(uint32_t));
+hsa_status_t Status =
+hsa_amd_memory_fill(Ptr, 0, Rounded / sizeof(uint32_t));
+return Plugin::check(Status, "Error in hsa_amd_memory_fill: %s");
+  }
+
   /// Get attribute from the memory pool.
   template 
   Error getAttr(hsa_amd_memory_pool_info_t Kind, Ty ) const {
@@ -381,6 +388,9 @@
   /// Get the executable.
   hsa_executable_t getExecutable() const { return Executable; }
 
+  /// Get the Code Object Version of the ELF
+  uint16_t getELFABIVersion() const { return ELFABIVersion; }
+
   /// Find an HSA device symbol by its name on the executable.
   Expected
   findDeviceSymbol(GenericDeviceTy , StringRef SymbolName) const;
@@ -401,6 +411,7 @@
   hsa_executable_t Executable;
   hsa_code_object_t CodeObject;
   StringMap KernelInfoMap;
+  uint16_t ELFABIVersion;
 };
 
 /// Class implementing the AMDGPU kernel functionalities which derives from

[PATCH] D156928: [Clang][AMDGPU] Fix handling of -mcode-object-version=none arg

2023-08-04 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam marked 2 inline comments as done.
saiislam added a comment.

In D156928#4555121 , @yaxunl wrote:

> `-mcode-object-version=none` was intentionally designed to work with `clang 
> -cc1` only, since it does not work with clang driver if users link with 
> device library. Device library can still use it by  using it with `-Xclang`.

Thanks for the tip @yaxunl . I will abandon this revision and use Xclang for 
passing cov_none to devicertl.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D156928/new/

https://reviews.llvm.org/D156928

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D156928: [Clang][AMDGPU] Fix handling of -mcode-object-version=none arg

2023-08-02 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam created this revision.
saiislam added reviewers: jhuber6, yaxunl.
Herald added subscribers: tpr, dstuttard, kzhuravl.
Herald added a project: All.
saiislam requested review of this revision.
Herald added subscribers: cfe-commits, MaskRay, wdng.
Herald added a project: clang.

-mcode-object-version=none is a special argument which allows
abi-agnostic code to be generated for device runtime libraries.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D156928

Files:
  clang/include/clang/Basic/TargetOptions.h
  clang/lib/Driver/ToolChain.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Driver/ToolChains/CommonArgs.cpp

Index: clang/lib/Driver/ToolChains/CommonArgs.cpp
===
--- clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -25,6 +25,7 @@
 #include "clang/Basic/CharInfo.h"
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/ObjCRuntime.h"
+#include "clang/Basic/TargetOptions.h"
 #include "clang/Basic/Version.h"
 #include "clang/Config/config.h"
 #include "clang/Driver/Action.h"
@@ -2299,16 +2300,16 @@
 
 void tools::checkAMDGPUCodeObjectVersion(const Driver ,
  const llvm::opt::ArgList ) {
-  const unsigned MinCodeObjVer = 2;
-  const unsigned MaxCodeObjVer = 5;
 
   if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args)) {
 if (CodeObjArg->getOption().getID() ==
 options::OPT_mcode_object_version_EQ) {
-  unsigned CodeObjVer = MaxCodeObjVer;
-  auto Remnant =
-  StringRef(CodeObjArg->getValue()).getAsInteger(0, CodeObjVer);
-  if (Remnant || CodeObjVer < MinCodeObjVer || CodeObjVer > MaxCodeObjVer)
+  unsigned CodeObjVer = TargetOptions::COV_Default / 100;
+  auto CovStr = StringRef(CodeObjArg->getValue());
+  if(CovStr.starts_with("none")) return;
+  
+  CovStr.getAsInteger(0, CodeObjVer);
+  if (CodeObjVer < TargetOptions::COV_None || CodeObjVer > TargetOptions::COV_MAX)
 D.Diag(diag::err_drv_invalid_int_value)
 << CodeObjArg->getAsString(Args) << CodeObjArg->getValue();
 }
@@ -2317,9 +2318,13 @@
 
 unsigned tools::getAMDGPUCodeObjectVersion(const Driver ,
const llvm::opt::ArgList ) {
-  unsigned CodeObjVer = 4; // default
-  if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args))
-StringRef(CodeObjArg->getValue()).getAsInteger(0, CodeObjVer);
+
+  unsigned CodeObjVer = TargetOptions::COV_Default / 100; // default
+  if (haveAMDGPUCodeObjectVersionArgument(D, Args)) {
+auto CodeObjArg = StringRef(getAMDGPUCodeObjectArgument(D, Args)->getValue());
+if(CodeObjArg.starts_with("none"))  return TargetOptions::COV_None;
+CodeObjArg.getAsInteger(0, CodeObjVer);
+  }
   return CodeObjVer;
 }
 
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -1055,15 +1055,19 @@
   // provided the user (e.g. front end tests) can use the default.
   if (haveAMDGPUCodeObjectVersionArgument(D, Args)) {
 unsigned CodeObjVer = getAMDGPUCodeObjectVersion(D, Args);
+if(CodeObjVer != 0) {
 CmdArgs.insert(CmdArgs.begin() + 1,
Args.MakeArgString(Twine("--amdhsa-code-object-version=") +
   Twine(CodeObjVer)));
 CmdArgs.insert(CmdArgs.begin() + 1, "-mllvm");
+}
 // -cc1as does not accept -mcode-object-version option.
-if (!IsCC1As)
+if (!IsCC1As) {
+  std::string CodeObjVerStr = (CodeObjVer ? Twine(CodeObjVer) : "none").str();
   CmdArgs.insert(CmdArgs.begin() + 1,
  Args.MakeArgString(Twine("-mcode-object-version=") +
-Twine(CodeObjVer)));
+CodeObjVerStr));
+}
   }
 }
 
Index: clang/lib/Driver/ToolChain.cpp
===
--- clang/lib/Driver/ToolChain.cpp
+++ clang/lib/Driver/ToolChain.cpp
@@ -1354,6 +1354,9 @@
 
   // Handle -Xopenmp-target flags
   for (auto *A : Args) {
+if (A->getOption().matches(options::OPT_mcode_object_version_EQ))
+  DAL->append(A);
+
 // Exclude flags which may only apply to the host toolchain.
 // Do not exclude flags when the host triple (AuxTriple)
 // matches the current toolchain triple. If it is not present
Index: clang/include/clang/Basic/TargetOptions.h
===
--- clang/include/clang/Basic/TargetOptions.h
+++ clang/include/clang/Basic/TargetOptions.h
@@ -86,6 +86,8 @@
 COV_3 = 300,
 COV_4 = 400,
 COV_5 = 500,
+COV_Default = 400,
+COV_MAX = 500
   };
   /// \brief Code object version for AMDGPU.
   CodeObjectVersionKind CodeObjectVersion = CodeObjectVersionKind::COV_None;

[PATCH] D129635: [OpenMP] Update the default version of OpenMP to 5.1

2023-06-26 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam added a comment.

@jdoerfert @RaviNarayanaswamy @ABataev

Are there any features in this table which have already been implemented but 
have not been tagged?
https://clang.llvm.org/docs/OpenMPSupport.html#openmp-5-1-implementation-details


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D129635/new/

https://reviews.llvm.org/D129635

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D129635: [OpenMP] Update the default version of OpenMP to 5.1

2023-06-14 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam accepted this revision.
saiislam added a comment.

Thank you @animeshk-amd!
LGTM!

As discussed in the multi-company OpenMP LLVM meeting, this is the right time 
to upgrade the default OpenMP spec version to 5.1.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D129635/new/

https://reviews.llvm.org/D129635

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D150998: [OpenMP] Fix using the target ID when using the new driver

2023-05-23 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam added inline comments.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:8475
 "triple=" + TC->getTripleString(),
-"arch=" + Arch.str(),
+"arch=" + getProcessorFromTargetID(TC->getTriple(), Arch).str(),
 "kind=" + Kind.str(),

jhuber6 wrote:
> yaxunl wrote:
> > jhuber6 wrote:
> > > saiislam wrote:
> > > > Shouldn't Arch (targetID here) be passed along instead of just 
> > > > the processor?
> > > > 
> > > > For example, `gfx90a:xnack+` and `gfx90a:xnack-` should be treated 
> > > > differently.
> > > So the problem there is that this will cause us to no longer link in 
> > > something like the OpenMP runtime library since `gfx90a` != 
> > > `gfx90a:xnack+`. Right now the behavior is that we will link them both 
> > > together since the architecture matches but then the attributes will get 
> > > resolved the same way we handle `-mattr=+x,-x`. I'm not sure what the 
> > > expected behaviour is here.
> > targetID is part of ROCm ABI as it is returned as part of Isa::GetIsaName 
> > (https://github.com/RadeonOpenCompute/ROCR-Runtime/blob/rocm-5.5.x/src/core/runtime/isa.cpp#L98)
> >  . 
> > 
> > the compatibility rule for targetID is specified by 
> > https://clang.llvm.org/docs/ClangOffloadBundler.html#target-id . For 
> > example, bundle entry with gfx90a can be consumed by device with GetIsaName 
> > gfx90a:xnack+ or gfx90a:xnack- . but bundle entry with gfx90a:xnack+ can 
> > only be consumed by device with GetIsaName gfx90a:xnack+.
> > 
> > Language runtime is supposed to do a compatibility check for bundle entry 
> > with the device GetIsaName. Isa::IsCompatible 
> > (https://github.com/RadeonOpenCompute/ROCR-Runtime/blob/3b939c398bdac0c2b9a860ff9a0ed0be0c80f911/src/core/runtime/isa.cpp#L73)
> >  can be used to do that. For convenience, language runtime is expected to 
> > use targetID for identifying bundle entries instead of re-constructing 
> > targetID from features when needed.
> > 
> > targetID is also used for compatibility checks when linking bitcode.
> > 
> So what we need is some more sophisticated logic in the linker wrapper to 
> merge the binaries according to these rules. However the handling will 
> definitely require pulling this apart when we send it to LTO.
Some logic is given in [[ 
https://github.com/llvm/llvm-project/blob/111d27484132c0692c214880576dc4a37fd6d645/clang/lib/Driver/OffloadBundler.cpp#L155
 | ClangOffloadBundler  ]] and in [[ 
https://github.com/llvm/llvm-project/blob/74c2ec50f393bad8b31d0dd0bd8b2ff44d361198/openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h#L80
 | AMDGPU plugin ]]


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150998/new/

https://reviews.llvm.org/D150998

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D150998: [OpenMP] Fix using the target ID when using the new driver

2023-05-22 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam added inline comments.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:8465-8470
+if (TC->getTriple().isAMDGPU()) {
+  for (StringRef Feature : llvm::split(Arch.split(':').second, ':')) {
+FeatureArgs.emplace_back(
+Args.MakeArgString(Feature.take_back() + Feature.drop_back()));
+  }
+}

Maybe use `parseTargetIDWithFormatCheckingOnly()`?



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:8475
 "triple=" + TC->getTripleString(),
-"arch=" + Arch.str(),
+"arch=" + getProcessorFromTargetID(TC->getTriple(), Arch).str(),
 "kind=" + Kind.str(),

Shouldn't Arch (targetID here) be passed along instead of just the 
processor?

For example, `gfx90a:xnack+` and `gfx90a:xnack-` should be treated differently.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150998/new/

https://reviews.llvm.org/D150998

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D142022: [Clang][OpenMP] Fix handling of -mcode-object-version for OpenMP

2023-01-18 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam marked an inline comment as done.
saiislam added inline comments.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:7116
   if (Triple.isAMDGPU()) {
-handleAMDGPUCodeObjectVersionOptions(D, Args, CmdArgs);
+handleAMDGPUCodeObjectVersionOptions(D, C.getArgs(), CmdArgs);
 

yaxunl wrote:
> why do you need to change Args to C.getArgs() ?
You are right. Don't need it. Removed.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D142022/new/

https://reviews.llvm.org/D142022

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D142022: [Clang][OpenMP] Fix handling of -mcode-object-version for OpenMP

2023-01-18 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam updated this revision to Diff 490204.
saiislam added a comment.

Removed the unnecessary call to getArgs() and added test cases.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D142022/new/

https://reviews.llvm.org/D142022

Files:
  clang/lib/Driver/Driver.cpp
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/amdgpu-openmp-toolchain.c


Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -69,3 +69,10 @@
 // RUN:   --rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode 
-fopenmp-new-driver %s  2>&1 | \
 // RUN: FileCheck %s --check-prefix=CHECK-LIB-DEVICE-NOGPULIB
 // CHECK-LIB-DEVICE-NOGPULIB-NOT: "-cc1" 
{{.*}}ocml.bc"{{.*}}ockl.bc"{{.*}}oclc_daz_opt_on.bc"{{.*}}oclc_unsafe_math_off.bc"{{.*}}oclc_finite_only_off.bc"{{.*}}oclc_correctly_rounded_sqrt_on.bc"{{.*}}oclc_wavefrontsize64_on.bc"{{.*}}oclc_isa_version_803.bc"
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu 
-mcode-object-version=4 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa 
-Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 %s 2>&1 | FileCheck %s 
--check-prefix=CHECK-COV4
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu 
-mcode-object-version=5 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa 
-Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 %s 2>&1 | FileCheck %s 
--check-prefix=CHECK-COV5
+// CHECK-COV4: "-cc1" {{.*}}oclc_abi_version_400.bc
+// CHECK-COV4-NOT: "-cc1" {{.*}}oclc_abi_version_500.bc
+// CHECK-COV5: "-cc1" {{.*}}oclc_abi_version_500.bc
+// CHECK-COV5-NOT: "-cc1" {{.*}}oclc_abi_version_400.bc
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -8087,7 +8087,8 @@
   }
 
   if (Triple.isAMDGPU())
-handleAMDGPUCodeObjectVersionOptions(D, Args, CmdArgs, /*IsCC1As=*/true);
+handleAMDGPUCodeObjectVersionOptions(D, C.getArgs(), CmdArgs,
+ /*IsCC1As=*/true);
 
   assert(Input.isFilename() && "Invalid input.");
   CmdArgs.push_back(Input.getFilename());
Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
===
--- clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
+++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
@@ -26,8 +26,8 @@
 : public ROCMToolChain {
 public:
   AMDGPUOpenMPToolChain(const Driver , const llvm::Triple ,
-const ToolChain ,
-const llvm::opt::ArgList );
+const ToolChain , const llvm::opt::ArgList 
,
+const llvm::opt::DerivedArgList );
 
   const llvm::Triple *getAuxTriple() const override {
 return ();
@@ -58,6 +58,7 @@
   getDeviceLibs(const llvm::opt::ArgList ) const override;
 
   const ToolChain 
+  const llvm::opt::DerivedArgList 
 };
 
 } // end namespace toolchains
Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
===
--- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -58,8 +58,9 @@
 AMDGPUOpenMPToolChain::AMDGPUOpenMPToolChain(const Driver ,
  const llvm::Triple ,
  const ToolChain ,
- const ArgList )
-: ROCMToolChain(D, Triple, Args), HostTC(HostTC) {
+ const ArgList ,
+ const DerivedArgList )
+: ROCMToolChain(D, Triple, Args), HostTC(HostTC), DerivedArgs(DerivedArgs) 
{
   // Lookup binaries into the driver directory, this is used to
   // discover the clang-offload-bundler executable.
   getProgramPaths().push_back(getDriver().Dir);
@@ -190,7 +191,7 @@
   getTriple(), Args.getLastArgValue(options::OPT_march_EQ));
 
   SmallVector BCLibs;
-  for (auto BCLib : getCommonDeviceLibNames(Args, GpuArch.str(),
+  for (auto BCLib : getCommonDeviceLibNames(DerivedArgs, GpuArch.str(),
 /*IsOpenMP=*/true))
 BCLibs.emplace_back(BCLib);
 
Index: clang/lib/Driver/Driver.cpp
===
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -872,7 +872,7 @@
   }
   if (AMDTriple) {
 auto TempTC = std::make_unique(
-*this, *AMDTriple, *HostTC, C.getInputArgs());
+*this, *AMDTriple, *HostTC, C.getInputArgs(), C.getArgs());
 for (StringRef Arch : getOffloadArchs(
  C, C.getArgs(), Action::OFK_OpenMP, &*TempTC, true))

[PATCH] D139730: [OpenMP][DeviceRTL][AMDGPU] Support code object version 5

2023-01-18 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam marked an inline comment as done.
saiislam added inline comments.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:7085
   if (Triple.isAMDGPU()) {
-handleAMDGPUCodeObjectVersionOptions(D, Args, CmdArgs);
+handleAMDGPUCodeObjectVersionOptions(D, C.getArgs(), CmdArgs,
+ /*IsCC1As=*/true);

yaxunl wrote:
> Any reason you need the original args? This will bypass the driver 
> translation, which should not happen in normal cases.
We need derived args to look for mcode-object-version. I have created a 
separate review for this change. Please have a look at D142022


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139730/new/

https://reviews.llvm.org/D139730

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D142022: [Clang][OpenMP] Fix handling of -mcode-object-version for OpenMP

2023-01-18 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam created this revision.
saiislam added reviewers: jhuber6, yaxunl.
Herald added subscribers: kosarev, kerbowa, guansong, tpr, jvesely.
Herald added a project: All.
saiislam requested review of this revision.
Herald added subscribers: cfe-commits, sstefan1, MaskRay.
Herald added a reviewer: jdoerfert.
Herald added a project: clang.

Code object version flag for AMDGPU was not being honored by the
driver. It was required to be passed as derived arg so that correct
bitcode library can be linked.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D142022

Files:
  clang/lib/Driver/Driver.cpp
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
  clang/lib/Driver/ToolChains/Clang.cpp


Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -7113,7 +7113,7 @@
   }
 
   if (Triple.isAMDGPU()) {
-handleAMDGPUCodeObjectVersionOptions(D, Args, CmdArgs);
+handleAMDGPUCodeObjectVersionOptions(D, C.getArgs(), CmdArgs);
 
 Args.addOptInFlag(CmdArgs, options::OPT_munsafe_fp_atomics,
   options::OPT_mno_unsafe_fp_atomics);
@@ -8087,7 +8087,8 @@
   }
 
   if (Triple.isAMDGPU())
-handleAMDGPUCodeObjectVersionOptions(D, Args, CmdArgs, /*IsCC1As=*/true);
+handleAMDGPUCodeObjectVersionOptions(D, C.getArgs(), CmdArgs,
+ /*IsCC1As=*/true);
 
   assert(Input.isFilename() && "Invalid input.");
   CmdArgs.push_back(Input.getFilename());
Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
===
--- clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
+++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
@@ -26,8 +26,8 @@
 : public ROCMToolChain {
 public:
   AMDGPUOpenMPToolChain(const Driver , const llvm::Triple ,
-const ToolChain ,
-const llvm::opt::ArgList );
+const ToolChain , const llvm::opt::ArgList 
,
+const llvm::opt::DerivedArgList );
 
   const llvm::Triple *getAuxTriple() const override {
 return ();
@@ -58,6 +58,7 @@
   getDeviceLibs(const llvm::opt::ArgList ) const override;
 
   const ToolChain 
+  const llvm::opt::DerivedArgList 
 };
 
 } // end namespace toolchains
Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
===
--- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -58,8 +58,9 @@
 AMDGPUOpenMPToolChain::AMDGPUOpenMPToolChain(const Driver ,
  const llvm::Triple ,
  const ToolChain ,
- const ArgList )
-: ROCMToolChain(D, Triple, Args), HostTC(HostTC) {
+ const ArgList ,
+ const DerivedArgList )
+: ROCMToolChain(D, Triple, Args), HostTC(HostTC), DerivedArgs(DerivedArgs) 
{
   // Lookup binaries into the driver directory, this is used to
   // discover the clang-offload-bundler executable.
   getProgramPaths().push_back(getDriver().Dir);
@@ -190,7 +191,7 @@
   getTriple(), Args.getLastArgValue(options::OPT_march_EQ));
 
   SmallVector BCLibs;
-  for (auto BCLib : getCommonDeviceLibNames(Args, GpuArch.str(),
+  for (auto BCLib : getCommonDeviceLibNames(DerivedArgs, GpuArch.str(),
 /*IsOpenMP=*/true))
 BCLibs.emplace_back(BCLib);
 
Index: clang/lib/Driver/Driver.cpp
===
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -872,7 +872,7 @@
   }
   if (AMDTriple) {
 auto TempTC = std::make_unique(
-*this, *AMDTriple, *HostTC, C.getInputArgs());
+*this, *AMDTriple, *HostTC, C.getInputArgs(), C.getArgs());
 for (StringRef Arch : getOffloadArchs(
  C, C.getArgs(), Action::OFK_OpenMP, &*TempTC, true))
   Archs.insert(Arch);
@@ -943,7 +943,7 @@
   *this, TT, *HostTC, C.getInputArgs());
 else if (TT.isAMDGCN())
   DeviceTC = std::make_unique(
-  *this, TT, *HostTC, C.getInputArgs());
+  *this, TT, *HostTC, C.getInputArgs(), C.getArgs());
 else
   assert(DeviceTC && "Device toolchain not defined.");
   }


Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -7113,7 +7113,7 @@
   }
 
   if (Triple.isAMDGPU()) {
-handleAMDGPUCodeObjectVersionOptions(D, Args, CmdArgs);
+handleAMDGPUCodeObjectVersionOptions(D,

[PATCH] D139730: [OpenMP][DeviceRTL][AMDGPU] Support code object version 5

2023-01-04 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam added inline comments.



Comment at: openmp/libomptarget/DeviceRTL/src/Mapping.cpp:50
 
-uint32_t getNumHardwareThreadsInBlock() {
-  return __builtin_amdgcn_workgroup_size_x();
-}
+uint32_t getNumHardwareThreadsInBlock() { return external_get_local_size(0); }
 

If we still don't want to depend on rocm-device-libs then we will have to do 
something like (haven't tried this code yet):

```
uint32_t getNumHardwareThreadsInBlock() {
  if (__oclc_ABI_version < 500) {
    return __builtin_amdgcn_workgroup_size_x();
  } else {
    void *implicitArgPtr = __builtin_amdgcn_implicitarg_ptr();
    return (ushort)implicitArgPtr[6];
  }
}
```



Comment at: openmp/libomptarget/DeviceRTL/src/Mapping.cpp:80
 
-uint32_t getNumberOfBlocks() {
-  return __builtin_amdgcn_grid_size_x() / __builtin_amdgcn_workgroup_size_x();
-}
+uint32_t getNumberOfBlocks() { return external_get_num_groups(0); }
 

```
uint32_t getNumberOfBlocks() {
  if (__oclc_ABI_version < 500) {
    return __builtin_amdgcn_grid_size_x() /
           __builtin_amdgcn_workgroup_size_x();
  } else {
    void *implicitArgPtr = __builtin_amdgcn_implicitarg_ptr();
    return (uint)implicitArgPtr[0];
  }
}
```


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139730/new/

https://reviews.llvm.org/D139730

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D139730: [OpenMP][DeviceRTL][AMDGPU] Support code object version 5

2023-01-04 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam added a comment.

Thanks everyone for your review and comments!
I am going to address all of them in a series of smaller patches starting with 
D140784 .


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139730/new/

https://reviews.llvm.org/D139730

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D139730: [OpenMP][DeviceRTL][AMDGPU] Support code object version 5

2022-12-09 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam created this revision.
saiislam added reviewers: jdoerfert, JonChesterfield, jhuber6, yaxunl.
Herald added subscribers: kosarev, kerbowa, guansong, tpr, dstuttard, jvesely, 
kzhuravl.
Herald added a project: All.
saiislam requested review of this revision.
Herald added subscribers: openmp-commits, cfe-commits, sstefan1, MaskRay, wdng.
Herald added projects: clang, OpenMP.

Update DeviceRTL and the AMDGPU plugin to use code
object version 5. Default is code object version 4.

DeviceRTL uses rocm-device-libs instead of directly calling
amdgcn builtins for the functions which are affected by
cov5.

AMDGPU plugin queries the ELF for code object version
and then prepares various implicitargs accordingly.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D139730

Files:
  clang/lib/Driver/Driver.cpp
  clang/lib/Driver/ToolChains/AMDGPU.cpp
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
  clang/lib/Driver/ToolChains/Clang.cpp
  openmp/libomptarget/DeviceRTL/include/Interface.h
  openmp/libomptarget/DeviceRTL/src/Mapping.cpp
  openmp/libomptarget/DeviceRTL/src/State.cpp
  openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.cpp
  openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.h
  openmp/libomptarget/plugins/amdgpu/impl/internal.h
  openmp/libomptarget/plugins/amdgpu/impl/system.cpp
  openmp/libomptarget/plugins/amdgpu/src/rtl.cpp

Index: openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
===
--- openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -124,9 +124,10 @@
   uint32_t KernargSegmentSize;
   void *KernargRegion = nullptr;
   std::queue FreeKernargSegments;
+  uint16_t CodeObjectVersion;
 
   uint32_t kernargSizeIncludingImplicit() {
-return KernargSegmentSize + sizeof(impl_implicit_args_t);
+return KernargSegmentSize + implicitArgsSize(CodeObjectVersion);
   }
 
   ~KernelArgPool() {
@@ -143,8 +144,10 @@
   KernelArgPool(const KernelArgPool &) = delete;
   KernelArgPool(KernelArgPool &&) = delete;
 
-  KernelArgPool(uint32_t KernargSegmentSize, hsa_amd_memory_pool_t )
-  : KernargSegmentSize(KernargSegmentSize) {
+  KernelArgPool(uint32_t KernargSegmentSize, hsa_amd_memory_pool_t ,
+uint16_t CodeObjectVersion)
+  : KernargSegmentSize(KernargSegmentSize),
+CodeObjectVersion(CodeObjectVersion) {
 
 // impl uses one pool per kernel for all gpus, with a fixed upper size
 // preserving that exact scheme here, including the queue
@@ -228,16 +231,16 @@
   KernelTy(llvm::omp::OMPTgtExecModeFlags ExecutionMode, int16_t ConstWgSize,
int32_t DeviceId, void *CallStackAddr, const char *Name,
uint32_t KernargSegmentSize,
-   hsa_amd_memory_pool_t )
+   hsa_amd_memory_pool_t , uint16_t CodeObjectVersion)
   : ExecutionMode(ExecutionMode), ConstWGSize(ConstWgSize),
 DeviceId(DeviceId), CallStackAddr(CallStackAddr), Name(Name) {
 DP("Construct kernelinfo: ExecMode %d\n", ExecutionMode);
 
 std::string N(Name);
 if (KernelArgPoolMap.find(N) == KernelArgPoolMap.end()) {
-  KernelArgPoolMap.insert(
-  std::make_pair(N, std::unique_ptr(new KernelArgPool(
-KernargSegmentSize, KernArgMemoryPool;
+  KernelArgPoolMap.insert(std::make_pair(
+  N, std::unique_ptr(new KernelArgPool(
+ KernargSegmentSize, KernArgMemoryPool, CodeObjectVersion;
 }
   }
 };
@@ -474,6 +477,7 @@
   std::vector WarpSize;
   std::vector GPUName;
   std::vector TargetID;
+  uint16_t CodeObjectVersion;
 
   // OpenMP properties
   std::vector NumTeams;
@@ -487,6 +491,7 @@
 
   // Resource pools
   SignalPoolT FreeSignalPool;
+  std::vector PreallocatedDeviceHeap;
 
   bool HostcallRequired = false;
 
@@ -861,7 +866,6 @@
"Unexpected device id!");
 FuncGblEntries[DeviceId].emplace_back();
 FuncOrGblEntryTy  = FuncGblEntries[DeviceId].back();
-// KernelArgPoolMap.clear();
 E.Entries.clear();
 E.Table.EntriesBegin = E.Table.EntriesEnd = 0;
   }
@@ -1032,6 +1036,7 @@
 SymbolInfoTable.resize(NumberOfDevices);
 DeviceCoarseGrainedMemoryPools.resize(NumberOfDevices);
 DeviceFineGrainedMemoryPools.resize(NumberOfDevices);
+PreallocatedDeviceHeap.resize(NumberOfDevices);
 
 Err = setupDevicePools(HSAAgents);
 if (Err != HSA_STATUS_SUCCESS) {
@@ -1361,6 +1366,27 @@
   return PacketId;
 }
 
+const uint16_t getCodeObjectVersionFromELF(__tgt_device_image *Image) {
+  char *ImageBegin = (char *)Image->ImageStart;
+  size_t ImageSize = (char *)Image->ImageEnd - ImageBegin;
+
+  StringRef Buffer = StringRef(ImageBegin, ImageSize);
+  auto ElfOrErr = ObjectFile::createELFObjectFile(MemoryBufferRef(Buffer, ""),
+  /*InitContent=*/false);
+  if (!ElfOrErr) {
+REPORT("Failed to load

[PATCH] D134974: [OpenMP] Add map clause to the LIT test on use_device_addr clause

2022-10-21 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam accepted this revision.
saiislam added a comment.
This revision is now accepted and ready to land.

LGTM!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D134974/new/

https://reviews.llvm.org/D134974

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D135724: [HIP] Fix unbundling archive

2022-10-12 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam accepted this revision.
saiislam added a comment.
This revision is now accepted and ready to land.

Thank you!
It is fixing our OpenMP smoke test failure.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D135724/new/

https://reviews.llvm.org/D135724

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D134546: [clang-offload-bundler] extracting compatible bundle entry

2022-09-23 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam added a comment.

The patch looks fine to me.
Please wait for @tra 's final review.

On a different note, can this compatibility testing logic be moved to an LLVM 
library instead of living in Clang?
I want to use it in OpenMP's AMDGPU plugin, which now links LLVM libraries by 
default.




Comment at: clang/lib/Driver/OffloadBundler.cpp:1008
+auto Output = Worklist.begin();
+for (auto E = Worklist.end(); Output != E; Output++) {
+  if (isCodeObjectCompatible(

tra wrote:
> The patch description implies that there are at least two classes of 
> compatible objects -- the ones that match exactly and the ones that are not 
> exact match, but are still compatible.
> 
> 
> Here we're iterating until we find the first compatible object. What if we 
> also have the object that matches exactly, but it's further down the list. Is 
> that a problem that we may pick one or the other, depending on the order they 
> happen to appear in the worklist? It would be good to add a test case for 
> this scenario.
Though it looks plausible, such a case is not possible.
 
From [[ https://clang.llvm.org/docs/ClangOffloadBundler.html#bundle-entry-id | 
Clang Offload Bundler's Documentation]]
> If there is an entry with a target feature specified as Any, then all entries 
> must specify that target feature as Any for the same processor.




CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D134546/new/

https://reviews.llvm.org/D134546

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D132643: [OpenMP] Extend lit test for parallel for simd construct

2022-09-12 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam accepted this revision.
saiislam added a comment.
This revision is now accepted and ready to land.

LGTM. Thanks!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D132643/new/

https://reviews.llvm.org/D132643

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D131763: [OpenMP] Add lit test for metadirective device arch inspired from sollve

2022-09-06 Thread Saiyedul Islam via Phabricator via cfe-commits

This revision was automatically updated to reflect the committed changes.
Closed by commit rG0cecc6e8e27c: [OpenMP] Add lit test for metadirective device 
arch inspired (authored by animeshk-amd, committed by saiislam).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D131763/new/

https://reviews.llvm.org/D131763

Files:
  clang/test/OpenMP/metadirective_ast_print.c
  clang/test/OpenMP/metadirective_device_arch_codegen.cpp

Index: clang/test/OpenMP/metadirective_device_arch_codegen.cpp
===
--- /dev/null
+++ clang/test/OpenMP/metadirective_device_arch_codegen.cpp
@@ -0,0 +1,65 @@
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -fopenmp -x c++ -w -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -fopenmp -x c++ -w -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -target-cpu gfx906 -o - | FileCheck %s
+// expected-no-diagnostics
+
+
+/*===---=== 
+
+Inspired from SOLLVE tests:
+ - 5.0/metadirective/test_metadirective_arch_is_nvidia.c
+
+
+======*/
+
+
+#define N 1024
+
+int metadirective1() {
+   
+   int v1[N], v2[N], v3[N];
+
+   int target_device_num, host_device_num, default_device;
+   int errors = 0;
+
+   #pragma omp target map(to:v1,v2) map(from:v3, target_device_num) device(default_device)
+   {
+  #pragma omp metadirective \
+   when(device={arch("amdgcn")}: teams distribute parallel for) \
+   default(parallel for)
+
+ for (int i = 0; i < N; i++) {
+	#pragma omp atomic write
+v3[i] = v1[i] * v2[i];
+ }
+   }
+
+   return errors;
+}
+
+// CHECK-LABEL: define weak_odr amdgpu_kernel void {{.+}}metadirective1
+// CHECK: entry:
+// CHECK: %{{[0-9]}} = call i32 @__kmpc_target_init
+// CHECK: user_code.entry:
+// CHECK: call void @__omp_outlined__
+// CHECK-NOT: call void @__kmpc_parallel_51
+// CHECK: ret void
+
+
+// CHECK-LABEL: define internal void @__omp_outlined__
+// CHECK: entry:
+// CHECK: call void @__kmpc_distribute_static_init
+// CHECK: omp.loop.exit:  
+// CHECK: call void @__kmpc_distribute_static_fini
+
+
+// CHECK-LABEL: define internal void @__omp_outlined__.{{[0-9]+}}
+// CHECK: entry:
+// CHECK: call void @__kmpc_for_static_init_4
+// CHECK: omp.inner.for.body:
+// CHECK: store atomic {{.*}} monotonic
+// CHECK: omp.loop.exit:
+// CHECK-NEXT: call void @__kmpc_distribute_static_fini
+// CHECK-NEXT: ret void
+
Index: clang/test/OpenMP/metadirective_ast_print.c
===
--- clang/test/OpenMP/metadirective_ast_print.c
+++ clang/test/OpenMP/metadirective_ast_print.c
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -triple x86_64-unknown-linux-gnu -x c -std=c99 -ast-print %s -o - | FileCheck %s
 
 // RUN: %clang_cc1 -verify -fopenmp-simd -triple x86_64-unknown-linux-gnu -x c -std=c99 -ast-print %s -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp -triple amdgcn-amd-amdhsa -x c -std=c99 -ast-print %s -o - | FileCheck %s --check-prefix=CHECK-AMDGCN
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -triple amdgcn-amd-amdhsa -x c -std=c99 -ast-print %s -o - | FileCheck %s --check-prefix=CHECK-AMDGCN
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -57,6 +61,12 @@
 for (int j = 0; j < 16; j++)
   array[i] = i;
   }
+
+#pragma omp metadirective when(device={arch("amdgcn")}: \
+teams distribute parallel for)\
+default(parallel for)
+  for (int i = 0; i < 100; i++)
+  ;
 }
 
 // CHECK: void bar(void);
@@ -83,5 +93,7 @@
 // CHECK-NEXT: for (int i = 0; i < 16; i++) {
 // CHECK-NEXT: #pragma omp simd
 // CHECK-NEXT: for (int j = 0; j < 16; j++)
+// CHECK-AMDGCN: #pragma omp teams distribute parallel for
+// CHECK-AMDGCN-NEXT: for (int i = 0; i < 100; i++)
 
 #endif
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D131763: [OpenMP] Add lit test for metadirective device arch inspired from sollve

2022-09-05 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam accepted this revision.
saiislam added a comment.
This revision is now accepted and ready to land.

LGTM. Thanks!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D131763/new/

https://reviews.llvm.org/D131763

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D132607: [OffloadPackager] Add ability to extract images from other file types

2022-09-05 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam accepted this revision.
saiislam added a comment.
This revision is now accepted and ready to land.

LGTM. Thanks!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D132607/new/

https://reviews.llvm.org/D132607

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D131763: [OpenMP] Add lit test for metadirective device arch inspired from sollve

2022-08-16 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam added a comment.

In D131763#3719323 , @jdoerfert wrote:

> In D131763#3719140 , @saiislam 
> wrote:
>
>> In D131763#3719132 , @jdoerfert 
>> wrote:
>>
>>> This doesn't actually test much, only one case/compilation is covered. In 
>>> the second function nothing specific to LLVM as impl is checked.
>>
>> The second function is the only place in llvm-project where vendor(llvm) is 
>> tested in a non-error test.
>
> Really?
>
>   ag 'vendor\(llvm\)' clang/test/OpenMP --files-with-matches  
>   
> 
>   
>   clang/test/OpenMP/begin_declare_variant_messages.c
>   clang/test/OpenMP/begin_declare_variant_using_messages.cpp
>   clang/test/OpenMP/declare_variant_ast_print.c
>   clang/test/OpenMP/declare_variant_ast_print.cpp
>   clang/test/OpenMP/declare_variant_implementation_vendor_codegen.cpp
>   clang/test/OpenMP/declare_variant_messages.cpp
>   clang/test/OpenMP/declare_variant_mixed_codegen.cpp
>   clang/test/OpenMP/metadirective_ast_print.c
>   clang/test/OpenMP/metadirective_implementation_codegen.c
>   clang/test/OpenMP/nvptx_declare_variant_implementation_vendor_codegen.cpp
>   clang/test/OpenMP/declare_variant_messages.c
>   clang/test/OpenMP/metadirective_empty.cpp
>   clang/test/OpenMP/metadirective_implementation_codegen.cpp
>
> That said, the above function still doesn't test anything wrt. llvm as impl 
> anyway. We could just as well match amd or nvidia and the check lines still 
> match just fine.

My mistake. You are right about more tests being present. I searched in the 
wrong branch locally :-)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D131763/new/

https://reviews.llvm.org/D131763

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D131763: [OpenMP] Add lit test for metadirective device arch inspired from sollve

2022-08-12 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam added a comment.

In D131763#3719132 , @jdoerfert wrote:

> This doesn't actually test much, only one case/compilation is covered. In 
> the second function nothing specific to LLVM as impl is checked.

The second function is the only place in llvm-project where vendor(llvm) is 
tested in a non-error test.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D131763/new/

https://reviews.llvm.org/D131763

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D129635: [OpenMP] Update the default version of OpenMP to 5.1

2022-07-27 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam accepted this revision.
saiislam added a comment.

Thanks, LGTM!

In the multi-company OpenMP meeting, it was decided to defer this update.
So, please don't land this patch yet.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D129635/new/

https://reviews.llvm.org/D129635

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D128090: [Clang][OpenMP] Process multi-arch compilation options given via -march

2022-07-13 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam added a comment.

In D128090#3648210 , @jhuber6 wrote:

> Sorry never noticed this revision. The purpose of this patch seems to be 
> supporting something like this
>
>   clang input.c -fopenmp -fopenmp-targets=nvptx64 -Xopenmp-target=nvptx64 
> -march=sm_70 -Xopenmp-target=nvptx64 -march=sm_80
>
> Right now the above works if you replace `-march=` with `--offload-arch=`. 
> Currently the offloading tools use a "bound" architecture to tie a specific 
> architecture with a job, which is what allows us to offload to multiple 
> architectures. If there is no bound architecture given, we instead use the 
> `-march=` option, and if that is not present we derive it. It would be 
> possible to set the bound architecture via `-march` if we wanted to. But I'm 
> not sure if it's necessary given that it would just be an alternate syntax 
> for `--offload-arch=`.

`-Xopenmp-target -march ` used to be the only way to target a specific 
sub-architecture before `--offload-arch` existed, but it doesn't support 
multiple architectures.
This patch relies on the infrastructure used by `--offload-arch` to support 
this more verbose method of specifying multiple architectures.

Use case: people already familiar with the `-Xopenmp-target -march` option are 
likely to use it for multiple architectures as well, until they learn about 
the shorthand representation, `--offload-arch`.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D128090/new/

https://reviews.llvm.org/D128090

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D127304: [LinkerWrapper] Embed OffloadBinaries for OpenMP offloading images

2022-07-12 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam accepted this revision.
saiislam added a comment.
This revision is now accepted and ready to land.

Thanks, LGTM!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D127304/new/

https://reviews.llvm.org/D127304

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D128090: [Clang][OpenMP] Process multi-arch compilation options given via -march

2022-06-17 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam updated this revision to Diff 438004.
saiislam added a comment.

clang-formatted.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D128090/new/

https://reviews.llvm.org/D128090

Files:
  clang/include/clang/Driver/Driver.h
  clang/lib/Driver/Driver.cpp


Index: clang/lib/Driver/Driver.cpp
===
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -722,6 +722,38 @@
   return RT;
 }
 
+bool Driver::GetTargetInfoFromMArch(
+Compilation , llvm::StringMap> ) {
+  StringRef OpenMPTargetArch;
+  for (Arg *A : C.getInputArgs()) {
+if (A->getOption().matches(options::OPT_Xopenmp_target_EQ)) {
+  StringRef OpenMPTargetTriple = StringRef(A->getValue(0));
+  llvm::Triple TargetTriple(OpenMPTargetTriple);
+
+  for (auto *V : A->getValues()) {
+StringRef VStr = StringRef(V);
+if (VStr.startswith("-march=") || VStr.startswith("--march=")) {
+  OpenMPTargetArch = VStr.split('=').second;
+  CudaArch Arch = StringToCudaArch(StringRef(OpenMPTargetArch));
+  if (Arch == CudaArch::UNKNOWN) {
+C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch)
+<< OpenMPTargetArch;
+C.setContainsError();
+return false;
+  }
+
+  if (!OpenMPTargetTriple.empty() && !OpenMPTargetArch.empty()) {
+DerivedArchs[OpenMPTargetTriple].insert(OpenMPTargetArch);
+  }
+}
+A->claim();
+  }
+}
+  }
+
+  return true;
+}
+
 void Driver::CreateOffloadingDeviceToolChains(Compilation ,
   InputList ) {
 
@@ -812,6 +844,10 @@
 << OpenMPTargets->getAsString(C.getInputArgs());
 return;
   }
+  // Process legacy option -fopenmp-targets -Xopenmp-target and -march
+  auto status = GetTargetInfoFromMArch(C, DerivedArchs);
+  if (!status)
+return;
   llvm::copy(OpenMPTargets->getValues(), 
std::back_inserter(OpenMPTriples));
 } else if (C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) &&
!IsHIP && !IsCuda) {
Index: clang/include/clang/Driver/Driver.h
===
--- clang/include/clang/Driver/Driver.h
+++ clang/include/clang/Driver/Driver.h
@@ -412,6 +412,11 @@
   /// current compilation. Also, update the host tool chain kind accordingly.
   void CreateOffloadingDeviceToolChains(Compilation , InputList );
 
+  /// GetTargetInfoFromMArch - extract sub-architecture from -march flag used
+  /// with -fopenmp-targets and -Xopenmp-target options.
+  bool GetTargetInfoFromMArch(
+  Compilation , llvm::StringMap> 
);
+
   /// BuildCompilation - Construct a compilation object for a command
   /// line argument vector.
   ///


Index: clang/lib/Driver/Driver.cpp
===
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -722,6 +722,38 @@
   return RT;
 }
 
+bool Driver::GetTargetInfoFromMArch(
+Compilation , llvm::StringMap> ) {
+  StringRef OpenMPTargetArch;
+  for (Arg *A : C.getInputArgs()) {
+if (A->getOption().matches(options::OPT_Xopenmp_target_EQ)) {
+  StringRef OpenMPTargetTriple = StringRef(A->getValue(0));
+  llvm::Triple TargetTriple(OpenMPTargetTriple);
+
+  for (auto *V : A->getValues()) {
+StringRef VStr = StringRef(V);
+if (VStr.startswith("-march=") || VStr.startswith("--march=")) {
+  OpenMPTargetArch = VStr.split('=').second;
+  CudaArch Arch = StringToCudaArch(StringRef(OpenMPTargetArch));
+  if (Arch == CudaArch::UNKNOWN) {
+C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch)
+<< OpenMPTargetArch;
+C.setContainsError();
+return false;
+  }
+
+  if (!OpenMPTargetTriple.empty() && !OpenMPTargetArch.empty()) {
+DerivedArchs[OpenMPTargetTriple].insert(OpenMPTargetArch);
+  }
+}
+A->claim();
+  }
+}
+  }
+
+  return true;
+}
+
 void Driver::CreateOffloadingDeviceToolChains(Compilation ,
   InputList ) {
 
@@ -812,6 +844,10 @@
 << OpenMPTargets->getAsString(C.getInputArgs());
 return;
   }
+  // Process legacy option -fopenmp-targets -Xopenmp-target and -march
+  auto status = GetTargetInfoFromMArch(C, DerivedArchs);
+  if (!status)
+return;
   llvm::copy(OpenMPTargets->getValues(), std::back_inserter(OpenMPTriples));
 } else if (C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) &&
!IsHIP && !IsCuda) {
Index: clang/include/clang/Driver/Driver.h
===
--- clang/include/clang/Driver/Driver.h
+++ clang/include/clang/Driver/Driver.h
@@ -412,6 +412,11 @@
   ///

[PATCH] D128090: [Clang][OpenMP] Process multi-arch compilation options given via -march

2022-06-17 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam created this revision.
saiislam added reviewers: jdoerfert, JonChesterfield, jhuber6, yaxunl.
Herald added a subscriber: guansong.
Herald added a project: All.
saiislam requested review of this revision.
Herald added subscribers: cfe-commits, sstefan1, MaskRay.
Herald added a project: clang.

Subarchitectures for multi-file compilation specified using -fopenmp-targets,
-Xopenmp-target, and -march were not being added to the
`KnownArchs` map.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D128090

Files:
  clang/include/clang/Driver/Driver.h
  clang/lib/Driver/Driver.cpp


Index: clang/lib/Driver/Driver.cpp
===
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -722,6 +722,37 @@
   return RT;
 }
 
+bool Driver::GetTargetInfoFromMArch(Compilation , 
llvm::StringMap> ) {
+  StringRef OpenMPTargetArch;
+  for (Arg *A : C.getInputArgs()) {
+if (A->getOption().matches(options::OPT_Xopenmp_target_EQ)) {
+  StringRef OpenMPTargetTriple = StringRef(A->getValue(0));
+  llvm::Triple TargetTriple(OpenMPTargetTriple);
+
+  for (auto *V : A->getValues()) {
+StringRef VStr = StringRef(V);
+if (VStr.startswith("-march=") || VStr.startswith("--march=")) {
+  OpenMPTargetArch = VStr.split('=').second;
+  CudaArch Arch = StringToCudaArch(StringRef(OpenMPTargetArch));
+  if (Arch == CudaArch::UNKNOWN) {
+C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch)
+<< OpenMPTargetArch;
+C.setContainsError();
+return false;
+  }
+
+  if (!OpenMPTargetTriple.empty() && !OpenMPTargetArch.empty()) {
+DerivedArchs[OpenMPTargetTriple].insert(OpenMPTargetArch);
+  }
+}
+A->claim();
+  }
+}
+  }
+
+  return true;
+}
+
 void Driver::CreateOffloadingDeviceToolChains(Compilation ,
   InputList ) {
 
@@ -812,6 +843,10 @@
 << OpenMPTargets->getAsString(C.getInputArgs());
 return;
   }
+  // Process legacy option -fopenmp-targets -Xopenmp-target and -march
+  auto status = GetTargetInfoFromMArch(C, DerivedArchs);
+  if (!status)
+return;
   llvm::copy(OpenMPTargets->getValues(), 
std::back_inserter(OpenMPTriples));
 } else if (C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) &&
!IsHIP && !IsCuda) {
Index: clang/include/clang/Driver/Driver.h
===
--- clang/include/clang/Driver/Driver.h
+++ clang/include/clang/Driver/Driver.h
@@ -412,6 +412,10 @@
   /// current compilation. Also, update the host tool chain kind accordingly.
   void CreateOffloadingDeviceToolChains(Compilation , InputList );
 
+  /// GetTargetInfoFromMArch - extract sub-architecture from -march flag used
+  /// with -fopenmp-targets and -Xopenmp-target options.
+  bool GetTargetInfoFromMArch(Compilation , 
llvm::StringMap> );
+
   /// BuildCompilation - Construct a compilation object for a command
   /// line argument vector.
   ///


Index: clang/lib/Driver/Driver.cpp
===
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -722,6 +722,37 @@
   return RT;
 }
 
+bool Driver::GetTargetInfoFromMArch(Compilation , llvm::StringMap> ) {
+  StringRef OpenMPTargetArch;
+  for (Arg *A : C.getInputArgs()) {
+if (A->getOption().matches(options::OPT_Xopenmp_target_EQ)) {
+  StringRef OpenMPTargetTriple = StringRef(A->getValue(0));
+  llvm::Triple TargetTriple(OpenMPTargetTriple);
+
+  for (auto *V : A->getValues()) {
+StringRef VStr = StringRef(V);
+if (VStr.startswith("-march=") || VStr.startswith("--march=")) {
+  OpenMPTargetArch = VStr.split('=').second;
+  CudaArch Arch = StringToCudaArch(StringRef(OpenMPTargetArch));
+  if (Arch == CudaArch::UNKNOWN) {
+C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch)
+<< OpenMPTargetArch;
+C.setContainsError();
+return false;
+  }
+
+  if (!OpenMPTargetTriple.empty() && !OpenMPTargetArch.empty()) {
+DerivedArchs[OpenMPTargetTriple].insert(OpenMPTargetArch);
+  }
+}
+A->claim();
+  }
+}
+  }
+
+  return true;
+}
+
 void Driver::CreateOffloadingDeviceToolChains(Compilation ,
   InputList ) {
 
@@ -812,6 +843,10 @@
 << OpenMPTargets->getAsString(C.getInputArgs());
 return;
   }
+  // Process legacy option -fopenmp-targets -Xopenmp-target and -march
+  auto status = GetTargetInfoFromMArch(C, DerivedArchs);
+  if (!status)
+return;
   llvm::copy(OpenMPTargets->getValues(), std::back_inserter(OpenMPTriples));
 } else if

[PATCH] D124525: [OpenMP][ClangLinkerWrapper] Extending linker wrapper to embed metadata for multi-arch fat binaries

2022-06-01 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam updated this revision to Diff 433401.
saiislam added a comment.

Added the multi-entry logic to libomptarget. The image compatibility testing 
has yet to be moved to the plugin.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124525/new/

https://reviews.llvm.org/D124525

Files:
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
  clang/tools/clang-linker-wrapper/OffloadWrapper.cpp
  clang/tools/clang-linker-wrapper/OffloadWrapper.h
  clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp
  openmp/libomptarget/include/omptarget.h
  openmp/libomptarget/include/rtl.h
  openmp/libomptarget/src/exports
  openmp/libomptarget/src/interface.cpp
  openmp/libomptarget/src/rtl.cpp

Index: openmp/libomptarget/src/rtl.cpp
===
--- openmp/libomptarget/src/rtl.cpp
+++ openmp/libomptarget/src/rtl.cpp
@@ -13,6 +13,7 @@
 #include "rtl.h"
 #include "device.h"
 #include "private.h"
+//#include "llvm/OffloadArch/OffloadArch.h"
 
 #include 
 #include 
@@ -20,6 +21,8 @@
 #include 
 #include 
 #include 
+// It's strange we do not have llvm tools for openmp runtime, so we use stat
+#include 
 
 // List of all plugins that can support offloading.
 static const char *RTLNames[] = {
@@ -351,18 +354,127 @@
 initRTLonce(R);
 }
 
+/// Query runtime capabilities of this system by calling offload-arch -c
+/// offload_arch_output_buffer is persistent storage returned by this
+/// __tgt_get_active_offload_env.
+static void
+__tgt_get_active_offload_env(__tgt_active_offload_env *active_env,
+ char *offload_arch_output_buffer,
+ size_t offload_arch_output_buffer_size) {
+
+  // If OFFLOAD_ARCH_OVERRIDE env variable is present then use its value instead
+  // of querying it using LLVMOffloadArch library.
+  if (char *OffloadArchEnvVar = getenv("OFFLOAD_ARCH_OVERRIDE")) {
+if (OffloadArchEnvVar) {
+  active_env->capabilities = OffloadArchEnvVar;
+  return;
+}
+  }
+  // Get runtime capabilities of this system with libLLVMOffloadArch.a
+  // if (int rc = getRuntimeCapabilities(offload_arch_output_buffer,
+  // offload_arch_output_buffer_size))
+  //   return;
+  // active_env->capabilities = offload_arch_output_buffer;
+  // return;
+}
+
+std::vector _splitstrings(char *input, const char *sep) {
+  std::vector split_strings;
+  std::string s(input);
+  std::string delimiter(sep);
+  size_t pos = 0;
+  while ((pos = s.find(delimiter)) != std::string::npos) {
+if (pos != 0)
+  split_strings.push_back(s.substr(0, pos));
+s.erase(0, pos + delimiter.length());
+  }
+  if (s.length() > 1)
+split_strings.push_back(s.substr(0, s.length()));
+  return split_strings;
+}
+
+static bool _ImageIsCompatibleWithEnv(__tgt_image_info *image_info,
+  __tgt_active_offload_env *active_env) {
+  // get_image_info will return null if no image information was registered.
+  // If no image information, assume application built with old compiler and
+  // check each image.
+  if (!(image_info && image_info->image_info_version == 1))
+return true;
+
+  if (!active_env->capabilities)
+return false;
+
+  // Each runtime requirement for the compiled image is stored in
+  // the image_info->offload_arch (TargetID) string.
+  // Each runtime capability obtained from "offload-arch -c" is stored in
+  // active_env->capabilities (TargetID) string.
+  // If every requirement has a matching capability, then the image
+  // is compatible with active environment
+
+  std::vector reqs = _splitstrings(image_info->offload_arch, ":");
+  std::vector caps = _splitstrings(active_env->capabilities, ":");
+
+  bool is_compatible = true;
+  for (auto req : reqs) {
+bool missing_capability = true;
+for (auto capability : caps)
+  if (capability == req)
+missing_capability = false;
+if (missing_capability) {
+  DP("Image requires %s but runtime capability %s is missing.\n",
+ image_info->offload_arch, req.c_str());
+  is_compatible = false;
+}
+  }
+  return is_compatible;
+}
+
 void RTLsTy::RegisterLib(__tgt_bin_desc *desc) {
+
+  __tgt_device_image *newDeviceImages =
+  new __tgt_device_image[desc->NumDeviceImages];
+
+  for (int32_t i = 0; i < desc->NumDeviceImages; ++i) {
+newDeviceImages[i].EntriesBegin = desc->DeviceImages[i].EntriesBegin;
+newDeviceImages[i].EntriesEnd = desc->DeviceImages[i].EntriesEnd;
+newDeviceImages[i].ImageStart = desc->DeviceImages[i].ImageStart;
+newDeviceImages[i].ImageEnd = desc->DeviceImages[i].ImageEnd;
+newDeviceImages[i].ImageInfo = nullptr;
+// TODO : delete(desc->DeviceImages[i]);
+  }
+
+  desc->DeviceImages = static_cast<__tgt_device_image *>(newDeviceImages);
+
+  this->RegisterLibV2(desc);
+}
+
+#define MAX_CAPS_STR_SIZE 1024
+void RTLsTy::RegisterLibV2(__tgt_bin_desc *desc) {
+
+  // Get the

[PATCH] D124525: [OpenMP][ClangLinkerWrapper] Extending linker wrapper to embed metadata for multi-arch fat binaries

2022-05-26 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam added a comment.

Thanks for the detailed review. I will update the rest of the patch soon.




Comment at: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp:613-614
   CmdArgs.push_back("-shared");
+  std::string ArchArg = std::string("-plugin-opt=mcpu=").append(Arch.str());
+  CmdArgs.push_back(ArchArg);
   CmdArgs.push_back("-o");

jhuber6 wrote:
> Does this just pass the architecture to the AMD link? Is this related? If not 
> move it to a separate patch and I'll review it.
Yes, it passes the architecture to AMD link.



Comment at: clang/tools/clang-linker-wrapper/OffloadWrapper.cpp:229-232
+// store value of these variables (i.e. offload archs) into a custom
+// section which will be used by "offload-arch -f". It won't be
+// removed during binary stripping.
+GV->setSection(".offload_arch_list");

jhuber6 wrote:
> Why does this need a custom section? We should just use it like this, not 
> sure why we need these to  be anything but some internal struct.
> ```
> for (auto *Image : BinDesc->Images) {
>   if (Image->Info)
>// Use Info
> }
> ```
A custom section is required so that it survives the binary stripping done by 
various OSes. The `offload-arch` tool will look into this section of the binary 
to print the list of supported architectures.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124525/new/

https://reviews.llvm.org/D124525

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D124525: [OpenMP][ClangLinkerWrapper] Extending linker wrapper to embed metadata for multi-arch fat binaries

2022-05-26 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam updated this revision to Diff 432352.
saiislam marked 8 inline comments as done.
saiislam added a comment.

Addressed some of the simpler review comments. I will address the remaining 
ones in the next iteration.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124525/new/

https://reviews.llvm.org/D124525

Files:
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
  clang/tools/clang-linker-wrapper/OffloadWrapper.cpp
  clang/tools/clang-linker-wrapper/OffloadWrapper.h
  openmp/libomptarget/include/omptarget.h
  openmp/libomptarget/src/rtl.cpp

Index: openmp/libomptarget/src/rtl.cpp
===
--- openmp/libomptarget/src/rtl.cpp
+++ openmp/libomptarget/src/rtl.cpp
@@ -13,6 +13,7 @@
 #include "rtl.h"
 #include "device.h"
 #include "private.h"
+//#include "llvm/OffloadArch/OffloadArch.h"
 
 #include 
 #include 
@@ -20,6 +21,8 @@
 #include 
 #include 
 #include 
+// It's strange we do not have llvm tools for openmp runtime, so we use stat
+#include 
 
 // List of all plugins that can support offloading.
 static const char *RTLNames[] = {
@@ -351,18 +354,108 @@
 initRTLonce(R);
 }
 
+/// Query runtime capabilities of this system by calling offload-arch -c
+/// offload_arch_output_buffer is persistant storage returned by this
+/// __tgt_get_active_offload_env.
+static void
+__tgt_get_active_offload_env(__tgt_active_offload_env *active_env,
+ char *offload_arch_output_buffer,
+ size_t offload_arch_output_buffer_size) {
+
+  // If OFFLOAD_ARCH_OVERRIDE env varible is present then use its value instead
+  // of querying it using LLVMOffloadArch library.
+  if (char *OffloadArchEnvVar = getenv("OFFLOAD_ARCH_OVERRIDE")) {
+if (OffloadArchEnvVar) {
+  active_env->capabilities = OffloadArchEnvVar;
+  return;
+}
+  }
+  // Qget runtime capabilities of this system with libLLVMOffloadArch.a
+  // if (int rc = getRuntimeCapabilities(offload_arch_output_buffer,
+  // offload_arch_output_buffer_size))
+  //   return;
+  // active_env->capabilities = offload_arch_output_buffer;
+  // return;
+}
+
+std::vector _splitstrings(char *input, const char *sep) {
+  std::vector split_strings;
+  std::string s(input);
+  std::string delimiter(sep);
+  size_t pos = 0;
+  while ((pos = s.find(delimiter)) != std::string::npos) {
+if (pos != 0)
+  split_strings.push_back(s.substr(0, pos));
+s.erase(0, pos + delimiter.length());
+  }
+  if (s.length() > 1)
+split_strings.push_back(s.substr(0, s.length()));
+  return split_strings;
+}
+
+static bool _ImageIsCompatibleWithEnv(__tgt_image_info *image_info,
+  __tgt_active_offload_env *active_env) {
+  // get_image_info will return null if no image information was registered.
+  // If no image information, assume application built with old compiler and
+  // check each image.
+  if (!image_info)
+return true;
+
+  if (!active_env->capabilities)
+return false;
+
+  // Each runtime requirement for the compiled image is stored in
+  // the image_info->offload_arch (TargetID) string.
+  // Each runtime capability obtained from "offload-arch -c" is stored in
+  // actvie_env->capabilities (TargetID) string.
+  // If every requirement has a matching capability, then the image
+  // is compatible with active environment
+
+  std::vector reqs = _splitstrings(image_info->offload_arch, ":");
+  std::vector caps = _splitstrings(active_env->capabilities, ":");
+
+  bool is_compatible = true;
+  for (auto req : reqs) {
+bool missing_capability = true;
+for (auto capability : caps)
+  if (capability == req)
+missing_capability = false;
+if (missing_capability) {
+  DP("Image requires %s but runtime capability %s is missing.\n",
+ image_info->offload_arch, req.c_str());
+  is_compatible = false;
+}
+  }
+  return is_compatible;
+}
+
+#define MAX_CAPS_STR_SIZE 1024
 void RTLsTy::RegisterLib(__tgt_bin_desc *desc) {
+
+  // Get the current active offload environment
+  __tgt_active_offload_env offload_env = {nullptr};
+  // Need a buffer to hold results of offload-arch -c command
+  size_t offload_arch_output_buffer_size = MAX_CAPS_STR_SIZE;
+  std::vector offload_arch_output_buffer;
+  offload_arch_output_buffer.resize(offload_arch_output_buffer_size);
+  __tgt_get_active_offload_env(_env, offload_arch_output_buffer.data(),
+   offload_arch_output_buffer_size);
+
+  RTLInfoTy *FoundRTL = NULL;
   PM->RTLsMtx.lock();
   // Register the images with the RTLs that understand them, if any.
   for (int32_t i = 0; i < desc->NumDeviceImages; ++i) {
 // Obtain the image.
 __tgt_device_image *img = >DeviceImages[i];
 
-RTLInfoTy *FoundRTL = nullptr;
-
+// Get corresponding image info offload_arch and check with runtime
+if

[PATCH] D124525: [OpenMP][ClangLinkerWrapper] Extending linker wrapper to embed metadata for multi-arch fat binaries

2022-05-25 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam updated this revision to Diff 431975.
saiislam added a comment.

Changed the embedding scheme to add ImageInfo field in __tgt_device_image.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124525/new/

https://reviews.llvm.org/D124525

Files:
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
  clang/tools/clang-linker-wrapper/OffloadWrapper.cpp
  clang/tools/clang-linker-wrapper/OffloadWrapper.h
  openmp/libomptarget/include/omptarget.h
  openmp/libomptarget/src/rtl.cpp

Index: openmp/libomptarget/src/rtl.cpp
===
--- openmp/libomptarget/src/rtl.cpp
+++ openmp/libomptarget/src/rtl.cpp
@@ -13,6 +13,7 @@
 #include "rtl.h"
 #include "device.h"
 #include "private.h"
+//#include "llvm/OffloadArch/OffloadArch.h"
 
 #include 
 #include 
@@ -20,6 +21,8 @@
 #include 
 #include 
 #include 
+// It's strange we do not have llvm tools for openmp runtime, so we use stat
+#include 
 
 // List of all plugins that can support offloading.
 static const char *RTLNames[] = {
@@ -351,18 +354,108 @@
 initRTLonce(R);
 }
 
+/// Query runtime capabilities of this system by calling offload-arch -c
+/// offload_arch_output_buffer is persistant storage returned by this
+/// __tgt_get_active_offload_env.
+static void
+__tgt_get_active_offload_env(__tgt_active_offload_env *active_env,
+ char *offload_arch_output_buffer,
+ size_t offload_arch_output_buffer_size) {
+
+  // If OFFLOAD_ARCH_OVERRIDE env varible is present then use its value instead
+  // of querying it using LLVMOffloadArch library.
+  if (char *OffloadArchEnvVar = getenv("OFFLOAD_ARCH_OVERRIDE")) {
+if (OffloadArchEnvVar) {
+  active_env->capabilities = OffloadArchEnvVar;
+  return;
+}
+  }
+  // Qget runtime capabilities of this system with libLLVMOffloadArch.a
+  // if (int rc = getRuntimeCapabilities(offload_arch_output_buffer,
+  // offload_arch_output_buffer_size))
+  //   return;
+  // active_env->capabilities = offload_arch_output_buffer;
+  // return;
+}
+
+std::vector _splitstrings(char *input, const char *sep) {
+  std::vector split_strings;
+  std::string s(input);
+  std::string delimiter(sep);
+  size_t pos = 0;
+  while ((pos = s.find(delimiter)) != std::string::npos) {
+if (pos != 0)
+  split_strings.push_back(s.substr(0, pos));
+s.erase(0, pos + delimiter.length());
+  }
+  if (s.length() > 1)
+split_strings.push_back(s.substr(0, s.length()));
+  return split_strings;
+}
+
+static bool _ImageIsCompatibleWithEnv(__tgt_image_info *image_info,
+  __tgt_active_offload_env *active_env) {
+  // get_image_info will return null if no image information was registered.
+  // If no image information, assume application built with old compiler and
+  // check each image.
+  if (!image_info)
+return true;
+
+  if (!active_env->capabilities)
+return false;
+
+  // Each runtime requirement for the compiled image is stored in
+  // the image_info->offload_arch (TargetID) string.
+  // Each runtime capability obtained from "offload-arch -c" is stored in
+  // actvie_env->capabilities (TargetID) string.
+  // If every requirement has a matching capability, then the image
+  // is compatible with active environment
+
+  std::vector reqs = _splitstrings(image_info->offload_arch, ":");
+  std::vector caps = _splitstrings(active_env->capabilities, ":");
+
+  bool is_compatible = true;
+  for (auto req : reqs) {
+bool missing_capability = true;
+for (auto capability : caps)
+  if (capability == req)
+missing_capability = false;
+if (missing_capability) {
+  DP("Image requires %s but runtime capability %s is missing.\n",
+ image_info->offload_arch, req.c_str());
+  is_compatible = false;
+}
+  }
+  return is_compatible;
+}
+
+#define MAX_CAPS_STR_SIZE 1024
 void RTLsTy::RegisterLib(__tgt_bin_desc *desc) {
+
+  // Get the current active offload environment
+  __tgt_active_offload_env offload_env = {nullptr};
+  // Need a buffer to hold results of offload-arch -c command
+  size_t offload_arch_output_buffer_size = MAX_CAPS_STR_SIZE;
+  std::vector offload_arch_output_buffer;
+  offload_arch_output_buffer.resize(offload_arch_output_buffer_size);
+  __tgt_get_active_offload_env(_env, offload_arch_output_buffer.data(),
+   offload_arch_output_buffer_size);
+
+  RTLInfoTy *FoundRTL = NULL;
   PM->RTLsMtx.lock();
   // Register the images with the RTLs that understand them, if any.
   for (int32_t i = 0; i < desc->NumDeviceImages; ++i) {
 // Obtain the image.
 __tgt_device_image *img = >DeviceImages[i];
 
-RTLInfoTy *FoundRTL = nullptr;
-
+// Get corresponding image info offload_arch and check with runtime
+if (!_ImageIsCompatibleWithEnv(img->ImageInfo, _env))
+  continue;
+FoundRTL = NULL;

[PATCH] D125050: [OpenMP] Try to Infer target triples using the offloading architecture

2022-05-06 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam accepted this revision.
saiislam added a comment.
This revision is now accepted and ready to land.

Thanks!
LGTM.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D125050/new/

https://reviews.llvm.org/D125050

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D125092: [OpenMP] Add basic support for properly handling static libraries

2022-05-06 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam added a comment.

> Ideally we could just put this on the linker itself, but nvlink doesn't seem 
> to support .a files.

Yes, nvlink does not support archives. So we used a wrapper to extract cubin 
files from the archive and pass them to nvlink. Please see the Clang Nvlink 
Wrapper.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D125092/new/

https://reviews.llvm.org/D125092

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D124525: [OpenMP][ClangLinkerWrapper] Extending linker wrapper to embed metadata for multi-arch fat binaries

2022-05-06 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam added a comment.

In D124525#3491170, @jhuber6 wrote:

> I'm suggesting instead we define a new `__tgt_device_image` and a new 
> `__tgt_register_lib` function to support this. This new `__tgt_device_image` 
> will simply contain a pointer to an optional information struct.
>
>   struct __tgt_device_image_v2 {
>     void* ImageStart;
>     void* ImageEnd;
>     __tgt_offload_entry*  EntriesBegin;
>     __tgt_offload_entry*  EntriesEnd;
>     __tgt_image_info*  ImageInfo;
>   };
>
> This new struct breaks the ABI with the old `__tgt_device_image` because 
> these are put into an array and we change the size, but we should be able to 
> provide backwards compatibility by copying from the old format to the new 
> format and creating a new array. We can detect the new vs. old ABI by 
> expecting that existing applications will call the `__tgt_register_image` 
> function. We will create a new `__tgt_register_image_v2` function for example 
> that all new programs will call. In `libomptarget` we then change 
> `__tgt_register_image` to do the necessary translation.
>
>   struct __tgt_bin_desc {
>     int32_t NumDeviceImages;               // Number of device types supported
>     __tgt_device_image *DeviceImages;      // Array of device images (1 per dev. type)
>     __tgt_offload_entry *HostEntriesBegin; // Begin of table with all host entries
>     __tgt_offload_entry *HostEntriesEnd;   // End of table (non inclusive)
>   };
>   
>   EXTERN void __tgt_register_lib(__tgt_bin_desc *desc) {
>     __tgt_device_image_v2 *new_image = alloc_new_version(desc);
>     desc->DeviceImages = new_image;
>     __tgt_register_lib_v2(desc);
>   }
>   
>   EXTERN void __tgt_unregister_lib(__tgt_bin_desc *desc) {
> __tgt_unregister_lib_v2(desc);
> dealloc(desc->DeviceImages);
>   }
>
> Now the rest of `libomptarget` solely uses the new format, and we check if 
> information is available by seeing that the `ImageInfo` field is non-null.

Thanks for the input. I am going to try it.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124525/new/

https://reviews.llvm.org/D124525

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D125050: [OpenMP] Try to Infer target triples using the offloading architecture

2022-05-06 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam added a comment.

Looks good to me.

Will it work with `-fno-openmp`? Sometimes `-fno-openmp` is used by the 
end-user to override a system-provided `-fopenmp` flag for some translation 
units.

Please have a look at the following examples:

  // RUN: %clang -### -target x86_64-linux-gnu \
  // RUN:   --offload-arch=gfx906 \
  // RUN:   %s 2>&1 | FileCheck -check-prefix=OFFLOAD %s
  // OFFLOAD: warning: argument unused during compilation: 
'--offload-arch=gfx906'
  
  // RUN: %clang -### -target x86_64-linux-gnu -fopenmp\
  // RUN:   --offload-arch=gfx906 \
  // RUN:   -fno-openmp \
  // RUN:   %s 2>&1 | FileCheck -check-prefix=OFFLOAD1 %s
  // OFFLOAD1: warning: argument unused during compilation: 
'--offload-arch=gfx906'
  
  // RUN: %clang -### -target x86_64-linux-gnu -fopenmp\
  // RUN:   -fopenmp-targets=amdgcn-amd-amdhsa,amdgcn-amd-amdhsa \
  // RUN:   -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 \
  // RUN:   -fno-openmp \
  // RUN:   %s 2>&1 | FileCheck -check-prefix=LEGACY %s
  // LEGACY: warning: '-fopenmp-targets' must be used in conjunction with a 
'-fopenmp' option compatible with offloading; e.g., '-fopenmp=libomp' or 
'-fopenmp=libiomp5'
  // LEGACY-NEXT: warning: argument unused during compilation: 
'-Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906'
  
  // RUN: %clang -### -target x86_64-linux-gnu -fopenmp\
  // RUN:   --offload-arch=gfx906 \
  // RUN:   --offload-arch=gfx908 \
  // RUN:   -fno-openmp \
  // RUN:   %s 2>&1 | FileCheck -check-prefix=MOFFLOAD %s
  // MOFFLOAD: warning: argument unused during compilation: 
'--offload-arch=gfx906'
  // MOFFLOAD-NEXT: warning: argument unused during compilation: 
'--offload-arch=gfx908'
  
  // RUN: %clang -### -target x86_64-linux-gnu -fopenmp\
  // RUN:   -fopenmp-targets=amdgcn-amd-amdhsa,amdgcn-amd-amdhsa \
  // RUN:   -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 \
  // RUN:   -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx908 \
  // RUN:   -fno-openmp \
  // RUN:   %s 2>&1 | FileCheck -check-prefix=MLEGACY %s
  // MLEGACY: warning: '-fopenmp-targets' must be used in conjunction with a 
'-fopenmp' option compatible with offloading; e.g., '-fopenmp=libomp' or 
'-fopenmp=libiomp5'
  // MLEGACY: warning: argument unused during compilation: 
'-Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906'
  // MLEGACY: warning: argument unused during compilation: 
'-Xopenmp-target=amdgcn-amd-amdhsa -march=gfx908'


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D125050/new/

https://reviews.llvm.org/D125050

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D124721: [OpenMP] Allow compiling multiple target architectures with OpenMP

2022-05-02 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam added inline comments.



Comment at: clang/test/Driver/amdgpu-openmp-toolchain-new.c:6
 // RUN:   | FileCheck %s
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp 
-fopenmp-targets=amdgcn-amd-amdhsa \
+// RUN:  --offload-arch=gfx906 
--libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib %s 2>&1 \

jhuber6 wrote:
> saiislam wrote:
> > saiislam wrote:
> > > jhuber6 wrote:
> > > > saiislam wrote:
> > > > > Wouldn't it be better if the user is not required to specify the 
> > > > > triple in this shorthand version? We can infer the triple from the 
> > > > > GPUArch. We have this support in our downstream branch.
> > > > > 
> > > > > ```
> > > > > clang  --target=x86_64-unknown-linux-gnu -fopenmp 
> > > > > --offload-arch=gfx906 helloworld.c -o helloworld
> > > > > ```
> > > > We could, HIP and CUDA both use some kind of 
> > > > `getAMDOffloadTargetTriple`. I guess in this case we would consider 
> > > > OpenMP offloading active if the user specified `-fopenmp` and 
> > > > `--offload-arch`? I could do this in a separate patch.
> > > Yes, exactly. OpenMP offloading should be active when `-fopenmp` and 
> > > `--offload-arch` both are present.
> > > 
> > > Thank you!
> > Following code might be useful for your patch (it assumes that OffloadArch 
> > is associated with each device tool chain so that multiple archs of same 
> > triple can be compiled together):
> > 
> > 
> >   # [[ 
> > https://github.com/RadeonOpenCompute/llvm-project/blob/359002f885ea860609f0c841d69f4970ccbb37af/clang/lib/Driver/Driver.cpp#L735
> >  | GetTargetInfoFromOffloadArch() ]]
> >   # [[ 
> > https://github.com/RadeonOpenCompute/llvm-project/blob/359002f885ea860609f0c841d69f4970ccbb37af/clang/lib/Driver/Driver.cpp#L779
> >  | Driver::GetTargetInfoFromMarch() ]]
> >   # [[ 
> > https://github.com/RadeonOpenCompute/llvm-project/blob/359002f885ea860609f0c841d69f4970ccbb37af/clang/lib/Driver/Driver.cpp#L819
> >  | Driver::GetTargetInfoFromOffloadArchOpts() ]]
> >   # [[ 
> > https://github.com/RadeonOpenCompute/llvm-project/blob/359002f885ea860609f0c841d69f4970ccbb37af/clang/lib/Driver/Driver.cpp#L851
> >  | modified definition of Driver::CreateOffloadingDeviceToolChains() ]]
> > 
> I'll look into it, I was thinking of a good way to specify architectures per 
> triple. So we could theoretically have `--offload-arch=sm_70` and 
> `--offload_arch=gfx908` work in unison and it might just be easy to group the 
> triples from the architecture.
Along with this, we would also like to support --offload-arch=gfx906 and 
--offload-arch=gfx908 in the same command.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124721/new/

https://reviews.llvm.org/D124721

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D124721: [OpenMP] Allow compiling multiple target architectures with OpenMP

2022-05-02 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam added inline comments.



Comment at: clang/test/Driver/amdgpu-openmp-toolchain-new.c:6
 // RUN:   | FileCheck %s
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp 
-fopenmp-targets=amdgcn-amd-amdhsa \
+// RUN:  --offload-arch=gfx906 
--libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib %s 2>&1 \

saiislam wrote:
> jhuber6 wrote:
> > saiislam wrote:
> > > Wouldn't it be better if the user is not required to specify the triple 
> > > in this shorthand version? We can infer the triple from the GPUArch. We 
> > > have this support in our downstream branch.
> > > 
> > > ```
> > > clang  --target=x86_64-unknown-linux-gnu -fopenmp --offload-arch=gfx906 
> > > helloworld.c -o helloworld
> > > ```
> > We could, HIP and CUDA both use some kind of `getAMDOffloadTargetTriple`. I 
> > guess in this case we would consider OpenMP offloading active if the user 
> > specified `-fopenmp` and `--offload-arch`? I could do this in a separate 
> > patch.
> Yes, exactly. OpenMP offloading should be active when `-fopenmp` and 
> `--offload-arch` both are present.
> 
> Thank you!
The following code might be useful for your patch (it assumes that an 
OffloadArch is associated with each device tool chain so that multiple archs of 
the same triple can be compiled together):


  # [[ 
https://github.com/RadeonOpenCompute/llvm-project/blob/359002f885ea860609f0c841d69f4970ccbb37af/clang/lib/Driver/Driver.cpp#L735
 | GetTargetInfoFromOffloadArch() ]]
  # [[ 
https://github.com/RadeonOpenCompute/llvm-project/blob/359002f885ea860609f0c841d69f4970ccbb37af/clang/lib/Driver/Driver.cpp#L779
 | Driver::GetTargetInfoFromMarch() ]]
  # [[ 
https://github.com/RadeonOpenCompute/llvm-project/blob/359002f885ea860609f0c841d69f4970ccbb37af/clang/lib/Driver/Driver.cpp#L819
 | Driver::GetTargetInfoFromOffloadArchOpts() ]]
  # [[ 
https://github.com/RadeonOpenCompute/llvm-project/blob/359002f885ea860609f0c841d69f4970ccbb37af/clang/lib/Driver/Driver.cpp#L851
 | modified definition of Driver::CreateOffloadingDeviceToolChains() ]]



Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124721/new/

https://reviews.llvm.org/D124721

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D124721: [OpenMP] Allow compiling multiple target architectures with OpenMP

2022-05-02 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam added inline comments.



Comment at: clang/test/Driver/amdgpu-openmp-toolchain-new.c:6
 // RUN:   | FileCheck %s
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp 
-fopenmp-targets=amdgcn-amd-amdhsa \
+// RUN:  --offload-arch=gfx906 
--libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib %s 2>&1 \

jhuber6 wrote:
> saiislam wrote:
> > Wouldn't it be better if the user is not required to specify the triple in 
> > this shorthand version? We can infer the triple from the GPUArch. We have 
> > this support in our downstream branch.
> > 
> > ```
> > clang  --target=x86_64-unknown-linux-gnu -fopenmp --offload-arch=gfx906 
> > helloworld.c -o helloworld
> > ```
> We could, HIP and CUDA both use some kind of `getAMDOffloadTargetTriple`. I 
> guess in this case we would consider OpenMP offloading active if the user 
> specified `-fopenmp` and `--offload-arch`? I could do this in a separate 
> patch.
Yes, exactly. OpenMP offloading should be active when `-fopenmp` and 
`--offload-arch` both are present.

Thank you!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124721/new/

https://reviews.llvm.org/D124721

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D124721: [OpenMP] Allow compiling multiple target architectures with OpenMP

2022-05-01 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam added inline comments.



Comment at: clang/test/Driver/amdgpu-openmp-toolchain-new.c:6
 // RUN:   | FileCheck %s
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp 
-fopenmp-targets=amdgcn-amd-amdhsa \
+// RUN:  --offload-arch=gfx906 
--libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib %s 2>&1 \

Wouldn't it be better if the user is not required to specify the triple in this 
shorthand version? We can infer the triple from the GPUArch. We have this 
support in our downstream branch.

```
clang  --target=x86_64-unknown-linux-gnu -fopenmp --offload-arch=gfx906 
helloworld.c -o helloworld
```


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124721/new/

https://reviews.llvm.org/D124721

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D124525: [OpenMP][ClangLinkerWrapper] Extending linker wrapper to embed metadata for multi-arch fat binaries

2022-04-28 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam added a comment.

In D124525#3478469, @yaxunl wrote:

> need a test for the generated registration code

Yes, I will add tests.




Comment at: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp:1161
 
-LinkedImages.push_back(*ImageOrErr);
+LinkedImages.emplace_back(TheArch, *ImageOrErr);
   }

jhuber6 wrote:
> I'm doing something similar in D123810, I just used the existing `DeviceFile` 
> because I needed the `Arch` and `Kind` fields to dispatch the appropriate 
> wrapping job for CUDA / HIP.
Seems simpler. I will pull that change here.



Comment at: clang/tools/clang-linker-wrapper/OffloadWrapper.cpp:98-104
+  // struct __tgt_image_info {
+  //   int32_t version;
+  //   int32_t image_number;
+  //   int32_t number_images;
+  //   char* offload_arch;
+  //   char* target_compile_opts;
+  // };

yaxunl wrote:
> I am wondering whether we should add a few more fields to make it more 
> generic for all offloading languages and platforms:
> 
> 
> ```
> char* target_triple;
> char* offloading_kind; // openmp, hip, etc
> char* file_type; // elf, spirv, bitcode, etc
> ```
Good idea. Though I am not sure whether the file_type info is being propagated 
by the linker-wrapper or not. I will check.



Comment at: clang/tools/clang-linker-wrapper/OffloadWrapper.cpp:246
   IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func));
+  // Create calls to __tgt_register_image_info for each image
+  auto *NullPtr = llvm::ConstantPointerNull::get(Builder.getInt8PtrTy());

jhuber6 wrote:
> I'm wondering if it would be better to create a new `__tgt_bin_desc` and call 
> a new `__tgt_register_lib` with it here so we don't need multiple calls here. 
> Inside that new runtime function we could just widen or shrink the existing 
> structs as needed. That way each device image would have this metadata 
> associated with it and the target plugin can handle it as-needed.
Last time multiple vendors objected to changing `__tgt_bin_desc` and 
`__tgt_device_image` structs. The reason was backward compatibility of multiple 
downstream runtimes.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124525/new/

https://reviews.llvm.org/D124525

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D124525: [OpenMP][ClangLinkerWrapper] Extending linker wrapper to embed metadata for multi-arch fat binaries

2022-04-27 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam created this revision.
saiislam added reviewers: jdoerfert, JonChesterfield, jhuber6, yaxunl.
Herald added a subscriber: guansong.
Herald added a project: All.
saiislam requested review of this revision.
Herald added subscribers: openmp-commits, cfe-commits, sstefan1.
Herald added projects: clang, OpenMP.

This patch adds "__tgt_image_info" field for each of the images
embedded in a multi-arch image. Required changes in libomptarget
are also shown.

The information in "__tgt_image_info" struct is provided in the 
clang-linker-wrapper
as a call to __tgt_register_image_info for each image in the library
of images also created by the clang-linker-wrapper.
__tgt_register_image_info is called for each image BEFORE the single
call to __tgt_register_lib so that image information is available
before they are loaded. clang-linker-wrapper gets this image information
from command line arguments provided by the clang driver when it creates
the call to the __clang-linker-wrapper command.
This architecture allows the binary image (pointed to by ImageStart and
ImageEnd in __tgt_device_image) to remain architecture independent.
That is, the architecture independent part of the libomptarget runtime
does not need to peer inside the image to determine if it is loadable
even though in most cases the image is an elf object.
There is one __tgt_image_info for each __tgt_device_image. For backward
compatibility, no changes are allowed to either __tgt_device_image or
__tgt_bin_desc. The absence of __tgt_image_info is the indication that
the runtime is being used on a binary created by an old version of
the compiler.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D124525

Files:
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
  clang/tools/clang-linker-wrapper/OffloadWrapper.cpp
  clang/tools/clang-linker-wrapper/OffloadWrapper.h
  openmp/libomptarget/include/omptarget.h
  openmp/libomptarget/src/exports
  openmp/libomptarget/src/interface.cpp
  openmp/libomptarget/src/rtl.cpp

Index: openmp/libomptarget/src/rtl.cpp
===
--- openmp/libomptarget/src/rtl.cpp
+++ openmp/libomptarget/src/rtl.cpp
@@ -13,6 +13,7 @@
 #include "rtl.h"
 #include "device.h"
 #include "private.h"
+#include "llvm/OffloadArch/OffloadArch.h"
 
 #include 
 #include 
@@ -20,6 +21,8 @@
 #include 
 #include 
 #include 
+// It's strange we do not have llvm tools for openmp runtime, so we use stat
+#include 
 
 // List of all plugins that can support offloading.
 static const char *RTLNames[] = {
@@ -351,18 +354,109 @@
 initRTLonce(R);
 }
 
+/// Query runtime capabilities of this system by calling offload-arch -c
+/// offload_arch_output_buffer is persistant storage returned by this
+/// __tgt_get_active_offload_env.
+static void
+__tgt_get_active_offload_env(__tgt_active_offload_env *active_env,
+ char *offload_arch_output_buffer,
+ size_t offload_arch_output_buffer_size) {
+
+  // If OFFLOAD_ARCH_OVERRIDE env varible is present then use its value instead of
+  // querying it using LLVMOffloadArch library.
+  if (char *OffloadArchEnvVar = getenv("OFFLOAD_ARCH_OVERRIDE")) {
+if (OffloadArchEnvVar) {
+  active_env->capabilities = OffloadArchEnvVar;
+  return;
+}
+  }
+  // Qget runtime capabilities of this system with libLLVMOffloadArch.a
+  if (int rc = getRuntimeCapabilities(offload_arch_output_buffer,
+  offload_arch_output_buffer_size))
+return;
+  active_env->capabilities = offload_arch_output_buffer;
+  return;
+}
+
+std::vector _splitstrings(char *input, const char *sep) {
+  std::vector split_strings;
+  std::string s(input);
+  std::string delimiter(sep);
+  size_t pos = 0;
+  while ((pos = s.find(delimiter)) != std::string::npos) {
+if (pos != 0)
+  split_strings.push_back(s.substr(0, pos));
+s.erase(0, pos + delimiter.length());
+  }
+  if (s.length() > 1)
+split_strings.push_back(s.substr(0, s.length()));
+  return split_strings;
+}
+
+static bool _ImageIsCompatibleWithEnv(__tgt_image_info *img_info,
+  __tgt_active_offload_env *active_env) {
+  // get_image_info will return null if no image information was registered.
+  // If no image information, assume application built with old compiler and
+  // check each image.
+  if (!img_info)
+return true;
+
+  if (!active_env->capabilities)
+return false;
+
+  // Each runtime requirement for the compiled image is stored in
+  // the img_info->offload_arch (TargetID) string.
+  // Each runtime capability obtained from "offload-arch -c" is stored in
+  // actvie_env->capabilities (TargetID) string.
+  // If every requirement has a matching capability, then the image
+  // is compatible with active environment
+
+  std::vector reqs = _splitstrings(img_info->offload_arch, ":");
+  std::vector caps = _splitstrings(active_env->capabilities,

[PATCH] D123387: [clang-offload-bundler] fix "no output file" issue with -outputs

2022-04-08 Thread Saiyedul Islam via Phabricator via cfe-commits

This revision was automatically updated to reflect the committed changes.
Closed by commit rG0f6cbdee5761: [clang-offload-bundler] fix no output 
file issue with -outputs (authored by scchan, committed by saiislam).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D123387/new/

https://reviews.llvm.org/D123387

Files:
  clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp


Index: clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp
===
--- clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp
+++ clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp
@@ -1450,7 +1450,7 @@
 return 0;
   }
 
-  if (OutputFileNames.getNumOccurrences() == 0) {
+  if (OutputFileNames.size() == 0) {
 reportError(
 createStringError(errc::invalid_argument, "no output file 
specified!"));
   }


Index: clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp
===
--- clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp
+++ clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp
@@ -1450,7 +1450,7 @@
 return 0;
   }
 
-  if (OutputFileNames.getNumOccurrences() == 0) {
+  if (OutputFileNames.size() == 0) {
 reportError(
 createStringError(errc::invalid_argument, "no output file specified!"));
   }
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D123387: [clang-offload-bundler] fix "no output file" issue with -outputs

2022-04-08 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam accepted this revision.
saiislam added a comment.

In D123387#3439069, @yaxunl wrote:

> Does this mean without this fix clang-offload-bundler emits an error if 
> -outputs option not given? How could the lit tests passed?

I think lit tests were passing because all occurrences of -outputs were 
replaced with -output. It crashed internally in other components where this 
syntax wasn't changed.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D123387/new/

https://reviews.llvm.org/D123387

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D122069: [Object] Add binary format for bundling offloading metadata

2022-03-30 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam added a comment.

Hey @jhuber6, as discussed in the multi-company meeting, I think that we will need
at least an arch field somewhere in this. We would like to create multi-arch
binaries so that the runtime can load the compatible one on its own.
You may even consider using the TargetID format to store the list of archs.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D122069/new/

https://reviews.llvm.org/D122069

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D120697: [clang-offload-bundler] HIP and OpenMP comaptibility for linking heterogeneous archive library

2022-03-01 Thread Saiyedul Islam via Phabricator via cfe-commits

This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG7a02abf06ff9: [clang-offload-bundler] HIP and OpenMP 
comaptibility for linking heterogeneous… (authored by saiislam).
Herald added a project: All.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120697/new/

https://reviews.llvm.org/D120697

Files:
  clang/lib/Driver/ToolChains/CommonArgs.cpp
  clang/test/Driver/clang-offload-bundler-asserts-on.c
  clang/test/Driver/clang-offload-bundler.c
  clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp

Index: clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp
===
--- clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp
+++ clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp
@@ -117,6 +117,12 @@
 cl::desc("Alignment of bundle for binary files"),
 cl::init(1), cl::cat(ClangOffloadBundlerCategory));
 
+static cl::opt HipOpenmpCompatible(
+"hip-openmp-compatible",
+cl::desc("Treat hip and hipv4 offload kinds as "
+ "compatible with openmp kind, and vice versa.\n"),
+cl::init(false), cl::cat(ClangOffloadBundlerCategory));
+
 /// Magic string that marks the existence of offloading data.
 #define OFFLOAD_BUNDLER_MAGIC_STR "__CLANG_OFFLOAD_BUNDLE__"
 
@@ -166,6 +172,21 @@
OffloadKind == "hip" || OffloadKind == "hipv4";
   }
 
+  bool isOffloadKindCompatible(const StringRef TargetOffloadKind) const {
+if (OffloadKind == TargetOffloadKind)
+  return true;
+if (HipOpenmpCompatible) {
+  bool HIPCompatibleWithOpenMP =
+  OffloadKind.startswith_insensitive("hip") &&
+  TargetOffloadKind == "openmp";
+  bool OpenMPCompatibleWithHIP =
+  OffloadKind == "openmp" &&
+  TargetOffloadKind.startswith_insensitive("hip");
+  return HIPCompatibleWithOpenMP || OpenMPCompatibleWithHIP;
+}
+return false;
+  }
+
   bool isTripleValid() const {
 return !Triple.str().empty() && Triple.getArch() != Triple::UnknownArch;
   }
@@ -1097,7 +1118,7 @@
   }
 
   // Incompatible if Kinds or Triples mismatch.
-  if (CodeObjectInfo.OffloadKind != TargetInfo.OffloadKind ||
+  if (!CodeObjectInfo.isOffloadKindCompatible(TargetInfo.OffloadKind) ||
   !CodeObjectInfo.Triple.isCompatibleWith(TargetInfo.Triple)) {
 DEBUG_WITH_TYPE(
 "CodeObjectCompatibility",
Index: clang/test/Driver/clang-offload-bundler.c
===
--- clang/test/Driver/clang-offload-bundler.c
+++ clang/test/Driver/clang-offload-bundler.c
@@ -406,6 +406,7 @@
 // Create few code object bundles and archive them to create an input archive
 // RUN: clang-offload-bundler -type=o -targets=host-%itanium_abi_triple,openmp-amdgcn-amd-amdhsa-gfx906,openmp-amdgcn-amd-amdhsa--gfx908 -inputs=%t.o,%t.tgt1,%t.tgt2 -outputs=%t.simple.bundle
 // RUN: clang-offload-bundler -type=o -targets=host-%itanium_abi_triple,openmp-amdgcn-amd-amdhsa--gfx903 -inputs=%t.o,%t.tgt1 -outputs=%t.simple1.bundle
+// RUN: clang-offload-bundler -type=o -targets=host-%itanium_abi_triple,hip-amdgcn-amd-amdhsa--gfx906 -inputs=%t.o,%t.tgt1 -outputs=%t.simple1.bundle
 // RUN: llvm-ar cr %t.input-archive.a %t.simple.bundle %t.simple1.bundle
 
 // RUN: clang-offload-bundler -unbundle -type=a -targets=openmp-amdgcn-amd-amdhsa-gfx906,openmp-amdgcn-amd-amdhsa-gfx908 -inputs=%t.input-archive.a -outputs=%t-archive-gfx906-simple.a,%t-archive-gfx908-simple.a
@@ -423,6 +424,19 @@
 // RUN: cat %t-archive-gfx803-empty.a | FileCheck %s -check-prefix=EMPTYARCHIVE
 // EMPTYARCHIVE: !
 
+// Check compatibility of OpenMP code objects found in the heterogeneous archive library with HIP code objects of the target
+// RUN: clang-offload-bundler -unbundle -type=a -targets=hip-amdgcn-amd-amdhsa-gfx906,hipv4-amdgcn-amd-amdhsa-gfx908 -inputs=%t.input-archive.a -outputs=%t-hip-archive-gfx906-simple.a,%t-hipv4-archive-gfx908-simple.a -hip-openmp-compatible
+// RUN: llvm-ar t %t-hip-archive-gfx906-simple.a | FileCheck %s -check-prefix=HIPOPENMPCOMPAT
+// HIPOPENMPCOMPAT: simple-openmp-amdgcn-amd-amdhsa-gfx906
+// RUN: llvm-ar t %t-hipv4-archive-gfx908-simple.a | FileCheck %s -check-prefix=HIPv4OPENMPCOMPAT
+// HIPv4OPENMPCOMPAT: simple-openmp-amdgcn-amd-amdhsa--gfx908
+
+// Check compatibility of HIP code objects found in the heterogeneous archive library with OpenMP code objects of the target
+// RUN: clang-offload-bundler -unbundle -type=a -targets=openmp-amdgcn-amd-amdhsa--gfx906 \
+// RUN:   -outputs=%T/hip-openmp_906.a -inputs=%T/hip_archive.a -hip-openmp-compatible
+// RUN: llvm-ar t %T/hip-openmp_906.a | FileCheck -check-prefix=OPENMPHIPCOMPAT %s
+// OPENMPHIPCOMPAT: hip_bundle1-hip-amdgcn-amd-amdhsa--gfx906
+
 // Some code so that we can create a binary out of this file.
 int A = 0;
 void test_func(void) {

[PATCH] D120697: [clang-offload-bundler] HIP and OpenMP comaptibility for linking heterogeneous archive library

2022-02-28 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam created this revision.
saiislam added reviewers: yaxunl, jdoerfert, JonChesterfield.
Herald added a subscriber: guansong.
saiislam requested review of this revision.
Herald added subscribers: cfe-commits, sstefan1.
Herald added a project: clang.

The `hip-openmp-compatible` flag treats the hip and hipv4 offload kinds
as compatible with the openmp offload kind while extracting code objects
from a heterogeneous archive library. The reverse is also considered
compatible if the hip code was compiled with -fgpu-rdc.

This flag only relaxes the compatibility criteria on `OffloadKind`;
the rest of the components, like `Triple` and `GPUArch`, still need to
be compatible.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D120697

Files:
  clang/lib/Driver/ToolChains/CommonArgs.cpp
  clang/test/Driver/clang-offload-bundler-asserts-on.c
  clang/test/Driver/clang-offload-bundler.c
  clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp

Index: clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp
===
--- clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp
+++ clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp
@@ -117,6 +117,12 @@
 cl::desc("Alignment of bundle for binary files"),
 cl::init(1), cl::cat(ClangOffloadBundlerCategory));
 
+static cl::opt HipOpenmpCompatible(
+"hip-openmp-compatible",
+cl::desc("Treat hip and hipv4 offload kinds as "
+ "compatible with openmp kind, and vice versa.\n"),
+cl::init(false), cl::cat(ClangOffloadBundlerCategory));
+
 /// Magic string that marks the existence of offloading data.
 #define OFFLOAD_BUNDLER_MAGIC_STR "__CLANG_OFFLOAD_BUNDLE__"
 
@@ -166,6 +172,21 @@
OffloadKind == "hip" || OffloadKind == "hipv4";
   }
 
+  bool isOffloadKindCompatible(const StringRef TargetOffloadKind) const {
+if (OffloadKind == TargetOffloadKind)
+  return true;
+if (HipOpenmpCompatible) {
+  bool HIPCompatibleWithOpenMP =
+  OffloadKind.startswith_insensitive("hip") &&
+  TargetOffloadKind == "openmp";
+  bool OpenMPCompatibleWithHIP =
+  OffloadKind == "openmp" &&
+  TargetOffloadKind.startswith_insensitive("hip");
+  return HIPCompatibleWithOpenMP || OpenMPCompatibleWithHIP;
+}
+return false;
+  }
+
   bool isTripleValid() const {
 return !Triple.str().empty() && Triple.getArch() != Triple::UnknownArch;
   }
@@ -1097,7 +1118,7 @@
   }
 
   // Incompatible if Kinds or Triples mismatch.
-  if (CodeObjectInfo.OffloadKind != TargetInfo.OffloadKind ||
+  if (!CodeObjectInfo.isOffloadKindCompatible(TargetInfo.OffloadKind) ||
   !CodeObjectInfo.Triple.isCompatibleWith(TargetInfo.Triple)) {
 DEBUG_WITH_TYPE(
 "CodeObjectCompatibility",
Index: clang/test/Driver/clang-offload-bundler.c
===
--- clang/test/Driver/clang-offload-bundler.c
+++ clang/test/Driver/clang-offload-bundler.c
@@ -406,6 +406,7 @@
 // Create few code object bundles and archive them to create an input archive
 // RUN: clang-offload-bundler -type=o -targets=host-%itanium_abi_triple,openmp-amdgcn-amd-amdhsa-gfx906,openmp-amdgcn-amd-amdhsa--gfx908 -inputs=%t.o,%t.tgt1,%t.tgt2 -outputs=%t.simple.bundle
 // RUN: clang-offload-bundler -type=o -targets=host-%itanium_abi_triple,openmp-amdgcn-amd-amdhsa--gfx903 -inputs=%t.o,%t.tgt1 -outputs=%t.simple1.bundle
+// RUN: clang-offload-bundler -type=o -targets=host-%itanium_abi_triple,hip-amdgcn-amd-amdhsa--gfx906 -inputs=%t.o,%t.tgt1 -outputs=%t.simple1.bundle
 // RUN: llvm-ar cr %t.input-archive.a %t.simple.bundle %t.simple1.bundle
 
 // RUN: clang-offload-bundler -unbundle -type=a -targets=openmp-amdgcn-amd-amdhsa-gfx906,openmp-amdgcn-amd-amdhsa-gfx908 -inputs=%t.input-archive.a -outputs=%t-archive-gfx906-simple.a,%t-archive-gfx908-simple.a
@@ -423,6 +424,19 @@
 // RUN: cat %t-archive-gfx803-empty.a | FileCheck %s -check-prefix=EMPTYARCHIVE
 // EMPTYARCHIVE: !
 
+// Check compatibility of OpenMP code objects found in the heterogeneous archive library with HIP code objects of the target
+// RUN: clang-offload-bundler -unbundle -type=a -targets=hip-amdgcn-amd-amdhsa-gfx906,hipv4-amdgcn-amd-amdhsa-gfx908 -inputs=%t.input-archive.a -outputs=%t-hip-archive-gfx906-simple.a,%t-hipv4-archive-gfx908-simple.a -hip-openmp-compatible
+// RUN: llvm-ar t %t-hip-archive-gfx906-simple.a | FileCheck %s -check-prefix=HIPOPENMPCOMPAT
+// HIPOPENMPCOMPAT: simple-openmp-amdgcn-amd-amdhsa-gfx906
+// RUN: llvm-ar t %t-hipv4-archive-gfx908-simple.a | FileCheck %s -check-prefix=HIPv4OPENMPCOMPAT
+// HIPv4OPENMPCOMPAT: simple-openmp-amdgcn-amd-amdhsa--gfx908
+
+// Check compatibility of HIP code objects found in the heterogeneous archive library with OpenMP code objects of the target
+// RUN: clang-offload-bundler -unbundle -type=a -targets=openmp-amdgcn-amd-amdhsa--gfx906 \
+// RUN:   -outputs=%T/hip-openmp_906.a

[PATCH] D120271: [Clang] Add offload kind to embedded offload object

2022-02-22 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam added a comment.

Will it be possible to use the Bundle Entry ID format [1] for naming sections, so
that it serves as a sort of primary key for referring to an offload object?
The arch string following the triple will be just offload-arch (or march, or
mcpu) for others, but for amdgpu it may contain some more information.

[1] https://clang.llvm.org/docs/ClangOffloadBundler.html#bundle-entry-id


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120271/new/

https://reviews.llvm.org/D120271

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D119256: [OpenMP][Clang] Move partial support of reverse offload to a future version

2022-02-08 Thread Saiyedul Islam via Phabricator via cfe-commits

This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG4db88a54b6d4: [OpenMP][Clang] Move partial support of 
reverse offload to a future version (authored by saiislam).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D119256/new/

https://reviews.llvm.org/D119256

Files:
  clang/test/OpenMP/requires_ast_print.cpp
  clang/test/OpenMP/requires_messages.cpp
  clang/test/OpenMP/requires_target_messages.cpp
  clang/test/OpenMP/target_ast_print.cpp
  clang/test/OpenMP/target_device_codegen.cpp
  llvm/include/llvm/Frontend/OpenMP/OMP.td

Index: llvm/include/llvm/Frontend/OpenMP/OMP.td
===
--- llvm/include/llvm/Frontend/OpenMP/OMP.td
+++ llvm/include/llvm/Frontend/OpenMP/OMP.td
@@ -590,7 +590,15 @@
   let allowedClauses = [
 VersionedClause,
 VersionedClause,
-VersionedClause,
+// OpenMP 5.2 Spec: If an implementation is not supporting a requirement
+// (reverse offload in this case) then it should give compile-time error
+// termination.
+// Seeting supported version for reverse_offload to a distant future version
+// 9.9 so that its partial support can be tested in the meantime.
+//
+// TODO: Correct this supprted version number whenever complete
+// implementation of reverse_offload is available.
+VersionedClause,
 VersionedClause,
 VersionedClause
   ];
Index: clang/test/OpenMP/target_device_codegen.cpp
===
--- clang/test/OpenMP/target_device_codegen.cpp
+++ clang/test/OpenMP/target_device_codegen.cpp
@@ -6,12 +6,25 @@
 // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
 // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=99 -DOMP99 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,REV
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=99 -DOMP99 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=99 -DOMP99 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,REV
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=99 -DOMP99 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefixes=SIMD-ONLY0
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=99 -DOMP99 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=99 -DOMP99 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=SIMD-ONLY0
+
 // SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
+
+#ifdef OMP99
 #pragma omp requires reverse_offload
+#endif
+
 void foo(int n) {
 
   // CHECK:   [[N:%.+]] = load i32, i32* [[N_ADDR:%.+]],
@@ -40,11 +53,14 @@
   // CHECK:   [[END]]
   #pragma omp target device(device_num: n)
   ;
-  // CHECK-NOT:   call i32 @__tgt_target_mapper(%struct.ident_t* @{{.+}},
-  // CHECK:   call void @__omp_offloading_{{.+}}_l46()
-  // CHECK-NOT:   call i32 @__tgt_target_mapper(%struct.ident_t* @{{.+}},
+
+#ifdef OMP99
+  // REV-NOT:   call i32 @__tgt_target_mapper(%struct.ident_t* @{{.+}},
+  // REV:   call void @__omp_offloading_{{.+}}_l61()
+  // REV-NOT:   call i32 @__tgt_target_mapper(%struct.ident_t* @{{.+}},
   #pragma omp target device(ancestor: n)
   ;
+#endif
 }
 
 #endif
Index: clang/test/OpenMP/target_ast_print.cpp
===
--- clang/test/OpenMP/target_ast_print.cpp
+++ clang/test/OpenMP/target_ast_print.cpp
@@ -342,7 +342,18 @@
 // RUN: %clang_cc1 -DOMP5 -verify -fopenmp-simd -fopenmp-version=50 -ast-print %s | FileCheck %s --check-prefix OMP5
 // RUN: %clang_cc1 -DOMP5 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -DOMP5 -fopenmp-simd -fopenmp-version=50 -std=c++11

[PATCH] D119256: [OpenMP][Clang] Move partial support of reverse offload to a future version

2022-02-08 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam created this revision.
saiislam added reviewers: ABataev, jdoerfert, JonChesterfield.
Herald added subscribers: guansong, yaxunl.
saiislam requested review of this revision.
Herald added subscribers: llvm-commits, cfe-commits, sstefan1.
Herald added projects: clang, LLVM.

The OpenMP 5.2 specification requires that unsupported clauses of the
`requires` directive produce compile-time error termination. This patch
moves the current partial support of reverse_offload to a distant future
version, 9.9, so that the existing code can be tested and maintained until
a complete implementation is available.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D119256

Files:
  clang/test/OpenMP/requires_ast_print.cpp
  clang/test/OpenMP/requires_messages.cpp
  clang/test/OpenMP/requires_target_messages.cpp
  clang/test/OpenMP/target_ast_print.cpp
  clang/test/OpenMP/target_device_codegen.cpp
  llvm/include/llvm/Frontend/OpenMP/OMP.td

Index: llvm/include/llvm/Frontend/OpenMP/OMP.td
===
--- llvm/include/llvm/Frontend/OpenMP/OMP.td
+++ llvm/include/llvm/Frontend/OpenMP/OMP.td
@@ -590,7 +590,15 @@
   let allowedClauses = [
 VersionedClause,
 VersionedClause,
-VersionedClause,
+// OpenMP 5.2 Spec: If an implementation is not supporting a requirement
+// (reverse offload in this case) then it should give compile-time error
+// termination.
+// Seeting supported version for reverse_offload to a distant future version
+// 9.9 so that its partial support can be tested in the meantime.
+//
+// TODO: Correct this supprted version number whenever complete
+// implementation of reverse_offload is available.
+VersionedClause,
 VersionedClause,
 VersionedClause
   ];
Index: clang/test/OpenMP/target_device_codegen.cpp
===
--- clang/test/OpenMP/target_device_codegen.cpp
+++ clang/test/OpenMP/target_device_codegen.cpp
@@ -6,12 +6,25 @@
 // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
 // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=99 -DOMP99 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,REV
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=99 -DOMP99 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=99 -DOMP99 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,REV
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=99 -DOMP99 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefixes=SIMD-ONLY0
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=99 -DOMP99 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=99 -DOMP99 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=SIMD-ONLY0
+
 // SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
+
+#ifdef OMP99
 #pragma omp requires reverse_offload
+#endif
+
 void foo(int n) {
 
   // CHECK:   [[N:%.+]] = load i32, i32* [[N_ADDR:%.+]],
@@ -40,11 +53,14 @@
   // CHECK:   [[END]]
   #pragma omp target device(device_num: n)
   ;
-  // CHECK-NOT:   call i32 @__tgt_target_mapper(%struct.ident_t* @{{.+}},
-  // CHECK:   call void @__omp_offloading_{{.+}}_l46()
-  // CHECK-NOT:   call i32 @__tgt_target_mapper(%struct.ident_t* @{{.+}},
+
+#ifdef OMP99
+  // REV-NOT:   call i32 @__tgt_target_mapper(%struct.ident_t* @{{.+}},
+  // REV:   call void @__omp_offloading_{{.+}}_l61()
+  // REV-NOT:   call i32 @__tgt_target_mapper(%struct.ident_t* @{{.+}},
   #pragma omp target device(ancestor: n)
   ;
+#endif
 }
 
 #endif
Index: clang/test/OpenMP/target_ast_print.cpp
===
--- clang/test/OpenMP/target_ast_print.cpp
+++ clang/test/OpenMP/target_ast_print.cpp
@@ -342,7 +342,18 @@
 // RUN: %clang_cc1 -DOMP5 -verify -fopenmp-simd

[PATCH] D118887: [OpenMP][Clang] Allow ancestor device modifier only with reverse offloading

2022-02-04 Thread Saiyedul Islam via Phabricator via cfe-commits

This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGae9c0740648f: [OpenMP][Clang] Allow ancestor device modifier 
only with reverse offloading (authored by saiislam).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D118887/new/

https://reviews.llvm.org/D118887

Files:
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/lib/Sema/SemaOpenMP.cpp
  clang/test/OpenMP/target_ast_print.cpp
  clang/test/OpenMP/target_device_ancestor_messages.cpp
  clang/test/OpenMP/target_device_codegen.cpp


Index: clang/test/OpenMP/target_device_codegen.cpp
===
--- clang/test/OpenMP/target_device_codegen.cpp
+++ clang/test/OpenMP/target_device_codegen.cpp
@@ -11,7 +11,7 @@
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
-
+#pragma omp requires reverse_offload
 void foo(int n) {
 
   // CHECK:   [[N:%.+]] = load i32, i32* [[N_ADDR:%.+]],
Index: clang/test/OpenMP/target_device_ancestor_messages.cpp
===
--- /dev/null
+++ clang/test/OpenMP/target_device_ancestor_messages.cpp
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 -triple=x86_64 -verify -fopenmp -fopenmp-targets=x86_64 -x 
c++ -fexceptions -fcxx-exceptions %s
+// RUN: %clang_cc1 -triple=x86_64 -verify -fopenmp-simd 
-fopenmp-targets=x86_64 -x c++ -fexceptions -fcxx-exceptions %s
+
+void bar() {
+#pragma omp target device(ancestor : 1) // expected-error {{Device clause with 
ancestor device-modifier used without specifying 'requires reverse_offload'}}
+  ;
+}
Index: clang/test/OpenMP/target_ast_print.cpp
===
--- clang/test/OpenMP/target_ast_print.cpp
+++ clang/test/OpenMP/target_ast_print.cpp
@@ -342,7 +342,7 @@
 // RUN: %clang_cc1 -DOMP5 -verify -fopenmp-simd -fopenmp-version=50 -ast-print 
%s | FileCheck %s --check-prefix OMP5
 // RUN: %clang_cc1 -DOMP5 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 
-emit-pch -o %t %s
 // RUN: %clang_cc1 -DOMP5 -fopenmp-simd -fopenmp-version=50 -std=c++11 
-include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s 
--check-prefix OMP5
-
+#pragma omp requires reverse_offload
 typedef void **omp_allocator_handle_t;
 extern const omp_allocator_handle_t omp_null_allocator;
 extern const omp_allocator_handle_t omp_default_mem_alloc;
Index: clang/lib/Sema/SemaOpenMP.cpp
===
--- clang/lib/Sema/SemaOpenMP.cpp
+++ clang/lib/Sema/SemaOpenMP.cpp
@@ -18759,6 +18759,18 @@
   if (ErrorFound)
 return nullptr;
 
+  // OpenMP 5.0 [2.12.5, Restrictions]
+  // In case of ancestor device-modifier, a requires directive with
+  // the reverse_offload clause must be specified.
+  if (Modifier == OMPC_DEVICE_ancestor) {
+if (!DSAStack->hasRequiresDeclWithClause()) {
+  targetDiag(
+  StartLoc,
+  diag::err_omp_device_ancestor_without_requires_reverse_offload);
+  ErrorFound = true;
+}
+  }
+
   OpenMPDirectiveKind DKind = DSAStack->getCurrentDirective();
   OpenMPDirectiveKind CaptureRegion =
   getOpenMPCaptureRegionForClause(DKind, OMPC_device, LangOpts.OpenMP);
Index: clang/include/clang/Basic/DiagnosticSemaKinds.td
===
--- clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10696,6 +10696,8 @@
   "'%0' region encountered before requires directive with '%1' clause">;
 def note_omp_requires_encountered_directive : Note <
   "'%0' previously encountered here">;
+def err_omp_device_ancestor_without_requires_reverse_offload : Error <
+  "Device clause with ancestor device-modifier used without specifying 
'requires reverse_offload'">;
 def err_omp_invalid_scope : Error <
   "'#pragma omp %0' directive must appear only in file scope">;
 def note_omp_invalid_length_on_this_ptr_mapping : Note <


Index: clang/test/OpenMP/target_device_codegen.cpp
===
--- clang/test/OpenMP/target_device_codegen.cpp
+++ clang/test/OpenMP/target_device_codegen.cpp
@@ -11,7 +11,7 @@
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
-
+#pragma omp requires reverse_offload
 void foo(int n) {
 
   // CHECK:   [[N:%.+]] = load i32, i32* [[N_ADDR:%.+]],
Index: clang/test/OpenMP/target_device_ancestor_messages.cpp
===
--- /dev/null
+++ clang/test/OpenMP/target_device_ancestor_messages.cpp
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 -triple=x86_64 -verify -fopenmp -fopenmp-targets=x86_64 -x c++ -fexceptions -fcxx-exceptions %s
+// RUN: %clang_cc1 -triple=x86_64 -verify -fopenmp-simd -fopenmp-targets=x86_64 -x c++ -fexceptions -fcxx-exceptions %s
+
+void bar() {
+#pragma omp target

[PATCH] D118887: [OpenMP][Clang] Allow ancestor device modifier only with reverse offloading

2022-02-03 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam updated this revision to Diff 405591.
saiislam marked an inline comment as done.
saiislam added a comment.

Restored the device ancestor codegen unit test with `requires reverse_offload`.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D118887/new/

https://reviews.llvm.org/D118887

Files:
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/lib/Sema/SemaOpenMP.cpp
  clang/test/OpenMP/target_ast_print.cpp
  clang/test/OpenMP/target_device_ancestor_messages.cpp
  clang/test/OpenMP/target_device_codegen.cpp


Index: clang/test/OpenMP/target_device_codegen.cpp
===
--- clang/test/OpenMP/target_device_codegen.cpp
+++ clang/test/OpenMP/target_device_codegen.cpp
@@ -11,7 +11,7 @@
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
-
+#pragma omp requires reverse_offload
 void foo(int n) {
 
   // CHECK:   [[N:%.+]] = load i32, i32* [[N_ADDR:%.+]],
Index: clang/test/OpenMP/target_device_ancestor_messages.cpp
===
--- /dev/null
+++ clang/test/OpenMP/target_device_ancestor_messages.cpp
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 -triple=x86_64 -verify -fopenmp -fopenmp-targets=x86_64 -x 
c++ -fexceptions -fcxx-exceptions %s
+// RUN: %clang_cc1 -triple=x86_64 -verify -fopenmp-simd 
-fopenmp-targets=x86_64 -x c++ -fexceptions -fcxx-exceptions %s
+
+void bar() {
+#pragma omp target device(ancestor : 1) // expected-error {{Device clause with 
ancestor device-modifier used without specifying 'requires reverse_offload'}}
+  ;
+}
Index: clang/test/OpenMP/target_ast_print.cpp
===
--- clang/test/OpenMP/target_ast_print.cpp
+++ clang/test/OpenMP/target_ast_print.cpp
@@ -342,7 +342,7 @@
 // RUN: %clang_cc1 -DOMP5 -verify -fopenmp-simd -fopenmp-version=50 -ast-print 
%s | FileCheck %s --check-prefix OMP5
 // RUN: %clang_cc1 -DOMP5 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 
-emit-pch -o %t %s
 // RUN: %clang_cc1 -DOMP5 -fopenmp-simd -fopenmp-version=50 -std=c++11 
-include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s 
--check-prefix OMP5
-
+#pragma omp requires reverse_offload
 typedef void **omp_allocator_handle_t;
 extern const omp_allocator_handle_t omp_null_allocator;
 extern const omp_allocator_handle_t omp_default_mem_alloc;
Index: clang/lib/Sema/SemaOpenMP.cpp
===
--- clang/lib/Sema/SemaOpenMP.cpp
+++ clang/lib/Sema/SemaOpenMP.cpp
@@ -18759,6 +18759,18 @@
   if (ErrorFound)
 return nullptr;
 
+  // OpenMP 5.0 [2.12.5, Restrictions]
+  // In case of ancestor device-modifier, a requires directive with
+  // the reverse_offload clause must be specified.
+  if (Modifier == OMPC_DEVICE_ancestor) {
+if (!DSAStack->hasRequiresDeclWithClause()) {
+  targetDiag(
+  StartLoc,
+  diag::err_omp_device_ancestor_without_requires_reverse_offload);
+  ErrorFound = true;
+}
+  }
+
   OpenMPDirectiveKind DKind = DSAStack->getCurrentDirective();
   OpenMPDirectiveKind CaptureRegion =
   getOpenMPCaptureRegionForClause(DKind, OMPC_device, LangOpts.OpenMP);
Index: clang/include/clang/Basic/DiagnosticSemaKinds.td
===
--- clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10699,6 +10699,8 @@
   "'%0' region encountered before requires directive with '%1' clause">;
 def note_omp_requires_encountered_directive : Note <
   "'%0' previously encountered here">;
+def err_omp_device_ancestor_without_requires_reverse_offload : Error <
+  "Device clause with ancestor device-modifier used without specifying 
'requires reverse_offload'">;
 def err_omp_invalid_scope : Error <
   "'#pragma omp %0' directive must appear only in file scope">;
 def note_omp_invalid_length_on_this_ptr_mapping : Note <


Index: clang/test/OpenMP/target_device_codegen.cpp
===
--- clang/test/OpenMP/target_device_codegen.cpp
+++ clang/test/OpenMP/target_device_codegen.cpp
@@ -11,7 +11,7 @@
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
-
+#pragma omp requires reverse_offload
 void foo(int n) {
 
   // CHECK:   [[N:%.+]] = load i32, i32* [[N_ADDR:%.+]],
Index: clang/test/OpenMP/target_device_ancestor_messages.cpp
===
--- /dev/null
+++ clang/test/OpenMP/target_device_ancestor_messages.cpp
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 -triple=x86_64 -verify -fopenmp -fopenmp-targets=x86_64 -x c++ -fexceptions -fcxx-exceptions %s
+// RUN: %clang_cc1 -triple=x86_64 -verify -fopenmp-simd -fopenmp-targets=x86_64 -x c++ -fexceptions -fcxx-exceptions %s
+
+void bar() {
+#pragma omp target device(ancestor : 1) // expected-error {{Device clause with ancestor

[PATCH] D118887: [OpenMP][Clang] Allow ancestor device modifier only with reverse offloading

2022-02-03 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam added inline comments.



Comment at: clang/test/OpenMP/target_device_codegen.cpp:43-47
-  // CHECK-NOT:   call i32 @__tgt_target_mapper(%struct.ident_t* @{{.+}},
-  // CHECK:   call void @__omp_offloading_{{.+}}_l46()
-  // CHECK-NOT:   call i32 @__tgt_target_mapper(%struct.ident_t* @{{.+}},
-  #pragma omp target device(ancestor: n)
-  ;

ABataev wrote:
> Do we have a codegen test for the ancestor modifier?
We shouldn't have a codegen test for the ancestor modifier because it can only 
be used when `requires reverse_offload` is specified, and Spec 5.2 says that if 
an implementation does not support a requirement (reverse offload in this case), 
then it should terminate with a compile-time error [1].

I am going to propose this change in a different phab review.

[1] [[ 
https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf | 
OpenMP API Specification 5.2 ]], Section 8.2.1, lines 12-13, page 212.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D118887/new/

https://reviews.llvm.org/D118887

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D118887: [OpenMP][Clang] Allow ancestor device modifier only with reverse offloading

2022-02-03 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam created this revision.
saiislam added reviewers: ABataev, jdoerfert, JonChesterfield.
Herald added subscribers: guansong, yaxunl.
saiislam requested review of this revision.
Herald added subscribers: cfe-commits, sstefan1.
Herald added a project: clang.

OpenMP Spec 5.0 [2.12.5, Restrictions]: If a device clause in which the
ancestor device-modifier appears is present on the target construct,
then a requires directive with the reverse_offload clause must be
specified.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D118887

Files:
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/lib/Sema/SemaOpenMP.cpp
  clang/test/OpenMP/target_ast_print.cpp
  clang/test/OpenMP/target_device_ancestor_messages.cpp
  clang/test/OpenMP/target_device_codegen.cpp


Index: clang/test/OpenMP/target_device_codegen.cpp
===
--- clang/test/OpenMP/target_device_codegen.cpp
+++ clang/test/OpenMP/target_device_codegen.cpp
@@ -40,11 +40,6 @@
   // CHECK:   [[END]]
   #pragma omp target device(device_num: n)
   ;
-  // CHECK-NOT:   call i32 @__tgt_target_mapper(%struct.ident_t* @{{.+}},
-  // CHECK:   call void @__omp_offloading_{{.+}}_l46()
-  // CHECK-NOT:   call i32 @__tgt_target_mapper(%struct.ident_t* @{{.+}},
-  #pragma omp target device(ancestor: n)
-  ;
 }
 
 #endif
Index: clang/test/OpenMP/target_device_ancestor_messages.cpp
===
--- /dev/null
+++ clang/test/OpenMP/target_device_ancestor_messages.cpp
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 -triple=x86_64 -verify -fopenmp -fopenmp-targets=x86_64 -x 
c++ -fexceptions -fcxx-exceptions %s
+// RUN: %clang_cc1 -triple=x86_64 -verify -fopenmp-simd 
-fopenmp-targets=x86_64 -x c++ -fexceptions -fcxx-exceptions %s
+
+void bar() {
+#pragma omp target device(ancestor : 1) // expected-error {{Device clause with 
ancestor device-modifier used without specifying 'requires reverse_offload'}}
+  ;
+}
Index: clang/test/OpenMP/target_ast_print.cpp
===
--- clang/test/OpenMP/target_ast_print.cpp
+++ clang/test/OpenMP/target_ast_print.cpp
@@ -342,7 +342,7 @@
 // RUN: %clang_cc1 -DOMP5 -verify -fopenmp-simd -fopenmp-version=50 -ast-print 
%s | FileCheck %s --check-prefix OMP5
 // RUN: %clang_cc1 -DOMP5 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 
-emit-pch -o %t %s
 // RUN: %clang_cc1 -DOMP5 -fopenmp-simd -fopenmp-version=50 -std=c++11 
-include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s 
--check-prefix OMP5
-
+#pragma omp requires reverse_offload
 typedef void **omp_allocator_handle_t;
 extern const omp_allocator_handle_t omp_null_allocator;
 extern const omp_allocator_handle_t omp_default_mem_alloc;
Index: clang/lib/Sema/SemaOpenMP.cpp
===
--- clang/lib/Sema/SemaOpenMP.cpp
+++ clang/lib/Sema/SemaOpenMP.cpp
@@ -18759,6 +18759,18 @@
   if (ErrorFound)
 return nullptr;
 
+  // OpenMP 5.0 [2.12.5, Restrictions]
+  // In case of ancestor device-modifier, a requires directive with
+  // the reverse_offload clause must be specified.
+  if (Modifier == OMPC_DEVICE_ancestor) {
+if (!DSAStack->hasRequiresDeclWithClause()) {
+  targetDiag(
+  StartLoc,
+  diag::err_omp_device_ancestor_without_requires_reverse_offload);
+  ErrorFound = true;
+}
+  }
+
   OpenMPDirectiveKind DKind = DSAStack->getCurrentDirective();
   OpenMPDirectiveKind CaptureRegion =
   getOpenMPCaptureRegionForClause(DKind, OMPC_device, LangOpts.OpenMP);
Index: clang/include/clang/Basic/DiagnosticSemaKinds.td
===
--- clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10699,6 +10699,8 @@
   "'%0' region encountered before requires directive with '%1' clause">;
 def note_omp_requires_encountered_directive : Note <
   "'%0' previously encountered here">;
+def err_omp_device_ancestor_without_requires_reverse_offload : Error <
+  "Device clause with ancestor device-modifier used without specifying 
'requires reverse_offload'">;
 def err_omp_invalid_scope : Error <
   "'#pragma omp %0' directive must appear only in file scope">;
 def note_omp_invalid_length_on_this_ptr_mapping : Note <


Index: clang/test/OpenMP/target_device_codegen.cpp
===
--- clang/test/OpenMP/target_device_codegen.cpp
+++ clang/test/OpenMP/target_device_codegen.cpp
@@ -40,11 +40,6 @@
   // CHECK:   [[END]]
   #pragma omp target device(device_num: n)
   ;
-  // CHECK-NOT:   call i32 @__tgt_target_mapper(%struct.ident_t* @{{.+}},
-  // CHECK:   call void @__omp_offloading_{{.+}}_l46()
-  // CHECK-NOT:   call i32 @__tgt_target_mapper(%struct.ident_t* @{{.+}},
-  #pragma omp target device(ancestor: n)
-  ;
 }
 
 #endif
Index:

[PATCH] D116544: [Clang] Introduce Clang Linker Wrapper Tool

2022-01-31 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam added a comment.

Can you please add this tool's doc in clang/docs?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D116544/new/

https://reviews.llvm.org/D116544

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D117049: [OpenMP] Add support for embedding bitcode images in wrapper tool

2022-01-19 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam added a comment.

It seems that this patch, along with D117156 and D117246, is giving a 
`patch application failed` error 
[https://buildkite.com/llvm-project/diff-checks/builds/82688].
`arc patch` is also giving the same error.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D117049/new/

https://reviews.llvm.org/D117049

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D117120: [Doc] Add documentation for the clang-offload-wrapper tool (NFC)

2022-01-19 Thread Saiyedul Islam via Phabricator via cfe-commits

This revision was automatically updated to reflect the committed changes.
Closed by commit rG0731f6ba4f57: [Doc] Add documentation for the 
clang-offload-wrapper tool (NFC) (authored by saiislam).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D117120/new/

https://reviews.llvm.org/D117120

Files:
  clang/docs/ClangOffloadWrapper.rst
  clang/docs/index.rst

Index: clang/docs/index.rst
===
--- clang/docs/index.rst
+++ clang/docs/index.rst
@@ -84,6 +84,7 @@
ClangFormattedStatus
ClangNvlinkWrapper
ClangOffloadBundler
+   ClangOffloadWrapper
 
 Design Documents
 
Index: clang/docs/ClangOffloadWrapper.rst
===
--- /dev/null
+++ clang/docs/ClangOffloadWrapper.rst
@@ -0,0 +1,220 @@
+=
+Clang Offload Wrapper
+=
+
+.. contents::
+   :local:
+
+.. _clang-offload-wrapper:
+
+Introduction
+
+
+This tool is used in OpenMP offloading toolchain to embed device code objects
+(usually ELF) into a wrapper host llvm IR (bitcode) file. The wrapper host IR
+is then assembled and linked with host code objects to generate the executable
+binary. See :ref:`image-binary-embedding-execution` for more details.
+
+Usage
+=
+
+This tool can be used as follows:
+
+.. code-block:: console
+
+  $ clang-offload-wrapper -help
+  OVERVIEW: A tool to create a wrapper bitcode for offload target binaries.
+  Takes offload target binaries as input and produces bitcode file containing
+  target binaries packaged as data and initialization code which registers
+  target binaries in offload runtime.
+  USAGE: clang-offload-wrapper [options] 
+  OPTIONS:
+  Generic Options:
+--help - Display available options (--help-hidden for more)
+--help-list- Display list of available options (--help-list-hidden for more)
+--version  - Display the version of this program
+  clang-offload-wrapper options:
+-o=  - Output filename
+--target=  - Target triple for the output module
+
+Example
+===
+
+.. code-block:: console
+
+  clang-offload-wrapper -target host-triple -o host-wrapper.bc gfx90a-binary.out
+
+.. _openmp-device-binary_embedding:
+
+OpenMP Device Binary Embedding
+==
+
+Various structures and functions used in the wrapper host IR form the interface
+between the executable binary and the OpenMP runtime.
+
+Enum Types
+--
+
+:ref:`table-offloading-declare-target-flags` lists different flag for
+offloading entries.
+
+  .. table:: Offloading Declare Target Flags Enum
+:name: table-offloading-declare-target-flags
+
++-+---+--+
+|  Name   | Value | Description  |
++=+===+==+
+| OMP_DECLARE_TARGET_LINK | 0x01  | Mark the entry as having a 'link' attribute (w.r.t. link clause) |
++-+---+--+
+| OMP_DECLARE_TARGET_CTOR | 0x02  | Mark the entry as being a global constructor |
++-+---+--+
+| OMP_DECLARE_TARGET_DTOR | 0x04  | Mark the entry as being a global destructor  |
++-+---+--+
+
+Structure Types
+---
+
+:ref:`table-tgt_offload_entry`, :ref:`table-tgt_device_image`, and
+:ref:`table-tgt_bin_desc` are the structures used in the wrapper host IR.
+
+  .. table:: __tgt_offload_entry structure
+:name: table-tgt_offload_entry
+
++-+++
+|   Type  | Identifier | Description|
++=+++
+|  void*  |addr| Address of global symbol within device image (function or global)  |
++-+++
+|  char*  |name| Name of the symbol |
++-+++
+|  size_t |size| Size of the entry info (0 if it is a function)

[PATCH] D117120: [Doc] Add documentation for the clang-offload-wrapper tool (NFC)

2022-01-18 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam added a comment.

Ping :-)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D117120/new/

https://reviews.llvm.org/D117120

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D117120: [Doc] Add documentation for the clang-offload-wrapper tool (NFC)

2022-01-12 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam created this revision.
saiislam added reviewers: sdmitriev, jdoerfert, grokos, JonChesterfield, 
carlo.bertolli, vzakhari.
Herald added a subscriber: arphaman.
saiislam requested review of this revision.
Herald added subscribers: cfe-commits, sstefan1.
Herald added a project: clang.

Add the missing documentation for this tool.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D117120

Files:
  clang/docs/ClangOffloadWrapper.rst
  clang/docs/index.rst

Index: clang/docs/index.rst
===
--- clang/docs/index.rst
+++ clang/docs/index.rst
@@ -84,6 +84,7 @@
ClangFormattedStatus
ClangNvlinkWrapper
ClangOffloadBundler
+   ClangOffloadWrapper
 
 Design Documents
 
Index: clang/docs/ClangOffloadWrapper.rst
===
--- /dev/null
+++ clang/docs/ClangOffloadWrapper.rst
@@ -0,0 +1,220 @@
+=
+Clang Offload Wrapper
+=
+
+.. contents::
+   :local:
+
+.. _clang-offload-wrapper:
+
+Introduction
+
+
+This tool is used in OpenMP offloading toolchain to embed device code objects
+(usually ELF) into a wrapper host llvm IR (bitcode) file. The wrapper host IR
+is then assembled and linked with host code objects to generate the executable
+binary. See :ref:`image-binary-embedding-execution` for more details.
+
+Usage
+=
+
+This tool can be used as follows:
+
+.. code-block:: console
+
+  $ clang-offload-wrapper -help
+  OVERVIEW: A tool to create a wrapper bitcode for offload target binaries.
+  Takes offload target binaries as input and produces bitcode file containing
+  target binaries packaged as data and initialization code which registers
+  target binaries in offload runtime.
+  USAGE: clang-offload-wrapper [options] 
+  OPTIONS:
+  Generic Options:
+--help - Display available options (--help-hidden for more)
+--help-list- Display list of available options (--help-list-hidden for more)
+--version  - Display the version of this program
+  clang-offload-wrapper options:
+-o=  - Output filename
+--target=  - Target triple for the output module
+
+Example
+===
+
+.. code-block:: console
+
+  clang-offload-wrapper -target host-triple -o host-wrapper.bc gfx90a-binary.out
+
+.. _openmp-device-binary_embedding:
+
+OpenMP Device Binary Embedding
+==
+
+Various structures and functions used in the wrapper host IR form the interface
+between the executable binary and the OpenMP runtime.
+
+Enum Types
+--
+
+:ref:`table-offloading-declare-target-flags` lists different flag for
+offloading entries.
+
+  .. table:: Offloading Declare Target Flags Enum
+:name: table-offloading-declare-target-flags
+
++-+---+--+
+|  Name   | Value | Description  |
++=+===+==+
+| OMP_DECLARE_TARGET_LINK | 0x01  | Mark the entry as having a 'link' attribute (w.r.t. link clause) |
++-+---+--+
+| OMP_DECLARE_TARGET_CTOR | 0x02  | Mark the entry as being a global constructor |
++-+---+--+
+| OMP_DECLARE_TARGET_DTOR | 0x04  | Mark the entry as being a global destructor  |
++-+---+--+
+
+Structure Types
+---
+
+:ref:`table-tgt_offload_entry`, :ref:`table-tgt_device_image`, and
+:ref:`table-tgt_bin_desc` are the structures used in the wrapper host IR.
+
+  .. table:: __tgt_offload_entry structure
+:name: table-tgt_offload_entry
+
++-+++
+|   Type  | Identifier | Description|
++=+++
+|  void*  |addr| Address of global symbol within device image (function or global)  |
++-+++
+|  char*  |name| Name of the symbol |
++-+++
+|  size_t |size| Size of

[PATCH] D116549: [OpenMP][Clang] Allow passing target features in ISA trait for metadirective clause

2022-01-11 Thread Saiyedul Islam via Phabricator via cfe-commits

This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG876b5ea96bf5: [OpenMP][Clang] Allow passing target features 
in ISA trait for metadirective… (authored by saiislam).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D116549/new/

https://reviews.llvm.org/D116549

Files:
  clang/include/clang/Basic/DiagnosticParseKinds.td
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/lib/Parse/ParseOpenMP.cpp
  clang/test/OpenMP/metadirective_device_isa_codegen.cpp
  clang/test/OpenMP/metadirective_device_isa_codegen_amdgcn.cpp
  clang/test/OpenMP/metadirective_messages.cpp

Index: clang/test/OpenMP/metadirective_messages.cpp
===
--- clang/test/OpenMP/metadirective_messages.cpp
+++ clang/test/OpenMP/metadirective_messages.cpp
@@ -17,4 +17,6 @@
   ;
 #pragma omp metadirective when(device = {arch(nvptx)} : parallel default() // expected-error {{expected ',' or ')' in 'when' clause}} expected-error {{expected expression}}
   ;
+#pragma omp metadirective when(device = {isa("some-unsupported-feature")} : parallel) default(single) // expected-warning {{isa trait 'some-unsupported-feature' is not known to the current target; verify the spelling or consider restricting the context selector with the 'arch' selector further}}
+  ;
 }
Index: clang/test/OpenMP/metadirective_device_isa_codegen_amdgcn.cpp
===
--- /dev/null
+++ clang/test/OpenMP/metadirective_device_isa_codegen_amdgcn.cpp
@@ -0,0 +1,53 @@
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -fopenmp -x c++ -w -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -fopenmp -x c++ -w -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -target-cpu gfx906 -o - | FileCheck %s
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+int amdgcn_device_isa_selected() {
+  int threadCount = 0;
+
+#pragma omp target map(tofrom \
+   : threadCount)
+  {
+#pragma omp metadirective \
+when(device = {isa("flat-address-space")} \
+ : parallel) default(single)
+threadCount++;
+  }
+
+  return threadCount;
+}
+
+// CHECK: define weak amdgpu_kernel void @__omp_offloading_{{.*}}amdgcn_device_isa_selected
+// CHECK: user_code.entry:
+// CHECK: call void @__kmpc_parallel_51
+// CHECK-NOT: call i32 @__kmpc_single
+// CHECK: ret void
+
+int amdgcn_device_isa_not_selected() {
+  int threadCount = 0;
+
+#pragma omp target map(tofrom \
+   : threadCount)
+  {
+#pragma omp metadirective  \
+when(device = {isa("sse")} \
+ : parallel)   \
+when(device = {isa("another-unsupported-gpu-feature")} \
+ : parallel) default(single)
+threadCount++;
+  }
+
+  return threadCount;
+}
+// CHECK: define weak amdgpu_kernel void @__omp_offloading_{{.*}}amdgcn_device_isa_not_selected
+// CHECK: user_code.entry:
+// CHECK: call i32 @__kmpc_single
+// CHECK-NOT: call void @__kmpc_parallel_51
+// CHECK: ret void
+
+#endif
Index: clang/test/OpenMP/metadirective_device_isa_codegen.cpp
===
--- /dev/null
+++ clang/test/OpenMP/metadirective_device_isa_codegen.cpp
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -verify -w -fopenmp -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - -fsanitize-address-use-after-scope | FileCheck %s
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+void bar();
+
+void x86_64_device_isa_selected() {
+#pragma omp metadirective when(device = {isa("sse2")} \
+   : parallel) default(single)
+  bar();
+}
+// CHECK-LABEL: void @_Z26x86_64_device_isa_selectedv()
+// CHECK: ...) @__kmpc_fork_call{{.*}}@.omp_outlined.
+// CHECK: ret void
+
+// CHECK: define internal void @.omp_outlined.(
+// CHECK: @_Z3barv
+// CHECK: ret void
+
+void x86_64_device_isa_not_selected() {
+#pragma omp metadirective when(device = {isa("some-unsupported-feature")} \
+   : parallel) default(single)
+  bar();
+}
+// CHECK-LABEL: void @_Z30x86_64_device_isa_not_selectedv()
+// CHECK: call i32 @__kmpc_single
+// CHECK:  @_Z3barv
+// CHECK: call void @__kmpc_end_single
+// CHECK: ret void
+#endif
Index: clang/lib/Parse/ParseOpenMP.cpp
===
--- clang/lib/Parse/ParseOpenMP.cpp
+++ clang/lib/Parse/ParseOpenMP.cpp
@@ -2214,7 +2214,7 @@
   StringRef ISATrait) {
   // TODO Track

[PATCH] D116549: [OpenMP][Clang] Allow passing target features in ISA trait for metadirective clause

2022-01-11 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam updated this revision to Diff 398897.
saiislam added a comment.

1. Used a common diagnostic warning `warn_unknown_declare_variant_isa_trait` 
in both ParseOpenMP and SemaOpenMP, for declare variant and metadirectives.
2. Split the lit codegen tests into two files, one requiring 
`amdgpu-registered-target` and another for host only.
3. Added a lit test for the warning message at an appropriate place.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D116549/new/

https://reviews.llvm.org/D116549

Files:
  clang/include/clang/Basic/DiagnosticParseKinds.td
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/lib/Parse/ParseOpenMP.cpp
  clang/test/OpenMP/metadirective_device_isa_codegen.cpp
  clang/test/OpenMP/metadirective_device_isa_codegen_amdgcn.cpp
  clang/test/OpenMP/metadirective_messages.cpp

Index: clang/test/OpenMP/metadirective_messages.cpp
===
--- clang/test/OpenMP/metadirective_messages.cpp
+++ clang/test/OpenMP/metadirective_messages.cpp
@@ -17,4 +17,6 @@
   ;
 #pragma omp metadirective when(device = {arch(nvptx)} : parallel default() // expected-error {{expected ',' or ')' in 'when' clause}} expected-error {{expected expression}}
   ;
+#pragma omp metadirective when(device = {isa("some-unsupported-feature")} : parallel) default(single) // expected-warning {{isa trait 'some-unsupported-feature' is not known to the current target; verify the spelling or consider restricting the context selector with the 'arch' selector further}}
+  ;
 }
Index: clang/test/OpenMP/metadirective_device_isa_codegen_amdgcn.cpp
===
--- /dev/null
+++ clang/test/OpenMP/metadirective_device_isa_codegen_amdgcn.cpp
@@ -0,0 +1,53 @@
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -fopenmp -x c++ -w -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -fopenmp -x c++ -w -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -target-cpu gfx906 -o - | FileCheck %s
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+int amdgcn_device_isa_selected() {
+  int threadCount = 0;
+
+#pragma omp target map(tofrom \
+   : threadCount)
+  {
+#pragma omp metadirective \
+when(device = {isa("flat-address-space")} \
+ : parallel) default(single)
+threadCount++;
+  }
+
+  return threadCount;
+}
+
+// CHECK: define weak amdgpu_kernel void @__omp_offloading_{{.*}}amdgcn_device_isa_selected
+// CHECK: user_code.entry:
+// CHECK: call void @__kmpc_parallel_51
+// CHECK-NOT: call i32 @__kmpc_single
+// CHECK: ret void
+
+int amdgcn_device_isa_not_selected() {
+  int threadCount = 0;
+
+#pragma omp target map(tofrom \
+   : threadCount)
+  {
+#pragma omp metadirective  \
+when(device = {isa("sse")} \
+ : parallel)   \
+when(device = {isa("another-unsupported-gpu-feature")} \
+ : parallel) default(single)
+threadCount++;
+  }
+
+  return threadCount;
+}
+// CHECK: define weak amdgpu_kernel void @__omp_offloading_{{.*}}amdgcn_device_isa_not_selected
+// CHECK: user_code.entry:
+// CHECK: call i32 @__kmpc_single
+// CHECK-NOT: call void @__kmpc_parallel_51
+// CHECK: ret void
+
+#endif
Index: clang/test/OpenMP/metadirective_device_isa_codegen.cpp
===
--- /dev/null
+++ clang/test/OpenMP/metadirective_device_isa_codegen.cpp
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -verify -w -fopenmp -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - -fsanitize-address-use-after-scope | FileCheck %s
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+void bar();
+
+void x86_64_device_isa_selected() {
+#pragma omp metadirective when(device = {isa("sse2")} \
+   : parallel) default(single)
+  bar();
+}
+// CHECK-LABEL: void @_Z26x86_64_device_isa_selectedv()
+// CHECK: ...) @__kmpc_fork_call{{.*}}@.omp_outlined.
+// CHECK: ret void
+
+// CHECK: define internal void @.omp_outlined.(
+// CHECK: @_Z3barv
+// CHECK: ret void
+
+void x86_64_device_isa_not_selected() {
+#pragma omp metadirective when(device = {isa("some-unsupported-feature")} \
+   : parallel) default(single)
+  bar();
+}
+// CHECK-LABEL: void @_Z30x86_64_device_isa_not_selectedv()
+// CHECK: call i32 @__kmpc_single
+// CHECK:  @_Z3barv
+// CHECK: call void @__kmpc_end_single
+// CHECK: ret void
+#endif
Index: clang/lib/Parse/ParseOpenMP.cpp
===
--- clang/lib/Parse/ParseOpenMP.cpp
+++ clang/lib/Parse/ParseOpenMP.cpp

[PATCH] D116549: [OpenMP][Clang] Allow passing target features in ISA trait for metadirective clause

2022-01-10 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam added inline comments.



Comment at: clang/lib/Parse/ParseOpenMP.cpp:2533
+std::function<void(StringRef)> DiagUnknownTrait = [this, Loc](
+StringRef ISATrait) {};
+TargetOMPContext OMPCtx(ASTContext, std::move(DiagUnknownTrait),

jdoerfert wrote:
> jdoerfert wrote:
> > saiislam wrote:
> > > jdoerfert wrote:
> > > > Why doesn't this diagnose anything?
> > > Because an isa-feature will fail at least once, for either host 
> > > compilation or device compilation. So, no point in always giving a 
> > > warning.
> > That is debatable. 
> > 
> > First, if I compile for a single architecture there is no device 
> > compilation and it should warn.
> > Second, if I place the metadirective into a declare variant function or add 
> > a `kind(...)` selector to it it will also not warn even if you have 
> > multiple architectures.
> > 
> > 
> ```
> ASTContext &Context = getASTContext();
> std::function<void(StringRef)> DiagUnknownTrait = [this,
>                                                    CE](StringRef ISATrait) {
>   // TODO Track the selector locations in a way that is accessible here to
>   // improve the diagnostic location.
>   Diag(CE->getBeginLoc(), diag::warn_unknown_declare_variant_isa_trait)
>       << ISATrait;
> };
> TargetOMPContext OMPCtx(Context, std::move(DiagUnknownTrait),
>                         getCurFunctionDecl(),
>                         DSAStack->getConstructTraits());
> ```
> Already exists (SemaOpenMP). Why do we need a second, different diagnostic?
Isn't giving a remark better than a warning, when we know that in many cases 
this will be hit during a normal (expected) compilation for target offload?
A remark diagnostic gives users enough of a handle to understand the flow 
without having to deal explicitly with a warning when compiling their programs.

I am fine changing it to a warning if you feel strongly about this.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D116549/new/

https://reviews.llvm.org/D116549

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D116549: [OpenMP][Clang] Allow passing target features in ISA trait for metadirective clause

2022-01-10 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam updated this revision to Diff 398614.
saiislam added a comment.

Fixed the lit test failing in pre-check build bot.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D116549/new/

https://reviews.llvm.org/D116549

Files:
  clang/include/clang/Basic/DiagnosticParseKinds.td
  clang/lib/Parse/ParseOpenMP.cpp
  clang/test/OpenMP/metadirective_device_isa_codegen.cpp
  clang/test/OpenMP/metadirective_device_isa_messages.c

Index: clang/test/OpenMP/metadirective_device_isa_messages.c
===
--- /dev/null
+++ clang/test/OpenMP/metadirective_device_isa_messages.c
@@ -0,0 +1,14 @@
+// REQUIRES: x86-registered-target
+// RUN: %clang_cc1 -verify -fopenmp -x c -triple x86_64-unknown-linux -emit-llvm-only -target-cpu znver1 %s -Rremark-backend-plugin
+
+#ifndef HEADER
+#define HEADER
+
+void bar();
+
+void foo() {
+#pragma omp metadirective when(device = {isa("some-unsupported-feature")} : parallel) default(single) // expected-remark {{isa trait 'some-unsupported-feature' is not a valid feature of the target 'x86_64'}}
+  bar();
+}
+
+#endif
Index: clang/test/OpenMP/metadirective_device_isa_codegen.cpp
===
--- /dev/null
+++ clang/test/OpenMP/metadirective_device_isa_codegen.cpp
@@ -0,0 +1,81 @@
+// REQUIRES: amdgpu-registered-target
+// REQUIRES: x86-registered-target
+
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -target-cpu gfx906 -o - | FileCheck %s -check-prefix=AMDGPUISA
+
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - -fsanitize-address-use-after-scope -target-cpu x86-64| FileCheck %s -check-prefixes=X86_64ISA
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+int amdgcn_device_isa_selected() {
+  int threadCount = 0;
+
+#pragma omp target map(tofrom \
+   : threadCount)
+  {
+#pragma omp metadirective \
+when(device = {isa("flat-address-space")} \
+ : parallel) default(single)
+threadCount++;
+  }
+
+  return threadCount;
+}
+
+// AMDGPUISA: define weak amdgpu_kernel void @__omp_offloading_{{.*}}amdgcn_device_isa_selected
+// AMDGPUISA: user_code.entry:
+// AMDGPUISA: call void @__kmpc_parallel_51
+// AMDGPUISA-NOT: call i32 @__kmpc_single
+// AMDGPUISA: ret void
+
+int amdgcn_device_isa_not_selected() {
+  int threadCount = 0;
+
+#pragma omp target map(tofrom \
+   : threadCount)
+  {
+#pragma omp metadirective  \
+when(device = {isa("sse")} \
+ : parallel)   \
+when(device = {isa("another-unsupported-gpu-feature")} \
+ : parallel) default(single)
+threadCount++;
+  }
+
+  return threadCount;
+}
+// AMDGPUISA: define weak amdgpu_kernel void @__omp_offloading_{{.*}}amdgcn_device_isa_not_selected
+// AMDGPUISA: user_code.entry:
+// AMDGPUISA: call i32 @__kmpc_single
+// AMDGPUISA-NOT: call void @__kmpc_parallel_51
+// AMDGPUISA: ret void
+
+void bar();
+
+void x86_64_device_isa_selected() {
+#pragma omp metadirective when(device = {isa("sse2")} \
+   : parallel) default(single)
+  bar();
+}
+// X86_64ISA-LABEL: void @_Z26x86_64_device_isa_selectedv()
+// X86_64ISA: ...) @__kmpc_fork_call{{.*}}@.omp_outlined..1
+// X86_64ISA: ret void
+
+// X86_64ISA: define internal void @.omp_outlined..1(
+// X86_64ISA: @_Z3barv
+// X86_64ISA: ret void
+
+void x86_64_device_isa_not_selected() {
+#pragma omp metadirective when(device = {isa("some-unsupported-feature")} \
+   : parallel) default(single)
+  bar();
+}
+// X86_64ISA-LABEL: void @_Z30x86_64_device_isa_not_selectedv()
+// X86_64ISA: call i32 @__kmpc_single
+// X86_64ISA:  @_Z3barv
+// X86_64ISA: call void @__kmpc_end_single
+// X86_64ISA: ret void
+#endif
Index: clang/lib/Parse/ParseOpenMP.cpp
===
--- clang/lib/Parse/ParseOpenMP.cpp
+++ clang/lib/Parse/ParseOpenMP.cpp
@@ -2529,7 +2529,12 @@
 TPA.Revert();
 // End of the first iteration. Parser is reset to the start of metadirective
 
-TargetOMPContext OMPCtx(ASTContext, /* DiagUnknownTrait */ nullptr,
+std::function<void(StringRef)> DiagUnknownTrait =
+[this, Loc](StringRef ISATrait) {
+  Diag(Loc, diag::remark_unknown_declare_variant_isa_trait)
+  << ISATrait << this->getTargetInfo().getTriple().getArchName();
+};
+TargetOMPContext OMPCtx(ASTContext, std::move(DiagUnknownTrait),

[PATCH] D116549: [OpenMP][Clang] Allow passing target features in ISA trait for metadirective clause

2022-01-07 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam updated this revision to Diff 398179.
saiislam marked an inline comment as done.
saiislam added a comment.

Added diagnostic remarks for when ISA trait is not selected.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D116549/new/

https://reviews.llvm.org/D116549

Files:
  clang/include/clang/Basic/DiagnosticParseKinds.td
  clang/lib/Parse/ParseOpenMP.cpp
  clang/test/OpenMP/metadirective_device_isa_codegen.cpp
  clang/test/OpenMP/metadirective_device_isa_messages.c

Index: clang/test/OpenMP/metadirective_device_isa_messages.c
===
--- /dev/null
+++ clang/test/OpenMP/metadirective_device_isa_messages.c
@@ -0,0 +1,15 @@
+// REQUIRES: x86-registered-target
+// RUN: %clang_cc1 -verify -fopenmp -x c -triple x86_64-unknown-linux -emit-llvm-only -target-cpu znver1 %s -Rremark-backend-plugin
+
+#ifndef HEADER
+#define HEADER
+
+void bar();
+
+void foo() {
+#pragma omp metadirective when(device = {isa("some-unsupported-feature")} \
+   : parallel) default(single) // expected-remark {{isa trait 'some-unsupported-feature' is not a valid feature of the target 'x86_64'}}
+  bar();
+}
+
+#endif
Index: clang/test/OpenMP/metadirective_device_isa_codegen.cpp
===
--- /dev/null
+++ clang/test/OpenMP/metadirective_device_isa_codegen.cpp
@@ -0,0 +1,81 @@
+// REQUIRES: amdgpu-registered-target
+// REQUIRES: x86-registered-target
+
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -target-cpu gfx906 -o - | FileCheck %s -check-prefix=AMDGPUISA
+
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - -fsanitize-address-use-after-scope -target-cpu x86-64| FileCheck %s -check-prefixes=X86_64ISA
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+int amdgcn_device_isa_selected() {
+  int threadCount = 0;
+
+#pragma omp target map(tofrom \
+   : threadCount)
+  {
+#pragma omp metadirective \
+when(device = {isa("flat-address-space")} \
+ : parallel) default(single)
+threadCount++;
+  }
+
+  return threadCount;
+}
+
+// AMDGPUISA: define weak amdgpu_kernel void @__omp_offloading_{{.*}}amdgcn_device_isa_selected
+// AMDGPUISA: user_code.entry:
+// AMDGPUISA: call void @__kmpc_parallel_51
+// AMDGPUISA-NOT: call i32 @__kmpc_single
+// AMDGPUISA: ret void
+
+int amdgcn_device_isa_not_selected() {
+  int threadCount = 0;
+
+#pragma omp target map(tofrom \
+   : threadCount)
+  {
+#pragma omp metadirective  \
+when(device = {isa("sse")} \
+ : parallel)   \
+when(device = {isa("another-unsupported-gpu-feature")} \
+ : parallel) default(single)
+threadCount++;
+  }
+
+  return threadCount;
+}
+// AMDGPUISA: define weak amdgpu_kernel void @__omp_offloading_{{.*}}amdgcn_device_isa_not_selected
+// AMDGPUISA: user_code.entry:
+// AMDGPUISA: call i32 @__kmpc_single
+// AMDGPUISA-NOT: call void @__kmpc_parallel_51
+// AMDGPUISA: ret void
+
+void bar();
+
+void x86_64_device_isa_selected() {
+#pragma omp metadirective when(device = {isa("sse2")} \
+   : parallel) default(single)
+  bar();
+}
+// X86_64ISA-LABEL: void @_Z26x86_64_device_isa_selectedv()
+// X86_64ISA: ...) @__kmpc_fork_call{{.*}}@.omp_outlined..1
+// X86_64ISA: ret void
+
+// X86_64ISA: define internal void @.omp_outlined..1(
+// X86_64ISA: @_Z3barv
+// X86_64ISA: ret void
+
+void x86_64_device_isa_not_selected() {
+#pragma omp metadirective when(device = {isa("some-unsupported-feature")} \
+   : parallel) default(single)
+  bar();
+}
+// X86_64ISA-LABEL: void @_Z30x86_64_device_isa_not_selectedv()
+// X86_64ISA: call i32 @__kmpc_single
+// X86_64ISA:  @_Z3barv
+// X86_64ISA: call void @__kmpc_end_single
+// X86_64ISA: ret void
+#endif
Index: clang/lib/Parse/ParseOpenMP.cpp
===
--- clang/lib/Parse/ParseOpenMP.cpp
+++ clang/lib/Parse/ParseOpenMP.cpp
@@ -2529,7 +2529,12 @@
 TPA.Revert();
 // End of the first iteration. Parser is reset to the start of metadirective
 
-TargetOMPContext OMPCtx(ASTContext, /* DiagUnknownTrait */ nullptr,
+std::function<void(StringRef)> DiagUnknownTrait =
+[this, Loc](StringRef ISATrait) {
+  Diag(Loc, diag::remark_unknown_declare_variant_isa_trait)
+  << ISATrait << this->getTargetInfo().getTriple().getArchName();
+};
+

[PATCH] D116549: [OpenMP][Clang] Allow passing target features in ISA trait for metadirective clause

2022-01-06 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam updated this revision to Diff 397912.
saiislam added a comment.
Herald added a subscriber: jvesely.

Added target specific tests for ISA traits, for CPU as well as GPU.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D116549/new/

https://reviews.llvm.org/D116549

Files:
  clang/lib/Parse/ParseOpenMP.cpp
  clang/test/OpenMP/amdgcn_target_codegen.cpp
  clang/test/OpenMP/metadirective_implementation_codegen.c
  clang/test/OpenMP/metadirective_implementation_codegen.cpp

Index: clang/test/OpenMP/metadirective_implementation_codegen.cpp
===
--- clang/test/OpenMP/metadirective_implementation_codegen.cpp
+++ clang/test/OpenMP/metadirective_implementation_codegen.cpp
@@ -1,6 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - -fsanitize-address-use-after-scope | FileCheck %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple aarch64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - -fsanitize-address-use-after-scope | FileCheck %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple ppc64le-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - -fsanitize-address-use-after-scope | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - -fsanitize-address-use-after-scope -target-cpu x86-64| FileCheck %s -check-prefixes=SUPPORTEDISA,UNSUPPORTEDISA
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -35,6 +36,12 @@
: parallel) default(parallel for)
   for (int i = 0; i < 100; i++)
 ;
+#pragma omp metadirective when(device = {isa("sse2")} \
+   : parallel) default(single)
+  bar();
+#pragma omp metadirective when(device = {isa("some-unsupported-feature")} \
+   : parallel) default(single)
+  bar();
 }
 
 // CHECK-LABEL: void @_Z3foov()
@@ -44,6 +51,10 @@
 // CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_5:@.+]] to void
 // CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_6:@.+]] to void
 // CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_7:@.+]] to void
+// SUPPORTEDISA: ...) @__kmpc_fork_call(
+// UNSUPPORTEDISA: call i32 @__kmpc_single
+// UNSUPPORTEDISA:  @_Z3barv
+// UNSUPPORTEDISA: call void @__kmpc_end_single
 // CHECK: ret void
 
 // CHECK: define internal void [[OUTLINED_2]](
@@ -73,4 +84,7 @@
 // NO-CHECK: call void @__kmpc_for_static_fini
 // CHECK: ret void
 
+// SUPPORTEDISA: define internal void @.omp_outlined..6(
+// SUPPORTEDISA: @_Z3barv
+// SUPPORTEDISA: ret void
 #endif
Index: clang/test/OpenMP/metadirective_implementation_codegen.c
===
--- clang/test/OpenMP/metadirective_implementation_codegen.c
+++ clang/test/OpenMP/metadirective_implementation_codegen.c
@@ -1,6 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -x c -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s
 // RUN: %clang_cc1 -verify -fopenmp -x c -triple aarch64-unknown-linux -emit-llvm %s -o - | FileCheck %s
 // RUN: %clang_cc1 -verify -fopenmp -x c -triple ppc64le-unknown-linux -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -x c -triple x86_64-unknown-linux -emit-llvm -target-cpu x86-64 %s -o - | FileCheck %s -check-prefixes=SUPPORTEDISA,UNSUPPORTEDISA
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -35,10 +36,20 @@
: parallel) default(parallel for)
   for (int i = 0; i < 100; i++)
 ;
+#pragma omp metadirective when(device = {isa("sse2")} \
+   : parallel) default(single)
+  bar();
+#pragma omp metadirective when(device = {isa("some-unsupported-feature")} \
+   : parallel) default(single)
+  bar();
 }
 
 // CHECK: void @foo()
 // CHECK-COUNT-6: ...) @__kmpc_fork_call(
+// SUPPORTEDISA: ...) @__kmpc_fork_call(
+// UNSUPPORTEDISA: call i32 @__kmpc_single
+// UNSUPPORTEDISA: @bar
+// UNSUPPORTEDISA: call void @__kmpc_end_single
 // CHECK: ret void
 
 // CHECK: define internal void @.omp_outlined.(
@@ -68,4 +79,7 @@
 // NO-CHECK: call void @__kmpc_for_static_fini
 // CHECK: ret void
 
+// SUPPORTEDISA: define internal void @.omp_outlined..6(
+// SUPPORTEDISA: @bar
+// SUPPORTEDISA: ret void
 #endif
Index: clang/test/OpenMP/amdgcn_target_codegen.cpp
===
--- clang/test/OpenMP/amdgcn_target_codegen.cpp
+++ clang/test/OpenMP/amdgcn_target_codegen.cpp
@@ -2,6 +2,7 @@
 
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o

[PATCH] D116549: [OpenMP][Clang] Allow passing target features in ISA trait for metadirective clause

2022-01-06 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam added a comment.

In D116549#3218281 , @jdoerfert wrote:

> Can you make the test check for the diagnose message? Also, do we have a test 
> to verify an isa trait is properly handled?

I don't see the point of adding a diagnostic message when a feature is not 
valid for a certain target, because if the feature is valid for one of the 
host or device compilations, then it will definitely be invalid for the other.
I have added new tests in the next revision to check that the isa trait is 
handled properly.




Comment at: clang/lib/Parse/ParseOpenMP.cpp:2533
+std::function<void(StringRef)> DiagUnknownTrait = [this, Loc](
+StringRef ISATrait) {};
+TargetOMPContext OMPCtx(ASTContext, std::move(DiagUnknownTrait),

jdoerfert wrote:
> Why doesn't this diagnose nothing?
Because an isa-feature will fail at least once, for either host compilation or 
device compilation, there is no point in always giving a warning.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D116549/new/

https://reviews.llvm.org/D116549

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D116540: [OpenMP] Add nec and nvidia as compiler vendors for OpenMP

2022-01-04 Thread Saiyedul Islam via Phabricator via cfe-commits

This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG49f23afdc345: [OpenMP] Add nec and nvidia as compiler 
vendors for OpenMP (authored by saiislam).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D116540/new/

https://reviews.llvm.org/D116540

Files:
  clang/test/OpenMP/begin_declare_variant_messages.c
  clang/test/OpenMP/declare_variant_messages.c
  clang/test/OpenMP/declare_variant_messages.cpp
  llvm/include/llvm/Frontend/OpenMP/OMPKinds.def


Index: llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
===
--- llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -1130,6 +1130,8 @@
 __OMP_TRAIT_PROPERTY(implementation, vendor, ibm)
 __OMP_TRAIT_PROPERTY(implementation, vendor, intel)
 __OMP_TRAIT_PROPERTY(implementation, vendor, llvm)
+__OMP_TRAIT_PROPERTY(implementation, vendor, nec)
+__OMP_TRAIT_PROPERTY(implementation, vendor, nvidia)
 __OMP_TRAIT_PROPERTY(implementation, vendor, pgi)
 __OMP_TRAIT_PROPERTY(implementation, vendor, ti)
 __OMP_TRAIT_PROPERTY(implementation, vendor, unknown)
Index: clang/test/OpenMP/declare_variant_messages.cpp
===
--- clang/test/OpenMP/declare_variant_messages.cpp
+++ clang/test/OpenMP/declare_variant_messages.cpp
@@ -31,11 +31,11 @@
 #pragma omp declare variant(foofoo ) match(implementation = {vvv}) 
implementation // expected-warning {{'vvv' is not a valid context selector for 
the context set 'implementation'; selector ignored}} expected-note {{context 
selector options are: 'vendor' 'extension' 'unified_address' 
'unified_shared_memory' 'reverse_offload' 'dynamic_allocators' 
'atomic_default_mem_order'}} expected-note {{the ignored selector spans until 
here}} expected-error {{expected 'match' clause on 'omp declare variant' 
directive}}
 #pragma omp declare variant(foofoo ) match(implementation={xxx}) // 
expected-warning {{'xxx' is not a valid context selector for the context set 
'implementation'; selector ignored}} expected-note {{context selector options 
are: 'vendor' 'extension' 'unified_address' 'unified_shared_memory' 
'reverse_offload' 'dynamic_allocators' 'atomic_default_mem_order'}} 
expected-note {{the ignored selector spans until here}}
 #pragma omp declare variant(foofoo ) match(implementation={vendor}) // 
expected-warning {{the context selector 'vendor' in context set 
'implementation' requires a context property defined in parentheses; selector 
ignored}} expected-note {{the ignored selector spans until here}}
-#pragma omp declare variant(foofoo ) match(implementation={vendor(}) // 
expected-error {{expected ')'}} expected-warning {{expected identifier or 
string literal describing a context property; property skipped}} expected-note 
{{context property options are: 'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 
'intel' 'llvm' 'pgi' 'ti' 'unknown'}} expected-note {{to match this '('}}
-#pragma omp declare variant(foofoo ) match(implementation={vendor()}) // 
expected-warning {{expected identifier or string literal describing a context 
property; property skipped}} expected-note {{context property options are: 
'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 'intel' 'llvm' 'pgi' 'ti' 
'unknown'}}
+#pragma omp declare variant(foofoo ) match(implementation={vendor(}) // 
expected-error {{expected ')'}} expected-warning {{expected identifier or 
string literal describing a context property; property skipped}} expected-note 
{{context property options are: 'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 
'intel' 'llvm' 'nec' 'nvidia' 'pgi' 'ti' 'unknown'}} expected-note {{to match 
this '('}}
+#pragma omp declare variant(foofoo ) match(implementation={vendor()}) // 
expected-warning {{expected identifier or string literal describing a context 
property; property skipped}} expected-note {{context property options are: 
'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 'intel' 'llvm' 'nec' 'nvidia' 
'pgi' 'ti' 'unknown'}}
 #pragma omp declare variant(foofoo ) match(implementation={vendor(score 
ibm)}) // expected-error {{expected '(' after 'score'}} expected-warning 
{{expected '':'' after the score expression; '':'' assumed}}
-#pragma omp declare variant(foofoo ) match(implementation={vendor(score( 
ibm)}) // expected-error {{use of undeclared identifier 'ibm'}} expected-error 
{{expected ')'}} expected-warning {{expected '':'' after the score expression; 
'':'' assumed}} expected-warning {{expected identifier or string literal 
describing a context property; property skipped}} expected-note {{context 
property options are: 'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 'intel' 
'llvm' 'pgi' 'ti' 'unknown'}} expected-note {{to match this '('}}
-#pragma omp declare variant(foofoo ) match(implementation={vendor(score(2 
ibm)}) // expected-error

[PATCH] D111545: [Clang][NFC] Fix multiline comment prefixes in function headers

2022-01-04 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam added a comment.

In D111545#3218961 , @MyDeveloperDay 
wrote:

> Nit: That is more correct, but actually, you normally make the comment match 
> the param name (I think there might even be a clang-tidy check for that?)
>
> i.e.
>
>   /*isBitCodeSDL=*/

Thanks. I fixed it while committing.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D111545/new/

https://reviews.llvm.org/D111545

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D111545: [Clang][NFC] Fix multiline comment prefixes in function headers

2022-01-04 Thread Saiyedul Islam via Phabricator via cfe-commits

This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG32357266fd05: [Clang][NFC] Fix multiline comment prefixes in 
function headers (authored by saiislam).

Changed prior to commit:
  https://reviews.llvm.org/D111545?vs=397073&id=397257#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D111545/new/

https://reviews.llvm.org/D111545

Files:
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/lib/Driver/ToolChains/Cuda.cpp


Index: clang/lib/Driver/ToolChains/Cuda.cpp
===
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -612,8 +612,9 @@
 CmdArgs.push_back(CubinF);
   }
 
-  AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "nvptx", 
GPUArch,
-  false, false);
+  AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "nvptx",
+ GPUArch, /*isBitCodeSDL=*/false,
+ /*postClangLink=*/false);
 
   // Find nvlink and pass it as "--nvlink-path=" argument of
   // clang-nvlink-wrapper.
@@ -752,8 +753,9 @@
 
 addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix,
getTriple());
-AddStaticDeviceLibsPostLinking(getDriver(), DriverArgs, CC1Args, "nvptx", 
GpuArch,
-/* bitcode SDL?*/ true, /* PostClang Link? */ true);
+AddStaticDeviceLibsPostLinking(getDriver(), DriverArgs, CC1Args, "nvptx",
+   GpuArch, /*isBitCodeSDL=*/true,
+   /*postClangLink=*/true);
   }
 }
 
Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
===
--- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -131,9 +131,8 @@
   }
 
   AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "amdgcn",
-  SubArchName,
-  /* bitcode SDL?*/ true,
-  /* PostClang Link? */ false);
+ SubArchName, /*isBitCodeSDL=*/true,
+ /*postClangLink=*/false);
   // Add an intermediate output file.
   CmdArgs.push_back("-o");
   const char *OutputFileName =


Index: clang/lib/Driver/ToolChains/Cuda.cpp
===
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -612,8 +612,9 @@
 CmdArgs.push_back(CubinF);
   }
 
-  AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "nvptx", GPUArch,
-  false, false);
+  AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "nvptx",
+ GPUArch, /*isBitCodeSDL=*/false,
+ /*postClangLink=*/false);
 
   // Find nvlink and pass it as "--nvlink-path=" argument of
   // clang-nvlink-wrapper.
@@ -752,8 +753,9 @@
 
 addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix,
getTriple());
-AddStaticDeviceLibsPostLinking(getDriver(), DriverArgs, CC1Args, "nvptx", GpuArch,
-/* bitcode SDL?*/ true, /* PostClang Link? */ true);
+AddStaticDeviceLibsPostLinking(getDriver(), DriverArgs, CC1Args, "nvptx",
+   GpuArch, /*isBitCodeSDL=*/true,
+   /*postClangLink=*/true);
   }
 }
 
Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
===
--- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -131,9 +131,8 @@
   }
 
   AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "amdgcn",
-  SubArchName,
-  /* bitcode SDL?*/ true,
-  /* PostClang Link? */ false);
+ SubArchName, /*isBitCodeSDL=*/true,
+ /*postClangLink=*/false);
   // Add an intermediate output file.
   CmdArgs.push_back("-o");
   const char *OutputFileName =
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D116540: [OpenMP] Add nec and nvidia as compiler vendors for OpenMP

2022-01-03 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam added a comment.

In D116540#3217689 , @jdoerfert wrote:

> In D116540#3217684 , @saiislam 
> wrote:
>
>> 1. I don't know why "pgi" is present here as a vendor. Should it be renamed 
>> as nvidia instead? @Meinersbur @tra
>
> It was in the spec list and I would not remove it now.

Specs 5.0 and 5.1 linked above don't contain PGI as a vendor. But we can 
leave it as is.

>> 1. Should "cray" be renamed as "hpe" here? @sandoval
>
> No, both are listed in the spec.

Ok.

>> 1. Should this vendor list be reordered as per Spec 5.1 
>> [https://www.openmp.org/wp-content/uploads/OpenMP-API-Additional-Definitions-2-0.pdf]
>>  ?
>
> It was ordered as per that spec, which is just alphabetical. Can you add 
> `nec` as well and keep the others please.

Spec 5.1 has moved "unknown" from the last position to the first. There is 
also some reordering due to the "hpe or cray" entry.

> LG, see above.

Done.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D116540/new/

https://reviews.llvm.org/D116540

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D116540: [OpenMP] Add nvidia as a compiler vendor for OpenMP

2022-01-03 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam updated this revision to Diff 397108.
saiislam added a comment.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

1. Added nec as compiler vendor.
2. Fixed clang test cases.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D116540/new/

https://reviews.llvm.org/D116540

Files:
  clang/test/OpenMP/begin_declare_variant_messages.c
  clang/test/OpenMP/declare_variant_messages.c
  clang/test/OpenMP/declare_variant_messages.cpp
  llvm/include/llvm/Frontend/OpenMP/OMPKinds.def


Index: llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
===
--- llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -1130,6 +1130,8 @@
 __OMP_TRAIT_PROPERTY(implementation, vendor, ibm)
 __OMP_TRAIT_PROPERTY(implementation, vendor, intel)
 __OMP_TRAIT_PROPERTY(implementation, vendor, llvm)
+__OMP_TRAIT_PROPERTY(implementation, vendor, nec)
+__OMP_TRAIT_PROPERTY(implementation, vendor, nvidia)
 __OMP_TRAIT_PROPERTY(implementation, vendor, pgi)
 __OMP_TRAIT_PROPERTY(implementation, vendor, ti)
 __OMP_TRAIT_PROPERTY(implementation, vendor, unknown)
Index: clang/test/OpenMP/declare_variant_messages.cpp
===
--- clang/test/OpenMP/declare_variant_messages.cpp
+++ clang/test/OpenMP/declare_variant_messages.cpp
@@ -31,11 +31,11 @@
 #pragma omp declare variant(foofoo ) match(implementation = {vvv}) 
implementation // expected-warning {{'vvv' is not a valid context selector for 
the context set 'implementation'; selector ignored}} expected-note {{context 
selector options are: 'vendor' 'extension' 'unified_address' 
'unified_shared_memory' 'reverse_offload' 'dynamic_allocators' 
'atomic_default_mem_order'}} expected-note {{the ignored selector spans until 
here}} expected-error {{expected 'match' clause on 'omp declare variant' 
directive}}
 #pragma omp declare variant(foofoo ) match(implementation={xxx}) // 
expected-warning {{'xxx' is not a valid context selector for the context set 
'implementation'; selector ignored}} expected-note {{context selector options 
are: 'vendor' 'extension' 'unified_address' 'unified_shared_memory' 
'reverse_offload' 'dynamic_allocators' 'atomic_default_mem_order'}} 
expected-note {{the ignored selector spans until here}}
 #pragma omp declare variant(foofoo ) match(implementation={vendor}) // 
expected-warning {{the context selector 'vendor' in context set 
'implementation' requires a context property defined in parentheses; selector 
ignored}} expected-note {{the ignored selector spans until here}}
-#pragma omp declare variant(foofoo ) match(implementation={vendor(}) // 
expected-error {{expected ')'}} expected-warning {{expected identifier or 
string literal describing a context property; property skipped}} expected-note 
{{context property options are: 'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 
'intel' 'llvm' 'pgi' 'ti' 'unknown'}} expected-note {{to match this '('}}
-#pragma omp declare variant(foofoo ) match(implementation={vendor()}) // 
expected-warning {{expected identifier or string literal describing a context 
property; property skipped}} expected-note {{context property options are: 
'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 'intel' 'llvm' 'pgi' 'ti' 
'unknown'}}
+#pragma omp declare variant(foofoo ) match(implementation={vendor(}) // 
expected-error {{expected ')'}} expected-warning {{expected identifier or 
string literal describing a context property; property skipped}} expected-note 
{{context property options are: 'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 
'intel' 'llvm' 'nec' 'nvidia' 'pgi' 'ti' 'unknown'}} expected-note {{to match 
this '('}}
+#pragma omp declare variant(foofoo ) match(implementation={vendor()}) // 
expected-warning {{expected identifier or string literal describing a context 
property; property skipped}} expected-note {{context property options are: 
'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 'intel' 'llvm' 'nec' 'nvidia' 
'pgi' 'ti' 'unknown'}}
 #pragma omp declare variant(foofoo ) match(implementation={vendor(score 
ibm)}) // expected-error {{expected '(' after 'score'}} expected-warning 
{{expected '':'' after the score expression; '':'' assumed}}
-#pragma omp declare variant(foofoo ) match(implementation={vendor(score( 
ibm)}) // expected-error {{use of undeclared identifier 'ibm'}} expected-error 
{{expected ')'}} expected-warning {{expected '':'' after the score expression; 
'':'' assumed}} expected-warning {{expected identifier or string literal 
describing a context property; property skipped}} expected-note {{context 
property options are: 'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 'intel' 
'llvm' 'pgi' 'ti' 'unknown'}} expected-note {{to match this '('}}
-#pragma omp declare variant(foofoo ) match(implementation={vendor(score(2 
ibm)}) // expected-error {{expected ')'}} expected-error {{expected ')'}}

[PATCH] D116549: [OpenMP][Clang] Allow passing target features in ISA trait for metadirective clause

2022-01-03 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam created this revision.
saiislam added reviewers: jdoerfert, alokmishra.besu, cchen, JonChesterfield.
Herald added subscribers: guansong, yaxunl.
saiislam requested review of this revision.
Herald added subscribers: cfe-commits, sstefan1.
Herald added a project: clang.

Passing any feature in the device-isa trait which is not supported by the host
was causing a compilation failure.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D116549

Files:
  clang/lib/Parse/ParseOpenMP.cpp
  clang/test/OpenMP/metadirective_implementation_codegen.c
  clang/test/OpenMP/metadirective_implementation_codegen.cpp


Index: clang/test/OpenMP/metadirective_implementation_codegen.cpp
===
--- clang/test/OpenMP/metadirective_implementation_codegen.cpp
+++ clang/test/OpenMP/metadirective_implementation_codegen.cpp
@@ -35,6 +35,9 @@
: parallel) default(parallel for)
   for (int i = 0; i < 100; i++)
 ;
+#pragma omp metadirective when(device = {isa("flat-address-space")}
   \
+   : parallel) default(target teams)
+  bar();
 }
 
 // CHECK-LABEL: void @_Z3foov()
@@ -44,6 +47,7 @@
 // CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, 
...)* bitcast (void (i32*, i32*)* [[OUTLINED_5:@.+]] to void
 // CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, 
...)* bitcast (void (i32*, i32*)* [[OUTLINED_6:@.+]] to void
 // CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, 
...)* bitcast (void (i32*, i32*)* [[OUTLINED_7:@.+]] to void
+// CHECK: call void @__omp_offloading_{{.*}}()
 // CHECK: ret void
 
 // CHECK: define internal void [[OUTLINED_2]](
@@ -73,4 +77,11 @@
 // NO-CHECK: call void @__kmpc_for_static_fini
 // CHECK: ret void
 
+// CHECK: define internal void @__omp_offloading_
+// CHECK: ...) @__kmpc_fork_teams({{.*}}@.omp_outlined..6
+// CHECK: ret void
+
+// CHECK: define internal void @.omp_outlined..6(
+// CHECK: @_Z3barv
+// CHECK: ret void
 #endif
Index: clang/test/OpenMP/metadirective_implementation_codegen.c
===
--- clang/test/OpenMP/metadirective_implementation_codegen.c
+++ clang/test/OpenMP/metadirective_implementation_codegen.c
@@ -35,10 +35,14 @@
: parallel) default(parallel for)
   for (int i = 0; i < 100; i++)
 ;
+#pragma omp metadirective when(device = {isa("flat-address-space")}
   \
+   : parallel) default(target teams)
+  bar();
 }
 
 // CHECK: void @foo()
 // CHECK-COUNT-6: ...) @__kmpc_fork_call(
+// CHECK: call void @__omp_offloading_{{.*}}()
 // CHECK: ret void
 
 // CHECK: define internal void @.omp_outlined.(
@@ -68,4 +72,11 @@
 // NO-CHECK: call void @__kmpc_for_static_fini
 // CHECK: ret void
 
+// CHECK: define internal void @__omp_offloading_
+// CHECK: ...) @__kmpc_fork_teams({{.*}}@.omp_outlined..6
+// CHECK: ret void
+
+// CHECK: define internal void @.omp_outlined..6(
+// CHECK: @bar
+// CHECK: ret void
 #endif
Index: clang/lib/Parse/ParseOpenMP.cpp
===
--- clang/lib/Parse/ParseOpenMP.cpp
+++ clang/lib/Parse/ParseOpenMP.cpp
@@ -2529,7 +2529,9 @@
 TPA.Revert();
 // End of the first iteration. Parser is reset to the start of 
metadirective
 
-TargetOMPContext OMPCtx(ASTContext, /* DiagUnknownTrait */ nullptr,
+std::function<void(StringRef)> DiagUnknownTrait = [this, Loc](
+StringRef ISATrait) {};
+TargetOMPContext OMPCtx(ASTContext, std::move(DiagUnknownTrait),
 /* CurrentFunctionDecl */ nullptr,
 ArrayRef());
 


Index: clang/test/OpenMP/metadirective_implementation_codegen.cpp
===
--- clang/test/OpenMP/metadirective_implementation_codegen.cpp
+++ clang/test/OpenMP/metadirective_implementation_codegen.cpp
@@ -35,6 +35,9 @@
: parallel) default(parallel for)
   for (int i = 0; i < 100; i++)
 ;
+#pragma omp metadirective when(device = {isa("flat-address-space")}   \
+   : parallel) default(target teams)
+  bar();
 }
 
 // CHECK-LABEL: void @_Z3foov()
@@ -44,6 +47,7 @@
 // CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_5:@.+]] to void
 // CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_6:@.+]] to void
 // CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_7:@.+]] to void
+// CHECK: call void @__omp_offloading_{{.*}}()
 // CHECK: ret void
 
 // CHECK: define internal void [[OUTLINED_2]](
@@ -73,4 +77,11 @@
 //

[PATCH] D111545: [Clang][NFC] Fix multiline comment prefixes in function headers

2022-01-03 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam updated this revision to Diff 397073.
saiislam added a comment.

Updated comments as per suggestion. Apologies for the delay.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D111545/new/

https://reviews.llvm.org/D111545

Files:
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/lib/Driver/ToolChains/Cuda.cpp


Index: clang/lib/Driver/ToolChains/Cuda.cpp
===
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -752,8 +752,9 @@
 
 addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix,
getTriple());
-AddStaticDeviceLibsPostLinking(getDriver(), DriverArgs, CC1Args, "nvptx", 
GpuArch,
-/* bitcode SDL?*/ true, /* PostClang Link? */ true);
+AddStaticDeviceLibsPostLinking(getDriver(), DriverArgs, CC1Args, "nvptx",
+   GpuArch, /*bitcode SDL=*/true,
+   /*PostClang Link=*/true);
   }
 }
 
Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
===
--- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -131,9 +131,8 @@
   }
 
   AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "amdgcn",
-  SubArchName,
-  /* bitcode SDL?*/ true,
-  /* PostClang Link? */ false);
+ SubArchName, /* bitcode SDL=*/true,
+ /* PostClang Link=*/false);
   // Add an intermediate output file.
   CmdArgs.push_back("-o");
   const char *OutputFileName =


Index: clang/lib/Driver/ToolChains/Cuda.cpp
===
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -752,8 +752,9 @@
 
 addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix,
getTriple());
-AddStaticDeviceLibsPostLinking(getDriver(), DriverArgs, CC1Args, "nvptx", GpuArch,
-/* bitcode SDL?*/ true, /* PostClang Link? */ true);
+AddStaticDeviceLibsPostLinking(getDriver(), DriverArgs, CC1Args, "nvptx",
+   GpuArch, /*bitcode SDL=*/true,
+   /*PostClang Link=*/true);
   }
 }
 
Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
===
--- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -131,9 +131,8 @@
   }
 
   AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "amdgcn",
-  SubArchName,
-  /* bitcode SDL?*/ true,
-  /* PostClang Link? */ false);
+ SubArchName, /* bitcode SDL=*/true,
+ /* PostClang Link=*/false);
   // Add an intermediate output file.
   CmdArgs.push_back("-o");
   const char *OutputFileName =
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D111488: [Clang][clang-nvlink-wrapper] Pass nvlink path to the wrapper

2021-10-12 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam added inline comments.



Comment at: clang/lib/Driver/ToolChains/Cuda.cpp:617
+  // Find nvlink and pass it as "--nvlink-command=" argument of 
clang-nvlink-wrapper.
+  auto NvlinkBin = getToolChain().GetProgramPath("nvlink");
+  const char *NvlinkPath =

Meinersbur wrote:
> [style] [[ 
> https://llvm.org/docs/CodingStandards.html#use-auto-type-deduction-to-make-code-more-readable
>  | LLVM's coding standard does not use almost-always-auto ]].
> 
> It's not immediately obvious here, does `GetProgramPath` look into the 
> BinPath detected by CudaInstallationDetector? I applied the patch locally to 
> http://meinersbur.de:8011/#/builders/1 and it actually does work.
Yes, you are right. The `CudaToolChain` constructor initializes the binary
paths obtained from `CudaInstallationDetector`, and those paths are used by
this call.



Comment at: clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp:50
+
+static cl::opt NvlinkUserPath("nvlink-command",
+   cl::desc("path of nvlink binary"),

tra wrote:
> Nit. Clang already has `--ptxas-path=` option. It may be worth using `-path` 
> suffix here for consistency, too.
changed it to --nvlink-path


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D111488/new/

https://reviews.llvm.org/D111488

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D111488: [Clang][clang-nvlink-wrapper] Pass nvlink path to the wrapper

2021-10-12 Thread Saiyedul Islam via Phabricator via cfe-commits

This revision was automatically updated to reflect the committed changes.
Closed by commit rGf56548829c4c: [Clang][clang-nvlink-wrapper] Pass nvlink path 
to the wrapper (authored by saiislam).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D111488/new/

https://reviews.llvm.org/D111488

Files:
  clang/lib/Driver/ToolChains/Cuda.cpp
  clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp

Index: clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp
===
--- clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp
+++ clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp
@@ -25,6 +25,7 @@
 /// 2. nvlink -o a.out-openmp-nvptx64 /tmp/a.cubin /tmp/b.cubin
 //===-===//
 
+#include "clang/Basic/Version.h"
 #include "llvm/Object/Archive.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Errc.h"
@@ -41,6 +42,19 @@
 
 static cl::opt Help("h", cl::desc("Alias for -help"), cl::Hidden);
 
+// Mark all our options with this category, everything else (except for -help)
+// will be hidden.
+static cl::OptionCategory
+ClangNvlinkWrapperCategory("clang-nvlink-wrapper options");
+
+static cl::opt NvlinkUserPath("nvlink-path",
+   cl::desc("Path of nvlink binary"),
+   cl::cat(ClangNvlinkWrapperCategory));
+
+// Do not parse nvlink options
+static cl::list
+NVArgs(cl::Sink, cl::desc("..."));
+
 static Error runNVLink(std::string NVLinkPath,
SmallVectorImpl ) {
   std::vector NVLArgs;
@@ -119,8 +133,20 @@
   return Error::success();
 }
 
+static void PrintVersion(raw_ostream ) {
+  OS << clang::getClangToolFullVersion("clang-nvlink-wrapper") << '\n';
+}
+
 int main(int argc, const char **argv) {
   sys::PrintStackTraceOnErrorSignal(argv[0]);
+  cl::SetVersionPrinter(PrintVersion);
+  cl::HideUnrelatedOptions(ClangNvlinkWrapperCategory);
+  cl::ParseCommandLineOptions(
+  argc, argv,
+  "A wrapper tool over nvlink program. It transparently passes every \n"
+  "input option and objects to nvlink except archive files and path of \n"
+  "nvlink binary. It reads each input archive file to extract archived \n"
+  "cubin files as temporary files.\n");
 
   if (Help) {
 cl::PrintHelpMessage();
@@ -132,12 +158,7 @@
 exit(1);
   };
 
-  ErrorOr NvlinkPath = sys::findProgramByName("nvlink");
-  if (!NvlinkPath) {
-reportError(createStringError(NvlinkPath.getError(),
-  "unable to find 'nvlink' in path"));
-  }
-
+  std::string NvlinkPath;
   SmallVector Argv(argv, argv + argc);
   SmallVector ArgvSubst;
   SmallVector TmpFiles;
@@ -145,8 +166,7 @@
   StringSaver Saver(Alloc);
   cl::ExpandResponseFiles(Saver, cl::TokenizeGNUCommandLine, Argv);
 
-  for (size_t i = 1; i < Argv.size(); ++i) {
-std::string Arg = Argv[i];
+  for (const std::string  : NVArgs) {
 if (sys::path::extension(Arg) == ".a") {
   if (Error Err = extractArchiveFiles(Arg, ArgvSubst, TmpFiles))
 reportError(std::move(Err));
@@ -155,7 +175,19 @@
 }
   }
 
-  if (Error Err = runNVLink(NvlinkPath.get(), ArgvSubst))
+  NvlinkPath = NvlinkUserPath;
+
+  // If user hasn't specified nvlink binary then search it in PATH
+  if (NvlinkPath.empty()) {
+ErrorOr NvlinkPathErr = sys::findProgramByName("nvlink");
+if (!NvlinkPathErr) {
+  reportError(createStringError(NvlinkPathErr.getError(),
+"unable to find 'nvlink' in path"));
+}
+NvlinkPath = NvlinkPathErr.get();
+  }
+
+  if (Error Err = runNVLink(NvlinkPath, ArgvSubst))
 reportError(std::move(Err));
   if (Error Err = cleanupTmpFiles(TmpFiles))
 reportError(std::move(Err));
Index: clang/lib/Driver/ToolChains/Cuda.cpp
===
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -613,6 +613,11 @@
   AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "nvptx", GPUArch,
   false, false);
 
+  // Find nvlink and pass it as "--nvlink-path=" argument of
+  // clang-nvlink-wrapper.
+  CmdArgs.push_back(Args.MakeArgString(
+  Twine("--nvlink-path=" + getToolChain().GetProgramPath("nvlink";
+
   const char *Exec =
   Args.MakeArgString(getToolChain().GetProgramPath("clang-nvlink-wrapper"));
   C.addCommand(std::make_unique(
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D111488: [Clang][clang-nvlink-wrapper] Pass nvlink path to the wrapper

2021-10-12 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam updated this revision to Diff 378979.
saiislam added a comment.

clang-format(ed).


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D111488/new/

https://reviews.llvm.org/D111488

Files:
  clang/lib/Driver/ToolChains/Cuda.cpp
  clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp

Index: clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp
===
--- clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp
+++ clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp
@@ -25,6 +25,7 @@
 /// 2. nvlink -o a.out-openmp-nvptx64 /tmp/a.cubin /tmp/b.cubin
 //===-===//
 
+#include "clang/Basic/Version.h"
 #include "llvm/Object/Archive.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Errc.h"
@@ -41,6 +42,19 @@
 
 static cl::opt Help("h", cl::desc("Alias for -help"), cl::Hidden);
 
+// Mark all our options with this category, everything else (except for -help)
+// will be hidden.
+static cl::OptionCategory
+ClangNvlinkWrapperCategory("clang-nvlink-wrapper options");
+
+static cl::opt NvlinkUserPath("nvlink-path",
+   cl::desc("Path of nvlink binary"),
+   cl::cat(ClangNvlinkWrapperCategory));
+
+// Do not parse nvlink options
+static cl::list
+NVArgs(cl::Sink, cl::desc("..."));
+
 static Error runNVLink(std::string NVLinkPath,
SmallVectorImpl ) {
   std::vector NVLArgs;
@@ -119,8 +133,20 @@
   return Error::success();
 }
 
+static void PrintVersion(raw_ostream ) {
+  OS << clang::getClangToolFullVersion("clang-nvlink-wrapper") << '\n';
+}
+
 int main(int argc, const char **argv) {
   sys::PrintStackTraceOnErrorSignal(argv[0]);
+  cl::SetVersionPrinter(PrintVersion);
+  cl::HideUnrelatedOptions(ClangNvlinkWrapperCategory);
+  cl::ParseCommandLineOptions(
+  argc, argv,
+  "A wrapper tool over nvlink program. It transparently passes every \n"
+  "input option and objects to nvlink except archive files and path of \n"
+  "nvlink binary. It reads each input archive file to extract archived \n"
+  "cubin files as temporary files.\n");
 
   if (Help) {
 cl::PrintHelpMessage();
@@ -132,12 +158,7 @@
 exit(1);
   };
 
-  ErrorOr NvlinkPath = sys::findProgramByName("nvlink");
-  if (!NvlinkPath) {
-reportError(createStringError(NvlinkPath.getError(),
-  "unable to find 'nvlink' in path"));
-  }
-
+  std::string NvlinkPath;
   SmallVector Argv(argv, argv + argc);
   SmallVector ArgvSubst;
   SmallVector TmpFiles;
@@ -145,8 +166,7 @@
   StringSaver Saver(Alloc);
   cl::ExpandResponseFiles(Saver, cl::TokenizeGNUCommandLine, Argv);
 
-  for (size_t i = 1; i < Argv.size(); ++i) {
-std::string Arg = Argv[i];
+  for (const std::string  : NVArgs) {
 if (sys::path::extension(Arg) == ".a") {
   if (Error Err = extractArchiveFiles(Arg, ArgvSubst, TmpFiles))
 reportError(std::move(Err));
@@ -155,7 +175,19 @@
 }
   }
 
-  if (Error Err = runNVLink(NvlinkPath.get(), ArgvSubst))
+  NvlinkPath = NvlinkUserPath;
+
+  // If user hasn't specified nvlink binary then search it in PATH
+  if (NvlinkPath.empty()) {
+ErrorOr NvlinkPathErr = sys::findProgramByName("nvlink");
+if (!NvlinkPathErr) {
+  reportError(createStringError(NvlinkPathErr.getError(),
+"unable to find 'nvlink' in path"));
+}
+NvlinkPath = NvlinkPathErr.get();
+  }
+
+  if (Error Err = runNVLink(NvlinkPath, ArgvSubst))
 reportError(std::move(Err));
   if (Error Err = cleanupTmpFiles(TmpFiles))
 reportError(std::move(Err));
Index: clang/lib/Driver/ToolChains/Cuda.cpp
===
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -613,6 +613,11 @@
   AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "nvptx", GPUArch,
   false, false);
 
+  // Find nvlink and pass it as "--nvlink-path=" argument of
+  // clang-nvlink-wrapper.
+  CmdArgs.push_back(Args.MakeArgString(
+  Twine("--nvlink-path=" + getToolChain().GetProgramPath("nvlink";
+
   const char *Exec =
   Args.MakeArgString(getToolChain().GetProgramPath("clang-nvlink-wrapper"));
   C.addCommand(std::make_unique(
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D111488: [Clang][clang-nvlink-wrapper] Pass nvlink path to the wrapper

2021-10-12 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam updated this revision to Diff 378978.
saiislam marked 5 inline comments as done.
saiislam added a comment.

Thanks, Michael and Artem!

1. Changed nvlink-command to nvlink-path.
2. Answered queries and refactored as suggested.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D111488/new/

https://reviews.llvm.org/D111488

Files:
  clang/lib/Driver/ToolChains/Cuda.cpp
  clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp

Index: clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp
===
--- clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp
+++ clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp
@@ -25,6 +25,7 @@
 /// 2. nvlink -o a.out-openmp-nvptx64 /tmp/a.cubin /tmp/b.cubin
 //===-===//
 
+#include "clang/Basic/Version.h"
 #include "llvm/Object/Archive.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Errc.h"
@@ -41,6 +42,19 @@
 
 static cl::opt Help("h", cl::desc("Alias for -help"), cl::Hidden);
 
+// Mark all our options with this category, everything else (except for -help)
+// will be hidden.
+static cl::OptionCategory
+ClangNvlinkWrapperCategory("clang-nvlink-wrapper options");
+
+static cl::opt NvlinkUserPath("nvlink-path",
+   cl::desc("Path of nvlink binary"),
+   cl::cat(ClangNvlinkWrapperCategory));
+
+// Do not parse nvlink options
+static cl::list
+NVArgs(cl::Sink, cl::desc("..."));
+
 static Error runNVLink(std::string NVLinkPath,
SmallVectorImpl ) {
   std::vector NVLArgs;
@@ -119,8 +133,20 @@
   return Error::success();
 }
 
+static void PrintVersion(raw_ostream ) {
+  OS << clang::getClangToolFullVersion("clang-nvlink-wrapper") << '\n';
+}
+
 int main(int argc, const char **argv) {
   sys::PrintStackTraceOnErrorSignal(argv[0]);
+  cl::SetVersionPrinter(PrintVersion);
+  cl::HideUnrelatedOptions(ClangNvlinkWrapperCategory);
+  cl::ParseCommandLineOptions(
+  argc, argv,
+  "A wrapper tool over nvlink program. It transparently passes every \n"
+  "input option and objects to nvlink except archive files and path of \n"
+  "nvlink binary. It reads each input archive file to extract archived \n"
+  "cubin files as temporary files.\n");
 
   if (Help) {
 cl::PrintHelpMessage();
@@ -132,12 +158,7 @@
 exit(1);
   };
 
-  ErrorOr NvlinkPath = sys::findProgramByName("nvlink");
-  if (!NvlinkPath) {
-reportError(createStringError(NvlinkPath.getError(),
-  "unable to find 'nvlink' in path"));
-  }
-
+  std::string NvlinkPath;
   SmallVector Argv(argv, argv + argc);
   SmallVector ArgvSubst;
   SmallVector TmpFiles;
@@ -145,8 +166,7 @@
   StringSaver Saver(Alloc);
   cl::ExpandResponseFiles(Saver, cl::TokenizeGNUCommandLine, Argv);
 
-  for (size_t i = 1; i < Argv.size(); ++i) {
-std::string Arg = Argv[i];
+  for (const std::string  : NVArgs) {
 if (sys::path::extension(Arg) == ".a") {
   if (Error Err = extractArchiveFiles(Arg, ArgvSubst, TmpFiles))
 reportError(std::move(Err));
@@ -155,7 +175,19 @@
 }
   }
 
-  if (Error Err = runNVLink(NvlinkPath.get(), ArgvSubst))
+  NvlinkPath = NvlinkUserPath;
+
+  // If user hasn't specified nvlink binary then search it in PATH
+  if (NvlinkPath.empty()) {
+ErrorOr NvlinkPathErr = sys::findProgramByName("nvlink");
+if (!NvlinkPathErr) {
+  reportError(createStringError(NvlinkPathErr.getError(),
+"unable to find 'nvlink' in path"));
+}
+NvlinkPath = NvlinkPathErr.get();
+  }
+
+  if (Error Err = runNVLink(NvlinkPath, ArgvSubst))
 reportError(std::move(Err));
   if (Error Err = cleanupTmpFiles(TmpFiles))
 reportError(std::move(Err));
Index: clang/lib/Driver/ToolChains/Cuda.cpp
===
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -613,6 +613,10 @@
   AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "nvptx", GPUArch,
   false, false);
 
+  // Find nvlink and pass it as "--nvlink-path=" argument of
+  // clang-nvlink-wrapper.
+  CmdArgs.push_back(Args.MakeArgString(Twine("--nvlink-path=" + getToolChain().GetProgramPath("nvlink";
+
   const char *Exec =
   Args.MakeArgString(getToolChain().GetProgramPath("clang-nvlink-wrapper"));
   C.addCommand(std::make_unique(
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D111545: [Clang][NFC] Fix multiline comment prefixes in function headers

2021-10-12 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam updated this revision to Diff 378957.
saiislam added a comment.

Used comment style for named parameters.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D111545/new/

https://reviews.llvm.org/D111545

Files:
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/lib/Driver/ToolChains/Cuda.cpp


Index: clang/lib/Driver/ToolChains/Cuda.cpp
===
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -744,8 +744,8 @@
 
 addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix,
getTriple());
-AddStaticDeviceLibsPostLinking(getDriver(), DriverArgs, CC1Args, "nvptx", 
GpuArch,
-/* bitcode SDL?*/ true, /* PostClang Link? */ true);
+AddStaticDeviceLibsPostLinking(getDriver(), DriverArgs, CC1Args, "nvptx",
+GpuArch, /* bitcode SDL?*/true, /* PostClang Link? */true);
   }
 }
 
Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
===
--- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -115,9 +115,7 @@
   }
 
   AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "amdgcn",
-  SubArchName,
-  /* bitcode SDL?*/ true,
-  /* PostClang Link? */ false);
+  SubArchName, /* bitcode SDL?*/true, /* PostClang Link? */false);
   // Add an intermediate output file.
   CmdArgs.push_back("-o");
   const char *OutputFileName =


Index: clang/lib/Driver/ToolChains/Cuda.cpp
===
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -744,8 +744,8 @@
 
 addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix,
getTriple());
-AddStaticDeviceLibsPostLinking(getDriver(), DriverArgs, CC1Args, "nvptx", GpuArch,
-/* bitcode SDL?*/ true, /* PostClang Link? */ true);
+AddStaticDeviceLibsPostLinking(getDriver(), DriverArgs, CC1Args, "nvptx",
+GpuArch, /* bitcode SDL?*/true, /* PostClang Link? */true);
   }
 }
 
Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
===
--- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -115,9 +115,7 @@
   }
 
   AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "amdgcn",
-  SubArchName,
-  /* bitcode SDL?*/ true,
-  /* PostClang Link? */ false);
+  SubArchName, /* bitcode SDL?*/true, /* PostClang Link? */false);
   // Add an intermediate output file.
   CmdArgs.push_back("-o");
   const char *OutputFileName =
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D105191: [Clang][OpenMP] Add partial support for Static Device Libraries

2021-10-11 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam marked an inline comment as done.
saiislam added inline comments.



Comment at: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp:120
+  /* bitcode SDL?*/ true,
+  /* PostClang Link? */ false);
   // Add an intermediate output file.

MyDeveloperDay wrote:
> This file now fails clang-format
Thanks for reporting. Fixed it in D111545. Please have a look.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105191/new/

https://reviews.llvm.org/D105191

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D111545: [Clang][NFC] Fix multiline comment prefixes in function headers

2021-10-11 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam created this revision.
saiislam added a reviewer: MyDeveloperDay.
Herald added subscribers: kerbowa, nhaehnle, jvesely.
saiislam requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Cleanup of D105191 after the latest clang-format changes.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D111545

Files:
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/lib/Driver/ToolChains/Cuda.cpp


Index: clang/lib/Driver/ToolChains/Cuda.cpp
===
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -744,8 +744,8 @@
 
 addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix,
getTriple());
-AddStaticDeviceLibsPostLinking(getDriver(), DriverArgs, CC1Args, "nvptx", 
GpuArch,
-/* bitcode SDL?*/ true, /* PostClang Link? */ true);
+AddStaticDeviceLibsPostLinking(getDriver(), DriverArgs, CC1Args, "nvptx",
+   GpuArch, true, true);
   }
 }
 
Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
===
--- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -115,9 +115,7 @@
   }
 
   AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "amdgcn",
-  SubArchName,
-  /* bitcode SDL?*/ true,
-  /* PostClang Link? */ false);
+ SubArchName, true, false);
   // Add an intermediate output file.
   CmdArgs.push_back("-o");
   const char *OutputFileName =


Index: clang/lib/Driver/ToolChains/Cuda.cpp
===
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -744,8 +744,8 @@
 
 addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix,
getTriple());
-AddStaticDeviceLibsPostLinking(getDriver(), DriverArgs, CC1Args, "nvptx", GpuArch,
-/* bitcode SDL?*/ true, /* PostClang Link? */ true);
+AddStaticDeviceLibsPostLinking(getDriver(), DriverArgs, CC1Args, "nvptx",
+   GpuArch, true, true);
   }
 }
 
Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
===
--- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -115,9 +115,7 @@
   }
 
   AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "amdgcn",
-  SubArchName,
-  /* bitcode SDL?*/ true,
-  /* PostClang Link? */ false);
+ SubArchName, true, false);
   // Add an intermediate output file.
   CmdArgs.push_back("-o");
   const char *OutputFileName =
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D111488: [Clang][clang-nvlink-wrapper] Pass nvlink path to the wrapper

2021-10-11 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam updated this revision to Diff 378633.
saiislam added a comment.

Fixed typo


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D111488/new/

https://reviews.llvm.org/D111488

Files:
  clang/lib/Driver/ToolChains/Cuda.cpp
  clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp

Index: clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp
===
--- clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp
+++ clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp
@@ -25,6 +25,7 @@
 /// 2. nvlink -o a.out-openmp-nvptx64 /tmp/a.cubin /tmp/b.cubin
 //===-===//
 
+#include "clang/Basic/Version.h"
 #include "llvm/Object/Archive.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Errc.h"
@@ -41,6 +42,19 @@
 
 static cl::opt Help("h", cl::desc("Alias for -help"), cl::Hidden);
 
+// Mark all our options with this category, everything else (except for -help)
+// will be hidden.
+static cl::OptionCategory
+ClangNvlinkWrapperCategory("clang-nvlink-wrapper options");
+
+static cl::opt NvlinkUserPath("nvlink-command",
+   cl::desc("path of nvlink binary"),
+   cl::cat(ClangNvlinkWrapperCategory));
+
+// Do not parse nvlink options
+static cl::list
+NVArgs(cl::Sink, cl::desc("..."));
+
 static Error runNVLink(std::string NVLinkPath,
SmallVectorImpl ) {
   std::vector NVLArgs;
@@ -119,8 +133,20 @@
   return Error::success();
 }
 
+static void PrintVersion(raw_ostream ) {
+  OS << clang::getClangToolFullVersion("clang-nvlink-wrapper") << '\n';
+}
+
 int main(int argc, const char **argv) {
   sys::PrintStackTraceOnErrorSignal(argv[0]);
+  cl::SetVersionPrinter(PrintVersion);
+  cl::HideUnrelatedOptions(ClangNvlinkWrapperCategory);
+  cl::ParseCommandLineOptions(
+  argc, argv,
+  "A wrapper tool over nvlink program. It transparently passes every \n"
+  "input option and objects to nvlink except archive files and path of \n"
+  "nvlink binary. It reads each input archive file to extract archived \n"
+  "cubin files as temporary files.\n");
 
   if (Help) {
 cl::PrintHelpMessage();
@@ -132,12 +158,7 @@
 exit(1);
   };
 
-  ErrorOr NvlinkPath = sys::findProgramByName("nvlink");
-  if (!NvlinkPath) {
-reportError(createStringError(NvlinkPath.getError(),
-  "unable to find 'nvlink' in path"));
-  }
-
+  std::string NvlinkPath;
   SmallVector Argv(argv, argv + argc);
   SmallVector ArgvSubst;
   SmallVector TmpFiles;
@@ -145,8 +166,8 @@
   StringSaver Saver(Alloc);
   cl::ExpandResponseFiles(Saver, cl::TokenizeGNUCommandLine, Argv);
 
-  for (size_t i = 1; i < Argv.size(); ++i) {
-std::string Arg = Argv[i];
+  for (size_t i = 0; i < NVArgs.size(); ++i) {
+std::string Arg = NVArgs[i];
 if (sys::path::extension(Arg) == ".a") {
   if (Error Err = extractArchiveFiles(Arg, ArgvSubst, TmpFiles))
 reportError(std::move(Err));
@@ -155,7 +176,19 @@
 }
   }
 
-  if (Error Err = runNVLink(NvlinkPath.get(), ArgvSubst))
+  NvlinkPath = NvlinkUserPath;
+
+  // If user hasn't specified nvlink binary then search it in PATH
+  if (NvlinkPath.empty()) {
+ErrorOr NvlinkPathErr = sys::findProgramByName("nvlink");
+if (!NvlinkPathErr) {
+  reportError(createStringError(NvlinkPathErr.getError(),
+"unable to find 'nvlink' in path"));
+}
+NvlinkPath = NvlinkPathErr.get();
+  }
+
+  if (Error Err = runNVLink(NvlinkPath, ArgvSubst))
 reportError(std::move(Err));
   if (Error Err = cleanupTmpFiles(TmpFiles))
 reportError(std::move(Err));
Index: clang/lib/Driver/ToolChains/Cuda.cpp
===
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -613,6 +613,12 @@
   AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "nvptx", GPUArch,
   false, false);
 
+  // Find nvlink and pass it as "--nvlink-command=" argument of clang-nvlink-wrapper.
+  auto NvlinkBin = getToolChain().GetProgramPath("nvlink");
+  const char *NvlinkPath =
+  Args.MakeArgString(Twine("--nvlink-command=" + NvlinkBin));
+  CmdArgs.push_back(NvlinkPath);
+
   const char *Exec =
   Args.MakeArgString(getToolChain().GetProgramPath("clang-nvlink-wrapper"));
   C.addCommand(std::make_unique(
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D111488: [Clang][clang-nvlink-wrapper] Pass nvlink path to the wrapper

2021-10-11 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam updated this revision to Diff 378630.
saiislam marked 4 inline comments as done.
saiislam added a comment.

1. Changed the option from path to nvlink-command.
2. Command line arguments are now parsed using proper API.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D111488/new/

https://reviews.llvm.org/D111488

Files:
  clang/lib/Driver/ToolChains/Cuda.cpp
  clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp

Index: clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp
===
--- clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp
+++ clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp
@@ -25,6 +25,7 @@
 /// 2. nvlink -o a.out-openmp-nvptx64 /tmp/a.cubin /tmp/b.cubin
 //===-===//
 
+#include "clang/Basic/Version.h"
 #include "llvm/Object/Archive.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Errc.h"
@@ -41,6 +42,19 @@
 
 static cl::opt Help("h", cl::desc("Alias for -help"), cl::Hidden);
 
+// Mark all our options with this category, everything else (except for -help)
+// will be hidden.
+static cl::OptionCategory
+ClangNvlinkWrapperCategory("clang-nvlink-wrapper options");
+
+static cl::opt NvlinkUserPath("nvlink-command",
+   cl::desc("path of nvlink binary"),
+   cl::cat(ClangNvlinkWrapperCategory));
+
+// Do not parse nvlink options
+static cl::list
+NVArgs(cl::Sink, cl::desc("..."));
+
 static Error runNVLink(std::string NVLinkPath,
SmallVectorImpl ) {
   std::vector NVLArgs;
@@ -119,8 +133,20 @@
   return Error::success();
 }
 
+static void PrintVersion(raw_ostream ) {
+  OS << clang::getClangToolFullVersion("clang-offload-bundler") << '\n';
+}
+
 int main(int argc, const char **argv) {
   sys::PrintStackTraceOnErrorSignal(argv[0]);
+  cl::SetVersionPrinter(PrintVersion);
+  cl::HideUnrelatedOptions(ClangNvlinkWrapperCategory);
+  cl::ParseCommandLineOptions(
+  argc, argv,
+  "A wrapper tool over nvlink program. It transparently passes every \n"
+  "input option and objects to nvlink except archive files and path of \n"
+  "nvlink binary. It reads each input archive file to extract archived \n"
+  "cubin files as temporary files.\n");
 
   if (Help) {
 cl::PrintHelpMessage();
@@ -132,12 +158,7 @@
 exit(1);
   };
 
-  ErrorOr NvlinkPath = sys::findProgramByName("nvlink");
-  if (!NvlinkPath) {
-reportError(createStringError(NvlinkPath.getError(),
-  "unable to find 'nvlink' in path"));
-  }
-
+  std::string NvlinkPath;
   SmallVector Argv(argv, argv + argc);
   SmallVector ArgvSubst;
   SmallVector TmpFiles;
@@ -145,8 +166,8 @@
   StringSaver Saver(Alloc);
   cl::ExpandResponseFiles(Saver, cl::TokenizeGNUCommandLine, Argv);
 
-  for (size_t i = 1; i < Argv.size(); ++i) {
-std::string Arg = Argv[i];
+  for (size_t i = 0; i < NVArgs.size(); ++i) {
+std::string Arg = NVArgs[i];
 if (sys::path::extension(Arg) == ".a") {
   if (Error Err = extractArchiveFiles(Arg, ArgvSubst, TmpFiles))
 reportError(std::move(Err));
@@ -155,7 +176,19 @@
 }
   }
 
-  if (Error Err = runNVLink(NvlinkPath.get(), ArgvSubst))
+  NvlinkPath = NvlinkUserPath;
+
+  // If user hasn't specified nvlink binary then search it in PATH
+  if (NvlinkPath.empty()) {
+ErrorOr NvlinkPathErr = sys::findProgramByName("nvlink");
+if (!NvlinkPathErr) {
+  reportError(createStringError(NvlinkPathErr.getError(),
+"unable to find 'nvlink' in path"));
+}
+NvlinkPath = NvlinkPathErr.get();
+  }
+
+  if (Error Err = runNVLink(NvlinkPath, ArgvSubst))
 reportError(std::move(Err));
   if (Error Err = cleanupTmpFiles(TmpFiles))
 reportError(std::move(Err));
Index: clang/lib/Driver/ToolChains/Cuda.cpp
===
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -613,6 +613,12 @@
   AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "nvptx", GPUArch,
   false, false);
 
+  // Find nvlink and pass it as "--nvlink-command=" argument of clang-nvlink-wrapper.
+  auto NvlinkBin = getToolChain().GetProgramPath("nvlink");
+  const char *NvlinkPath =
+  Args.MakeArgString(Twine("--nvlink-command=" + NvlinkBin));
+  CmdArgs.push_back(NvlinkPath);
+
   const char *Exec =
   Args.MakeArgString(getToolChain().GetProgramPath("clang-nvlink-wrapper"));
   C.addCommand(std::make_unique(
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D111488: [Clang][clang-nvlink-wrapper] Pass nvlink path to the wrapper

2021-10-09 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam created this revision.
saiislam added reviewers: Meinersbur, ye-luo, JonChesterfield.
saiislam requested review of this revision.
Herald added a reviewer: jdoerfert.
Herald added subscribers: cfe-commits, sstefan1.
Herald added a project: clang.

Added support for a "--path=" option in clang-nvlink-wrapper which
takes the path of the directory containing the nvlink binary.

Static Device Library support for OpenMP (D105191) now searches for the
nvlink binary and passes its location via this option. In the absence
of this option, the nvlink binary is searched for in the locations
listed in PATH.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D111488

Files:
  clang/lib/Driver/ToolChains/Cuda.cpp
  clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp


Index: clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp
===
--- clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp
+++ clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp
@@ -41,6 +41,15 @@
 
 static cl::opt Help("h", cl::desc("Alias for -help"), cl::Hidden);
 
+// Mark all our options with this category, everything else (except for -help)
+// will be hidden.
+static cl::OptionCategory
+ClangNvlinkWrapperCategory("clang-nvlink-wrapper options");
+
+static cl::opt
+NvlinkUserPath("path", cl::desc("path of directory containing nvlink"),
+   cl::cat(ClangNvlinkWrapperCategory));
+
 static Error runNVLink(std::string NVLinkPath,
SmallVectorImpl ) {
   std::vector NVLArgs;
@@ -121,7 +130,6 @@
 
 int main(int argc, const char **argv) {
   sys::PrintStackTraceOnErrorSignal(argv[0]);
-
   if (Help) {
 cl::PrintHelpMessage();
 return 0;
@@ -132,12 +140,7 @@
 exit(1);
   };
 
-  ErrorOr NvlinkPath = sys::findProgramByName("nvlink");
-  if (!NvlinkPath) {
-reportError(createStringError(NvlinkPath.getError(),
-  "unable to find 'nvlink' in path"));
-  }
-
+  std::string NvlinkPath;
   SmallVector Argv(argv, argv + argc);
   SmallVector ArgvSubst;
   SmallVector TmpFiles;
@@ -147,15 +150,28 @@
 
   for (size_t i = 1; i < Argv.size(); ++i) {
 std::string Arg = Argv[i];
+StringRef ArgRef(Arg);
+auto NvlPath = ArgRef.startswith_insensitive("--path=");
 if (sys::path::extension(Arg) == ".a") {
   if (Error Err = extractArchiveFiles(Arg, ArgvSubst, TmpFiles))
 reportError(std::move(Err));
+} else if (NvlPath) {
+  NvlinkPath = ArgRef.substr(7).str().append("/nvlink");
 } else {
   ArgvSubst.push_back(Arg);
 }
   }
 
-  if (Error Err = runNVLink(NvlinkPath.get(), ArgvSubst))
+  if (NvlinkPath.empty()) {
+ErrorOr NvlinkPathErr = sys::findProgramByName("nvlink");
+if (!NvlinkPathErr) {
+  reportError(createStringError(NvlinkPathErr.getError(),
+"unable to find 'nvlink' in path"));
+}
+NvlinkPath = NvlinkPathErr.get();
+  }
+
+  if (Error Err = runNVLink(NvlinkPath, ArgvSubst))
 reportError(std::move(Err));
   if (Error Err = cleanupTmpFiles(TmpFiles))
 reportError(std::move(Err));
Index: clang/lib/Driver/ToolChains/Cuda.cpp
===
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -613,6 +613,13 @@
   AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "nvptx", 
GPUArch,
   false, false);
 
+  // Find nvlink and pass it as "--path=" argument of clang-nvlink-wrapper.
+  auto NvlinkDir =
+  llvm::sys::path::parent_path(getToolChain().GetProgramPath("nvlink"))
+  .str();
+  const char *NvlinkPath = Args.MakeArgString(Twine("--path=" + NvlinkDir));
+  CmdArgs.push_back(NvlinkPath);
+
   const char *Exec =
   
Args.MakeArgString(getToolChain().GetProgramPath("clang-nvlink-wrapper"));
   C.addCommand(std::make_unique(


Index: clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp
===
--- clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp
+++ clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp
@@ -41,6 +41,15 @@
 
 static cl::opt Help("h", cl::desc("Alias for -help"), cl::Hidden);
 
+// Mark all our options with this category, everything else (except for -help)
+// will be hidden.
+static cl::OptionCategory
+ClangNvlinkWrapperCategory("clang-nvlink-wrapper options");
+
+static cl::opt
+NvlinkUserPath("path", cl::desc("path of directory containing nvlink"),
+   cl::cat(ClangNvlinkWrapperCategory));
+
 static Error runNVLink(std::string NVLinkPath,
SmallVectorImpl ) {
   std::vector NVLArgs;
@@ -121,7 +130,6 @@
 
 int main(int argc, const char **argv) {
   sys::PrintStackTraceOnErrorSignal(argv[0]);
-
   if (Help) {
 cl::PrintHelpMessage();
 return 0;
@@ -132,12 +140,7 @@
 exit(1);
   };
 
-  ErrorOr

1 2 3 >

1 - 100 of 279 matches

Mail list logo