[PATCH] D139287: [WIP][OpenMP] Introduce basic JIT support to OpenMP target offloading

2022-12-12 Thread Shilei Tian via Phabricator via cfe-commits
tianshilei1992 updated this revision to Diff 482158.
tianshilei1992 added a comment.

add test


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139287/new/

https://reviews.llvm.org/D139287

Files:
  openmp/cmake/OpenMPTesting.cmake
  openmp/libomptarget/CMakeLists.txt
  openmp/libomptarget/plugins-nextgen/CMakeLists.txt
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/CMakeLists.txt
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.h
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
  openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
  openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
  openmp/libomptarget/test/lit.cfg

Index: openmp/libomptarget/test/lit.cfg
===
--- openmp/libomptarget/test/lit.cfg
+++ openmp/libomptarget/test/lit.cfg
@@ -34,6 +34,15 @@
 else:
 config.environment[name] = value
 
+# Evaluate the environment variable, which is a string boolean value.
+def evaluate_bool_env(env):
+env = env.lower()
+possible_true_values = ["on", "true", "1"]
+for v in possible_true_values:
+if env == v:
+return True
+return False
+
 # name: The name of this test suite.
 config.name = 'libomptarget :: ' + config.libomptarget_current_target
 
@@ -111,10 +120,17 @@
 config.test_flags += " --libomptarget-nvptx-bc-path=" + config.library_dir
 if config.libomptarget_current_target.endswith('-LTO'):
 config.test_flags += " -foffload-lto"
+if config.libomptarget_current_target.endswith('-JIT-LTO') and evaluate_bool_env(
+config.environment['LIBOMPTARGET_NEXTGEN_PLUGINS']
+):
+config.test_flags += " -foffload-lto"
+config.test_flags += " -Wl,--embed-bitcode"
 
 def remove_suffix_if_present(name):
 if name.endswith('-LTO'):
 return name[:-4]
+elif name.endswith('-JIT-LTO'):
+return name[:-8]
 else:
 return name
 
Index: openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
===
--- openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
+++ openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
@@ -358,6 +358,10 @@
  Expected<bool> isImageCompatible(__tgt_image_info *Info) const override {
 return true;
   }
+
+  Triple::ArchType getTripleArch() const override {
+return Triple::LIBOMPTARGET_NEXTGEN_GENERIC_PLUGIN_TRIPLE;
+  }
 };
 
 GenericPluginTy *Plugin::createPlugin() { return new GenELF64PluginTy(); }
Index: openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
===
--- openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
+++ openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
@@ -278,6 +278,14 @@
  GridValues.GV_Warp_Size))
   return Err;
 
+if (auto Err = getDeviceAttr(CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
+ ComputeCapability.Major))
+  return Err;
+
+if (auto Err = getDeviceAttr(CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
+ ComputeCapability.Minor))
+  return Err;
+
 return Plugin::success();
   }
 
@@ -776,6 +784,8 @@
 return Plugin::check(Res, "Error in cuDeviceGetAttribute: %s");
   }
 
+  std::string getArch() const override { return ComputeCapability.str(); }
+
 private:
   using CUDAStreamManagerTy = GenericDeviceResourceManagerTy;
   using CUDAEventManagerTy = GenericDeviceResourceManagerTy;
@@ -792,6 +802,15 @@
 
   /// The CUDA device handler.
   CUdevice Device = CU_DEVICE_INVALID;
+
+  ///
+  struct ComputeCapabilityTy {
+uint32_t Major;
+uint32_t Minor;
+std::string str() const {
+  return "sm_" + std::to_string(Major * 10 + Minor);
+}
+  } ComputeCapability;
 };
 
 Error CUDAKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
@@ -890,6 +909,11 @@
   /// Get the ELF code for recognizing the compatible image binary.
   uint16_t getMagicElfBits() const override { return ELF::EM_CUDA; }
 
+  Triple::ArchType getTripleArch() const override {
+// TODO: I think we can drop the support for 32-bit NVPTX devices.
+return Triple::nvptx64;
+  }
+
   /// Check whether the image is compatible with the available CUDA devices.
  Expected<bool> isImageCompatible(__tgt_image_info *Info) const override {
 for (int32_t DevId = 0; DevId < getNumDevices(); ++DevId) {
Index: openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
===
--- openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
+++ 

[PATCH] D139287: [WIP][OpenMP] Introduce basic JIT support to OpenMP target offloading

2022-12-12 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added inline comments.



Comment at: 
openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp:685
+  auto TgtImageOrErr = jit::compile(TgtImage, Triple, Arch,
+/* OptLevel */ 3, PostProcessing);
+  if (!TgtImageOrErr) {

tianshilei1992 wrote:
> Do we want a configurable value for the `OptLevel`, or can we know it from 
> somewhere else what value is used at compile time?
We most likely want a env var and maybe later even pass the value through. Env 
var is good enough for now.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139287/new/

https://reviews.llvm.org/D139287

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D139287: [WIP][OpenMP] Introduce basic JIT support to OpenMP target offloading

2022-12-11 Thread Shilei Tian via Phabricator via cfe-commits
tianshilei1992 updated this revision to Diff 481980.
tianshilei1992 added a comment.

rebase and fix comments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139287/new/

https://reviews.llvm.org/D139287

Files:
  openmp/libomptarget/plugins-nextgen/CMakeLists.txt
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/CMakeLists.txt
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.h
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
  openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
  openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp

Index: openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
===
--- openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
+++ openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
@@ -358,6 +358,10 @@
   Expected isImageCompatible(__tgt_image_info *Info) const override {
 return true;
   }
+
+  Triple::ArchType getTripleArch() const override {
+return Triple::LIBOMPTARGET_NEXTGEN_GENERIC_PLUGIN_TRIPLE;
+  }
 };
 
 GenericPluginTy *Plugin::createPlugin() { return new GenELF64PluginTy(); }
Index: openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
===
--- openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
+++ openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
@@ -278,6 +278,14 @@
  GridValues.GV_Warp_Size))
   return Err;
 
+if (auto Err = getDeviceAttr(CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
+ ComputeCapability.Major))
+  return Err;
+
+if (auto Err = getDeviceAttr(CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
+ ComputeCapability.Minor))
+  return Err;
+
 return Plugin::success();
   }
 
@@ -776,6 +784,8 @@
 return Plugin::check(Res, "Error in cuDeviceGetAttribute: %s");
   }
 
+  std::string getArch() const override { return ComputeCapability.str(); }
+
 private:
   using CUDAStreamManagerTy = GenericDeviceResourceManagerTy;
   using CUDAEventManagerTy = GenericDeviceResourceManagerTy;
@@ -792,6 +802,15 @@
 
   /// The CUDA device handler.
   CUdevice Device = CU_DEVICE_INVALID;
+
+  ///
+  struct ComputeCapabilityTy {
+uint32_t Major;
+uint32_t Minor;
+std::string str() const {
+  return "sm_" + std::to_string(Major * 10 + Minor);
+}
+  } ComputeCapability;
 };
 
 Error CUDAKernelTy::launchImpl(GenericDeviceTy ,
@@ -890,6 +909,11 @@
   /// Get the ELF code for recognizing the compatible image binary.
   uint16_t getMagicElfBits() const override { return ELF::EM_CUDA; }
 
+  Triple::ArchType getTripleArch() const override {
+// TODO: I think we can drop the support for 32-bit NVPTX devices.
+return Triple::nvptx64;
+  }
+
   /// Check whether the image is compatible with the available CUDA devices.
   Expected isImageCompatible(__tgt_image_info *Info) const override {
 for (int32_t DevId = 0; DevId < getNumDevices(); ++DevId) {
Index: openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
===
--- openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
+++ openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
@@ -25,6 +25,7 @@
 #include "omptarget.h"
 
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
 #include "llvm/Support/Allocator.h"
@@ -372,6 +373,17 @@
   }
   uint32_t getDynamicMemorySize() const { return OMPX_SharedMemorySize; }
 
+  /// Get target architecture.
+  virtual std::string getArch() const {
+llvm_unreachable("device doesn't support JIT");
+  }
+
+  /// Post processing after jit backend. The ownership of \p MB will be taken.
+  virtual Expected<std::unique_ptr<MemoryBuffer>>
+  doJITPostProcessing(std::unique_ptr<MemoryBuffer> MB) const {
+    return MB;
+  }
+
 private:
   /// Register offload entry for global variable.
   Error registerGlobalOffloadEntry(DeviceImageTy ,
@@ -482,6 +494,11 @@
   /// Get the ELF code to recognize the binary image of this plugin.
   virtual uint16_t getMagicElfBits() const = 0;
 
+  /// Get the target triple of this plugin.
+  virtual Triple::ArchType getTripleArch() const {
+llvm_unreachable("target doesn't support jit");
+  }
+
   /// Allocate a structure using the internal allocator.
   template <typename Ty> Ty *allocate() {
     return reinterpret_cast<Ty *>(Allocator.Allocate(sizeof(Ty), alignof(Ty)));
Index: openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp

[PATCH] D139287: [WIP][OpenMP] Introduce basic JIT support to OpenMP target offloading

2022-12-11 Thread Shilei Tian via Phabricator via cfe-commits
tianshilei1992 updated this revision to Diff 481960.
tianshilei1992 added a comment.

rebase and fix opt error


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139287/new/

https://reviews.llvm.org/D139287

Files:
  openmp/libomptarget/plugins-nextgen/CMakeLists.txt
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/CMakeLists.txt
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.h
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
  openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
  openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp

Index: openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
===
--- openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
+++ openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
@@ -358,6 +358,10 @@
   Expected isImageCompatible(__tgt_image_info *Info) const override {
 return true;
   }
+
+  Triple::ArchType getTripleArch() const override {
+return Triple::LIBOMPTARGET_NEXTGEN_GENERIC_PLUGIN_TRIPLE;
+  }
 };
 
 GenericPluginTy *Plugin::createPlugin() { return new GenELF64PluginTy(); }
Index: openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
===
--- openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
+++ openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
@@ -278,6 +278,14 @@
  GridValues.GV_Warp_Size))
   return Err;
 
+if (auto Err = getDeviceAttr(CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
+ ComputeCapability.Major))
+  return Err;
+
+if (auto Err = getDeviceAttr(CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
+ ComputeCapability.Minor))
+  return Err;
+
 return Plugin::success();
   }
 
@@ -776,6 +784,8 @@
 return Plugin::check(Res, "Error in cuDeviceGetAttribute: %s");
   }
 
+  std::string getArch() const override { return ComputeCapability.str(); }
+
 private:
   using CUDAStreamManagerTy = GenericDeviceResourceManagerTy;
   using CUDAEventManagerTy = GenericDeviceResourceManagerTy;
@@ -792,6 +802,15 @@
 
   /// The CUDA device handler.
   CUdevice Device = CU_DEVICE_INVALID;
+
+  ///
+  struct ComputeCapabilityTy {
+uint32_t Major;
+uint32_t Minor;
+std::string str() const {
+  return "sm_" + std::to_string(Major * 10 + Minor);
+}
+  } ComputeCapability;
 };
 
 Error CUDAKernelTy::launchImpl(GenericDeviceTy ,
@@ -890,6 +909,11 @@
   /// Get the ELF code for recognizing the compatible image binary.
   uint16_t getMagicElfBits() const override { return ELF::EM_CUDA; }
 
+  Triple::ArchType getTripleArch() const override {
+// TODO: I think we can drop the support for 32-bit NVPTX devices.
+return Triple::nvptx64;
+  }
+
   /// Check whether the image is compatible with the available CUDA devices.
   Expected isImageCompatible(__tgt_image_info *Info) const override {
 for (int32_t DevId = 0; DevId < getNumDevices(); ++DevId) {
Index: openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
===
--- openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
+++ openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
@@ -25,6 +25,7 @@
 #include "omptarget.h"
 
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
 #include "llvm/Support/Allocator.h"
@@ -372,6 +373,17 @@
   }
   uint32_t getDynamicMemorySize() const { return OMPX_SharedMemorySize; }
 
+  /// Get target architecture.
+  virtual std::string getArch() const {
+llvm_unreachable("device doesn't support JIT");
+  }
+
+  /// Post processing after jit backend. The ownership of \p MB will be taken.
+  virtual Expected>
+  doJITPostProcessing(std::unique_ptr MB) const {
+return MB;
+  }
+
 private:
   /// Register offload entry for global variable.
   Error registerGlobalOffloadEntry(DeviceImageTy ,
@@ -482,6 +494,11 @@
   /// Get the ELF code to recognize the binary image of this plugin.
   virtual uint16_t getMagicElfBits() const = 0;
 
+  /// Get the target triple of this plugin.
+  virtual Triple::ArchType getTripleArch() const {
+llvm_unreachable("target doesn't support jit");
+  }
+
   /// Allocate a structure using the internal allocator.
   template  Ty *allocate() {
 return reinterpret_cast(Allocator.Allocate(sizeof(Ty), alignof(Ty)));
Index: openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp

[PATCH] D139287: [WIP][OpenMP] Introduce basic JIT support to OpenMP target offloading

2022-12-09 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added inline comments.



Comment at: 
openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp:64
+  } else {
+llvm_unreachable("unsupported JIT target");
+  }

Not unreachable. Use a printf and an abort.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139287/new/

https://reviews.llvm.org/D139287

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D139287: [WIP][OpenMP] Introduce basic JIT support to OpenMP target offloading

2022-12-09 Thread Shilei Tian via Phabricator via cfe-commits
tianshilei1992 updated this revision to Diff 481686.
tianshilei1992 added a comment.
Herald added a subscriber: pcwang-thead.

rebase and refine

It currently crashes in `setupLLVMOptimizationRemarks`


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139287/new/

https://reviews.llvm.org/D139287

Files:
  openmp/libomptarget/plugins-nextgen/CMakeLists.txt
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/CMakeLists.txt
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.h
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
  openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
  openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp

Index: openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
===
--- openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
+++ openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
@@ -358,6 +358,10 @@
   Expected isImageCompatible(__tgt_image_info *Info) const override {
 return true;
   }
+
+  Triple::ArchType getTripleArch() const override {
+return Triple::LIBOMPTARGET_NEXTGEN_GENERIC_PLUGIN_TRIPLE;
+  }
 };
 
 GenericPluginTy *Plugin::createPlugin() { return new GenELF64PluginTy(); }
Index: openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
===
--- openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
+++ openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
@@ -278,6 +278,14 @@
  GridValues.GV_Warp_Size))
   return Err;
 
+if (auto Err = getDeviceAttr(CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
+ ComputeCapability.Major))
+  return Err;
+
+if (auto Err = getDeviceAttr(CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
+ ComputeCapability.Minor))
+  return Err;
+
 return Plugin::success();
   }
 
@@ -776,6 +784,8 @@
 return Plugin::check(Res, "Error in cuDeviceGetAttribute: %s");
   }
 
+  std::string getArch() const override { return ComputeCapability.str(); }
+
 private:
   using CUDAStreamManagerTy = GenericDeviceResourceManagerTy;
   using CUDAEventManagerTy = GenericDeviceResourceManagerTy;
@@ -792,6 +802,15 @@
 
   /// The CUDA device handler.
   CUdevice Device = CU_DEVICE_INVALID;
+
+  ///
+  struct ComputeCapabilityTy {
+uint32_t Major;
+uint32_t Minor;
+std::string str() const {
+  return "sm_" + std::to_string(Major * 10 + Minor);
+}
+  } ComputeCapability;
 };
 
 Error CUDAKernelTy::launchImpl(GenericDeviceTy ,
@@ -890,6 +909,11 @@
   /// Get the ELF code for recognizing the compatible image binary.
   uint16_t getMagicElfBits() const override { return ELF::EM_CUDA; }
 
+  Triple::ArchType getTripleArch() const override {
+// TODO: I think we can drop the support for 32-bit NVPTX devices.
+return Triple::nvptx64;
+  }
+
   /// Check whether the image is compatible with the available CUDA devices.
   Expected isImageCompatible(__tgt_image_info *Info) const override {
 for (int32_t DevId = 0; DevId < getNumDevices(); ++DevId) {
Index: openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
===
--- openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
+++ openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
@@ -25,6 +25,7 @@
 #include "omptarget.h"
 
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
 #include "llvm/Support/Allocator.h"
@@ -372,6 +373,17 @@
   }
   uint32_t getDynamicMemorySize() const { return OMPX_SharedMemorySize; }
 
+  /// Get target architecture.
+  virtual std::string getArch() const {
+llvm_unreachable("device doesn't support JIT");
+  }
+
+  /// Post processing after jit backend. The ownership of \p MB will be taken.
+  virtual Expected>
+  doJITPostProcessing(std::unique_ptr MB) const {
+return MB;
+  }
+
 private:
   /// Register offload entry for global variable.
   Error registerGlobalOffloadEntry(DeviceImageTy ,
@@ -482,6 +494,11 @@
   /// Get the ELF code to recognize the binary image of this plugin.
   virtual uint16_t getMagicElfBits() const = 0;
 
+  /// Get the target triple of this plugin.
+  virtual Triple::ArchType getTripleArch() const {
+llvm_unreachable("target doesn't support jit");
+  }
+
   /// Allocate a structure using the internal allocator.
   template  Ty *allocate() {
 return reinterpret_cast(Allocator.Allocate(sizeof(Ty), alignof(Ty)));
Index: 

[PATCH] D139287: [WIP][OpenMP] Introduce basic JIT support to OpenMP target offloading

2022-12-07 Thread Shilei Tian via Phabricator via cfe-commits
tianshilei1992 added a comment.

We probably want to enable a new test configuration to have each test run in 
JIT mode.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139287/new/

https://reviews.llvm.org/D139287

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D139287: [WIP][OpenMP] Introduce basic JIT support to OpenMP target offloading

2022-12-07 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added a comment.

All but a test and this looks good to me.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139287/new/

https://reviews.llvm.org/D139287

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D139287: [WIP][OpenMP] Introduce basic JIT support to OpenMP target offloading

2022-12-07 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

We should probably make a test for this. Do we currently test the nextgen 
plugins?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139287/new/

https://reviews.llvm.org/D139287

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D139287: [WIP][OpenMP] Introduce basic JIT support to OpenMP target offloading

2022-12-07 Thread Shilei Tian via Phabricator via cfe-commits
tianshilei1992 updated this revision to Diff 481023.
tianshilei1992 added a comment.

add build components


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139287/new/

https://reviews.llvm.org/D139287

Files:
  openmp/libomptarget/plugins-nextgen/CMakeLists.txt
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/CMakeLists.txt
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.h
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
  openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
  openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp

Index: openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
===
--- openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
+++ openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
@@ -358,6 +358,10 @@
   Expected isImageCompatible(__tgt_image_info *Info) const override {
 return true;
   }
+
+  Triple::ArchType getTripleArch() const override {
+return Triple::LIBOMPTARGET_NEXTGEN_GENERIC_PLUGIN_TRIPLE;
+  }
 };
 
 GenericPluginTy *Plugin::createPlugin() { return new GenELF64PluginTy(); }
Index: openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
===
--- openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
+++ openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
@@ -278,6 +278,14 @@
  GridValues.GV_Warp_Size))
   return Err;
 
+if (auto Err = getDeviceAttr(CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
+ ComputeCapability.Major))
+  return Err;
+
+if (auto Err = getDeviceAttr(CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
+ ComputeCapability.Minor))
+  return Err;
+
 return Plugin::success();
   }
 
@@ -776,6 +784,8 @@
 return Plugin::check(Res, "Error in cuDeviceGetAttribute: %s");
   }
 
+  std::string getArch() const override { return ComputeCapability.str(); }
+
 private:
   using CUDAStreamManagerTy = GenericDeviceResourceManagerTy;
   using CUDAEventManagerTy = GenericDeviceResourceManagerTy;
@@ -792,6 +802,15 @@
 
   /// The CUDA device handler.
   CUdevice Device = CU_DEVICE_INVALID;
+
+  ///
+  struct ComputeCapabilityTy {
+uint32_t Major;
+uint32_t Minor;
+std::string str() const {
+  return "sm_" + std::to_string(Major * 10 + Minor);
+}
+  } ComputeCapability;
 };
 
 Error CUDAKernelTy::launchImpl(GenericDeviceTy ,
@@ -890,6 +909,11 @@
   /// Get the ELF code for recognizing the compatible image binary.
   uint16_t getMagicElfBits() const override { return ELF::EM_CUDA; }
 
+  Triple::ArchType getTripleArch() const override {
+// TODO: I think we can drop the support for 32-bit NVPTX devices.
+return Triple::nvptx64;
+  }
+
   /// Check whether the image is compatible with the available CUDA devices.
   Expected isImageCompatible(__tgt_image_info *Info) const override {
 for (int32_t DevId = 0; DevId < getNumDevices(); ++DevId) {
Index: openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
===
--- openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
+++ openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
@@ -25,6 +25,7 @@
 #include "omptarget.h"
 
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
 #include "llvm/Support/Allocator.h"
@@ -372,6 +373,17 @@
   }
   uint32_t getDynamicMemorySize() const { return OMPX_SharedMemorySize; }
 
+  /// Get target architecture.
+  virtual std::string getArch() const {
+llvm_unreachable("device doesn't support JIT");
+  }
+
+  /// Post processing after jit backend. The ownership of \p MB will be taken.
+  virtual Expected>
+  doJITPostProcessing(std::unique_ptr MB) const {
+return MB;
+  }
+
 private:
   /// Register offload entry for global variable.
   Error registerGlobalOffloadEntry(DeviceImageTy ,
@@ -482,6 +494,11 @@
   /// Get the ELF code to recognize the binary image of this plugin.
   virtual uint16_t getMagicElfBits() const = 0;
 
+  /// Get the target triple of this plugin.
+  virtual Triple::ArchType getTripleArch() const {
+llvm_unreachable("target doesn't support jit");
+  }
+
   /// Allocate a structure using the internal allocator.
   template  Ty *allocate() {
 return reinterpret_cast(Allocator.Allocate(sizeof(Ty), alignof(Ty)));
Index: openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp

[PATCH] D139287: [WIP][OpenMP] Introduce basic JIT support to OpenMP target offloading

2022-12-07 Thread Shilei Tian via Phabricator via cfe-commits
tianshilei1992 marked 5 inline comments as done.
tianshilei1992 added inline comments.



Comment at: 
openmp/libomptarget/plugins-nextgen/common/PluginInterface/CMakeLists.txt:24
 else()
   llvm_map_components_to_libnames(llvm_libs Support)
 endif()

Have to figure out what components here.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139287/new/

https://reviews.llvm.org/D139287

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D139287: [WIP][OpenMP] Introduce basic JIT support to OpenMP target offloading

2022-12-07 Thread Shilei Tian via Phabricator via cfe-commits
tianshilei1992 updated this revision to Diff 481014.
tianshilei1992 added a comment.
Herald added a subscriber: aheejin.

drop LTO and fix comments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139287/new/

https://reviews.llvm.org/D139287

Files:
  openmp/libomptarget/plugins-nextgen/CMakeLists.txt
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/CMakeLists.txt
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.h
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
  openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
  openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp

Index: openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
===
--- openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
+++ openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
@@ -358,6 +358,10 @@
   Expected isImageCompatible(__tgt_image_info *Info) const override {
 return true;
   }
+
+  Triple::ArchType getTripleArch() const override {
+return Triple::LIBOMPTARGET_NEXTGEN_GENERIC_PLUGIN_TRIPLE;
+  }
 };
 
 GenericPluginTy *Plugin::createPlugin() { return new GenELF64PluginTy(); }
Index: openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
===
--- openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
+++ openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
@@ -278,6 +278,14 @@
  GridValues.GV_Warp_Size))
   return Err;
 
+if (auto Err = getDeviceAttr(CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
+ ComputeCapability.Major))
+  return Err;
+
+if (auto Err = getDeviceAttr(CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
+ ComputeCapability.Minor))
+  return Err;
+
 return Plugin::success();
   }
 
@@ -776,6 +784,8 @@
 return Plugin::check(Res, "Error in cuDeviceGetAttribute: %s");
   }
 
+  std::string getArch() const override { return ComputeCapability.str(); }
+
 private:
   using CUDAStreamManagerTy = GenericDeviceResourceManagerTy;
   using CUDAEventManagerTy = GenericDeviceResourceManagerTy;
@@ -792,6 +802,15 @@
 
   /// The CUDA device handler.
   CUdevice Device = CU_DEVICE_INVALID;
+
+  ///
+  struct ComputeCapabilityTy {
+uint32_t Major;
+uint32_t Minor;
+std::string str() const {
+  return "sm_" + std::to_string(Major * 10 + Minor);
+}
+  } ComputeCapability;
 };
 
 Error CUDAKernelTy::launchImpl(GenericDeviceTy ,
@@ -890,6 +909,11 @@
   /// Get the ELF code for recognizing the compatible image binary.
   uint16_t getMagicElfBits() const override { return ELF::EM_CUDA; }
 
+  Triple::ArchType getTripleArch() const override {
+// TODO: I think we can drop the support for 32-bit NVPTX devices.
+return Triple::nvptx64;
+  }
+
   /// Check whether the image is compatible with the available CUDA devices.
   Expected isImageCompatible(__tgt_image_info *Info) const override {
 for (int32_t DevId = 0; DevId < getNumDevices(); ++DevId) {
Index: openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
===
--- openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
+++ openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
@@ -25,6 +25,7 @@
 #include "omptarget.h"
 
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
 #include "llvm/Support/Allocator.h"
@@ -372,6 +373,17 @@
   }
   uint32_t getDynamicMemorySize() const { return OMPX_SharedMemorySize; }
 
+  /// Get target architecture.
+  virtual std::string getArch() const {
+llvm_unreachable("device doesn't support JIT");
+  }
+
+  /// Post processing after jit backend. The ownership of \p MB will be taken.
+  virtual Expected>
+  doJITPostProcessing(std::unique_ptr MB) const {
+return MB;
+  }
+
 private:
   /// Register offload entry for global variable.
   Error registerGlobalOffloadEntry(DeviceImageTy ,
@@ -482,6 +494,11 @@
   /// Get the ELF code to recognize the binary image of this plugin.
   virtual uint16_t getMagicElfBits() const = 0;
 
+  /// Get the target triple of this plugin.
+  virtual Triple::ArchType getTripleArch() const {
+llvm_unreachable("target doesn't support jit");
+  }
+
   /// Allocate a structure using the internal allocator.
   template <typename Ty> Ty *allocate() {
     return reinterpret_cast<Ty *>(Allocator.Allocate(sizeof(Ty), alignof(Ty)));
Index: openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp

[PATCH] D139287: [WIP][OpenMP] Introduce basic JIT support to OpenMP target offloading

2022-12-05 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added inline comments.



Comment at: 
openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp:184
+
+  auto AddStream =
+  [&](size_t Task,

tianshilei1992 wrote:
> jhuber6 wrote:
> > tianshilei1992 wrote:
> > > jhuber6 wrote:
> > > > tianshilei1992 wrote:
> > > > > Is there any way that we don't write it to a file here?
> > > > Why do we need to invoke LTO here? I figured that we could call the 
> > > > backend directly since we have no need to actually link any files, and 
> > > > we may not have a need to run more expensive optimizations when the 
> > > > bitcode is already optimized. If you do that then you should be able to 
> > > > just use a `raw_svector_ostream` as your output stream and get the 
> > > > compiled output written to that buffer.
> > > For the purpose of this basic JIT support, we indeed just need backend. 
> > > However, since we have the plan for super optimization, etc., having an 
> > > optimization pipeline here is also useful.
> > We should be able to configure our own optimization pipeline in that case, 
> > we might want the extra control as well.
> which means we basically rewrite the function `opt` and `backend` in 
> `LTO.cpp`. I thought about just invoking backend before, especially using LTO 
> requires us to build the resolution table. However, after a second thought, I 
> think it would be better to just use LTO.
Building the passes isn't too complicated, it would take up the same amount of 
space as the symbol resolutions and has the advantage that we don't need to 
write the output to a file. I could write an implementation for this to see how 
well it works.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139287/new/

https://reviews.llvm.org/D139287

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D139287: [WIP][OpenMP] Introduce basic JIT support to OpenMP target offloading

2022-12-05 Thread Shilei Tian via Phabricator via cfe-commits
tianshilei1992 added inline comments.



Comment at: 
openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp:184
+
+  auto AddStream =
+  [&](size_t Task,

jhuber6 wrote:
> tianshilei1992 wrote:
> > jhuber6 wrote:
> > > tianshilei1992 wrote:
> > > > Is there any way that we don't write it to a file here?
> > > Why do we need to invoke LTO here? I figured that we could call the 
> > > backend directly since we have no need to actually link any files, and 
> > > we may not have a need to run more expensive optimizations when the 
> > > bitcode is already optimized. If you do that then you should be able to 
> > > just use a `raw_svector_ostream` as your output stream and get the 
> > > compiled output written to that buffer.
> > For the purpose of this basic JIT support, we indeed just need backend. 
> > However, since we have the plan for super optimization, etc., having an 
> > optimization pipeline here is also useful.
> We should be able to configure our own optimization pipeline in that case, we 
> might want the extra control as well.
which means we basically rewrite the function `opt` and `backend` in `LTO.cpp`. 
I thought about just invoking backend before, especially using LTO requires us 
to build the resolution table. However, after a second thought, I think it 
would be better to just use LTO.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139287/new/

https://reviews.llvm.org/D139287

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D139287: [WIP][OpenMP] Introduce basic JIT support to OpenMP target offloading

2022-12-05 Thread Shilei Tian via Phabricator via cfe-commits
tianshilei1992 added a comment.

In D139287#3971062 , @jhuber6 wrote:

> In D139287#3971024 , 
> @tianshilei1992 wrote:
>
>> In D139287#3970996 , @jhuber6 
>> wrote:
>>
>>> Why do we have the JIT in the nextgen plugins? I figured that JIT would be 
>>> handled by `libomptarget` proper rather than the plugins. I guess this is 
>>> needed for per-kernel specialization? My idea of the rough pseudocode would 
>>> be like this and we wouldn't need a complex class hierarchy. Also I don't 
>>> know if we can skip `ptxas` by giving CUDA the ptx directly, we probably 
>>> will need to invoke `lld` on the command line however right.
>>>
>>>   for each image:
>>> if image is bitcode
>>>   image = compile(image)
>>>register(image)
>>
>> We could handle them in `libomptarget`, but that's gonna require we add 
>> another two interface functions: `is_valid_bitcode_image`, and 
>> `compile_bitcode_image`. It is doable. Handling them in plugin as a separate 
>> module can just reuse the two existing interfaces.
>
> Would we need to consult the plugin? We can just check the `magic` directly, 
> if it's bitcode we just compile it for its triple. If this was wrong then 
> when the plugin gets the compiled image it will error.

I prefer error out at earlier stage, especially if we have a bitcode image, and 
both Nvidia and AMD support JIT, then both NVIDIA and AMD will report a valid 
binary, thus continue compiling the image, initializing the plugin, etc., which 
could give us the wrong results.

>>> Also I don't know if we can skip `ptxas` by giving CUDA the ptx directly, 
>>> we probably will need to invoke `lld` on the command line however right.
>>>
>>>   for each image:
>>> if image is bitcode
>>>   image = compile(image)
>>>register(image)
>>
>> We can give CUDA PTX directly, since the CUDA JIT is to just call `ptxas` 
>> instead of `ptxas -c`, which requires `nvlink` afterwards.
>
> That makes it easier for us, so the only command line tool we need to call is 
> `lld` for AMDGPU.




Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139287/new/

https://reviews.llvm.org/D139287

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D139287: [WIP][OpenMP] Introduce basic JIT support to OpenMP target offloading

2022-12-05 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

In D139287#3971024 , @tianshilei1992 
wrote:

> In D139287#3970996 , @jhuber6 wrote:
>
>> Why do we have the JIT in the nextgen plugins? I figured that JIT would be 
>> handled by `libomptarget` proper rather than the plugins. I guess this is 
>> needed for per-kernel specialization? My idea of the rough pseudocode would 
>> be like this and we wouldn't need a complex class hierarchy. Also I don't 
>> know if we can skip `ptxas` by giving CUDA the ptx directly, we probably 
>> will need to invoke `lld` on the command line however right.
>>
>>   for each image:
>> if image is bitcode
>>   image = compile(image)
>>register(image)
>
> We could handle them in `libomptarget`, but that's gonna require we add 
> another two interface functions: `is_valid_bitcode_image`, and 
> `compile_bitcode_image`. It is doable. Handling them in plugin as a separate 
> module can just reuse the two existing interfaces.

Would we need to consult the plugin? We can just check the `magic` directly, if 
it's bitcode we just compile it for its triple. If this was wrong then when the 
plugin gets the compiled image it will error.

>> Also I don't know if we can skip `ptxas` by giving CUDA the ptx directly, we 
>> probably will need to invoke `lld` on the command line however right.
>>
>>   for each image:
>> if image is bitcode
>>   image = compile(image)
>>register(image)
>
> We can give CUDA PTX directly, since the CUDA JIT is to just call `ptxas` 
> instead of `ptxas -c`, which requires `nvlink` afterwards.

That makes it easier for us, so the only command line tool we need to call is 
`lld` for AMDGPU.




Comment at: 
openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp:184
+
+  auto AddStream =
+  [&](size_t Task,

tianshilei1992 wrote:
> jhuber6 wrote:
> > tianshilei1992 wrote:
> > > Is there any way that we don't write it to a file here?
> > Why do we need to invoke LTO here? I figured that we could call the backend 
> > directly since we have no need to actually link any files, and we may not 
> > have a need to run more expensive optimizations when the bitcode is already 
> > optimized. If you do that then you should be able to just use a 
> > `raw_svector_ostream` as your output stream and get the compiled output 
> > written to that buffer.
> For the purpose of this basic JIT support, we indeed just need backend. 
> However, since we have the plan for super optimization, etc., having an 
> optimization pipeline here is also useful.
We should be able to configure our own optimization pipeline in that case, we 
might want the extra control as well.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139287/new/

https://reviews.llvm.org/D139287

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D139287: [WIP][OpenMP] Introduce basic JIT support to OpenMP target offloading

2022-12-05 Thread Shilei Tian via Phabricator via cfe-commits
tianshilei1992 added a comment.

In D139287#3970996 , @jhuber6 wrote:

> Why do we have the JIT in the nextgen plugins? I figured that JIT would be 
> handled by `libomptarget` proper rather than the plugins. I guess this is 
> needed for per-kernel specialization? My idea of the rough pseudocode would 
> be like this and we wouldn't need a complex class heirarchy. Also I don't 
> know if we can skip `ptxas` by giving CUDA the ptx directly, we probably will 
> need to invoke `lld` on the command line however right.
>
>   for each image:
> if image is bitcode
>   image = compile(image)
>register(image)

We could handle them in `libomptarget`, but that's gonna require we add another 
two interface functions: `is_valid_bitcode_image`, and `compile_bitcode_image`. 
It is doable. Handling them in plugin as a separate module can just reuse the 
two existing interfaces.

> Also I don't know if we can skip `ptxas` by giving CUDA the ptx directly, we 
> probably will need to invoke `lld` on the command line however right.
>
>   for each image:
> if image is bitcode
>   image = compile(image)
>register(image)

We can give CUDA PTX directly, since the CUDA JIT is to just call `ptxas` 
instead of `ptxas -c`, which requires `nvlink` afterwards.




Comment at: 
openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp:184
+
+  auto AddStream =
+  [&](size_t Task,

jhuber6 wrote:
> tianshilei1992 wrote:
> > Is there any way that we don't write it to a file here?
> Why do we need to invoke LTO here? I figured that we could call the backend 
> directly since we have no need to actually link any files, and we may not 
> have a need to run more expensive optimizations when the bitcode is already 
> optimized. If you do that then you should be able to just use a 
> `raw_svector_ostream` as your output stream and get the compiled output 
> written to that buffer.
For the purpose of this basic JIT support, we indeed just need backend. 
However, since we have the plan for super optimization, etc., having an 
optimization pipeline here is also useful.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139287/new/

https://reviews.llvm.org/D139287

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D139287: [WIP][OpenMP] Introduce basic JIT support to OpenMP target offloading

2022-12-05 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

Why do we have the JIT in the nextgen plugins? I figured that JIT would be 
handled by `libomptarget` proper rather than the plugins. I guess this is 
needed for per-kernel specialization? My idea of the rough pseudocode would be 
like this and we wouldn't need a complex class hierarchy. Also I don't know if 
we can skip `ptxas` by giving CUDA the ptx directly, we probably will need to 
invoke `lld` on the command line however right.

  for each image:
if image is bitcode
  image = compile(image)
   register(image)




Comment at: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp:879
"Cannot embed bitcode with multiple files.");
-OutputFiles.push_back(static_cast<std::string>(BitcodeOutput.front()));
+OutputFiles.push_back(Args.MakeArgString(BitcodeOutput.front()));
 return Error::success();

tianshilei1992 wrote:
> This will be pushed by Joseph in another patch.
Did that this morning.



Comment at: 
openmp/libomptarget/plugins-nextgen/common/PluginInterface/CMakeLists.txt:24
 # Plugin Interface library.
-add_library(PluginInterface OBJECT PluginInterface.cpp GlobalHandler.cpp)
+add_llvm_library(PluginInterface OBJECT PluginInterface.cpp GlobalHandler.cpp 
JIT.cpp)
 

tianshilei1992 wrote:
> I guess this might cause the issue of non-protected global symbols.
Should we be able to put all this in the `add_llvm_library`?



Comment at: 
openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp:47-51
+  InitializeAllTargetInfos();
+  InitializeAllTargets();
+  InitializeAllTargetMCs();
+  InitializeAllAsmParsers();
+  InitializeAllAsmPrinters();

We could probably limit these to the ones we actually care about since we know 
the triples. Not sure if it would save us much runtime.



Comment at: 
openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp:184
+
+  auto AddStream =
+  [&](size_t Task,

tianshilei1992 wrote:
> Is there any way that we don't write it to a file here?
Why do we need to invoke LTO here? I figured that we could call the backend 
directly since we have no need to actually link any filies, and we may not have 
a need to run more expensive optimizations when the bitcode is already 
optimized. If you do that then you should be able to just use a 
`raw_svector_ostream` as your output stream and get the compiled output written 
to that buffer.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139287/new/

https://reviews.llvm.org/D139287

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D139287: [WIP][OpenMP] Introduce basic JIT support to OpenMP target offloading

2022-12-04 Thread Shilei Tian via Phabricator via cfe-commits
tianshilei1992 added inline comments.



Comment at: 
openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp:228
+namespace jit {
+bool checkBitcodeImage(__tgt_device_image *Image, std::string Triple) {
+  TimeTraceScope TimeScope("Check bitcode image");

I might change the return value to `Expected<bool>` such that it is able to 
pass the error info back to the caller.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139287/new/

https://reviews.llvm.org/D139287

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D139287: [WIP][OpenMP] Introduce basic JIT support to OpenMP target offloading

2022-12-04 Thread Shilei Tian via Phabricator via cfe-commits
tianshilei1992 added inline comments.



Comment at: 
openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp:685
+  auto TgtImageOrErr = jit::compile(TgtImage, Triple, Arch,
+/* OptLevel */ 3, PostProcessing);
+  if (!TgtImageOrErr) {

Do we want a configurable value for the `OptLevel`, or can we know it from 
somewhere else what value is used at compile time?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139287/new/

https://reviews.llvm.org/D139287

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D139287: [WIP][OpenMP] Introduce basic JIT support to OpenMP target offloading

2022-12-04 Thread Shilei Tian via Phabricator via cfe-commits
tianshilei1992 added inline comments.



Comment at: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp:879
"Cannot embed bitcode with multiple files.");
-OutputFiles.push_back(static_cast<std::string>(BitcodeOutput.front()));
+OutputFiles.push_back(Args.MakeArgString(BitcodeOutput.front()));
 return Error::success();

This will be pushed by Joseph in another patch.



Comment at: 
openmp/libomptarget/plugins-nextgen/common/PluginInterface/CMakeLists.txt:24
 # Plugin Interface library.
-add_library(PluginInterface OBJECT PluginInterface.cpp GlobalHandler.cpp)
+add_llvm_library(PluginInterface OBJECT PluginInterface.cpp GlobalHandler.cpp 
JIT.cpp)
 

I guess this might cause the issue of non-protected global symbols.



Comment at: 
openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp:184
+
+  auto AddStream =
+  [&](size_t Task,

Is there any way that we don't write it to a file here?



Comment at: 
openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp:255
+
+  if (ActualTriple.starts_with(Triple)) {
+BitcodeImageMap[Image->ImageStart] = ActualTriple;

Is there better way to compare two triples?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139287/new/

https://reviews.llvm.org/D139287

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D139287: [WIP][OpenMP] Introduce basic JIT support to OpenMP target offloading

2022-12-04 Thread Shilei Tian via Phabricator via cfe-commits
tianshilei1992 created this revision.
tianshilei1992 added reviewers: jdoerfert, ggeorgakoudis, jhuber6.
Herald added subscribers: guansong, yaxunl.
Herald added a project: All.
tianshilei1992 requested review of this revision.
Herald added subscribers: openmp-commits, cfe-commits, sstefan1.
Herald added projects: clang, OpenMP.

This is an ongoing work. There are still a couple of things missing, but this 
patch can demonstrate how the basic JIT will generally work.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D139287

Files:
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
  openmp/libomptarget/plugins-nextgen/CMakeLists.txt
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/CMakeLists.txt
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.h
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
  openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
  openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp

Index: openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
===
--- openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
+++ openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
@@ -358,6 +358,10 @@
   Expected<bool> isImageCompatible(__tgt_image_info *Info) const override {
 return true;
   }
+
+  std::string getTriple() const override {
+return LIBOMPTARGET_NEXTGEN_GENERIC_PLUGIN_TRIPLE;
+  }
 };
 
 GenericPluginTy *Plugin::createPlugin() { return new GenELF64PluginTy(); }
Index: openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
===
--- openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
+++ openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
@@ -278,6 +278,14 @@
  GridValues.GV_Warp_Size))
   return Err;
 
+if (auto Err = getDeviceAttr(CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
+ ComputeCapability.Major))
+  return Err;
+
+if (auto Err = getDeviceAttr(CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
+ ComputeCapability.Minor))
+  return Err;
+
 return Plugin::success();
   }
 
@@ -776,6 +784,8 @@
 return Plugin::check(Res, "Error in cuDeviceGetAttribute: %s");
   }
 
+  std::string getArch() const override { return ComputeCapability.str(); }
+
 private:
   using CUDAStreamManagerTy = GenericDeviceResourceManagerTy<CUDAStreamRef>;
   using CUDAEventManagerTy = GenericDeviceResourceManagerTy<CUDAEventRef>;
@@ -792,6 +802,15 @@
 
   /// The CUDA device handler.
   CUdevice Device = CU_DEVICE_INVALID;
+
+  ///
+  struct ComputeCapabilityTy {
+uint32_t Major;
+uint32_t Minor;
+std::string str() const {
+  return "sm_" + std::to_string(Major * 10 + Minor);
+}
+  } ComputeCapability;
 };
 
 Error CUDAKernelTy::launchImpl(GenericDeviceTy ,
@@ -890,6 +909,8 @@
   /// Get the ELF code for recognizing the compatible image binary.
   uint16_t getMagicElfBits() const override { return ELF::EM_CUDA; }
 
+  std::string getTriple() const override { return "nvptx"; }
+
   /// Check whether the image is compatible with the available CUDA devices.
   Expected<bool> isImageCompatible(__tgt_image_info *Info) const override {
 for (int32_t DevId = 0; DevId < getNumDevices(); ++DevId) {
Index: openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
===
--- openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
+++ openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
@@ -372,6 +372,20 @@
   }
   uint32_t getDynamicMemorySize() const { return OMPX_SharedMemorySize; }
 
+  /// Get target architecture.
+  virtual std::string getArch() const {
+llvm_unreachable("device doesn't support JIT");
+  }
+
+  /// Post processing after jit backend. Since the output of LTO backend is
+  /// written to file, the only argument here is the temporary file name. It is
+  /// expected to return a memory buffer that contains the binary image. Targets
+  /// like NVPTX can use this function to call the actual assembler to get the
+  /// actual device image; otherwise the default implementation of this function
+  /// simply reads the file.
+  virtual Expected<std::unique_ptr<MemoryBuffer>>
+  doJITPostProcessing(StringRef FileName) const;
+
 private:
   /// Register offload entry for global variable.
   Error registerGlobalOffloadEntry(DeviceImageTy ,
@@ -482,6 +496,11 @@
   /// Get the ELF code to recognize the binary image of this plugin.
   virtual uint16_t getMagicElfBits() const = 0;
 
+  /// Get the target triple of this plugin.
+  virtual std::string getTriple() const {
+llvm_unreachable("target doesn't support