[PATCH] D80917: [OpenMP][AMDGCN] Support OpenMP offloading for AMDGCN architecture - Part 2

Saiyedul Islam via Phabricator via cfe-commits Wed, 10 Jun 2020 11:44:20 -0700

This revision was automatically updated to reflect the committed changes.
Closed by commit rG4022bc2a6c5e: [OpenMP][AMDGCN] Support OpenMP offloading for 
AMDGCN architecture - Part 2 (authored by saiislam).


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D80917/new/

https://reviews.llvm.org/D80917

Files:
  clang/include/clang/Basic/TargetInfo.h
  clang/lib/Basic/Targets/AMDGPU.cpp
  clang/lib/Basic/Targets/NVPTX.cpp
  llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h

Index: llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
===================================================================
--- /dev/null
+++ llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
@@ -0,0 +1,131 @@
+//====--- OMPGridValues.h - Language-specific address spaces --*- C++ -*-====//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Provides definitions for Target specific Grid Values
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OPENMP_GRIDVALUES_H
+#define LLVM_OPENMP_GRIDVALUES_H
+
+namespace llvm {
+
+namespace omp {
+
+/// \brief Defines various target-specific GPU grid values that must be
+///        consistent between host RTL (plugin), device RTL, and clang.
+///        We can change grid values for a "fat" binary so that different
+///        passes get the correct values when generating code for a
+///        multi-target binary. Both amdgcn and nvptx values are stored in
+///        this file. In the future, should there be differences between GPUs
+///        of the same architecture, then simply make a different array and
+///        use the new array name.
+///
+/// Example usage in clang:
+///   const unsigned slot_size = ctx.GetTargetInfo().getGridValue(GV_Warp_Size);
+///
+/// Example usage in libomptarget/deviceRTLs:
+///   #include "OMPGridValues.h"
+///   #ifdef __AMDGPU__
+///     #define GRIDVAL AMDGPUGpuGridValues
+///   #else
+///     #define GRIDVAL NVPTXGpuGridValues
+///   #endif
+///   ... Then use this reference for GV_Warp_Size in the deviceRTL source.
+///   GRIDVAL[GV_Warp_Size]
+///
+/// Example usage in libomptarget hsa plugin:
+///   #include "OMPGridValues.h"
+///   #define GRIDVAL AMDGPUGpuGridValues
+///   ... Then use this reference to access GV_Warp_Size in the hsa plugin.
+///   GRIDVAL[GV_Warp_Size]
+///
+/// Example usage in libomptarget cuda plugin:
+///    #include "OMPGridValues.h"
+///    #define GRIDVAL NVPTXGpuGridValues
+///   ... Then use this reference to access GV_Warp_Size in the cuda plugin.
+///    GRIDVAL[GV_Warp_Size]
+///
+enum GVIDX {
+  /// The maximum number of workers in a kernel.
+  /// (THREAD_ABSOLUTE_LIMIT) - (GV_Warp_Size), might be issue for blockDim.z
+  GV_Threads,
+  /// The size reserved for data in a shared memory slot.
+  GV_Slot_Size,
+  /// The default value of maximum number of threads in a worker warp.
+  GV_Warp_Size,
+  /// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size
+  /// for NVPTX.
+  GV_Warp_Size_32,
+  /// The number of bits required to represent the max number of threads in warp
+  GV_Warp_Size_Log2,
+  /// GV_Warp_Size * GV_Slot_Size,
+  GV_Warp_Slot_Size,
+  /// the maximum number of teams.
+  GV_Max_Teams,
+  /// Global Memory Alignment
+  GV_Mem_Align,
+  /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
+  GV_Warp_Size_Log2_Mask,
+  // An alternative to the heavy data sharing infrastructure that uses global
+  // memory is one that uses device __shared__ memory.  The amount of such space
+  // (in bytes) reserved by the OpenMP runtime is noted here.
+  GV_SimpleBufferSize,
+  // The absolute maximum team size for a working group
+  GV_Max_WG_Size,
+  // The default maximum team size for a working group
+  GV_Default_WG_Size,
+  // This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN.
+  GV_Max_Warp_Number,
+  /// The slot size that should be reserved for a working warp.
+  /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
+  GV_Warp_Size_Log2_MaskL
+};
+
+/// For AMDGPU GPUs
+static constexpr unsigned AMDGPUGpuGridValues[] = {
+    448,       // GV_Threads
+    256,       // GV_Slot_Size
+    64,        // GV_Warp_Size
+    32,        // GV_Warp_Size_32
+    6,         // GV_Warp_Size_Log2
+    64 * 256,  // GV_Warp_Slot_Size
+    128,       // GV_Max_Teams
+    256,       // GV_Mem_Align
+    63,        // GV_Warp_Size_Log2_Mask
+    896,       // GV_SimpleBufferSize
+    1024,      // GV_Max_WG_Size,
+    256,       // GV_Defaut_WG_Size
+    1024 / 64, // GV_Max_WG_Size / GV_WarpSize
+    63         // GV_Warp_Size_Log2_MaskL
+};
+
+/// For Nvidia GPUs
+static constexpr unsigned NVPTXGpuGridValues[] = {
+    992,               // GV_Threads
+    256,               // GV_Slot_Size
+    32,                // GV_Warp_Size
+    32,                // GV_Warp_Size_32
+    5,                 // GV_Warp_Size_Log2
+    32 * 256,          // GV_Warp_Slot_Size
+    1024,              // GV_Max_Teams
+    256,               // GV_Mem_Align
+    (~0u >> (32 - 5)), // GV_Warp_Size_Log2_Mask
+    896,               // GV_SimpleBufferSize
+    1024,              // GV_Max_WG_Size
+    128,               // GV_Defaut_WG_Size
+    1024 / 32,         // GV_Max_WG_Size / GV_WarpSize
+    31                 // GV_Warp_Size_Log2_MaskL
+};
+
+} // namespace omp
+} // namespace llvm
+
+#endif // LLVM_OPENMP_GRIDVALUES_H
Index: clang/lib/Basic/Targets/NVPTX.cpp
===================================================================
--- clang/lib/Basic/Targets/NVPTX.cpp
+++ clang/lib/Basic/Targets/NVPTX.cpp
@@ -16,6 +16,7 @@
 #include "clang/Basic/MacroBuilder.h"
 #include "clang/Basic/TargetBuiltins.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/Frontend/OpenMP/OMPGridValues.h"
 
 using namespace clang;
 using namespace clang::targets;
@@ -62,6 +63,7 @@
   TLSSupported = false;
   VLASupported = false;
   AddrSpaceMap = &NVPTXAddrSpaceMap;
+  GridValues = llvm::omp::NVPTXGpuGridValues;
   UseAddrSpaceMapMangling = true;
 
   // Define available target features
Index: clang/lib/Basic/Targets/AMDGPU.cpp
===================================================================
--- clang/lib/Basic/Targets/AMDGPU.cpp
+++ clang/lib/Basic/Targets/AMDGPU.cpp
@@ -17,6 +17,7 @@
 #include "clang/Basic/MacroBuilder.h"
 #include "clang/Basic/TargetBuiltins.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/Frontend/OpenMP/OMPGridValues.h"
 #include "llvm/IR/DataLayout.h"
 
 using namespace clang;
@@ -286,6 +287,7 @@
   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
                                         : DataLayoutStringR600);
   assert(DataLayout->getAllocaAddrSpace() == Private);
+  GridValues = llvm::omp::AMDGPUGpuGridValues;
 
   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
                      !isAMDGCN(Triple));
Index: clang/include/clang/Basic/TargetInfo.h
===================================================================
--- clang/include/clang/Basic/TargetInfo.h
+++ clang/include/clang/Basic/TargetInfo.h
@@ -15,6 +15,7 @@
 #define LLVM_CLANG_BASIC_TARGETINFO_H
 
 #include "clang/Basic/AddressSpaces.h"
+#include "clang/Basic/CodeGenOptions.h"
 #include "clang/Basic/LLVM.h"
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/Specifiers.h"
@@ -29,6 +30,7 @@
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/Frontend/OpenMP/OMPGridValues.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/VersionTuple.h"
 #include <cassert>
@@ -198,6 +200,9 @@
   unsigned char RegParmMax, SSERegParmMax;
   TargetCXXABI TheCXXABI;
   const LangASMap *AddrSpaceMap;
+  const unsigned *GridValues =
+      nullptr; // Array of target-specific GPU grid values that must be
+               // consistent between host RTL (plugin), device RTL, and clang.
 
   mutable StringRef PlatformName;
   mutable VersionTuple PlatformMinVersion;
@@ -1321,6 +1326,12 @@
     return LangAS::Default;
   }
 
+  /// Return a target-specific GPU grid value based on the GVIDX enum \param gv
+  unsigned getGridValue(llvm::omp::GVIDX gv) const {
+    assert(GridValues != nullptr && "GridValues not initialized");
+    return GridValues[gv];
+  }
+
   /// Retrieve the name of the platform as it is used in the
   /// availability attribute.
   StringRef getPlatformName() const { return PlatformName; }

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D80917: [OpenMP][AMDGCN] Support OpenMP offloading for AMDGCN architecture - Part 2

Reply via email to