jdoerfert created this revision.
jdoerfert added reviewers: ABataev, arpith-jacob, guraypp, gtbercea, hfinkel.
jdoerfert added a project: OpenMP.
Herald added a project: clang.

This patch introduces the CGOpenMPRuntimeTarget class to collect helpers
and functionality common to all target offloading code generation
schemes. All initial members have been taken from the NVPTX code
generation and removed there.

This is a preparation patch for https://reviews.llvm.org/D59328

  rG LLVM Github Monorepo



Index: clang/lib/CodeGen/CMakeLists.txt
--- clang/lib/CodeGen/CMakeLists.txt
+++ clang/lib/CodeGen/CMakeLists.txt
@@ -69,6 +69,7 @@
+  CGOpenMPRuntimeTarget.cpp
Index: clang/lib/CodeGen/CGOpenMPRuntimeTarget.h
--- /dev/null
+++ clang/lib/CodeGen/CGOpenMPRuntimeTarget.h
@@ -0,0 +1,104 @@
+//===-- CGOpenMPRuntimeTarget.h --- Common OpenMP target codegen ----------===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// Code common to all OpenMP target codegens.
+#include "CGOpenMPRuntime.h"
+namespace clang {
+namespace CodeGen {
+struct CGOpenMPRuntimeTarget : public CGOpenMPRuntime {
+  explicit CGOpenMPRuntimeTarget(CodeGenModule &CGM);
+  /// Defines the execution mode.
+  enum ExecutionMode {
+    /// SPMD execution mode (all threads are worker threads).
+    EM_SPMD,
+    /// Non-SPMD execution mode (1 master thread, others are workers).
+    EM_NonSPMD,
+    /// Unknown execution mode (orphaned directive).
+    EM_Unknown,
+  };
+  /// Return the execution mode; unless overridden this is always EM_Unknown.
+  virtual ExecutionMode getExecutionMode() const { return EM_Unknown; }
+  /// Return the value declaration encapsulated in the expression \p E.
+  static const ValueDecl *getUnderlyingVar(const Expr *E);
+  //
+  // Base class overrides.
+  //
+  /// Creates offloading entry for the provided entry ID \a ID,
+  /// address \a Addr, size \a Size, and flags \a Flags.
+  void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr,
+                          uint64_t Size, int32_t Flags,
+                          llvm::GlobalValue::LinkageTypes Linkage) override;
+  /// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
+  /// global_tid, int proc_bind) to generate code for 'proc_bind' clause.
+  virtual void emitProcBindClause(CodeGenFunction &CGF,
+                                  OpenMPProcBindClauseKind ProcBind,
+                                  SourceLocation Loc) override;
+  /// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
+  /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
+  /// clause.
+  /// \param NumThreads An integer value of threads.
+  virtual void emitNumThreadsClause(CodeGenFunction &CGF,
+                                    llvm::Value *NumThreads,
+                                    SourceLocation Loc) override;
+  /// Set the number of teams to \p NumTeams and the thread limit to
+  /// \p ThreadLimit.
+  ///
+  /// \param NumTeams An integer expression of teams.
+  /// \param ThreadLimit An integer expression of threads.
+  void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams,
+                          const Expr *ThreadLimit, SourceLocation Loc) override;
+  /// Choose a default value for the schedule clause.
+  void getDefaultScheduleAndChunk(CodeGenFunction &CGF,
+                                  const OMPLoopDirective &S,
+                                  OpenMPScheduleClauseKind &ScheduleKind,
+                                  const Expr *&ChunkExpr) const override;
+  /// Emits code for teams call of the \a OutlinedFn with
+  /// variables captured in a record which address is stored in \a
+  /// CapturedStruct.
+  /// \param OutlinedFn Outlined function to be run by team masters. Type of
+  /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
+  /// \param CapturedVars A pointer to the record with the references to
+  /// variables used in \a OutlinedFn function.
+  ///
+  void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D,
+                     SourceLocation Loc, llvm::Function *OutlinedFn,
+                     ArrayRef<llvm::Value *> CapturedVars) override;
+  /// Returns default address space for the constant firstprivates, __constant__
+  /// address space by default.
+  unsigned getDefaultFirstprivateAddressSpace() const override;
+  /// Perform check on requires decl to ensure that target architecture
+  /// supports unified addressing
+  void checkArchForUnifiedAddressing(CodeGenModule &CGM,
+                                     const OMPRequiresDecl *D) const override;
+} // namespace CodeGen
+} // namespace clang
Index: clang/lib/CodeGen/CGOpenMPRuntimeTarget.cpp
--- /dev/null
+++ clang/lib/CodeGen/CGOpenMPRuntimeTarget.cpp
@@ -0,0 +1,197 @@
+//===-- CGOpenMPRuntimeTarget.cpp - Common OpenMP target codegen ----------===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// Implementation of the code generation interface for OpenMP target offloading
+// through the Target Region (TRegion) interface.
+// See the file comment in CGOpenMPRuntimeTarget.h for more information.
+#include "CGOpenMPRuntimeTarget.h"
+#include "CodeGenFunction.h"
+#include "clang/AST/StmtVisitor.h"
+#include "clang/Basic/Cuda.h"
+using namespace clang;
+using namespace CodeGen;
+CGOpenMPRuntimeTarget::CGOpenMPRuntimeTarget(CodeGenModule &CGM)
+    : CGOpenMPRuntime(CGM, "_", "$") {
+  if (!CGM.getLangOpts().OpenMPIsDevice)
+    llvm_unreachable("Target code generation does only handle device code!");
+const ValueDecl *CGOpenMPRuntimeTarget::getUnderlyingVar(const Expr *E) {
+  E = E->IgnoreParens();
+  if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E)) {
+    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
+    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
+      Base = TempASE->getBase()->IgnoreParenImpCasts();
+    E = Base;
+  } else if (auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) {
+    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
+    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
+      Base = TempOASE->getBase()->IgnoreParenImpCasts();
+    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
+      Base = TempASE->getBase()->IgnoreParenImpCasts();
+    E = Base;
+  }
+  E = E->IgnoreParenImpCasts();
+  if (const auto *DE = dyn_cast<DeclRefExpr>(E))
+    return cast<ValueDecl>(DE->getDecl()->getCanonicalDecl());
+  const auto *ME = cast<MemberExpr>(E);
+  return cast<ValueDecl>(ME->getMemberDecl()->getCanonicalDecl());
+void CGOpenMPRuntimeTarget::createOffloadEntry(
+    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t,
+    llvm::GlobalValue::LinkageTypes) {
+  // TODO: Add support for global variables on the device after declare target
+  // support.
+  if (!isa<llvm::Function>(Addr))
+    return;
+  llvm::Module &M = CGM.getModule();
+  llvm::LLVMContext &Ctx = CGM.getLLVMContext();
+  // Get "nvvm.annotations" metadata node
+  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
+  llvm::Metadata *MDVals[] = {
+      llvm::ConstantAsMetadata::get(Addr), llvm::MDString::get(Ctx, "kernel"),
+      llvm::ConstantAsMetadata::get(
+          llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
+  // Append metadata to nvvm.annotations
+  MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
+void CGOpenMPRuntimeTarget::emitProcBindClause(
+    CodeGenFunction &CGF, OpenMPProcBindClauseKind ProcBind,
+    SourceLocation Loc) {
+  // Do nothing in case of SPMD mode and L0 parallel.
+  if (getExecutionMode() == CGOpenMPRuntimeTarget::EM_SPMD)
+    return;
+  CGOpenMPRuntime::emitProcBindClause(CGF, ProcBind, Loc);
+void CGOpenMPRuntimeTarget::emitNumThreadsClause(CodeGenFunction &CGF,
+                                                  llvm::Value *NumThreads,
+                                                  SourceLocation Loc) {
+  // Do nothing in case of SPMD mode and L0 parallel.
+  if (getExecutionMode() == CGOpenMPRuntimeTarget::EM_SPMD)
+    return;
+  CGOpenMPRuntime::emitNumThreadsClause(CGF, NumThreads, Loc);
+void CGOpenMPRuntimeTarget::emitNumTeamsClause(CodeGenFunction &CGF,
+                                               const Expr *NumTeams,
+                                               const Expr *ThreadLimit,
+                                               SourceLocation Loc) {}
+void CGOpenMPRuntimeTarget::getDefaultScheduleAndChunk(
+    CodeGenFunction &CGF, const OMPLoopDirective &S,
+    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
+  ScheduleKind = OMPC_SCHEDULE_static;
+  // Chunk size is 1 in this case.
+  llvm::APInt ChunkSize(32, 1);
+  ChunkExpr = IntegerLiteral::Create(
+      CGF.getContext(), ChunkSize,
+      CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ 0),
+      SourceLocation());
+void CGOpenMPRuntimeTarget::emitTeamsCall(
+    CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc,
+    llvm::Function *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars) {
+  if (!CGF.HaveInsertPoint())
+    return;
+  Address ZeroAddr = CGF.CreateMemTemp(
+      CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
+      /*Name*/ ".zero.addr");
+  CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
+  llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
+  OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
+  OutlinedFnArgs.push_back(ZeroAddr.getPointer());
+  OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
+  emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
+unsigned CGOpenMPRuntimeTarget::getDefaultFirstprivateAddressSpace() const {
+  return CGM.getContext().getTargetAddressSpace(LangAS::cuda_constant);
+// Get current CudaArch and ignore any unknown values
+static CudaArch getCudaArch(CodeGenModule &CGM) {
+  if (!CGM.getTarget().hasFeature("ptx"))
+    return CudaArch::UNKNOWN;
+  llvm::StringMap<bool> Features;
+  CGM.getTarget().initFeatureMap(Features, CGM.getDiags(),
+                                 CGM.getTarget().getTargetOpts().CPU,
+                                 CGM.getTarget().getTargetOpts().Features);
+  for (const auto &Feature : Features) {
+    if (Feature.getValue()) {
+      CudaArch Arch = StringToCudaArch(Feature.getKey());
+      if (Arch != CudaArch::UNKNOWN)
+        return Arch;
+    }
+  }
+  return CudaArch::UNKNOWN;
+/// Check to see if target architecture supports unified addressing which is
+/// a restriction for OpenMP requires clause "unified_shared_memory".
+void CGOpenMPRuntimeTarget::checkArchForUnifiedAddressing(
+    CodeGenModule &CGM, const OMPRequiresDecl *D) const {
+  for (const OMPClause *Clause : D->clauselists()) {
+    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
+      switch (getCudaArch(CGM)) {
+      case CudaArch::SM_20:
+      case CudaArch::SM_21:
+      case CudaArch::SM_30:
+      case CudaArch::SM_32:
+      case CudaArch::SM_35:
+      case CudaArch::SM_37:
+      case CudaArch::SM_50:
+      case CudaArch::SM_52:
+      case CudaArch::SM_53:
+      case CudaArch::SM_60:
+      case CudaArch::SM_61:
+      case CudaArch::SM_62:
+        CGM.Error(Clause->getBeginLoc(),
+                  "Target architecture does not support unified addressing");
+        return;
+      case CudaArch::SM_70:
+      case CudaArch::SM_72:
+      case CudaArch::SM_75:
+      case CudaArch::GFX600:
+      case CudaArch::GFX601:
+      case CudaArch::GFX700:
+      case CudaArch::GFX701:
+      case CudaArch::GFX702:
+      case CudaArch::GFX703:
+      case CudaArch::GFX704:
+      case CudaArch::GFX801:
+      case CudaArch::GFX802:
+      case CudaArch::GFX803:
+      case CudaArch::GFX810:
+      case CudaArch::GFX900:
+      case CudaArch::GFX902:
+      case CudaArch::GFX904:
+      case CudaArch::GFX906:
+      case CudaArch::GFX909:
+      case CudaArch::UNKNOWN:
+        break;
+      case CudaArch::LAST:
+        llvm_unreachable("Unexpected Cuda arch.");
+      }
+    }
+  }
Index: clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
--- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -14,24 +14,18 @@
-#include "CGOpenMPRuntime.h"
+#include "CGOpenMPRuntimeTarget.h"
 #include "CodeGenFunction.h"
 #include "clang/AST/StmtOpenMP.h"
 namespace clang {
 namespace CodeGen {
-class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime {
+class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntimeTarget {
   /// Defines the execution mode.
-  enum ExecutionMode {
-    /// SPMD execution mode (all threads are worker threads).
-    EM_SPMD,
-    /// Non-SPMD execution mode (1 master thread, others are workers).
-    EM_NonSPMD,
-    /// Unknown execution mode (orphaned directive).
-    EM_Unknown,
-  };
+  using ExecutionMode = CGOpenMPRuntimeTarget::ExecutionMode;
   /// Parallel outlined function work for workers to execute.
   llvm::SmallVector<llvm::Function *, 16> Work;
@@ -52,7 +46,7 @@
     void createWorkerFunction(CodeGenModule &CGM);
-  ExecutionMode getExecutionMode() const;
+  ExecutionMode getExecutionMode() const override;
   bool requiresFullRuntime() const { return RequiresFullRuntime; }
@@ -197,28 +191,6 @@
   explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM);
   void clear() override;
-  /// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
-  /// global_tid, int proc_bind) to generate code for 'proc_bind' clause.
-  virtual void emitProcBindClause(CodeGenFunction &CGF,
-                                  OpenMPProcBindClauseKind ProcBind,
-                                  SourceLocation Loc) override;
-  /// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
-  /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
-  /// clause.
-  /// \param NumThreads An integer value of threads.
-  virtual void emitNumThreadsClause(CodeGenFunction &CGF,
-                                    llvm::Value *NumThreads,
-                                    SourceLocation Loc) override;
-  /// This function ought to emit, in the general case, a call to
-  // the openmp runtime kmpc_push_num_teams. In NVPTX backend it is not needed
-  // as these numbers are obtained through the PTX grid and block configuration.
-  /// \param NumTeams An integer expression of teams.
-  /// \param ThreadLimit An integer expression of threads.
-  void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams,
-                          const Expr *ThreadLimit, SourceLocation Loc) override;
   /// Emits inlined function for the specified OpenMP parallel
   //  directive.
   /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
@@ -249,18 +221,6 @@
                             OpenMPDirectiveKind InnermostKind,
                             const RegionCodeGenTy &CodeGen) override;
-  /// Emits code for teams call of the \a OutlinedFn with
-  /// variables captured in a record which address is stored in \a
-  /// CapturedStruct.
-  /// \param OutlinedFn Outlined function to be run by team masters. Type of
-  /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
-  /// \param CapturedVars A pointer to the record with the references to
-  /// variables used in \a OutlinedFn function.
-  ///
-  void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D,
-                     SourceLocation Loc, llvm::Function *OutlinedFn,
-                     ArrayRef<llvm::Value *> CapturedVars) override;
   /// Emits code for parallel or serial call of the \a OutlinedFn with
   /// variables captured in a record which address is stored in \a
   /// CapturedStruct.
@@ -371,25 +331,11 @@
       const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind,
       llvm::Value *&Chunk) const override;
-  /// Choose a default value for the schedule clause.
-  void getDefaultScheduleAndChunk(CodeGenFunction &CGF,
-      const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind,
-      const Expr *&ChunkExpr) const override;
   /// Adjust some parameters for the target-based directives, like addresses of
   /// the variables captured by reference in lambdas.
   void adjustTargetSpecificDataForLambdas(
       CodeGenFunction &CGF, const OMPExecutableDirective &D) const override;
-  /// Perform check on requires decl to ensure that target architecture
-  /// supports unified addressing
-  void checkArchForUnifiedAddressing(CodeGenModule &CGM,
-                                     const OMPRequiresDecl *D) const override;
-  /// Returns default address space for the constant firstprivates, __constant__
-  /// address space by default.
-  unsigned getDefaultFirstprivateAddressSpace() const override;
   /// Track the execution mode when codegening directives within a target
   /// region. The appropriate mode (SPMD/NON-SPMD) is set on entry to the
Index: clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
--- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -199,28 +199,6 @@
   SharedMemorySize = 128,
-static const ValueDecl *getPrivateItem(const Expr *RefExpr) {
-  RefExpr = RefExpr->IgnoreParens();
-  if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(RefExpr)) {
-    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
-    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
-      Base = TempASE->getBase()->IgnoreParenImpCasts();
-    RefExpr = Base;
-  } else if (auto *OASE = dyn_cast<OMPArraySectionExpr>(RefExpr)) {
-    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
-    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
-      Base = TempOASE->getBase()->IgnoreParenImpCasts();
-    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
-      Base = TempASE->getBase()->IgnoreParenImpCasts();
-    RefExpr = Base;
-  }
-  RefExpr = RefExpr->IgnoreParenImpCasts();
-  if (const auto *DE = dyn_cast<DeclRefExpr>(RefExpr))
-    return cast<ValueDecl>(DE->getDecl()->getCanonicalDecl());
-  const auto *ME = cast<MemberExpr>(RefExpr);
-  return cast<ValueDecl>(ME->getMemberDecl()->getCanonicalDecl());
 typedef std::pair<CharUnits /*Align*/, const ValueDecl *> VarsDataTy;
 static bool stable_sort_comparator(const VarsDataTy P1, const VarsDataTy P2) {
   return P1.first > P2.first;
@@ -1859,28 +1837,6 @@
   return RTLFn;
-void CGOpenMPRuntimeNVPTX::createOffloadEntry(llvm::Constant *ID,
-                                              llvm::Constant *Addr,
-                                              uint64_t Size, int32_t,
-                                              llvm::GlobalValue::LinkageTypes) {
-  // TODO: Add support for global variables on the device after declare target
-  // support.
-  if (!isa<llvm::Function>(Addr))
-    return;
-  llvm::Module &M = CGM.getModule();
-  llvm::LLVMContext &Ctx = CGM.getLLVMContext();
-  // Get "nvvm.annotations" metadata node
-  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
-  llvm::Metadata *MDVals[] = {
-      llvm::ConstantAsMetadata::get(Addr), llvm::MDString::get(Ctx, "kernel"),
-      llvm::ConstantAsMetadata::get(
-          llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
-  // Append metadata to nvvm.annotations
-  MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
 void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction(
     const OMPExecutableDirective &D, StringRef ParentName,
     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
@@ -1933,36 +1889,11 @@
 CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM)
-    : CGOpenMPRuntime(CGM, "_", "$") {
+    : CGOpenMPRuntimeTarget(CGM) {
   if (!CGM.getLangOpts().OpenMPIsDevice)
     llvm_unreachable("OpenMP NVPTX can only handle device code.");
-void CGOpenMPRuntimeNVPTX::emitProcBindClause(CodeGenFunction &CGF,
-                                              OpenMPProcBindClauseKind ProcBind,
-                                              SourceLocation Loc) {
-  // Do nothing in case of SPMD mode and L0 parallel.
-  if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD)
-    return;
-  CGOpenMPRuntime::emitProcBindClause(CGF, ProcBind, Loc);
-void CGOpenMPRuntimeNVPTX::emitNumThreadsClause(CodeGenFunction &CGF,
-                                                llvm::Value *NumThreads,
-                                                SourceLocation Loc) {
-  // Do nothing in case of SPMD mode and L0 parallel.
-  if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD)
-    return;
-  CGOpenMPRuntime::emitNumThreadsClause(CGF, NumThreads, Loc);
-void CGOpenMPRuntimeNVPTX::emitNumTeamsClause(CodeGenFunction &CGF,
-                                              const Expr *NumTeams,
-                                              const Expr *ThreadLimit,
-                                              SourceLocation Loc) {}
 llvm::Function *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction(
     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
@@ -2024,7 +1955,7 @@
   for (const auto *C : Dir->getClausesOfKind<OMPLastprivateClause>()) {
     for (const Expr *E : C->getVarRefs())
-      Vars.push_back(getPrivateItem(E));
+      Vars.push_back(CGOpenMPRuntimeTarget::getUnderlyingVar(E));
@@ -2036,7 +1967,7 @@
          "expected teams directive.");
   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
     for (const Expr *E : C->privates())
-      Vars.push_back(getPrivateItem(E));
+      Vars.push_back(CGOpenMPRuntimeTarget::getUnderlyingVar(E));
@@ -2460,25 +2391,6 @@
-void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF,
-                                         const OMPExecutableDirective &D,
-                                         SourceLocation Loc,
-                                         llvm::Function *OutlinedFn,
-                                         ArrayRef<llvm::Value *> CapturedVars) {
-  if (!CGF.HaveInsertPoint())
-    return;
-  Address ZeroAddr = CGF.CreateMemTemp(
-      CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
-      /*Name*/ ".zero.addr");
-  CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
-  llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
-  OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
-  OutlinedFnArgs.push_back(ZeroAddr.getPointer());
-  OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
-  emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
 void CGOpenMPRuntimeNVPTX::emitParallelCall(
     CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
     ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) {
@@ -4769,18 +4681,6 @@
       CGF, S, ScheduleKind, Chunk);
-void CGOpenMPRuntimeNVPTX::getDefaultScheduleAndChunk(
-    CodeGenFunction &CGF, const OMPLoopDirective &S,
-    OpenMPScheduleClauseKind &ScheduleKind,
-    const Expr *&ChunkExpr) const {
-  ScheduleKind = OMPC_SCHEDULE_static;
-  // Chunk size is 1 in this case.
-  llvm::APInt ChunkSize(32, 1);
-  ChunkExpr = IntegerLiteral::Create(CGF.getContext(), ChunkSize,
-      CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
-      SourceLocation());
 void CGOpenMPRuntimeNVPTX::adjustTargetSpecificDataForLambdas(
     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
@@ -4833,10 +4733,6 @@
-unsigned CGOpenMPRuntimeNVPTX::getDefaultFirstprivateAddressSpace() const {
-  return CGM.getContext().getTargetAddressSpace(LangAS::cuda_constant);
 // Get current CudaArch and ignore any unknown values
 static CudaArch getCudaArch(CodeGenModule &CGM) {
   if (!CGM.getTarget().hasFeature("ptx"))
@@ -4855,56 +4751,6 @@
   return CudaArch::UNKNOWN;
-/// Check to see if target architecture supports unified addressing which is
-/// a restriction for OpenMP requires clause "unified_shared_memory".
-void CGOpenMPRuntimeNVPTX::checkArchForUnifiedAddressing(
-    CodeGenModule &CGM, const OMPRequiresDecl *D) const {
-  for (const OMPClause *Clause : D->clauselists()) {
-    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
-      switch (getCudaArch(CGM)) {
-      case CudaArch::SM_20:
-      case CudaArch::SM_21:
-      case CudaArch::SM_30:
-      case CudaArch::SM_32:
-      case CudaArch::SM_35:
-      case CudaArch::SM_37:
-      case CudaArch::SM_50:
-      case CudaArch::SM_52:
-      case CudaArch::SM_53:
-      case CudaArch::SM_60:
-      case CudaArch::SM_61:
-      case CudaArch::SM_62:
-        CGM.Error(Clause->getBeginLoc(),
-                  "Target architecture does not support unified addressing");
-        return;
-      case CudaArch::SM_70:
-      case CudaArch::SM_72:
-      case CudaArch::SM_75:
-      case CudaArch::GFX600:
-      case CudaArch::GFX601:
-      case CudaArch::GFX700:
-      case CudaArch::GFX701:
-      case CudaArch::GFX702:
-      case CudaArch::GFX703:
-      case CudaArch::GFX704:
-      case CudaArch::GFX801:
-      case CudaArch::GFX802:
-      case CudaArch::GFX803:
-      case CudaArch::GFX810:
-      case CudaArch::GFX900:
-      case CudaArch::GFX902:
-      case CudaArch::GFX904:
-      case CudaArch::GFX906:
-      case CudaArch::GFX909:
-      case CudaArch::UNKNOWN:
-        break;
-      case CudaArch::LAST:
-        llvm_unreachable("Unexpected Cuda arch.");
-      }
-    }
-  }
 /// Get number of SMs and number of blocks per SM.
 static std::pair<unsigned, unsigned> getSMsBlocksPerSM(CodeGenModule &CGM) {
   std::pair<unsigned, unsigned> Data;
cfe-commits mailing list
  • [PATCH] D59418: [OpenMP]... Johannes Doerfert via Phabricator via cfe-commits

Reply via email to