Author: abataev
Date: Mon Apr  8 09:53:57 2019
New Revision: 357923

URL: http://llvm.org/viewvc/llvm-project?rev=357923&view=rev
Log:
[OPENMP][NVPTX]Fixed processing of memory management directives.

Added special processing of the memory management directives/clauses for
NVPTX target. For private locals, omp_default_mem_alloc and
omp_thread_mem_alloc result in allocation in local memory.
omp_const_mem_alloc allocates const memory, omp_teams_mem_alloc
allocates shared memory, and omp_cgroup_mem_alloc and
omp_large_cap_mem_alloc allocate global memory.

Modified:
    cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
    cfe/trunk/test/OpenMP/nvptx_allocate_codegen.cpp

Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp?rev=357923&r1=357922&r2=357923&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp Mon Apr  8 09:53:57 2019
@@ -318,6 +318,9 @@ class CheckVarsEscapingDeclContext final
         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))
       return;
     VD = cast<ValueDecl>(VD->getCanonicalDecl());
+    // Use user-specified allocation.
+    if (VD->hasAttrs() && VD->hasAttr<OMPAllocateDeclAttr>())
+      return;
     // Variables captured by value must be globalized.
     if (auto *CSI = CGF.CapturedStmtInfo) {
       if (const FieldDecl *FD = CSI->lookup(cast<VarDecl>(VD))) {
@@ -4725,7 +4728,6 @@ void CGOpenMPRuntimeNVPTX::emitFunctionP
 
 Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                         const VarDecl *VD) {
-  bool UseDefaultAllocator = true;
   if (VD && VD->hasAttr<OMPAllocateDeclAttr>()) {
     const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
     switch (A->getAllocatorType()) {
@@ -4733,17 +4735,48 @@ Address CGOpenMPRuntimeNVPTX::getAddress
       // threadlocal.
     case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
     case OMPAllocateDeclAttr::OMPThreadMemAlloc:
-      // Just pass-through to check if the globalization is required.
-      break;
-    case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
-    case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
     case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
     case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
-    case OMPAllocateDeclAttr::OMPConstMemAlloc:
-    case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
+      // Follow the user decision - use default allocation.
+      return Address::invalid();
     case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
-      UseDefaultAllocator = false;
-      break;
+      // TODO: implement aupport for user-defined allocators.
+      return Address::invalid();
+    case OMPAllocateDeclAttr::OMPConstMemAlloc: {
+      llvm::Type *VarTy = CGF.ConvertTypeForMem(VD->getType());
+      auto *GV = new llvm::GlobalVariable(
+          CGM.getModule(), VarTy, /*isConstant=*/false,
+          llvm::GlobalValue::InternalLinkage,
+          llvm::Constant::getNullValue(VarTy), VD->getName(),
+          /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
+          CGM.getContext().getTargetAddressSpace(LangAS::cuda_constant));
+      CharUnits Align = CGM.getContext().getDeclAlign(VD);
+      GV->setAlignment(Align.getQuantity());
+      return Address(GV, Align);
+    }
+    case OMPAllocateDeclAttr::OMPPTeamMemAlloc: {
+      llvm::Type *VarTy = CGF.ConvertTypeForMem(VD->getType());
+      auto *GV = new llvm::GlobalVariable(
+          CGM.getModule(), VarTy, /*isConstant=*/false,
+          llvm::GlobalValue::InternalLinkage,
+          llvm::Constant::getNullValue(VarTy), VD->getName(),
+          /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
+          CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared));
+      CharUnits Align = CGM.getContext().getDeclAlign(VD);
+      GV->setAlignment(Align.getQuantity());
+      return Address(GV, Align);
+    }
+    case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
+    case OMPAllocateDeclAttr::OMPCGroupMemAlloc: {
+      llvm::Type *VarTy = CGF.ConvertTypeForMem(VD->getType());
+      auto *GV = new llvm::GlobalVariable(
+          CGM.getModule(), VarTy, /*isConstant=*/false,
+          llvm::GlobalValue::InternalLinkage,
+          llvm::Constant::getNullValue(VarTy), VD->getName());
+      CharUnits Align = CGM.getContext().getDeclAlign(VD);
+      GV->setAlignment(Align.getQuantity());
+      return Address(GV, Align);
+    }
     }
   }
 
@@ -4769,11 +4802,6 @@ Address CGOpenMPRuntimeNVPTX::getAddress
     }
   }
 
-  // TODO: replace it with return
-  // UseDefaultAllocator ? Address::invalid :
-  // CGOpenMPRuntime::getAddressOfLocalVariable(CGF, VD); when NVPTX libomp
-  // supports __kmpc_alloc|__kmpc_free.
-  (void)UseDefaultAllocator; // Prevent a warning.
   return Address::invalid();
 }
 

Modified: cfe/trunk/test/OpenMP/nvptx_allocate_codegen.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_allocate_codegen.cpp?rev=357923&r1=357922&r2=357923&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_allocate_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_allocate_codegen.cpp Mon Apr  8 09:53:57 2019
@@ -24,6 +24,8 @@ extern const omp_allocator_handle_t omp_
 // CHECK-DAG: @{{.+}}ns{{.+}}a{{.+}} = addrspace(3) global i32 0,
 // CHECK-DAG: @{{.+}}main{{.+}}a{{.*}} = internal global i32 0,
 // CHECK-DAG: @{{.+}}ST{{.+}}m{{.+}} = external global i32,
+// CHECK-DAG: @bar_c = internal global i32 0,
+// CHECK-DAG: @bar_b = internal addrspace(3) global double 0.000000e+00,
 struct St{
  int a;
 };
@@ -64,13 +66,42 @@ int main () {
 #pragma omp allocate(a) allocator(omp_thread_mem_alloc)
   a=2;
   double b = 3;
+  float c;
 #pragma omp allocate(b) allocator(omp_default_mem_alloc)
+#pragma omp allocate(c) allocator(omp_cgroup_mem_alloc)
   return (foo<int>());
 }
 
 // CHECK: define {{.*}}i32 @{{.+}}foo{{.+}}()
-// CHECK: alloca i32,
+// CHECK-NOT: alloca i32,
 
 extern template int ST<int>::m;
+
+void baz(float &);
+
+// CHECK: define void @{{.+}}bar{{.+}}()
+void bar() {
+  // CHECK: alloca float,
+  float bar_a;
+  // CHECK: alloca double,
+  double bar_b;
+  int bar_c;
+#pragma omp allocate(bar_c) allocator(omp_cgroup_mem_alloc)
+  // CHECK: call void [[OUTLINED:@.+]](i32* %{{.+}}, i32* %{{.+}})
+#pragma omp parallel private(bar_a, bar_b) allocate(omp_thread_mem_alloc       
           \
+                                                    : bar_a) 
allocate(omp_pteam_mem_alloc \
+                                                                      : bar_b)
+  {
+    bar_b = bar_a;
+    baz(bar_a);
+  }
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias 
%{{.+}})
+// CHECK-NOT: alloca double,
+// CHECK: alloca float,
+// CHECK-NOT: alloca double,
+// CHECK: load float, float* %
+// CHECK: store double {{.+}}, double addrspace(3)* @bar_b,
+}
+
 #pragma omp end declare target
 #endif


_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to