Hi rjmccall, hfinkel,

There are problems with codegen for the 'schedule' clause with a non-constant chunk 
size in combined directives like 'omp parallel for'. Currently, all variables 
used in this chunk expression must be captured and passed to the outlined function 
for the implicit 'parallel' region. But this does not happen, because the 
expression is generated outside of the 'parallel for' directive region, and this 
causes a compiler crash.
The codegen is changed so that, if a non-constant chunk size is found, it is 
evaluated outside of the OpenMP region and the value is stored into a private global 
variable. When the loop directive needs this schedule chunk, it just loads the 
value stored in this global variable and uses it as the chunk size.

http://reviews.llvm.org/D9606

Files:
  lib/CodeGen/CGStmtOpenMP.cpp
  lib/CodeGen/CodeGenFunction.h
  test/OpenMP/for_codegen.cpp
  test/OpenMP/parallel_for_codegen.cpp

EMAIL PREFERENCES
  http://reviews.llvm.org/settings/panel/emailpreferences/
Index: test/OpenMP/parallel_for_codegen.cpp
===================================================================
--- test/OpenMP/parallel_for_codegen.cpp
+++ test/OpenMP/parallel_for_codegen.cpp
@@ -8,6 +8,22 @@
 #define HEADER
 
 // CHECK: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
+// CHECK-DAG: [[CHUNK:@.+]] = {{.+}}global i64
+
+// CHECK-LABEL: with_var_schedule
+void with_var_schedule() {
+  int a = 5;
+// CHECK: store i64 %{{.+}}, i64* [[CHUNK]]
+// CHECK: call void {{.+}} @__kmpc_fork_call(
+// CHECK: [[CHUNK_SIZE:%.+]] = load i64, i64* [[CHUNK]],
+// CHECK: call void @__kmpc_for_static_init_8u([[IDENT_T_TY]]* [[DEFAULT_LOC:@[^,]+]], i32 [[GTID:%[^,]+]], i32 33, i32* [[IS_LAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]], i64 1, i64 [[CHUNK_SIZE]])
+// CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
+// CHECK: __kmpc_cancel_barrier
+#pragma omp parallel for schedule(static, a)
+  for (unsigned long long i = 1; i < 2; ++i) {
+  }
+}
+
 // CHECK-LABEL: define {{.*void}} @{{.*}}without_schedule_clause{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
 void without_schedule_clause(float *a, float *b, float *c, float *d) {
   #pragma omp parallel for
Index: test/OpenMP/for_codegen.cpp
===================================================================
--- test/OpenMP/for_codegen.cpp
+++ test/OpenMP/for_codegen.cpp
@@ -12,6 +12,21 @@
 // CHECK-DAG: [[I:@.+]] = global i8 1,
 // CHECK-DAG: [[J:@.+]] = global i8 2,
 // CHECK-DAG: [[K:@.+]] = global i8 3,
+// CHECK-DAG: [[CHUNK:@.+]] = {{.+}}global i64
+
+// CHECK-LABEL: with_var_schedule
+void with_var_schedule() {
+  int a = 5;
+// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
+// CHECK: store i64 %{{.+}}, i64* [[CHUNK]]
+// CHECK: [[CHUNK_SIZE:%.+]] = load i64, i64* [[CHUNK]],
+// CHECK: call void @__kmpc_for_static_init_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 33, i32* [[IS_LAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]], i64 1, i64 [[CHUNK_SIZE]])
+// CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
+// CHECK: __kmpc_cancel_barrier
+#pragma omp for schedule(static, a)
+  for (unsigned long long i = 1; i < 2; ++i) {
+  }
+}
 
 // CHECK-LABEL: define {{.*void}} @{{.*}}without_schedule_clause{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
 void without_schedule_clause(float *a, float *b, float *c, float *d) {
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h
+++ lib/CodeGen/CodeGenFunction.h
@@ -2193,9 +2193,12 @@
                        bool SeparateIter = false);
   void EmitOMPSimdFinal(const OMPLoopDirective &S);
   /// \brief Emit code for the worksharing loop-based directive.
+  /// \param Chunk Chunk value for 'schedule' clause. May be a value (if it is a
+  /// constant), pointer to global variable with calculated schedule (if it is
+  /// not a constant) or nullptr (if not specified).
   /// \return true, if this construct has any lastprivate clause, false -
   /// otherwise.
-  bool EmitOMPWorksharingLoop(const OMPLoopDirective &S);
+  bool EmitOMPWorksharingLoop(const OMPLoopDirective &S, llvm::Value *Chunk);
   void EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
                            const OMPLoopDirective &S,
                            OMPPrivateScope &LoopScope, llvm::Value *LB,
Index: lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- lib/CodeGen/CGStmtOpenMP.cpp
+++ lib/CodeGen/CGStmtOpenMP.cpp
@@ -951,7 +951,39 @@
   return CGF.EmitLValue(Helper);
 }
 
-bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
+static llvm::Value *emitScheduleClause(CodeGenFunction &CGF,
+                                       const OMPLoopDirective &S) {
+  // Detect the loop schedule kind and chunk.
+  auto ScheduleKind = OMPC_SCHEDULE_unknown;
+  llvm::Value *Chunk = nullptr;
+  if (auto *C =
+          cast_or_null<OMPScheduleClause>(S.getSingleClause(OMPC_schedule))) {
+    ScheduleKind = C->getScheduleKind();
+    if (auto *Ch = C->getChunkSize()) {
+      Chunk = CGF.EmitScalarExpr(Ch);
+      Chunk = CGF.EmitScalarConversion(Chunk, Ch->getType(),
+                                       S.getIterationVariable()->getType());
+    }
+  }
+  if (Chunk && !isa<llvm::Constant>(Chunk)) {
+    auto *LoopChunk = new llvm::GlobalVariable(
+        CGF.CGM.getModule(), Chunk->getType(), /*isConstant=*/false,
+        llvm::GlobalValue::PrivateLinkage,
+        llvm::Constant::getNullValue(Chunk->getType()));
+    auto Alignment =
+        CGF.CGM.getContext()
+            .getTypeAlignInChars(S.getIterationVariable()->getType())
+            .getQuantity();
+    LoopChunk->setAlignment(Alignment);
+    LoopChunk->setUnnamedAddr(true);
+    CGF.Builder.CreateAlignedStore(Chunk, LoopChunk, Alignment);
+    return LoopChunk;
+  }
+  return Chunk;
+}
+
+bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S,
+                                             llvm::Value *Chunk) {
   // Emit the loop iteration variable.
   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
@@ -1014,14 +1046,12 @@
 
       // Detect the loop schedule kind and chunk.
       auto ScheduleKind = OMPC_SCHEDULE_unknown;
-      llvm::Value *Chunk = nullptr;
       if (auto C = cast_or_null<OMPScheduleClause>(
               S.getSingleClause(OMPC_schedule))) {
         ScheduleKind = C->getScheduleKind();
-        if (auto Ch = C->getChunkSize()) {
-          Chunk = EmitScalarExpr(Ch);
-          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
-                                       S.getIterationVariable()->getType());
+        if (Chunk && Chunk->getType()->isPointerTy()) {
+          Chunk = Builder.CreateAlignedLoad(
+              Chunk, cast<llvm::GlobalValue>(Chunk)->getAlignment());
         }
       }
       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
@@ -1075,8 +1105,9 @@
 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
   LexicalScope Scope(*this, S.getSourceRange());
   bool HasLastprivates = false;
-  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) {
-    HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
+  auto *Chunk = emitScheduleClause(*this, S);
+  auto &&CodeGen = [&S, &HasLastprivates, Chunk](CodeGenFunction &CGF) {
+    HasLastprivates = CGF.EmitOMPWorksharingLoop(S, Chunk);
   };
   CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
 
@@ -1329,8 +1360,9 @@
   // Emit directive as a combined directive that consists of two implicit
   // directives: 'parallel' with 'for' directive.
   LexicalScope Scope(*this, S.getSourceRange());
-  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
-    CGF.EmitOMPWorksharingLoop(S);
+  auto *Chunk = emitScheduleClause(*this, S);
+  auto &&CodeGen = [&S, Chunk](CodeGenFunction &CGF) {
+    CGF.EmitOMPWorksharingLoop(S, Chunk);
     // Emit implicit barrier at the end of parallel region, but this barrier
     // is at the end of 'for' directive, so emit it as the implicit barrier for
     // this 'for' directive.
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

Reply via email to