Hi doug.gregor, hfinkel, rjmccall, rsmith, fraggamuffin, ABataev,
Please review the first patch implementing codegen for the 'omp for' directive.
It handles the simplest case, which is used when schedule(static) is given
without a chunk_size, or when no 'schedule' clause is specified at all: the
runtime library divides the iteration space into chunks that are approximately
equal in size, and at most one chunk is distributed to each thread. In this
case no outer loop is needed in each thread - each thread asks the runtime once
which iteration range it should handle (via the __kmpc_for_static_init runtime
call) and then runs the inner loop over that range.
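To make the scheme concrete, below is a minimal, self-contained C++ sketch of
what the runtime conceptually computes in __kmpc_for_static_init for this
static, non-chunked case. It is only an illustrative model with a made-up
helper (static_init), not the real libiomp5 implementation, and the exact
chunk distribution is unspecified by the standard: the iteration space is
split into approximately equal chunks, each thread receives at most one chunk
as an inclusive [LB, UB] range, and then simply runs the inner loop over it.

  #include <algorithm>
  #include <cstdio>

  // Conceptual model only -- not the real __kmpc_for_static_init from libiomp5.
  // Splits the logical iteration space [0, NumIters) into NThreads chunks of
  // approximately equal size (at most one chunk per thread) and returns the
  // inclusive [LB, UB] range assigned to thread TId.
  static void static_init(long long NumIters, int NThreads, int TId,
                          long long &LB, long long &UB) {
    long long Chunk = NumIters / NThreads;
    long long Rem = NumIters % NThreads;
    // The first Rem threads get one extra iteration each.
    LB = TId * Chunk + std::min<long long>(TId, Rem);
    UB = LB + Chunk + (TId < Rem ? 1 : 0) - 1; // inclusive upper bound
  }

  int main() {
    const long long NumIters = 4571424; // iterations of the first loop in the test
    const int NThreads = 4;             // hypothetical team size
    for (int TId = 0; TId < NThreads; ++TId) {
      long long LB, UB;
      static_init(NumIters, NThreads, TId, LB, UB);
      // Each thread asks for its range once and runs the inner loop on it:
      //   for (IV = LB; IV <= UB; ++IV) { i = 33 + IV * 7; /* body */ }
      std::printf("thread %d: IV in [%lld, %lld]\n", TId, LB, UB);
    }
  }

The generated code in the patch follows the same pattern: a single call to
__kmpc_for_static_init_* to obtain LB/UB, then UB = min(UB, GlobalUB), the
inner loop over [LB, UB], then __kmpc_for_static_fini and an implicit barrier
(see test/OpenMP/for_codegen.cpp below).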
http://reviews.llvm.org/D5865
Files:
include/clang/AST/StmtOpenMP.h
lib/AST/Stmt.cpp
lib/CodeGen/CGOpenMPRuntime.cpp
lib/CodeGen/CGOpenMPRuntime.h
lib/CodeGen/CGStmtOpenMP.cpp
lib/CodeGen/CodeGenFunction.h
lib/Sema/SemaOpenMP.cpp
lib/Serialization/ASTReaderStmt.cpp
lib/Serialization/ASTWriterStmt.cpp
test/OpenMP/for_codegen.cpp
Index: test/OpenMP/for_codegen.cpp
===================================================================
--- test/OpenMP/for_codegen.cpp
+++ test/OpenMP/for_codegen.cpp
@@ -0,0 +1,172 @@
+// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -g -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+//
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
+// CHECK-LABEL: define {{.*void}} @{{.*}}without_schedule_clause{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
+void without_schedule_clause(float *a, float *b, float *c, float *d) {
+// CHECK: [[GTID:%.+]] = call{{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
+ #pragma omp for
+// CHECK: call{{.*}}void @__kmpc_for_static_init_8([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]], i64 1, i64 1)
+// UB = min(UB, GlobalUB)
+// CHECK-NEXT: [[UB:%.+]] = load i64* [[OMP_UB]]
+// CHECK-NEXT: [[UBCMP:%.+]] = icmp sgt i64 [[UB]], 4571423
+// CHECK-NEXT: br i1 [[UBCMP]], label [[UB_TRUE:%[^,]+]], label [[UB_FALSE:%[^,]+]]
+// CHECK: [[UBRESULT:%.+]] = phi i64 [ 4571423, [[UB_TRUE]] ], [ [[UBVAL:%[^,]+]], [[UB_FALSE]] ]
+// CHECK-NEXT: store i64 [[UBRESULT]], i64* [[OMP_UB]]
+// CHECK-NEXT: [[LB:%.+]] = load i64* [[OMP_LB]]
+// CHECK-NEXT: store i64 [[LB]], i64* [[OMP_IV:[^,]+]]
+// Loop header
+// CHECK: [[IV:%.+]] = load i64* [[OMP_IV]]
+// CHECK-NEXT: [[UB:%.+]] = load i64* [[OMP_UB]]
+// CHECK-NEXT: [[CMP:%.+]] = icmp sle i64 [[IV]], [[UB]]
+// CHECK-NEXT: br i1 [[CMP]], label %[[LOOP1_BODY:[^,]+]], label %[[LOOP1_END:[^,]+]]
+ for (long long i = 33; i < 32000000; i += 7) {
+// CHECK: [[LOOP1_BODY]]
+// Start of body: calculate i from IV:
+// CHECK: [[IV1_1:%.+]] = load i64* [[OMP_IV]]
+// CHECK-NEXT: [[CALC_I_1:%.+]] = mul nsw i64 [[IV1_1]], 7
+// CHECK-NEXT: [[CALC_I_2:%.+]] = add nsw i64 33, [[CALC_I_1]]
+// CHECK-NEXT: store i64 [[CALC_I_2]], i64* [[LC_I:.+]]
+// ... loop body ...
+// End of body: store into a[i]:
+// CHECK: store float [[RESULT:%.+]], float* {{%.+}}
+ a[i] = b[i] * c[i] * d[i];
+// CHECK: [[IV1_2:%.+]] = load i64* [[OMP_IV]]{{.*}}
+// CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i64 [[IV1_2]], 1
+// CHECK-NEXT: store i64 [[ADD1_2]], i64* [[OMP_IV]]
+// CHECK-NEXT: br label %{{.+}}
+ }
+// CHECK: [[LOOP1_END]]
+// CHECK: call {{.*}}void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
+// CHECK: call {{.*}}void @__kmpc_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
+// CHECK: ret void
+}
+
+// CHECK-LABEL: define {{.*void}} @{{.*}}without_schedule_clause2{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
+void without_schedule_clause2(float *a, float *b, float *c, float *d) {
+// CHECK: [[GTID:%.+]] = call{{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
+ #pragma omp for
+// CHECK: call{{.*}}void @__kmpc_for_static_init_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]], i64 1, i64 1)
+// UB = min(UB, GlobalUB)
+// CHECK-NEXT: [[UB:%.+]] = load i64* [[OMP_UB]]
+// CHECK-NEXT: [[UBCMP:%.+]] = icmp ugt i64 [[UB]], 4571423
+// CHECK-NEXT: br i1 [[UBCMP]], label [[UB_TRUE:%[^,]+]], label [[UB_FALSE:%[^,]+]]
+// CHECK: [[UBRESULT:%.+]] = phi i64 [ 4571423, [[UB_TRUE]] ], [ [[UBVAL:%[^,]+]], [[UB_FALSE]] ]
+// CHECK-NEXT: store i64 [[UBRESULT]], i64* [[OMP_UB]]
+// CHECK-NEXT: [[LB:%.+]] = load i64* [[OMP_LB]]
+// CHECK-NEXT: store i64 [[LB]], i64* [[OMP_IV:[^,]+]]
+// Loop header
+// CHECK: [[IV:%.+]] = load i64* [[OMP_IV]]
+// CHECK-NEXT: [[UB:%.+]] = load i64* [[OMP_UB]]
+// CHECK-NEXT: [[CMP:%.+]] = icmp ule i64 [[IV]], [[UB]]
+// CHECK-NEXT: br i1 [[CMP]], label %[[LOOP1_BODY:[^,]+]], label %[[LOOP1_END:[^,]+]]
+ for (unsigned long long i = 33; i < 32000000; i += 7) {
+// CHECK: [[LOOP1_BODY]]
+// Start of body: calculate i from IV:
+// CHECK: [[IV1_1:%.+]] = load i64* [[OMP_IV]]
+// CHECK-NEXT: [[CALC_I_1:%.+]] = mul i64 [[IV1_1]], 7
+// CHECK-NEXT: [[CALC_I_2:%.+]] = add i64 33, [[CALC_I_1]]
+// CHECK-NEXT: store i64 [[CALC_I_2]], i64* [[LC_I:.+]]
+// ... loop body ...
+// End of body: store into a[i]:
+// CHECK: store float [[RESULT:%.+]], float* {{%.+}}
+ a[i] = b[i] * c[i] * d[i];
+// CHECK: [[IV1_2:%.+]] = load i64* [[OMP_IV]]{{.*}}
+// CHECK-NEXT: [[ADD1_2:%.+]] = add i64 [[IV1_2]], 1
+// CHECK-NEXT: store i64 [[ADD1_2]], i64* [[OMP_IV]]
+// CHECK-NEXT: br label %{{.+}}
+ }
+// CHECK: [[LOOP1_END]]
+// CHECK: call {{.*}}void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
+// CHECK: call {{.*}}void @__kmpc_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
+// CHECK: ret void
+}
+
+// CHECK-LABEL: define {{.*void}} @{{.*}}static_not_chunked{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
+void static_not_chunked(float *a, float *b, float *c, float *d) {
+// CHECK: [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
+ #pragma omp for schedule(static)
+// CHECK: call {{.*}}void @__kmpc_for_static_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 1)
+// UB = min(UB, GlobalUB)
+// CHECK-NEXT: [[UB:%.+]] = load i32* [[OMP_UB]]
+// CHECK-NEXT: [[UBCMP:%.+]] = icmp sgt i32 [[UB]], 25
+// CHECK-NEXT: br i1 [[UBCMP]], label [[UB_TRUE:%[^,]+]], label [[UB_FALSE:%[^,]+]]
+// CHECK: [[UBRESULT:%.+]] = phi i32 [ 25, [[UB_TRUE]] ], [ [[UBVAL:%[^,]+]], [[UB_FALSE]] ]
+// CHECK-NEXT: store i32 [[UBRESULT]], i32* [[OMP_UB]]
+// CHECK-NEXT: [[LB:%.+]] = load i32* [[OMP_LB]]
+// CHECK-NEXT: store i32 [[LB]], i32* [[OMP_IV:[^,]+]]
+// Loop header
+// CHECK: [[IV:%.+]] = load i32* [[OMP_IV]]
+// CHECK-NEXT: [[UB:%.+]] = load i32* [[OMP_UB]]
+// CHECK-NEXT: [[CMP:%.+]] = icmp sle i32 [[IV]], [[UB]]
+// CHECK-NEXT: br i1 [[CMP]], label %[[LOOP1_BODY:[^,]+]], label %[[LOOP1_END:[^,]+]]
+ for (unsigned char i = 'z'; i >= 'a'; i--) {
+// CHECK: [[LOOP1_BODY]]
+// Start of body: calculate i from IV:
+// CHECK: [[IV1_1:%.+]] = load i32* [[OMP_IV]]
+// CHECK-NEXT: [[CALC_I_1:%.+]] = mul nsw i32 [[IV1_1]], 1
+// CHECK-NEXT: [[CALC_I_2:%.+]] = sub nsw i32 122, [[CALC_I_1]]
+// CHECK-NEXT: [[CALC_I_3:%.+]] = trunc i32 [[CALC_I_2]] to i8
+// CHECK-NEXT: store i8 [[CALC_I_3]], i8* [[LC_I:.+]]
+// ... loop body ...
+// End of body: store into a[i]:
+// CHECK: store float [[RESULT:%.+]], float* {{%.+}}
+ a[i] = b[i] * c[i] * d[i];
+// CHECK: [[IV1_2:%.+]] = load i32* [[OMP_IV]]{{.*}}
+// CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i32 [[IV1_2]], 1
+// CHECK-NEXT: store i32 [[ADD1_2]], i32* [[OMP_IV]]
+// CHECK-NEXT: br label %{{.+}}
+ }
+// CHECK: [[LOOP1_END]]
+// CHECK: call{{.*}}void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
+// CHECK: call{{.*}}void @__kmpc_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
+// CHECK: ret void
+}
+
+// CHECK-LABEL: define {{.*void}} @{{.*}}static_not_chunked2{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
+void static_not_chunked2(float *a, float *b, float *c, float *d) {
+// CHECK: [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
+ #pragma omp for schedule(static)
+// CHECK: call {{.*}}void @__kmpc_for_static_init_4u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 1)
+// UB = min(UB, GlobalUB)
+// CHECK-NEXT: [[UB:%.+]] = load i32* [[OMP_UB]]
+// CHECK-NEXT: [[UBCMP:%.+]] = icmp ugt i32 [[UB]], 4571423
+// CHECK-NEXT: br i1 [[UBCMP]], label [[UB_TRUE:%[^,]+]], label [[UB_FALSE:%[^,]+]]
+// CHECK: [[UBRESULT:%.+]] = phi i32 [ 4571423, [[UB_TRUE]] ], [ [[UBVAL:%[^,]+]], [[UB_FALSE]] ]
+// CHECK-NEXT: store i32 [[UBRESULT]], i32* [[OMP_UB]]
+// CHECK-NEXT: [[LB:%.+]] = load i32* [[OMP_LB]]
+// CHECK-NEXT: store i32 [[LB]], i32* [[OMP_IV:[^,]+]]
+// Loop header
+// CHECK: [[IV:%.+]] = load i32* [[OMP_IV]]
+// CHECK-NEXT: [[UB:%.+]] = load i32* [[OMP_UB]]
+// CHECK-NEXT: [[CMP:%.+]] = icmp ule i32 [[IV]], [[UB]]
+// CHECK-NEXT: br i1 [[CMP]], label %[[LOOP1_BODY:[^,]+]], label %[[LOOP1_END:[^,]+]]
+ for (unsigned i = 32000000; i > 33; i -= 7) {
+// CHECK: [[LOOP1_BODY]]
+// Start of body: calculate i from IV:
+// CHECK: [[IV1_1:%.+]] = load i32* [[OMP_IV]]
+// CHECK-NEXT: [[CALC_I_1:%.+]] = mul i32 [[IV1_1]], 7
+// CHECK-NEXT: [[CALC_I_2:%.+]] = sub i32 32000000, [[CALC_I_1]]
+// CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]]
+// ... loop body ...
+// End of body: store into a[i]:
+// CHECK: store float [[RESULT:%.+]], float* {{%.+}}
+ a[i] = b[i] * c[i] * d[i];
+// CHECK: [[IV1_2:%.+]] = load i32* [[OMP_IV]]{{.*}}
+// CHECK-NEXT: [[ADD1_2:%.+]] = add i32 [[IV1_2]], 1
+// CHECK-NEXT: store i32 [[ADD1_2]], i32* [[OMP_IV]]
+// CHECK-NEXT: br label %{{.+}}
+ }
+// CHECK: [[LOOP1_END]]
+// CHECK: call{{.*}}void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
+// CHECK: call{{.*}}void @__kmpc_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
+// CHECK: ret void
+}
+
+#endif // HEADER
+
Index: include/clang/AST/StmtOpenMP.h
===================================================================
--- include/clang/AST/StmtOpenMP.h
+++ include/clang/AST/StmtOpenMP.h
@@ -264,27 +264,38 @@
SeparatedCondOffset = 6,
InitOffset = 7,
IncOffset = 8,
- ArraysOffset = 9
+ NonworksharingArraysOffset = 9,
+ // The following 7 exprs are used by worksharing loops only.
+ IsLastIterVariableOffset = 9,
+ LowerBoundVariableOffset = 10,
+ UpperBoundVariableOffset = 11,
+ StrideVariableOffset = 12,
+ EnsureUpperBoundOffset = 13,
+ NextLowerBoundOffset = 14,
+ NextUpperBoundOffset = 15,
+ WorksharingArraysOffset = 16
};
/// \brief Get the counters storage.
MutableArrayRef<Expr *> getCounters() {
- Expr **Storage =
- reinterpret_cast<Expr **>(&(*(std::next(child_begin(), ArraysOffset))));
+ Expr **Storage = reinterpret_cast<Expr **>(
+ &(*(std::next(child_begin(), getArraysOffset(getDirectiveKind())))));
return MutableArrayRef<Expr *>(Storage, CollapsedNum);
}
/// \brief Get the updates storage.
MutableArrayRef<Expr *> getUpdates() {
Expr **Storage = reinterpret_cast<Expr **>(
- &*std::next(child_begin(), ArraysOffset + CollapsedNum));
+ &*std::next(child_begin(),
+ getArraysOffset(getDirectiveKind()) + CollapsedNum));
return MutableArrayRef<Expr *>(Storage, CollapsedNum);
}
/// \brief Get the final counter updates storage.
MutableArrayRef<Expr *> getFinals() {
Expr **Storage = reinterpret_cast<Expr **>(
- &*std::next(child_begin(), ArraysOffset + 2 * CollapsedNum));
+ &*std::next(child_begin(),
+ getArraysOffset(getDirectiveKind()) + 2 * CollapsedNum));
return MutableArrayRef<Expr *>(Storage, CollapsedNum);
}
@@ -305,13 +316,21 @@
unsigned CollapsedNum, unsigned NumClauses,
unsigned NumSpecialChildren = 0)
: OMPExecutableDirective(That, SC, Kind, StartLoc, EndLoc, NumClauses,
- numLoopChildren(CollapsedNum) +
+ numLoopChildren(CollapsedNum, Kind) +
NumSpecialChildren),
CollapsedNum(CollapsedNum) {}
+ /// \brief Offset to the start of children expression arrays.
+ static unsigned getArraysOffset(OpenMPDirectiveKind Kind) {
+ return isOpenMPWorksharingDirective(Kind) ? WorksharingArraysOffset
+ : NonworksharingArraysOffset;
+ }
+
/// \brief Children number.
- static unsigned numLoopChildren(unsigned CollapsedNum) {
- return ArraysOffset + 3 * CollapsedNum; // Counters, Updates and Finals
+ static unsigned numLoopChildren(unsigned CollapsedNum,
+ OpenMPDirectiveKind Kind) {
+ return getArraysOffset(Kind) +
+ 3 * CollapsedNum; // Counters, Updates and Finals
}
void setIterationVariable(Expr *IV) {
@@ -332,6 +351,41 @@
}
void setInit(Expr *Init) { *std::next(child_begin(), InitOffset) = Init; }
void setInc(Expr *Inc) { *std::next(child_begin(), IncOffset) = Inc; }
+ void setIsLastIterVariable(Expr *IL) {
+ assert(isOpenMPWorksharingDirective(getDirectiveKind()) &&
+ "expected worksharing loop directive");
+ *std::next(child_begin(), IsLastIterVariableOffset) = IL;
+ }
+ void setLowerBoundVariable(Expr *LB) {
+ assert(isOpenMPWorksharingDirective(getDirectiveKind()) &&
+ "expected worksharing loop directive");
+ *std::next(child_begin(), LowerBoundVariableOffset) = LB;
+ }
+ void setUpperBoundVariable(Expr *UB) {
+ assert(isOpenMPWorksharingDirective(getDirectiveKind()) &&
+ "expected worksharing loop directive");
+ *std::next(child_begin(), UpperBoundVariableOffset) = UB;
+ }
+ void setStrideVariable(Expr *ST) {
+ assert(isOpenMPWorksharingDirective(getDirectiveKind()) &&
+ "expected worksharing loop directive");
+ *std::next(child_begin(), StrideVariableOffset) = ST;
+ }
+ void setEnsureUpperBound(Expr *EUB) {
+ assert(isOpenMPWorksharingDirective(getDirectiveKind()) &&
+ "expected worksharing loop directive");
+ *std::next(child_begin(), EnsureUpperBoundOffset) = EUB;
+ }
+ void setNextLowerBound(Expr *NLB) {
+ assert(isOpenMPWorksharingDirective(getDirectiveKind()) &&
+ "expected worksharing loop directive");
+ *std::next(child_begin(), NextLowerBoundOffset) = NLB;
+ }
+ void setNextUpperBound(Expr *NUB) {
+ assert(isOpenMPWorksharingDirective(getDirectiveKind()) &&
+ "expected worksharing loop directive");
+ *std::next(child_begin(), NextUpperBoundOffset) = NUB;
+ }
void setCounters(ArrayRef<Expr *> A);
void setUpdates(ArrayRef<Expr *> A);
void setFinals(ArrayRef<Expr *> A);
@@ -369,6 +423,48 @@
return const_cast<Expr *>(
reinterpret_cast<const Expr *>(*std::next(child_begin(), IncOffset)));
}
+ Expr *getIsLastIterVariable() const {
+ assert(isOpenMPWorksharingDirective(getDirectiveKind()) &&
+ "expected worksharing loop directive");
+ return const_cast<Expr *>(reinterpret_cast<const Expr *>(
+ *std::next(child_begin(), IsLastIterVariableOffset)));
+ }
+ Expr *getLowerBoundVariable() const {
+ assert(isOpenMPWorksharingDirective(getDirectiveKind()) &&
+ "expected worksharing loop directive");
+ return const_cast<Expr *>(reinterpret_cast<const Expr *>(
+ *std::next(child_begin(), LowerBoundVariableOffset)));
+ }
+ Expr *getUpperBoundVariable() const {
+ assert(isOpenMPWorksharingDirective(getDirectiveKind()) &&
+ "expected worksharing loop directive");
+ return const_cast<Expr *>(reinterpret_cast<const Expr *>(
+ *std::next(child_begin(), UpperBoundVariableOffset)));
+ }
+ Expr *getStrideVariable() const {
+ assert(isOpenMPWorksharingDirective(getDirectiveKind()) &&
+ "expected worksharing loop directive");
+ return const_cast<Expr *>(reinterpret_cast<const Expr *>(
+ *std::next(child_begin(), StrideVariableOffset)));
+ }
+ Expr *getEnsureUpperBound() const {
+ assert(isOpenMPWorksharingDirective(getDirectiveKind()) &&
+ "expected worksharing loop directive");
+ return const_cast<Expr *>(reinterpret_cast<const Expr *>(
+ *std::next(child_begin(), EnsureUpperBoundOffset)));
+ }
+ Expr *getNextLowerBound() const {
+ assert(isOpenMPWorksharingDirective(getDirectiveKind()) &&
+ "expected worksharing loop directive");
+ return const_cast<Expr *>(reinterpret_cast<const Expr *>(
+ *std::next(child_begin(), NextLowerBoundOffset)));
+ }
+ Expr *getNextUpperBound() const {
+ assert(isOpenMPWorksharingDirective(getDirectiveKind()) &&
+ "expected worksharing loop directive");
+ return const_cast<Expr *>(reinterpret_cast<const Expr *>(
+ *std::next(child_begin(), NextUpperBoundOffset)));
+ }
const Stmt *getBody() const {
// This relies on the loop form is already checked by Sema.
Stmt *Body = getAssociatedStmt()->IgnoreContainers(true);
@@ -531,6 +627,13 @@
/// \param Cond Condition.
/// \param SeparatedCond Condition with 1 iteration separated.
/// \param Inc Loop increment.
+ /// \param IL IsLastIteration local variable passed to runtime.
+ /// \param LB LowerBound local variable passed to runtime.
+ /// \param UB UpperBound local variable passed to runtime.
+ /// \param ST Stride local variable passed to runtime.
+  /// \param EUB EnsureUpperBound -- expression UB = min(UB, NumIterations).
+  /// \param NLB Update of LowerBound for statically scheduled 'omp for' loops.
+  /// \param NUB Update of UpperBound for statically scheduled 'omp for' loops.
/// \param Counters Loop counters.
/// \param Updates Expressions for loop counters update for CodeGen.
/// \param Finals Final loop counter values for GodeGen.
@@ -540,8 +643,10 @@
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses,
Stmt *AssociatedStmt, Expr *IV, Expr *LastIteration,
Expr *CalcLastIteration, Expr *PreCond, Expr *Cond,
- Expr *SeparatedCond, Expr *Init, Expr *Inc, ArrayRef<Expr *> Counters,
- ArrayRef<Expr *> Updates, ArrayRef<Expr *> Finals);
+ Expr *SeparatedCond, Expr *Init, Expr *Inc, Expr *IL, Expr *LB,
+ Expr *UB, Expr *ST, Expr *EUB, Expr *NLB, Expr *NUB,
+ ArrayRef<Expr *> Counters, ArrayRef<Expr *> Updates,
+ ArrayRef<Expr *> Finals);
/// \brief Creates an empty directive with the place
/// for \a NumClauses clauses.
@@ -607,6 +712,13 @@
/// \param Cond Condition.
/// \param SeparatedCond Condition with 1 iteration separated.
/// \param Inc Loop increment.
+ /// \param IL IsLastIteration local variable passed to runtime.
+ /// \param LB LowerBound local variable passed to runtime.
+ /// \param UB UpperBound local variable passed to runtime.
+ /// \param ST Stride local variable passed to runtime.
+  /// \param EUB EnsureUpperBound -- expression UB = min(UB, NumIterations).
+  /// \param NLB Update of LowerBound for statically scheduled 'omp for' loops.
+  /// \param NUB Update of UpperBound for statically scheduled 'omp for' loops.
/// \param Counters Loop counters.
/// \param Updates Expressions for loop counters update for CodeGen.
/// \param Finals Final loop counter values for GodeGen.
@@ -616,8 +728,10 @@
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses,
Stmt *AssociatedStmt, Expr *IV, Expr *LastIteration,
Expr *CalcLastIteration, Expr *PreCond, Expr *Cond,
- Expr *SeparatedCond, Expr *Init, Expr *Inc, ArrayRef<Expr *> Counters,
- ArrayRef<Expr *> Updates, ArrayRef<Expr *> Finals);
+ Expr *SeparatedCond, Expr *Init, Expr *Inc, Expr *IL, Expr *LB,
+ Expr *UB, Expr *ST, Expr *EUB, Expr *NLB, Expr *NUB,
+ ArrayRef<Expr *> Counters, ArrayRef<Expr *> Updates,
+ ArrayRef<Expr *> Finals);
/// \brief Creates an empty directive with the place
/// for \a NumClauses clauses.
@@ -956,6 +1070,13 @@
/// \param Cond Condition.
/// \param SeparatedCond Condition with 1 iteration separated.
/// \param Inc Loop increment.
+ /// \param IL IsLastIteration local variable passed to runtime.
+ /// \param LB LowerBound local variable passed to runtime.
+ /// \param UB UpperBound local variable passed to runtime.
+ /// \param ST Stride local variable passed to runtime.
+  /// \param EUB EnsureUpperBound -- expression UB = min(UB, NumIterations).
+  /// \param NLB Update of LowerBound for statically scheduled 'omp for' loops.
+  /// \param NUB Update of UpperBound for statically scheduled 'omp for' loops.
/// \param Counters Loop counters.
/// \param Updates Expressions for loop counters update for CodeGen.
/// \param Finals Final loop counter values for GodeGen.
@@ -965,8 +1086,10 @@
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses,
Stmt *AssociatedStmt, Expr *IV, Expr *LastIteration,
Expr *CalcLastIteration, Expr *PreCond, Expr *Cond,
- Expr *SeparatedCond, Expr *Init, Expr *Inc, ArrayRef<Expr *> Counters,
- ArrayRef<Expr *> Updates, ArrayRef<Expr *> Finals);
+ Expr *SeparatedCond, Expr *Init, Expr *Inc, Expr *IL, Expr *LB,
+ Expr *UB, Expr *ST, Expr *EUB, Expr *NLB, Expr *NUB,
+ ArrayRef<Expr *> Counters, ArrayRef<Expr *> Updates,
+ ArrayRef<Expr *> Finals);
/// \brief Creates an empty directive with the place
/// for \a NumClauses clauses.
@@ -1037,6 +1160,13 @@
/// \param Cond Condition.
/// \param SeparatedCond Condition with 1 iteration separated.
/// \param Inc Loop increment.
+ /// \param IL IsLastIteration local variable passed to runtime.
+ /// \param LB LowerBound local variable passed to runtime.
+ /// \param UB UpperBound local variable passed to runtime.
+ /// \param ST Stride local variable passed to runtime.
+  /// \param EUB EnsureUpperBound -- expression UB = min(UB, NumIterations).
+  /// \param NLB Update of LowerBound for statically scheduled 'omp for' loops.
+  /// \param NUB Update of UpperBound for statically scheduled 'omp for' loops.
/// \param Counters Loop counters.
/// \param Updates Expressions for loop counters update for CodeGen.
/// \param Finals Final loop counter values for GodeGen.
@@ -1046,8 +1176,10 @@
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses,
Stmt *AssociatedStmt, Expr *IV, Expr *LastIteration,
Expr *CalcLastIteration, Expr *PreCond, Expr *Cond,
- Expr *SeparatedCond, Expr *Init, Expr *Inc, ArrayRef<Expr *> Counters,
- ArrayRef<Expr *> Updates, ArrayRef<Expr *> Finals);
+ Expr *SeparatedCond, Expr *Init, Expr *Inc, Expr *IL, Expr *LB,
+ Expr *UB, Expr *ST, Expr *EUB, Expr *NLB, Expr *NUB,
+ ArrayRef<Expr *> Counters, ArrayRef<Expr *> Updates,
+ ArrayRef<Expr *> Finals);
/// \brief Creates an empty directive with the place
/// for \a NumClauses clauses.
Index: lib/Sema/SemaOpenMP.cpp
===================================================================
--- lib/Sema/SemaOpenMP.cpp
+++ lib/Sema/SemaOpenMP.cpp
@@ -2392,6 +2392,13 @@
Expr *SeparatedCond;
Expr *Init;
Expr *Inc;
+ Expr *IL;
+ Expr *LB;
+ Expr *UB;
+ Expr *ST;
+ Expr *EUB;
+ Expr *NLB;
+ Expr *NUB;
SmallVector<Expr *, 4> Counters;
SmallVector<Expr *, 4> Updates;
SmallVector<Expr *, 4> Finals;
@@ -2410,6 +2417,13 @@
SeparatedCond = nullptr;
Init = nullptr;
Inc = nullptr;
+ IL = nullptr;
+ LB = nullptr;
+ UB = nullptr;
+ ST = nullptr;
+ EUB = nullptr;
+ NLB = nullptr;
+ NUB = nullptr;
Counters.resize(size);
Updates.resize(size);
Finals.resize(size);
@@ -2763,23 +2777,69 @@
CurScope, InitLoc, BO_GT, LastIteration.get(),
SemaRef.ActOnIntegerConstant(SourceLocation(), 0).get());
- // Build the iteration variable and its initialization to zero before loop.
+ QualType VType = LastIteration.get()->getType();
+  // Build variables passed into runtime, necessary for worksharing directives.
+ ExprResult LB, UB, IL, ST, EUB;
+ if (isOpenMPWorksharingDirective(DKind)) {
+ // Lower bound variable, initialized with zero.
+ VarDecl *LBDecl = BuildVarDecl(SemaRef, InitLoc, VType, ".omp.lb");
+ LB = SemaRef.BuildDeclRefExpr(LBDecl, VType, VK_LValue, InitLoc);
+ SemaRef.AddInitializerToDecl(
+ LBDecl, SemaRef.ActOnIntegerConstant(InitLoc, 0).get(),
+ /*DirectInit*/ false, /*TypeMayContainAuto*/ false);
+
+ // Upper bound variable, initialized with last iteration number.
+ VarDecl *UBDecl = BuildVarDecl(SemaRef, InitLoc, VType, ".omp.ub");
+ UB = SemaRef.BuildDeclRefExpr(UBDecl, VType, VK_LValue, InitLoc);
+ SemaRef.AddInitializerToDecl(UBDecl, LastIteration.get(),
+ /*DirectInit*/ false,
+ /*TypeMayContainAuto*/ false);
+
+ // A 32-bit variable-flag where runtime returns 1 for the last iteration.
+ // This will be used to implement clause 'lastprivate'.
+ QualType Int32Ty = SemaRef.Context.getIntTypeForBitwidth(32, true);
+ VarDecl *ILDecl = BuildVarDecl(SemaRef, InitLoc, Int32Ty, ".omp.is_last");
+ IL = SemaRef.BuildDeclRefExpr(ILDecl, Int32Ty, VK_LValue, InitLoc);
+ SemaRef.AddInitializerToDecl(
+ ILDecl, SemaRef.ActOnIntegerConstant(InitLoc, 0).get(),
+ /*DirectInit*/ false, /*TypeMayContainAuto*/ false);
+
+ // Stride variable returned by runtime (we initialize it to 1 by default).
+ VarDecl *STDecl = BuildVarDecl(SemaRef, InitLoc, VType, ".omp.stride");
+ ST = SemaRef.BuildDeclRefExpr(STDecl, VType, VK_LValue, InitLoc);
+ SemaRef.AddInitializerToDecl(
+ STDecl, SemaRef.ActOnIntegerConstant(InitLoc, 1).get(),
+ /*DirectInit*/ false, /*TypeMayContainAuto*/ false);
+
+ // Build expression: UB = min(UB, LastIteration)
+  // It is necessary for CodeGen of directives with static scheduling.
+ ExprResult IsUBGreater = SemaRef.BuildBinOp(CurScope, InitLoc, BO_GT,
+ UB.get(), LastIteration.get());
+ ExprResult CondOp = SemaRef.ActOnConditionalOp(
+ InitLoc, InitLoc, IsUBGreater.get(), LastIteration.get(), UB.get());
+ EUB = SemaRef.BuildBinOp(CurScope, InitLoc, BO_Assign, UB.get(),
+ CondOp.get());
+ }
+
+ // Build the iteration variable and its initialization before loop.
ExprResult IV;
ExprResult Init;
{
- VarDecl *IVDecl = BuildVarDecl(SemaRef, InitLoc,
- LastIteration.get()->getType(), ".omp.iv");
- IV = SemaRef.BuildDeclRefExpr(IVDecl, LastIteration.get()->getType(),
- VK_LValue, InitLoc);
- Init = SemaRef.BuildBinOp(
- CurScope, InitLoc, BO_Assign, IV.get(),
- SemaRef.ActOnIntegerConstant(SourceLocation(), 0).get());
+ VarDecl *IVDecl = BuildVarDecl(SemaRef, InitLoc, VType, ".omp.iv");
+ IV = SemaRef.BuildDeclRefExpr(IVDecl, VType, VK_LValue, InitLoc);
+ Expr *RHS = isOpenMPWorksharingDirective(DKind)
+ ? LB.get()
+ : SemaRef.ActOnIntegerConstant(SourceLocation(), 0).get();
+ Init = SemaRef.BuildBinOp(CurScope, InitLoc, BO_Assign, IV.get(), RHS);
}
- // Loop condition (IV < NumIterations)
+ // Loop condition (IV < NumIterations) or (IV <= UB) for worksharing loops.
SourceLocation CondLoc;
- ExprResult Cond = SemaRef.BuildBinOp(CurScope, CondLoc, BO_LT, IV.get(),
- NumIterations.get());
+ ExprResult Cond =
+ isOpenMPWorksharingDirective(DKind)
+ ? SemaRef.BuildBinOp(CurScope, CondLoc, BO_LE, IV.get(), UB.get())
+ : SemaRef.BuildBinOp(CurScope, CondLoc, BO_LT, IV.get(),
+ NumIterations.get());
// Loop condition with 1 iteration separated (IV < LastIteration)
ExprResult SeparatedCond = SemaRef.BuildBinOp(CurScope, CondLoc, BO_LT,
IV.get(), LastIteration.get());
@@ -2792,6 +2852,32 @@
if (!Inc.isUsable())
return 0;
Inc = SemaRef.BuildBinOp(CurScope, IncLoc, BO_Assign, IV.get(), Inc.get());
+ if (!Inc.isUsable())
+ return 0;
+
+ // Increments for worksharing loops (LB = LB + ST; UB = UB + ST).
+ // Used for directives with static scheduling.
+ ExprResult NextLB, NextUB;
+ if (isOpenMPWorksharingDirective(DKind)) {
+ // LB + ST
+ NextLB = SemaRef.BuildBinOp(CurScope, IncLoc, BO_Add, LB.get(), ST.get());
+ if (!NextLB.isUsable())
+ return 0;
+ // LB = LB + ST
+ NextLB =
+ SemaRef.BuildBinOp(CurScope, IncLoc, BO_Assign, LB.get(), NextLB.get());
+ if (!NextLB.isUsable())
+ return 0;
+ // UB + ST
+ NextUB = SemaRef.BuildBinOp(CurScope, IncLoc, BO_Add, UB.get(), ST.get());
+ if (!NextUB.isUsable())
+ return 0;
+ // UB = UB + ST
+ NextUB =
+ SemaRef.BuildBinOp(CurScope, IncLoc, BO_Assign, UB.get(), NextUB.get());
+ if (!NextUB.isUsable())
+ return 0;
+ }
// Build updates and final values of the loop counters.
bool HasErrors = false;
@@ -2881,6 +2967,13 @@
Built.SeparatedCond = SeparatedCond.get();
Built.Init = Init.get();
Built.Inc = Inc.get();
+ Built.LB = LB.get();
+ Built.UB = UB.get();
+ Built.IL = IL.get();
+ Built.ST = ST.get();
+ Built.EUB = EUB.get();
+ Built.NLB = NextLB.get();
+ Built.NUB = NextUB.get();
return NestedLoopCount;
}
@@ -2937,7 +3030,8 @@
return OMPForDirective::Create(
Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt,
B.IterationVarRef, B.LastIteration, B.CalcLastIteration, B.PreCond,
- B.Cond, B.SeparatedCond, B.Init, B.Inc, B.Counters, B.Updates, B.Finals);
+ B.Cond, B.SeparatedCond, B.Init, B.Inc, B.IL, B.LB, B.UB, B.ST, B.EUB,
+ B.NLB, B.NUB, B.Counters, B.Updates, B.Finals);
}
StmtResult Sema::ActOnOpenMPForSimdDirective(
@@ -2952,11 +3046,15 @@
if (NestedLoopCount == 0)
return StmtError();
+ assert((CurContext->isDependentContext() || B.builtAll()) &&
+ "omp for simd loop exprs were not built");
+
getCurFunction()->setHasBranchProtectedScope();
return OMPForSimdDirective::Create(
Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt,
B.IterationVarRef, B.LastIteration, B.CalcLastIteration, B.PreCond,
- B.Cond, B.SeparatedCond, B.Init, B.Inc, B.Counters, B.Updates, B.Finals);
+ B.Cond, B.SeparatedCond, B.Init, B.Inc, B.IL, B.LB, B.UB, B.ST, B.EUB,
+ B.NLB, B.NUB, B.Counters, B.Updates, B.Finals);
}
StmtResult Sema::ActOnOpenMPSectionsDirective(ArrayRef<OMPClause *> Clauses,
@@ -3064,7 +3162,8 @@
return OMPParallelForDirective::Create(
Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt,
B.IterationVarRef, B.LastIteration, B.CalcLastIteration, B.PreCond,
- B.Cond, B.SeparatedCond, B.Init, B.Inc, B.Counters, B.Updates, B.Finals);
+ B.Cond, B.SeparatedCond, B.Init, B.Inc, B.IL, B.LB, B.UB, B.ST, B.EUB,
+ B.NLB, B.NUB, B.Counters, B.Updates, B.Finals);
}
StmtResult Sema::ActOnOpenMPParallelForSimdDirective(
@@ -3092,7 +3191,8 @@
return OMPParallelForSimdDirective::Create(
Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt,
B.IterationVarRef, B.LastIteration, B.CalcLastIteration, B.PreCond,
- B.Cond, B.SeparatedCond, B.Init, B.Inc, B.Counters, B.Updates, B.Finals);
+ B.Cond, B.SeparatedCond, B.Init, B.Inc, B.IL, B.LB, B.UB, B.ST, B.EUB,
+ B.NLB, B.NUB, B.Counters, B.Updates, B.Finals);
}
StmtResult
Index: lib/AST/Stmt.cpp
===================================================================
--- lib/AST/Stmt.cpp
+++ lib/AST/Stmt.cpp
@@ -1486,8 +1486,9 @@
ArrayRef<Expr *> Updates, ArrayRef<Expr *> Finals) {
unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPSimdDirective),
llvm::alignOf<OMPClause *>());
- void *Mem = C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() +
- sizeof(Stmt *) * numLoopChildren(CollapsedNum));
+ void *Mem =
+ C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() +
+ sizeof(Stmt *) * numLoopChildren(CollapsedNum, OMPD_simd));
OMPSimdDirective *Dir = new (Mem)
OMPSimdDirective(StartLoc, EndLoc, CollapsedNum, Clauses.size());
Dir->setClauses(Clauses);
@@ -1511,23 +1512,25 @@
EmptyShell) {
unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPSimdDirective),
llvm::alignOf<OMPClause *>());
- void *Mem = C.Allocate(Size + sizeof(OMPClause *) * NumClauses +
- sizeof(Stmt *) * numLoopChildren(CollapsedNum));
+ void *Mem =
+ C.Allocate(Size + sizeof(OMPClause *) * NumClauses +
+ sizeof(Stmt *) * numLoopChildren(CollapsedNum, OMPD_simd));
return new (Mem) OMPSimdDirective(CollapsedNum, NumClauses);
}
-OMPForDirective *
-OMPForDirective::Create(const ASTContext &C, SourceLocation StartLoc,
- SourceLocation EndLoc, unsigned CollapsedNum,
- ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
- Expr *IV, Expr *LastIteration, Expr *CalcLastIteration,
- Expr *PreCond, Expr *Cond, Expr *SeparatedCond,
- Expr *Init, Expr *Inc, ArrayRef<Expr *> Counters,
- ArrayRef<Expr *> Updates, ArrayRef<Expr *> Finals) {
+OMPForDirective *OMPForDirective::Create(
+ const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
+ unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
+ Expr *IV, Expr *LastIteration, Expr *CalcLastIteration, Expr *PreCond,
+ Expr *Cond, Expr *SeparatedCond, Expr *Init, Expr *Inc, Expr *IL, Expr *LB,
+ Expr *UB, Expr *ST, Expr *EUB, Expr *NLB, Expr *NUB,
+ ArrayRef<Expr *> Counters, ArrayRef<Expr *> Updates,
+ ArrayRef<Expr *> Finals) {
unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPForDirective),
llvm::alignOf<OMPClause *>());
- void *Mem = C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() +
- sizeof(Stmt *) * numLoopChildren(CollapsedNum));
+ void *Mem =
+ C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() +
+ sizeof(Stmt *) * numLoopChildren(CollapsedNum, OMPD_for));
OMPForDirective *Dir =
new (Mem) OMPForDirective(StartLoc, EndLoc, CollapsedNum, Clauses.size());
Dir->setClauses(Clauses);
@@ -1539,6 +1542,13 @@
Dir->setCond(Cond, SeparatedCond);
Dir->setInit(Init);
Dir->setInc(Inc);
+ Dir->setIsLastIterVariable(IL);
+ Dir->setLowerBoundVariable(LB);
+ Dir->setUpperBoundVariable(UB);
+ Dir->setStrideVariable(ST);
+ Dir->setEnsureUpperBound(EUB);
+ Dir->setNextLowerBound(NLB);
+ Dir->setNextUpperBound(NUB);
Dir->setCounters(Counters);
Dir->setUpdates(Updates);
Dir->setFinals(Finals);
@@ -1551,22 +1561,25 @@
EmptyShell) {
unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPForDirective),
llvm::alignOf<OMPClause *>());
- void *Mem = C.Allocate(Size + sizeof(OMPClause *) * NumClauses +
- sizeof(Stmt *) * numLoopChildren(CollapsedNum));
+ void *Mem =
+ C.Allocate(Size + sizeof(OMPClause *) * NumClauses +
+ sizeof(Stmt *) * numLoopChildren(CollapsedNum, OMPD_for));
return new (Mem) OMPForDirective(CollapsedNum, NumClauses);
}
OMPForSimdDirective *OMPForSimdDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
Expr *IV, Expr *LastIteration, Expr *CalcLastIteration, Expr *PreCond,
- Expr *Cond, Expr *SeparatedCond, Expr *Init, Expr *Inc,
+ Expr *Cond, Expr *SeparatedCond, Expr *Init, Expr *Inc, Expr *IL, Expr *LB,
+ Expr *UB, Expr *ST, Expr *EUB, Expr *NLB, Expr *NUB,
ArrayRef<Expr *> Counters, ArrayRef<Expr *> Updates,
ArrayRef<Expr *> Finals) {
unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPForSimdDirective),
llvm::alignOf<OMPClause *>());
- void *Mem = C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() +
- sizeof(Stmt *) * numLoopChildren(CollapsedNum));
+ void *Mem =
+ C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() +
+ sizeof(Stmt *) * numLoopChildren(CollapsedNum, OMPD_for_simd));
OMPForSimdDirective *Dir = new (Mem)
OMPForSimdDirective(StartLoc, EndLoc, CollapsedNum, Clauses.size());
Dir->setClauses(Clauses);
@@ -1578,6 +1591,13 @@
Dir->setCond(Cond, SeparatedCond);
Dir->setInit(Init);
Dir->setInc(Inc);
+ Dir->setIsLastIterVariable(IL);
+ Dir->setLowerBoundVariable(LB);
+ Dir->setUpperBoundVariable(UB);
+ Dir->setStrideVariable(ST);
+ Dir->setEnsureUpperBound(EUB);
+ Dir->setNextLowerBound(NLB);
+ Dir->setNextUpperBound(NUB);
Dir->setCounters(Counters);
Dir->setUpdates(Updates);
Dir->setFinals(Finals);
@@ -1590,8 +1610,9 @@
EmptyShell) {
unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPForSimdDirective),
llvm::alignOf<OMPClause *>());
- void *Mem = C.Allocate(Size + sizeof(OMPClause *) * NumClauses +
- sizeof(Stmt *) * numLoopChildren(CollapsedNum));
+ void *Mem =
+ C.Allocate(Size + sizeof(OMPClause *) * NumClauses +
+ sizeof(Stmt *) * numLoopChildren(CollapsedNum, OMPD_for_simd));
return new (Mem) OMPForSimdDirective(CollapsedNum, NumClauses);
}
@@ -1709,13 +1730,15 @@
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
Expr *IV, Expr *LastIteration, Expr *CalcLastIteration, Expr *PreCond,
- Expr *Cond, Expr *SeparatedCond, Expr *Init, Expr *Inc,
+ Expr *Cond, Expr *SeparatedCond, Expr *Init, Expr *Inc, Expr *IL, Expr *LB,
+ Expr *UB, Expr *ST, Expr *EUB, Expr *NLB, Expr *NUB,
ArrayRef<Expr *> Counters, ArrayRef<Expr *> Updates,
ArrayRef<Expr *> Finals) {
unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPParallelForDirective),
llvm::alignOf<OMPClause *>());
void *Mem = C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() +
- sizeof(Stmt *) * numLoopChildren(CollapsedNum));
+ sizeof(Stmt *) *
+ numLoopChildren(CollapsedNum, OMPD_parallel_for));
OMPParallelForDirective *Dir = new (Mem)
OMPParallelForDirective(StartLoc, EndLoc, CollapsedNum, Clauses.size());
Dir->setClauses(Clauses);
@@ -1727,6 +1750,13 @@
Dir->setCond(Cond, SeparatedCond);
Dir->setInit(Init);
Dir->setInc(Inc);
+ Dir->setIsLastIterVariable(IL);
+ Dir->setLowerBoundVariable(LB);
+ Dir->setUpperBoundVariable(UB);
+ Dir->setStrideVariable(ST);
+ Dir->setEnsureUpperBound(EUB);
+ Dir->setNextLowerBound(NLB);
+ Dir->setNextUpperBound(NUB);
Dir->setCounters(Counters);
Dir->setUpdates(Updates);
Dir->setFinals(Finals);
@@ -1739,21 +1769,24 @@
unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPParallelForDirective),
llvm::alignOf<OMPClause *>());
void *Mem = C.Allocate(Size + sizeof(OMPClause *) * NumClauses +
- sizeof(Stmt *) * numLoopChildren(CollapsedNum));
+ sizeof(Stmt *) *
+ numLoopChildren(CollapsedNum, OMPD_parallel_for));
return new (Mem) OMPParallelForDirective(CollapsedNum, NumClauses);
}
OMPParallelForSimdDirective *OMPParallelForSimdDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
Expr *IV, Expr *LastIteration, Expr *CalcLastIteration, Expr *PreCond,
- Expr *Cond, Expr *SeparatedCond, Expr *Init, Expr *Inc,
+ Expr *Cond, Expr *SeparatedCond, Expr *Init, Expr *Inc, Expr *IL, Expr *LB,
+ Expr *UB, Expr *ST, Expr *EUB, Expr *NLB, Expr *NUB,
ArrayRef<Expr *> Counters, ArrayRef<Expr *> Updates,
ArrayRef<Expr *> Finals) {
unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPParallelForSimdDirective),
llvm::alignOf<OMPClause *>());
- void *Mem = C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() +
- sizeof(Stmt *) * numLoopChildren(CollapsedNum));
+ void *Mem = C.Allocate(
+ Size + sizeof(OMPClause *) * Clauses.size() +
+ sizeof(Stmt *) * numLoopChildren(CollapsedNum, OMPD_parallel_for_simd));
OMPParallelForSimdDirective *Dir = new (Mem) OMPParallelForSimdDirective(
StartLoc, EndLoc, CollapsedNum, Clauses.size());
Dir->setClauses(Clauses);
@@ -1765,6 +1798,13 @@
Dir->setCond(Cond, SeparatedCond);
Dir->setInit(Init);
Dir->setInc(Inc);
+ Dir->setIsLastIterVariable(IL);
+ Dir->setLowerBoundVariable(LB);
+ Dir->setUpperBoundVariable(UB);
+ Dir->setStrideVariable(ST);
+ Dir->setEnsureUpperBound(EUB);
+ Dir->setNextLowerBound(NLB);
+ Dir->setNextUpperBound(NUB);
Dir->setCounters(Counters);
Dir->setUpdates(Updates);
Dir->setFinals(Finals);
@@ -1777,8 +1817,9 @@
unsigned CollapsedNum, EmptyShell) {
unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPParallelForSimdDirective),
llvm::alignOf<OMPClause *>());
- void *Mem = C.Allocate(Size + sizeof(OMPClause *) * NumClauses +
- sizeof(Stmt *) * numLoopChildren(CollapsedNum));
+ void *Mem = C.Allocate(
+ Size + sizeof(OMPClause *) * NumClauses +
+ sizeof(Stmt *) * numLoopChildren(CollapsedNum, OMPD_parallel_for_simd));
return new (Mem) OMPParallelForSimdDirective(CollapsedNum, NumClauses);
}
Index: lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.cpp
+++ lib/CodeGen/CGOpenMPRuntime.cpp
@@ -150,7 +150,8 @@
OpenMPLocThreadIDMapTy::iterator I = OpenMPLocThreadIDMap.find(CGF.CurFn);
if (I != OpenMPLocThreadIDMap.end()) {
LocValue = I->second.DebugLoc;
- } else {
+ }
+ if (LocValue == nullptr) {
// Generate "ident_t .kmpc_loc.addr;"
llvm::AllocaInst *AI = CGF.CreateTempAlloca(IdentTy, ".kmpc_loc.addr");
AI->setAlignment(CGM.getDataLayout().getPrefTypeAlignment(IdentTy));
@@ -200,8 +201,11 @@
OpenMPLocThreadIDMapTy::iterator I = OpenMPLocThreadIDMap.find(CGF.CurFn);
if (I != OpenMPLocThreadIDMap.end()) {
ThreadID = I->second.ThreadID;
- } else if (auto OMPRegionInfo =
- dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
+ if (ThreadID != nullptr)
+ return ThreadID;
+ }
+ if (auto OMPRegionInfo =
+ dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
// Check if this an outlined function with thread id passed as argument.
auto ThreadIDVar = OMPRegionInfo->getThreadIDVariable();
auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
@@ -296,6 +300,95 @@
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
break;
}
+ // Build __kmpc_for_static_init*(
+ // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
+ // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
+ // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
+ // kmp_int[32|64] incr, kmp_int[32|64] chunk);
+ case OMPRTL__kmpc_for_static_init_4: {
+ auto ITy = CGM.Int32Ty;
+ auto PtrTy = llvm::PointerType::getUnqual(ITy);
+ llvm::Type *TypeParams[] = {
+ getIdentTyPointerTy(), // loc
+ CGM.Int32Ty, // tid
+ CGM.Int32Ty, // schedtype
+ llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
+ PtrTy, // p_lower
+ PtrTy, // p_upper
+ PtrTy, // p_stride
+ ITy, // incr
+ ITy // chunk
+ };
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_4");
+ break;
+ }
+ case OMPRTL__kmpc_for_static_init_4u: {
+ auto ITy = CGM.Int32Ty;
+ auto PtrTy = llvm::PointerType::getUnqual(ITy);
+ llvm::Type *TypeParams[] = {
+ getIdentTyPointerTy(), // loc
+ CGM.Int32Ty, // tid
+ CGM.Int32Ty, // schedtype
+ llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
+ PtrTy, // p_lower
+ PtrTy, // p_upper
+ PtrTy, // p_stride
+ ITy, // incr
+ ITy // chunk
+ };
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_4u");
+ break;
+ }
+ case OMPRTL__kmpc_for_static_init_8: {
+ auto ITy = CGM.Int64Ty;
+ auto PtrTy = llvm::PointerType::getUnqual(ITy);
+ llvm::Type *TypeParams[] = {
+ getIdentTyPointerTy(), // loc
+ CGM.Int32Ty, // tid
+ CGM.Int32Ty, // schedtype
+ llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
+ PtrTy, // p_lower
+ PtrTy, // p_upper
+ PtrTy, // p_stride
+ ITy, // incr
+ ITy // chunk
+ };
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_8");
+ break;
+ }
+ case OMPRTL__kmpc_for_static_init_8u: {
+ auto ITy = CGM.Int64Ty;
+ auto PtrTy = llvm::PointerType::getUnqual(ITy);
+ llvm::Type *TypeParams[] = {
+ getIdentTyPointerTy(), // loc
+ CGM.Int32Ty, // tid
+ CGM.Int32Ty, // schedtype
+ llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
+ PtrTy, // p_lower
+ PtrTy, // p_upper
+ PtrTy, // p_stride
+ ITy, // incr
+ ITy // chunk
+ };
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_8u");
+ break;
+ }
+ case OMPRTL__kmpc_for_static_fini: {
+ // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
+ break;
+ }
case OMPRTL__kmpc_push_num_threads: {
// Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
// kmp_int32 num_threads)
@@ -441,6 +534,103 @@
CGF.EmitRuntimeCall(RTLFn, Args);
}
+/// \brief Schedule types for 'omp for' loops (see enum sched_type in kmp.h).
+enum OpenMPSchedType {
+ /// \brief Lower bound for default (unordered) versions.
+ OMP_sch_lower = 32,
+ OMP_sch_static_chunked = 33,
+ OMP_sch_static = 34,
+ OMP_sch_dynamic_chunked = 35,
+ OMP_sch_guided_chunked = 36,
+ OMP_sch_runtime = 37,
+ OMP_sch_auto = 38,
+ /// \brief Lower bound for 'ordered' versions.
+ OMP_ord_lower = 64,
+ /// \brief Lower bound for 'nomerge' versions.
+ OMP_nm_lower = 160,
+};
+
+/// \brief Map the OpenMP loop schedule to the runtime enumeration.
+static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
+ bool Chunked) {
+ auto Schedule = OMP_sch_static;
+ switch (ScheduleKind) {
+ case OMPC_SCHEDULE_static:
+ Schedule = Chunked ? OMP_sch_static_chunked : OMP_sch_static;
+ break;
+ case OMPC_SCHEDULE_dynamic:
+ Schedule = OMP_sch_dynamic_chunked;
+ break;
+ case OMPC_SCHEDULE_guided:
+ Schedule = OMP_sch_guided_chunked;
+ break;
+ case OMPC_SCHEDULE_auto:
+ Schedule = OMP_sch_auto;
+ break;
+ case OMPC_SCHEDULE_runtime:
+ Schedule = OMP_sch_runtime;
+ break;
+ case OMPC_SCHEDULE_unknown:
+ assert(!Chunked && "chunk was specified but schedule kind not known");
+ break;
+ }
+ return Schedule;
+}
+
+bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
+ bool Chunked) const {
+ auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
+ return Schedule == OMP_sch_static;
+}
+
+void CGOpenMPRuntime::EmitOMPForInit(CodeGenFunction &CGF, SourceLocation Loc,
+ OpenMPScheduleClauseKind ScheduleKind,
+ unsigned IVSize, bool IVSigned,
+ llvm::Value *IL, llvm::Value *LB,
+ llvm::Value *UB, llvm::Value *ST,
+ llvm::Value *Chunk) {
+ OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunk != nullptr);
+ // Call __kmpc_for_static_init(
+ // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
+ // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
+ // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
+ // kmp_int[32|64] incr, kmp_int[32|64] chunk);
+ // TODO: Implement dynamic schedule.
+ if (Chunk == nullptr)
+ Chunk = CGF.Builder.getIntN(IVSize, /*C*/ 1);
+ llvm::Value *Args[] = {
+ EmitOpenMPUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
+ GetOpenMPThreadID(CGF, Loc),
+ CGF.Builder.getInt32(Schedule), // Schedule type
+ IL, // &isLastIter
+ LB, // &LB
+ UB, // &UB
+ ST, // &Stride
+ CGF.Builder.getIntN(IVSize, 1), // Incr
+ Chunk // Chunk
+ };
+ assert((IVSize == 32 || IVSize == 64) &&
+ "Index size is not compatible with the omp runtime");
+ auto F = IVSize == 32 ? (IVSigned ? OMPRTL__kmpc_for_static_init_4
+ : OMPRTL__kmpc_for_static_init_4u)
+ : (IVSigned ? OMPRTL__kmpc_for_static_init_8
+ : OMPRTL__kmpc_for_static_init_8u);
+ auto RTLFn = CreateRuntimeFunction(F);
+ CGF.EmitRuntimeCall(RTLFn, Args);
+}
+
+void CGOpenMPRuntime::EmitOMPForFini(CodeGenFunction &CGF, SourceLocation Loc,
+ OpenMPScheduleClauseKind ScheduleKind) {
+ assert((ScheduleKind == OMPC_SCHEDULE_static ||
+ ScheduleKind == OMPC_SCHEDULE_unknown) &&
+ "Non-static schedule kinds are not yet implemented");
+ // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
+ llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
+ GetOpenMPThreadID(CGF, Loc)};
+ auto RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_for_static_fini);
+ CGF.EmitRuntimeCall(RTLFn, Args);
+}
+
void CGOpenMPRuntime::EmitOMPNumThreadsClause(CodeGenFunction &CGF,
llvm::Value *NumThreads,
SourceLocation Loc) {
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h
+++ lib/CodeGen/CodeGenFunction.h
@@ -2035,12 +2035,17 @@
void EmitOMPTargetDirective(const OMPTargetDirective &S);
void EmitOMPTeamsDirective(const OMPTeamsDirective &S);
- /// Helpers for 'omp simd' directive.
+private:
+
+ /// Helpers for the OpenMP loop directives.
void EmitOMPLoopBody(const OMPLoopDirective &Directive,
bool SeparateIter = false);
void EmitOMPInnerLoop(const OMPLoopDirective &S, OMPPrivateScope &LoopScope,
bool SeparateIter = false);
void EmitOMPSimdFinal(const OMPLoopDirective &S);
+ void EmitOMPWorksharingLoop(const OMPLoopDirective &S);
+
+public:
//===--------------------------------------------------------------------===//
// LValue Expression Emission
Index: lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- lib/CodeGen/CGStmtOpenMP.cpp
+++ lib/CodeGen/CGStmtOpenMP.cpp
@@ -443,8 +443,113 @@
DI->EmitLexicalBlockEnd(Builder, S.getSourceRange().getEnd());
}
-void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &) {
- llvm_unreachable("CodeGen for 'omp for' is not supported yet.");
+/// \brief Emit a helper variable and return corresponding lvalue.
+static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
+ const DeclRefExpr *Helper) {
+ auto VDecl = cast<VarDecl>(Helper->getDecl());
+ CGF.EmitVarDecl(*VDecl);
+ return CGF.EmitLValue(Helper);
+}
+
+void CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
+ // Emit the loop iteration variable.
+ auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
+ auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
+ EmitVarDecl(*IVDecl);
+
+ // Emit the iterations count variable.
+ // If it is not a variable, Sema decided to calculate iterations count on each
+ // iteration (e.g., it is foldable into a constant).
+ if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
+ EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
+ // Emit calculation of the iterations count.
+ EmitIgnoredExpr(S.getCalcLastIteration());
+ }
+
+ auto &RT = CGM.getOpenMPRuntime();
+
+ // Check pre-condition.
+ {
+ // Emit: if (LastIteration > 0) - begin.
+ RegionCounter Cnt = getPGORegionCounter(&S);
+ auto ThenBlock = createBasicBlock("omp.precond.then");
+ auto ContBlock = createBasicBlock("omp.precond.end");
+ EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
+ EmitBlock(ThenBlock);
+ Cnt.beginRegion(Builder);
+ // Emit 'then' code.
+ {
+ // Emit helper vars inits.
+ LValue LB =
+ EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
+ LValue UB =
+ EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
+ LValue ST =
+ EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
+ LValue IL =
+ EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
+
+ OMPPrivateScope LoopScope(*this);
+ EmitPrivateLoopCounters(*this, LoopScope, S.counters());
+
+ // Detect the loop schedule kind and chunk.
+ auto ScheduleKind = OMPC_SCHEDULE_unknown;
+ llvm::Value *Chunk = nullptr;
+ if (auto C = cast_or_null<OMPScheduleClause>(
+ S.getSingleClause(OMPC_schedule))) {
+ ScheduleKind = C->getScheduleKind();
+ if (auto Ch = C->getChunkSize()) {
+ Chunk = EmitScalarExpr(Ch);
+ Chunk = EmitScalarConversion(Chunk, Ch->getType(),
+ S.getIterationVariable()->getType());
+ }
+ }
+ const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
+ const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
+ if (RT.isStaticNonchunked(ScheduleKind,
+ /* Chunked */ Chunk != nullptr)) {
+ // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
+ // When no chunk_size is specified, the iteration space is divided into
+ // chunks that are approximately equal in size, and at most one chunk is
+ // distributed to each thread. Note that the size of the chunks is
+ // unspecified in this case.
+ RT.EmitOMPForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
+ IL.getAddress(), LB.getAddress(), UB.getAddress(),
+ ST.getAddress());
+ // UB = min(UB, GlobalUB);
+ EmitIgnoredExpr(S.getEnsureUpperBound());
+ // IV = LB;
+ EmitIgnoredExpr(S.getInit());
+ // while (idx <= UB) { BODY; ++idx; }
+ EmitOMPInnerLoop(S, LoopScope);
+ // Tell the runtime we are done.
+ RT.EmitOMPForFini(*this, S.getLocStart(), ScheduleKind);
+ } else
+ llvm_unreachable("Requested OpenMP schedule is not yet implemented");
+ }
+  // Emit: if (LastIteration > 0) - end.
+ EmitBranch(ContBlock);
+ EmitBlock(ContBlock, true);
+ }
+}
+
+void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
+ RunCleanupsScope DirectiveScope(*this);
+
+ CGDebugInfo *DI = getDebugInfo();
+ if (DI)
+ DI->EmitLexicalBlockStart(Builder, S.getSourceRange().getBegin());
+
+ EmitOMPWorksharingLoop(S);
+
+ // Emit an implicit barrier at the end.
+ auto Flags = static_cast<CGOpenMPRuntime::OpenMPLocationFlags>(
+ CGOpenMPRuntime::OMP_IDENT_KMPC |
+ CGOpenMPRuntime::OMP_IDENT_BARRIER_IMPL);
+ CGM.getOpenMPRuntime().EmitOMPBarrierCall(*this, S.getLocStart(), Flags);
+
+ if (DI)
+ DI->EmitLexicalBlockEnd(Builder, S.getSourceRange().getEnd());
}
void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &) {
Index: lib/CodeGen/CGOpenMPRuntime.h
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.h
+++ lib/CodeGen/CGOpenMPRuntime.h
@@ -15,6 +15,7 @@
#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H
#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/OpenMPKinds.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
@@ -75,6 +76,12 @@
OMPRTL__kmpc_end_critical,
// Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
OMPRTL__kmpc_barrier,
+ // Calls for static scheduling 'omp for' loops.
+ OMPRTL__kmpc_for_static_init_4,
+ OMPRTL__kmpc_for_static_init_4u,
+ OMPRTL__kmpc_for_static_init_8,
+ OMPRTL__kmpc_for_static_init_8u,
+ OMPRTL__kmpc_for_static_fini,
// Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
// global_tid);
OMPRTL__kmpc_serialized_parallel,
@@ -254,6 +261,56 @@
virtual void EmitOMPBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
OpenMPLocationFlags Flags);
+ /// \brief Check if the specified \a ScheduleKind is static non-chunked.
+  /// This kind of worksharing directive is emitted without an outer loop.
+ /// \param ScheduleKind Schedule kind specified in the 'schedule' clause.
+ /// \param Chunked True if chunk is specified in the clause.
+ ///
+ virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
+ bool Chunked) const;
+
+ /// \brief If the loop has static schedule, call
+ /// __kmpc_for_static_init(
+ /// ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
+ /// kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
+ /// kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
+ /// kmp_int[32|64] incr, kmp_int[32|64] chunk);
+ ///
+ /// This runtime routine is called before the OpenMP loop with static
+ /// schedule to get the upper/lower bounds \a LB and \a UB for the current
+ /// OpenMP thread, and to get stride \a ST if the schedule is static-chunked.
+ ///
+ /// \param CGF Reference to current CodeGenFunction.
+ /// \param Loc Clang source location.
+ /// \param ScheduleKind Schedule kind, specified by the 'schedule' clause.
+ /// \param IVSize Size of the iteration variable in bits.
+  /// \param IVSigned Sign of the iteration variable.
+ /// \param IL Address of the output variable in which the flag of the
+ /// last iteration is returned.
+ /// \param LB Address of the output variable in which the lower iteration
+ /// number is returned.
+ /// \param UB Address of the output variable in which the upper iteration
+ /// number is returned.
+ /// \param ST Address of the output variable in which the stride value is
+  /// returned; necessary to generate the static_chunked scheduled loop.
+ /// \param Chunk Value of the chunk for the static_chunked scheduled loop.
+ ///
+ virtual void EmitOMPForInit(CodeGenFunction &CGF, SourceLocation Loc,
+                              OpenMPScheduleClauseKind ScheduleKind,
+ unsigned IVSize, bool IVSigned, llvm::Value *IL,
+ llvm::Value *LB, llvm::Value *UB, llvm::Value *ST,
+ llvm::Value *Chunk = nullptr);
+
+ /// \brief If the loop has static schedule, call
+ /// __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid)
+ ///
+ /// \param CGF Reference to current CodeGenFunction.
+ /// \param Loc Clang source location.
+ /// \param ScheduleKind Schedule kind, specified by the 'schedule' clause.
+ ///
+ virtual void EmitOMPForFini(CodeGenFunction &CGF, SourceLocation Loc,
+ OpenMPScheduleClauseKind ScheduleKind);
+
/// \brief Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
/// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
/// clause.
Index: lib/Serialization/ASTReaderStmt.cpp
===================================================================
--- lib/Serialization/ASTReaderStmt.cpp
+++ lib/Serialization/ASTReaderStmt.cpp
@@ -1986,6 +1986,15 @@
D->setCond(Fst, Snd);
D->setInit(Reader.ReadSubExpr());
D->setInc(Reader.ReadSubExpr());
+ if (isOpenMPWorksharingDirective(D->getDirectiveKind())) {
+ D->setIsLastIterVariable(Reader.ReadSubExpr());
+ D->setLowerBoundVariable(Reader.ReadSubExpr());
+ D->setUpperBoundVariable(Reader.ReadSubExpr());
+ D->setStrideVariable(Reader.ReadSubExpr());
+ D->setEnsureUpperBound(Reader.ReadSubExpr());
+ D->setNextLowerBound(Reader.ReadSubExpr());
+ D->setNextUpperBound(Reader.ReadSubExpr());
+ }
SmallVector<Expr *, 4> Sub;
unsigned CollapsedNum = D->getCollapsedNumber();
Sub.reserve(CollapsedNum);
Index: lib/Serialization/ASTWriterStmt.cpp
===================================================================
--- lib/Serialization/ASTWriterStmt.cpp
+++ lib/Serialization/ASTWriterStmt.cpp
@@ -1863,6 +1863,15 @@
Writer.AddStmt(D->getCond(/* SeparateIter */ true));
Writer.AddStmt(D->getInit());
Writer.AddStmt(D->getInc());
+ if (isOpenMPWorksharingDirective(D->getDirectiveKind())) {
+ Writer.AddStmt(D->getIsLastIterVariable());
+ Writer.AddStmt(D->getLowerBoundVariable());
+ Writer.AddStmt(D->getUpperBoundVariable());
+ Writer.AddStmt(D->getStrideVariable());
+ Writer.AddStmt(D->getEnsureUpperBound());
+ Writer.AddStmt(D->getNextLowerBound());
+ Writer.AddStmt(D->getNextUpperBound());
+ }
for (auto I : D->counters()) {
Writer.AddStmt(I);
}
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits