Hi rjmccall, hfinkel, fraggamuffin, ejstotzer,
If condition evaluates to true, the code executes task by calling
@__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code
by executing the following code:
```
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned
by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr,
returned by @__kmpc_omp_task_alloc()>);
```
Also it checks if the condition is constant and if it is constant it evaluates
its value and then generates either parallel version of the code (if the
condition evaluates to true), or the serial version of the code (if the
condition evaluates to false).
http://reviews.llvm.org/D9143
Files:
lib/CodeGen/CGOpenMPRuntime.cpp
lib/CodeGen/CGOpenMPRuntime.h
lib/CodeGen/CGStmtOpenMP.cpp
test/OpenMP/task_if_codegen.cpp
EMAIL PREFERENCES
http://reviews.llvm.org/settings/panel/emailpreferences/
Index: test/OpenMP/task_if_codegen.cpp
===================================================================
--- test/OpenMP/task_if_codegen.cpp
+++ test/OpenMP/task_if_codegen.cpp
@@ -0,0 +1,133 @@
+// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix=CHECK %s
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+void fn1();
+void fn2();
+void fn3();
+void fn4();
+void fn5();
+void fn6();
+
+int Arg;
+
+// CHECK-LABEL: define void @{{.+}}gtid_test
+void gtid_test() {
+// CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 1, {{.+}}* [[GTID_TEST_REGION1:@.+]] to void
+#pragma omp parallel
+#pragma omp task if (false)
+ gtid_test();
+// CHECK: ret void
+}
+
+// CHECK: define internal void [[GTID_TEST_REGION1]](i32* [[GTID_PARAM:%.+]], i
+// CHECK: store i32* [[GTID_PARAM]], i32** [[GTID_ADDR_REF:%.+]],
+// CHECK: [[GTID_ADDR:%.+]] = load i32*, i32** [[GTID_ADDR_REF]]
+// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_ADDR]]
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(
+// CHECK: [[TASK_PTR:%.+]] = bitcast i8* [[ORIG_TASK_PTR]] to
+// CHECK: call void @__kmpc_omp_task_begin_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]])
+// CHECK: call i32 [[GTID_TEST_REGION2:@.+]](i32 [[GTID]], %{{.+}}* [[TASK_PTR]])
+// CHECK: call void @__kmpc_omp_task_complete_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]])
+// CHECK: ret void
+
+// CHECK: define internal i32 [[GTID_TEST_REGION2]](
+// CHECK: call void @{{.+}}gtid_test
+// CHECK: ret i32
+
+template <typename T>
+int tmain(T Arg) {
+#pragma omp task if (true)
+ fn1();
+#pragma omp task if (false)
+ fn2();
+#pragma omp task if (Arg)
+ fn3();
+ return 0;
+}
+
+// CHECK-LABEL: @main
+int main() {
+// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc({{[^,]+}}, i32 [[GTID]], i32 1, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[CAP_FN4:[^ ]+]] to i32 (i32, i8*)*))
+// CHECK: call i32 @__kmpc_omp_task(%{{.+}}* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
+#pragma omp task if (true)
+ fn4();
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(
+// CHECK: [[TASK_PTR:%.+]] = bitcast i8* [[ORIG_TASK_PTR]] to
+// CHECK: call void @__kmpc_omp_task_begin_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]])
+// CHECK: call i32 [[CAP_FN5:@.+]](i32 [[GTID]], %{{.+}}* [[TASK_PTR]])
+// CHECK: call void @__kmpc_omp_task_complete_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]])
+#pragma omp task if (false)
+ fn5();
+
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc({{[^,]+}}, i32 [[GTID]], i32 1, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[CAP_FN6:[^ ]+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_PTR:%.+]] = bitcast i8* [[ORIG_TASK_PTR]] to
+// CHECK: br i1 %{{.+}}, label %[[OMP_THEN:.+]], label %[[OMP_ELSE:.+]]
+// CHECK: [[OMP_THEN]]
+// CHECK: call i32 @__kmpc_omp_task(%{{.+}}* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
+// CHECK: br label %[[OMP_END:.+]]
+// CHECK: [[OMP_ELSE]]
+// CHECK: call void @__kmpc_omp_task_begin_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]])
+// CHECK: call i32 [[CAP_FN6:@.+]](i32 [[GTID]], %{{.+}}* [[TASK_PTR]])
+// CHECK: call void @__kmpc_omp_task_complete_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]])
+// CHECK: br label %[[OMP_END]]
+// CHECK: [[OMP_END]]
+#pragma omp task if (Arg)
+ fn6();
+ // CHECK: = call {{.*}}i{{.+}} @{{.+}}tmain
+ return tmain(Arg);
+}
+
+// CHECK: define internal i32 [[CAP_FN4]]
+// CHECK: call void @{{.+}}fn4
+// CHECK: ret i32
+
+// CHECK: define internal i32 [[CAP_FN5]]
+// CHECK: call void @{{.+}}fn5
+// CHECK: ret i32
+
+// CHECK: define internal i32 [[CAP_FN6]]
+// CHECK: call void @{{.+}}fn6
+// CHECK: ret i32
+
+// CHECK-LABEL: define {{.+}} @{{.+}}tmain
+// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^,]+}}, i32 [[GTID]], i32 1, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[CAP_FN1:[^ ]+]] to i32 (i32, i8*)*))
+// CHECK: call i32 @__kmpc_omp_task(%{{.+}}* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
+
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(
+// CHECK: [[TASK_PTR:%.+]] = bitcast i8* [[ORIG_TASK_PTR]] to
+// CHECK: call void @__kmpc_omp_task_begin_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]])
+// CHECK: call i32 [[CAP_FN2:@.+]](i32 [[GTID]], %{{.+}}* [[TASK_PTR]])
+// CHECK: call void @__kmpc_omp_task_complete_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]])
+
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^,]+}}, i32 [[GTID]], i32 1, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[CAP_FN3:[^ ]+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_PTR:%.+]] = bitcast i8* [[ORIG_TASK_PTR]] to
+// CHECK: br i1 %{{.+}}, label %[[OMP_THEN:.+]], label %[[OMP_ELSE:.+]]
+// CHECK: [[OMP_THEN]]
+// CHECK: call i32 @__kmpc_omp_task(%{{.+}}* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
+// CHECK: br label %[[OMP_END:.+]]
+// CHECK: [[OMP_ELSE]]
+// CHECK: call void @__kmpc_omp_task_begin_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]])
+// CHECK: call i32 [[CAP_FN3:@.+]](i32 [[GTID]], %{{.+}}* [[TASK_PTR]])
+// CHECK: call void @__kmpc_omp_task_complete_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]])
+// CHECK: br label %[[OMP_END]]
+// CHECK: [[OMP_END]]
+
+// CHECK: define internal i32 [[CAP_FN1]]
+// CHECK: call void @{{.+}}fn1
+// CHECK: ret i32
+
+// CHECK: define internal i32 [[CAP_FN2]]
+// CHECK: call void @{{.+}}fn2
+// CHECK: ret i32
+
+// CHECK: define internal i32 [[CAP_FN3]]
+// CHECK: call void @{{.+}}fn3
+// CHECK: ret i32
+
+#endif
Index: lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.cpp
+++ lib/CodeGen/CGOpenMPRuntime.cpp
@@ -710,6 +710,28 @@
CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
break;
}
+ case OMPRTL__kmpc_omp_task_begin_if0: {
+ // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
+ // *new_task);
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
+ CGM.VoidPtrTy};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+ RTLFn =
+ CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
+ break;
+ }
+ case OMPRTL__kmpc_omp_task_complete_if0: {
+ // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
+ // *new_task);
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
+ CGM.VoidPtrTy};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy,
+ /*Name=*/"__kmpc_omp_task_complete_if0");
+ break;
+ }
}
return RTLFn;
}
@@ -1555,7 +1577,8 @@
void CGOpenMPRuntime::emitTaskCall(
CodeGenFunction &CGF, SourceLocation Loc, bool Tied,
llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
- llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds) {
+ llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds,
+ const Expr *IfCond, const IfCodeGenTy &IfClauseCodeGen) {
auto &C = CGM.getContext();
auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
// Build type kmp_routine_entry_t (if not built yet).
@@ -1618,15 +1641,44 @@
CGF.Builder.CreateStructGEP(KmpTaskTTy, NewTaskNewTaskTTy,
/*Idx=*/KmpTaskTDestructors),
CGM.PointerAlignInBytes);
-
// NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc()
// libcall.
// Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
// *new_task);
- llvm::Value *TaskArgs[] = {emitUpdateLocation(CGF, Loc),
- getThreadID(CGF, Loc), NewTask};
- // TODO: add check for untied tasks.
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
+ auto *ThreadID = getThreadID(CGF, Loc);
+ llvm::Value *TaskArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID, NewTask};
+ auto &&ThenCodeGen = [this, &TaskArgs](CodeGenFunction &CGF) {
+ // TODO: add check for untied tasks.
+ CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
+ };
+ auto &&ElseCodeGen =
+ [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry](
+ CodeGenFunction &CGF) {
+ CodeGenFunction::RunCleanupsScope LocalScope(CGF);
+ CGF.EmitRuntimeCall(
+ createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs);
+ // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
+ // kmp_task_t *new_task);
+ CGF.EHStack.pushCleanup<CallEndCleanup>(
+ NormalAndEHCleanup,
+ createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0),
+ llvm::makeArrayRef(TaskArgs));
+
+ // Call proxy_task_entry(gtid, new_task);
+ llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
+ CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
+ };
+ if (IfCond) {
+ IfClauseCodeGen(CGF, IfCond, [&](bool ThenBlock) {
+ if (ThenBlock) {
+ ThenCodeGen(CGF);
+ } else {
+ ElseCodeGen(CGF);
+ }
+ });
+ } else {
+ ThenCodeGen(CGF);
+ }
}
static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
Index: lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- lib/CodeGen/CGStmtOpenMP.cpp
+++ lib/CodeGen/CGStmtOpenMP.cpp
@@ -30,8 +30,8 @@
/// } else {
/// CodeGen(false);
/// }
-static void EmitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
- const std::function<void(bool)> &CodeGen) {
+static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
+ const llvm::function_ref<void(bool)> &CodeGen) {
CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
// If the condition constant folds and can be elided, try to avoid emitting
@@ -505,7 +505,7 @@
S, *CS->getCapturedDecl()->param_begin(), CodeGen);
if (auto C = S.getSingleClause(/*K*/ OMPC_if)) {
auto Cond = cast<OMPIfClause>(C)->getCondition();
- EmitOMPIfClause(CGF, Cond, [&](bool ThenBlock) {
+ emitOMPIfClause(CGF, Cond, [&](bool ThenBlock) {
if (ThenBlock)
emitOMPParallelCall(CGF, S, OutlinedFn, CapturedStruct);
else
@@ -1344,8 +1344,13 @@
Final.setInt(/*IntVal=*/false);
}
auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
+ const Expr *IfCond = nullptr;
+ if (auto C = S.getSingleClause(OMPC_if)) {
+ IfCond = cast<OMPIfClause>(C)->getCondition();
+ }
CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), Tied, Final,
- OutlinedFn, SharedsTy, CapturedStruct);
+ OutlinedFn, SharedsTy, CapturedStruct,
+ IfCond, emitOMPIfClause);
}
void CodeGenFunction::EmitOMPTaskyieldDirective(
Index: lib/CodeGen/CGOpenMPRuntime.h
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.h
+++ lib/CodeGen/CGOpenMPRuntime.h
@@ -44,6 +44,9 @@
class CodeGenModule;
typedef llvm::function_ref<void(CodeGenFunction &)> RegionCodeGenTy;
+typedef llvm::function_ref<void(CodeGenFunction &, const Expr *,
+ const llvm::function_ref<void(bool)>)>
+ IfCodeGenTy;
class CGOpenMPRuntime {
private:
@@ -118,6 +121,12 @@
// Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
// kmp_critical_name *lck);
OMPRTL__kmpc_end_reduce_nowait,
+ // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
+ // kmp_task_t * new_task);
+ OMPRTL__kmpc_omp_task_begin_if0,
+ // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
+ // kmp_task_t * new_task);
+ OMPRTL__kmpc_omp_task_complete_if0,
};
/// \brief Values for bit flags used in the ident_t to describe the fields.
@@ -522,10 +531,14 @@
/// \param SharedsTy A type which contains references the shared variables.
/// \param Shareds Context with the list of shared variables from the \a
/// TaskFunction.
+ /// \param IfCond Not a nullptr if 'if' clause was specified, nullptr
+ /// otherwise.
+ /// \param IfClauseGen A codegen procedure for 'if' clause.
virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, bool Tied,
llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
llvm::Value *TaskFunction, QualType SharedsTy,
- llvm::Value *Shareds);
+ llvm::Value *Shareds, const Expr *IfCond,
+ const IfCodeGenTy &IfClauseCodeGen);
/// \brief Emit code for the directive that does not require outlining.
///
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits