Hi rjmccall, fraggamuffin, ejstotzer,
The task region is emitted in several steps:
1. Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
kmp_routine_entry_t *task_entry). Here task_entry is a pointer to the function:
```
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
```
2. Copy a list of shared variables to field shareds of the resulting structure
kmp_task_t returned by the previous call (if any).
3. Copy a pointer to the destructors function to field destructors of the
resulting structure kmp_task_t.
4. Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid,
kmp_task_t *new_task), where new_task is a resulting structure from previous
items.
http://reviews.llvm.org/D7560
Files:
lib/CodeGen/CGOpenMPRuntime.cpp
lib/CodeGen/CGOpenMPRuntime.h
lib/CodeGen/CGStmtOpenMP.cpp
lib/Sema/SemaOpenMP.cpp
test/OpenMP/task_codegen.cpp
EMAIL PREFERENCES
http://reviews.llvm.org/settings/panel/emailpreferences/
Index: test/OpenMP/task_codegen.cpp
===================================================================
--- test/OpenMP/task_codegen.cpp
+++ test/OpenMP/task_codegen.cpp
@@ -0,0 +1,102 @@
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp=libiomp5 -x c++ -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+// CHECK-DAG: [[IDENT_T:%.+]] = type { i32, i32, i32, i32, i8* }
+// CHECK-DAG: [[STRUCT_SHAREDS:%.+]] = type { i8*, [[STRUCT_S:%.+]]* }
+// CHECK-DAG: [[KMP_TASK_T:%.+]] = type { i8*, i32 (i32, i8*)*, i32, i32 (i32, i8*)* }
+struct S {
+ int a;
+ S() : a(0) {}
+ S(const S &s) : a(s.a) {}
+ ~S() {}
+};
+int a;
+// CHECK-LABEL: @main
+int main() {
+// CHECK: [[B:%.+]] = alloca i8
+// CHECK: [[S:%.+]] = alloca [[STRUCT_S]]
+ char b;
+ S s;
+// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T]]* @{{.+}})
+// CHECK: [[B_REF:%.+]] = getelementptr inbounds [[STRUCT_SHAREDS]]* [[CAPTURES:%.+]], i32 0, i32 0
+// CHECK: store i8* [[B]], i8** [[B_REF]]
+// CHECK: [[S_REF:%.+]] = getelementptr inbounds [[STRUCT_SHAREDS]]* [[CAPTURES]], i32 0, i32 1
+// CHECK: store [[STRUCT_S]]* [[S]], [[STRUCT_S]]** [[S_REF]]
+// CHECK: [[TASK_PTR:%.+]] = call [[KMP_TASK_T]]* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 1, i64 32, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.+}}*)* [[TASK_ENTRY1:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[SHAREDS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]* [[TASK_PTR:%.+]], i32 0, i32 0
+// CHECK: [[SHAREDS_REF:%.+]] = load i8** [[SHAREDS_REF_PTR]]
+// CHECK: [[BITCAST:%.+]] = bitcast [[STRUCT_SHAREDS]]* [[CAPTURES]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[BITCAST]], i64 16, i32 8, i1 false)
+// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]* [[TASK_PTR]], i32 0, i32 3
+// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]]
+// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], [[KMP_TASK_T]]* [[TASK_PTR]])
+#pragma omp task shared(a, b, s)
+ {
+ a = 15;
+ b = a;
+ s.a = 10;
+ }
+// CHECK: [[TASK_PTR:%.+]] = call [[KMP_TASK_T]]* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 0, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.+}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]* [[TASK_PTR]], i32 0, i32 3
+// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]]
+// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], [[KMP_TASK_T]]* [[TASK_PTR]])
+#pragma omp task untied
+ {
+ a = 1;
+ }
+// CHECK: [[TASK_PTR:%.+]] = call [[KMP_TASK_T]]* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 3, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.+}}*)* [[TASK_ENTRY3:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]* [[TASK_PTR]], i32 0, i32 3
+// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]]
+// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], [[KMP_TASK_T]]* [[TASK_PTR]])
+#pragma omp task final(true)
+ {
+ a = 2;
+ }
+// CHECK: [[TASK_PTR:%.+]] = call [[KMP_TASK_T]]* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 1, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.+}}*)* [[TASK_ENTRY4:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]* [[TASK_PTR]], i32 0, i32 3
+// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]]
+// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], [[KMP_TASK_T]]* [[TASK_PTR]])
+ const bool flag = false;
+#pragma omp task final(flag)
+ {
+ a = 3;
+ }
+// CHECK: [[B_VAL:%.+]] = load i8* [[B]]
+// CHECK: [[CMP:%.+]] = icmp ne i8 [[B_VAL]], 0
+// CHECK: [[FINAL:%.+]] = select i1 [[CMP]], i32 2, i32 0
+// CHECK: [[FLAGS:%.+]] = or i32 [[FINAL]], 1
+// CHECK: [[TASK_PTR:%.+]] = call [[KMP_TASK_T]]* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 [[FLAGS]], i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.+}}*)* [[TASK_ENTRY5:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]* [[TASK_PTR]], i32 0, i32 3
+// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]]
+// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], [[KMP_TASK_T]]* [[TASK_PTR]])
+#pragma omp task final(b)
+ {
+ a = 4;
+ }
+ return a;
+}
+// CHECK: define internal i32 [[TASK_ENTRY1]](i32, [[KMP_TASK_T]]{{.+}}*)
+// CHECK: store i32 15, i32* [[A_PTR:@.+]]
+// CHECK: [[A_VAL:%.+]] = load i32* [[A_PTR]]
+// CHECK: [[A_VAL_I8:%.+]] = trunc i32 [[A_VAL]] to i8
+// CHECK: store i8 [[A_VAL_I8]], i8* %{{.+}}
+// CHECK: store i32 10, i32* %{{.+}}
+
+// CHECK: define internal i32 [[TASK_ENTRY2]](i32, [[KMP_TASK_T]]{{.+}}*)
+// CHECK: store i32 1, i32* [[A_PTR:@.+]]
+
+// CHECK: define internal i32 [[TASK_ENTRY3]](i32, [[KMP_TASK_T]]{{.+}}*)
+// CHECK: store i32 2, i32* [[A_PTR:@.+]]
+
+// CHECK: define internal i32 [[TASK_ENTRY4]](i32, [[KMP_TASK_T]]{{.+}}*)
+// CHECK: store i32 3, i32* [[A_PTR:@.+]]
+
+// CHECK: define internal i32 [[TASK_ENTRY5]](i32, [[KMP_TASK_T]]{{.+}}*)
+// CHECK: store i32 4, i32* [[A_PTR:@.+]]
+#endif
+
Index: lib/Sema/SemaOpenMP.cpp
===================================================================
--- lib/Sema/SemaOpenMP.cpp
+++ lib/Sema/SemaOpenMP.cpp
@@ -308,8 +308,7 @@
// bound to the current team is shared.
if (DVar.DKind == OMPD_task) {
DSAVarData DVarTemp;
- for (StackTy::reverse_iterator I = std::next(Iter),
- EE = std::prev(Stack.rend());
+ for (StackTy::reverse_iterator I = std::next(Iter), EE = Stack.rend();
I != EE; ++I) {
// OpenMP [2.9.1.1, Data-sharing Attribute Rules for Variables
// Referenced
@@ -1122,11 +1121,19 @@
break;
}
case OMPD_task: {
+ QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1);
Sema::CapturedParamNameType Params[] = {
+ std::make_pair(".global_tid.", KmpInt32Ty),
+ std::make_pair(".part_id.", KmpInt32Ty),
std::make_pair(StringRef(), QualType()) // __context with shared vars
};
ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
Params);
+ // Mark this captured region as inlined, because we don't use outlined
+ // function directly.
+ getCurCapturedRegion()->TheCapturedDecl->addAttr(
+ AlwaysInlineAttr::CreateImplicit(
+ Context, AlwaysInlineAttr::Keyword_forceinline, SourceRange()));
break;
}
case OMPD_ordered: {
Index: lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.cpp
+++ lib/CodeGen/CGOpenMPRuntime.cpp
@@ -31,9 +31,9 @@
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
CGOpenMPRegionInfo(const OMPExecutableDirective &D, const CapturedStmt &CS,
- const VarDecl *ThreadIDVar)
+ const VarDecl *ThreadIDVar, const VarDecl *PartIDVar)
: CGCapturedStmtInfo(CS, CR_OpenMP), ThreadIDVar(ThreadIDVar),
- Directive(D) {
+ PartIDVar(PartIDVar), Directive(D) {
assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
}
@@ -58,6 +58,9 @@
/// \brief A variable or parameter storing global thread id for OpenMP
/// constructs.
const VarDecl *ThreadIDVar;
+ /// \brief A variable or parameter storing part id for OpenMP tasking
+ /// constructs.
+ const VarDecl *PartIDVar;
/// \brief OpenMP executable directive associated with the region.
const OMPExecutableDirective &Directive;
};
@@ -73,13 +76,26 @@
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
CGF.EmitOMPPrivateClause(Directive, PrivateScope);
CGF.EmitOMPFirstprivateClause(Directive, PrivateScope);
+ // TODO: add support for privates in tasks.
+ assert((!PartIDVar || !PrivateScope.Privatize()) &&
+ "Private clauses for tasks are not supported yet.");
if (PrivateScope.Privatize())
// Emit implicit barrier to synchronize threads and avoid data races.
CGF.CGM.getOpenMPRuntime().EmitOMPBarrierCall(CGF, Directive.getLocStart(),
/*IsExplicit=*/false);
CGCapturedStmtInfo::EmitBody(CGF, S);
}
+static void addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
+ QualType FieldTy) {
+ auto *Field = FieldDecl::Create(
+ C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
+ C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
+ /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
+ Field->setAccess(AS_public);
+ DC->addDecl(Field);
+}
+
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
: CGM(CGM), DefaultOpenMPPSource(nullptr) {
IdentTy = llvm::StructType::create(
@@ -91,14 +107,39 @@
llvm::PointerType::getUnqual(CGM.Int32Ty)};
Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
+ // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
+ auto &C = CGM.getContext();
+ auto Int32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
+ QualType KmpRoutineEntryTyArgs[] = {Int32Ty, C.VoidPtrTy};
+ FunctionProtoType::ExtProtoInfo EPI;
+ auto KmpRoutineEntryTy =
+ C.getFunctionType(Int32Ty, KmpRoutineEntryTyArgs, EPI);
+ auto KmpRoutineEntryPointerQTy = C.getPointerType(KmpRoutineEntryTy);
+ KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPointerQTy);
+ // Build struct kmp_task_t.
+ auto *RD = C.buildImplicitRecord("kmp_task_t");
+ RD->startDefinition();
+ // Build void *shareds;
+ addFieldToRecordDecl(C, RD, C.VoidPtrTy);
+ // Build kmp_routine_entry_t routine;
+ addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
+ // Build kmp_int32 part_id;
+ addFieldToRecordDecl(C, RD, Int32Ty);
+ // Build kmp_routine_entry_t destructors;
+ addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
+ RD->completeDefinition();
+ KmpTaskTRD = RD;
+ auto KmpTaskQTy = C.getRecordType(KmpTaskTRD);
+ KmpTaskTTy = CGM.getTypes().ConvertType(KmpTaskQTy);
}
llvm::Value *
CGOpenMPRuntime::EmitOpenMPOutlinedFunction(const OMPExecutableDirective &D,
- const VarDecl *ThreadIDVar) {
+ const VarDecl *ThreadIDVar,
+ const VarDecl *PartIDVar) {
const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
CodeGenFunction CGF(CGM, true);
- CGOpenMPRegionInfo CGInfo(D, *CS, ThreadIDVar);
+ CGOpenMPRegionInfo CGInfo(D, *CS, ThreadIDVar, PartIDVar);
CGF.CapturedStmtInfo = &CGInfo;
return CGF.GenerateCapturedStmtFunction(*CS);
}
@@ -208,9 +249,14 @@
auto ThreadIDVar = OMPRegionInfo->getThreadIDVariable();
auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
auto RVal = CGF.EmitLoadOfLValue(LVal, Loc);
- LVal = CGF.MakeNaturalAlignAddrLValue(RVal.getScalarVal(),
- ThreadIDVar->getType());
- ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
+ if (ThreadIDVar->getType()->isPointerType()) {
+ // Thread id is passed as a pointer
+ LVal = CGF.MakeNaturalAlignAddrLValue(RVal.getScalarVal(),
+ ThreadIDVar->getType());
+ ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
+ } else
+ // Thread id is passed as a value (in tasks).
+ ThreadID = RVal.getScalarVal();
// If value loaded in entry block, cache it and use it everywhere in
// function.
if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
@@ -475,6 +521,27 @@
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
break;
}
+ case OMPRTL__kmpc_omp_task_alloc: {
+ // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
+ // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+ // kmp_routine_entry_t *task_entry);
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
+ CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
+ llvm::FunctionType *FnTy = llvm::FunctionType::get(
+ KmpTaskTTy->getPointerTo(), TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
+ break;
+ }
+ case OMPRTL__kmpc_omp_task: {
+ // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
+ // *new_task);
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
+ KmpTaskTTy->getPointerTo()};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
+ break;
+ }
}
return RTLFn;
}
@@ -926,3 +993,154 @@
auto *RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_flush);
CGF.EmitRuntimeCall(RTLFn, Args);
}
+
+namespace {
+/// \brief Fields for type kmp_task_t.
+enum KmpTaskTFields {
+ /// \brief List of shared variables.
+ KmpTaskTShareds,
+ /// \brief Task routine.
+ KmpTaskTRoutine,
+ /// \brief Partition id for the untied tasks.
+ KmpTaskTPartId,
+ /// \brief Function with call of destructors for private variables.
+ KmpTaskTDestructors,
+};
+} // namespace
+
+static RecordDecl *createNewKmpTaskTRecordDecl(ASTContext &C,
+ const RecordDecl *RD) {
+ auto *NewRD = C.buildImplicitRecord("kmp_task_t");
+ NewRD->startDefinition();
+ for (auto *FD : RD->fields()) {
+ // Add new field to record.
+ addFieldToRecordDecl(C, NewRD, FD->getType());
+ }
+ return NewRD;
+}
+
+void CGOpenMPRuntime::EmitOMPTaskCall(
+ CodeGenFunction &CGF, SourceLocation Loc, bool Tied,
+ llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
+ llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds) {
+ auto &C = CGM.getContext();
+ // Build particular struct kmp_task_t for the given task.
+ auto *KmpTaskRD = createNewKmpTaskTRecordDecl(C, KmpTaskTRD);
+ // TODO: add private fields.
+ KmpTaskRD->completeDefinition();
+ auto KmpTaskQTy = C.getRecordType(KmpTaskRD);
+ auto KmpTaskTySize = CGM.getSize(C.getTypeSizeInChars(KmpTaskQTy));
+ QualType SharedsPtrTy = C.getPointerType(SharedsTy);
+ // Build proxy function which accepts kmp_task_t as the second argument.
+ // kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
+ // TaskFunction(gtid, tt->part_id, tt->shareds);
+ // return 0;
+ // }
+ auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true);
+ QualType KmpTaskPtrQTy = C.getPointerType(KmpTaskQTy);
+ FunctionArgList Args;
+ ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, SourceLocation(),
+ /*Id=*/nullptr, KmpInt32Ty);
+ ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, SourceLocation(),
+ /*Id=*/nullptr, KmpTaskPtrQTy);
+ Args.push_back(&GtidArg);
+ Args.push_back(&TaskTypeArg);
+ FunctionType::ExtInfo Info;
+ auto &TaskEntryFnInfo =
+ CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
+ /*isVariadic=*/false);
+ auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
+ auto *TaskEntry = llvm::Function::Create(
+ TaskEntryTy, /*Linkage=*/llvm::GlobalValue::InternalLinkage,
+ ".omp_task_entry.", &CGM.getModule());
+ CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskEntryFnInfo, TaskEntry);
+ CodeGenFunction TaskEntryCGF(CGM);
+ TaskEntryCGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry,
+ TaskEntryFnInfo, Args);
+ // TaskFunction(gtid, tt->part_id, tt->shareds);
+ auto *GtidParam = TaskEntryCGF.EmitLoadOfScalar(
+ TaskEntryCGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false,
+ C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty,
+ SourceLocation());
+ auto TaskTypeArgAddr = TaskEntryCGF.EmitLoadOfScalar(
+ TaskEntryCGF.GetAddrOfLocalVar(&TaskTypeArg), /*Volatile=*/false,
+ CGM.PointerAlignInBytes, KmpTaskPtrQTy, SourceLocation());
+ auto *PartidPtr =
+ TaskEntryCGF.Builder.CreateStructGEP(TaskTypeArgAddr,
+ /*Idx=*/KmpTaskTPartId);
+ auto *PartidParam = TaskEntryCGF.EmitLoadOfScalar(
+ PartidPtr, /*Volatile=*/false,
+ C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty,
+ SourceLocation());
+ auto *SharedsPtr =
+ TaskEntryCGF.Builder.CreateStructGEP(TaskTypeArgAddr,
+ /*Idx=*/KmpTaskTShareds);
+ auto *SharedsParam = TaskEntryCGF.EmitLoadOfScalar(
+ SharedsPtr, /*Volatile=*/false, CGM.PointerAlignInBytes, C.VoidPtrTy,
+ SourceLocation());
+ llvm::Value *CallArgs[] = {
+ GtidParam, PartidParam,
+ TaskEntryCGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ SharedsParam, TaskEntryCGF.ConvertTypeForMem(SharedsPtrTy))};
+ TaskEntryCGF.EmitCallOrInvoke(TaskFunction, CallArgs);
+ TaskEntryCGF.EmitStoreThroughLValue(
+ RValue::get(TaskEntryCGF.Builder.getInt32(/*C=*/0)),
+ TaskEntryCGF.MakeNaturalAlignAddrLValue(TaskEntryCGF.ReturnValue,
+ KmpInt32Ty));
+ TaskEntryCGF.FinishFunction(SourceLocation());
+ // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
+ // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+ // kmp_routine_entry_t *task_entry);
+ // Task flags. Format is taken from
+ // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
+ // description of kmp_tasking_flags struct.
+ const unsigned TiedFlag = 0x1;
+ const unsigned FinalFlag = 0x2;
+ unsigned Flags = Tied ? TiedFlag : 0;
+ auto *TaskFlags =
+ Final.getPointer()
+ ? CGF.Builder.CreateSelect(Final.getPointer(),
+ CGF.Builder.getInt32(FinalFlag),
+ CGF.Builder.getInt32(/*C=*/0))
+ : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0);
+ TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
+ auto SharedsSize = C.getTypeSizeInChars(SharedsTy);
+ llvm::Value *AllocArgs[] = {EmitOpenMPUpdateLocation(CGF, Loc),
+ GetOpenMPThreadID(CGF, Loc), TaskFlags,
+ KmpTaskTySize, CGM.getSize(SharedsSize),
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ TaskEntry, KmpRoutineEntryPtrTy)};
+ auto *RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_omp_task_alloc);
+ auto *NewTask = CGF.EmitRuntimeCall(RTLFn, AllocArgs);
+ auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ NewTask, KmpTaskTTy->getPointerTo());
+ // Fill the data in the resulting kmp_task_t record.
+ // Copy shareds if there are any.
+ if (!SharedsTy->getAsStructureType()->getDecl()->field_empty())
+ CGF.EmitAggregateCopy(
+ CGF.EmitLoadOfScalar(
+ CGF.Builder.CreateStructGEP(NewTaskNewTaskTTy,
+ /*Idx=*/KmpTaskTShareds),
+ /*Volatile=*/false, CGM.PointerAlignInBytes, SharedsPtrTy,
+ SourceLocation()),
+ Shareds, SharedsTy);
+ // TODO: generate function with destructors for privates.
+ // Provide pointer to function with destructors for privates.
+ auto DestructorsType =
+ std::next(KmpTaskRD->field_begin(), KmpTaskTDestructors)->getType();
+ CGF.EmitStoreOfScalar(
+ llvm::ConstantPointerNull::get(
+ cast<llvm::PointerType>(CGF.ConvertType(DestructorsType))),
+ CGF.Builder.CreateStructGEP(NewTaskNewTaskTTy,
+ /*Idx=*/KmpTaskTDestructors),
+ /*Volatile=*/false, CGM.PointerAlignInBytes, DestructorsType);
+ // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
+ // libcall.
+ // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
+ // *new_task);
+ llvm::Value *TaskArgs[] = {EmitOpenMPUpdateLocation(CGF, Loc),
+ GetOpenMPThreadID(CGF, Loc), NewTask};
+ RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_omp_task);
+ CGF.EmitRuntimeCall(RTLFn, TaskArgs);
+}
+
Index: lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- lib/CodeGen/CGStmtOpenMP.cpp
+++ lib/CodeGen/CGStmtOpenMP.cpp
@@ -746,8 +746,34 @@
llvm_unreachable("CodeGen for 'omp parallel sections' is not supported yet.");
}
-void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &) {
- llvm_unreachable("CodeGen for 'omp task' is not supported yet.");
+void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
+ // Emit outlined function for task construct.
+ auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
+ auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
+ auto *I = CS->getCapturedDecl()->param_begin();
+ // The first function argument for tasks is a thread id, the second one is a
+ // part id (0 for tied tasks, >=0 for untied task).
+ auto OutlinedFn =
+ CGM.getOpenMPRuntime().EmitOpenMPOutlinedFunction(S, *I, *std::next(I));
+ // Check if we should emit tied or untied task.
+ bool Tied = !S.getSingleClause(/*K=*/OMPC_untied);
+ // Check if the task is final
+ llvm::PointerIntPair<llvm::Value *, 1, bool> Final;
+ if (auto *Clause = S.getSingleClause(/*K=*/OMPC_final)) {
+ // If the condition constant folds and can be elided, try to avoid emitting
+ // the condition and the dead arm of the if/else.
+ auto *Cond = cast<OMPFinalClause>(Clause)->getCondition();
+ bool CondConstant;
+ if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
+ Final.setInt(CondConstant);
+ else
+ Final.setPointer(EvaluateExprAsBool(Cond));
+ } else
+ // By default the task is not final.
+ Final.setInt(/*IntVal=*/false);
+ auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
+ CGM.getOpenMPRuntime().EmitOMPTaskCall(*this, S.getLocStart(), Tied, Final,
+ OutlinedFn, SharedsTy, CapturedStruct);
}
void CodeGenFunction::EmitOMPTaskyieldDirective(const OMPTaskyieldDirective &) {
Index: lib/CodeGen/CGOpenMPRuntime.h
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.h
+++ lib/CodeGen/CGOpenMPRuntime.h
@@ -14,6 +14,7 @@
#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H
#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H
+#include "clang/AST/Type.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceLocation.h"
#include "llvm/ADT/DenseMap.h"
@@ -35,16 +36,15 @@
namespace clang {
class Expr;
class OMPExecutableDirective;
+class RecordDecl;
class VarDecl;
namespace CodeGen {
class CodeGenFunction;
class CodeGenModule;
class CGOpenMPRuntime {
-public:
-
private:
enum OpenMPRTLFunction {
/// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
@@ -88,6 +88,13 @@
OMPRTL__kmpc_master,
// Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
OMPRTL__kmpc_end_master,
+ // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
+ // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+ // kmp_routine_entry_t *task_entry);
+ OMPRTL__kmpc_omp_task_alloc,
+ // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
+ // new_task);
+ OMPRTL__kmpc_omp_task,
};
/// \brief Values for bit flags used in the ident_t to describe the fields.
@@ -186,6 +193,25 @@
/// variables.
llvm::StringMap<llvm::AssertingVH<llvm::Constant>, llvm::BumpPtrAllocator>
InternalVars;
+ /// \brief Type typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);
+ llvm::Type *KmpRoutineEntryPtrTy;
+ /// \brief This type is used by the tasking constructs. It stores a list of
+ /// shared variables (field shareds), pointer to the outlined task function
+ /// (field routine), current partition id for untied tasks (field part_id),
+ /// pointer to the function with destructors for the private variables(field
+ /// destructors) and a list of private variables itself (if any).
+ /// \code
+ /// struct kmp_task_t {
+ /// void * shareds;
+ /// kmp_routine_entry_t routine;
+ /// kmp_int32 part_id;
+ /// kmp_routine_entry_t destructors;
+ /// /* private vars */
+ /// };
+ /// \endcode
+ llvm::Type *KmpTaskTTy;
+ /// \brief Original RecordDecl for kmp_task_t type.
+ const RecordDecl *KmpTaskTRD;
/// \brief Emits object of ident_t type with info for source location.
/// \param Flags Flags for OpenMP location.
@@ -257,13 +283,16 @@
/// \brief Emits outlined function for the specified OpenMP directive \a D
/// (required for parallel and task directives). This outlined function has
/// type void(*)(kmp_int32 /*ThreadID*/, kmp_int32 /*BoundID*/, struct
- /// context_vars*).
+ /// context_vars*) (if PartIDVar == nullptr) or void(*)(kmp_int32
+ /// /*ThreadID*/, kmp_int32 /*PartID*/, struct context_vars*) (if PartIDVar !=
+ /// nullptr).
/// \param D OpenMP directive.
/// \param ThreadIDVar Variable for thread id in the current OpenMP region.
- ///
+ /// \param PartIDVar If not nullptr - variable used for part id in tasks.
virtual llvm::Value *
EmitOpenMPOutlinedFunction(const OMPExecutableDirective &D,
- const VarDecl *ThreadIDVar);
+ const VarDecl *ThreadIDVar,
+ const VarDecl *PartIDVar = nullptr);
/// \brief Cleans up references to the objects in finished function.
///
@@ -401,6 +430,40 @@
/// \param Vars List of variables to flush.
virtual void EmitOMPFlush(CodeGenFunction &CGF, ArrayRef<const Expr *> Vars,
SourceLocation Loc);
+
+ /// \brief Emit task region for the task directive. The task region is
+ /// emitted in several steps:
+ /// 1. Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32
+ /// gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+ /// kmp_routine_entry_t *task_entry). Here task_entry is a pointer to the
+ /// function:
+ /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
+ /// TaskFunction(gtid, tt->part_id, tt->shareds);
+ /// return 0;
+ /// }
+ /// 2. Copy a list of shared variables to field shareds of the resulting
+ /// structure kmp_task_t returned by the previous call (if any).
+ /// 3. Copy a pointer to destructors function to field destructors of the
+ /// resulting structure kmp_task_t.
+ /// 4. Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid,
+ /// kmp_task_t *new_task), where new_task is a resulting structure from
+ /// previous items.
+ /// \param Tied true if the task is tied (the task is tied to the thread that
+ /// can suspend its task region), false - untied (the task is not tied to any
+ /// thread).
+ /// \param Final Contains either constant bool value, or llvm::Value * of i1
+ /// type for final clause. If the value is true, the task forces all of its
+ /// child tasks to become final and included tasks.
+ /// \param TaskFunction An LLVM function with type void (*)(i32 /*gtid*/, i32
+ /// /*part_id*/, captured_struct */*__context*/);
+ /// \param SharedsTy A type which contains references to the shared variables.
+ /// \param Shareds Context with the list of shared variables from the \a
+ /// TaskFunction.
+ virtual void
+ EmitOMPTaskCall(CodeGenFunction &CGF, SourceLocation Loc, bool Tied,
+ llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
+ llvm::Value *TaskFunction, QualType SharedsTy,
+ llvm::Value *Shareds);
};
} // namespace CodeGen
} // namespace clang
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits