Hi rjmccall, fraggamuffin, ejstotzer,

The task region is emitted in several steps:
1. Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 
kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,  
kmp_routine_entry_t *task_entry). Here task_entry is a pointer to the function:

```
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
  TaskFunction(gtid, tt->part_id, tt->shareds);
  return 0;
}
```
2. Copy a list of shared variables to field shareds of the resulting structure 
kmp_task_t returned by the previous call (if any).
3. Copy a pointer to the destructors function to field destructors of the
resulting structure kmp_task_t.
4. Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, 
kmp_task_t *new_task), where new_task is a resulting structure from previous 
items.

http://reviews.llvm.org/D7560

Files:
  lib/CodeGen/CGOpenMPRuntime.cpp
  lib/CodeGen/CGOpenMPRuntime.h
  lib/CodeGen/CGStmtOpenMP.cpp
  lib/Sema/SemaOpenMP.cpp
  test/OpenMP/task_codegen.cpp

EMAIL PREFERENCES
  http://reviews.llvm.org/settings/panel/emailpreferences/
Index: test/OpenMP/task_codegen.cpp
===================================================================
--- test/OpenMP/task_codegen.cpp
+++ test/OpenMP/task_codegen.cpp
@@ -0,0 +1,102 @@
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp=libiomp5 -x c++ -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+// CHECK-DAG: [[IDENT_T:%.+]] = type { i32, i32, i32, i32, i8* }
+// CHECK-DAG: [[STRUCT_SHAREDS:%.+]] = type { i8*, [[STRUCT_S:%.+]]* }
+// CHECK-DAG: [[KMP_TASK_T:%.+]] = type { i8*, i32 (i32, i8*)*, i32, i32 (i32, i8*)* }
+struct S {
+  int a;
+  S() : a(0) {}
+  S(const S &s) : a(s.a) {}
+  ~S() {}
+};
+int a;
+// CHECK-LABEL : @main
+int main() {
+// CHECK: [[B:%.+]] = alloca i8
+// CHECK: [[S:%.+]] = alloca [[STRUCT_S]]
+  char b;
+  S s;
+// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T]]* @{{.+}})
+// CHECK: [[B_REF:%.+]] = getelementptr inbounds [[STRUCT_SHAREDS]]* [[CAPTURES:%.+]], i32 0, i32 0
+// CHECK: store i8* [[B]], i8** [[B_REF]]
+// CHECK: [[S_REF:%.+]] = getelementptr inbounds [[STRUCT_SHAREDS]]* [[CAPTURES]], i32 0, i32 1
+// CHECK: store [[STRUCT_S]]* [[S]], [[STRUCT_S]]** [[S_REF]]
+// CHECK: [[TASK_PTR:%.+]] = call [[KMP_TASK_T]]* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 1, i64 32, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.+}}*)* [[TASK_ENTRY1:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[SHAREDS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]* [[TASK_PTR:%.+]], i32 0, i32 0
+// CHECK: [[SHAREDS_REF:%.+]] = load i8** [[SHAREDS_REF_PTR]]
+// CHECK: [[BITCAST:%.+]] = bitcast [[STRUCT_SHAREDS]]* [[CAPTURES]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[BITCAST]], i64 16, i32 8, i1 false)
+// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]* [[TASK_PTR]], i32 0, i32 3
+// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]]
+// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], [[KMP_TASK_T]]* [[TASK_PTR]])
+#pragma omp task shared(a, b, s)
+  {
+    a = 15;
+    b = a;
+    s.a = 10;
+  }
+// CHECK: [[TASK_PTR:%.+]] = call [[KMP_TASK_T]]* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 0, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.+}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]* [[TASK_PTR]], i32 0, i32 3
+// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]]
+// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], [[KMP_TASK_T]]* [[TASK_PTR]])
+#pragma omp task untied
+  {
+    a = 1;
+  }
+// CHECK: [[TASK_PTR:%.+]] = call [[KMP_TASK_T]]* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 3, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.+}}*)* [[TASK_ENTRY3:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]* [[TASK_PTR]], i32 0, i32 3
+// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]]
+// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], [[KMP_TASK_T]]* [[TASK_PTR]])
+#pragma omp task final(true)
+  {
+    a = 2;
+  }
+// CHECK: [[TASK_PTR:%.+]] = call [[KMP_TASK_T]]* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 1, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.+}}*)* [[TASK_ENTRY4:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]* [[TASK_PTR]], i32 0, i32 3
+// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]]
+// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], [[KMP_TASK_T]]* [[TASK_PTR]])
+  const bool flag = false;
+#pragma omp task final(flag)
+  {
+    a = 3;
+  }
+// CHECK: [[B_VAL:%.+]] = load i8* [[B]]
+// CHECK: [[CMP:%.+]] = icmp ne i8 [[B_VAL]], 0
+// CHECK: [[FINAL:%.+]] = select i1 [[CMP]], i32 2, i32 0
+// CHECK: [[FLAGS:%.+]] = or i32 [[FINAL]], 1
+// CHECK: [[TASK_PTR:%.+]] = call [[KMP_TASK_T]]* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 [[FLAGS]], i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.+}}*)* [[TASK_ENTRY5:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]* [[TASK_PTR]], i32 0, i32 3
+// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]]
+// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], [[KMP_TASK_T]]* [[TASK_PTR]])
+#pragma omp task final(b)
+  {
+    a = 4;
+  }
+  return a;
+}
+// CHECK: define internal i32 [[TASK_ENTRY1]](i32, [[KMP_TASK_T]]{{.+}}*)
+// CHECK: store i32 15, i32* [[A_PTR:@.+]]
+// CHECK: [[A_VAL:%.+]] = load i32* [[A_PTR]]
+// CHECK: [[A_VAL_I8:%.+]] = trunc i32 [[A_VAL]] to i8
+// CHECK: store i8 [[A_VAL_I8]], i8* %{{.+}}
+// CHECK: store i32 10, i32* %{{.+}}
+
+// CHECK: define internal i32 [[TASK_ENTRY2]](i32, [[KMP_TASK_T]]{{.+}}*)
+// CHECK: store i32 1, i32* [[A_PTR:@.+]]
+
+// CHECK: define internal i32 [[TASK_ENTRY3]](i32, [[KMP_TASK_T]]{{.+}}*)
+// CHECK: store i32 2, i32* [[A_PTR:@.+]]
+
+// CHECK: define internal i32 [[TASK_ENTRY4]](i32, [[KMP_TASK_T]]{{.+}}*)
+// CHECK: store i32 3, i32* [[A_PTR:@.+]]
+
+// CHECK: define internal i32 [[TASK_ENTRY5]](i32, [[KMP_TASK_T]]{{.+}}*)
+// CHECK: store i32 4, i32* [[A_PTR:@.+]]
+#endif
+
Index: lib/Sema/SemaOpenMP.cpp
===================================================================
--- lib/Sema/SemaOpenMP.cpp
+++ lib/Sema/SemaOpenMP.cpp
@@ -308,8 +308,7 @@
     //  bound to the current team is shared.
     if (DVar.DKind == OMPD_task) {
       DSAVarData DVarTemp;
-      for (StackTy::reverse_iterator I = std::next(Iter),
-                                     EE = std::prev(Stack.rend());
+      for (StackTy::reverse_iterator I = std::next(Iter), EE = Stack.rend();
            I != EE; ++I) {
         // OpenMP [2.9.1.1, Data-sharing Attribute Rules for Variables
         // Referenced
@@ -1122,11 +1121,19 @@
     break;
   }
   case OMPD_task: {
+    QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1);
     Sema::CapturedParamNameType Params[] = {
+        std::make_pair(".global_tid.", KmpInt32Ty),
+        std::make_pair(".part_id.", KmpInt32Ty),
         std::make_pair(StringRef(), QualType()) // __context with shared vars
     };
     ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
                              Params);
+    // Mark this captured region as inlined, because we don't use outlined
+    // function directly.
+    getCurCapturedRegion()->TheCapturedDecl->addAttr(
+        AlwaysInlineAttr::CreateImplicit(
+            Context, AlwaysInlineAttr::Keyword_forceinline, SourceRange()));
     break;
   }
   case OMPD_ordered: {
Index: lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.cpp
+++ lib/CodeGen/CGOpenMPRuntime.cpp
@@ -31,9 +31,9 @@
 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
 public:
   CGOpenMPRegionInfo(const OMPExecutableDirective &D, const CapturedStmt &CS,
-                     const VarDecl *ThreadIDVar)
+                     const VarDecl *ThreadIDVar, const VarDecl *PartIDVar)
       : CGCapturedStmtInfo(CS, CR_OpenMP), ThreadIDVar(ThreadIDVar),
-        Directive(D) {
+        PartIDVar(PartIDVar), Directive(D) {
     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
   }
 
@@ -58,6 +58,9 @@
   /// \brief A variable or parameter storing global thread id for OpenMP
   /// constructs.
   const VarDecl *ThreadIDVar;
+  /// \brief A variable or parameter storing part id for OpenMP tasking
+  /// constructs.
+  const VarDecl *PartIDVar;
   /// \brief OpenMP executable directive associated with the region.
   const OMPExecutableDirective &Directive;
 };
@@ -73,13 +76,26 @@
   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
   CGF.EmitOMPPrivateClause(Directive, PrivateScope);
   CGF.EmitOMPFirstprivateClause(Directive, PrivateScope);
+  // TODO: add support for privates in tasks.
+  assert((!PartIDVar || !PrivateScope.Privatize()) &&
+         "Private clauses for tasks are not supported yet.");
   if (PrivateScope.Privatize())
     // Emit implicit barrier to synchronize threads and avoid data races.
     CGF.CGM.getOpenMPRuntime().EmitOMPBarrierCall(CGF, Directive.getLocStart(),
                                                   /*IsExplicit=*/false);
   CGCapturedStmtInfo::EmitBody(CGF, S);
 }
 
+static void addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
+                                 QualType FieldTy) {
+  auto *Field = FieldDecl::Create(
+      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
+      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
+      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
+  Field->setAccess(AS_public);
+  DC->addDecl(Field);
+}
+
 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
     : CGM(CGM), DefaultOpenMPPSource(nullptr) {
   IdentTy = llvm::StructType::create(
@@ -91,14 +107,39 @@
                                llvm::PointerType::getUnqual(CGM.Int32Ty)};
   Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
+  // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
+  auto &C = CGM.getContext();
+  auto Int32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
+  QualType KmpRoutineEntryTyArgs[] = {Int32Ty, C.VoidPtrTy};
+  FunctionProtoType::ExtProtoInfo EPI;
+  auto KmpRoutineEntryTy =
+      C.getFunctionType(Int32Ty, KmpRoutineEntryTyArgs, EPI);
+  auto KmpRoutineEntryPointerQTy = C.getPointerType(KmpRoutineEntryTy);
+  KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPointerQTy);
+  // Build struct kmp_task_t.
+  auto *RD = C.buildImplicitRecord("kmp_task_t");
+  RD->startDefinition();
+  // Build void *shareds;
+  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
+  // Build kmp_routine_entry_t routine;
+  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
+  // Build kmp_int32 part_id;
+  addFieldToRecordDecl(C, RD, Int32Ty);
+  // Build kmp_routine_entry_t destructors;
+  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
+  RD->completeDefinition();
+  KmpTaskTRD = RD;
+  auto KmpTaskQTy = C.getRecordType(KmpTaskTRD);
+  KmpTaskTTy = CGM.getTypes().ConvertType(KmpTaskQTy);
 }
 
 llvm::Value *
 CGOpenMPRuntime::EmitOpenMPOutlinedFunction(const OMPExecutableDirective &D,
-                                            const VarDecl *ThreadIDVar) {
+                                            const VarDecl *ThreadIDVar,
+                                            const VarDecl *PartIDVar) {
   const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
   CodeGenFunction CGF(CGM, true);
-  CGOpenMPRegionInfo CGInfo(D, *CS, ThreadIDVar);
+  CGOpenMPRegionInfo CGInfo(D, *CS, ThreadIDVar, PartIDVar);
   CGF.CapturedStmtInfo = &CGInfo;
   return CGF.GenerateCapturedStmtFunction(*CS);
 }
@@ -208,9 +249,14 @@
     auto ThreadIDVar = OMPRegionInfo->getThreadIDVariable();
     auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
     auto RVal = CGF.EmitLoadOfLValue(LVal, Loc);
-    LVal = CGF.MakeNaturalAlignAddrLValue(RVal.getScalarVal(),
-                                          ThreadIDVar->getType());
-    ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
+    if (ThreadIDVar->getType()->isPointerType()) {
+      // Thread id is passed as a pointer
+      LVal = CGF.MakeNaturalAlignAddrLValue(RVal.getScalarVal(),
+                                            ThreadIDVar->getType());
+      ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
+    } else
+      // Thread id is passed as a value (in tasks).
+      ThreadID = RVal.getScalarVal();
     // If value loaded in entry block, cache it and use it everywhere in
     // function.
     if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
@@ -475,6 +521,27 @@
     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
     break;
   }
+  case OMPRTL__kmpc_omp_task_alloc: {
+    // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
+    // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+    // kmp_routine_entry_t *task_entry);
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
+                                CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
+    llvm::FunctionType *FnTy = llvm::FunctionType::get(
+        KmpTaskTTy->getPointerTo(), TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
+    break;
+  }
+  case OMPRTL__kmpc_omp_task: {
+    // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
+    // *new_task);
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
+                                KmpTaskTTy->getPointerTo()};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
+    break;
+  }
   }
   return RTLFn;
 }
@@ -926,3 +993,154 @@
   auto *RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_flush);
   CGF.EmitRuntimeCall(RTLFn, Args);
 }
+
+namespace {
+/// \brief Fields for type kmp_task_t.
+enum KmpTaskTFields {
+  /// \brief List of shared variables.
+  KmpTaskTShareds,
+  /// \brief Task routine.
+  KmpTaskTRoutine,
+  /// \brief Partition id for the untied tasks.
+  KmpTaskTPartId,
+  /// \brief Function with call of destructors for private variables.
+  KmpTaskTDestructors,
+};
+} // namespace
+
+static RecordDecl *createNewKmpTaskTRecordDecl(ASTContext &C,
+                                               const RecordDecl *RD) {
+  auto *NewRD = C.buildImplicitRecord("kmp_task_t");
+  NewRD->startDefinition();
+  for (auto *FD : RD->fields()) {
+    // Add new field to record.
+    addFieldToRecordDecl(C, NewRD, FD->getType());
+  }
+  return NewRD;
+}
+
+void CGOpenMPRuntime::EmitOMPTaskCall(
+    CodeGenFunction &CGF, SourceLocation Loc, bool Tied,
+    llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
+    llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds) {
+  auto &C = CGM.getContext();
+  // Build particular struct kmp_task_t for the given task.
+  auto *KmpTaskRD = createNewKmpTaskTRecordDecl(C, KmpTaskTRD);
+  // TODO: add private fields.
+  KmpTaskRD->completeDefinition();
+  auto KmpTaskQTy = C.getRecordType(KmpTaskRD);
+  auto KmpTaskTySize = CGM.getSize(C.getTypeSizeInChars(KmpTaskQTy));
+  QualType SharedsPtrTy = C.getPointerType(SharedsTy);
+  // Build proxy function which accepts kmp_task_t as the second argument.
+  // kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
+  //   TaskFunction(gtid, tt->part_id, tt->shareds);
+  //   return 0;
+  // }
+  auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true);
+  QualType KmpTaskPtrQTy = C.getPointerType(KmpTaskQTy);
+  FunctionArgList Args;
+  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, SourceLocation(),
+                            /*Id=*/nullptr, KmpInt32Ty);
+  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, SourceLocation(),
+                                /*Id=*/nullptr, KmpTaskPtrQTy);
+  Args.push_back(&GtidArg);
+  Args.push_back(&TaskTypeArg);
+  FunctionType::ExtInfo Info;
+  auto &TaskEntryFnInfo =
+      CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
+                                                    /*isVariadic=*/false);
+  auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
+  auto *TaskEntry = llvm::Function::Create(
+      TaskEntryTy, /*Linkage=*/llvm::GlobalValue::InternalLinkage,
+      ".omp_task_entry.", &CGM.getModule());
+  CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskEntryFnInfo, TaskEntry);
+  CodeGenFunction TaskEntryCGF(CGM);
+  TaskEntryCGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry,
+                             TaskEntryFnInfo, Args);
+  // TaskFunction(gtid, tt->part_id, tt->shareds);
+  auto *GtidParam = TaskEntryCGF.EmitLoadOfScalar(
+      TaskEntryCGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false,
+      C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty,
+      SourceLocation());
+  auto TaskTypeArgAddr = TaskEntryCGF.EmitLoadOfScalar(
+      TaskEntryCGF.GetAddrOfLocalVar(&TaskTypeArg), /*Volatile=*/false,
+      CGM.PointerAlignInBytes, KmpTaskPtrQTy, SourceLocation());
+  auto *PartidPtr =
+      TaskEntryCGF.Builder.CreateStructGEP(TaskTypeArgAddr,
+                                           /*Idx=*/KmpTaskTPartId);
+  auto *PartidParam = TaskEntryCGF.EmitLoadOfScalar(
+      PartidPtr, /*Volatile=*/false,
+      C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty,
+      SourceLocation());
+  auto *SharedsPtr =
+      TaskEntryCGF.Builder.CreateStructGEP(TaskTypeArgAddr,
+                                           /*Idx=*/KmpTaskTShareds);
+  auto *SharedsParam = TaskEntryCGF.EmitLoadOfScalar(
+      SharedsPtr, /*Volatile=*/false, CGM.PointerAlignInBytes, C.VoidPtrTy,
+      SourceLocation());
+  llvm::Value *CallArgs[] = {
+      GtidParam, PartidParam,
+      TaskEntryCGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+          SharedsParam, TaskEntryCGF.ConvertTypeForMem(SharedsPtrTy))};
+  TaskEntryCGF.EmitCallOrInvoke(TaskFunction, CallArgs);
+  TaskEntryCGF.EmitStoreThroughLValue(
+      RValue::get(TaskEntryCGF.Builder.getInt32(/*C=*/0)),
+      TaskEntryCGF.MakeNaturalAlignAddrLValue(TaskEntryCGF.ReturnValue,
+                                              KmpInt32Ty));
+  TaskEntryCGF.FinishFunction(SourceLocation());
+  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
+  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+  // kmp_routine_entry_t *task_entry);
+  // Task flags. Format is taken from
+  // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
+  // description of kmp_tasking_flags struct.
+  const unsigned TiedFlag = 0x1;
+  const unsigned FinalFlag = 0x2;
+  unsigned Flags = Tied ? TiedFlag : 0;
+  auto *TaskFlags =
+      Final.getPointer()
+          ? CGF.Builder.CreateSelect(Final.getPointer(),
+                                     CGF.Builder.getInt32(FinalFlag),
+                                     CGF.Builder.getInt32(/*C=*/0))
+          : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0);
+  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
+  auto SharedsSize = C.getTypeSizeInChars(SharedsTy);
+  llvm::Value *AllocArgs[] = {EmitOpenMPUpdateLocation(CGF, Loc),
+                              GetOpenMPThreadID(CGF, Loc), TaskFlags,
+                              KmpTaskTySize, CGM.getSize(SharedsSize),
+                              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+                                  TaskEntry, KmpRoutineEntryPtrTy)};
+  auto *RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_omp_task_alloc);
+  auto *NewTask = CGF.EmitRuntimeCall(RTLFn, AllocArgs);
+  auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+      NewTask, KmpTaskTTy->getPointerTo());
+  // Fill the data in the resulting kmp_task_t record.
+  // Copy shareds if there are any.
+  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty())
+    CGF.EmitAggregateCopy(
+        CGF.EmitLoadOfScalar(
+            CGF.Builder.CreateStructGEP(NewTaskNewTaskTTy,
+                                        /*Idx=*/KmpTaskTShareds),
+            /*Volatile=*/false, CGM.PointerAlignInBytes, SharedsPtrTy,
+            SourceLocation()),
+        Shareds, SharedsTy);
+  // TODO: generate function with destructors for privates.
+  // Provide pointer to function with destructors for privates.
+  auto DestructorsType =
+      std::next(KmpTaskRD->field_begin(), KmpTaskTDestructors)->getType();
+  CGF.EmitStoreOfScalar(
+      llvm::ConstantPointerNull::get(
+          cast<llvm::PointerType>(CGF.ConvertType(DestructorsType))),
+      CGF.Builder.CreateStructGEP(NewTaskNewTaskTTy,
+                                  /*Idx=*/KmpTaskTDestructors),
+      /*Volatile=*/false, CGM.PointerAlignInBytes, DestructorsType);
+  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
+  // libcall.
+  // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
+  // *new_task);
+  llvm::Value *TaskArgs[] = {EmitOpenMPUpdateLocation(CGF, Loc),
+                             GetOpenMPThreadID(CGF, Loc), NewTask};
+  RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_omp_task);
+  CGF.EmitRuntimeCall(RTLFn, TaskArgs);
+}
+
Index: lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- lib/CodeGen/CGStmtOpenMP.cpp
+++ lib/CodeGen/CGStmtOpenMP.cpp
@@ -746,8 +746,34 @@
   llvm_unreachable("CodeGen for 'omp parallel sections' is not supported yet.");
 }
 
-void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &) {
-  llvm_unreachable("CodeGen for 'omp task' is not supported yet.");
+void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
+  // Emit outlined function for task construct.
+  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
+  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
+  auto *I = CS->getCapturedDecl()->param_begin();
+  // The first function argument for tasks is a thread id, the second one is a
+  // part id (0 for tied tasks, >=0 for untied task).
+  auto OutlinedFn =
+      CGM.getOpenMPRuntime().EmitOpenMPOutlinedFunction(S, *I, *std::next(I));
+  // Check if we should emit tied or untied task.
+  bool Tied = !S.getSingleClause(/*K=*/OMPC_untied);
+  // Check if the task is final
+  llvm::PointerIntPair<llvm::Value *, 1, bool> Final;
+  if (auto *Clause = S.getSingleClause(/*K=*/OMPC_final)) {
+    // If the condition constant folds and can be elided, try to avoid emitting
+    // the condition and the dead arm of the if/else.
+    auto *Cond = cast<OMPFinalClause>(Clause)->getCondition();
+    bool CondConstant;
+    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
+      Final.setInt(CondConstant);
+    else
+      Final.setPointer(EvaluateExprAsBool(Cond));
+  } else
+    // By default the task is not final.
+    Final.setInt(/*IntVal=*/false);
+  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
+  CGM.getOpenMPRuntime().EmitOMPTaskCall(*this, S.getLocStart(), Tied, Final,
+                                         OutlinedFn, SharedsTy, CapturedStruct);
 }
 
 void CodeGenFunction::EmitOMPTaskyieldDirective(const OMPTaskyieldDirective &) {
Index: lib/CodeGen/CGOpenMPRuntime.h
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.h
+++ lib/CodeGen/CGOpenMPRuntime.h
@@ -14,6 +14,7 @@
 #ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H
 #define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H
 
+#include "clang/AST/Type.h"
 #include "clang/Basic/OpenMPKinds.h"
 #include "clang/Basic/SourceLocation.h"
 #include "llvm/ADT/DenseMap.h"
@@ -35,16 +36,15 @@
 namespace clang {
 class Expr;
 class OMPExecutableDirective;
+class RecordDecl;
 class VarDecl;
 
 namespace CodeGen {
 
 class CodeGenFunction;
 class CodeGenModule;
 
 class CGOpenMPRuntime {
-public:
-
 private:
   enum OpenMPRTLFunction {
     /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
@@ -88,6 +88,13 @@
     OMPRTL__kmpc_master,
     // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
     OMPRTL__kmpc_end_master,
+    // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
+    // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+    // kmp_routine_entry_t *task_entry);
+    OMPRTL__kmpc_omp_task_alloc,
+    // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
+    // new_task);
+    OMPRTL__kmpc_omp_task,
   };
 
   /// \brief Values for bit flags used in the ident_t to describe the fields.
@@ -186,6 +193,25 @@
   /// variables.
   llvm::StringMap<llvm::AssertingVH<llvm::Constant>, llvm::BumpPtrAllocator>
       InternalVars;
+  /// \brief Type typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);
+  llvm::Type *KmpRoutineEntryPtrTy;
+  /// \brief This type is used by the tasking constructs. It stores a list of
+  /// shared variables (field shareds), pointer to the outlined task function
+  /// (field routine), current partition id for untied tasks (field part_id),
+  /// pointer to the function with destructors for the private variables (field
+  /// destructors) and a list of private variables themselves (if any).
+  /// \code
+  /// struct kmp_task_t {
+  ///         void *              shareds;
+  ///         kmp_routine_entry_t routine;
+  ///         kmp_int32           part_id;
+  ///         kmp_routine_entry_t destructors;
+  ///         /*  private vars  */
+  ///       };
+  /// \endcode
+  llvm::Type *KmpTaskTTy;
+  /// \brief Original RecordDecl for kmp_task_t type.
+  const RecordDecl *KmpTaskTRD;
 
   /// \brief Emits object of ident_t type with info for source location.
   /// \param Flags Flags for OpenMP location.
@@ -257,13 +283,16 @@
   /// \brief Emits outlined function for the specified OpenMP directive \a D
   /// (required for parallel and task directives). This outlined function has
   /// type void(*)(kmp_int32 /*ThreadID*/, kmp_int32 /*BoundID*/, struct
-  /// context_vars*).
+  /// context_vars*) (if PartIDVar == nullptr) or void(*)(kmp_int32
+  /// /*ThreadID*/, kmp_int32 /*PartID*/, struct context_vars*) (if PartIDVar !=
+  /// nullptr).
   /// \param D OpenMP directive.
   /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
-  ///
+  /// \param PartIDVar If not nullptr - variable used for part id in tasks.
   virtual llvm::Value *
   EmitOpenMPOutlinedFunction(const OMPExecutableDirective &D,
-                             const VarDecl *ThreadIDVar);
+                             const VarDecl *ThreadIDVar,
+                             const VarDecl *PartIDVar = nullptr);
 
   /// \brief Cleans up references to the objects in finished function.
   ///
@@ -401,6 +430,40 @@
   /// \param Vars List of variables to flush.
   virtual void EmitOMPFlush(CodeGenFunction &CGF, ArrayRef<const Expr *> Vars,
                             SourceLocation Loc);
+
+  /// \brief Emit task region for the task directive. The task region is
+  /// emitted in several steps:
+  /// 1. Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32
+  /// gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+  /// kmp_routine_entry_t *task_entry). Here task_entry is a pointer to the
+  /// function:
+  /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
+  ///   TaskFunction(gtid, tt->part_id, tt->shareds);
+  ///   return 0;
+  /// }
+  /// 2. Copy a list of shared variables to field shareds of the resulting
+  /// structure kmp_task_t returned by the previous call (if any).
+  /// 3. Copy a pointer to the destructors function to field destructors of the
+  /// resulting structure kmp_task_t.
+  /// 4. Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid,
+  /// kmp_task_t *new_task), where new_task is a resulting structure from
+  /// previous items.
+  /// \param Tied true if the task is tied (the task is tied to the thread that
+  /// can suspend its task region), false - untied (the task is not tied to any
+  /// thread).
+  /// \param Final Contains either constant bool value, or llvm::Value * of i1
+  /// type for final clause. If the value is true, the task forces all of its
+  /// child tasks to become final and included tasks.
+  /// \param TaskFunction An LLVM function with type void (*)(i32 /*gtid*/, i32
+  /// /*part_id*/, captured_struct */*__context*/);
+  /// \param SharedsTy A type which contains references to the shared variables.
+  /// \param Shareds Context with the list of shared variables from the \a
+  /// TaskFunction.
+  virtual void
+  EmitOMPTaskCall(CodeGenFunction &CGF, SourceLocation Loc, bool Tied,
+                  llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
+                  llvm::Value *TaskFunction, QualType SharedsTy,
+                  llvm::Value *Shareds);
 };
 } // namespace CodeGen
 } // namespace clang
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

Reply via email to