Hi doug.gregor, hfinkel, rjmccall, rsmith, fraggamuffin, ejstotzer,

This patch generates a call to the "__kmpc_push_num_threads(ident_t *loc,
kmp_int32 global_tid, kmp_int32 num_threads);" library function before calling
"__kmpc_fork_call" each time there is an associated "num_threads" clause in the
"omp parallel" directive.

http://reviews.llvm.org/D5145

Files:
  include/clang/AST/StmtOpenMP.h
  lib/AST/Stmt.cpp
  lib/CodeGen/CGOpenMPRuntime.cpp
  lib/CodeGen/CGOpenMPRuntime.h
  lib/CodeGen/CGStmtOpenMP.cpp
  lib/Sema/SemaOpenMP.cpp
  test/OpenMP/parallel_num_threads_codegen.cpp
Index: test/OpenMP/parallel_num_threads_codegen.cpp
===================================================================
--- test/OpenMP/parallel_num_threads_codegen.cpp
+++ test/OpenMP/parallel_num_threads_codegen.cpp
@@ -0,0 +1,84 @@
+// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+typedef __INTPTR_TYPE__ intptr_t;
+
+// CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
+// CHECK-DAG: [[S_TY:%.+]] = type { [[INTPTR_T_TY:i[0-9]+]], [[INTPTR_T_TY]], [[INTPTR_T_TY]] }
+// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
+// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8]* [[STR]], i32 0, i32 0) }
+
+void foo();
+
+struct S {
+  intptr_t a, b, c;
+  S(intptr_t a) : a(a) {}
+  operator char() { return a; }
+  ~S() {}
+};
+
+template <typename T, int C>
+int tmain() {
+#pragma omp parallel num_threads(C)
+  foo();
+#pragma omp parallel num_threads(T(23))
+  foo();
+  return 0;
+}
+
+int main() {
+  S s(0);
+  char a = s;
+#pragma omp parallel num_threads(2)
+  foo();
+#pragma omp parallel num_threads(a)
+  foo();
+  return a + tmain<char, 5>() + tmain<S, 1>();
+}
+
+// CHECK:       define [[INT_TY:i[0-9]+]] @main(
+// CHECK-DAG:   [[S_ADDR:%.+]] = alloca [[S_TY]]
+// CHECK-DAG:   [[A_ADDR:%.+]] = alloca i8
+// CHECK-DAG:   [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEF_LOC_2]])
+// CHECK-DAG:   call void [[S_TY_CONSTR:@.+]]([[S_TY]]* [[S_ADDR]], [[INTPTR_T_TY]] 0)
+// CHECK:       [[S_CHAR_OP:%.+]] = invoke{{.*}} i8 [[S_TY_CHAR_OP:@.+]]([[S_TY]]* [[S_ADDR]])
+// CHECK:       store i8 [[S_CHAR_OP]], i8* [[A_ADDR]]
+// CHECK:       call void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 2)
+// CHECK:       call void {{.*}}* @__kmpc_fork_call(
+// CHECK:       [[A_VAL:%.+]] = load i8* [[A_ADDR]]
+// CHECK:       [[RES:%.+]] = sext i8 [[A_VAL]] to i32
+// CHECK:       call void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 [[RES]])
+// CHECK:       call void {{.*}}* @__kmpc_fork_call(
+// CHECK:       invoke [[INT_TY]] [[TMAIN_CHAR_5:@.+]]()
+// CHECK:       invoke [[INT_TY]] [[TMAIN_S_1:@.+]]()
+// CHECK:       call void [[S_TY_DESTR:@.+]]([[S_TY]]* [[S_ADDR]])
+// CHECK:       ret [[INT_TY]]
+// CHECK:       }
+
+// CHECK:       define{{.*}} [[INT_TY]] [[TMAIN_CHAR_5]]()
+// CHECK:       [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEF_LOC_2]])
+// CHECK:       call void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 5)
+// CHECK:       call void {{.*}}* @__kmpc_fork_call(
+// CHECK:       call void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 23)
+// CHECK:       call void {{.*}}* @__kmpc_fork_call(
+// CHECK:       ret [[INT_TY]] 0
+// CHECK-NEXT:  }
+
+// CHECK:       define{{.*}} [[INT_TY]] [[TMAIN_S_1]]()
+// CHECK:       [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEF_LOC_2]])
+// CHECK:       call void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 1)
+// CHECK:       call void {{.*}}* @__kmpc_fork_call(
+// CHECK:       call void [[S_TY_CONSTR]]([[S_TY]]* [[S_TEMP:%.+]], [[INTPTR_T_TY]] 23)
+// CHECK:       [[S_CHAR_OP:%.+]] = invoke{{.*}} i8 [[S_TY_CHAR_OP]]([[S_TY]]* [[S_TEMP]])
+// CHECK:       [[RES:%.+]] = sext i8 [[S_CHAR_OP]] to i32
+// CHECK:       call void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 [[RES]])
+// CHECK:       call void [[S_TY_DESTR]]([[S_TY]]* [[S_TEMP]])
+// CHECK:       call void {{.*}}* @__kmpc_fork_call(
+// CHECK:       ret [[INT_TY]] 0
+// CHECK:       }
+
+#endif
Index: include/clang/AST/StmtOpenMP.h
===================================================================
--- include/clang/AST/StmtOpenMP.h
+++ include/clang/AST/StmtOpenMP.h
@@ -128,6 +128,10 @@
     operator bool() { return Current != End; }
   };
 
+  /// \brief Returns the only clause of kind \a K on this directive, or nullptr
+  /// if there is none; asserts that at most one such clause is present.
+  const OMPClause *getSingleClause(OpenMPClauseKind K) const;
+
   /// \brief Returns starting location of directive kind.
   SourceLocation getLocStart() const { return StartLoc; }
   /// \brief Returns ending location of directive.
Index: lib/Sema/SemaOpenMP.cpp
===================================================================
--- lib/Sema/SemaOpenMP.cpp
+++ lib/Sema/SemaOpenMP.cpp
@@ -2609,6 +2609,11 @@
           << "num_threads" << NumThreads->getSourceRange();
       return nullptr;
     }
+    // Convert to int32 for runtime call.
+    ValExpr = PerformImplicitConversion(
+        ValExpr,
+        Context.getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true),
+        /*Action*/ AA_Casting).get();
   }
 
   return new (Context)
Index: lib/AST/Stmt.cpp
===================================================================
--- lib/AST/Stmt.cpp
+++ lib/AST/Stmt.cpp
@@ -1384,12 +1384,24 @@
   return new (Mem) OMPFlushClause(N);
 }
 
+/// Returns the only clause of kind \a K on this directive, or nullptr if none.
+const OMPClause *
+OMPExecutableDirective::getSingleClause(OpenMPClauseKind K) const {
+  auto ClauseFilter =
+      [=](const OMPClause *C) -> bool { return C->getClauseKind() == K; };
+  OMPExecutableDirective::filtered_clause_iterator<decltype(ClauseFilter)> I(
+      clauses(), ClauseFilter);
+  if (!I)
+    return nullptr;
+  const OMPClause *Clause = *I;
+  // ++I sits inside the assert, so it is not evaluated when NDEBUG is defined.
+  assert(!++I && "There are at least 2 clauses of the specified kind");
+  return Clause;
+}
+
 OMPParallelDirective *OMPParallelDirective::Create(
-                                              const ASTContext &C,
-                                              SourceLocation StartLoc,
-                                              SourceLocation EndLoc,
-                                              ArrayRef<OMPClause *> Clauses,
-                                              Stmt *AssociatedStmt) {
+    const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
+    ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt) {
   unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPParallelDirective),
                                            llvm::alignOf<OMPClause *>());
   void *Mem = C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() +
Index: lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.cpp
+++ lib/CodeGen/CGOpenMPRuntime.cpp
@@ -196,6 +196,16 @@
     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
     break;
   }
+  case OMPRTL__kmpc_push_num_threads: {
+    // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
+    // kmp_int32 num_threads)
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
+                                CGM.Int32Ty};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
+    break;
+  }
   }
   return RTLFn;
 }
Index: lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- lib/CodeGen/CGStmtOpenMP.cpp
+++ lib/CodeGen/CGStmtOpenMP.cpp
@@ -23,6 +23,22 @@
 //                              OpenMP Directive Emission
 //===----------------------------------------------------------------------===//
 
+/// Emits a __kmpc_push_num_threads runtime call for num_threads clause \a C.
+static void EmitOMPNumThreadsClause(CodeGenFunction &CGF,
+                                    const OMPNumThreadsClause *C) {
+  // Scope for cleanups created while evaluating the clause expression.
+  CodeGenFunction::RunCleanupsScope NumThreads(CGF);
+  auto &Runtime = CGF.CGM.getOpenMPRuntime();
+  const auto ClauseLoc = C->getLocStart();
+  llvm::Value *Loc = Runtime.EmitOpenMPUpdateLocation(CGF, ClauseLoc);
+  llvm::Value *ThreadID = Runtime.GetOpenMPGlobalThreadNum(CGF, ClauseLoc);
+  llvm::Value *Args[] = {
+      Loc, ThreadID,
+      CGF.EmitScalarExpr(C->getNumThreads(), /*IgnoreResultAssign*/ true)};
+  CGF.EmitRuntimeCall(Runtime.CreateRuntimeFunction(
+      CGOpenMPRuntime::OMPRTL__kmpc_push_num_threads), Args);
+}
+
 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
   const CapturedStmt *CS = cast<CapturedStmt>(S.getAssociatedStmt());
   llvm::Value *CapturedStruct = GenerateCapturedStmtArgument(*CS);
@@ -34,6 +50,8 @@
     CGF.CapturedStmtInfo = &CGInfo;
     OutlinedFn = CGF.GenerateCapturedStmtFunction(*CS);
   }
+  if (auto C = S.getSingleClause(/*K*/ OMPC_num_threads))
+    EmitOMPNumThreadsClause(*this, cast<OMPNumThreadsClause>(C));
 
   // Build call __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/)
   llvm::Value *Args[] = {
Index: lib/CodeGen/CGOpenMPRuntime.h
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.h
+++ lib/CodeGen/CGOpenMPRuntime.h
@@ -67,8 +67,11 @@
     // Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
     // microtask, ...);
     OMPRTL__kmpc_fork_call,
-    // Call to kmp_int32 kmpc_global_thread_num(ident_t *loc);
-    OMPRTL__kmpc_global_thread_num
+    // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
+    OMPRTL__kmpc_global_thread_num,
+    // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
+    // kmp_int32 num_threads);
+    OMPRTL__kmpc_push_num_threads
   };
 
 private:
_______________________________________________
cfe-commits mailing list
cfe-commits@cs.uiuc.edu
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

Reply via email to