sfantao updated this revision to Diff 49331.
sfantao marked an inline comment as done.
sfantao added a comment.
Emit num teams and thread limit using the inlined directives machinery.
http://reviews.llvm.org/D17019
Files:
lib/CodeGen/CGOpenMPRuntime.cpp
lib/CodeGen/CGOpenMPRuntime.h
lib/CodeGen/CGStmtOpenMP.cpp
lib/CodeGen/CodeGenFunction.h
test/OpenMP/teams_codegen.cpp
Index: test/OpenMP/teams_codegen.cpp
===================================================================
--- /dev/null
+++ test/OpenMP/teams_codegen.cpp
@@ -0,0 +1,194 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+#ifdef CK1
+
+int Gbla;
+long long Gblb;
+
+// CK1-LABEL: teams_argument_global_local
+int teams_argument_global_local(int a){
+ int comp = 1;
+
+ int la = 23;
+ float lc = 25.0;
+
+ // CK1: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+ // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
+ #pragma omp target
+ #pragma omp teams
+ {
+ ++comp;
+ }
+
+ // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 0)
+ // CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
+
+ // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
+ #pragma omp target
+ #pragma omp teams num_teams(la)
+ {
+ ++comp;
+ }
+
+ // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 [[NT:%[^,]+]])
+ // CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
+
+ // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
+ #pragma omp target
+ #pragma omp teams thread_limit(la)
+ {
+ ++comp;
+ }
+
+ // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
+
+ // CK1-DAG: [[NT]] = add nsw i32 [[NTA:%[^,]+]], [[NTB:%[^,]+]]
+ // CK1-DAG: [[NTA]] = load i32, i32* @Gbla,
+ // CK1-DAG: [[NTB]] = load i32, i32* %{{.+}},
+
+ // CK1-DAG: [[TL]] = trunc i64 [[TLA:%[^,]+]] to i32
+ // CK1-DAG: [[TLA]] = add nsw i64 [[TLB:%[^,]+]], [[TLC:%[^,]+]]
+ // CK1-DAG: [[TLC]] = fptosi float [[TLD:%[^,]+]] to i64
+ // CK1-DAG: [[TLD]] = load float, float* %{{.+}},
+ // CK1-DAG: [[TLB]] = load i64, i64* @Gblb,
+
+ // CK1: call void @{{.+}}(i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}})
+ #pragma omp target
+ #pragma omp teams num_teams(Gbla+a) thread_limit(Gblb+(long long)lc)
+ {
+ ++comp;
+ }
+
+ return comp;
+}
+
+#endif // CK1
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+#ifdef CK2
+
+// CK2-DAG: [[SSI:%.+]] = type { i32, float }
+// CK2-DAG: [[SSL:%.+]] = type { i64, float }
+template <typename T>
+struct SS{
+ T a;
+ float b;
+};
+
+SS<int> Gbla;
+SS<long long> Gblb;
+
+// CK2-LABEL: teams_template_arg
+int teams_template_arg(void) {
+ int comp = 1;
+
+ SS<int> la;
+ SS<long long> lb;
+
+ // CK2-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
+
+ // CK2-DAG: [[NT]] = load i32, i32* getelementptr inbounds ([[SSI]], [[SSI]]* @Gbla, i32 0, i32 0)
+
+ // CK2-DAG: [[TL]] = trunc i64 [[TLA:%[^,]+]] to i32
+ // CK2-DAG: [[TLA]] = fptosi float [[TLB:%[^,]+]] to i64
+ // CK2-DAG: [[TLB]] = load float, float* [[TLC:%[^,]+]],
+ // CK2-DAG: [[TLC]] = getelementptr inbounds [[SSI]], [[SSI]]* %{{.+}}, i32 0, i32 1
+
+ // CK2: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}}, {{.+}} {{.+}})
+ #pragma omp target
+ #pragma omp teams num_teams(Gbla.a) thread_limit((long long)la.b)
+ {
+ ++comp;
+ }
+
+ // CK2-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
+
+ // CK2-DAG: [[TL]] = trunc i64 [[TLD:%[^,]+]] to i32
+ // CK2-DAG: [[TLD]] = load i64, i64* getelementptr inbounds ([[SSL]], [[SSL]]* @Gblb, i32 0, i32 0),
+
+ // CK2-DAG: [[NT]] = trunc i64 [[NTA:%[^,]+]] to i32
+ // CK2-DAG: [[NTA]] = fptosi float [[NTB:%[^,]+]] to i64
+ // CK2-DAG: [[NTB]] = load float, float* [[NTC:%[^,]+]],
+ // CK2-DAG: [[NTC]] = getelementptr inbounds [[SSL]], [[SSL]]* %{{.+}}, i32 0, i32 1
+
+ // CK2: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}}, {{.+}} {{.+}})
+ #pragma omp target
+ #pragma omp teams num_teams((long long)lb.b) thread_limit(Gblb.a)
+ {
+ ++comp;
+ }
+ return comp;
+}
+#endif // CK2
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64
+// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32
+// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32
+#ifdef CK3
+
+// CK3: [[SSI:%.+]] = type { i32, float }
+// CK3-LABEL: teams_template_struct
+
+template <typename T, int X, long long Y>
+struct SS{
+ T a;
+ float b;
+
+ int foo(void) {
+ int comp = 1;
+
+ // CK3-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 123)
+
+ // CK3-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
+ // CK3-DAG: [[NTA]] = getelementptr inbounds [[SSI]], [[SSI]]* [[NTB:%[^,]+]], i32 0, i32 0
+ // CK3-DAG: [[NTB]] = load [[SSI]]*, [[SSI]]** %{{.+}},
+
+ // CK3: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}})
+ #pragma omp target
+ #pragma omp teams num_teams(a) thread_limit(X)
+ {
+ ++comp;
+ }
+
+ // CK3-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 456, i32 [[TL:%[^,]+]])
+
+ // CK3-DAG: [[TL]] = add nsw i32 [[TLA:%[^,]+]], 123
+ // CK3-DAG: [[TLA]] = fptosi float [[TLB:%[^,]+]] to i32
+ // CK3-DAG: [[TLB]] = load float, float* [[TLC:%[^,]+]],
+ // CK3-DAG: [[TLC]] = getelementptr inbounds [[SSI]], [[SSI]]* [[THIS:%[^,]+]], i32 0, i32 1
+
+ // CK3: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}})
+ #pragma omp target
+ #pragma omp teams num_teams(Y) thread_limit((int)b+X)
+ {
+ ++comp;
+ }
+ return comp;
+ }
+};
+
+int teams_template_struct(void) {
+ SS<int, 123, 456> V;
+ return V.foo();
+
+}
+#endif // CK3
+#endif
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h
+++ lib/CodeGen/CodeGenFunction.h
@@ -2223,6 +2223,16 @@
llvm::Function *GenerateCapturedStmtFunction(const CapturedStmt &S);
Address GenerateCapturedStmtArgument(const CapturedStmt &S);
llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S);
+ /// \brief Start an OpenMP inlined region by forcing all the variables
+ /// captured to be defined as local variables at the current scope. This is
+ /// required for regions that capture global variables that have to be emitted
+ /// as inlined regions.
+ void StartOpenMPInlinedCapturedRegion(
+ const CapturedStmt &S, SmallVector<const VarDecl *, 4> &ForcedLocalVars);
+ /// \brief Close an OpenMP inlined region by restoring the local variable
+ /// information that existed when the region was started.
+ void
+ CloseOpenMPInlinedCapturedRegion(ArrayRef<const VarDecl *> ForcedLocalVars);
void GenerateOpenMPCapturedVars(const CapturedStmt &S,
SmallVectorImpl<llvm::Value *> &CapturedVars);
void emitOMPSimpleStore(LValue LVal, RValue RVal, QualType RValTy,
Index: lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- lib/CodeGen/CGStmtOpenMP.cpp
+++ lib/CodeGen/CGStmtOpenMP.cpp
@@ -239,6 +239,39 @@
return F;
}
+/// \brief Start an OpenMP inlined region by forcing all the variables captured
+/// to be defined as local variables at the current scope. This is required for
+/// regions that capture global variables that have to be emitted as inlined
+/// regions.
+void CodeGenFunction::StartOpenMPInlinedCapturedRegion(
+ const CapturedStmt &S, SmallVector<const VarDecl *, 4> &ForcedLocalVars) {
+ auto I = S.capture_init_begin();
+ auto CI = S.capture_begin();
+ auto CE = S.capture_end();
+ for (; CI != CE; ++CI, ++I) {
+ if (!CI->capturesVariable() && !CI->capturesVariableByCopy())
+ continue;
+
+ auto FoundDecl = LocalDeclMap.find(CI->getCapturedVar());
+ // If it already exist, we do not need to force it.
+ if (FoundDecl != LocalDeclMap.end())
+ continue;
+
+ setAddrOfLocalVar(CI->getCapturedVar(), EmitLValue(*I).getAddress());
+ }
+}
+
+/// \brief Close an OpenMP inlined region by restoring the local variable
+/// information that existed when the region was started.
+void CodeGenFunction::CloseOpenMPInlinedCapturedRegion(
+ ArrayRef<const VarDecl *> ForcedLocalVars) {
+ for (auto *VD : ForcedLocalVars) {
+ assert(LocalDeclMap.find(VD) != LocalDeclMap.end() &&
+ "Variable expected to exist in the local declarations map.");
+ LocalDeclMap.erase(VD);
+ }
+}
+
//===----------------------------------------------------------------------===//
// OpenMP Directive Emission
//===----------------------------------------------------------------------===//
@@ -2667,8 +2700,12 @@
CapturedVars);
}
-void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) {
- llvm_unreachable("CodeGen for 'omp teams' is not supported yet.");
+void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
+ LexicalScope Scope(*this, S.getSourceRange());
+ const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());
+
+ // FIXME: We should fork teams here instead of just emit the statement.
+ EmitStmt(CS.getCapturedStmt());
}
void CodeGenFunction::EmitOMPCancellationPointDirective(
Index: lib/CodeGen/CGOpenMPRuntime.h
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.h
+++ lib/CodeGen/CGOpenMPRuntime.h
@@ -34,6 +34,7 @@
} // namespace llvm
namespace clang {
+class CapturedStmt;
class Expr;
class GlobalDecl;
class OMPExecutableDirective;
@@ -661,10 +662,14 @@
/// \param CodeGen Code generation sequence for the \a D directive.
/// \param HasCancel true if region has inner cancel directive, false
/// otherwise.
+ /// \param CS the captured statement associated with the region. It is only
+ /// required if the local variables information of the enclosing function may
+ /// not match with the captured variables.
virtual void emitInlinedDirective(CodeGenFunction &CGF,
OpenMPDirectiveKind InnermostKind,
const RegionCodeGenTy &CodeGen,
- bool HasCancel = false);
+ bool HasCancel = false,
+ const CapturedStmt *CS = nullptr);
/// \brief Emit a code for reduction clause. Next code should be emitted for
/// reduction:
/// \code
Index: lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.cpp
+++ lib/CodeGen/CGOpenMPRuntime.cpp
@@ -255,21 +255,38 @@
/// \brief RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
CodeGenFunction &CGF;
+ /// \brief Saves the varaibles that were forced to be local in the current
+ /// inlined region.
+ SmallVector<const VarDecl *, 4> ForcedLocalVars;
public:
- /// \brief Constructs region for combined constructs.
+ /// \brief Constructs inlined region. Mostly used for combined constructs. If
+ /// a captured statement is provided it also ensures the captured variables
+ /// are all defined in the scope of the enclosing function. This is typical
+ /// used for regions that make local instances of global variables, e.g.
+ /// target regions.
/// \param CodeGen Code generation sequence for combined directives. Includes
/// a list of functions used for code generation of implicitly inlined
/// regions.
InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
- OpenMPDirectiveKind Kind, bool HasCancel)
+ OpenMPDirectiveKind Kind, bool HasCancel,
+ const CapturedStmt *CS)
: CGF(CGF) {
// Start emission for the construct.
CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
+
+ // Ensures that all the captures are local in the current inlined region.
+ if (CS)
+ CGF.StartOpenMPInlinedCapturedRegion(*CS, ForcedLocalVars);
}
~InlinedOpenMPRegionRAII() {
+ // Restore the local variable information if we have anything forced in this
+ // inlined region.
+ if (!ForcedLocalVars.empty())
+ CGF.CloseOpenMPInlinedCapturedRegion(ForcedLocalVars);
+
// Restore original CapturedStmtInfo only if we're done with code emission.
auto *OldCSI =
cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
@@ -481,6 +498,10 @@
// arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
// *arg_types);
OMPRTL__tgt_target,
+ // Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
+ // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
+ // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
+ OMPRTL__tgt_target_teams,
// Call to void __tgt_register_lib(__tgt_bin_desc *desc);
OMPRTL__tgt_register_lib,
// Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
@@ -1153,6 +1174,24 @@
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
break;
}
+ case OMPRTL__tgt_target_teams: {
+ // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
+ // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
+ // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
+ llvm::Type *TypeParams[] = {CGM.Int32Ty,
+ CGM.VoidPtrTy,
+ CGM.Int32Ty,
+ CGM.VoidPtrPtrTy,
+ CGM.VoidPtrPtrTy,
+ CGM.SizeTy->getPointerTo(),
+ CGM.Int32Ty->getPointerTo(),
+ CGM.Int32Ty,
+ CGM.Int32Ty};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
+ break;
+ }
case OMPRTL__tgt_register_lib: {
// Build void __tgt_register_lib(__tgt_bin_desc *desc);
QualType ParamTy =
@@ -3766,10 +3805,11 @@
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
OpenMPDirectiveKind InnerKind,
const RegionCodeGenTy &CodeGen,
- bool HasCancel) {
+ bool HasCancel,
+ const CapturedStmt *CS) {
if (!CGF.HaveInsertPoint())
return;
- InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
+ InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel, CS);
CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}
@@ -3972,6 +4012,114 @@
DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID);
}
+/// \brief Emit the num_teams clause of an enclosed teams directive at the
+/// target region scope. If there is no teams directive associated with the
+/// target directive, or if there is no num_teams clause associated with the
+/// enclosed teams directive, return nullptr.
+static llvm::Value *
+emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
+ CodeGenFunction &CGF,
+ const OMPExecutableDirective &D) {
+
+ assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
+ "teams directive expected to be "
+ "emitted only for the host!");
+
+ // FIXME: For the moment we do not support combined directives with target and
+ // teams, so we do not expect to get any num_teams clause in the provided
+ // directive. Once we support that, this assertion can be replaced by the
+ // actual emission of the clause expression.
+ assert(D.getSingleClause<OMPNumTeamsClause>() == nullptr &&
+ "Not expecting clause in directive.");
+
+ // If the current target region has a teams region enclosed, we need to get
+ // the number of teams to pass to the runtime function call. This is done
+ // by generating the expression in a inlined region. This is required because
+ // the expression is captured in the enclosing target environment when the
+ // teams directive is not combined with target.
+
+ const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
+
+ // FIXME: Accommodate other combined directives with teams when they become
+ // available.
+ if (auto *TeamsDir = dyn_cast<OMPTeamsDirective>(CS.getCapturedStmt())) {
+ if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
+ llvm::Value *NumTeams = nullptr;
+ auto &&CodeGen = [NTE, &NumTeams](CodeGenFunction &CGF) {
+ NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
+ NumTeams =
+ CGF.Builder.CreateIntCast(NumTeams, CGF.Int32Ty, /*IsSigned=*/true);
+ };
+
+ OMPRuntime.emitInlinedDirective(CGF, OMPD_target, CodeGen,
+ /*HasCancel=*/false, &CS);
+ assert(NumTeams && "Invalid number of teams value.");
+ return NumTeams;
+ }
+
+ // If we have an enclosed teams directive but no num_teams clause we use
+ // the default value 0.
+ return CGF.Builder.getInt32(0);
+ }
+
+ // No teams associated with the directive.
+ return nullptr;
+}
+
+/// \brief Emit the thread_limit clause of an enclosed teams directive at the
+/// target region scope. If there is no teams directive associated with the
+/// target directive, or if there is no thread_limit clause associated with the
+/// enclosed teams directive, return nullptr.
+static llvm::Value *
+emitThreadLimitClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
+ CodeGenFunction &CGF,
+ const OMPExecutableDirective &D) {
+
+ assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
+ "teams directive expected to be "
+ "emitted only for the host!");
+
+ // FIXME: For the moment we do not support combined directives with target and
+ // teams, so we do not expect to get any thread_limit clause in the provided
+ // directive. Once we support that, this assertion can be replaced by the
+ // actual emission of the clause expression.
+ assert(D.getSingleClause<OMPThreadLimitClause>() == nullptr &&
+ "Not expecting clause in directive.");
+
+ // If the current target region has a teams region enclosed, we need to get
+ // the thread limit to pass to the runtime function call. This is done
+ // by generating the expression in a inlined region. This is required because
+ // the expression is captured in the enclosing target environment when the
+ // teams directive is not combined with target.
+
+ const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
+
+ // FIXME: Accommodate other combined directives with teams when they become
+ // available.
+ if (auto *TeamsDir = dyn_cast<OMPTeamsDirective>(CS.getCapturedStmt())) {
+ if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
+ llvm::Value *ThreadLimit = nullptr;
+ auto &&CodeGen = [TLE, &ThreadLimit](CodeGenFunction &CGF) {
+ ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit());
+ ThreadLimit = CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty,
+ /*IsSigned=*/true);
+ };
+
+ OMPRuntime.emitInlinedDirective(CGF, OMPD_target, CodeGen,
+ /*HasCancel=*/false, &CS);
+ assert(ThreadLimit && "Invalid number of teams value.");
+ return ThreadLimit;
+ }
+
+ // If we have an enclosed teams directive but no thread_limit clause we use
+ // the default value 0.
+ return CGF.Builder.getInt32(0);
+ }
+
+ // No teams associated with the directive.
+ return nullptr;
+}
+
void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
const OMPExecutableDirective &D,
llvm::Value *OutlinedFn,
@@ -4100,7 +4248,7 @@
// Fill up the pointer arrays and transfer execution to the device.
auto &&ThenGen = [this, &Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes,
hasVLACaptures, Device, OutlinedFnID, OffloadError,
- OffloadErrorQType](CodeGenFunction &CGF) {
+ OffloadErrorQType, &D](CodeGenFunction &CGF) {
unsigned PointerNumVal = BasePointers.size();
llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal);
llvm::Value *BasePointersArray;
@@ -4240,11 +4388,28 @@
else
DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
- llvm::Value *OffloadingArgs[] = {
- DeviceID, OutlinedFnID, PointerNum, BasePointersArray,
- PointersArray, SizesArray, MapTypesArray};
- auto Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target),
- OffloadingArgs);
+ // Return value of the runtime offloading call.
+ llvm::Value *Return;
+
+ auto *NumTeams = emitNumTeamsClauseForTargetDirective(*this, CGF, D);
+ auto *ThreadLimit = emitThreadLimitClauseForTargetDirective(*this, CGF, D);
+
+ if (NumTeams) {
+ assert(ThreadLimit && "Thread limit expression should be available along "
+ "with number of teams.");
+ llvm::Value *OffloadingArgs[] = {
+ DeviceID, OutlinedFnID, PointerNum,
+ BasePointersArray, PointersArray, SizesArray,
+ MapTypesArray, NumTeams, ThreadLimit};
+ Return = CGF.EmitRuntimeCall(
+ createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs);
+ } else {
+ llvm::Value *OffloadingArgs[] = {
+ DeviceID, OutlinedFnID, PointerNum, BasePointersArray,
+ PointersArray, SizesArray, MapTypesArray};
+ Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target),
+ OffloadingArgs);
+ }
CGF.EmitStoreOfScalar(Return, OffloadError);
};
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits