Hi rsmith, eliben,
Allow using integral non-type template parameters as launch_bounds attribute
arguments.
- Changed CUDALaunchBounds arguments from integers to Expr* so they can
be saved in AST for instantiation, if needed.
- Added support for template instantiation of launch_bounds attrubute.
- Moved evaluation of launch_bounds arguments to NVPTXTargetCodeGenInfo::
SetTargetAttributes() where it can be done after template instantiation.
- Amended test cases.
http://reviews.llvm.org/D8985
Files:
include/clang/Basic/Attr.td
lib/CodeGen/TargetInfo.cpp
lib/Sema/SemaDeclAttr.cpp
lib/Sema/SemaTemplateInstantiateDecl.cpp
test/CodeGenCUDA/launch-bounds.cu
test/SemaCUDA/launch_bounds.cu
EMAIL PREFERENCES
http://reviews.llvm.org/settings/panel/emailpreferences/
Index: include/clang/Basic/Attr.td
===================================================================
--- include/clang/Basic/Attr.td
+++ include/clang/Basic/Attr.td
@@ -581,7 +581,7 @@
def CUDALaunchBounds : InheritableAttr {
let Spellings = [GNU<"launch_bounds">];
- let Args = [IntArgument<"MaxThreads">, DefaultIntArgument<"MinBlocks", 0>];
+ let Args = [ExprArgument<"MaxThreads">, ExprArgument<"MinBlocks", 1>];
let LangOpts = [CUDA];
let Subjects = SubjectList<[ObjCMethod, FunctionLike], WarnDiag,
"ExpectedFunctionOrMethod">;
Index: lib/CodeGen/TargetInfo.cpp
===================================================================
--- lib/CodeGen/TargetInfo.cpp
+++ lib/CodeGen/TargetInfo.cpp
@@ -5079,16 +5079,24 @@
}
if (FD->hasAttr<CUDALaunchBoundsAttr>()) {
// Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node
- addNVVMMetadata(F, "maxntidx",
- FD->getAttr<CUDALaunchBoundsAttr>()->getMaxThreads());
- // min blocks is a default argument for CUDALaunchBoundsAttr, so getting a
- // zero value from getMinBlocks either means it was not specified in
- // __launch_bounds__ or the user specified a 0 value. In both cases, we
+ CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>();
+ llvm::APSInt MaxThreads;
+ if (Attr->getMaxThreads()->EvaluateAsInt(MaxThreads, M.getContext()))
+ addNVVMMetadata(F, "maxntidx", MaxThreads.getExtValue());
+ else
+ llvm_unreachable("launch_bounds arg 1 evaluation failed.");
+
+ // min blocks is a default argument for CUDALaunchBoundsAttr. If it was
+ // not specified in __launch_bounds__ or the user specified a 0 value, we
// don't have to add a PTX directive.
- int MinCTASM = FD->getAttr<CUDALaunchBoundsAttr>()->getMinBlocks();
- if (MinCTASM > 0) {
- // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
- addNVVMMetadata(F, "minctasm", MinCTASM);
+ if (Attr->getMinBlocks()) {
+ llvm::APSInt MinBlocks;
+ if (Attr->getMinBlocks()->EvaluateAsInt(MinBlocks, M.getContext())) {
+ if (MinBlocks > 0)
+ // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
+ addNVVMMetadata(F, "minctasm", MinBlocks.getExtValue());
+ } else
+ llvm_unreachable("launch_bounds arg 2 evaluation failed.");
}
}
}
Index: lib/Sema/SemaDeclAttr.cpp
===================================================================
--- lib/Sema/SemaDeclAttr.cpp
+++ lib/Sema/SemaDeclAttr.cpp
@@ -3457,20 +3457,38 @@
return false;
}
+static bool isAcceptableLaunchBoundsArgument(Sema &S, Expr *E) {
+ return E->getType()->isIntegerType() &&
+ !E->containsUnexpandedParameterPack() &&
+ (E->isInstantiationDependent() || E->isEvaluatable(S.Context));
+}
+
static void handleLaunchBoundsAttr(Sema &S, Decl *D,
const AttributeList &Attr) {
- uint32_t MaxThreads, MinBlocks = 0;
- if (!checkUInt32Argument(S, Attr, Attr.getArgAsExpr(0), MaxThreads, 1))
+ if (!checkAttributeAtLeastNumArgs(S, Attr, 1) ||
+ !checkAttributeAtMostNumArgs(S, Attr, 2))
return;
- if (Attr.getNumArgs() > 1 && !checkUInt32Argument(S, Attr,
- Attr.getArgAsExpr(1),
- MinBlocks, 2))
+
+ Expr *MaxThreads = Attr.getArgAsExpr(0);
+ if (!isAcceptableLaunchBoundsArgument(S, MaxThreads)) {
+ S.Diag(Attr.getLoc(), diag::err_attribute_argument_n_type)
+ << Attr.getName() << 0 << AANT_ArgumentIntegerConstant
+ << MaxThreads->getSourceRange();
+ return;
+ }
+
+ Expr *MinBlocks = Attr.getNumArgs() > 1 ? Attr.getArgAsExpr(1) : nullptr;
+ if (MinBlocks && !isAcceptableLaunchBoundsArgument(S, MinBlocks)) {
+ S.Diag(Attr.getLoc(), diag::err_attribute_argument_n_type)
+ << Attr.getName() << 1 << AANT_ArgumentIntegerConstant
+ << MinBlocks->getSourceRange();
return;
+ }
- D->addAttr(::new (S.Context)
- CUDALaunchBoundsAttr(Attr.getRange(), S.Context,
- MaxThreads, MinBlocks,
- Attr.getAttributeSpellingListIndex()));
+ D->addAttr(::new (S.Context) CUDALaunchBoundsAttr(
+ Attr.getRange(), S.Context, MaxThreads, MinBlocks,
+ Attr.getAttributeSpellingListIndex()));
+ return;
}
static void handleArgumentWithTypeTagAttr(Sema &S, Decl *D,
Index: lib/Sema/SemaTemplateInstantiateDecl.cpp
===================================================================
--- lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -202,6 +202,37 @@
New->addAttr(EIA);
}
+static void instantiateDependentCUDALaunchBoundsAttr(
+ Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
+ const CUDALaunchBoundsAttr *A, Decl *New) {
+
+ // LaunchBounds arguments are constant expressions
+ EnterExpressionEvaluationContext Unevaluated(S, Sema::ConstantEvaluated);
+ ExprResult MaxThreadsResult = S.SubstExpr(A->getMaxThreads(), TemplateArgs);
+ if (MaxThreadsResult.isInvalid()) {
+ S.Diag(A->getLocation(), diag::err_attribute_argument_n_type)
+ << A->getSpelling() << 0 << AANT_ArgumentIntegerConstant
+ << A->getMaxThreads()->getSourceRange();
+ return;
+ }
+
+ Expr *MinBlocksExpr = A->getMinBlocks();
+ if (MinBlocksExpr) {
+ ExprResult MinBlocksResult = S.SubstExpr(A->getMinBlocks(), TemplateArgs);
+ if (MinBlocksResult.isInvalid()) {
+ S.Diag(A->getLocation(), diag::err_attribute_argument_n_type)
+ << A->getSpelling() << 1 << AANT_ArgumentIntegerConstant
+ << A->getMinBlocks()->getSourceRange();
+ return;
+ }
+ MinBlocksExpr = MinBlocksResult.getAs<Expr>();
+ }
+
+ New->addAttr(::new (S.Context) CUDALaunchBoundsAttr(
+ A->getRange(), S.Context, MaxThreadsResult.getAs<Expr>(), MinBlocksExpr,
+ A->getSpellingListIndex()));
+}
+
void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs,
const Decl *Tmpl, Decl *New,
LateInstantiatedAttrVec *LateAttrs,
@@ -233,6 +264,13 @@
continue;
}
+ const CUDALaunchBoundsAttr *CUDALaunchBounds =
+ dyn_cast<CUDALaunchBoundsAttr>(TmplAttr);
+ if (CUDALaunchBounds) {
+ instantiateDependentCUDALaunchBoundsAttr(*this, TemplateArgs,
+ CUDALaunchBounds, New);
+ continue;
+ }
// Existing DLL attribute on the instantiation takes precedence.
if (TmplAttr->getKind() == attr::DLLExport ||
TmplAttr->getKind() == attr::DLLImport) {
Index: test/CodeGenCUDA/launch-bounds.cu
===================================================================
--- test/CodeGenCUDA/launch-bounds.cu
+++ test/CodeGenCUDA/launch-bounds.cu
@@ -28,3 +28,23 @@
}
// CHECK: !{{[0-9]+}} = !{void ()* @Kernel2, !"maxntidx", i32 256}
+
+template <int max_threads_per_block>
+__global__ void
+__launch_bounds__(max_threads_per_block)
+Kernel3()
+{
+}
+
+template void Kernel3<MAX_THREADS_PER_BLOCK>();
+// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel3{{.*}}, !"maxntidx", i32 256}
+
+template <int max_threads_per_block, int min_blocks_per_mp>
+__global__ void
+__launch_bounds__(max_threads_per_block, min_blocks_per_mp)
+Kernel4()
+{
+}
+template void Kernel4<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>();
+// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel4{{.*}}, !"maxntidx", i32 256}
+// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel4{{.*}}, !"minctasm", i32 2}
Index: test/SemaCUDA/launch_bounds.cu
===================================================================
--- test/SemaCUDA/launch_bounds.cu
+++ test/SemaCUDA/launch_bounds.cu
@@ -9,3 +9,9 @@
__launch_bounds__() void Test4(void); // expected-error {{'launch_bounds' attribute takes at least 1 argument}}
int Test5 __launch_bounds__(128, 7); // expected-warning {{'launch_bounds' attribute only applies to functions and methods}}
+
+template <int a, int b> __launch_bounds__(a, b) void Test6(void) {}
+template void Test6<128,7>(void);
+
+template <int a> __launch_bounds__(a) void Test7(void) {}
+template void Test7<128>(void);
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits