Addressed review comments by eliben@ and aaron@
- Improved argument type/value checking.
- Added a warning on negative launch_bounds arguments and disabled
corresponding PTX directive emission when that happens.
- Added test cases for various invalid launch_bounds arguments.
http://reviews.llvm.org/D8985
Files:
include/clang/Basic/Attr.td
include/clang/Basic/DiagnosticSemaKinds.td
lib/CodeGen/TargetInfo.cpp
lib/Sema/SemaDeclAttr.cpp
lib/Sema/SemaTemplateInstantiateDecl.cpp
test/CodeGenCUDA/launch-bounds.cu
test/SemaCUDA/launch_bounds.cu
EMAIL PREFERENCES
http://reviews.llvm.org/settings/panel/emailpreferences/
Index: include/clang/Basic/Attr.td
===================================================================
--- include/clang/Basic/Attr.td
+++ include/clang/Basic/Attr.td
@@ -581,7 +581,7 @@
def CUDALaunchBounds : InheritableAttr {
let Spellings = [GNU<"launch_bounds">];
- let Args = [IntArgument<"MaxThreads">, DefaultIntArgument<"MinBlocks", 0>];
+ let Args = [ExprArgument<"MaxThreads">, ExprArgument<"MinBlocks", 1>];
let LangOpts = [CUDA];
let Subjects = SubjectList<[ObjCMethod, FunctionLike], WarnDiag,
"ExpectedFunctionOrMethod">;
Index: include/clang/Basic/DiagnosticSemaKinds.td
===================================================================
--- include/clang/Basic/DiagnosticSemaKinds.td
+++ include/clang/Basic/DiagnosticSemaKinds.td
@@ -2103,6 +2103,9 @@
"use 'isEqual:' instead">;
def err_attribute_argument_is_zero : Error<
"%0 attribute must be greater than 0">;
+def warn_cuda_launch_bounds_argument_n_is_negative : Warning<
+ "%0 attribute parameter %1 is negative which may result in kernel launch failure">,
+ InGroup<CudaCompat>;
def err_property_function_in_objc_container : Error<
"use of Objective-C property in function nested in Objective-C "
"container not supported, move function outside its container">;
Index: lib/CodeGen/TargetInfo.cpp
===================================================================
--- lib/CodeGen/TargetInfo.cpp
+++ lib/CodeGen/TargetInfo.cpp
@@ -5077,18 +5077,26 @@
// Create !{<func-ref>, metadata !"kernel", i32 1} node
addNVVMMetadata(F, "kernel", 1);
}
- if (FD->hasAttr<CUDALaunchBoundsAttr>()) {
+ if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>()) {
// Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node
- addNVVMMetadata(F, "maxntidx",
- FD->getAttr<CUDALaunchBoundsAttr>()->getMaxThreads());
- // min blocks is a default argument for CUDALaunchBoundsAttr, so getting a
- // zero value from getMinBlocks either means it was not specified in
- // __launch_bounds__ or the user specified a 0 value. In both cases, we
- // don't have to add a PTX directive.
- int MinCTASM = FD->getAttr<CUDALaunchBoundsAttr>()->getMinBlocks();
- if (MinCTASM > 0) {
- // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
- addNVVMMetadata(F, "minctasm", MinCTASM);
+ llvm::APSInt MaxThreads;
+ if (Attr->getMaxThreads()->EvaluateAsInt(MaxThreads, M.getContext())) {
+ if (MaxThreads > 0)
+ addNVVMMetadata(F, "maxntidx", MaxThreads.getExtValue());
+ } else
+ llvm_unreachable("launch_bounds arg 1 evaluation failed.");
+
+ // min blocks is an optional argument for CUDALaunchBoundsAttr. If it was
+ // not specified in __launch_bounds__ or if the user specified a 0 value,
+ // we don't have to add a PTX directive.
+ if (Attr->getMinBlocks()) {
+ llvm::APSInt MinBlocks;
+ if (Attr->getMinBlocks()->EvaluateAsInt(MinBlocks, M.getContext())) {
+ if (MinBlocks > 0)
+ // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
+ addNVVMMetadata(F, "minctasm", MinBlocks.getExtValue());
+ } else
+ llvm_unreachable("launch_bounds arg 2 evaluation failed.");
}
}
}
Index: lib/Sema/SemaDeclAttr.cpp
===================================================================
--- lib/Sema/SemaDeclAttr.cpp
+++ lib/Sema/SemaDeclAttr.cpp
@@ -208,12 +208,11 @@
/// \brief If Expr is a valid integer constant, get the value of the integer
/// expression and return success or failure. May output an error.
-static bool checkUInt32Argument(Sema &S, const AttributeList &Attr,
- const Expr *Expr, uint32_t &Val,
- unsigned Idx = UINT_MAX) {
- llvm::APSInt I(32);
+static bool checkIntArgument(Sema &S, const AttributeList &Attr,
+ const Expr *Expr, llvm::APSInt &Value,
+ unsigned Idx = UINT_MAX) {
if (Expr->isTypeDependent() || Expr->isValueDependent() ||
- !Expr->isIntegerConstantExpr(I, S.Context)) {
+ !Expr->isIntegerConstantExpr(Value, S.Context)) {
if (Idx != UINT_MAX)
S.Diag(Attr.getLoc(), diag::err_attribute_argument_n_type)
<< Attr.getName() << Idx << AANT_ArgumentIntegerConstant
@@ -224,7 +223,19 @@
<< Expr->getSourceRange();
return false;
}
+ return true;
+}
+/// \brief If Expr is a valid integer constant, get unsigned 32-bit value of the
+/// integer expression and return success or failure. May output an error.
+static bool checkUInt32Argument(Sema &S, const AttributeList &Attr,
+ const Expr *Expr, uint32_t &Val,
+ unsigned Idx = UINT_MAX) {
+ llvm::APSInt I(32);
+ if (!checkIntArgument(S, Attr, Expr, I, Idx))
+ return false;
+
+ // Make sure we can fit it in 32 bits.
if (!I.isIntN(32)) {
S.Diag(Expr->getExprLoc(), diag::err_ice_too_large)
<< I.toString(10, false) << 32 << /* Unsigned */ 1;
@@ -3457,20 +3468,50 @@
return false;
}
+static bool isAcceptableLaunchBoundsArgument(Sema &S, const AttributeList &Attr,
+ Expr *E, unsigned Idx) {
+ // If expression does not depend on template instantiation, check its
+ // evaluated vaule and report any issues.
+ if (!E->isInstantiationDependent()) {
+ llvm::APSInt Value;
+ if (!checkIntArgument(S, Attr, E, Value, Idx))
+ return false;
+ if (Value < 0)
+ S.Diag(Attr.getLoc(),
+ diag::warn_cuda_launch_bounds_argument_n_is_negative)
+ << Attr.getName() << Idx << AANT_ArgumentIntegerConstant
+ << E->getSourceRange();
+ return true;
+ }
+
+ // For template arguments only check if it's an integer, and do the final
+ // checks during template instantiation. We also can't deal with variadic
+ // template arguments.
+ if (!E->getType()->isIntegerType() || E->containsUnexpandedParameterPack()) {
+ S.Diag(Attr.getLoc(), diag::err_attribute_argument_n_type)
+ << Attr.getName() << Idx << AANT_ArgumentIntegerConstant
+ << E->getSourceRange();
+ }
+ return true;
+}
+
static void handleLaunchBoundsAttr(Sema &S, Decl *D,
const AttributeList &Attr) {
- uint32_t MaxThreads, MinBlocks = 0;
- if (!checkUInt32Argument(S, Attr, Attr.getArgAsExpr(0), MaxThreads, 1))
+ if (!checkAttributeAtLeastNumArgs(S, Attr, 1) ||
+ !checkAttributeAtMostNumArgs(S, Attr, 2))
return;
- if (Attr.getNumArgs() > 1 && !checkUInt32Argument(S, Attr,
- Attr.getArgAsExpr(1),
- MinBlocks, 2))
+
+ Expr *MaxThreads = Attr.getArgAsExpr(0);
+ if (!isAcceptableLaunchBoundsArgument(S, Attr, MaxThreads, 0))
+ return;
+
+ Expr *MinBlocks = Attr.getNumArgs() > 1 ? Attr.getArgAsExpr(1) : nullptr;
+ if (MinBlocks && !isAcceptableLaunchBoundsArgument(S, Attr, MinBlocks, 1))
return;
- D->addAttr(::new (S.Context)
- CUDALaunchBoundsAttr(Attr.getRange(), S.Context,
- MaxThreads, MinBlocks,
- Attr.getAttributeSpellingListIndex()));
+ D->addAttr(::new (S.Context) CUDALaunchBoundsAttr(
+ Attr.getRange(), S.Context, MaxThreads, MinBlocks,
+ Attr.getAttributeSpellingListIndex()));
}
static void handleArgumentWithTypeTagAttr(Sema &S, Decl *D,
Index: lib/Sema/SemaTemplateInstantiateDecl.cpp
===================================================================
--- lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -202,6 +202,66 @@
New->addAttr(EIA);
}
+/// Get a substitute expression for TemplateArgExpr(). Check whether it is a
+/// valid integer constant, make sure it can be evaluated and warn if the value
+/// is negative. Returns integer constant Expr on success, nullptr otherwise.
+/// May output an error.
+static Expr *
+getLaunchBoundsArgument(Sema &S,
+ const MultiLevelTemplateArgumentList &TemplateArgs,
+ const Attr &Attr, Expr *TemplateArgExpr, unsigned Idx) {
+ if (!TemplateArgExpr)
+ return nullptr;
+
+ // LaunchBounds arguments are constant expressions
+ EnterExpressionEvaluationContext Unevaluated(S, Sema::ConstantEvaluated);
+
+ ExprResult ExprResult = S.SubstExpr(TemplateArgExpr, TemplateArgs);
+ if (ExprResult.isInvalid()) {
+ S.Diag(Attr.getLocation(), diag::err_attribute_argument_n_type)
+ << &Attr << Idx << AANT_ArgumentIntegerConstant
+ << TemplateArgExpr->getSourceRange();
+ return nullptr;
+ }
+ Expr *E = ExprResult.getAs<Expr>();
+ llvm::APSInt Value;
+ if (E->isTypeDependent() || E->isValueDependent() ||
+ !E->isIntegerConstantExpr(Value, S.Context)) {
+ S.Diag(Attr.getLocation(), diag::err_attribute_argument_n_type)
+ << &Attr << Idx << AANT_ArgumentIntegerConstant << E->getSourceRange();
+ return nullptr;
+ }
+ if (Value < 0)
+ S.Diag(Attr.getLocation(),
+ diag::warn_cuda_launch_bounds_argument_n_is_negative)
+ << &Attr << Idx << AANT_ArgumentIntegerConstant << E->getSourceRange();
+ return E;
+}
+
+// Constructs and adds to New a new instance of CUDALaunchBoundsAttr using
+// template A as the base and arguments from TemplateArgs.
+static void instantiateDependentCUDALaunchBoundsAttr(
+ Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
+ const CUDALaunchBoundsAttr &A, Decl *New) {
+
+ Expr *MaxThreadsExpr =
+ getLaunchBoundsArgument(S, TemplateArgs, A, A.getMaxThreads(), 0);
+ if (!MaxThreadsExpr)
+ return;
+
+ Expr *MinBlocksExpr = nullptr;
+ if (A.getMinBlocks()) {
+ MinBlocksExpr =
+ getLaunchBoundsArgument(S, TemplateArgs, A, A.getMinBlocks(), 1);
+ if (!MinBlocksExpr)
+ return;
+ }
+
+ New->addAttr(::new (S.Context) CUDALaunchBoundsAttr(
+ A.getRange(), S.Context, MaxThreadsExpr, MinBlocksExpr,
+ A.getSpellingListIndex()));
+}
+
void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs,
const Decl *Tmpl, Decl *New,
LateInstantiatedAttrVec *LateAttrs,
@@ -233,6 +293,12 @@
continue;
}
+ if (const CUDALaunchBoundsAttr *CUDALaunchBounds =
+ dyn_cast<CUDALaunchBoundsAttr>(TmplAttr)) {
+ instantiateDependentCUDALaunchBoundsAttr(*this, TemplateArgs,
+ *CUDALaunchBounds, New);
+ continue;
+ }
// Existing DLL attribute on the instantiation takes precedence.
if (TmplAttr->getKind() == attr::DLLExport ||
TmplAttr->getKind() == attr::DLLImport) {
Index: test/CodeGenCUDA/launch-bounds.cu
===================================================================
--- test/CodeGenCUDA/launch-bounds.cu
+++ test/CodeGenCUDA/launch-bounds.cu
@@ -28,3 +28,47 @@
}
// CHECK: !{{[0-9]+}} = !{void ()* @Kernel2, !"maxntidx", i32 256}
+
+template <int max_threads_per_block>
+__global__ void
+__launch_bounds__(max_threads_per_block)
+Kernel3()
+{
+}
+
+template void Kernel3<MAX_THREADS_PER_BLOCK>();
+// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel3{{.*}}, !"maxntidx", i32 256}
+
+template <int max_threads_per_block, int min_blocks_per_mp>
+__global__ void
+__launch_bounds__(max_threads_per_block, min_blocks_per_mp)
+Kernel4()
+{
+}
+template void Kernel4<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>();
+
+// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel4{{.*}}, !"maxntidx", i32 256}
+// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel4{{.*}}, !"minctasm", i32 2}
+
+const int constint = 100;
+template <int max_threads_per_block, int min_blocks_per_mp>
+__global__ void
+__launch_bounds__(max_threads_per_block + constint,
+ min_blocks_per_mp + max_threads_per_block)
+Kernel5()
+{
+}
+template void Kernel5<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>();
+
+// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel5{{.*}}, !"maxntidx", i32 356}
+// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel5{{.*}}, !"minctasm", i32 258}
+
+// Make sure we don't emit negative launch bounds values.
+__global__ void
+__launch_bounds__( -MAX_THREADS_PER_BLOCK, -MIN_BLOCKS_PER_MP )
+Kernel6()
+{
+}
+// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel6{{.*}}, !"kernel", i32 1}
+// CHECK-NOT: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel5{{.*}}, !"maxntidx",
+// CHECK-NOT: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel6{{.*}}, !"minctasm",
Index: test/SemaCUDA/launch_bounds.cu
===================================================================
--- test/SemaCUDA/launch_bounds.cu
+++ test/SemaCUDA/launch_bounds.cu
@@ -2,10 +2,31 @@
#include "Inputs/cuda.h"
-__launch_bounds__(128, 7) void Test1(void);
-__launch_bounds__(128) void Test2(void);
+__launch_bounds__(128, 7) void Test2Args(void);
+__launch_bounds__(128) void Test1Arg(void);
-__launch_bounds__(1, 2, 3) void Test3(void); // expected-error {{'launch_bounds' attribute takes no more than 2 arguments}}
-__launch_bounds__() void Test4(void); // expected-error {{'launch_bounds' attribute takes at least 1 argument}}
+__launch_bounds__(-128, 7) void TestNegArg1(void); // expected-warning {{'launch_bounds' attribute parameter 0 is negative which may result in kernel launch failure}}
+__launch_bounds__(128, -7) void TestNegArg2(void); // expected-warning {{'launch_bounds' attribute parameter 1 is negative which may result in kernel launch failure}}
-int Test5 __launch_bounds__(128, 7); // expected-warning {{'launch_bounds' attribute only applies to functions and methods}}
+__launch_bounds__(1, 2, 3) void Test3Args(void); // expected-error {{'launch_bounds' attribute takes no more than 2 arguments}}
+__launch_bounds__() void TestNoArgs(void); // expected-error {{'launch_bounds' attribute takes at least 1 argument}}
+
+int TestNoFunction __launch_bounds__(128, 7); // expected-warning {{'launch_bounds' attribute only applies to functions and methods}}
+
+__launch_bounds__(true) void TestBool(void);
+__launch_bounds__(128.0) void TestFP(void); // expected-error {{'launch_bounds' attribute requires parameter 0 to be an integer constant}}
+__launch_bounds__((void*)0) void TestNullptr(void); // expected-error {{'launch_bounds' attribute requires parameter 0 to be an integer constant}}
+int nonconstint = 256;
+__launch_bounds__(nonconstint) void TestNonConstInt(void); // expected-error {{'launch_bounds' attribute requires parameter 0 to be an integer constant}}
+const int constint = 512;
+__launch_bounds__(constint) void TestConstInt(void);
+
+template <int a, int b> __launch_bounds__(a, b) void TestTmpl2Args(void) {}
+template void TestTmpl2Args<128,7>(void);
+
+template <int a> __launch_bounds__(a) void TestTmpl1Arg(void) {}
+template void TestTmpl1Arg<128>(void);
+
+template <int a, int b, int c>
+__launch_bounds__(a + b, c + constint) void TestTmplExpr(void) {}
+template void TestTmplExpr<128, 3, 7>(void);
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits