[PATCH] [CUDA] Allow using integral non-type template parameters as launch_bounds attribute arguments.

Artem Belevich Fri, 10 Apr 2015 15:24:43 -0700

Hi rsmith, eliben,

Allow using integral non-type template parameters as launch_bounds attribute 
arguments.
    
- Changed CUDALaunchBounds arguments from integers to Expr* so they can
  be saved in AST for instantiation, if needed.
- Added support for template instantiation of launch_bounds attrubute.
- Moved evaluation of launch_bounds arguments to NVPTXTargetCodeGenInfo::
  SetTargetAttributes() where it can be done after template instantiation.
- Amended test cases.


http://reviews.llvm.org/D8985

Files:
  include/clang/Basic/Attr.td
  lib/CodeGen/TargetInfo.cpp
  lib/Sema/SemaDeclAttr.cpp
  lib/Sema/SemaTemplateInstantiateDecl.cpp
  test/CodeGenCUDA/launch-bounds.cu
  test/SemaCUDA/launch_bounds.cu

EMAIL PREFERENCES
  http://reviews.llvm.org/settings/panel/emailpreferences/

Index: include/clang/Basic/Attr.td
===================================================================
--- include/clang/Basic/Attr.td
+++ include/clang/Basic/Attr.td
@@ -581,7 +581,7 @@
 
 def CUDALaunchBounds : InheritableAttr {
   let Spellings = [GNU<"launch_bounds">];
-  let Args = [IntArgument<"MaxThreads">, DefaultIntArgument<"MinBlocks", 0>];
+  let Args = [ExprArgument<"MaxThreads">, ExprArgument<"MinBlocks", 1>];
   let LangOpts = [CUDA];
   let Subjects = SubjectList<[ObjCMethod, FunctionLike], WarnDiag,
                              "ExpectedFunctionOrMethod">;
Index: lib/CodeGen/TargetInfo.cpp
===================================================================
--- lib/CodeGen/TargetInfo.cpp
+++ lib/CodeGen/TargetInfo.cpp
@@ -5079,16 +5079,24 @@
     }
     if (FD->hasAttr<CUDALaunchBoundsAttr>()) {
       // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node
-      addNVVMMetadata(F, "maxntidx",
-                      FD->getAttr<CUDALaunchBoundsAttr>()->getMaxThreads());
-      // min blocks is a default argument for CUDALaunchBoundsAttr, so getting a
-      // zero value from getMinBlocks either means it was not specified in
-      // __launch_bounds__ or the user specified a 0 value. In both cases, we
+      CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>();
+      llvm::APSInt MaxThreads;
+      if (Attr->getMaxThreads()->EvaluateAsInt(MaxThreads, M.getContext()))
+        addNVVMMetadata(F, "maxntidx", MaxThreads.getExtValue());
+      else
+        llvm_unreachable("launch_bounds arg 1 evaluation failed.");
+
+      // min blocks is a default argument for CUDALaunchBoundsAttr. If it was
+      // not specified in __launch_bounds__ or the user specified a 0 value, we
       // don't have to add a PTX directive.
-      int MinCTASM = FD->getAttr<CUDALaunchBoundsAttr>()->getMinBlocks();
-      if (MinCTASM > 0) {
-        // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
-        addNVVMMetadata(F, "minctasm", MinCTASM);
+      if (Attr->getMinBlocks()) {
+        llvm::APSInt MinBlocks;
+        if (Attr->getMinBlocks()->EvaluateAsInt(MinBlocks, M.getContext())) {
+          if (MinBlocks > 0)
+            // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
+            addNVVMMetadata(F, "minctasm", MinBlocks.getExtValue());
+        } else
+          llvm_unreachable("launch_bounds arg 2 evaluation failed.");
       }
     }
   }
Index: lib/Sema/SemaDeclAttr.cpp
===================================================================
--- lib/Sema/SemaDeclAttr.cpp
+++ lib/Sema/SemaDeclAttr.cpp
@@ -3457,20 +3457,38 @@
   return false;
 }
 
+static bool isAcceptableLaunchBoundsArgument(Sema &S, Expr *E) {
+  return E->getType()->isIntegerType() &&
+         !E->containsUnexpandedParameterPack() &&
+         (E->isInstantiationDependent() || E->isEvaluatable(S.Context));
+}
+
 static void handleLaunchBoundsAttr(Sema &S, Decl *D,
                                    const AttributeList &Attr) {
-  uint32_t MaxThreads, MinBlocks = 0;
-  if (!checkUInt32Argument(S, Attr, Attr.getArgAsExpr(0), MaxThreads, 1))
+  if (!checkAttributeAtLeastNumArgs(S, Attr, 1) ||
+      !checkAttributeAtMostNumArgs(S, Attr, 2))
     return;
-  if (Attr.getNumArgs() > 1 && !checkUInt32Argument(S, Attr,
-                                                    Attr.getArgAsExpr(1),
-                                                    MinBlocks, 2))
+
+  Expr *MaxThreads = Attr.getArgAsExpr(0);
+  if (!isAcceptableLaunchBoundsArgument(S, MaxThreads)) {
+    S.Diag(Attr.getLoc(), diag::err_attribute_argument_n_type)
+        << Attr.getName() << 0 << AANT_ArgumentIntegerConstant
+        << MaxThreads->getSourceRange();
+    return;
+  }
+
+  Expr *MinBlocks = Attr.getNumArgs() > 1 ? Attr.getArgAsExpr(1) : nullptr;
+  if (MinBlocks && !isAcceptableLaunchBoundsArgument(S, MinBlocks)) {
+    S.Diag(Attr.getLoc(), diag::err_attribute_argument_n_type)
+        << Attr.getName() << 1 << AANT_ArgumentIntegerConstant
+        << MinBlocks->getSourceRange();
     return;
+  }
 
-  D->addAttr(::new (S.Context)
-              CUDALaunchBoundsAttr(Attr.getRange(), S.Context,
-                                  MaxThreads, MinBlocks,
-                                  Attr.getAttributeSpellingListIndex()));
+  D->addAttr(::new (S.Context) CUDALaunchBoundsAttr(
+      Attr.getRange(), S.Context, MaxThreads, MinBlocks,
+      Attr.getAttributeSpellingListIndex()));
+  return;
 }
 
 static void handleArgumentWithTypeTagAttr(Sema &S, Decl *D,
Index: lib/Sema/SemaTemplateInstantiateDecl.cpp
===================================================================
--- lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -202,6 +202,37 @@
   New->addAttr(EIA);
 }
 
+static void instantiateDependentCUDALaunchBoundsAttr(
+    Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
+    const CUDALaunchBoundsAttr *A, Decl *New) {
+
+  // LaunchBounds arguments are constant expressions
+  EnterExpressionEvaluationContext Unevaluated(S, Sema::ConstantEvaluated);
+  ExprResult MaxThreadsResult = S.SubstExpr(A->getMaxThreads(), TemplateArgs);
+  if (MaxThreadsResult.isInvalid()) {
+    S.Diag(A->getLocation(), diag::err_attribute_argument_n_type)
+        << A->getSpelling() << 0 << AANT_ArgumentIntegerConstant
+        << A->getMaxThreads()->getSourceRange();
+      return;
+  }
+
+  Expr *MinBlocksExpr = A->getMinBlocks();
+  if (MinBlocksExpr) {
+    ExprResult MinBlocksResult = S.SubstExpr(A->getMinBlocks(), TemplateArgs);
+    if (MinBlocksResult.isInvalid()) {
+      S.Diag(A->getLocation(), diag::err_attribute_argument_n_type)
+          << A->getSpelling() << 1 << AANT_ArgumentIntegerConstant
+          << A->getMinBlocks()->getSourceRange();
+      return;
+    }
+    MinBlocksExpr = MinBlocksResult.getAs<Expr>();
+  }
+
+  New->addAttr(::new (S.Context) CUDALaunchBoundsAttr(
+      A->getRange(), S.Context, MaxThreadsResult.getAs<Expr>(), MinBlocksExpr,
+      A->getSpellingListIndex()));
+}
+
 void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs,
                             const Decl *Tmpl, Decl *New,
                             LateInstantiatedAttrVec *LateAttrs,
@@ -233,6 +264,13 @@
       continue;
     }
 
+    const CUDALaunchBoundsAttr *CUDALaunchBounds =
+        dyn_cast<CUDALaunchBoundsAttr>(TmplAttr);
+    if (CUDALaunchBounds) {
+      instantiateDependentCUDALaunchBoundsAttr(*this, TemplateArgs,
+                                               CUDALaunchBounds, New);
+      continue;
+    }
     // Existing DLL attribute on the instantiation takes precedence.
     if (TmplAttr->getKind() == attr::DLLExport ||
         TmplAttr->getKind() == attr::DLLImport) {
Index: test/CodeGenCUDA/launch-bounds.cu
===================================================================
--- test/CodeGenCUDA/launch-bounds.cu
+++ test/CodeGenCUDA/launch-bounds.cu
@@ -28,3 +28,23 @@
 }
 
 // CHECK: !{{[0-9]+}} = !{void ()* @Kernel2, !"maxntidx", i32 256}
+
+template <int max_threads_per_block>
+__global__ void
+__launch_bounds__(max_threads_per_block)
+Kernel3()
+{
+}
+
+template void Kernel3<MAX_THREADS_PER_BLOCK>();
+// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel3{{.*}}, !"maxntidx", i32 256}
+
+template <int max_threads_per_block, int min_blocks_per_mp>
+__global__ void
+__launch_bounds__(max_threads_per_block, min_blocks_per_mp)
+Kernel4()
+{
+}
+template void Kernel4<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>();
+// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel4{{.*}}, !"maxntidx", i32 256}
+// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel4{{.*}}, !"minctasm", i32 2}
Index: test/SemaCUDA/launch_bounds.cu
===================================================================
--- test/SemaCUDA/launch_bounds.cu
+++ test/SemaCUDA/launch_bounds.cu
@@ -9,3 +9,9 @@
 __launch_bounds__() void Test4(void); // expected-error {{'launch_bounds' attribute takes at least 1 argument}}
 
 int Test5 __launch_bounds__(128, 7); // expected-warning {{'launch_bounds' attribute only applies to functions and methods}}
+
+template <int a, int b> __launch_bounds__(a, b) void Test6(void) {}
+template void Test6<128,7>(void);
+
+template <int a> __launch_bounds__(a) void Test7(void) {}
+template void Test7<128>(void);

_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

[PATCH] [CUDA] Allow using integral non-type template parameters as launch_bounds attribute arguments.

Reply via email to