Re: [PATCH] Add support for CUDA unroll pragma

Mark Heffernan Wed, 25 Jun 2014 16:38:05 -0700

Thanks for the comments, Eli.  This patch addresses them all.

http://reviews.llvm.org/D4297


Files:
  docs/ReleaseNotes.rst
  include/clang/Basic/Attr.td
  include/clang/Basic/AttrDocs.td
  include/clang/Basic/DiagnosticSemaKinds.td
  include/clang/Basic/TokenKinds.def
  include/clang/Parse/Parser.h
  include/clang/Sema/CudaUnrollHint.h
  lib/CodeGen/CGStmt.cpp
  lib/CodeGen/CodeGenFunction.h
  lib/Parse/ParsePragma.cpp
  lib/Parse/ParseStmt.cpp
  lib/Sema/SemaStmtAttr.cpp
  test/CodeGen/cuda-pragma-unroll.cu
  test/Misc/ast-print-cuda-pragmas.cu
  test/PCH/cuda-pragma-unroll.cu
  test/Parser/cuda-pragma-unroll.cu

Index: docs/ReleaseNotes.rst
===================================================================
--- docs/ReleaseNotes.rst
+++ docs/ReleaseNotes.rst
@@ -106,6 +106,9 @@
 as interleave and unrolling count can be manually specified.  See language
 extensions for details.
 
+When compiling CUDA C/C++ code, Clang now supports the `#pragma unroll`
+directive to specify loop unrolling optimization hints.
+
 C Language Changes in Clang
 ---------------------------
 
Index: include/clang/Basic/Attr.td
===================================================================
--- include/clang/Basic/Attr.td
+++ include/clang/Basic/Attr.td
@@ -1814,3 +1814,20 @@
 
   let Documentation = [LoopHintDocs];
 }
+
+def CudaUnrollHint : Attr {
+  let Spellings = [Pragma<"", "unroll">];
+
+  let Args = [IntArgument<"Value">];
+
+  let AdditionalMembers = [{
+  void printPrettyPragma(raw_ostream &OS, const PrintingPolicy &Policy) const {
+    // The string 'unroll' is emitted earlier when emitting the pragma name.
+    if (value)
+      OS << " " << value;
+    OS << "\n";
+  }
+  }];
+
+  let Documentation = [CudaUnrollHintDocs];
+}
Index: include/clang/Basic/AttrDocs.td
===================================================================
--- include/clang/Basic/AttrDocs.td
+++ include/clang/Basic/AttrDocs.td
@@ -1024,3 +1024,36 @@
 for details.
   }];
 }
+
+def CudaUnrollHintDocs : Documentation {
+  let Category = DocCatStmt;
+  let Content = [{
+When compiling the CUDA C/C++ language (indicated by specifying the ``-x cuda``
+command-line argument) the ``#pragma unroll`` directive is supported to specify
+loop unrolling hints.  The pragma is placed immediately before a for, while,
+do-while, or C++11 range-based for loop.  The pragma takes an optional parameter
+which must be a positive integer.
+
+.. code-block:: c++
+
+  #pragma unroll
+  for (...) {
+    ...
+  }
+
+  #pragma unroll 16
+  for (...) {
+    ...
+  }
+
+If ``#pragma unroll`` is specified without a parameter the loop unroller will
+attempt to fully unroll the loop if the trip count is known at compile time.
+Specifying the optional parameter, ``#pragma unroll _value_``, directs the
+unroller to unroll the loop ``_value_`` times.  ``#pragma unroll`` and ``#pragma
+unroll _value_`` have identical semantics to ``#pragma clang loop
+unroll(enable)`` and ``#pragma clang loop unroll_count(_value_)`` respectively.
+See `language extensions
+<http://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-loop-hint-optimizations>`_
+for further details including limitations of the unroll hints.
+  }];
+}
Index: include/clang/Basic/DiagnosticSemaKinds.td
===================================================================
--- include/clang/Basic/DiagnosticSemaKinds.td
+++ include/clang/Basic/DiagnosticSemaKinds.td
@@ -549,8 +549,12 @@
 def err_pragma_loop_compatibility : Error<
   "%select{incompatible|duplicate}0 directives '%1(%2)' and '%3(%4)'">;
 def err_pragma_loop_precedes_nonloop : Error<
-  "expected a for, while, or do-while loop to follow the '#pragma clang loop' "
+  "expected a for, while, or do-while loop to follow the '#pragma %0' "
   "directive">;
+def err_incompatible_pragma_loop_unroll : Error<
+  "'#pragma unroll' and '#pragma clang loop %0' cannot both be specified">;
+def err_duplicate_cuda_unroll_pragma : Error<
+  "duplicate '#pragma unroll' directives">;
 
 /// Objective-C parser diagnostics
 def err_duplicate_class_def : Error<
Index: include/clang/Basic/TokenKinds.def
===================================================================
--- include/clang/Basic/TokenKinds.def
+++ include/clang/Basic/TokenKinds.def
@@ -706,6 +706,11 @@
 // handles #pragma loop ... directives.
 ANNOTATION(pragma_loop_hint)
 
+// Annotations for CUDA-specific unroll pragma directives #pragma unroll ...
+// The lexer produces these so that they only take effect when the parser
+// handles #pragma unroll ... directives.
+ANNOTATION(pragma_cuda_unroll)
+
 // Annotations for module import translated from #include etc.
 ANNOTATION(module_include)
 ANNOTATION(module_begin)
Index: include/clang/Parse/Parser.h
===================================================================
--- include/clang/Parse/Parser.h
+++ include/clang/Parse/Parser.h
@@ -19,6 +19,7 @@
 #include "clang/Basic/Specifiers.h"
 #include "clang/Lex/CodeCompletionHandler.h"
 #include "clang/Lex/Preprocessor.h"
+#include "clang/Sema/CudaUnrollHint.h"
 #include "clang/Sema/DeclSpec.h"
 #include "clang/Sema/LoopHint.h"
 #include "clang/Sema/Sema.h"
@@ -163,6 +164,7 @@
   std::unique_ptr<PragmaHandler> MSSection;
   std::unique_ptr<PragmaHandler> OptimizeHandler;
   std::unique_ptr<PragmaHandler> LoopHintHandler;
+  std::unique_ptr<PragmaHandler> CudaUnrollHandler;
 
   std::unique_ptr<CommentHandler> CommentSemaHandler;
 
@@ -522,9 +524,13 @@
   StmtResult HandlePragmaCaptured();
 
   /// \brief Handle the annotation token produced for
-  /// #pragma vectorize...
+  /// #pragma clang loop...
   LoopHint HandlePragmaLoopHint();
 
+  /// \brief Handle the CUDA-specific annotation token produced for
+  /// #pragma unroll...
+  CudaUnrollHint HandlePragmaCudaUnroll();
+
   /// GetLookAheadToken - This peeks ahead N tokens and returns that token
   /// without consuming any tokens.  LookAhead(0) returns 'Tok', LookAhead(1)
   /// returns the token after Tok, etc.
@@ -1610,6 +1616,11 @@
   StmtResult ParsePragmaLoopHint(StmtVector &Stmts, bool OnlyStatement,
                                  SourceLocation *TrailingElseLoc,
                                  ParsedAttributesWithRange &Attrs);
+  // Parse a pragma_cuda_unroll annotation token and add a corresponding
+  // CudaUnrollHint attribute to ParsedAttributes.
+  StmtResult ParsePragmaCudaUnroll(StmtVector &Stmts, bool OnlyStatement,
+                                   SourceLocation *TrailingElseLoc,
+                                   ParsedAttributesWithRange &Attrs);
 
   /// \brief Describes the behavior that should be taken for an __if_exists
   /// block.
Index: include/clang/Sema/CudaUnrollHint.h
===================================================================
--- include/clang/Sema/CudaUnrollHint.h
+++ include/clang/Sema/CudaUnrollHint.h
@@ -0,0 +1,35 @@
+//===--- CudaUnrollHint.h - Types for CudaUnrollHint ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_SEMA_CUDAUNROLLHINT_H
+#define LLVM_CLANG_SEMA_CUDAUNROLLHINT_H
+
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Sema/AttributeList.h"
+
+namespace clang {
+
+/// \brief Record for a CUDA loop unrolling pragma.
+struct CudaUnrollHint {
+  // Identifier corresponding to the string "unroll" in "#pragma unroll ...".
+  IdentifierLoc *UnrollLoc;
+  // Source range of the pragma.
+  SourceRange Range;
+  // For pragmas with an unroll count ("#pragma unroll N") this field contains
+  // the identifier for the unroll count value ("N"), or null if the pragma has
+  // no unroll count.
+  IdentifierLoc *ValueLoc;
+  // Expression for the unroll count, or null if the pragma has no unroll count
+  // value.
+  Expr *ValueExpr;
+};
+
+}  // end namespace clang
+
+#endif  // LLVM_CLANG_SEMA_CUDAUNROLLHINT_H
Index: lib/CodeGen/CGStmt.cpp
===================================================================
--- lib/CodeGen/CGStmt.cpp
+++ lib/CodeGen/CGStmt.cpp
@@ -537,17 +537,37 @@
   // Add vectorize and unroll hints to the metadata on the conditional branch.
   SmallVector<llvm::Value *, 2> Metadata(1);
   for (const auto *Attr : Attrs) {
-    const LoopHintAttr *LH = dyn_cast<LoopHintAttr>(Attr);
-
-    // Skip non loop hint attributes
-    if (!LH)
+    std::pair<llvm::MDString *, llvm::Value *> MetadataPair;
+    if (isa<LoopHintAttr>(Attr))
+      MetadataPair = MetadataForLoopHintAttr(cast<LoopHintAttr>(Attr), Context);
+    else if (isa<CudaUnrollHintAttr>(Attr))
+      MetadataPair = MetadataForCudaUnrollHintAttr(
+          cast<CudaUnrollHintAttr>(Attr), Context);
+    else
       continue;
 
-    LoopHintAttr::OptionType Option = LH->getOption();
-    int ValueInt = LH->getValue();
+    // Set or overwrite metadata indicated by Name.
+    Metadata.push_back(
+        llvm::MDNode::get(Context, {MetadataPair.first, MetadataPair.second}));
+  }
+
+  if (!Metadata.empty()) {
+    // Add llvm.loop MDNode to CondBr.
+    llvm::MDNode *LoopID = llvm::MDNode::get(Context, Metadata);
+    LoopID->replaceOperandWith(0, LoopID); // First op points to itself.
+
+    CondBr->setMetadata("llvm.loop", LoopID);
+  }
+}
+
+std::pair<llvm::MDString *, llvm::Value *>
+CodeGenFunction::MetadataForLoopHintAttr(const LoopHintAttr *LH,
+                                         llvm::LLVMContext &Context) {
+  LoopHintAttr::OptionType Option = LH->getOption();
+  int ValueInt = LH->getValue();
 
-    const char *MetadataName;
-    switch (Option) {
+  const char *MetadataName;
+  switch (Option) {
     case LoopHintAttr::Vectorize:
     case LoopHintAttr::VectorizeWidth:
       MetadataName = "llvm.loop.vectorize.width";
@@ -562,11 +582,11 @@
     case LoopHintAttr::UnrollCount:
       MetadataName = "llvm.loop.unroll.count";
       break;
-    }
+  }
 
-    llvm::Value *Value;
-    llvm::MDString *Name;
-    switch (Option) {
+  llvm::Value *Value;
+  llvm::MDString *Name;
+  switch (Option) {
     case LoopHintAttr::Vectorize:
     case LoopHintAttr::Interleave:
       if (ValueInt == 1) {
@@ -592,23 +612,26 @@
       Name = llvm::MDString::get(Context, MetadataName);
       Value = llvm::ConstantInt::get(Int32Ty, ValueInt);
       break;
-    }
-
-    SmallVector<llvm::Value *, 2> OpValues;
-    OpValues.push_back(Name);
-    OpValues.push_back(Value);
-
-    // Set or overwrite metadata indicated by Name.
-    Metadata.push_back(llvm::MDNode::get(Context, OpValues));
   }
+  return std::make_pair(Name, Value);
+}
 
-  if (!Metadata.empty()) {
-    // Add llvm.loop MDNode to CondBr.
-    llvm::MDNode *LoopID = llvm::MDNode::get(Context, Metadata);
-    LoopID->replaceOperandWith(0, LoopID); // First op points to itself.
-
-    CondBr->setMetadata("llvm.loop", LoopID);
+std::pair<llvm::MDString *, llvm::Value *>
+CodeGenFunction::MetadataForCudaUnrollHintAttr(const CudaUnrollHintAttr *CU,
+                                               llvm::LLVMContext &Context) {
+  const char *MetadataName;
+  llvm::Value *Value;
+  int ValueInt = CU->getValue();
+  if (ValueInt) {
+    MetadataName = "llvm.loop.unroll.count";
+    Value = llvm::ConstantInt::get(Int32Ty, ValueInt);
+  } else {
+    // A value of zero indicates that the optional unroll count was not
+    // specified with the unroll pragma.
+    MetadataName = "llvm.loop.unroll.enable";
+    Value = Builder.getTrue();
   }
+  return std::make_pair(llvm::MDString::get(Context, MetadataName), Value);
 }
 
 void CodeGenFunction::EmitWhileStmt(const WhileStmt &S,
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h
+++ lib/CodeGen/CodeGenFunction.h
@@ -1420,6 +1420,13 @@
   /// expression and compare the result against zero, returning an Int1Ty value.
   llvm::Value *EvaluateExprAsBool(const Expr *E);
 
+  /// Return a metadata string/value pair corresponding to the loop or CUDA
+  /// unroll hint attribute.
+  std::pair<llvm::MDString *, llvm::Value *> MetadataForLoopHintAttr(
+      const LoopHintAttr *LH, llvm::LLVMContext &Context);
+  std::pair<llvm::MDString *, llvm::Value *> MetadataForCudaUnrollHintAttr(
+      const CudaUnrollHintAttr *CU, llvm::LLVMContext &Context);
+
   /// EmitIgnoredExpr - Emit an expression in a context which ignores the result.
   void EmitIgnoredExpr(const Expr *E);
 
Index: lib/Parse/ParsePragma.cpp
===================================================================
--- lib/Parse/ParsePragma.cpp
+++ lib/Parse/ParsePragma.cpp
@@ -15,6 +15,7 @@
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Parse/ParseDiagnostic.h"
 #include "clang/Parse/Parser.h"
+#include "clang/Sema/CudaUnrollHint.h"
 #include "clang/Sema/LoopHint.h"
 #include "clang/Sema/Scope.h"
 #include "llvm/ADT/StringSwitch.h"
@@ -148,6 +149,12 @@
                     Token &FirstToken) override;
 };
 
+struct PragmaCudaUnrollHandler : public PragmaHandler {
+  PragmaCudaUnrollHandler() : PragmaHandler("unroll") {}
+  void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
+                    Token &FirstToken) override;
+};
+
 }  // end namespace
 
 void Parser::initializePragmaHandlers() {
@@ -218,6 +225,11 @@
 
   LoopHintHandler.reset(new PragmaLoopHintHandler());
   PP.AddPragmaHandler("clang", LoopHintHandler.get());
+
+  if (getLangOpts().CUDA) {
+    CudaUnrollHandler.reset(new PragmaCudaUnrollHandler());
+    PP.AddPragmaHandler(CudaUnrollHandler.get());
+  }
 }
 
 void Parser::resetPragmaHandlers() {
@@ -278,6 +290,11 @@
 
   PP.RemovePragmaHandler("clang", LoopHintHandler.get());
   LoopHintHandler.reset();
+
+  if (getLangOpts().CUDA) {
+    PP.RemovePragmaHandler(CudaUnrollHandler.get());
+    CudaUnrollHandler.reset();
+  }
 }
 
 /// \brief Handle the annotation token produced for #pragma unused(...)
@@ -633,6 +650,42 @@
   return Hint;
 }
 
+struct PragmaCudaUnrollInfo {
+  Token Unroll;
+  Token Value;
+  bool HasValue;
+};
+
+CudaUnrollHint Parser::HandlePragmaCudaUnroll() {
+  assert(Tok.is(tok::annot_pragma_cuda_unroll));
+  PragmaCudaUnrollInfo *Info =
+      static_cast<PragmaCudaUnrollInfo *>(Tok.getAnnotationValue());
+
+  CudaUnrollHint Hint;
+  Hint.UnrollLoc =
+      IdentifierLoc::create(Actions.Context, Info->Unroll.getLocation(),
+                            Info->Unroll.getIdentifierInfo());
+  if (Info->HasValue) {
+    Hint.Range =
+        SourceRange(Info->Unroll.getLocation(), Info->Value.getLocation());
+    Hint.ValueLoc =
+        IdentifierLoc::create(Actions.Context, Info->Value.getLocation(),
+                              Info->Value.getIdentifierInfo());
+    // FIXME: We should allow non-type template parameters for the loop hint
+    // value. See bug report #19610
+    if (Info->Value.is(tok::numeric_constant))
+      Hint.ValueExpr = Actions.ActOnNumericConstant(Info->Value).get();
+    else
+      Hint.ValueExpr = nullptr;
+  } else {
+    Hint.Range = SourceRange(Info->Unroll.getLocation());
+    Hint.ValueLoc = nullptr;
+    Hint.ValueExpr = nullptr;
+  }
+
+  return Hint;
+}
+
 // #pragma GCC visibility comes in two variants:
 //   'push' '(' [visibility] ')'
 //   'pop'
@@ -1755,3 +1808,44 @@
                       /*DisableMacroExpansion=*/false,
                       /*OwnsTokens=*/true);
 }
+
+void PragmaCudaUnrollHandler::HandlePragma(Preprocessor &PP,
+                                           PragmaIntroducerKind Introducer,
+                                           Token &Tok) {
+  // "unroll" token from "#pragma unroll...".
+  Token Unroll = Tok;
+
+  bool HasValue;
+  Token Value;
+  PP.Lex(Tok);
+  if (Tok.is(tok::eod)) {
+    // Bare unroll pragma: #pragma unroll
+    HasValue = false;
+  } else {
+    // Unroll pragma with numeric argument: #pragma unroll N
+    HasValue = true;
+    Value = Tok;
+
+    PP.Lex(Tok);
+    if (Tok.isNot(tok::eod)) {
+      PP.Diag(Tok.getLocation(), diag::warn_pragma_extra_tokens_at_eol)
+          << "unroll";
+      return;
+    }
+  }
+
+  auto *Info = new (PP.getPreprocessorAllocator()) PragmaCudaUnrollInfo;
+  Info->Unroll = Unroll;
+  Info->HasValue = HasValue;
+  if (HasValue)
+    Info->Value = Value;
+
+  Token *TokenArray = new Token[1];
+  TokenArray[0].startToken();
+  TokenArray[0].setKind(tok::annot_pragma_cuda_unroll);
+  TokenArray[0].setLocation(Unroll.getLocation());
+  TokenArray[0].setAnnotationValue(static_cast<void *>(Info));
+  PP.EnterTokenStream(TokenArray, 1,
+                      /*DisableMacroExpansion=*/false,
+                      /*OwnsTokens=*/true);
+}
Index: lib/Parse/ParseStmt.cpp
===================================================================
--- lib/Parse/ParseStmt.cpp
+++ lib/Parse/ParseStmt.cpp
@@ -355,6 +355,10 @@
   case tok::annot_pragma_loop_hint:
     ProhibitAttributes(Attrs);
     return ParsePragmaLoopHint(Stmts, OnlyStatement, TrailingElseLoc, Attrs);
+
+  case tok::annot_pragma_cuda_unroll:
+    ProhibitAttributes(Attrs);
+    return ParsePragmaCudaUnroll(Stmts, OnlyStatement, TrailingElseLoc, Attrs);
   }
 
   // If we reached this code, the statement must end in a semicolon.
@@ -1828,6 +1832,26 @@
   return S;
 }
 
+StmtResult Parser::ParsePragmaCudaUnroll(StmtVector &Stmts, bool OnlyStatement,
+                                         SourceLocation *TrailingElseLoc,
+                                         ParsedAttributesWithRange &Attrs) {
+  assert(Tok.is(tok::annot_pragma_cuda_unroll));
+  // Decode the annotation token into a hint, then step past the token.
+  CudaUnrollHint Hint = HandlePragmaCudaUnroll();
+  ConsumeToken();
+
+  // Parse the statement that follows the pragma.
+  MaybeParseCXX11Attributes(Attrs);
+  StmtResult S = ParseStatementOrDeclarationAfterAttributes(
+      Stmts, OnlyStatement, TrailingElseLoc, Attrs);
+
+  // Attach the hint as a pragma attribute; the count must match ArgHints.
+  ArgsUnion ArgHints[] = {Hint.ValueLoc, ArgsUnion(Hint.ValueExpr)};
+  Attrs.addNew(Hint.UnrollLoc->Ident, Hint.Range, nullptr, Hint.UnrollLoc->Loc,
+               ArgHints, 2, AttributeList::AS_Pragma);
+  return S;
+}
+
 Decl *Parser::ParseFunctionStatementBody(Decl *Decl, ParseScope &BodyScope) {
   assert(Tok.is(tok::l_brace));
   SourceLocation LBraceLoc = Tok.getLocation();
Index: lib/Sema/SemaStmtAttr.cpp
===================================================================
--- lib/Sema/SemaStmtAttr.cpp
+++ lib/Sema/SemaStmtAttr.cpp
@@ -49,7 +49,8 @@
       St->getStmtClass() != Stmt::ForStmtClass &&
       St->getStmtClass() != Stmt::CXXForRangeStmtClass &&
       St->getStmtClass() != Stmt::WhileStmtClass) {
-    S.Diag(St->getLocStart(), diag::err_pragma_loop_precedes_nonloop);
+    S.Diag(St->getLocStart(), diag::err_pragma_loop_precedes_nonloop)
+        << "clang loop";
     return nullptr;
   }
 
@@ -104,8 +105,45 @@
                                       A.getRange());
 }
 
-static void
-CheckForIncompatibleAttributes(Sema &S, SmallVectorImpl<const Attr *> &Attrs) {
+static Attr *handleCudaUnrollHintAttr(Sema &S, Stmt *St, const AttributeList &A,
+                                      SourceRange) {
+  if (St->getStmtClass() != Stmt::DoStmtClass &&
+      St->getStmtClass() != Stmt::ForStmtClass &&
+      St->getStmtClass() != Stmt::CXXForRangeStmtClass &&
+      St->getStmtClass() != Stmt::WhileStmtClass) {
+    S.Diag(St->getLocStart(), diag::err_pragma_loop_precedes_nonloop)
+        << "unroll";
+    return nullptr;
+  }
+
+  int ValueInt = 0;
+  if (IdentifierLoc *ValueLoc = A.getArgAsIdent(0)) {
+    Expr *ValueExpr = A.getArgAsExpr(1);
+    // FIXME: We should support template parameters for the loop hint value.
+    // See bug report #19610.
+    llvm::APSInt ValueAPS;
+    if (!ValueExpr || !ValueExpr->isIntegerConstantExpr(ValueAPS, S.Context) ||
+        (ValueInt = ValueAPS.getSExtValue()) < 1) {
+      S.Diag(ValueLoc->Loc, diag::err_pragma_loop_invalid_value);
+      return nullptr;
+    }
+  }
+  return CudaUnrollHintAttr::CreateImplicit(S.Context, ValueInt, A.getRange());
+}
+
+static void CheckForIncompatibleAttributes(
+    Sema &S, SmallVectorImpl<const Attr *> &Attrs) {
+  bool HasCudaUnroll = false;
+  for (const auto *I : Attrs) {
+    const CudaUnrollHintAttr *CU = dyn_cast<CudaUnrollHintAttr>(I);
+    if (!CU)
+      continue;
+
+    if (HasCudaUnroll)
+      S.Diag(CU->getLocation(), diag::err_duplicate_cuda_unroll_pragma);
+    HasCudaUnroll = true;
+  }
+
   // There are 3 categories of loop hints: vectorize, interleave, and
   // unroll. Each comes in two variants: an enable/disable form and a
   // form which takes a numeric argument. For example:
@@ -136,19 +174,20 @@
     int Option = LH->getOption();
     int ValueInt = LH->getValue();
 
+    enum { Vectorize = 0, Interleave = 1, Unroll = 2 };
     int Category;
     switch (Option) {
     case LoopHintAttr::Vectorize:
     case LoopHintAttr::VectorizeWidth:
-      Category = 0;
+      Category = Vectorize;
       break;
     case LoopHintAttr::Interleave:
     case LoopHintAttr::InterleaveCount:
-      Category = 1;
+      Category = Interleave;
       break;
     case LoopHintAttr::Unroll:
     case LoopHintAttr::UnrollCount:
-      Category = 2;
+      Category = Unroll;
       break;
     };
 
@@ -191,6 +230,12 @@
           << LoopHintAttr::getOptionName(CategoryState.NumericOptionId)
           << CategoryState.Value;
     }
+
+    // "#pragma clang loop unroll*()" and CUDA "#pragma unroll" directives are
+    // incompatible.
+    if (Category == Unroll && HasCudaUnroll)
+      S.Diag(ValueLoc, diag::err_incompatible_pragma_loop_unroll)
+          << LoopHintAttr::getOptionName(Option);
   }
 }
 
@@ -206,6 +251,8 @@
     return handleFallThroughAttr(S, St, A, Range);
   case AttributeList::AT_LoopHint:
     return handleLoopHintAttr(S, St, A, Range);
+  case AttributeList::AT_CudaUnrollHint:
+    return handleCudaUnrollHintAttr(S, St, A, Range);
   default:
     // if we're here, then we parsed a known attribute, but didn't recognize
     // it as a statement attribute => it is declaration attribute
Index: test/CodeGen/cuda-pragma-unroll.cu
===================================================================
--- test/CodeGen/cuda-pragma-unroll.cu
+++ test/CodeGen/cuda-pragma-unroll.cu
@@ -0,0 +1,59 @@
+// RUN: %clang_cc1 -x cuda -triple x86_64-apple-darwin -std=c++11 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -x c++ -triple x86_64-apple-darwin -std=c++11 -emit-llvm -o - %s | FileCheck -check-prefix=NOTCUDA %s
+
+// If -x cuda is not specified, then the unroll pragma should not be recognized
+// and no loop unroll metadata should be emitted.
+// NOTCUDA-NOT: llvm.loop.unroll
+
+void while_test(int *List, int Length) {
+  int i = 0;
+#pragma unroll
+  while (i < Length) {
+    // CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_1:.*]]
+    List[i] = i * 2;
+    i++;
+  }
+}
+
+void do_test(int *List, int Length) {
+  int i = 0;
+#pragma unroll
+  do {
+    // CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_2:.*]]
+    List[i] = i * 2;
+    i++;
+  } while (i < Length);
+}
+
+void for_test(int *List, int Length) {
+#pragma unroll
+  for (int i = 0; i < Length; i++) {
+    // CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_3:.*]]
+    List[i] = i * 2;
+  }
+}
+
+void range_test(int *List, int Length) {
+  int VList[Length];
+#pragma unroll
+  for (int j : VList) {
+    // CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_4:.*]]
+    VList[j] = List[j];
+  }
+}
+
+void for_unroll_count_test(int *List, int Length) {
+#pragma unroll 16
+  for (int i = 0; i < Length; i++) {
+    // CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_5:.*]]
+    List[i] = i * 2;
+  }
+}
+
+// CHECK: ![[LOOP_1]] = metadata !{metadata ![[LOOP_1]], metadata ![[UNROLL_ENABLE:.*]]}
+// CHECK: ![[UNROLL_ENABLE]] = metadata !{metadata !"llvm.loop.unroll.enable", i1 true}
+// CHECK: ![[LOOP_2]] = metadata !{metadata ![[LOOP_2]], metadata ![[UNROLL_ENABLE:.*]]}
+// CHECK: ![[LOOP_3]] = metadata !{metadata ![[LOOP_3]], metadata ![[UNROLL_ENABLE:.*]]}
+// CHECK: ![[LOOP_4]] = metadata !{metadata ![[LOOP_4]], metadata ![[UNROLL_ENABLE:.*]]}
+// CHECK: ![[LOOP_5]] = metadata !{metadata ![[LOOP_5]], metadata ![[UNROLL_COUNT_16:.*]]}
+// CHECK: ![[UNROLL_COUNT_16]] = metadata !{metadata !"llvm.loop.unroll.count", i32 16}
Index: test/Misc/ast-print-cuda-pragmas.cu
===================================================================
--- test/Misc/ast-print-cuda-pragmas.cu
+++ test/Misc/ast-print-cuda-pragmas.cu
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -x cuda -ast-print -o - %s | FileCheck %s
+// RUN: %clang_cc1 -x c++ -ast-print -o - %s | FileCheck -check-prefix=NOTCUDA %s
+
+// If -x cuda is not specified, then the unroll pragma should not be recognized
+// and no loop unroll pragmas should be emitted.
+// NOTCUDA-NOT: #pragma unroll
+
+void test(int *List, int Length) {
+  int i = 0;
+#pragma unroll
+// CHECK: #pragma unroll
+// CHECK-NEXT: for (int i = 0; i < Length; i++)
+  for (int i = 0; i < Length; i++) {
+    List[i] = i * 2;
+  }
+
+#pragma unroll 8
+// CHECK: #pragma unroll 8
+// CHECK-NEXT: for (int i = 0; i < Length; i++)
+  for (int i = 0; i < Length; i++) {
+    List[i] = i * 2;
+  }
+}
Index: test/PCH/cuda-pragma-unroll.cu
===================================================================
--- test/PCH/cuda-pragma-unroll.cu
+++ test/PCH/cuda-pragma-unroll.cu
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -x cuda -emit-pch -o %t.cuda.a %s
+// RUN: %clang_cc1 -x cuda -include-pch %t.cuda.a -ast-print -o - %s | FileCheck %s
+// RUN: %clang_cc1 -x c++ -emit-pch -o %t.notcuda.a %s
+// RUN: %clang_cc1 -x c++ -include-pch %t.notcuda.a -ast-print -o - %s | FileCheck -check-prefix=NOTCUDA %s
+
+// If -x cuda is not specified, then the unroll pragma should not be recognized
+// and no loop unroll pragmas should be emitted.
+// NOTCUDA-NOT: #pragma unroll
+
+// CHECK: #pragma unroll
+// CHECK: #pragma unroll 16
+
+#ifndef HEADER
+#define HEADER
+
+void unroll_test(int *List, int Length) {
+#pragma unroll
+  for (int i = 0; i < Length; i++) {
+    List[i] = i * 2;
+  }
+}
+
+void unroll_count_test(int *List, int Length) {
+#pragma unroll 16
+  for (int i = 0; i < Length; i++) {
+    List[i] = i * 2;
+  }
+}
+
+#endif
Index: test/Parser/cuda-pragma-unroll.cu
===================================================================
--- test/Parser/cuda-pragma-unroll.cu
+++ test/Parser/cuda-pragma-unroll.cu
@@ -0,0 +1,46 @@
+// RUN: %clang_cc1 -x cuda -verify %s
+
+// Note that this puts the expected lines before the directives to work around
+// limitations in the -verify mode.
+
+void test(int *List) {
+
+#pragma unroll
+  for (int i = 0; i < 1024; ++i) {
+    List[i] = i;
+  }
+
+#pragma unroll 16
+  for (int i = 0; i < 1024; ++i) {
+    List[i] = i;
+  }
+
+#pragma unroll
+/* expected-error {{'#pragma unroll' and '#pragma clang loop unroll_count' cannot both be specified}} */ #pragma clang loop unroll_count(4)
+  for (int i = 0; i < 1024; ++i) {
+    List[i] = i;
+  }
+
+#pragma unroll
+/* expected-error {{duplicate '#pragma unroll' directives}} */ #pragma unroll
+  for (int i = 0; i < 1024; ++i) {
+    List[i] = i;
+  }
+
+#pragma unroll 4
+/* expected-error {{duplicate '#pragma unroll' directives}} */ #pragma unroll
+  for (int i = 0; i < 1024; ++i) {
+    List[i] = i;
+  }
+
+/* expected-error {{invalid argument; expected a positive integer value}} */ #pragma unroll 0
+/* expected-error {{invalid argument; expected a positive integer value}} */ #pragma unroll garbage
+/* expected-warning {{extra tokens at end of '#pragma unroll'}} */ #pragma unroll 1 2
+/* expected-error {{invalid argument; expected a positive integer value}} */ #pragma unroll +
+  for (int i = 0; i < 1024; ++i) {
+    List[i] = i;
+  }
+
+#pragma unroll
+/* expected-error {{expected a for, while, or do-while loop to follow the '#pragma unroll' directive}} */ List[0] = List[1];
+}

_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

Re: [PATCH] Add support for CUDA unroll pragma

Reply via email to