Added a test that formats invalid code.

Hi djasper,

http://llvm-reviews.chandlerc.com/D1858

CHANGE SINCE LAST DIFF
  http://llvm-reviews.chandlerc.com/D1858?vs=4735&id=4736#toc

Files:
  lib/Format/Format.cpp
  lib/Format/TokenAnnotator.cpp
  unittests/Format/FormatTest.cpp
Index: lib/Format/Format.cpp
===================================================================
--- lib/Format/Format.cpp
+++ lib/Format/Format.cpp
@@ -19,6 +19,7 @@
 #include "TokenAnnotator.h"
 #include "UnwrappedLineParser.h"
 #include "WhitespaceManager.h"
+#include "clang/Basic/CharInfo.h"
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Format/Format.h"
@@ -651,6 +652,10 @@
     Tokens.back() = String;
   }
 
+  static bool isWhitespaceToken(const FormatToken &Tok) {
+    return Tok.is(tok::unknown) && Tok.Type != TT_ImplicitStringLiteral;
+  }
+
   FormatToken *getNextToken() {
     if (GreaterStashed) {
       // Create a synthesized second '>' token.
@@ -677,7 +682,7 @@
 
     // Consume and record whitespace until we find a significant token.
     unsigned WhitespaceLength = TrailingWhitespace;
-    while (FormatTok->Tok.is(tok::unknown)) {
+    while (isWhitespaceToken(*FormatTok)) {
       for (int i = 0, e = FormatTok->TokenText.size(); i != e; ++i) {
         switch (FormatTok->TokenText[i]) {
         case '\n':
@@ -785,12 +790,20 @@
     Lex.LexFromRawLexer(Tok.Tok);
     Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
                               Tok.Tok.getLength());
-    // For formatting, treat unterminated string literals like normal string
-    // literals.
-    if (Tok.is(tok::unknown) && !Tok.TokenText.empty() &&
-        Tok.TokenText[0] == '"') {
-      Tok.Tok.setKind(tok::string_literal);
-      Tok.IsUnterminatedLiteral = true;
+
+    if (Tok.is(tok::unknown)) {
+      // For formatting, treat unterminated string literals like normal string
+      // literals.
+      if (!Tok.TokenText.empty() && Tok.TokenText[0] == '"') {
+        Tok.Tok.setKind(tok::string_literal);
+        Tok.IsUnterminatedLiteral = true;
+      } else {
+        // Preserve non-whitespace unknown tokens.
+        for (unsigned i = 0, e = Tok.TokenText.size(); i < e; ++i)
+          if (isWhitespace(Tok.TokenText[i]))
+            return;
+        Tok.Type = TT_ImplicitStringLiteral;
+      }
     }
   }
 };
Index: lib/Format/TokenAnnotator.cpp
===================================================================
--- lib/Format/TokenAnnotator.cpp
+++ lib/Format/TokenAnnotator.cpp
@@ -523,7 +523,8 @@
 
     // Reset token type in case we have already looked at it and then recovered
     // from an error (e.g. failure to find the matching >).
-    if (CurrentToken != NULL && CurrentToken->Type != TT_LambdaLSquare)
+    if (CurrentToken != NULL && CurrentToken->Type != TT_LambdaLSquare &&
+        CurrentToken->Type != TT_ImplicitStringLiteral)
       CurrentToken->Type = TT_Unknown;
   }
 
@@ -569,6 +570,11 @@
   };
 
   void determineTokenType(FormatToken &Current) {
+    if (Current.Previous && Current.Previous->Tok.is(tok::unknown))
+      Current.Type = TT_ImplicitStringLiteral;
+    if (Current.Type != TT_Unknown)
+      return;
+
     if (Current.getPrecedence() == prec::Assignment &&
         !Line.First->isOneOf(tok::kw_template, tok::kw_using) &&
         (!Current.Previous || Current.Previous->isNot(tok::kw_operator))) {
@@ -605,91 +611,89 @@
       Contexts.back().IsExpression = true;
     }
 
-    if (Current.Type == TT_Unknown) {
-      // Line.MightBeFunctionDecl can only be true after the parentheses of a
-      // function declaration have been found. In this case, 'Current' is a
-      // trailing token of this declaration and thus cannot be a name.
-      if (isStartOfName(Current) && !Line.MightBeFunctionDecl) {
-        Contexts.back().FirstStartOfName = &Current;
-        Current.Type = TT_StartOfName;
-        NameFound = true;
-      } else if (Current.is(tok::kw_auto)) {
-        AutoFound = true;
-      } else if (Current.is(tok::arrow) && AutoFound &&
-                 Line.MustBeDeclaration) {
-        Current.Type = TT_TrailingReturnArrow;
-      } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) {
-        Current.Type =
-            determineStarAmpUsage(Current, Contexts.back().CanBeExpression &&
-                                               Contexts.back().IsExpression);
-      } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) {
-        Current.Type = determinePlusMinusCaretUsage(Current);
-      } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
-        Current.Type = determineIncrementUsage(Current);
-      } else if (Current.is(tok::exclaim)) {
-        Current.Type = TT_UnaryOperator;
-      } else if (Current.isBinaryOperator() &&
-                 (!Current.Previous ||
-                  Current.Previous->isNot(tok::l_square))) {
-        Current.Type = TT_BinaryOperator;
-      } else if (Current.is(tok::comment)) {
-        if (Current.TokenText.startswith("//"))
-          Current.Type = TT_LineComment;
-        else
-          Current.Type = TT_BlockComment;
-      } else if (Current.is(tok::r_paren)) {
-        FormatToken *LeftOfParens = NULL;
-        if (Current.MatchingParen)
-          LeftOfParens = Current.MatchingParen->getPreviousNonComment();
-        bool IsCast = false;
-        bool ParensAreEmpty = Current.Previous == Current.MatchingParen;
-        bool ParensAreType = !Current.Previous ||
-                             Current.Previous->Type == TT_PointerOrReference ||
-                             Current.Previous->Type == TT_TemplateCloser ||
-                             isSimpleTypeSpecifier(*Current.Previous);
-        bool ParensCouldEndDecl =
-            Current.Next &&
-            Current.Next->isOneOf(tok::equal, tok::semi, tok::l_brace);
-        bool IsSizeOfOrAlignOf =
-            LeftOfParens &&
-            LeftOfParens->isOneOf(tok::kw_sizeof, tok::kw_alignof);
-        if (ParensAreType && !ParensCouldEndDecl && !IsSizeOfOrAlignOf &&
-            (Contexts.back().IsExpression ||
-             (Current.Next && Current.Next->isBinaryOperator())))
-          IsCast = true;
-        if (Current.Next && Current.Next->isNot(tok::string_literal) &&
-            (Current.Next->Tok.isLiteral() ||
-             Current.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof)))
-          IsCast = true;
-        // If there is an identifier after the (), it is likely a cast, unless
-        // there is also an identifier before the ().
-        if (LeftOfParens && (LeftOfParens->Tok.getIdentifierInfo() == NULL ||
-                             LeftOfParens->is(tok::kw_return)) &&
-            LeftOfParens->Type != TT_OverloadedOperator &&
-            LeftOfParens->Type != TT_TemplateCloser && Current.Next &&
-            Current.Next->is(tok::identifier))
-          IsCast = true;
-        if (IsCast && !ParensAreEmpty)
-          Current.Type = TT_CastRParen;
-      } else if (Current.is(tok::at) && Current.Next) {
-        switch (Current.Next->Tok.getObjCKeywordID()) {
-        case tok::objc_interface:
-        case tok::objc_implementation:
-        case tok::objc_protocol:
-          Current.Type = TT_ObjCDecl;
-          break;
-        case tok::objc_property:
-          Current.Type = TT_ObjCProperty;
-          break;
-        default:
-          break;
-        }
-      } else if (Current.is(tok::period)) {
-        FormatToken *PreviousNoComment = Current.getPreviousNonComment();
-        if (PreviousNoComment &&
-            PreviousNoComment->isOneOf(tok::comma, tok::l_brace))
-          Current.Type = TT_DesignatedInitializerPeriod;
+    // Line.MightBeFunctionDecl can only be true after the parentheses of a
+    // function declaration have been found. In this case, 'Current' is a
+    // trailing token of this declaration and thus cannot be a name.
+    if (isStartOfName(Current) && !Line.MightBeFunctionDecl) {
+      Contexts.back().FirstStartOfName = &Current;
+      Current.Type = TT_StartOfName;
+      NameFound = true;
+    } else if (Current.is(tok::kw_auto)) {
+      AutoFound = true;
+    } else if (Current.is(tok::arrow) && AutoFound &&
+               Line.MustBeDeclaration) {
+      Current.Type = TT_TrailingReturnArrow;
+    } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) {
+      Current.Type =
+          determineStarAmpUsage(Current, Contexts.back().CanBeExpression &&
+                                             Contexts.back().IsExpression);
+    } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) {
+      Current.Type = determinePlusMinusCaretUsage(Current);
+    } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
+      Current.Type = determineIncrementUsage(Current);
+    } else if (Current.is(tok::exclaim)) {
+      Current.Type = TT_UnaryOperator;
+    } else if (Current.isBinaryOperator() &&
+               (!Current.Previous ||
+                Current.Previous->isNot(tok::l_square))) {
+      Current.Type = TT_BinaryOperator;
+    } else if (Current.is(tok::comment)) {
+      if (Current.TokenText.startswith("//"))
+        Current.Type = TT_LineComment;
+      else
+        Current.Type = TT_BlockComment;
+    } else if (Current.is(tok::r_paren)) {
+      FormatToken *LeftOfParens = NULL;
+      if (Current.MatchingParen)
+        LeftOfParens = Current.MatchingParen->getPreviousNonComment();
+      bool IsCast = false;
+      bool ParensAreEmpty = Current.Previous == Current.MatchingParen;
+      bool ParensAreType = !Current.Previous ||
+                           Current.Previous->Type == TT_PointerOrReference ||
+                           Current.Previous->Type == TT_TemplateCloser ||
+                           isSimpleTypeSpecifier(*Current.Previous);
+      bool ParensCouldEndDecl =
+          Current.Next &&
+          Current.Next->isOneOf(tok::equal, tok::semi, tok::l_brace);
+      bool IsSizeOfOrAlignOf =
+          LeftOfParens &&
+          LeftOfParens->isOneOf(tok::kw_sizeof, tok::kw_alignof);
+      if (ParensAreType && !ParensCouldEndDecl && !IsSizeOfOrAlignOf &&
+          (Contexts.back().IsExpression ||
+           (Current.Next && Current.Next->isBinaryOperator())))
+        IsCast = true;
+      if (Current.Next && Current.Next->isNot(tok::string_literal) &&
+          (Current.Next->Tok.isLiteral() ||
+           Current.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof)))
+        IsCast = true;
+      // If there is an identifier after the (), it is likely a cast, unless
+      // there is also an identifier before the ().
+      if (LeftOfParens && (LeftOfParens->Tok.getIdentifierInfo() == NULL ||
+                           LeftOfParens->is(tok::kw_return)) &&
+          LeftOfParens->Type != TT_OverloadedOperator &&
+          LeftOfParens->Type != TT_TemplateCloser && Current.Next &&
+          Current.Next->is(tok::identifier))
+        IsCast = true;
+      if (IsCast && !ParensAreEmpty)
+        Current.Type = TT_CastRParen;
+    } else if (Current.is(tok::at) && Current.Next) {
+      switch (Current.Next->Tok.getObjCKeywordID()) {
+      case tok::objc_interface:
+      case tok::objc_implementation:
+      case tok::objc_protocol:
+        Current.Type = TT_ObjCDecl;
+        break;
+      case tok::objc_property:
+        Current.Type = TT_ObjCProperty;
+        break;
+      default:
+        break;
       }
+    } else if (Current.is(tok::period)) {
+      FormatToken *PreviousNoComment = Current.getPreviousNonComment();
+      if (PreviousNoComment &&
+          PreviousNoComment->isOneOf(tok::comma, tok::l_brace))
+        Current.Type = TT_DesignatedInitializerPeriod;
     }
   }
 
Index: unittests/Format/FormatTest.cpp
===================================================================
--- unittests/Format/FormatTest.cpp
+++ unittests/Format/FormatTest.cpp
@@ -1881,6 +1881,14 @@
   EXPECT_EQ("#define A B", format("#  \\\n define  \\\n    A  \\\n       B"));
 }
 
+TEST_F(FormatTest, DoesntRemoveUnknownTokens) {
+  verifyFormat("#define A \\x20");
+  verifyFormat("#define A ''");
+  verifyFormat("#define A ''qqq");
+  verifyFormat("#define A `qqq");
+  verifyFormat("f(\"aaaa, bbbb, \"\\\"ccccc\\\"\");");
+}
+
 TEST_F(FormatTest, IndentsPPDirectiveInReducedSpace) {
   verifyFormat("#define A(BB)", getLLVMStyleWithColumns(13));
   verifyFormat("#define A( \\\n    BB)", getLLVMStyleWithColumns(12));
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

Reply via email to