Hi djasper,
Clang-format currently treats all unknown tokens as whitespace. It
turns out that unknown tokens can occur in valid code inside macro definitions or
parameters. This patch tries to distinguish such tokens from whitespace.
http://llvm-reviews.chandlerc.com/D1858
Files:
lib/Format/Format.cpp
lib/Format/TokenAnnotator.cpp
unittests/Format/FormatTest.cpp
Index: lib/Format/Format.cpp
===================================================================
--- lib/Format/Format.cpp
+++ lib/Format/Format.cpp
@@ -19,6 +19,7 @@
#include "TokenAnnotator.h"
#include "UnwrappedLineParser.h"
#include "WhitespaceManager.h"
+#include "clang/Basic/CharInfo.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Format/Format.h"
@@ -651,6 +652,10 @@
Tokens.back() = String;
}
+ static bool isWhitespaceToken(const FormatToken &Tok) {
+ return Tok.is(tok::unknown) && Tok.Type != TT_ImplicitStringLiteral;
+ }
+
FormatToken *getNextToken() {
if (GreaterStashed) {
// Create a synthesized second '>' token.
@@ -677,7 +682,7 @@
// Consume and record whitespace until we find a significant token.
unsigned WhitespaceLength = TrailingWhitespace;
- while (FormatTok->Tok.is(tok::unknown)) {
+ while (isWhitespaceToken(*FormatTok)) {
for (int i = 0, e = FormatTok->TokenText.size(); i != e; ++i) {
switch (FormatTok->TokenText[i]) {
case '\n':
@@ -792,6 +797,12 @@
Tok.Tok.setKind(tok::string_literal);
Tok.IsUnterminatedLiteral = true;
}
+ if (Tok.is(tok::unknown)) {
+ for (unsigned i = 0, e = Tok.TokenText.size(); i < e; ++i)
+ if (isWhitespace(Tok.TokenText[i]))
+ return;
+ Tok.Type = TT_ImplicitStringLiteral;
+ }
}
};
Index: lib/Format/TokenAnnotator.cpp
===================================================================
--- lib/Format/TokenAnnotator.cpp
+++ lib/Format/TokenAnnotator.cpp
@@ -523,7 +523,8 @@
// Reset token type in case we have already looked at it and then recovered
// from an error (e.g. failure to find the matching >).
- if (CurrentToken != NULL && CurrentToken->Type != TT_LambdaLSquare)
+ if (CurrentToken != NULL && CurrentToken->Type != TT_LambdaLSquare &&
+ CurrentToken->Type != TT_ImplicitStringLiteral)
CurrentToken->Type = TT_Unknown;
}
@@ -569,6 +570,11 @@
};
void determineTokenType(FormatToken &Current) {
+ if (Current.Previous && Current.Previous->Tok.is(tok::unknown))
+ Current.Type = TT_ImplicitStringLiteral;
+ if (Current.Type != TT_Unknown)
+ return;
+
if (Current.getPrecedence() == prec::Assignment &&
!Line.First->isOneOf(tok::kw_template, tok::kw_using) &&
(!Current.Previous || Current.Previous->isNot(tok::kw_operator))) {
@@ -605,91 +611,89 @@
Contexts.back().IsExpression = true;
}
- if (Current.Type == TT_Unknown) {
- // Line.MightBeFunctionDecl can only be true after the parentheses of a
- // function declaration have been found. In this case, 'Current' is a
- // trailing token of this declaration and thus cannot be a name.
- if (isStartOfName(Current) && !Line.MightBeFunctionDecl) {
- Contexts.back().FirstStartOfName = &Current;
- Current.Type = TT_StartOfName;
- NameFound = true;
- } else if (Current.is(tok::kw_auto)) {
- AutoFound = true;
- } else if (Current.is(tok::arrow) && AutoFound &&
- Line.MustBeDeclaration) {
- Current.Type = TT_TrailingReturnArrow;
- } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) {
- Current.Type =
- determineStarAmpUsage(Current, Contexts.back().CanBeExpression &&
- Contexts.back().IsExpression);
- } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) {
- Current.Type = determinePlusMinusCaretUsage(Current);
- } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
- Current.Type = determineIncrementUsage(Current);
- } else if (Current.is(tok::exclaim)) {
- Current.Type = TT_UnaryOperator;
- } else if (Current.isBinaryOperator() &&
- (!Current.Previous ||
- Current.Previous->isNot(tok::l_square))) {
- Current.Type = TT_BinaryOperator;
- } else if (Current.is(tok::comment)) {
- if (Current.TokenText.startswith("//"))
- Current.Type = TT_LineComment;
- else
- Current.Type = TT_BlockComment;
- } else if (Current.is(tok::r_paren)) {
- FormatToken *LeftOfParens = NULL;
- if (Current.MatchingParen)
- LeftOfParens = Current.MatchingParen->getPreviousNonComment();
- bool IsCast = false;
- bool ParensAreEmpty = Current.Previous == Current.MatchingParen;
- bool ParensAreType = !Current.Previous ||
- Current.Previous->Type == TT_PointerOrReference ||
- Current.Previous->Type == TT_TemplateCloser ||
- isSimpleTypeSpecifier(*Current.Previous);
- bool ParensCouldEndDecl =
- Current.Next &&
- Current.Next->isOneOf(tok::equal, tok::semi, tok::l_brace);
- bool IsSizeOfOrAlignOf =
- LeftOfParens &&
- LeftOfParens->isOneOf(tok::kw_sizeof, tok::kw_alignof);
- if (ParensAreType && !ParensCouldEndDecl && !IsSizeOfOrAlignOf &&
- (Contexts.back().IsExpression ||
- (Current.Next && Current.Next->isBinaryOperator())))
- IsCast = true;
- if (Current.Next && Current.Next->isNot(tok::string_literal) &&
- (Current.Next->Tok.isLiteral() ||
- Current.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof)))
- IsCast = true;
- // If there is an identifier after the (), it is likely a cast, unless
- // there is also an identifier before the ().
- if (LeftOfParens && (LeftOfParens->Tok.getIdentifierInfo() == NULL ||
- LeftOfParens->is(tok::kw_return)) &&
- LeftOfParens->Type != TT_OverloadedOperator &&
- LeftOfParens->Type != TT_TemplateCloser && Current.Next &&
- Current.Next->is(tok::identifier))
- IsCast = true;
- if (IsCast && !ParensAreEmpty)
- Current.Type = TT_CastRParen;
- } else if (Current.is(tok::at) && Current.Next) {
- switch (Current.Next->Tok.getObjCKeywordID()) {
- case tok::objc_interface:
- case tok::objc_implementation:
- case tok::objc_protocol:
- Current.Type = TT_ObjCDecl;
- break;
- case tok::objc_property:
- Current.Type = TT_ObjCProperty;
- break;
- default:
- break;
- }
- } else if (Current.is(tok::period)) {
- FormatToken *PreviousNoComment = Current.getPreviousNonComment();
- if (PreviousNoComment &&
- PreviousNoComment->isOneOf(tok::comma, tok::l_brace))
- Current.Type = TT_DesignatedInitializerPeriod;
+ // Line.MightBeFunctionDecl can only be true after the parentheses of a
+ // function declaration have been found. In this case, 'Current' is a
+ // trailing token of this declaration and thus cannot be a name.
+ if (isStartOfName(Current) && !Line.MightBeFunctionDecl) {
+ Contexts.back().FirstStartOfName = &Current;
+ Current.Type = TT_StartOfName;
+ NameFound = true;
+ } else if (Current.is(tok::kw_auto)) {
+ AutoFound = true;
+ } else if (Current.is(tok::arrow) && AutoFound &&
+ Line.MustBeDeclaration) {
+ Current.Type = TT_TrailingReturnArrow;
+ } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) {
+ Current.Type =
+ determineStarAmpUsage(Current, Contexts.back().CanBeExpression &&
+ Contexts.back().IsExpression);
+ } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) {
+ Current.Type = determinePlusMinusCaretUsage(Current);
+ } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
+ Current.Type = determineIncrementUsage(Current);
+ } else if (Current.is(tok::exclaim)) {
+ Current.Type = TT_UnaryOperator;
+ } else if (Current.isBinaryOperator() &&
+ (!Current.Previous ||
+ Current.Previous->isNot(tok::l_square))) {
+ Current.Type = TT_BinaryOperator;
+ } else if (Current.is(tok::comment)) {
+ if (Current.TokenText.startswith("//"))
+ Current.Type = TT_LineComment;
+ else
+ Current.Type = TT_BlockComment;
+ } else if (Current.is(tok::r_paren)) {
+ FormatToken *LeftOfParens = NULL;
+ if (Current.MatchingParen)
+ LeftOfParens = Current.MatchingParen->getPreviousNonComment();
+ bool IsCast = false;
+ bool ParensAreEmpty = Current.Previous == Current.MatchingParen;
+ bool ParensAreType = !Current.Previous ||
+ Current.Previous->Type == TT_PointerOrReference ||
+ Current.Previous->Type == TT_TemplateCloser ||
+ isSimpleTypeSpecifier(*Current.Previous);
+ bool ParensCouldEndDecl =
+ Current.Next &&
+ Current.Next->isOneOf(tok::equal, tok::semi, tok::l_brace);
+ bool IsSizeOfOrAlignOf =
+ LeftOfParens &&
+ LeftOfParens->isOneOf(tok::kw_sizeof, tok::kw_alignof);
+ if (ParensAreType && !ParensCouldEndDecl && !IsSizeOfOrAlignOf &&
+ (Contexts.back().IsExpression ||
+ (Current.Next && Current.Next->isBinaryOperator())))
+ IsCast = true;
+ if (Current.Next && Current.Next->isNot(tok::string_literal) &&
+ (Current.Next->Tok.isLiteral() ||
+ Current.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof)))
+ IsCast = true;
+ // If there is an identifier after the (), it is likely a cast, unless
+ // there is also an identifier before the ().
+ if (LeftOfParens && (LeftOfParens->Tok.getIdentifierInfo() == NULL ||
+ LeftOfParens->is(tok::kw_return)) &&
+ LeftOfParens->Type != TT_OverloadedOperator &&
+ LeftOfParens->Type != TT_TemplateCloser && Current.Next &&
+ Current.Next->is(tok::identifier))
+ IsCast = true;
+ if (IsCast && !ParensAreEmpty)
+ Current.Type = TT_CastRParen;
+ } else if (Current.is(tok::at) && Current.Next) {
+ switch (Current.Next->Tok.getObjCKeywordID()) {
+ case tok::objc_interface:
+ case tok::objc_implementation:
+ case tok::objc_protocol:
+ Current.Type = TT_ObjCDecl;
+ break;
+ case tok::objc_property:
+ Current.Type = TT_ObjCProperty;
+ break;
+ default:
+ break;
}
+ } else if (Current.is(tok::period)) {
+ FormatToken *PreviousNoComment = Current.getPreviousNonComment();
+ if (PreviousNoComment &&
+ PreviousNoComment->isOneOf(tok::comma, tok::l_brace))
+ Current.Type = TT_DesignatedInitializerPeriod;
}
}
Index: unittests/Format/FormatTest.cpp
===================================================================
--- unittests/Format/FormatTest.cpp
+++ unittests/Format/FormatTest.cpp
@@ -1881,6 +1881,13 @@
EXPECT_EQ("#define A B", format("# \\\n define \\\n A \\\n B"));
}
+TEST_F(FormatTest, DoesntRemoveUnknownTokens) {
+ verifyFormat("#define A \\x20");
+ verifyFormat("#define A ''");
+ verifyFormat("#define A ''qqq");
+ verifyFormat("#define A `qqq");
+}
+
TEST_F(FormatTest, IndentsPPDirectiveInReducedSpace) {
verifyFormat("#define A(BB)", getLLVMStyleWithColumns(13));
verifyFormat("#define A( \\\n BB)", getLLVMStyleWithColumns(12));
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits