Hi klimek,
With this patch, braced lists (with more than 3 elements are formatted in a
column layout if possible). E.g.:
static const uint16_t CallerSavedRegs64Bit[] = {
X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI,
X86::R8, X86::R9, X86::R10, X86::R11, 0
};
Required other changes:
- FormatTokens can now have a special role that contains extra data and can do
special formattings. A comma separated list is currently the only
implementation.
- Move penalty calculation entirely into ContinuationIndenter (there was a last
piece still in UnwrappedLineFormatter).
http://llvm-reviews.chandlerc.com/D1457
Files:
lib/Format/CMakeLists.txt
lib/Format/ContinuationIndenter.cpp
lib/Format/ContinuationIndenter.h
lib/Format/Format.cpp
lib/Format/FormatToken.cpp
lib/Format/FormatToken.h
lib/Format/TokenAnnotator.cpp
unittests/Format/FormatTest.cpp
Index: lib/Format/CMakeLists.txt
===================================================================
--- lib/Format/CMakeLists.txt
+++ lib/Format/CMakeLists.txt
@@ -4,6 +4,7 @@
BreakableToken.cpp
ContinuationIndenter.cpp
Format.cpp
+ FormatToken.cpp
TokenAnnotator.cpp
UnwrappedLineParser.cpp
WhitespaceManager.cpp
Index: lib/Format/ContinuationIndenter.cpp
===================================================================
--- lib/Format/ContinuationIndenter.cpp
+++ lib/Format/ContinuationIndenter.cpp
@@ -176,13 +176,14 @@
}
unsigned ContinuationIndenter::addTokenToState(LineState &State, bool Newline,
- bool DryRun) {
+ bool DryRun,
+ unsigned ExtraSpaces) {
const FormatToken &Current = *State.NextToken;
const FormatToken &Previous = *State.NextToken->Previous;
// Extra penalty that needs to be added because of the way certain line
// breaks are chosen.
- unsigned ExtraPenalty = 0;
+ unsigned Penalty = 0;
if (State.Stack.size() == 0 || Current.Type == TT_ImplicitStringLiteral) {
// FIXME: Is this correct?
@@ -199,13 +200,20 @@
unsigned ContinuationIndent =
std::max(State.Stack.back().LastSpace, State.Stack.back().Indent) + 4;
if (Newline) {
+ // The first line break on any ParenLevel causes an extra penalty in order
+ // prefer similar line breaks.
+ if (!State.Stack.back().ContainsLineBreak)
+ Penalty += 15;
+ State.Stack.back().ContainsLineBreak = true;
+
+ Penalty += State.NextToken->SplitPenalty;
+
// Breaking before the first "<<" is generally not desirable if the LHS is
// short.
if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0 &&
State.Column <= Style.ColumnLimit / 2)
- ExtraPenalty += Style.PenaltyBreakFirstLessLess;
+ Penalty += Style.PenaltyBreakFirstLessLess;
- State.Stack.back().ContainsLineBreak = true;
if (Current.is(tok::r_brace)) {
if (Current.BlockKind == BK_BracedInit)
State.Column = State.Stack[State.Stack.size() - 2].LastSpace;
@@ -333,7 +341,7 @@
State.Stack.back().LastSpace = State.Stack.back().VariablePos;
}
- unsigned Spaces = State.NextToken->SpacesRequiredBefore;
+ unsigned Spaces = State.NextToken->SpacesRequiredBefore + ExtraSpaces;
if (!DryRun)
Whitespaces.replaceWhitespace(Current, 0, Spaces, State.Column + Spaces);
@@ -395,7 +403,7 @@
}
}
- return moveStateToNextToken(State, DryRun, Newline) + ExtraPenalty;
+ return moveStateToNextToken(State, DryRun, Newline) + Penalty;
}
unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
@@ -542,11 +550,20 @@
State.NextToken = State.NextToken->Next;
- if (!Newline && Style.AlwaysBreakBeforeMultilineStrings &&
- Current.is(tok::string_literal) && Current.CanBreakBefore)
- return 0;
+ unsigned Penalty = 0;
+ if (Newline || !Style.AlwaysBreakBeforeMultilineStrings ||
+ Current.isNot(tok::string_literal) || !Current.CanBreakBefore)
+ Penalty += breakProtrudingToken(Current, State, DryRun);
+
+ // If the previous has a special role, let it consume tokens as appropriate.
+ // It is necessary to start at the previous token for the only implemented
+ // role (comma separated list). That way, the decision whether or not to break
+ // after the "{" is already done and both options are tried and evaluated.
+ // FIXME: This is ugly, find a better way.
+ if (Previous && Previous->Role)
+ Penalty += Previous->Role->format(State, this, DryRun);
- return breakProtrudingToken(Current, State, DryRun);
+ return Penalty;
}
unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
Index: lib/Format/ContinuationIndenter.h
===================================================================
--- lib/Format/ContinuationIndenter.h
+++ lib/Format/ContinuationIndenter.h
@@ -60,7 +60,8 @@
///
/// If \p DryRun is \c false, also creates and stores the required
/// \c Replacement.
- unsigned addTokenToState(LineState &State, bool Newline, bool DryRun);
+ unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
+ unsigned ExtraSpaces = 0);
/// \brief Get the column limit for this line. This is the style's column
/// limit, potentially reduced for preprocessor definitions.
Index: lib/Format/Format.cpp
===================================================================
--- lib/Format/Format.cpp
+++ lib/Format/Format.cpp
@@ -438,14 +438,14 @@
}
for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end();
I != E; ++I) {
+ unsigned Penalty = Indenter->addTokenToState(State, (*I)->NewLine, false);
DEBUG({
if ((*I)->NewLine) {
- llvm::dbgs() << "Penalty for splitting before "
+ llvm::dbgs() << "Penalty for placing "
<< (*I)->Previous->State.NextToken->Tok.getName() << ": "
- << (*I)->Previous->State.NextToken->SplitPenalty << "\n";
+ << Penalty << "\n";
}
});
- Indenter->addTokenToState(State, (*I)->NewLine, false);
}
}
@@ -459,11 +459,6 @@
return;
if (!NewLine && Indenter->mustBreak(PreviousNode->State))
return;
- if (NewLine) {
- if (!PreviousNode->State.Stack.back().ContainsLineBreak)
- Penalty += 15;
- Penalty += PreviousNode->State.NextToken->SplitPenalty;
- }
StateNode *Node = new (Allocator.Allocate())
StateNode(PreviousNode->State, NewLine, PreviousNode);
Index: lib/Format/FormatToken.cpp
===================================================================
--- /dev/null
+++ lib/Format/FormatToken.cpp
@@ -0,0 +1,152 @@
+//===--- FormatToken.cpp - Format C++ code --------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements specific functions of \c FormatTokens and their
+/// roles.
+///
+//===----------------------------------------------------------------------===//
+
+#include "FormatToken.h"
+#include "ContinuationIndenter.h"
+#include "clang/Format/Format.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
+
+namespace clang {
+namespace format {
+
+TokenRole::~TokenRole() {}
+
+void TokenRole::precomputeFormattingInfos(const FormatToken *Token) {}
+
+unsigned CommaSeparatedList::format(LineState &State,
+ ContinuationIndenter *Indenter,
+ bool DryRun) {
+ if (!State.NextToken->Previous || !State.NextToken->Previous->Previous ||
+ Commas.size() <= 2)
+ return 0;
+ const FormatToken *Token = State.NextToken->Previous->Previous;
+ if (Token->isNot(tok::l_brace) ||
+ Token->Next->Type == TT_DesignatedInitializerPeriod)
+ return 0;
+ unsigned RemainingColumns = Style.ColumnLimit - State.Stack.back().Indent;
+ const ColumnFormat *Format = getColumnFormat(RemainingColumns);
+ if (!Format)
+ return 0;
+ unsigned Penalty = 0;
+ unsigned Column = 0;
+ unsigned Item = 0;
+ while (State.NextToken != Token->MatchingParen) {
+ bool NewLine = false;
+ unsigned Spaces = State.NextToken->SpacesRequiredBefore;
+ if (Item < Commas.size() && State.NextToken->Previous == Commas[Item]) {
+ if (!State.NextToken->isTrailingComment()) {
+ Spaces += Format->ColumnSizes[Column] - ItemLengths[Item];
+ ++Column;
+ }
+ ++Item;
+ if (Column == Format->Columns) {
+ Column = 0;
+ NewLine = true;
+ }
+ }
+ if (State.NextToken->MustBreakBefore) {
+ Column = 0;
+ NewLine = true;
+ }
+ if (NewLine)
+ Penalty += State.NextToken->SplitPenalty;
+ Spaces -= State.NextToken->SpacesRequiredBefore;
+ Penalty += Indenter->addTokenToState(State, NewLine, DryRun, Spaces);
+ }
+ return Penalty;
+}
+
+void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) {
+ if (!Token->MatchingParen)
+ return;
+ FormatToken *ItemBegin = Token->Next;
+ SmallVector<bool, 6> MustBreakBeforeItem;
+ SmallVector<unsigned, 6> ItemLengthsInLastColumn;
+ for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) {
+ // Skip comments on their own line.
+ while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment())
+ ItemBegin = ItemBegin->Next;
+
+ MustBreakBeforeItem.push_back(ItemBegin->MustBreakBefore);
+ const FormatToken *ItemEnd =
+ i == Commas.size() ? Token->MatchingParen->getPreviousNonComment()
+ : Commas[i];
+ ItemLengths.push_back(ItemEnd->TotalLength - ItemBegin->TotalLength +
+ ItemBegin->CodePointCount);
+ if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline &&
+ ItemEnd->Next->isTrailingComment())
+ ItemEnd = ItemEnd->Next;
+ ItemLengthsInLastColumn.push_back(ItemEnd->TotalLength -
+ ItemBegin->TotalLength +
+ ItemBegin->CodePointCount);
+ ItemBegin = ItemEnd->Next;
+ }
+ // We can never place more than ColumnLimit / 3 items in a row (because of the
+ // spaces and the comma).
+ for (unsigned Columns = 1; Columns <= Style.ColumnLimit / 3; ++Columns) {
+ ColumnFormat Format;
+ Format.Columns = Columns;
+ Format.ColumnSizes.resize(Columns);
+ Format.TotalLength = 0;
+ bool HasRowWithSufficientColumns = false;
+ for (unsigned i = 0, e = ItemLengths.size(), Column = 0; i != e; ++i) {
+ if (MustBreakBeforeItem[i] || Column == Columns) {
+ ++Format.TotalLength;
+ Column = 0;
+ }
+ if (Column == Columns - 1)
+ HasRowWithSufficientColumns = true;
+ unsigned length =
+ (Column == Columns - 1) ? ItemLengthsInLastColumn[i] : ItemLengths[i];
+ Format.ColumnSizes[Column] =
+ std::max(Format.ColumnSizes[Column], length);
+ ++Column;
+ }
+ // If all rows are terminated early (e.g. by trailing comments), we don't
+ // need to look further.
+ if (!HasRowWithSufficientColumns)
+ break;
+ Format.TotalWidth = Columns - 1; // Width of the N-1 spaces.
+ for (unsigned i = 0; i < Columns; ++i) {
+ Format.TotalWidth += Format.ColumnSizes[i];
+ }
+
+ // Ignore layouts that are bound to violate the column limit.
+ if (Format.TotalWidth > Style.ColumnLimit)
+ continue;
+
+ Formats.push_back(Format);
+ }
+}
+
+const CommaSeparatedList::ColumnFormat *
+CommaSeparatedList::getColumnFormat(unsigned RemainingColumns) const {
+ const ColumnFormat *BestFormat = NULL;
+ for (SmallVector<ColumnFormat, 4>::const_reverse_iterator
+ I = Formats.rbegin(),
+ E = Formats.rend();
+ I != E; ++I) {
+ if (I->TotalWidth <= RemainingColumns) {
+ if (BestFormat && I->TotalLength > BestFormat->TotalLength)
+ break;
+ BestFormat = &*I;
+ }
+ }
+ return BestFormat;
+}
+
+} // namespace format
+} // namespace clang
Index: lib/Format/FormatToken.h
===================================================================
--- lib/Format/FormatToken.h
+++ lib/Format/FormatToken.h
@@ -17,7 +17,9 @@
#define LLVM_CLANG_FORMAT_FORMAT_TOKEN_H
#include "clang/Basic/OperatorPrecedence.h"
+#include "clang/Format/Format.h"
#include "clang/Lex/Lexer.h"
+#include "llvm/ADT/OwningPtr.h"
namespace clang {
namespace format {
@@ -72,6 +74,8 @@
PPK_Inconclusive
};
+class TokenRole;
+
/// \brief A wrapper around a \c Token storing information about the
/// whitespace characters preceeding it.
struct FormatToken {
@@ -143,7 +147,10 @@
TokenType Type;
+ /// \brief The number of spaces that should be inserted before this token.
unsigned SpacesRequiredBefore;
+
+ /// \brief \c true if it is allowed to break before this token.
bool CanBreakBefore;
bool ClosesTemplateDeclaration;
@@ -155,6 +162,10 @@
/// the number of commas.
unsigned ParameterCount;
+ /// \brief A token can have a special role that can carry extra information
+ /// about the token's formatting.
+ llvm::OwningPtr<TokenRole> Role;
+
/// \brief If this is an opening parenthesis, how are the parameters packed?
ParameterPackingKind PackingKind;
@@ -289,6 +300,14 @@
return Tok;
}
+ template <typename T> T *getRoleAs() {
+ return dyn_cast_or_null<T>(Role.get());
+ }
+
+ template <typename T> const T *getRoleAs() const {
+ return dyn_cast_or_null<T>(Role.get());
+ }
+
FormatToken *MatchingParen;
FormatToken *Previous;
@@ -300,6 +319,87 @@
void operator=(const FormatToken &) LLVM_DELETED_FUNCTION;
};
+class ContinuationIndenter;
+struct LineState;
+
+class TokenRole {
+public:
+ virtual ~TokenRole();
+
+ /// \brief After the \c TokenAnnotator has finished annotating all the tokens,
+ /// this function precomputes required information for formatting.
+ virtual void precomputeFormattingInfos(const FormatToken *Token);
+
+ /// \brief Discriminator for LLVM-style RTTI (dyn_cast<> et al.).
+ enum RoleKind {
+ RK_List
+ };
+ TokenRole(const FormatStyle &Style, RoleKind K) : Style(Style), Kind(K) {}
+ RoleKind getKind() const { return Kind; }
+
+ /// \brief Apply the special formatting that the given role demands.
+ ///
+ /// Continues formatting from \p State leaving indentation to \p Indenter and
+ /// returns the total penalty that this formatting incurs.
+ virtual unsigned format(LineState &State, ContinuationIndenter *Indenter,
+ bool DryRun) {
+ return 0;
+ }
+
+protected:
+ const FormatStyle &Style;
+
+private:
+ const RoleKind Kind;
+};
+
+class CommaSeparatedList : public TokenRole {
+public:
+ CommaSeparatedList(const FormatStyle &Style) : TokenRole(Style, RK_List) {}
+
+ virtual void precomputeFormattingInfos(const FormatToken *Token);
+
+ virtual unsigned format(LineState &State, ContinuationIndenter *Indenter,
+ bool DryRun);
+
+ /// \brief Adds \p Token as the next comma to the \c CommaSeparated list.
+ void CommaFound(const FormatToken *Token) { Commas.push_back(Token); }
+
+private:
+ /// \brief A struct that holds information on how to format a given list with
+ /// a specific number of columns.
+ struct ColumnFormat {
+ /// \brief The number of columns to use.
+ unsigned Columns;
+
+ /// \brief The total width in characters.
+ unsigned TotalWidth;
+
+ /// \brief The total length in number of lines.
+ unsigned TotalLength;
+
+ /// \brief The size of each column in characters.
+ SmallVector<unsigned, 8> ColumnSizes;
+ };
+
+ /// \brief Calculate which \c ColumnFormat fits best into \p RemainingColumns.
+ const ColumnFormat *getColumnFormat(unsigned RemainingColumns) const;
+
+ /// \brief The ordered \c FormatTokens making up the commas of this list.
+ SmallVector<const FormatToken *, 6> Commas;
+
+ /// \brief The length of each of the lists items in characters.
+ SmallVector<unsigned, 6> ItemLengths;
+
+ /// \brief Precomputed formats that can be used for this list.
+ SmallVector<ColumnFormat, 4> Formats;
+
+public:
+ static bool classof(const TokenRole *Role) {
+ return Role->getKind() == RK_List;
+ }
+};
+
} // namespace format
} // namespace clang
Index: lib/Format/TokenAnnotator.cpp
===================================================================
--- lib/Format/TokenAnnotator.cpp
+++ lib/Format/TokenAnnotator.cpp
@@ -29,9 +29,11 @@
/// into template parameter lists.
class AnnotatingParser {
public:
- AnnotatingParser(AnnotatedLine &Line, IdentifierInfo &Ident_in)
- : Line(Line), CurrentToken(Line.First), KeywordVirtualFound(false),
- NameFound(false), AutoFound(false), Ident_in(Ident_in) {
+ AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line,
+ IdentifierInfo &Ident_in)
+ : Style(Style), Line(Line), CurrentToken(Line.First),
+ KeywordVirtualFound(false), NameFound(false), AutoFound(false),
+ Ident_in(Ident_in) {
Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
}
@@ -268,6 +270,9 @@
void updateParameterCount(FormatToken *Left, FormatToken *Current) {
if (Current->is(tok::comma)) {
++Left->ParameterCount;
+ if (!Left->Role)
+ Left->Role.reset(new CommaSeparatedList(Style));
+ Left->getRoleAs<CommaSeparatedList>()->CommaFound(Current);
} else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) {
Left->ParameterCount = 1;
}
@@ -827,6 +832,7 @@
SmallVector<Context, 8> Contexts;
+ const FormatStyle &Style;
AnnotatedLine &Line;
FormatToken *CurrentToken;
bool KeywordVirtualFound;
@@ -937,7 +943,7 @@
} // end anonymous namespace
void TokenAnnotator::annotate(AnnotatedLine &Line) {
- AnnotatingParser Parser(Line, Ident_in);
+ AnnotatingParser Parser(Style, Line, Ident_in);
Line.Type = Parser.parseLine();
if (Line.Type == LT_Invalid)
return;
@@ -1006,6 +1012,11 @@
Current = Current->Next;
}
+ for (Current = Line.First; Current != NULL; Current = Current->Next) {
+ if (Current->Role)
+ Current->Role->precomputeFormattingInfos(Current);
+ }
+
calculateUnbreakableTailLengths(Line);
DEBUG({
printDebugInfo(Line);
Index: unittests/Format/FormatTest.cpp
===================================================================
--- unittests/Format/FormatTest.cpp
+++ unittests/Format/FormatTest.cpp
@@ -1691,16 +1691,6 @@
" 100000000, \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\"\n"
"};");
- verifyFormat(
- "static SomeClass = { a, b, c, d, e, f, g, h, i, j,\n"
- " looooooooooooooooooooooooooooooooooongname,\n"
- " looooooooooooooooooooooooooooooong };");
- // Allow bin-packing in static initializers as this would often lead to
- // terrible results, e.g.:
- verifyGoogleFormat(
- "static SomeClass = {a, b, c, d, e, f, g, h, i, j,\n"
- " looooooooooooooooooooooooooooooooooongname,\n"
- " looooooooooooooooooooooooooooooong};");
// Here, everything other than the "}" would fit on a line.
verifyFormat("static int LooooooooooooooooooooooooongVariable[1] = {\n"
" 100000000000000000000000\n"
@@ -1782,7 +1772,7 @@
"struct {\n"
" unsigned bit;\n"
" const char *const name;\n"
- "} kBitsToOs[] = { { kOsMac, \"Mac\" }, { kOsWin, \"Windows\" },\n"
+ "} kBitsToOs[] = { { kOsMac, \"Mac\" }, { kOsWin, \"Windows\" },\n"
" { kOsLinux, \"Linux\" }, { kOsCrOS, \"Chrome OS\" } };");
}
@@ -4152,6 +4142,34 @@
NoSpaces);
}
+TEST_F(FormatTest, FormatsBracedListsinColumnLayout) {
+ verifyFormat("vector<int> x = { 1, 22, 333, 4444, 55555, 666666, 7777777,\n"
+ " 1, 22, 333, 4444, 55555, 666666, 7777777,\n"
+ " 1, 22, 333, 4444, 55555, 666666, 7777777,\n"
+ " 1, 22, 333, 4444, 55555, 666666, 7777777,\n"
+ " 1, 22, 333, 4444, 55555, 666666, 7777777,\n"
+ " 1, 22, 333, 4444, 55555, 666666, 7777777 };");
+ verifyFormat("vector<int> x = { 1, 22, 333, 4444, 55555, 666666, 7777777,\n"
+ " // line comment\n"
+ " 1, 22, 333, 4444, 55555, 666666, 7777777,\n"
+ " 1, 22, 333, 4444, 55555,\n"
+ " // line comment\n"
+ " 1, 22, 333, 4444, 55555, 666666, 7777777,\n"
+ " 1, 22, 333, 4444, 55555, 666666, 7777777 };");
+ verifyFormat(
+ "vector<int> x = { 1, 22, 333, 4444, 55555, 666666, 7777777,\n"
+ " 1, 22, 333, 4444, 55555, 666666, 7777777,\n"
+ " 1, 22, 333, 4444, 55555, 666666, // comment\n"
+ " 7777777, 1, 22, 333, 4444, 55555, 666666,\n"
+ " 7777777, 1, 22, 333, 4444, 55555, 666666,\n"
+ " 7777777, 1, 22, 333, 4444, 55555, 666666,\n"
+ " 7777777 };");
+ verifyFormat("static const uint16_t CallerSavedRegs64Bit[] = {\n"
+ " X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI,\n"
+ " X86::R8, X86::R9, X86::R10, X86::R11, 0\n"
+ "};");
+}
+
TEST_F(FormatTest, PullTrivialFunctionDefinitionsIntoSingleLine) {
verifyFormat("void f() { return 42; }");
verifyFormat("void f() {\n"
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits