[PATCH] clang-format: Add column layout formatting for braced lists

Daniel Jasper Wed, 21 Aug 2013 04:13:10 -0700

Hi klimek,

With this patch, braced lists (with more than 3 elements are formatted in a 
column layout if possible). E.g.:


  static const uint16_t CallerSavedRegs64Bit[] = {
    X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI,
    X86::R8,  X86::R9,  X86::R10, X86::R11, 0
  };

Required other changes:
- FormatTokens can now have a special role that contains extra data and can do 
special formattings. A comma separated list is currently the only 
implementation.
- Move penalty calculation entirely into ContinuationIndenter (there was a last 
piece still in UnwrappedLineFormatter).

http://llvm-reviews.chandlerc.com/D1457

Files:
  lib/Format/CMakeLists.txt
  lib/Format/ContinuationIndenter.cpp
  lib/Format/ContinuationIndenter.h
  lib/Format/Format.cpp
  lib/Format/FormatToken.cpp
  lib/Format/FormatToken.h
  lib/Format/TokenAnnotator.cpp
  unittests/Format/FormatTest.cpp

Index: lib/Format/CMakeLists.txt
===================================================================
--- lib/Format/CMakeLists.txt
+++ lib/Format/CMakeLists.txt
@@ -4,6 +4,7 @@
   BreakableToken.cpp
   ContinuationIndenter.cpp
   Format.cpp
+  FormatToken.cpp
   TokenAnnotator.cpp
   UnwrappedLineParser.cpp
   WhitespaceManager.cpp
Index: lib/Format/ContinuationIndenter.cpp
===================================================================
--- lib/Format/ContinuationIndenter.cpp
+++ lib/Format/ContinuationIndenter.cpp
@@ -176,13 +176,14 @@
 }
 
 unsigned ContinuationIndenter::addTokenToState(LineState &State, bool Newline,
-                                               bool DryRun) {
+                                               bool DryRun,
+                                               unsigned ExtraSpaces) {
   const FormatToken &Current = *State.NextToken;
   const FormatToken &Previous = *State.NextToken->Previous;
 
   // Extra penalty that needs to be added because of the way certain line
   // breaks are chosen.
-  unsigned ExtraPenalty = 0;
+  unsigned Penalty = 0;
 
   if (State.Stack.size() == 0 || Current.Type == TT_ImplicitStringLiteral) {
     // FIXME: Is this correct?
@@ -199,13 +200,20 @@
   unsigned ContinuationIndent =
       std::max(State.Stack.back().LastSpace, State.Stack.back().Indent) + 4;
   if (Newline) {
+    // The first line break on any ParenLevel causes an extra penalty in order
+    // prefer similar line breaks.
+    if (!State.Stack.back().ContainsLineBreak)
+      Penalty += 15;
+    State.Stack.back().ContainsLineBreak = true;
+
+    Penalty += State.NextToken->SplitPenalty;
+
     // Breaking before the first "<<" is generally not desirable if the LHS is
     // short.
     if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0 &&
         State.Column <= Style.ColumnLimit / 2)
-      ExtraPenalty += Style.PenaltyBreakFirstLessLess;
+      Penalty += Style.PenaltyBreakFirstLessLess;
 
-    State.Stack.back().ContainsLineBreak = true;
     if (Current.is(tok::r_brace)) {
       if (Current.BlockKind == BK_BracedInit)
         State.Column = State.Stack[State.Stack.size() - 2].LastSpace;
@@ -333,7 +341,7 @@
         State.Stack.back().LastSpace = State.Stack.back().VariablePos;
     }
 
-    unsigned Spaces = State.NextToken->SpacesRequiredBefore;
+    unsigned Spaces = State.NextToken->SpacesRequiredBefore + ExtraSpaces;
 
     if (!DryRun)
       Whitespaces.replaceWhitespace(Current, 0, Spaces, State.Column + Spaces);
@@ -395,7 +403,7 @@
     }
   }
 
-  return moveStateToNextToken(State, DryRun, Newline) + ExtraPenalty;
+  return moveStateToNextToken(State, DryRun, Newline) + Penalty;
 }
 
 unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
@@ -542,11 +550,20 @@
 
   State.NextToken = State.NextToken->Next;
 
-  if (!Newline && Style.AlwaysBreakBeforeMultilineStrings &&
-      Current.is(tok::string_literal) && Current.CanBreakBefore)
-    return 0;
+  unsigned Penalty = 0;
+  if (Newline || !Style.AlwaysBreakBeforeMultilineStrings ||
+      Current.isNot(tok::string_literal) || !Current.CanBreakBefore)
+    Penalty += breakProtrudingToken(Current, State, DryRun);
+
+  // If the previous has a special role, let it consume tokens as appropriate.
+  // It is necessary to start at the previous token for the only implemented
+  // role (comma separated list). That way, the decision whether or not to break
+  // after the "{" is already done and both options are tried and evaluated.
+  // FIXME: This is ugly, find a better way.
+  if (Previous && Previous->Role)
+    Penalty += Previous->Role->format(State, this, DryRun);
 
-  return breakProtrudingToken(Current, State, DryRun);
+  return Penalty;
 }
 
 unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
Index: lib/Format/ContinuationIndenter.h
===================================================================
--- lib/Format/ContinuationIndenter.h
+++ lib/Format/ContinuationIndenter.h
@@ -60,7 +60,8 @@
   ///
   /// If \p DryRun is \c false, also creates and stores the required
   /// \c Replacement.
-  unsigned addTokenToState(LineState &State, bool Newline, bool DryRun);
+  unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
+                           unsigned ExtraSpaces = 0);
 
   /// \brief Get the column limit for this line. This is the style's column
   /// limit, potentially reduced for preprocessor definitions.
Index: lib/Format/Format.cpp
===================================================================
--- lib/Format/Format.cpp
+++ lib/Format/Format.cpp
@@ -438,14 +438,14 @@
     }
     for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end();
          I != E; ++I) {
+      unsigned Penalty = Indenter->addTokenToState(State, (*I)->NewLine, false);
       DEBUG({
         if ((*I)->NewLine) {
-          llvm::dbgs() << "Penalty for splitting before "
+          llvm::dbgs() << "Penalty for placing "
                        << (*I)->Previous->State.NextToken->Tok.getName() << ": "
-                       << (*I)->Previous->State.NextToken->SplitPenalty << "\n";
+                       << Penalty << "\n";
         }
       });
-      Indenter->addTokenToState(State, (*I)->NewLine, false);
     }
   }
 
@@ -459,11 +459,6 @@
       return;
     if (!NewLine && Indenter->mustBreak(PreviousNode->State))
       return;
-    if (NewLine) {
-      if (!PreviousNode->State.Stack.back().ContainsLineBreak)
-        Penalty += 15;
-      Penalty += PreviousNode->State.NextToken->SplitPenalty;
-    }
 
     StateNode *Node = new (Allocator.Allocate())
         StateNode(PreviousNode->State, NewLine, PreviousNode);
Index: lib/Format/FormatToken.cpp
===================================================================
--- /dev/null
+++ lib/Format/FormatToken.cpp
@@ -0,0 +1,152 @@
+//===--- FormatToken.cpp - Format C++ code --------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements specific functions of \c FormatTokens and their
+/// roles.
+///
+//===----------------------------------------------------------------------===//
+
+#include "FormatToken.h"
+#include "ContinuationIndenter.h"
+#include "clang/Format/Format.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
+
+namespace clang {
+namespace format {
+
+TokenRole::~TokenRole() {}
+
+void TokenRole::precomputeFormattingInfos(const FormatToken *Token) {}
+
+unsigned CommaSeparatedList::format(LineState &State,
+                                    ContinuationIndenter *Indenter,
+                                    bool DryRun) {
+  if (!State.NextToken->Previous || !State.NextToken->Previous->Previous ||
+      Commas.size() <= 2)
+    return 0;
+  const FormatToken *Token = State.NextToken->Previous->Previous;
+  if (Token->isNot(tok::l_brace) ||
+      Token->Next->Type == TT_DesignatedInitializerPeriod)
+    return 0;
+  unsigned RemainingColumns = Style.ColumnLimit - State.Stack.back().Indent;
+  const ColumnFormat *Format = getColumnFormat(RemainingColumns);
+  if (!Format)
+    return 0;
+  unsigned Penalty = 0;
+  unsigned Column = 0;
+  unsigned Item = 0;
+  while (State.NextToken != Token->MatchingParen) {
+    bool NewLine = false;
+    unsigned Spaces = State.NextToken->SpacesRequiredBefore;
+    if (Item < Commas.size() && State.NextToken->Previous == Commas[Item]) {
+      if (!State.NextToken->isTrailingComment()) {
+        Spaces += Format->ColumnSizes[Column] - ItemLengths[Item];
+        ++Column;
+      }
+      ++Item;
+      if (Column == Format->Columns) {
+        Column = 0;
+        NewLine = true;
+      }
+    }
+    if (State.NextToken->MustBreakBefore) {
+      Column = 0;
+      NewLine = true;
+    }
+    if (NewLine)
+      Penalty += State.NextToken->SplitPenalty;
+    Spaces -= State.NextToken->SpacesRequiredBefore;
+    Penalty += Indenter->addTokenToState(State, NewLine, DryRun, Spaces);
+  }
+  return Penalty;
+}
+
+void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) {
+  if (!Token->MatchingParen)
+    return;
+  FormatToken *ItemBegin = Token->Next;
+  SmallVector<bool, 6> MustBreakBeforeItem;
+  SmallVector<unsigned, 6> ItemLengthsInLastColumn;
+  for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) {
+    // Skip comments on their own line.
+    while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment())
+      ItemBegin = ItemBegin->Next;
+
+    MustBreakBeforeItem.push_back(ItemBegin->MustBreakBefore);
+    const FormatToken *ItemEnd =
+        i == Commas.size() ? Token->MatchingParen->getPreviousNonComment()
+                           : Commas[i];
+    ItemLengths.push_back(ItemEnd->TotalLength - ItemBegin->TotalLength +
+                          ItemBegin->CodePointCount);
+    if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline &&
+        ItemEnd->Next->isTrailingComment())
+      ItemEnd = ItemEnd->Next;
+    ItemLengthsInLastColumn.push_back(ItemEnd->TotalLength -
+                                      ItemBegin->TotalLength +
+                                      ItemBegin->CodePointCount);
+    ItemBegin = ItemEnd->Next;
+  }
+  // We can never place more than ColumnLimit / 3 items in a row (because of the
+  // spaces and the comma).
+  for (unsigned Columns = 1; Columns <= Style.ColumnLimit / 3; ++Columns) {
+    ColumnFormat Format;
+    Format.Columns = Columns;
+    Format.ColumnSizes.resize(Columns);
+    Format.TotalLength = 0;
+    bool HasRowWithSufficientColumns = false;
+    for (unsigned i = 0, e = ItemLengths.size(), Column = 0; i != e; ++i) {
+      if (MustBreakBeforeItem[i] || Column == Columns) {
+        ++Format.TotalLength;
+        Column = 0;
+      }
+      if (Column == Columns - 1)
+        HasRowWithSufficientColumns = true;
+      unsigned length =
+          (Column == Columns - 1) ? ItemLengthsInLastColumn[i] : ItemLengths[i];
+      Format.ColumnSizes[Column] =
+          std::max(Format.ColumnSizes[Column], length);
+      ++Column;
+    }
+    // If all rows are terminated early (e.g. by trailing comments), we don't
+    // need to look further.
+    if (!HasRowWithSufficientColumns)
+      break;
+    Format.TotalWidth = Columns - 1; // Width of the N-1 spaces.
+    for (unsigned i = 0; i < Columns; ++i) {
+      Format.TotalWidth += Format.ColumnSizes[i];
+    }
+
+    // Ignore layouts that are bound to violate the column limit.
+    if (Format.TotalWidth > Style.ColumnLimit)
+      continue;
+
+    Formats.push_back(Format);
+  }
+}
+
+const CommaSeparatedList::ColumnFormat *
+CommaSeparatedList::getColumnFormat(unsigned RemainingColumns) const {
+  const ColumnFormat *BestFormat = NULL;
+  for (SmallVector<ColumnFormat, 4>::const_reverse_iterator
+           I = Formats.rbegin(),
+           E = Formats.rend();
+       I != E; ++I) {
+    if (I->TotalWidth <= RemainingColumns) {
+      if (BestFormat && I->TotalLength > BestFormat->TotalLength)
+        break;
+      BestFormat = &*I;
+    }
+  }
+  return BestFormat;
+}
+
+} // namespace format
+} // namespace clang
Index: lib/Format/FormatToken.h
===================================================================
--- lib/Format/FormatToken.h
+++ lib/Format/FormatToken.h
@@ -17,7 +17,9 @@
 #define LLVM_CLANG_FORMAT_FORMAT_TOKEN_H
 
 #include "clang/Basic/OperatorPrecedence.h"
+#include "clang/Format/Format.h"
 #include "clang/Lex/Lexer.h"
+#include "llvm/ADT/OwningPtr.h"
 
 namespace clang {
 namespace format {
@@ -72,6 +74,8 @@
   PPK_Inconclusive
 };
 
+class TokenRole;
+
 /// \brief A wrapper around a \c Token storing information about the
 /// whitespace characters preceeding it.
 struct FormatToken {
@@ -143,7 +147,10 @@
 
   TokenType Type;
 
+  /// \brief The number of spaces that should be inserted before this token.
   unsigned SpacesRequiredBefore;
+
+  /// \brief \c true if it is allowed to break before this token.
   bool CanBreakBefore;
 
   bool ClosesTemplateDeclaration;
@@ -155,6 +162,10 @@
   /// the number of commas.
   unsigned ParameterCount;
 
+  /// \brief A token can have a special role that can carry extra information
+  /// about the token's formatting.
+  llvm::OwningPtr<TokenRole> Role;
+
   /// \brief If this is an opening parenthesis, how are the parameters packed?
   ParameterPackingKind PackingKind;
 
@@ -289,6 +300,14 @@
     return Tok;
   }
 
+  template <typename T> T *getRoleAs() {
+    return dyn_cast_or_null<T>(Role.get());
+  }
+
+  template <typename T> const T *getRoleAs() const {
+    return dyn_cast_or_null<T>(Role.get());
+  }
+
   FormatToken *MatchingParen;
 
   FormatToken *Previous;
@@ -300,6 +319,87 @@
   void operator=(const FormatToken &) LLVM_DELETED_FUNCTION;
 };
 
+class ContinuationIndenter;
+struct LineState;
+
+class TokenRole {
+public:
+  virtual ~TokenRole();
+
+  /// \brief After the \c TokenAnnotator has finished annotating all the tokens,
+  /// this function precomputes required information for formatting.
+  virtual void precomputeFormattingInfos(const FormatToken *Token);
+
+  /// \brief Discriminator for LLVM-style RTTI (dyn_cast<> et al.).
+  enum RoleKind {
+    RK_List
+  };
+  TokenRole(const FormatStyle &Style, RoleKind K) : Style(Style), Kind(K) {}
+  RoleKind getKind() const { return Kind; }
+
+  /// \brief Apply the special formatting that the given role demands.
+  ///
+  /// Continues formatting from \p State leaving indentation to \p Indenter and
+  /// returns the total penalty that this formatting incurs.
+  virtual unsigned format(LineState &State, ContinuationIndenter *Indenter,
+                          bool DryRun) {
+    return 0;
+  }
+
+protected:
+  const FormatStyle &Style;
+
+private:
+  const RoleKind Kind;
+};
+
+class CommaSeparatedList : public TokenRole {
+public:
+  CommaSeparatedList(const FormatStyle &Style) : TokenRole(Style, RK_List) {}
+
+  virtual void precomputeFormattingInfos(const FormatToken *Token);
+
+  virtual unsigned format(LineState &State, ContinuationIndenter *Indenter,
+                          bool DryRun);
+
+  /// \brief Adds \p Token as the next comma to the \c CommaSeparated list.
+  void CommaFound(const FormatToken *Token) { Commas.push_back(Token); }
+
+private:
+  /// \brief A struct that holds information on how to format a given list with
+  /// a specific number of columns.
+  struct ColumnFormat {
+    /// \brief The number of columns to use.
+    unsigned Columns;
+
+    /// \brief The total width in characters.
+    unsigned TotalWidth;
+
+    /// \brief The total length in number of lines.
+    unsigned TotalLength;
+
+    /// \brief The size of each column in characters.
+    SmallVector<unsigned, 8> ColumnSizes;
+  };
+
+  /// \brief Calculate which \c ColumnFormat fits best into \p RemainingColumns.
+  const ColumnFormat *getColumnFormat(unsigned RemainingColumns) const;
+
+  /// \brief The ordered \c FormatTokens making up the commas of this list.
+  SmallVector<const FormatToken *, 6> Commas;
+
+  /// \brief The length of each of the lists items in characters.
+  SmallVector<unsigned, 6> ItemLengths;
+
+  /// \brief Precomputed formats that can be used for this list.
+  SmallVector<ColumnFormat, 4> Formats;
+
+public:
+  static bool classof(const TokenRole *Role) {
+    return Role->getKind() == RK_List;
+  }
+};
+
 } // namespace format
 } // namespace clang
 
Index: lib/Format/TokenAnnotator.cpp
===================================================================
--- lib/Format/TokenAnnotator.cpp
+++ lib/Format/TokenAnnotator.cpp
@@ -29,9 +29,11 @@
 /// into template parameter lists.
 class AnnotatingParser {
 public:
-  AnnotatingParser(AnnotatedLine &Line, IdentifierInfo &Ident_in)
-      : Line(Line), CurrentToken(Line.First), KeywordVirtualFound(false),
-        NameFound(false), AutoFound(false), Ident_in(Ident_in) {
+  AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line,
+                   IdentifierInfo &Ident_in)
+      : Style(Style), Line(Line), CurrentToken(Line.First),
+        KeywordVirtualFound(false), NameFound(false), AutoFound(false),
+        Ident_in(Ident_in) {
     Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
   }
 
@@ -268,6 +270,9 @@
   void updateParameterCount(FormatToken *Left, FormatToken *Current) {
     if (Current->is(tok::comma)) {
       ++Left->ParameterCount;
+      if (!Left->Role)
+        Left->Role.reset(new CommaSeparatedList(Style));
+      Left->getRoleAs<CommaSeparatedList>()->CommaFound(Current);
     } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) {
       Left->ParameterCount = 1;
     }
@@ -827,6 +832,7 @@
 
   SmallVector<Context, 8> Contexts;
 
+  const FormatStyle &Style;
   AnnotatedLine &Line;
   FormatToken *CurrentToken;
   bool KeywordVirtualFound;
@@ -937,7 +943,7 @@
 } // end anonymous namespace
 
 void TokenAnnotator::annotate(AnnotatedLine &Line) {
-  AnnotatingParser Parser(Line, Ident_in);
+  AnnotatingParser Parser(Style, Line, Ident_in);
   Line.Type = Parser.parseLine();
   if (Line.Type == LT_Invalid)
     return;
@@ -1006,6 +1012,11 @@
     Current = Current->Next;
   }
 
+  for (Current = Line.First; Current != NULL; Current = Current->Next) {
+    if (Current->Role)
+      Current->Role->precomputeFormattingInfos(Current);
+  }
+
   calculateUnbreakableTailLengths(Line);
   DEBUG({
     printDebugInfo(Line);
Index: unittests/Format/FormatTest.cpp
===================================================================
--- unittests/Format/FormatTest.cpp
+++ unittests/Format/FormatTest.cpp
@@ -1691,16 +1691,6 @@
       "  100000000, \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\"\n"
       "};");
 
-  verifyFormat(
-      "static SomeClass = { a, b, c, d, e, f, g, h, i, j,\n"
-      "                     looooooooooooooooooooooooooooooooooongname,\n"
-      "                     looooooooooooooooooooooooooooooong };");
-  // Allow bin-packing in static initializers as this would often lead to
-  // terrible results, e.g.:
-  verifyGoogleFormat(
-      "static SomeClass = {a, b, c, d, e, f, g, h, i, j,\n"
-      "                    looooooooooooooooooooooooooooooooooongname,\n"
-      "                    looooooooooooooooooooooooooooooong};");
   // Here, everything other than the "}" would fit on a line.
   verifyFormat("static int LooooooooooooooooooooooooongVariable[1] = {\n"
                "  100000000000000000000000\n"
@@ -1782,7 +1772,7 @@
       "struct {\n"
       "  unsigned bit;\n"
       "  const char *const name;\n"
-      "} kBitsToOs[] = { { kOsMac, \"Mac\" }, { kOsWin, \"Windows\" },\n"
+      "} kBitsToOs[] = { { kOsMac, \"Mac\" },     { kOsWin, \"Windows\" },\n"
       "                  { kOsLinux, \"Linux\" }, { kOsCrOS, \"Chrome OS\" } };");
 }
 
@@ -4152,6 +4142,34 @@
                  NoSpaces);
 }
 
+TEST_F(FormatTest, FormatsBracedListsinColumnLayout) {
+  verifyFormat("vector<int> x = { 1, 22, 333, 4444, 55555, 666666, 7777777,\n"
+               "                  1, 22, 333, 4444, 55555, 666666, 7777777,\n"
+               "                  1, 22, 333, 4444, 55555, 666666, 7777777,\n"
+               "                  1, 22, 333, 4444, 55555, 666666, 7777777,\n"
+               "                  1, 22, 333, 4444, 55555, 666666, 7777777,\n"
+               "                  1, 22, 333, 4444, 55555, 666666, 7777777 };");
+  verifyFormat("vector<int> x = { 1, 22, 333, 4444, 55555, 666666, 7777777,\n"
+               "                  // line comment\n"
+               "                  1, 22, 333, 4444, 55555, 666666, 7777777,\n"
+               "                  1, 22, 333, 4444, 55555,\n"
+               "                  // line comment\n"
+               "                  1, 22, 333, 4444, 55555, 666666, 7777777,\n"
+               "                  1, 22, 333, 4444, 55555, 666666, 7777777 };");
+  verifyFormat(
+      "vector<int> x = { 1,       22, 333, 4444, 55555, 666666, 7777777,\n"
+      "                  1,       22, 333, 4444, 55555, 666666, 7777777,\n"
+      "                  1,       22, 333, 4444, 55555, 666666, // comment\n"
+      "                  7777777, 1,  22,  333,  4444,  55555,  666666,\n"
+      "                  7777777, 1,  22,  333,  4444,  55555,  666666,\n"
+      "                  7777777, 1,  22,  333,  4444,  55555,  666666,\n"
+      "                  7777777 };");
+  verifyFormat("static const uint16_t CallerSavedRegs64Bit[] = {\n"
+               "  X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI,\n"
+               "  X86::R8,  X86::R9,  X86::R10, X86::R11, 0\n"
+               "};");
+}
+
 TEST_F(FormatTest, PullTrivialFunctionDefinitionsIntoSingleLine) {
   verifyFormat("void f() { return 42; }");
   verifyFormat("void f() {\n"

_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

[PATCH] clang-format: Add column layout formatting for braced lists

Reply via email to