Typz updated this revision to Diff 108651.
Typz added a comment.

Rebase


https://reviews.llvm.org/D33589

Files:
  lib/Format/ContinuationIndenter.cpp
  lib/Format/ContinuationIndenter.h
  lib/Format/UnwrappedLineFormatter.cpp
  unittests/Format/FormatTest.cpp

Index: unittests/Format/FormatTest.cpp
===================================================================
--- unittests/Format/FormatTest.cpp
+++ unittests/Format/FormatTest.cpp
@@ -9475,6 +9475,60 @@
   EXPECT_EQ("#pragma option -C -A", format("#pragma    option   -C   -A"));
 }
 
+TEST_F(FormatTest, OptimizeBreakPenaltyVsExcess) {
+  FormatStyle Style = getLLVMStyle();
+  Style.ColumnLimit = 20;
+
+  verifyFormat("int a; // the\n"
+               "       // comment", Style);
+  EXPECT_EQ("int a; /* first line\n"
+            "        * second\n"
+            "        * line third\n"
+            "        * line\n"
+            "        */",
+			format("int a; /* first line\n"
+                   "        * second\n"
+                   "        * line third\n"
+                   "        * line\n"
+                   "        */", Style));
+  EXPECT_EQ("int a; // first line\n"
+            "       // second\n"
+            "       // line third\n"
+            "       // line",
+            format("int a; // first line\n"
+                   "       // second line\n"
+                   "       // third line",
+                   Style));
+
+  Style.PenaltyExcessCharacter = 90;
+  verifyFormat("int a; // the comment", Style);
+  EXPECT_EQ("int a; // the\n"
+            "       // comment aa",
+            format("int a; // the comment aa", Style));
+  EXPECT_EQ("int a; /* first line\n"
+            "        * second line\n"
+            "        * third line\n"
+            "        */",
+			format("int a; /* first line\n"
+            	   "        * second line\n"
+			       "        * third line\n"
+            	   "        */", Style));
+  EXPECT_EQ("int a; // first line\n"
+            "       // second line\n"
+            "       // third line",
+            format("int a; // first line\n"
+                   "       // second line\n"
+                   "       // third line",
+                   Style));
+  EXPECT_EQ("int a; /* first line\n"
+            "        * second\n"
+            "        * line third\n"
+            "        * line\n"
+            "        */",
+            format("int a; /* first line second line third line"
+				   "\n*/", Style));
+}
+
 #define EXPECT_ALL_STYLES_EQUAL(Styles)                                        \
   for (size_t i = 1; i < Styles.size(); ++i)                                   \
   EXPECT_EQ(Styles[0], Styles[i]) << "Style #" << i << " of " << Styles.size() \
Index: lib/Format/UnwrappedLineFormatter.cpp
===================================================================
--- lib/Format/UnwrappedLineFormatter.cpp
+++ lib/Format/UnwrappedLineFormatter.cpp
@@ -885,6 +885,7 @@
     for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end();
          I != E; ++I) {
       unsigned Penalty = 0;
+      State.Reflow = (*I)->State.Reflow;
       formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty);
       Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false);
 
Index: lib/Format/ContinuationIndenter.h
===================================================================
--- lib/Format/ContinuationIndenter.h
+++ lib/Format/ContinuationIndenter.h
@@ -27,6 +27,7 @@
 namespace format {
 
 class AnnotatedLine;
+class BreakableToken;
 struct FormatToken;
 struct LineState;
 struct ParenState;
@@ -100,6 +101,11 @@
   unsigned breakProtrudingToken(const FormatToken &Current, LineState &State,
                                 bool DryRun);
 
+  unsigned reflowProtrudingToken(const FormatToken & Current, LineState & State,
+                                 std::unique_ptr<clang::format::BreakableToken> & Token,
+                                 unsigned ColumnLimit, bool DryRun);
+
+
   /// \brief Appends the next token to \p State and updates information
   /// necessary for indentation.
   ///
@@ -350,6 +356,8 @@
   /// \brief The indent of the first token.
   unsigned FirstIndent;
 
+  bool Reflow = true;
+
   /// \brief The line that is being formatted.
   ///
   /// Does not need to be considered for memoization because it doesn't change.
Index: lib/Format/ContinuationIndenter.cpp
===================================================================
--- lib/Format/ContinuationIndenter.cpp
+++ lib/Format/ContinuationIndenter.cpp
@@ -1219,92 +1219,10 @@
   return 0;
 }
 
-unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
-                                                    LineState &State,
-                                                    bool DryRun) {
-  // Don't break multi-line tokens other than block comments. Instead, just
-  // update the state.
-  if (Current.isNot(TT_BlockComment) && Current.IsMultiline)
-    return addMultilineToken(Current, State);
-
-  // Don't break implicit string literals or import statements.
-  if (Current.is(TT_ImplicitStringLiteral) ||
-      State.Line->Type == LT_ImportStatement)
-    return 0;
-
-  if (!Current.isStringLiteral() && !Current.is(tok::comment))
-    return 0;
-
-  std::unique_ptr<BreakableToken> Token;
+unsigned ContinuationIndenter::reflowProtrudingToken(const FormatToken &Current, LineState &State,
+                                                     std::unique_ptr<BreakableToken> &Token,
+                                                     unsigned ColumnLimit, bool DryRun) {
   unsigned StartColumn = State.Column - Current.ColumnWidth;
-  unsigned ColumnLimit = getColumnLimit(State);
-
-  if (Current.isStringLiteral()) {
-    // FIXME: String literal breaking is currently disabled for Java and JS, as
-    // it requires strings to be merged using "+" which we don't support.
-    if (Style.Language == FormatStyle::LK_Java ||
-        Style.Language == FormatStyle::LK_JavaScript ||
-        !Style.BreakStringLiterals)
-      return 0;
-
-    // Don't break string literals inside preprocessor directives (except for
-    // #define directives, as their contents are stored in separate lines and
-    // are not affected by this check).
-    // This way we avoid breaking code with line directives and unknown
-    // preprocessor directives that contain long string literals.
-    if (State.Line->Type == LT_PreprocessorDirective)
-      return 0;
-    // Exempts unterminated string literals from line breaking. The user will
-    // likely want to terminate the string before any line breaking is done.
-    if (Current.IsUnterminatedLiteral)
-      return 0;
-
-    StringRef Text = Current.TokenText;
-    StringRef Prefix;
-    StringRef Postfix;
-    // FIXME: Handle whitespace between '_T', '(', '"..."', and ')'.
-    // FIXME: Store Prefix and Suffix (or PrefixLength and SuffixLength to
-    // reduce the overhead) for each FormatToken, which is a string, so that we
-    // don't run multiple checks here on the hot path.
-    if ((Text.endswith(Postfix = "\"") &&
-         (Text.startswith(Prefix = "@\"") || Text.startswith(Prefix = "\"") ||
-          Text.startswith(Prefix = "u\"") || Text.startswith(Prefix = "U\"") ||
-          Text.startswith(Prefix = "u8\"") ||
-          Text.startswith(Prefix = "L\""))) ||
-        (Text.startswith(Prefix = "_T(\"") && Text.endswith(Postfix = "\")"))) {
-      Token.reset(new BreakableStringLiteral(Current, StartColumn, Prefix,
-                                             Postfix, State.Line->InPPDirective,
-                                             Encoding, Style));
-    } else {
-      return 0;
-    }
-  } else if (Current.is(TT_BlockComment)) {
-    if (!Current.isTrailingComment() || !Style.ReflowComments ||
-        // If a comment token switches formatting, like
-        // /* clang-format on */, we don't want to break it further,
-        // but we may still want to adjust its indentation.
-        switchesFormatting(Current))
-      return addMultilineToken(Current, State);
-    Token.reset(new BreakableBlockComment(
-        Current, StartColumn, Current.OriginalColumn, !Current.Previous,
-        State.Line->InPPDirective, Encoding, Style));
-  } else if (Current.is(TT_LineComment) &&
-             (Current.Previous == nullptr ||
-              Current.Previous->isNot(TT_ImplicitStringLiteral))) {
-    if (!Style.ReflowComments ||
-        CommentPragmasRegex.match(Current.TokenText.substr(2)) ||
-        switchesFormatting(Current))
-      return 0;
-    Token.reset(new BreakableLineCommentSection(
-        Current, StartColumn, Current.OriginalColumn, !Current.Previous,
-        /*InPPDirective=*/false, Encoding, Style));
-    // We don't insert backslashes when breaking line comments.
-    ColumnLimit = Style.ColumnLimit;
-  } else {
-    return 0;
-  }
-  if (Current.UnbreakableTailLength >= ColumnLimit)
-    return 0;
 
   unsigned RemainingSpace = ColumnLimit - Current.UnbreakableTailLength;
   bool BreakInserted = false;
@@ -1330,14 +1248,18 @@
                                      RemainingSpace, SplitBefore, Whitespaces);
     RemainingTokenColumns = Token->getLineLengthAfterSplitBefore(
         LineIndex, TailOffset, RemainingTokenColumns, ColumnLimit, SplitBefore);
+    if (!State.Reflow) {
+      if (RemainingTokenColumns > RemainingSpace)
+        Penalty += Style.PenaltyExcessCharacter *
+                   (RemainingTokenColumns - RemainingSpace);
+      continue;
+    }
     while (RemainingTokenColumns > RemainingSpace) {
       BreakableToken::Split Split = Token->getSplit(
           LineIndex, TailOffset, ColumnLimit, CommentPragmasRegex);
       if (Split.first == StringRef::npos) {
-        // The last line's penalty is handled in addNextStateToQueue().
-        if (LineIndex < EndIndex - 1)
-          Penalty += Style.PenaltyExcessCharacter *
-                     (RemainingTokenColumns - RemainingSpace);
+        Penalty += Style.PenaltyExcessCharacter *
+                   (RemainingTokenColumns - RemainingSpace);
         break;
       }
       assert(Split.first != 0);
@@ -1393,6 +1315,8 @@
   State.Column = RemainingTokenColumns;
 
   if (BreakInserted) {
+    assert(State.Reflow);
+
     // If we break the token inside a parameter list, we need to break before
     // the next parameter on all levels, so that the next parameter is clearly
     // visible. Line comments already introduce a break.
@@ -1412,6 +1336,121 @@
   return Penalty;
 }
 
+unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
+                                                    LineState &State,
+                                                    bool DryRun) {
+  // Don't break multi-line tokens other than block comments. Instead, just
+  // update the state.
+  if (Current.isNot(TT_BlockComment) && Current.IsMultiline)
+    return addMultilineToken(Current, State);
+
+  // Don't break implicit string literals or import statements.
+  if (Current.is(TT_ImplicitStringLiteral) ||
+      State.Line->Type == LT_ImportStatement)
+    return 0;
+
+  if (!Current.isStringLiteral() && !Current.is(tok::comment))
+    return 0;
+
+  std::unique_ptr<BreakableToken> Token;
+  unsigned StartColumn = State.Column - Current.ColumnWidth;
+  unsigned ColumnLimit = getColumnLimit(State);
+
+  if (Current.isStringLiteral()) {
+    // FIXME: String literal breaking is currently disabled for Java and JS, as
+    // it requires strings to be merged using "+" which we don't support.
+    if (Style.Language == FormatStyle::LK_Java ||
+        Style.Language == FormatStyle::LK_JavaScript ||
+        !Style.BreakStringLiterals)
+      return 0;
+
+    // Don't break string literals inside preprocessor directives (except for
+    // #define directives, as their contents are stored in separate lines and
+    // are not affected by this check).
+    // This way we avoid breaking code with line directives and unknown
+    // preprocessor directives that contain long string literals.
+    if (State.Line->Type == LT_PreprocessorDirective)
+      return 0;
+    // Exempts unterminated string literals from line breaking. The user will
+    // likely want to terminate the string before any line breaking is done.
+    if (Current.IsUnterminatedLiteral)
+      return 0;
+
+    StringRef Text = Current.TokenText;
+    StringRef Prefix;
+    StringRef Postfix;
+    // FIXME: Handle whitespace between '_T', '(', '"..."', and ')'.
+    // FIXME: Store Prefix and Suffix (or PrefixLength and SuffixLength to
+    // reduce the overhead) for each FormatToken, which is a string, so that we
+    // don't run multiple checks here on the hot path.
+    if ((Text.endswith(Postfix = "\"") &&
+         (Text.startswith(Prefix = "@\"") || Text.startswith(Prefix = "\"") ||
+          Text.startswith(Prefix = "u\"") || Text.startswith(Prefix = "U\"") ||
+          Text.startswith(Prefix = "u8\"") ||
+          Text.startswith(Prefix = "L\""))) ||
+        (Text.startswith(Prefix = "_T(\"") && Text.endswith(Postfix = "\")"))) {
+      Token.reset(new BreakableStringLiteral(Current, StartColumn, Prefix,
+                                             Postfix, State.Line->InPPDirective,
+                                             Encoding, Style));
+    } else {
+      return 0;
+    }
+  } else if (Current.is(TT_BlockComment)) {
+    if (!Current.isTrailingComment() || !Style.ReflowComments ||
+        // If a comment token switches formatting, like
+        // /* clang-format on */, we don't want to break it further,
+        // but we may still want to adjust its indentation.
+        switchesFormatting(Current))
+      return addMultilineToken(Current, State);
+    Token.reset(new BreakableBlockComment(
+        Current, StartColumn, Current.OriginalColumn, !Current.Previous,
+        State.Line->InPPDirective, Encoding, Style));
+  } else if (Current.is(TT_LineComment) &&
+             (Current.Previous == nullptr ||
+              Current.Previous->isNot(TT_ImplicitStringLiteral))) {
+    if (!Style.ReflowComments ||
+        CommentPragmasRegex.match(Current.TokenText.substr(2)) ||
+        switchesFormatting(Current))
+      return 0;
+    Token.reset(new BreakableLineCommentSection(
+        Current, StartColumn, Current.OriginalColumn, !Current.Previous,
+        /*InPPDirective=*/false, Encoding, Style));
+    // We don't insert backslashes when breaking line comments.
+    ColumnLimit = Style.ColumnLimit;
+  } else {
+    return 0;
+  }
+  if (Current.UnbreakableTailLength >= ColumnLimit)
+    return 0;
+
+  unsigned Penalty = 0;
+  if (!DryRun) {
+    Penalty = reflowProtrudingToken(Current, State, Token, ColumnLimit, DryRun);
+  } else {
+    LineState NoreflowState = State;
+    NoreflowState.Reflow = false;
+    unsigned NoreflowPenalty = reflowProtrudingToken(Current, NoreflowState, Token, ColumnLimit, DryRun);
+
+    if (NoreflowPenalty > 0) {
+      State.Reflow = true;
+      Penalty = reflowProtrudingToken(Current, State, Token, ColumnLimit, DryRun);
+    }
+
+    if (NoreflowPenalty <= Penalty) {
+      State = NoreflowState;
+      Penalty = NoreflowPenalty;
+    }
+  }
+
+  // Do not count the penalty twice, it will be added afterwards
+  if (State.Column > getColumnLimit(State)) {
+    unsigned ExcessCharacters = State.Column - getColumnLimit(State);
+    Penalty -= Style.PenaltyExcessCharacter * ExcessCharacters;
+  }
+
+  return Penalty;
+}
+
 unsigned ContinuationIndenter::getColumnLimit(const LineState &State) const {
   // In preprocessor directives reserve two chars for trailing " \"
   return Style.ColumnLimit - (State.Line->InPPDirective ? 2 : 0);
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to