klimek updated this revision to Diff 122433.
klimek marked an inline comment as done.
klimek added a comment.

- Add test.

Updating D39900: Refactor ContinuationIndenter's breakProtrudingToken logic 
into slightly more
==============================================================================================

orthogonal pieces.


https://reviews.llvm.org/D39900

Files:
  lib/Format/BreakableToken.cpp
  lib/Format/ContinuationIndenter.cpp
  lib/Format/ContinuationIndenter.h
  unittests/Format/FormatTest.cpp
  unittests/Format/FormatTestComments.cpp

Index: unittests/Format/FormatTestComments.cpp
===================================================================
--- unittests/Format/FormatTestComments.cpp
+++ unittests/Format/FormatTestComments.cpp
@@ -680,15 +680,26 @@
   EXPECT_EQ("{\n"
             "  //\n"
             "  //\\\n"
-            "  // long 1 2 3 4\n"
-            "  // 5\n"
+            "  // long 1 2 3 4 5\n"
             "}",
             format("{\n"
                    "  //\n"
                    "  //\\\n"
                    "  // long 1 2 3 4 5\n"
                    "}",
                    getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("{\n"
+            "  //\n"
+            "  //\\\n"
+            "  // long 1 2 3 4 5\n"
+            "  // 6\n"
+            "}",
+            format("{\n"
+                   "  //\n"
+                   "  //\\\n"
+                   "  // long 1 2 3 4 5 6\n"
+                   "}",
+                   getLLVMStyleWithColumns(20)));
 }
 
 TEST_F(FormatTestComments, PreservesHangingIndentInCxxComments) {
Index: unittests/Format/FormatTest.cpp
===================================================================
--- unittests/Format/FormatTest.cpp
+++ unittests/Format/FormatTest.cpp
@@ -6319,7 +6319,8 @@
                "#include_next <test.h>"
                "#include \"abc.h\" // this is included for ABC\n"
                "#include \"some long include\" // with a comment\n"
-               "#include \"some very long include paaaaaaaaaaaaaaaaaaaaaaath\"",
+               "#include \"some very long include path\"\n"
+               "#include <some/very/long/include/path>\n",
                getLLVMStyleWithColumns(35));
   EXPECT_EQ("#include \"a.h\"", format("#include  \"a.h\""));
   EXPECT_EQ("#include <a>", format("#include<a>"));
Index: lib/Format/ContinuationIndenter.h
===================================================================
--- lib/Format/ContinuationIndenter.h
+++ lib/Format/ContinuationIndenter.h
@@ -29,6 +29,7 @@
 namespace format {
 
 class AnnotatedLine;
+class BreakableToken;
 struct FormatToken;
 struct LineState;
 struct ParenState;
@@ -105,11 +106,20 @@
   /// 
   /// \returns An extra penalty induced by reformatting the token.
   unsigned reformatRawStringLiteral(const FormatToken &Current,
-                                    unsigned StartColumn, LineState &State,
-                                    StringRef Delimiter,
+                                    LineState &State,
                                     const FormatStyle &RawStringStyle,
                                     bool DryRun);
 
+  /// \brief If the current token is at the end of the current line, handle
+  /// the transition to the next line.
+  unsigned handleEndOfLine(const FormatToken &Current, LineState &State,
+                           bool DryRun, bool AllowBreak);
+
+  /// \brief If \p Current is a raw string that is configured to be reformatted,
+  /// return the style to be used.
+  llvm::Optional<FormatStyle> getRawStringStyle(const FormatToken &Current,
+                                                const LineState &State);
+
   /// \brief If the current token sticks out over the end of the line, break
   /// it if possible.
   ///
@@ -120,7 +130,13 @@
   /// penalty for the column limit violation in the last line (and in single
   /// line tokens) is handled in \c addNextStateToQueue.
   unsigned breakProtrudingToken(const FormatToken &Current, LineState &State,
-                                bool DryRun);
+                                bool AllowBreak, bool DryRun);
+
+  /// \brief Returns the \c BreakableToken starting at \p Current, or nullptr
+  /// if the current token cannot be broken.
+  std::unique_ptr<BreakableToken>
+  createBreakableToken(const FormatToken &Current, LineState &State,
+                       bool AllowBreak);
 
   /// \brief Appends the next token to \p State and updates information
   /// necessary for indentation.
Index: lib/Format/ContinuationIndenter.cpp
===================================================================
--- lib/Format/ContinuationIndenter.cpp
+++ lib/Format/ContinuationIndenter.cpp
@@ -1008,8 +1008,8 @@
 
   moveStatePastFakeLParens(State, Newline);
   moveStatePastScopeCloser(State);
-  bool CanBreakProtrudingToken = !State.Stack.back().NoLineBreak &&
-                                 !State.Stack.back().NoLineBreakInOperand;
+  bool AllowBreak = !State.Stack.back().NoLineBreak &&
+                    !State.Stack.back().NoLineBreakInOperand;
   moveStatePastScopeOpener(State, Newline);
   moveStatePastFakeRParens(State);
 
@@ -1023,13 +1023,9 @@
 
   State.Column += Current.ColumnWidth;
   State.NextToken = State.NextToken->Next;
-  unsigned Penalty = 0;
-  if (CanBreakProtrudingToken)
-    Penalty = breakProtrudingToken(Current, State, DryRun);
-  if (State.Column > getColumnLimit(State)) {
-    unsigned ExcessCharacters = State.Column - getColumnLimit(State);
-    Penalty += Style.PenaltyExcessCharacter * ExcessCharacters;
-  }
+
+  unsigned Penalty =
+      handleEndOfLine(Current, State, DryRun, AllowBreak);
 
   if (Current.Role)
     Current.Role->formatFromToken(State, this, DryRun);
@@ -1282,8 +1278,10 @@
 }
 
 unsigned ContinuationIndenter::reformatRawStringLiteral(
-    const FormatToken &Current, unsigned StartColumn, LineState &State,
-    StringRef Delimiter, const FormatStyle &RawStringStyle, bool DryRun) {
+    const FormatToken &Current, LineState &State,
+    const FormatStyle &RawStringStyle, bool DryRun) {
+  unsigned StartColumn = State.Column - Current.ColumnWidth;
+  auto Delimiter = *getRawStringDelimiter(Current.TokenText);
   // The text of a raw string is between the leading 'R"delimiter(' and the
   // trailing 'delimiter)"'.
   unsigned PrefixSize = 3 + Delimiter.size();
@@ -1353,9 +1351,6 @@
 
 unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current,
                                                  LineState &State) {
-  if (!Current.IsMultiline)
-    return 0;
-
   // Break before further function parameters on all levels.
   for (unsigned i = 0, e = State.Stack.size(); i != e; ++i)
     State.Stack[i].BreakBeforeParameter = true;
@@ -1370,60 +1365,69 @@
   return 0;
 }
 
-unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
-                                                    LineState &State,
-                                                    bool DryRun) {
+unsigned ContinuationIndenter::handleEndOfLine(const FormatToken &Current,
+                                               LineState &State, bool DryRun,
+                                               bool AllowBreak) {
+  unsigned Penalty = 0;
   // Compute the raw string style to use in case this is a raw string literal
   // that can be reformatted.
-  llvm::Optional<StringRef> Delimiter = None;
-  llvm::Optional<FormatStyle> RawStringStyle = None;
-  if (Current.isStringLiteral())
-    Delimiter = getRawStringDelimiter(Current.TokenText);
-  if (Delimiter)
-    RawStringStyle = RawStringFormats.get(*Delimiter);
-
-  // Don't break multi-line tokens other than block comments and raw string
-  // literals. Instead, just update the state.
-  if (Current.isNot(TT_BlockComment) && !RawStringStyle && Current.IsMultiline)
-    return addMultilineToken(Current, State);
-
-  // Don't break implicit string literals or import statements.
-  if (Current.is(TT_ImplicitStringLiteral) ||
-      State.Line->Type == LT_ImportStatement)
-    return 0;
+  auto RawStringStyle = getRawStringStyle(Current, State);
+  if (RawStringStyle) {
+    Penalty = reformatRawStringLiteral(Current, State, *RawStringStyle, DryRun);
+  } else if (Current.IsMultiline && Current.isNot(TT_BlockComment)) {
+    // Don't break multi-line tokens other than block comments and raw string
+    // literals. Instead, just update the state.
+    Penalty = addMultilineToken(Current, State);
+  } else if (State.Line->Type != LT_ImportStatement) {
+    // We generally don't break import statements.
+    Penalty = breakProtrudingToken(Current, State, AllowBreak, DryRun);
+  }
+  if (State.Column > getColumnLimit(State)) {
+    unsigned ExcessCharacters = State.Column - getColumnLimit(State);
+    Penalty += Style.PenaltyExcessCharacter * ExcessCharacters;
+  }
+  return Penalty;
+}
 
-  if (!Current.isStringLiteral() && !Current.is(tok::comment))
-    return 0;
+llvm::Optional<FormatStyle>
+ContinuationIndenter::getRawStringStyle(const FormatToken &Current,
+                                        const LineState &State) {
+  if (!Current.isStringLiteral())
+    return None;
+  auto Delimiter = getRawStringDelimiter(Current.TokenText);
+  if (!Delimiter)
+    return None;
+  auto RawStringStyle = RawStringFormats.get(*Delimiter);
+  if (!RawStringStyle)
+    return None;
+  RawStringStyle->ColumnLimit = getColumnLimit(State);
+  return RawStringStyle;
+}
 
-  std::unique_ptr<BreakableToken> Token;
+std::unique_ptr<BreakableToken> ContinuationIndenter::createBreakableToken(
+    const FormatToken &Current, LineState &State, bool AllowBreak) {
   unsigned StartColumn = State.Column - Current.ColumnWidth;
-  unsigned ColumnLimit = getColumnLimit(State);
-
   if (Current.isStringLiteral()) {
     // FIXME: String literal breaking is currently disabled for Java and JS, as
     // it requires strings to be merged using "+" which we don't support.
     if (Style.Language == FormatStyle::LK_Java ||
         Style.Language == FormatStyle::LK_JavaScript ||
-        !Style.BreakStringLiterals)
-      return 0;
+        !Style.BreakStringLiterals ||
+        !AllowBreak)
+      return nullptr;
 
     // Don't break string literals inside preprocessor directives (except for
     // #define directives, as their contents are stored in separate lines and
     // are not affected by this check).
     // This way we avoid breaking code with line directives and unknown
     // preprocessor directives that contain long string literals.
     if (State.Line->Type == LT_PreprocessorDirective)
-      return 0;
+      return nullptr;
     // Exempts unterminated string literals from line breaking. The user will
     // likely want to terminate the string before any line breaking is done.
     if (Current.IsUnterminatedLiteral)
-      return 0;
+      return nullptr;
 
-    if (RawStringStyle) {
-      RawStringStyle->ColumnLimit = ColumnLimit;
-      return reformatRawStringLiteral(Current, StartColumn, State, *Delimiter,
-                                      *RawStringStyle, DryRun);
-    } 
     StringRef Text = Current.TokenText;
     StringRef Prefix;
     StringRef Postfix;
@@ -1437,36 +1441,48 @@
           Text.startswith(Prefix = "u8\"") ||
           Text.startswith(Prefix = "L\""))) ||
         (Text.startswith(Prefix = "_T(\"") && Text.endswith(Postfix = "\")"))) {
-      Token.reset(new BreakableStringLiteral(Current, StartColumn, Prefix,
-                                             Postfix, State.Line->InPPDirective,
-                                             Encoding, Style));
-    } else {
-      return 0;
+      return llvm::make_unique<BreakableStringLiteral>(
+          Current, StartColumn, Prefix, Postfix, State.Line->InPPDirective,
+          Encoding, Style);
     }
   } else if (Current.is(TT_BlockComment)) {
     if (!Style.ReflowComments ||
         // If a comment token switches formatting, like
         // /* clang-format on */, we don't want to break it further,
         // but we may still want to adjust its indentation.
-        switchesFormatting(Current))
-      return addMultilineToken(Current, State);
-    Token.reset(new BreakableBlockComment(
+        switchesFormatting(Current)) {
+      return nullptr;
+    }
+    return llvm::make_unique<BreakableBlockComment>(
         Current, StartColumn, Current.OriginalColumn, !Current.Previous,
-        State.Line->InPPDirective, Encoding, Style));
+        State.Line->InPPDirective, Encoding, Style);
   } else if (Current.is(TT_LineComment) &&
              (Current.Previous == nullptr ||
               Current.Previous->isNot(TT_ImplicitStringLiteral))) {
     if (!Style.ReflowComments ||
         CommentPragmasRegex.match(Current.TokenText.substr(2)) ||
         switchesFormatting(Current))
-      return 0;
-    Token.reset(new BreakableLineCommentSection(
+      return nullptr;
+    return llvm::make_unique<BreakableLineCommentSection>(
         Current, StartColumn, Current.OriginalColumn, !Current.Previous,
-        /*InPPDirective=*/false, Encoding, Style));
+        /*InPPDirective=*/false, Encoding, Style);
+  }
+  return nullptr;
+}
+
+unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
+                                                    LineState &State,
+                                                    bool AllowBreak,
+                                                    bool DryRun) {
+  std::unique_ptr<BreakableToken> Token =
+      createBreakableToken(Current, State, AllowBreak);
+  if (!Token)
+    return 0;
+  unsigned ColumnLimit = getColumnLimit(State);
+  unsigned StartColumn = State.Column - Current.ColumnWidth;
+  if (Current.is(TT_LineComment)) {
     // We don't insert backslashes when breaking line comments.
     ColumnLimit = Style.ColumnLimit;
-  } else {
-    return 0;
   }
   if (Current.UnbreakableTailLength >= ColumnLimit)
     return 0;
Index: lib/Format/BreakableToken.cpp
===================================================================
--- lib/Format/BreakableToken.cpp
+++ lib/Format/BreakableToken.cpp
@@ -742,6 +742,7 @@
     Prefix.resize(Lines.size());
     OriginalPrefix.resize(Lines.size());
     for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) {
+      Lines[i] = Lines[i].ltrim(Blanks);
       // We need to trim the blanks in case this is not the first line in a
       // multiline comment. Then the indent is included in Lines[i].
       StringRef IndentPrefix =
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to