Re: [PATCH] Split long lines in multi-line comments.

Alexander Kornienko Mon, 18 Mar 2013 11:57:53 -0700

  Addressed djasper's comments, added support for comments in preprocessor 
directives.


Hi djasper, klimek,

http://llvm-reviews.chandlerc.com/D547

CHANGE SINCE LAST DIFF
  http://llvm-reviews.chandlerc.com/D547?vs=1306&id=1309#toc

Files:
  lib/Format/Format.cpp
  unittests/Format/FormatTest.cpp

Index: lib/Format/Format.cpp
===================================================================
--- lib/Format/Format.cpp
+++ lib/Format/Format.cpp
@@ -146,7 +146,7 @@
       alignComments();
 
     if (Tok.Type == TT_BlockComment)
-      indentBlockComment(Tok.FormatTok, Spaces);
+      indentBlockComment(Tok, Spaces, false, Style);
 
     storeReplacement(Tok.FormatTok, getNewLineText(NewLines, Spaces));
   }
@@ -159,6 +159,9 @@
   void replacePPWhitespace(const AnnotatedToken &Tok, unsigned NewLines,
                            unsigned Spaces, unsigned WhitespaceStartColumn,
                            const FormatStyle &Style) {
+    if (Tok.Type == TT_BlockComment)
+      indentBlockComment(Tok, Spaces, true, Style);
+
     storeReplacement(
         Tok.FormatTok,
         getNewLineText(NewLines, Spaces, WhitespaceStartColumn, Style));
@@ -171,19 +174,19 @@
   ///
   /// \p InPPDirective, \p Spaces, \p WhitespaceStartColumn and \p Style are
   /// used to generate the correct line break.
-  void breakToken(const AnnotatedToken &Tok, unsigned Offset, StringRef Prefix,
-                  StringRef Postfix, bool InPPDirective, unsigned Spaces,
+  void breakToken(const FormatToken &Tok, unsigned Offset,
+                  unsigned ReplaceChars, StringRef Prefix, StringRef Postfix,
+                  bool InPPDirective, unsigned Spaces,
                   unsigned WhitespaceStartColumn, const FormatStyle &Style) {
     std::string NewLineText;
     if (!InPPDirective)
       NewLineText = getNewLineText(1, Spaces);
     else
       NewLineText = getNewLineText(1, Spaces, WhitespaceStartColumn, Style);
     std::string ReplacementText = (Prefix + NewLineText + Postfix).str();
-    SourceLocation InsertAt = Tok.FormatTok.WhiteSpaceStart
-        .getLocWithOffset(Tok.FormatTok.WhiteSpaceLength + Offset);
-    Replaces.insert(
-        tooling::Replacement(SourceMgr, InsertAt, 0, ReplacementText));
+    SourceLocation Location = Tok.Tok.getLocation().getLocWithOffset(Offset);
+    Replaces.insert(tooling::Replacement(SourceMgr, Location, ReplaceChars,
+                                         ReplacementText));
   }
 
   /// \brief Returns all the \c Replacements created during formatting.
@@ -193,33 +196,123 @@
   }
 
 private:
-  void indentBlockComment(const FormatToken &Tok, int Indent) {
-    SourceLocation TokenLoc = Tok.Tok.getLocation();
-    int IndentDelta = Indent - SourceMgr.getSpellingColumnNumber(TokenLoc) + 1;
-    const char *Start = SourceMgr.getCharacterData(TokenLoc);
-    const char *Current = Start;
-    const char *TokEnd = Current + Tok.TokenLength;
-    llvm::SmallVector<SourceLocation, 16> LineStarts;
-    while (Current < TokEnd) {
-      if (*Current == '\n') {
-        ++Current;
-        LineStarts.push_back(TokenLoc.getLocWithOffset(Current - Start));
-        // If we need to outdent the line, check that it's indented enough.
-        for (int i = 0; i < -IndentDelta; ++i, ++Current)
-          if (Current >= TokEnd || *Current != ' ')
-            return;
-      } else {
-        ++Current;
+  /// \brief Finds a common prefix of lines of a block comment to properly
+  /// indent (and possibly decorate with '*'s) added lines.
+  ///
+  /// The first line is ignored (it's special and starts with /*).
+  /// When there are less than three lines, we don't have enough information, so
+  /// better use no prefix.
+  static StringRef findCommentLinesPrefix(ArrayRef<StringRef> Lines,
+                                          const char *PrefixChars = " *") {
+    if (Lines.size() < 3)
+      return "";
+    StringRef Prefix(Lines[1].data(), Lines[1].find_first_not_of(PrefixChars));
+    for (size_t i = 2; i < Lines.size(); ++i) {
+      for (size_t j = 0; j < Prefix.size() && j < Lines[i].size(); ++j) {
+        if (Prefix[j] != Lines[i][j]) {
+          Prefix = Prefix.substr(0, j);
+          break;
+        }
+      }
+    }
+    return Prefix;
+  }
+
+  bool isLineSafeToSplit(StringRef Line) {
+    return Line.find_first_of("/\\:<>()[]{}*&%") == StringRef::npos;
+  }
+
+  void splitLineInComment(const FormatToken &Tok, StringRef Line,
+                          size_t ColumnLimit, const StringRef LinePrefix,
+                          bool InPPDirective, const FormatStyle &Style,
+                          const char *WhiteSpaceChars = " ") {
+    if (Line.size() <= ColumnLimit || !isLineSafeToSplit(Line))
+      return;
+
+    const char *TokenStart = SourceMgr.getCharacterData(Tok.Tok.getLocation());
+    while (Line.rtrim().size() > ColumnLimit) {
+      // Try to break at the last whitespace before the column limit.
+      size_t SpacePos = Line.find_last_of(WhiteSpaceChars, ColumnLimit + 1);
+      if (SpacePos == StringRef::npos) {
+        // Try to find any whitespace in the line.
+        SpacePos = Line.find_first_of(WhiteSpaceChars);
+        if (SpacePos == StringRef::npos) // No whitespace found, give up.
+          break;
+      }
+
+      StringRef FirstPiece = Line.substr(0, SpacePos).rtrim();
+      StringRef RemainingLine = Line.substr(SpacePos).ltrim();
+      if (RemainingLine.empty())
+        break;
+      Line = RemainingLine;
+
+      size_t ReplaceChars = Line.begin() - FirstPiece.end();
+      breakToken(Tok, FirstPiece.end() - TokenStart, ReplaceChars, "",
+                 LinePrefix, InPPDirective, 0,
+                 FirstPiece.size() + LinePrefix.size(), Style);
+    }
+
+    StringRef TrimmedLine = Line.rtrim();
+    if (TrimmedLine != Line || InPPDirective) {
+      // Remove trailing whitespace/insert backslash.
+      breakToken(Tok, TrimmedLine.end() - TokenStart,
+                 Line.size() - TrimmedLine.size() + 1, "", "", InPPDirective, 0,
+                 TrimmedLine.size() + LinePrefix.size(), Style);
+    }
+  }
+
+  void indentBlockComment(const AnnotatedToken &Tok, int Indent,
+                          bool InPPDirective, const FormatStyle &Style) {
+    const SourceLocation TokenLoc = Tok.FormatTok.Tok.getLocation();
+    const int CurrentIndent = SourceMgr.getSpellingColumnNumber(TokenLoc) - 1;
+    const int IndentDelta = Indent - CurrentIndent;
+    const StringRef Text(SourceMgr.getCharacterData(TokenLoc),
+                         Tok.FormatTok.TokenLength);
+    SmallVector<StringRef, 16> Lines;
+    Text.split(Lines, "\n");
+
+    if (IndentDelta > 0) {
+      std::string WhiteSpace(IndentDelta, ' ');
+      for (size_t i = 1; i < Lines.size(); ++i) {
+        Replaces.insert(tooling::Replacement(
+            SourceMgr, TokenLoc.getLocWithOffset(Lines[i].data() - Text.data()),
+            0, WhiteSpace));
+      }
+    } else if (IndentDelta < 0) {
+      std::string WhiteSpace(-IndentDelta, ' ');
+      // Check that the line is indented enough.
+      for (size_t i = 1; i < Lines.size(); ++i) {
+        if (!Lines[i].startswith(WhiteSpace))
+          return;
+      }
+      for (size_t i = 1; i < Lines.size(); ++i) {
+        Replaces.insert(tooling::Replacement(
+            SourceMgr, TokenLoc.getLocWithOffset(Lines[i].data() - Text.data()),
+            -IndentDelta, ""));
       }
     }
 
-    for (size_t i = 0; i < LineStarts.size(); ++i) {
-      if (IndentDelta > 0)
-        Replaces.insert(tooling::Replacement(SourceMgr, LineStarts[i], 0,
-                                             std::string(IndentDelta, ' ')));
-      else if (IndentDelta < 0)
-        Replaces.insert(
-            tooling::Replacement(SourceMgr, LineStarts[i], -IndentDelta, ""));
+    // Split long lines in comments.
+    const StringRef Prefix = findCommentLinesPrefix(Lines);
+    std::string LinePrefix;
+    if (IndentDelta < 0)
+      LinePrefix = Prefix.substr(-IndentDelta).str();
+    else
+      LinePrefix = std::string(IndentDelta, ' ') + Prefix.str();
+
+    size_t PrefixSize = Prefix.size();
+    if (Prefix.endswith("*")) {
+      LinePrefix += " ";
+      ++PrefixSize;
+    }
+
+    size_t ColumnLimit = Style.ColumnLimit - PrefixSize - IndentDelta;
+    if (InPPDirective)
+      --ColumnLimit; // Reserve space for terminating backslash.
+
+    for (size_t i = 1; i < Lines.size(); ++i) {
+      splitLineInComment(Tok.FormatTok, Lines[i].substr(PrefixSize),
+                         ColumnLimit, LinePrefix, InPPDirective, Style);
     }
   }
 
@@ -756,17 +849,17 @@
     if (Current.isNot(tok::string_literal))
       return 0;
     // Only break up default narrow strings.
-    if (StringRef(Current.FormatTok.Tok.getLiteralData()).find('"') != 0)
+    const char *LiteralData = Current.FormatTok.Tok.getLiteralData();
+    if (!LiteralData || *LiteralData != '"')
       return 0;
 
     unsigned Penalty = 0;
     unsigned TailOffset = 0;
     unsigned TailLength = Current.FormatTok.TokenLength;
     unsigned StartColumn = State.Column - Current.FormatTok.TokenLength;
     unsigned OffsetFromStart = 0;
     while (StartColumn + TailLength > getColumnLimit()) {
-      StringRef Text = StringRef(
-          Current.FormatTok.Tok.getLiteralData() + TailOffset, TailLength);
+      StringRef Text = StringRef(LiteralData + TailOffset, TailLength);
       if (StartColumn + OffsetFromStart + 1 > getColumnLimit())
         break;
       StringRef::size_type SplitPoint = getSplitPoint(
@@ -780,8 +873,8 @@
           StartColumn + OffsetFromStart + SplitPoint + 2;
       State.Stack.back().LastSpace = StartColumn;
       if (!DryRun) {
-        Whitespaces.breakToken(Current, TailOffset + SplitPoint + 1, "\"", "\"",
-                               Line.InPPDirective, StartColumn,
+        Whitespaces.breakToken(Current.FormatTok, TailOffset + SplitPoint + 1,
+                               0, "\"", "\"", Line.InPPDirective, StartColumn,
                                WhitespaceStartColumn, Style);
       }
       TailOffset += SplitPoint + 1;
Index: unittests/Format/FormatTest.cpp
===================================================================
--- unittests/Format/FormatTest.cpp
+++ unittests/Format/FormatTest.cpp
@@ -651,6 +651,112 @@
                    " */"));
 }
 
+TEST_F(FormatTest, SplitsLongLinesInComments) {
+  EXPECT_EQ("/*\n"
+            "This is a long\n"
+            "comment that doesn't\n"
+            "fit on one line.\n"
+            "*/",
+            format("/*\n"
+                   "This is a long                                         "
+                   "comment that doesn't                                    "
+                   "fit on one line.                                      \n"
+                   "*/", getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/*\n"
+            " * This is a long\n"
+            " * comment that\n"
+            " * doesn't fit on\n"
+            " * one line.\n"
+            " */",
+            format("/*\n"
+                   " * This is a long "
+                   "   comment that     "
+                   "   doesn't fit on   "
+                   "   one line.                                            \n"
+                   " */", getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/*\n"
+            " * This_is_a_comment_with_words_that_dont_fit_on_one_line\n"
+            " * so_it_should_be_broken\n"
+            " * wherever_a_space_occurs\n"
+            " */",
+            format("/*\n"
+                   " * This_is_a_comment_with_words_that_dont_fit_on_one_line "
+                   "   so_it_should_be_broken "
+                   "   wherever_a_space_occurs                             \n"
+                   " */",
+                   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/*\n"
+            " * This_comment_can_not_be_broken_into_lines\n"
+            " */",
+            format("/*\n"
+                   " * This_comment_can_not_be_broken_into_lines\n"
+                   " */",
+                   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("{\n"
+            "  /*\n"
+            "  This is another\n"
+            "  long comment that\n"
+            "  doesn't fit on one\n"
+            "  line    1234567890\n"
+            "  */\n"
+            "}",
+            format("{\n"
+                   "/*\n"
+                   "This is another     "
+                   "  long comment that "
+                   "  doesn't fit on one"
+                   "  line    1234567890\n"
+                   "*/\n"
+                   "}", getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("{\n"
+            "  /*\n"
+            "   * This        i s\n"
+            "   * another comment\n"
+            "   * t hat  doesn' t\n"
+            "   * fit on one l i\n"
+            "   * n e\n"
+            "   */\n"
+            "}",
+            format("{\n"
+                   "/*\n"
+                   " * This        i s"
+                   "   another comment"
+                   "   t hat  doesn' t"
+                   "   fit on one l i"
+                   "   n e\n"
+                   " */\n"
+                   "}", getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/*\n"
+            " * This is a long\n"
+            " * comment that\n"
+            " * doesn't fit on\n"
+            " * one line\n"
+            " */",
+            format("   /*\n"
+                   "    * This is a long comment that doesn't fit on one line\n"
+                   "    */", getLLVMStyleWithColumns(20)));
+}
+
+TEST_F(FormatTest, SplitsLongLinesInCommentsInPreprocessor) {
+  EXPECT_EQ("#define X          \\\n"
+            // FIXME: Backslash should be added here.
+            "  /*\n"
+            "   Macro comment   \\\n"
+            "   with a long     \\\n"
+            "   line            \\\n"
+            // FIXME: We should look at the length of the last line of the token
+            // instead of the full token's length.
+            //"  */               \\\n"
+            "   */\\\n"
+            "  A + B",
+            format("#define X \\\n"
+                   "  /*\n"
+                   "   Macro comment with a long  line\n"
+                   "   */ \\\n"
+                   "  A + B",
+                   getLLVMStyleWithColumns(20)));
+}
+
 TEST_F(FormatTest, CommentsInStaticInitializers) {
   EXPECT_EQ(
       "static SomeType type = { aaaaaaaaaaaaaaaaaaaa, /* comment */\n"

_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

Re: [PATCH] Split long lines in multi-line comments.

Reply via email to