Re: [PATCH] Split long lines in multi-line comments.

Alexander Kornienko Mon, 18 Mar 2013 08:27:53 -0700

  Addressed djasper's comments, fixed bugs, added more tests. Now it correctly 
trims whitespace in broken lines and tries to handle looooong words that don't 
fit in one line correctly.


Hi djasper, klimek,

http://llvm-reviews.chandlerc.com/D547

CHANGE SINCE LAST DIFF
  http://llvm-reviews.chandlerc.com/D547?vs=1302&id=1306#toc

Files:
  lib/Format/Format.cpp
  unittests/Format/FormatTest.cpp

Index: lib/Format/Format.cpp
===================================================================
--- lib/Format/Format.cpp
+++ lib/Format/Format.cpp
@@ -146,7 +146,7 @@
       alignComments();
 
     if (Tok.Type == TT_BlockComment)
-      indentBlockComment(Tok.FormatTok, Spaces);
+      indentBlockComment(Tok, Spaces, Style);
 
     storeReplacement(Tok.FormatTok, getNewLineText(NewLines, Spaces));
   }
@@ -180,8 +180,8 @@
     else
       NewLineText = getNewLineText(1, Spaces, WhitespaceStartColumn, Style);
     std::string ReplacementText = (Prefix + NewLineText + Postfix).str();
-    SourceLocation InsertAt = Tok.FormatTok.WhiteSpaceStart
-        .getLocWithOffset(Tok.FormatTok.WhiteSpaceLength + Offset);
+    SourceLocation InsertAt =
+        Tok.FormatTok.Tok.getLocation().getLocWithOffset(Offset);
     Replaces.insert(
         tooling::Replacement(SourceMgr, InsertAt, 0, ReplacementText));
   }
@@ -193,33 +193,101 @@
   }
 
 private:
-  void indentBlockComment(const FormatToken &Tok, int Indent) {
-    SourceLocation TokenLoc = Tok.Tok.getLocation();
-    int IndentDelta = Indent - SourceMgr.getSpellingColumnNumber(TokenLoc) + 1;
-    const char *Start = SourceMgr.getCharacterData(TokenLoc);
-    const char *Current = Start;
-    const char *TokEnd = Current + Tok.TokenLength;
-    llvm::SmallVector<SourceLocation, 16> LineStarts;
-    while (Current < TokEnd) {
-      if (*Current == '\n') {
-        ++Current;
-        LineStarts.push_back(TokenLoc.getLocWithOffset(Current - Start));
-        // If we need to outdent the line, check that it's indented enough.
-        for (int i = 0; i < -IndentDelta; ++i, ++Current)
-          if (Current >= TokEnd || *Current != ' ')
-            return;
-      } else {
-        ++Current;
+  /// \brief Finds a common prefix of lines of a block comment to properly
+  /// indent (and possibly decorate with '*'s) added lines.
+  ///
+  /// The first line is ignored (it's special and starts with /*).
+  /// When there are less than three lines, we don't have enough information, so
+  /// better use no prefix.
+  static StringRef findCommentLinesPrefix(ArrayRef<StringRef> Lines,
+                                          const char *PrefixChars = " *") {
+    if (Lines.size() < 3)
+      return "";
+    StringRef Prefix(Lines[1].data(), Lines[1].find_first_not_of(PrefixChars));
+    for (size_t i = 2; i < Lines.size(); ++i) {
+      for (size_t j = 0; j < Prefix.size() && j < Lines[i].size(); ++j) {
+        if (Prefix[j] != Lines[i][j]) {
+          Prefix = Prefix.substr(0, j);
+          break;
+        }
+      }
+    }
+    return Prefix;
+  }
+
+  bool isLineSafeToSplit(StringRef Line) {
+    return Line.find_first_of("/\\:<>()[]{}*&%") == StringRef::npos;
+  }
+
+  void indentBlockComment(const AnnotatedToken &Tok, int Indent,
+                          const FormatStyle &Style) {
+    const SourceLocation TokenLoc = Tok.FormatTok.Tok.getLocation();
+    const int TokenIndent = SourceMgr.getSpellingColumnNumber(TokenLoc) - 1;
+    const int IndentDelta = Indent - TokenIndent;
+    const StringRef Text(SourceMgr.getCharacterData(TokenLoc),
+                         Tok.FormatTok.TokenLength);
+    SmallVector<StringRef, 16> Lines;
+    Text.split(Lines, "\n");
+
+    if (IndentDelta > 0) {
+      std::string WhiteSpace(IndentDelta, ' ');
+      for (size_t i = 1; i < Lines.size(); ++i) {
+        Replaces.insert(tooling::Replacement(
+            SourceMgr, TokenLoc.getLocWithOffset(Lines[i].data() - Text.data()),
+            0, WhiteSpace));
+      }
+    } else if (IndentDelta < 0) {
+      std::string WhiteSpace(-IndentDelta, ' ');
+      // Check that the line is indented enough.
+      for (size_t i = 1; i < Lines.size(); ++i) {
+        if (!Lines[i].startswith(WhiteSpace))
+          return;
+      }
+      for (size_t i = 1; i < Lines.size(); ++i) {
+        Replaces.insert(tooling::Replacement(
+            SourceMgr, TokenLoc.getLocWithOffset(Lines[i].data() - Text.data()),
+            -IndentDelta, ""));
       }
     }
 
-    for (size_t i = 0; i < LineStarts.size(); ++i) {
-      if (IndentDelta > 0)
-        Replaces.insert(tooling::Replacement(SourceMgr, LineStarts[i], 0,
-                                             std::string(IndentDelta, ' ')));
-      else if (IndentDelta < 0)
-        Replaces.insert(
-            tooling::Replacement(SourceMgr, LineStarts[i], -IndentDelta, ""));
+    // Split long lines in comments.
+    const StringRef Prefix = findCommentLinesPrefix(Lines);
+    std::string NewLineText;
+    if (IndentDelta < 0)
+      NewLineText = getNewLineText(1, 0) + Prefix.substr(-IndentDelta).str();
+    else
+      NewLineText = getNewLineText(1, IndentDelta) + Prefix.str();
+
+    size_t PrefixSize = Prefix.size();
+    if (Prefix.endswith("*")) {
+      NewLineText += " ";
+      ++PrefixSize;
+    }
+    size_t ColumnLimit = Style.ColumnLimit - PrefixSize - IndentDelta;
+
+    const char *WhiteSpaceChars = " ";
+    for (size_t i = 1; i < Lines.size(); ++i) {
+      StringRef Line = Lines[i].substr(PrefixSize);
+      if (Line.size() <= ColumnLimit || !isLineSafeToSplit(Line))
+        continue;
+      while (Line.size() > ColumnLimit) {
+        // Try to break at whitespace.
+        size_t SpacePos = Line.find_last_of(WhiteSpaceChars, ColumnLimit + 1);
+        if (SpacePos == StringRef::npos)  // Break at first possibility.
+          SpacePos = Line.find_first_of(WhiteSpaceChars);
+        if (SpacePos == StringRef::npos)  // No whitespace, give up.
+          break;
+        StringRef FirstPiece = Line.substr(0, SpacePos).rtrim();
+        Line = Line.substr(SpacePos).ltrim();
+
+        if (Line.empty())  // Just remove trailing space.
+          NewLineText = "";
+
+        Replaces.insert(tooling::Replacement(
+            SourceMgr,
+            TokenLoc.getLocWithOffset(FirstPiece.end() - Text.data()),
+            Line.begin() - FirstPiece.end(), NewLineText));
+      }
     }
   }
 
@@ -756,17 +824,17 @@
     if (Current.isNot(tok::string_literal))
       return 0;
     // Only break up default narrow strings.
-    if (StringRef(Current.FormatTok.Tok.getLiteralData()).find('"') != 0)
+    const char *LiteralData = Current.FormatTok.Tok.getLiteralData();
+    if (!LiteralData || *LiteralData != '"')
       return 0;
 
     unsigned Penalty = 0;
     unsigned TailOffset = 0;
     unsigned TailLength = Current.FormatTok.TokenLength;
     unsigned StartColumn = State.Column - Current.FormatTok.TokenLength;
     unsigned OffsetFromStart = 0;
     while (StartColumn + TailLength > getColumnLimit()) {
-      StringRef Text = StringRef(
-          Current.FormatTok.Tok.getLiteralData() + TailOffset, TailLength);
+      StringRef Text = StringRef(LiteralData + TailOffset, TailLength);
       if (StartColumn + OffsetFromStart + 1 > getColumnLimit())
         break;
       StringRef::size_type SplitPoint = getSplitPoint(
Index: unittests/Format/FormatTest.cpp
===================================================================
--- unittests/Format/FormatTest.cpp
+++ unittests/Format/FormatTest.cpp
@@ -651,6 +651,92 @@
                    " */"));
 }
 
+TEST_F(FormatTest, SplitsLongLinesInComments) {
+  EXPECT_EQ("/*\n"
+            "This is a long\n"
+            "comment that doesn't\n"
+            "fit on one line.\n"
+            "*/",
+            format("/*\n"
+                   "This is a long                                         "
+                   "comment that doesn't                                    "
+                   "fit on one line.                                      \n"
+                   "*/", getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/*\n"
+            " * This is a long\n"
+            " * comment that\n"
+            " * doesn't fit on\n"
+            " * one line.\n"
+            " */",
+            format("/*\n"
+                   " * This is a long "
+                   "   comment that     "
+                   "   doesn't fit on   "
+                   "   one line.                                            \n"
+                   " */", getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/*\n"
+            " * This_is_a_comment_with_words_that_dont_fit_on_one_line\n"
+            " * so_it_should_be_broken\n"
+            " * wherever_a_space_occurs\n"
+            " */",
+            format("/*\n"
+                   " * This_is_a_comment_with_words_that_dont_fit_on_one_line "
+                   "   so_it_should_be_broken "
+                   "   wherever_a_space_occurs                             \n"
+                   " */",
+                   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/*\n"
+            " * This_comment_can_not_be_broken_into_lines\n"
+            " */",
+            format("/*\n"
+                   " * This_comment_can_not_be_broken_into_lines\n"
+                   " */",
+                   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("{\n"
+            "  /*\n"
+            "  This is another\n"
+            "  long comment that\n"
+            "  doesn't fit on one\n"
+            "  line    1234567890\n"
+            "  */\n"
+            "}",
+            format("{\n"
+                   "/*\n"
+                   "This is another     "
+                   "  long comment that "
+                   "  doesn't fit on one"
+                   "  line    1234567890\n"
+                   "*/\n"
+                   "}", getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("{\n"
+            "  /*\n"
+            "   * This        i s\n"
+            "   * another comment\n"
+            "   * t hat  doesn' t\n"
+            "   * fit on one l i\n"
+            "   * n e\n"
+            "   */\n"
+            "}",
+            format("{\n"
+                   "/*\n"
+                   " * This        i s"
+                   "   another comment"
+                   "   t hat  doesn' t"
+                   "   fit on one l i"
+                   "   n e\n"
+                   " */\n"
+                   "}", getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/*\n"
+            " * This is a long\n"
+            " * comment that\n"
+            " * doesn't fit on\n"
+            " * one line\n"
+            " */",
+            format("   /*\n"
+                   "    * This is a long comment that doesn't fit on one line\n"
+                   "    */", getLLVMStyleWithColumns(20)));
+}
+
 TEST_F(FormatTest, CommentsInStaticInitializers) {
   EXPECT_EQ(
       "static SomeType type = { aaaaaaaaaaaaaaaaaaaa, /* comment */\n"

_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

Re: [PATCH] Split long lines in multi-line comments.

Reply via email to