[PATCH] D28764: [clang-format] Implement comment reflowing (v3)

2017-01-25 Thread Krasimir Georgiev via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL293055: [clang-format] Implement comment reflowing. 
(authored by krasimir).

Changed prior to commit:
  https://reviews.llvm.org/D28764?vs=85739=85740#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D28764

Files:
  cfe/trunk/lib/Format/BreakableToken.cpp
  cfe/trunk/lib/Format/BreakableToken.h
  cfe/trunk/lib/Format/CMakeLists.txt
  cfe/trunk/lib/Format/Comments.cpp
  cfe/trunk/lib/Format/Comments.h
  cfe/trunk/lib/Format/ContinuationIndenter.cpp
  cfe/trunk/lib/Format/TokenAnnotator.cpp
  cfe/trunk/lib/Format/UnwrappedLineParser.cpp
  cfe/trunk/lib/Format/WhitespaceManager.cpp
  cfe/trunk/unittests/Format/FormatTest.cpp
  cfe/trunk/unittests/Format/FormatTestSelective.cpp

Index: cfe/trunk/lib/Format/BreakableToken.h
===
--- cfe/trunk/lib/Format/BreakableToken.h
+++ cfe/trunk/lib/Format/BreakableToken.h
@@ -8,9 +8,10 @@
 //===--===//
 ///
 /// \file
-/// \brief Declares BreakableToken, BreakableStringLiteral, and
-/// BreakableBlockComment classes, that contain token type-specific logic to
-/// break long lines in tokens.
+/// \brief Declares BreakableToken, BreakableStringLiteral, BreakableComment,
+/// BreakableBlockComment and BreakableLineCommentSection classes, that contain
+/// token type-specific logic to break long lines in tokens and reflow content
+/// between tokens.
 ///
 //===--===//
 
@@ -25,10 +26,43 @@
 namespace clang {
 namespace format {
 
+/// \brief Checks if \p Token switches formatting, like /* clang-format off */.
+/// \p Token must be a comment.
+bool switchesFormatting(const FormatToken );
+
 struct FormatStyle;
 
 /// \brief Base class for strategies on how to break tokens.
 ///
+/// This is organised around the concept of a \c Split, which is a whitespace
+/// range that signifies a position of the content of a token where a
+/// reformatting might be done. Operating with splits is divided into 3
+/// operations:
+/// - getSplit, for finding a split starting at a position,
+/// - getLineLengthAfterSplit, for calculating the size in columns of the rest
+///   of the content after a split has been used for breaking, and
+/// - insertBreak, for executing the split using a whitespace manager.
+///
+/// There is a pair of operations that are used to compress a long whitespace
+/// range with a single space if that will bring the line lenght under the
+/// column limit:
+/// - getLineLengthAfterCompression, for calculating the size in columns of the
+///   line after a whitespace range has been compressed, and
+/// - compressWhitespace, for executing the whitespace compression using a
+///   whitespace manager; note that the compressed whitespace may be in the
+///   middle of the original line and of the reformatted line.
+///
+/// For tokens where the whitespace before each line needs to be also
+/// reformatted, for example for tokens supporting reflow, there are analogous
+/// operations that might be executed before the main line breaking occurs:
+/// - getSplitBefore, for finding a split such that the content preceding it
+///   needs to be specially reflown,
+/// - getLineLengthAfterSplitBefore, for calculating the line length in columns
+///   of the remainder of the content after the beginning of the content has
+///   been reformatted, and
+/// - replaceWhitespaceBefore, for executing the reflow using a whitespace
+///   manager.
+///
 /// FIXME: The interface seems set in stone, so we might want to just pull the
 /// strategy into the class, instead of controlling it from the outside.
 class BreakableToken {
@@ -42,13 +76,13 @@
   virtual unsigned getLineCount() const = 0;
 
   /// \brief Returns the number of columns required to format the piece of line
-  /// at \p LineIndex, from byte offset \p Offset with length \p Length.
+  /// at \p LineIndex, from byte offset \p TailOffset with length \p Length.
   ///
-  /// Note that previous breaks are not taken into account. \p Offset is always
-  /// specified from the start of the (original) line.
+  /// Note that previous breaks are not taken into account. \p TailOffset is
+  /// always specified from the start of the (original) line.
   /// \p Length can be set to StringRef::npos, which means "to the end of line".
   virtual unsigned
-  getLineLengthAfterSplit(unsigned LineIndex, unsigned Offset,
+  getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,
   StringRef::size_type Length) const = 0;
 
   /// \brief Returns a range (offset, length) at which to break the line at
@@ -61,16 +95,59 @@
   virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
WhitespaceManager ) = 0;
 
+  /// \brief Returns the number of columns required to format the 

[PATCH] D28764: [clang-format] Implement comment reflowing (v3)

2017-01-25 Thread Manuel Klimek via Phabricator via cfe-commits
klimek accepted this revision.
klimek added a comment.
This revision is now accepted and ready to land.

LG


https://reviews.llvm.org/D28764



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D28764: [clang-format] Implement comment reflowing (v3)

2017-01-25 Thread Krasimir Georgiev via Phabricator via cfe-commits
krasimir updated this revision to Diff 85739.
krasimir marked 2 inline comments as done.
krasimir added a comment.

- Address review comments.


https://reviews.llvm.org/D28764

Files:
  lib/Format/BreakableToken.cpp
  lib/Format/BreakableToken.h
  lib/Format/CMakeLists.txt
  lib/Format/Comments.cpp
  lib/Format/Comments.h
  lib/Format/ContinuationIndenter.cpp
  lib/Format/TokenAnnotator.cpp
  lib/Format/UnwrappedLineParser.cpp
  lib/Format/WhitespaceManager.cpp
  unittests/Format/FormatTest.cpp
  unittests/Format/FormatTestSelective.cpp

Index: unittests/Format/FormatTestSelective.cpp
===
--- unittests/Format/FormatTestSelective.cpp
+++ unittests/Format/FormatTestSelective.cpp
@@ -111,13 +111,19 @@
 format("int   a; // comment\n"
"intb; // comment",
0, 0));
-  EXPECT_EQ("int   a; // comment\n"
-" // line 2\n"
+  EXPECT_EQ("int a; // comment\n"
+"   // line 2\n"
 "int b;",
 format("int   a; // comment\n"
"// line 2\n"
"int b;",
28, 0));
+  EXPECT_EQ("int   a; // comment\n"
+"// comment 2\n"
+"int b;",
+format("int   a; // comment\n"
+   "// comment 2\n"
+   "int b;", 28, 0));
   EXPECT_EQ("int aa; // comment\n"
 "int b;\n"
 "int c; // unrelated comment",
Index: unittests/Format/FormatTest.cpp
===
--- unittests/Format/FormatTest.cpp
+++ unittests/Format/FormatTest.cpp
@@ -1783,6 +1783,463 @@
"0x00, 0x00, 0x00, 0x00};// comment\n");
 }
 
+TEST_F(FormatTest, ReflowsComments) {
+  // Break a long line and reflow with the full next line.
+  EXPECT_EQ("// long long long\n"
+"// long long",
+format("// long long long long\n"
+   "// long",
+   getLLVMStyleWithColumns(20)));
+
+  // Keep the trailing newline while reflowing.
+  EXPECT_EQ("// long long long\n"
+"// long long\n",
+format("// long long long long\n"
+   "// long\n",
+   getLLVMStyleWithColumns(20)));
+
+  // Break a long line and reflow with a part of the next line.
+  EXPECT_EQ("// long long long\n"
+"// long long\n"
+"// long_long",
+format("// long long long long\n"
+   "// long long_long",
+   getLLVMStyleWithColumns(20)));
+
+  // Break but do not reflow if the first word from the next line is too long.
+  EXPECT_EQ("// long long long\n"
+"// long\n"
+"// long_long_long\n",
+format("// long long long long\n"
+   "// long_long_long\n",
+   getLLVMStyleWithColumns(20)));
+
+  // Don't break or reflow short lines.
+  verifyFormat("// long\n"
+   "// long long long lo\n"
+   "// long long long lo\n"
+   "// long",
+   getLLVMStyleWithColumns(20));
+
+  // Keep prefixes and decorations while reflowing.
+  EXPECT_EQ("/// long long long\n"
+"/// long long\n",
+format("/// long long long long\n"
+   "/// long\n",
+   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("//! long long long\n"
+"//! long long\n",
+format("//! long long long long\n"
+   "//! long\n",
+   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/* long long long\n"
+" * long long */",
+format("/* long long long long\n"
+   " * long */",
+   getLLVMStyleWithColumns(20)));
+
+  // Don't bring leading whitespace up while reflowing.
+  EXPECT_EQ("/*  long long long\n"
+" * long long long\n"
+" */",
+format("/*  long long long long\n"
+   " *  long long\n"
+   " */",
+   getLLVMStyleWithColumns(20)));
+
+  // Reflow the last line of a block comment with its trailing '*/'.
+  EXPECT_EQ("/* long long long\n"
+"   long long */",
+format("/* long long long long\n"
+   "   long */",
+   getLLVMStyleWithColumns(20)));
+
+  // Reflow two short lines; keep the postfix of the last one.
+  EXPECT_EQ("/* long long long\n"
+" * long long long */",
+format("/* long long long long\n"
+   " * long\n"
+   " * long */",
+   getLLVMStyleWithColumns(20)));
+
+  // Put the postfix of the last short reflow line on a newline if it doesn't
+  // fit.
+  EXPECT_EQ("/* long long long\n"
+" * long long longg\n"
+" */",
+format("/* long long long long\n"
+   " * long\n"
+  

[PATCH] D28764: [clang-format] Implement comment reflowing (v3)

2017-01-25 Thread Krasimir Georgiev via Phabricator via cfe-commits
krasimir added inline comments.



Comment at: lib/Format/BreakableToken.cpp:747
+Split SplitBefore, WhitespaceManager ) {
+  // If this is the first line of a token, we need to inform Whitespace Manager
+  // about it: either adapt the whitespace range preceding it, or mark it as an

klimek wrote:
> What do you mean by "first line of a token"?
Sadly, there are multi-line line comment tokens, as in:
```
// line 1 \
// line 2
```
Add a note about it.



Comment at: lib/Format/BreakableToken.h:110-111
+
+  /// \brief Returns a whitespace range (offset, length) of the content at \p
+  /// LineIndex such that the content preceding this range needs to be
+  /// reformatted before any breaks are made to this line.

klimek wrote:
> I'd keep \p and LineIndex in the same line...
Sadly, that's one thing that the reflowing doesn't support yet...


https://reviews.llvm.org/D28764



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D28764: [clang-format] Implement comment reflowing (v3)

2017-01-25 Thread Manuel Klimek via Phabricator via cfe-commits
klimek added inline comments.



Comment at: lib/Format/BreakableToken.cpp:747
+Split SplitBefore, WhitespaceManager ) {
+  // If this is the first line of a token, we need to inform Whitespace Manager
+  // about it: either adapt the whitespace range preceding it, or mark it as an

What do you mean by "first line of a token"?



Comment at: lib/Format/BreakableToken.h:110-111
+
+  /// \brief Returns a whitespace range (offset, length) of the content at \p
+  /// LineIndex such that the content preceding this range needs to be
+  /// reformatted before any breaks are made to this line.

I'd keep \p and LineIndex in the same line...


https://reviews.llvm.org/D28764



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D28764: [clang-format] Implement comment reflowing (v3)

2017-01-25 Thread Krasimir Georgiev via Phabricator via cfe-commits
krasimir added a comment.

Tried to add some comments around range computations. Most of the time it's 
about converting range offsets from local line-based values to 
start-of-token-based offsets.




Comment at: lib/Format/BreakableToken.cpp:279-280
+  return Content.size() >= 2 &&
+  Content != "clang-format on" &&
+  Content != "clang-format off" &&
+  !Content.endswith("\\") &&

klimek wrote:
> Can we now use delimitsRegion here?
Yep! Actually came up with even better alternative: we use switchesFormatting() 
below:



Comment at: lib/Format/ContinuationIndenter.cpp:1090-1092
+// Checks if Token delimits formatting regions, like /* clang-format off */.
+// Token must be a comment.
+static bool delimitsRegion(const FormatToken ) {

klimek wrote:
> delimitsRegion seems overly abstract. Perhaps switchesFormatting?
Yep. Also moved it to BreakableToken.h/.cpp so it can be used from both 
ContinuationIndenter::breakProtrudingToken and BreakableComment::mayReflow!


https://reviews.llvm.org/D28764



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D28764: [clang-format] Implement comment reflowing (v3)

2017-01-25 Thread Krasimir Georgiev via Phabricator via cfe-commits
krasimir updated this revision to Diff 85737.
krasimir marked 3 inline comments as done.
krasimir added a comment.

- Address comments. Add a bunch of comments about various range computations.


https://reviews.llvm.org/D28764

Files:
  lib/Format/BreakableToken.cpp
  lib/Format/BreakableToken.h
  lib/Format/CMakeLists.txt
  lib/Format/Comments.cpp
  lib/Format/Comments.h
  lib/Format/ContinuationIndenter.cpp
  lib/Format/TokenAnnotator.cpp
  lib/Format/UnwrappedLineParser.cpp
  lib/Format/WhitespaceManager.cpp
  unittests/Format/FormatTest.cpp
  unittests/Format/FormatTestSelective.cpp

Index: unittests/Format/FormatTestSelective.cpp
===
--- unittests/Format/FormatTestSelective.cpp
+++ unittests/Format/FormatTestSelective.cpp
@@ -111,13 +111,19 @@
 format("int   a; // comment\n"
"intb; // comment",
0, 0));
-  EXPECT_EQ("int   a; // comment\n"
-" // line 2\n"
+  EXPECT_EQ("int a; // comment\n"
+"   // line 2\n"
 "int b;",
 format("int   a; // comment\n"
"// line 2\n"
"int b;",
28, 0));
+  EXPECT_EQ("int   a; // comment\n"
+"// comment 2\n"
+"int b;",
+format("int   a; // comment\n"
+   "// comment 2\n"
+   "int b;", 28, 0));
   EXPECT_EQ("int aa; // comment\n"
 "int b;\n"
 "int c; // unrelated comment",
Index: unittests/Format/FormatTest.cpp
===
--- unittests/Format/FormatTest.cpp
+++ unittests/Format/FormatTest.cpp
@@ -1783,6 +1783,463 @@
"0x00, 0x00, 0x00, 0x00};// comment\n");
 }
 
+TEST_F(FormatTest, ReflowsComments) {
+  // Break a long line and reflow with the full next line.
+  EXPECT_EQ("// long long long\n"
+"// long long",
+format("// long long long long\n"
+   "// long",
+   getLLVMStyleWithColumns(20)));
+
+  // Keep the trailing newline while reflowing.
+  EXPECT_EQ("// long long long\n"
+"// long long\n",
+format("// long long long long\n"
+   "// long\n",
+   getLLVMStyleWithColumns(20)));
+
+  // Break a long line and reflow with a part of the next line.
+  EXPECT_EQ("// long long long\n"
+"// long long\n"
+"// long_long",
+format("// long long long long\n"
+   "// long long_long",
+   getLLVMStyleWithColumns(20)));
+
+  // Break but do not reflow if the first word from the next line is too long.
+  EXPECT_EQ("// long long long\n"
+"// long\n"
+"// long_long_long\n",
+format("// long long long long\n"
+   "// long_long_long\n",
+   getLLVMStyleWithColumns(20)));
+
+  // Don't break or reflow short lines.
+  verifyFormat("// long\n"
+   "// long long long lo\n"
+   "// long long long lo\n"
+   "// long",
+   getLLVMStyleWithColumns(20));
+
+  // Keep prefixes and decorations while reflowing.
+  EXPECT_EQ("/// long long long\n"
+"/// long long\n",
+format("/// long long long long\n"
+   "/// long\n",
+   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("//! long long long\n"
+"//! long long\n",
+format("//! long long long long\n"
+   "//! long\n",
+   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/* long long long\n"
+" * long long */",
+format("/* long long long long\n"
+   " * long */",
+   getLLVMStyleWithColumns(20)));
+
+  // Don't bring leading whitespace up while reflowing.
+  EXPECT_EQ("/*  long long long\n"
+" * long long long\n"
+" */",
+format("/*  long long long long\n"
+   " *  long long\n"
+   " */",
+   getLLVMStyleWithColumns(20)));
+
+  // Reflow the last line of a block comment with its trailing '*/'.
+  EXPECT_EQ("/* long long long\n"
+"   long long */",
+format("/* long long long long\n"
+   "   long */",
+   getLLVMStyleWithColumns(20)));
+
+  // Reflow two short lines; keep the postfix of the last one.
+  EXPECT_EQ("/* long long long\n"
+" * long long long */",
+format("/* long long long long\n"
+   " * long\n"
+   " * long */",
+   getLLVMStyleWithColumns(20)));
+
+  // Put the postfix of the last short reflow line on a newline if it doesn't
+  // fit.
+  EXPECT_EQ("/* long long long\n"
+" * long long longg\n"
+" */",
+format("/* long long 

[PATCH] D28764: [clang-format] Implement comment reflowing (v3)

2017-01-25 Thread Manuel Klimek via Phabricator via cfe-commits
klimek added inline comments.



Comment at: lib/Format/BreakableToken.cpp:279-280
+  return Content.size() >= 2 &&
+  Content != "clang-format on" &&
+  Content != "clang-format off" &&
+  !Content.endswith("\\") &&

Can we now use delimitsRegion here?



Comment at: lib/Format/ContinuationIndenter.cpp:1090-1092
+// Checks if Token delimits formatting regions, like /* clang-format off */.
+// Token must be a comment.
+static bool delimitsRegion(const FormatToken ) {

delimitsRegion seems overly abstract. Perhaps switchesFormatting?



Comment at: lib/Format/ContinuationIndenter.cpp:1205-1210
+if (SplitBefore.first != StringRef::npos) {
+  TailOffset = SplitBefore.first + SplitBefore.second;
+  ReflowInProgress = true;
+} else {
+  ReflowInProgress = false;
+}

krasimir wrote:
> klimek wrote:
> > (optional) I'd probably write this:
> >   ReflowInProgress = SplitBefore.first != StringRef::npos;
> >   if (ReflowInProgress) {
> > TailOffset = SplitBefore.first + SplitBefore.second;
> >   }
> How about this?
That works.



Comment at: lib/Format/ContinuationIndenter.cpp:1231-1233
+  unsigned RemainingTokenColumnsAfterCompress =
+  Token->getLineLengthAfterCompress(LineIndex, TailOffset,
+RemainingTokenColumns, Split);

krasimir wrote:
> klimek wrote:
> > I think AfterCompression reads more naturally. Perhaps we should use "trim" 
> > instead of compress, now that I think about it.
> > So I'd probably go for getTrimmedLineLength or something.
> I don't agree with "trimmig" because i have a connotation of "trimming" 
> around the ends of strings like in ltrim/rtrim and this fundamentally 
> operates somewhere inside. +1 for AfterCompression, though.
Yea, correct, I completely missed that this might happen in the middle of the 
string if we happen to be the first token that runs over, which is a rather 
interesting case :)



Comment at: lib/Format/WhitespaceManager.cpp:131
+Changes[i - 1].Kind == tok::comment &&
+OriginalWhitespaceStart != PreviousOriginalWhitespaceEnd;
   }

krasimir wrote:
> klimek wrote:
> > Why was this needed? (what breaks without this change)
> This is a dirty hack. The problem is that WhitespaceManager does its own 
> formatting for trailing comments and sometimes it re-formats a piece of line 
> even after it's been reflown with the previous line. Consider if we reflow 
> the second line up:
> ```
> // line 1
> // line 2
> ```
> That amounts to 2 changes, first (delimited by ()) for the whitespace between 
> the two tokens, and second (delimited by []) for the beginning of the next 
> line:
> ```
> // line 1(
> )[// ]line 2
> ``` 
> So in the end we have two changes like this:
> ```
> // line 1()[ ]line 2
> ```
> Note that the OriginalWhitespaceStart of the second change is the same as the 
> PreviousOriginalWhitespaceEnd of the first change.
> In this case, the above code marks the second line as trailing comment and 
> afterwards applies its own trailing-comment-alignment logic to it, which 
> might introduce extra whitespace before "line 2".
> 
> For a proper solution we need a mechanism to say that a particular change 
> emitted through the whitespace manager breaks the sequence of trailing 
> comments.
> 
> 
Ok. In this case this needs a largish comment with a FIXME, like you just 
described ;)


https://reviews.llvm.org/D28764



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D28764: [clang-format] Implement comment reflowing (v3)

2017-01-25 Thread Krasimir Georgiev via Phabricator via cfe-commits
krasimir added inline comments.



Comment at: lib/Format/ContinuationIndenter.cpp:1158-1159
+CommentPragmasRegex.match(Current.TokenText.substr(2)) ||
+Current.TokenText.substr(2).ltrim().startswith("clang-format on") ||
+Current.TokenText.substr(2).ltrim().startswith("clang-format off"))
   return addMultilineToken(Current, State);

klimek wrote:
> krasimir wrote:
> > klimek wrote:
> > > krasimir wrote:
> > > > klimek wrote:
> > > > > Generally, we shouldn't need those here, as those should be part of 
> > > > > the Token.Finalized state.
> > > > Here the problem is that the comments /* clang-format on */ and /* 
> > > > clang-format off */ control the formatting of the tokens in between, 
> > > > but do not control their own formatting, that is they are not finalized 
> > > > themselves.
> > > > What happens is that if we rely solely on Token.Finalized and the 
> > > > ColumnLimit is small, say 20, then:
> > > > ```
> > > > /* clang-format off */ 
> > > > ```
> > > > gets broken into:
> > > > ```
> > > > /* clang-format off
> > > >   */
> > > > ```
> > > > 
> > > > Add comments about this.
> > > Isn't the right fix to change the tokens of the comment to be finalized?
> > That's arguable. Consider [[ 
> > https://github.com/llvm-mirror/clang/blob/master/unittests/Format/FormatTest.cpp#L10916
> >  | FormatTest.cpp:10916 ]]: in this case we still want to re-indent the /* 
> > clang-format on/off */ comments, just not break them. If we finalize the 
> > token, we can't re-indent.
> I see two options:
> - mark the comment with a special token type for clang-format on/off
> - pull out a function - I see you already have one called mayReflowContent; 
> can we use that?
mayReflowContent is too specific for content during region. I'll add a 
delimitsRegion method.



Comment at: lib/Format/ContinuationIndenter.cpp:1205-1210
+if (SplitBefore.first != StringRef::npos) {
+  TailOffset = SplitBefore.first + SplitBefore.second;
+  ReflowInProgress = true;
+} else {
+  ReflowInProgress = false;
+}

klimek wrote:
> (optional) I'd probably write this:
>   ReflowInProgress = SplitBefore.first != StringRef::npos;
>   if (ReflowInProgress) {
> TailOffset = SplitBefore.first + SplitBefore.second;
>   }
How about this?



Comment at: lib/Format/ContinuationIndenter.cpp:1231-1233
+  unsigned RemainingTokenColumnsAfterCompress =
+  Token->getLineLengthAfterCompress(LineIndex, TailOffset,
+RemainingTokenColumns, Split);

klimek wrote:
> I think AfterCompression reads more naturally. Perhaps we should use "trim" 
> instead of compress, now that I think about it.
> So I'd probably go for getTrimmedLineLength or something.
I don't agree with "trimmig" because i have a connotation of "trimming" around 
the ends of strings like in ltrim/rtrim and this fundamentally operates 
somewhere inside. +1 for AfterCompression, though.



Comment at: lib/Format/WhitespaceManager.cpp:131
+Changes[i - 1].Kind == tok::comment &&
+OriginalWhitespaceStart != PreviousOriginalWhitespaceEnd;
   }

klimek wrote:
> Why was this needed? (what breaks without this change)
This is a dirty hack. The problem is that WhitespaceManager does its own 
formatting for trailing comments and sometimes it re-formats a piece of line 
even after it's been reflown with the previous line. Consider if we reflow the 
second line up:
```
// line 1
// line 2
```
That amounts to 2 changes, first (delimited by ()) for the whitespace between 
the two tokens, and second (delimited by []) for the beginning of the next line:
```
// line 1(
)[// ]line 2
``` 
So in the end we have two changes like this:
```
// line 1()[ ]line 2
```
Note that the OriginalWhitespaceStart of the second change is the same as the 
PreviousOriginalWhitespaceEnd of the first change.
In this case, the above code marks the second line as trailing comment and 
afterwards applies its own trailing-comment-alignment logic to it, which might 
introduce extra whitespace before "line 2".

For a proper solution we need a mechanism to say that a particular change 
emitted through the whitespace manager breaks the sequence of trailing comments.





Comment at: unittests/Format/FormatTest.cpp:2169
+
+  // Don't reflow lines starting with two punctuation characters.
+  EXPECT_EQ("// long long long\n"

klimek wrote:
> But we want to reflow lines starting with one punctuation character?
I guess I'm looking at lines that start like this: "ala", 'ala'. So it may be 
better to special-case these two instead.
The heuristic for now, in mayReflowContent is: the content of the line needs to 
have at least two characters and either the first or the second character must 
be non-punctuation.

Another approach would be to not do these funny things 

[PATCH] D28764: [clang-format] Implement comment reflowing (v3)

2017-01-25 Thread Krasimir Georgiev via Phabricator via cfe-commits
krasimir updated this revision to Diff 85725.
krasimir marked 8 inline comments as done.
krasimir added a comment.

- Address review comments.


https://reviews.llvm.org/D28764

Files:
  lib/Format/BreakableToken.cpp
  lib/Format/BreakableToken.h
  lib/Format/CMakeLists.txt
  lib/Format/Comments.cpp
  lib/Format/Comments.h
  lib/Format/ContinuationIndenter.cpp
  lib/Format/TokenAnnotator.cpp
  lib/Format/UnwrappedLineParser.cpp
  lib/Format/WhitespaceManager.cpp
  unittests/Format/FormatTest.cpp
  unittests/Format/FormatTestSelective.cpp

Index: unittests/Format/FormatTestSelective.cpp
===
--- unittests/Format/FormatTestSelective.cpp
+++ unittests/Format/FormatTestSelective.cpp
@@ -111,13 +111,19 @@
 format("int   a; // comment\n"
"intb; // comment",
0, 0));
-  EXPECT_EQ("int   a; // comment\n"
-" // line 2\n"
+  EXPECT_EQ("int a; // comment\n"
+"   // line 2\n"
 "int b;",
 format("int   a; // comment\n"
"// line 2\n"
"int b;",
28, 0));
+  EXPECT_EQ("int   a; // comment\n"
+"// comment 2\n"
+"int b;",
+format("int   a; // comment\n"
+   "// comment 2\n"
+   "int b;", 28, 0));
   EXPECT_EQ("int aa; // comment\n"
 "int b;\n"
 "int c; // unrelated comment",
Index: unittests/Format/FormatTest.cpp
===
--- unittests/Format/FormatTest.cpp
+++ unittests/Format/FormatTest.cpp
@@ -1783,6 +1783,463 @@
"0x00, 0x00, 0x00, 0x00};// comment\n");
 }
 
+TEST_F(FormatTest, ReflowsComments) {
+  // Break a long line and reflow with the full next line.
+  EXPECT_EQ("// long long long\n"
+"// long long",
+format("// long long long long\n"
+   "// long",
+   getLLVMStyleWithColumns(20)));
+
+  // Keep the trailing newline while reflowing.
+  EXPECT_EQ("// long long long\n"
+"// long long\n",
+format("// long long long long\n"
+   "// long\n",
+   getLLVMStyleWithColumns(20)));
+
+  // Break a long line and reflow with a part of the next line.
+  EXPECT_EQ("// long long long\n"
+"// long long\n"
+"// long_long",
+format("// long long long long\n"
+   "// long long_long",
+   getLLVMStyleWithColumns(20)));
+
+  // Break but do not reflow if the first word from the next line is too long.
+  EXPECT_EQ("// long long long\n"
+"// long\n"
+"// long_long_long\n",
+format("// long long long long\n"
+   "// long_long_long\n",
+   getLLVMStyleWithColumns(20)));
+
+  // Don't break or reflow short lines.
+  verifyFormat("// long\n"
+   "// long long long lo\n"
+   "// long long long lo\n"
+   "// long",
+   getLLVMStyleWithColumns(20));
+
+  // Keep prefixes and decorations while reflowing.
+  EXPECT_EQ("/// long long long\n"
+"/// long long\n",
+format("/// long long long long\n"
+   "/// long\n",
+   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("//! long long long\n"
+"//! long long\n",
+format("//! long long long long\n"
+   "//! long\n",
+   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/* long long long\n"
+" * long long */",
+format("/* long long long long\n"
+   " * long */",
+   getLLVMStyleWithColumns(20)));
+
+  // Don't bring leading whitespace up while reflowing.
+  EXPECT_EQ("/*  long long long\n"
+" * long long long\n"
+" */",
+format("/*  long long long long\n"
+   " *  long long\n"
+   " */",
+   getLLVMStyleWithColumns(20)));
+
+  // Reflow the last line of a block comment with its trailing '*/'.
+  EXPECT_EQ("/* long long long\n"
+"   long long */",
+format("/* long long long long\n"
+   "   long */",
+   getLLVMStyleWithColumns(20)));
+
+  // Reflow two short lines; keep the postfix of the last one.
+  EXPECT_EQ("/* long long long\n"
+" * long long long */",
+format("/* long long long long\n"
+   " * long\n"
+   " * long */",
+   getLLVMStyleWithColumns(20)));
+
+  // Put the postfix of the last short reflow line on a newline if it doesn't
+  // fit.
+  EXPECT_EQ("/* long long long\n"
+" * long long longg\n"
+" */",
+format("/* long long long long\n"
+   " * long\n"
+  

[PATCH] D28764: [clang-format] Implement comment reflowing (v3)

2017-01-24 Thread Manuel Klimek via Phabricator via cfe-commits
klimek added inline comments.



Comment at: lib/Format/BreakableToken.h:42-48
+/// There is a pair of operations that are used to compress a long whitespace
+/// range with a single space if that will bring the line lenght under the
+/// column limit:
+/// - getLineLengthAfterCompress, for calculating the size in columns of the
+///   line after a whitespace range has been compressed, and
+/// - compressWhitespace, for executing the whitespace compression using a
+///   whitespace manager.

krasimir wrote:
> klimek wrote:
> > I assume the reason to not fold this into the next getSplitBefore is that 
> > sometimes we need to do the trimming even if we're the last line? Would it 
> > otherwise make sense?
> The reason why we can't fold is that the compressing and the reflow of the 
> next might be independent and operate on totally different whitespace ranges.
> Consider the following comments with line length 20:
> ```
> // Column limit 20 |
> // long   long
> // long
> ```
> Then after compressWhitespace(line 2) we get:
> ```
> // Column limit 20 |
> // long long
> // long
> ```
> And now replaceWhitespaceBefore(line 3) will reflow:
> ```
> // Column limit 20 |
> // long long long
> ```
> 
> Will add a note about this.
> 
Ah, wow, nice!


https://reviews.llvm.org/D28764



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D28764: [clang-format] Implement comment reflowing (v3)

2017-01-24 Thread Krasimir Georgiev via Phabricator via cfe-commits
krasimir marked an inline comment as done.
krasimir added inline comments.



Comment at: lib/Format/BreakableToken.h:42-48
+/// There is a pair of operations that are used to compress a long whitespace
+/// range with a single space if that will bring the line lenght under the
+/// column limit:
+/// - getLineLengthAfterCompress, for calculating the size in columns of the
+///   line after a whitespace range has been compressed, and
+/// - compressWhitespace, for executing the whitespace compression using a
+///   whitespace manager.

klimek wrote:
> I assume the reason to not fold this into the next getSplitBefore is that 
> sometimes we need to do the trimming even if we're the last line? Would it 
> otherwise make sense?
The reason why we can't fold is that the compressing and the reflow of the next 
might be independent and operate on totally different whitespace ranges.
Consider the following comments with line length 20:
```
// Column limit 20 |
// long   long
// long
```
Then after compressWhitespace(line 2) we get:
```
// Column limit 20 |
// long long
// long
```
And now replaceWhitespaceBefore(line 3) will reflow:
```
// Column limit 20 |
// long long long
```

Will add a note about this.



https://reviews.llvm.org/D28764



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D28764: [clang-format] Implement comment reflowing (v3)

2017-01-24 Thread Manuel Klimek via Phabricator via cfe-commits
klimek added a comment.

This is starting to be pretty awesome. The one thing that would help me review 
the gist of the implementation a bit more is if that had a bit more comments. 
Perhaps go over the math in the code and put some comments in why we're doing 
what at various steps.




Comment at: lib/Format/BreakableToken.h:42-48
+/// There is a pair of operations that are used to compress a long whitespace
+/// range with a single space if that will bring the line lenght under the
+/// column limit:
+/// - getLineLengthAfterCompress, for calculating the size in columns of the
+///   line after a whitespace range has been compressed, and
+/// - compressWhitespace, for executing the whitespace compression using a
+///   whitespace manager.

I assume the reason to not fold this into the next getSplitBefore is that 
sometimes we need to do the trimming even if we're the last line? Would it 
otherwise make sense?



Comment at: lib/Format/BreakableToken.h:93-96
+  /// \brief Returns the number of columns required to format the piece of line
+  /// at \p LineIndex, from byte offset \p TailOffset after the whitespace 
range
+  /// \p Split previously returned by \c getSplit has been compressed into a
+  /// single space.

I would cut "previously returned by getSplit", as I think this is irrelevant to 
this function.
Also, given that we hand in RemainingTokenColumn, why would LineIndex and 
TailOffset ever be relevant?
Lastly, this is not virtual, in which case it seems to not make sense to pass 
in any information that's not relevant for the current implementation?



Comment at: lib/Format/BreakableToken.h:119-122
+  virtual Split getSplitBefore(unsigned LineIndex,
+   unsigned PreviousEndColumn,
+   unsigned ColumnLimit,
+   bool ReflowInProgress) const {

Instead of passing in ReflowInProgress, I'd not call it if !ReflowInProgress 
and make the comment more reflow-examplish.

It basically returns the split of the current line, if any parts of the current 
line fit the the line above it.



Comment at: lib/Format/BreakableToken.h:147-149
+  /// \brief Updates the next token of \p State to the next token after this
+  /// one. This makes sense when this token manages a set of underlying tokens
+  /// as a unit and is responsible for the formatting of the them.

s/makes sense when/can be used when/



Comment at: lib/Format/ContinuationIndenter.cpp:1158-1159
+CommentPragmasRegex.match(Current.TokenText.substr(2)) ||
+Current.TokenText.substr(2).ltrim().startswith("clang-format on") ||
+Current.TokenText.substr(2).ltrim().startswith("clang-format off"))
   return addMultilineToken(Current, State);

krasimir wrote:
> klimek wrote:
> > krasimir wrote:
> > > klimek wrote:
> > > > Generally, we shouldn't need those here, as those should be part of the 
> > > > Token.Finalized state.
> > > Here the problem is that the comments /* clang-format on */ and /* 
> > > clang-format off */ control the formatting of the tokens in between, but 
> > > do not control their own formatting, that is they are not finalized 
> > > themselves.
> > > What happens is that if we rely solely on Token.Finalized and the 
> > > ColumnLimit is small, say 20, then:
> > > ```
> > > /* clang-format off */ 
> > > ```
> > > gets broken into:
> > > ```
> > > /* clang-format off
> > >   */
> > > ```
> > > 
> > > Add comments about this.
> > Isn't the right fix to change the tokens of the comment to be finalized?
> That's arguable. Consider [[ 
> https://github.com/llvm-mirror/clang/blob/master/unittests/Format/FormatTest.cpp#L10916
>  | FormatTest.cpp:10916 ]]: in this case we still want to re-indent the /* 
> clang-format on/off */ comments, just not break them. If we finalize the 
> token, we can't re-indent.
I see two options:
- mark the comment with a special token type for clang-format on/off
- pull out a function - I see you already have one called mayReflowContent; can 
we use that?



Comment at: lib/Format/ContinuationIndenter.cpp:1205-1210
+if (SplitBefore.first != StringRef::npos) {
+  TailOffset = SplitBefore.first + SplitBefore.second;
+  ReflowInProgress = true;
+} else {
+  ReflowInProgress = false;
+}

(optional) I'd probably write this:
  ReflowInProgress = SplitBefore.first != StringRef::npos;
  if (ReflowInProgress) {
TailOffset = SplitBefore.first + SplitBefore.second;
  }



Comment at: lib/Format/ContinuationIndenter.cpp:1231-1233
+  unsigned RemainingTokenColumnsAfterCompress =
+  Token->getLineLengthAfterCompress(LineIndex, TailOffset,
+RemainingTokenColumns, Split);

I think AfterCompression reads more 

[PATCH] D28764: [clang-format] Implement comment reflowing (v3)

2017-01-24 Thread Krasimir Georgiev via Phabricator via cfe-commits
krasimir added inline comments.



Comment at: lib/Format/ContinuationIndenter.cpp:1158-1159
+CommentPragmasRegex.match(Current.TokenText.substr(2)) ||
+Current.TokenText.substr(2).ltrim().startswith("clang-format on") ||
+Current.TokenText.substr(2).ltrim().startswith("clang-format off"))
   return addMultilineToken(Current, State);

klimek wrote:
> krasimir wrote:
> > klimek wrote:
> > > Generally, we shouldn't need those here, as those should be part of the 
> > > Token.Finalized state.
> > Here the problem is that the comments /* clang-format on */ and /* 
> > clang-format off */ control the formatting of the tokens in between, but do 
> > not control their own formatting, that is they are not finalized themselves.
> > What happens is that if we rely solely on Token.Finalized and the 
> > ColumnLimit is small, say 20, then:
> > ```
> > /* clang-format off */ 
> > ```
> > gets broken into:
> > ```
> > /* clang-format off
> >   */
> > ```
> > 
> > Add comments about this.
> Isn't the right fix to change the tokens of the comment to be finalized?
That's arguable. Consider [[ 
https://github.com/llvm-mirror/clang/blob/master/unittests/Format/FormatTest.cpp#L10916
 | FormatTest.cpp:10916 ]]: in this case we still want to re-indent the /* 
clang-format on/off */ comments, just not break them. If we finalize the token, 
we can't re-indent.


https://reviews.llvm.org/D28764



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D28764: [clang-format] Implement comment reflowing (v3)

2017-01-24 Thread Manuel Klimek via Phabricator via cfe-commits
klimek added inline comments.



Comment at: lib/Format/ContinuationIndenter.cpp:1158-1159
+CommentPragmasRegex.match(Current.TokenText.substr(2)) ||
+Current.TokenText.substr(2).ltrim().startswith("clang-format on") ||
+Current.TokenText.substr(2).ltrim().startswith("clang-format off"))
   return addMultilineToken(Current, State);

krasimir wrote:
> klimek wrote:
> > Generally, we shouldn't need those here, as those should be part of the 
> > Token.Finalized state.
> Here the problem is that the comments /* clang-format on */ and /* 
> clang-format off */ control the formatting of the tokens in between, but do 
> not control their own formatting, that is they are not finalized themselves.
> What happens is that if we rely solely on Token.Finalized and the ColumnLimit 
> is small, say 20, then:
> ```
> /* clang-format off */ 
> ```
> gets broken into:
> ```
> /* clang-format off
>   */
> ```
> 
> Add comments about this.
Isn't the right fix to change the tokens of the comment to be finalized?


https://reviews.llvm.org/D28764



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D28764: [clang-format] Implement comment reflowing (v3)

2017-01-24 Thread Krasimir Georgiev via Phabricator via cfe-commits
krasimir updated this revision to Diff 85557.
krasimir marked 3 inline comments as done.
krasimir added a comment.

- Address review comments.


https://reviews.llvm.org/D28764

Files:
  lib/Format/BreakableToken.cpp
  lib/Format/BreakableToken.h
  lib/Format/CMakeLists.txt
  lib/Format/Comments.cpp
  lib/Format/Comments.h
  lib/Format/ContinuationIndenter.cpp
  lib/Format/TokenAnnotator.cpp
  lib/Format/UnwrappedLineParser.cpp
  lib/Format/WhitespaceManager.cpp
  unittests/Format/FormatTest.cpp
  unittests/Format/FormatTestSelective.cpp

Index: unittests/Format/FormatTestSelective.cpp
===
--- unittests/Format/FormatTestSelective.cpp
+++ unittests/Format/FormatTestSelective.cpp
@@ -111,13 +111,19 @@
 format("int   a; // comment\n"
"intb; // comment",
0, 0));
-  EXPECT_EQ("int   a; // comment\n"
-" // line 2\n"
+  EXPECT_EQ("int a; // comment\n"
+"   // line 2\n"
 "int b;",
 format("int   a; // comment\n"
"// line 2\n"
"int b;",
28, 0));
+  EXPECT_EQ("int   a; // comment\n"
+"// comment 2\n"
+"int b;",
+format("int   a; // comment\n"
+   "// comment 2\n"
+   "int b;", 28, 0));
   EXPECT_EQ("int aa; // comment\n"
 "int b;\n"
 "int c; // unrelated comment",
Index: unittests/Format/FormatTest.cpp
===
--- unittests/Format/FormatTest.cpp
+++ unittests/Format/FormatTest.cpp
@@ -1783,6 +1783,455 @@
"0x00, 0x00, 0x00, 0x00};// comment\n");
 }
 
+TEST_F(FormatTest, ReflowsComments) {
+  // Break a long line and reflow with the full next line.
+  EXPECT_EQ("// long long long\n"
+"// long long",
+format("// long long long long\n"
+   "// long",
+   getLLVMStyleWithColumns(20)));
+
+  // Keep the trailing newline while reflowing.
+  EXPECT_EQ("// long long long\n"
+"// long long\n",
+format("// long long long long\n"
+   "// long\n",
+   getLLVMStyleWithColumns(20)));
+
+  // Break a long line and reflow with a part of the next line.
+  EXPECT_EQ("// long long long\n"
+"// long long\n"
+"// long_long",
+format("// long long long long\n"
+   "// long long_long",
+   getLLVMStyleWithColumns(20)));
+
+  // Break but do not reflow if the first word from the next line is too long.
+  EXPECT_EQ("// long long long\n"
+"// long\n"
+"// long_long_long\n",
+format("// long long long long\n"
+   "// long_long_long\n",
+   getLLVMStyleWithColumns(20)));
+
+  // Don't break or reflow short lines.
+  verifyFormat("// long\n"
+   "// long long long lo\n"
+   "// long long long lo\n"
+   "// long",
+   getLLVMStyleWithColumns(20));
+
+  // Keep prefixes and decorations while reflowing.
+  EXPECT_EQ("/// long long long\n"
+"/// long long\n",
+format("/// long long long long\n"
+   "/// long\n",
+   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("//! long long long\n"
+"//! long long\n",
+format("//! long long long long\n"
+   "//! long\n",
+   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/* long long long\n"
+" * long long */",
+format("/* long long long long\n"
+   " * long */",
+   getLLVMStyleWithColumns(20)));
+
+  // Don't bring leading whitespace up while reflowing.
+  EXPECT_EQ("/*  long long long\n"
+" * long long long\n"
+" */",
+format("/*  long long long long\n"
+   " *  long long\n"
+   " */",
+   getLLVMStyleWithColumns(20)));
+
+  // Reflow the last line of a block comment with its trailing '*/'.
+  EXPECT_EQ("/* long long long\n"
+"   long long */",
+format("/* long long long long\n"
+   "   long */",
+   getLLVMStyleWithColumns(20)));
+
+  // Reflow two short lines; keep the postfix of the last one.
+  EXPECT_EQ("/* long long long\n"
+" * long long long */",
+format("/* long long long long\n"
+   " * long\n"
+   " * long */",
+   getLLVMStyleWithColumns(20)));
+
+  // Put the postfix of the last short reflow line on a newline if it doesn't
+  // fit.
+  EXPECT_EQ("/* long long long\n"
+" * long long longg\n"
+" */",
+format("/* long long long long\n"
+   " * long\n"
+  

[PATCH] D28764: [clang-format] Implement comment reflowing (v3)

2017-01-24 Thread Krasimir Georgiev via Phabricator via cfe-commits
krasimir marked 3 inline comments as done.
krasimir added inline comments.



Comment at: lib/Format/BreakableToken.h:55-56
+///   been reformatted, and
+/// - replaceWhitespaceBefore, for executing the reflow using a whitespace
+///   manager.
+///

klimek wrote:
> Shouldn't that be called insertBreakBefore for consistency then?
> 
> Also, perhaps we want to rename replaceWhitespace to cleanupWhitespace or 
> compressWhitespace or something...
insertBreakBefore will not be a good name for this, because it doesn't insert 
breaks.
There are two use-cases for this:
1. The old one: adjusting indent in comments, for example:
```
// before
   // after
```
2. The new one: reflowing, for example
```
before:
  // line1
  // line2
after:
  // line1 line2
```
The break between line1 and line2 gets removed in replaceWhitespaceBefore(2).

I'll rename replaceWhitespace to compressWhitespace.



Comment at: lib/Format/ContinuationIndenter.cpp:1158-1159
+CommentPragmasRegex.match(Current.TokenText.substr(2)) ||
+Current.TokenText.substr(2).ltrim().startswith("clang-format on") ||
+Current.TokenText.substr(2).ltrim().startswith("clang-format off"))
   return addMultilineToken(Current, State);

klimek wrote:
> Generally, we shouldn't need those here, as those should be part of the 
> Token.Finalized state.
Here the problem is that the comments /* clang-format on */ and /* clang-format 
off */ control the formatting of the tokens in between, but do not control 
their own formatting, that is they are not finalized themselves.
What happens is that if we rely solely on Token.Finalized and the ColumnLimit 
is small, say 20, then:
```
/* clang-format off */ 
```
gets broken into:
```
/* clang-format off
  */
```

Add comments about this.



Comment at: lib/Format/ContinuationIndenter.cpp:1220
+RemainingTokenColumns = RemainingTokenColumns + 1 - Split.second;
+ReflowInProgress = true;
 if (!DryRun)

klimek wrote:
> Explain this part in a comment.
Realized that the place of this is not here; introduced a proper method 
BreakableToken::getLineLengthAfterCompres instead.


https://reviews.llvm.org/D28764



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D28764: [clang-format] Implement comment reflowing (v3)

2017-01-23 Thread Manuel Klimek via Phabricator via cfe-commits
klimek added inline comments.



Comment at: lib/Format/BreakableToken.h:55-56
+///   been reformatted, and
+/// - replaceWhitespaceBefore, for executing the reflow using a whitespace
+///   manager.
+///

Shouldn't that be called insertBreakBefore for consistency then?

Also, perhaps we want to rename replaceWhitespace to cleanupWhitespace or 
compressWhitespace or something...



Comment at: lib/Format/ContinuationIndenter.cpp:1158-1159
+CommentPragmasRegex.match(Current.TokenText.substr(2)) ||
+Current.TokenText.substr(2).ltrim().startswith("clang-format on") ||
+Current.TokenText.substr(2).ltrim().startswith("clang-format off"))
   return addMultilineToken(Current, State);

Generally, we shouldn't need those here, as those should be part of the 
Token.Finalized state.



Comment at: lib/Format/ContinuationIndenter.cpp:1220
+RemainingTokenColumns = RemainingTokenColumns + 1 - Split.second;
+ReflowInProgress = true;
 if (!DryRun)

Explain this part in a comment.


https://reviews.llvm.org/D28764



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D28764: [clang-format] Implement comment reflowing (v3)

2017-01-23 Thread Krasimir Georgiev via Phabricator via cfe-commits
krasimir updated this revision to Diff 85381.
krasimir added a comment.

- Add back a test case that I had previously removed for no good reason.


https://reviews.llvm.org/D28764

Files:
  lib/Format/BreakableToken.cpp
  lib/Format/BreakableToken.h
  lib/Format/CMakeLists.txt
  lib/Format/Comments.cpp
  lib/Format/Comments.h
  lib/Format/ContinuationIndenter.cpp
  lib/Format/TokenAnnotator.cpp
  lib/Format/UnwrappedLineParser.cpp
  lib/Format/WhitespaceManager.cpp
  unittests/Format/FormatTest.cpp
  unittests/Format/FormatTestSelective.cpp

Index: unittests/Format/FormatTestSelective.cpp
===
--- unittests/Format/FormatTestSelective.cpp
+++ unittests/Format/FormatTestSelective.cpp
@@ -111,13 +111,19 @@
 format("int   a; // comment\n"
"intb; // comment",
0, 0));
-  EXPECT_EQ("int   a; // comment\n"
-" // line 2\n"
+  EXPECT_EQ("int a; // comment\n"
+"   // line 2\n"
 "int b;",
 format("int   a; // comment\n"
"// line 2\n"
"int b;",
28, 0));
+  EXPECT_EQ("int   a; // comment\n"
+"// comment 2\n"
+"int b;",
+format("int   a; // comment\n"
+   "// comment 2\n"
+   "int b;", 28, 0));
   EXPECT_EQ("int aa; // comment\n"
 "int b;\n"
 "int c; // unrelated comment",
Index: unittests/Format/FormatTest.cpp
===
--- unittests/Format/FormatTest.cpp
+++ unittests/Format/FormatTest.cpp
@@ -1783,6 +1783,455 @@
"0x00, 0x00, 0x00, 0x00};// comment\n");
 }
 
+TEST_F(FormatTest, ReflowsComments) {
+  // Break a long line and reflow with the full next line.
+  EXPECT_EQ("// long long long\n"
+"// long long",
+format("// long long long long\n"
+   "// long",
+   getLLVMStyleWithColumns(20)));
+
+  // Keep the trailing newline while reflowing.
+  EXPECT_EQ("// long long long\n"
+"// long long\n",
+format("// long long long long\n"
+   "// long\n",
+   getLLVMStyleWithColumns(20)));
+
+  // Break a long line and reflow with a part of the next line.
+  EXPECT_EQ("// long long long\n"
+"// long long\n"
+"// long_long",
+format("// long long long long\n"
+   "// long long_long",
+   getLLVMStyleWithColumns(20)));
+
+  // Break but do not reflow if the first word from the next line is too long.
+  EXPECT_EQ("// long long long\n"
+"// long\n"
+"// long_long_long\n",
+format("// long long long long\n"
+   "// long_long_long\n",
+   getLLVMStyleWithColumns(20)));
+
+  // Don't break or reflow short lines.
+  verifyFormat("// long\n"
+   "// long long long lo\n"
+   "// long long long lo\n"
+   "// long",
+   getLLVMStyleWithColumns(20));
+
+  // Keep prefixes and decorations while reflowing.
+  EXPECT_EQ("/// long long long\n"
+"/// long long\n",
+format("/// long long long long\n"
+   "/// long\n",
+   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("//! long long long\n"
+"//! long long\n",
+format("//! long long long long\n"
+   "//! long\n",
+   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/* long long long\n"
+" * long long */",
+format("/* long long long long\n"
+   " * long */",
+   getLLVMStyleWithColumns(20)));
+
+  // Don't bring leading whitespace up while reflowing.
+  EXPECT_EQ("/*  long long long\n"
+" * long long long\n"
+" */",
+format("/*  long long long long\n"
+   " *  long long\n"
+   " */",
+   getLLVMStyleWithColumns(20)));
+
+  // Reflow the last line of a block comment with its trailing '*/'.
+  EXPECT_EQ("/* long long long\n"
+"   long long */",
+format("/* long long long long\n"
+   "   long */",
+   getLLVMStyleWithColumns(20)));
+
+  // Reflow two short lines; keep the postfix of the last one.
+  EXPECT_EQ("/* long long long\n"
+" * long long long */",
+format("/* long long long long\n"
+   " * long\n"
+   " * long */",
+   getLLVMStyleWithColumns(20)));
+
+  // Put the postfix of the last short reflow line on a newline if it doesn't
+  // fit.
+  EXPECT_EQ("/* long long long\n"
+" * long long longg\n"
+" */",
+format("/* long long long long\n"
+   " * long\n"
+   

[PATCH] D28764: [clang-format] Implement comment reflowing (v3)

2017-01-23 Thread Krasimir Georgiev via Phabricator via cfe-commits
krasimir updated this revision to Diff 85376.
krasimir added a comment.

- Add a note about replaceWhitespace in the comments of BreakableToken.


https://reviews.llvm.org/D28764

Files:
  lib/Format/BreakableToken.cpp
  lib/Format/BreakableToken.h
  lib/Format/CMakeLists.txt
  lib/Format/Comments.cpp
  lib/Format/Comments.h
  lib/Format/ContinuationIndenter.cpp
  lib/Format/TokenAnnotator.cpp
  lib/Format/UnwrappedLineParser.cpp
  lib/Format/WhitespaceManager.cpp
  unittests/Format/FormatTest.cpp
  unittests/Format/FormatTestSelective.cpp

Index: unittests/Format/FormatTestSelective.cpp
===
--- unittests/Format/FormatTestSelective.cpp
+++ unittests/Format/FormatTestSelective.cpp
@@ -111,13 +111,19 @@
 format("int   a; // comment\n"
"intb; // comment",
0, 0));
-  EXPECT_EQ("int   a; // comment\n"
-" // line 2\n"
+  EXPECT_EQ("int a; // comment\n"
+"   // line 2\n"
 "int b;",
 format("int   a; // comment\n"
"// line 2\n"
"int b;",
28, 0));
+  EXPECT_EQ("int   a; // comment\n"
+"// comment 2\n"
+"int b;",
+format("int   a; // comment\n"
+   "// comment 2\n"
+   "int b;", 28, 0));
   EXPECT_EQ("int aa; // comment\n"
 "int b;\n"
 "int c; // unrelated comment",
Index: unittests/Format/FormatTest.cpp
===
--- unittests/Format/FormatTest.cpp
+++ unittests/Format/FormatTest.cpp
@@ -1783,6 +1783,455 @@
"0x00, 0x00, 0x00, 0x00};// comment\n");
 }
 
+TEST_F(FormatTest, ReflowsComments) {
+  // Break a long line and reflow with the full next line.
+  EXPECT_EQ("// long long long\n"
+"// long long",
+format("// long long long long\n"
+   "// long",
+   getLLVMStyleWithColumns(20)));
+
+  // Keep the trailing newline while reflowing.
+  EXPECT_EQ("// long long long\n"
+"// long long\n",
+format("// long long long long\n"
+   "// long\n",
+   getLLVMStyleWithColumns(20)));
+
+  // Break a long line and reflow with a part of the next line.
+  EXPECT_EQ("// long long long\n"
+"// long long\n"
+"// long_long",
+format("// long long long long\n"
+   "// long long_long",
+   getLLVMStyleWithColumns(20)));
+
+  // Break but do not reflow if the first word from the next line is too long.
+  EXPECT_EQ("// long long long\n"
+"// long\n"
+"// long_long_long\n",
+format("// long long long long\n"
+   "// long_long_long\n",
+   getLLVMStyleWithColumns(20)));
+
+  // Don't break or reflow short lines.
+  verifyFormat("// long\n"
+   "// long long long lo\n"
+   "// long long long lo\n"
+   "// long",
+   getLLVMStyleWithColumns(20));
+
+  // Keep prefixes and decorations while reflowing.
+  EXPECT_EQ("/// long long long\n"
+"/// long long\n",
+format("/// long long long long\n"
+   "/// long\n",
+   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("//! long long long\n"
+"//! long long\n",
+format("//! long long long long\n"
+   "//! long\n",
+   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/* long long long\n"
+" * long long */",
+format("/* long long long long\n"
+   " * long */",
+   getLLVMStyleWithColumns(20)));
+
+  // Don't bring leading whitespace up while reflowing.
+  EXPECT_EQ("/*  long long long\n"
+" * long long long\n"
+" */",
+format("/*  long long long long\n"
+   " *  long long\n"
+   " */",
+   getLLVMStyleWithColumns(20)));
+
+  // Reflow the last line of a block comment with its trailing '*/'.
+  EXPECT_EQ("/* long long long\n"
+"   long long */",
+format("/* long long long long\n"
+   "   long */",
+   getLLVMStyleWithColumns(20)));
+
+  // Reflow two short lines; keep the postfix of the last one.
+  EXPECT_EQ("/* long long long\n"
+" * long long long */",
+format("/* long long long long\n"
+   " * long\n"
+   " * long */",
+   getLLVMStyleWithColumns(20)));
+
+  // Put the postfix of the last short reflow line on a newline if it doesn't
+  // fit.
+  EXPECT_EQ("/* long long long\n"
+" * long long longg\n"
+" */",
+format("/* long long long long\n"
+   " * long\n"
+

[PATCH] D28764: [clang-format] Implement comment reflowing (v3)

2017-01-23 Thread Manuel Klimek via Phabricator via cfe-commits
klimek added inline comments.



Comment at: lib/Format/BreakableToken.h:40
+///   of the content after a split has been used for breaking, and
+/// - insertBreak, for executing the split using a whitespace manager.
+///

Do we want to describe how replaceWhitespace fits in here, then?


https://reviews.llvm.org/D28764



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D28764: [clang-format] Implement comment reflowing (v3)

2017-01-23 Thread Krasimir Georgiev via Phabricator via cfe-commits
krasimir added inline comments.



Comment at: lib/Format/BreakableToken.h:87
   /// space.
   virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset,
  Split Split,

By the way, I got confused, this stays because the new triplet of methods 
replaces the old replaceWhitespace**Before**; this method serves a different 
purpose during breaking, that is tries to shrink whitespace instead of 
inserting a break.


https://reviews.llvm.org/D28764



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D28764: [clang-format] Implement comment reflowing (v3)

2017-01-23 Thread Krasimir Georgiev via Phabricator via cfe-commits
krasimir updated this revision to Diff 85339.
krasimir added a comment.

- [clang-format] Improve the interface of BreakableToken and add comments.


https://reviews.llvm.org/D28764

Files:
  lib/Format/BreakableToken.cpp
  lib/Format/BreakableToken.h
  lib/Format/CMakeLists.txt
  lib/Format/Comments.cpp
  lib/Format/Comments.h
  lib/Format/ContinuationIndenter.cpp
  lib/Format/TokenAnnotator.cpp
  lib/Format/UnwrappedLineParser.cpp
  lib/Format/WhitespaceManager.cpp
  unittests/Format/FormatTest.cpp
  unittests/Format/FormatTestSelective.cpp

Index: unittests/Format/FormatTestSelective.cpp
===
--- unittests/Format/FormatTestSelective.cpp
+++ unittests/Format/FormatTestSelective.cpp
@@ -111,13 +111,19 @@
 format("int   a; // comment\n"
"intb; // comment",
0, 0));
-  EXPECT_EQ("int   a; // comment\n"
-" // line 2\n"
+  EXPECT_EQ("int a; // comment\n"
+"   // line 2\n"
 "int b;",
 format("int   a; // comment\n"
"// line 2\n"
"int b;",
28, 0));
+  EXPECT_EQ("int   a; // comment\n"
+"// comment 2\n"
+"int b;",
+format("int   a; // comment\n"
+   "// comment 2\n"
+   "int b;", 28, 0));
   EXPECT_EQ("int aa; // comment\n"
 "int b;\n"
 "int c; // unrelated comment",
Index: unittests/Format/FormatTest.cpp
===
--- unittests/Format/FormatTest.cpp
+++ unittests/Format/FormatTest.cpp
@@ -1783,6 +1783,455 @@
"0x00, 0x00, 0x00, 0x00};// comment\n");
 }
 
+TEST_F(FormatTest, ReflowsComments) {
+  // Break a long line and reflow with the full next line.
+  EXPECT_EQ("// long long long\n"
+"// long long",
+format("// long long long long\n"
+   "// long",
+   getLLVMStyleWithColumns(20)));
+
+  // Keep the trailing newline while reflowing.
+  EXPECT_EQ("// long long long\n"
+"// long long\n",
+format("// long long long long\n"
+   "// long\n",
+   getLLVMStyleWithColumns(20)));
+
+  // Break a long line and reflow with a part of the next line.
+  EXPECT_EQ("// long long long\n"
+"// long long\n"
+"// long_long",
+format("// long long long long\n"
+   "// long long_long",
+   getLLVMStyleWithColumns(20)));
+
+  // Break but do not reflow if the first word from the next line is too long.
+  EXPECT_EQ("// long long long\n"
+"// long\n"
+"// long_long_long\n",
+format("// long long long long\n"
+   "// long_long_long\n",
+   getLLVMStyleWithColumns(20)));
+
+  // Don't break or reflow short lines.
+  verifyFormat("// long\n"
+   "// long long long lo\n"
+   "// long long long lo\n"
+   "// long",
+   getLLVMStyleWithColumns(20));
+
+  // Keep prefixes and decorations while reflowing.
+  EXPECT_EQ("/// long long long\n"
+"/// long long\n",
+format("/// long long long long\n"
+   "/// long\n",
+   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("//! long long long\n"
+"//! long long\n",
+format("//! long long long long\n"
+   "//! long\n",
+   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/* long long long\n"
+" * long long */",
+format("/* long long long long\n"
+   " * long */",
+   getLLVMStyleWithColumns(20)));
+
+  // Don't bring leading whitespace up while reflowing.
+  EXPECT_EQ("/*  long long long\n"
+" * long long long\n"
+" */",
+format("/*  long long long long\n"
+   " *  long long\n"
+   " */",
+   getLLVMStyleWithColumns(20)));
+
+  // Reflow the last line of a block comment with its trailing '*/'.
+  EXPECT_EQ("/* long long long\n"
+"   long long */",
+format("/* long long long long\n"
+   "   long */",
+   getLLVMStyleWithColumns(20)));
+
+  // Reflow two short lines; keep the postfix of the last one.
+  EXPECT_EQ("/* long long long\n"
+" * long long long */",
+format("/* long long long long\n"
+   " * long\n"
+   " * long */",
+   getLLVMStyleWithColumns(20)));
+
+  // Put the postfix of the last short reflow line on a newline if it doesn't
+  // fit.
+  EXPECT_EQ("/* long long long\n"
+" * long long longg\n"
+" */",
+format("/* long long long long\n"
+   " * long\n"
+ 

[PATCH] D28764: [clang-format] Implement comment reflowing (v3)

2017-01-16 Thread Krasimir Georgiev via Phabricator via cfe-commits
krasimir created this revision.
krasimir added a reviewer: klimek.
krasimir added subscribers: ioeric, cfe-commits, mgorny, klimek, djasper.

This presents a version of the comment reflowing with less mutable state inside
the comment breakable token subclasses. The state has been pushed into the
driving breakProtrudingToken method. For this, the API of BreakableToken is 
enriched 
by the methods getSplitBefore and getLineLengthAfterSplitBefore.


https://reviews.llvm.org/D28764

Files:
  lib/Format/BreakableToken.cpp
  lib/Format/BreakableToken.h
  lib/Format/CMakeLists.txt
  lib/Format/Comments.cpp
  lib/Format/Comments.h
  lib/Format/ContinuationIndenter.cpp
  lib/Format/TokenAnnotator.cpp
  lib/Format/UnwrappedLineParser.cpp
  lib/Format/WhitespaceManager.cpp
  unittests/Format/FormatTest.cpp
  unittests/Format/FormatTestSelective.cpp

Index: unittests/Format/FormatTestSelective.cpp
===
--- unittests/Format/FormatTestSelective.cpp
+++ unittests/Format/FormatTestSelective.cpp
@@ -111,13 +111,19 @@
 format("int   a; // comment\n"
"intb; // comment",
0, 0));
-  EXPECT_EQ("int   a; // comment\n"
-" // line 2\n"
+  EXPECT_EQ("int a; // comment\n"
+"   // line 2\n"
 "int b;",
 format("int   a; // comment\n"
"// line 2\n"
"int b;",
28, 0));
+  EXPECT_EQ("int   a; // comment\n"
+"// comment 2\n"
+"int b;",
+format("int   a; // comment\n"
+   "// comment 2\n"
+   "int b;", 28, 0));
   EXPECT_EQ("int aa; // comment\n"
 "int b;\n"
 "int c; // unrelated comment",
Index: unittests/Format/FormatTest.cpp
===
--- unittests/Format/FormatTest.cpp
+++ unittests/Format/FormatTest.cpp
@@ -1783,6 +1783,455 @@
"0x00, 0x00, 0x00, 0x00};// comment\n");
 }
 
+TEST_F(FormatTest, ReflowsComments) {
+  // Break a long line and reflow with the full next line.
+  EXPECT_EQ("// long long long\n"
+"// long long",
+format("// long long long long\n"
+   "// long",
+   getLLVMStyleWithColumns(20)));
+
+  // Keep the trailing newline while reflowing.
+  EXPECT_EQ("// long long long\n"
+"// long long\n",
+format("// long long long long\n"
+   "// long\n",
+   getLLVMStyleWithColumns(20)));
+
+  // Break a long line and reflow with a part of the next line.
+  EXPECT_EQ("// long long long\n"
+"// long long\n"
+"// long_long",
+format("// long long long long\n"
+   "// long long_long",
+   getLLVMStyleWithColumns(20)));
+
+  // Break but do not reflow if the first word from the next line is too long.
+  EXPECT_EQ("// long long long\n"
+"// long\n"
+"// long_long_long\n",
+format("// long long long long\n"
+   "// long_long_long\n",
+   getLLVMStyleWithColumns(20)));
+
+  // Don't break or reflow short lines.
+  verifyFormat("// long\n"
+   "// long long long lo\n"
+   "// long long long lo\n"
+   "// long",
+   getLLVMStyleWithColumns(20));
+
+  // Keep prefixes and decorations while reflowing.
+  EXPECT_EQ("/// long long long\n"
+"/// long long\n",
+format("/// long long long long\n"
+   "/// long\n",
+   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("//! long long long\n"
+"//! long long\n",
+format("//! long long long long\n"
+   "//! long\n",
+   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/* long long long\n"
+" * long long */",
+format("/* long long long long\n"
+   " * long */",
+   getLLVMStyleWithColumns(20)));
+
+  // Don't bring leading whitespace up while reflowing.
+  EXPECT_EQ("/*  long long long\n"
+" * long long long\n"
+" */",
+format("/*  long long long long\n"
+   " *  long long\n"
+   " */",
+   getLLVMStyleWithColumns(20)));
+
+  // Reflow the last line of a block comment with its trailing '*/'.
+  EXPECT_EQ("/* long long long\n"
+"   long long */",
+format("/* long long long long\n"
+   "   long */",
+   getLLVMStyleWithColumns(20)));
+
+  // Reflow two short lines; keep the postfix of the last one.
+  EXPECT_EQ("/* long long long\n"
+" * long long long */",
+format("/* long long long long\n"
+   " * long\n"
+   " * long */",
+