[PATCH] D40310: Restructure how we break tokens.

2017-11-29 Thread Manuel Klimek via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL319314: Restructure how we break tokens. (authored by 
klimek).

Repository:
  rL LLVM

https://reviews.llvm.org/D40310

Files:
  cfe/trunk/lib/Format/BreakableToken.cpp
  cfe/trunk/lib/Format/BreakableToken.h
  cfe/trunk/lib/Format/ContinuationIndenter.cpp
  cfe/trunk/unittests/Format/FormatTest.cpp
  cfe/trunk/unittests/Format/FormatTestComments.cpp

Index: cfe/trunk/lib/Format/BreakableToken.cpp
===
--- cfe/trunk/lib/Format/BreakableToken.cpp
+++ cfe/trunk/lib/Format/BreakableToken.cpp
@@ -67,6 +67,8 @@
  unsigned ColumnLimit,
  unsigned TabWidth,
  encoding::Encoding Encoding) {
+  DEBUG(llvm::dbgs() << "Comment split: \"" << Text << ", " << ColumnLimit
+ << "\", Content start: " << ContentStartColumn << "\n");
   if (ColumnLimit <= ContentStartColumn + 1)
 return BreakableToken::Split(StringRef::npos, 0);
 
@@ -171,7 +173,7 @@
 }
 
 unsigned
-BreakableToken::getLineLengthAfterCompression(unsigned RemainingTokenColumns,
+BreakableToken::getLengthAfterCompression(unsigned RemainingTokenColumns,
   Split Split) const {
   // Example: consider the content
   // lala  lala
@@ -181,50 +183,58 @@
   // We compute the number of columns when the split is compressed into a single
   // space, like:
   // lala lala
+  //
+  // FIXME: Correctly measure the length of whitespace in Split.second so it
+  // works with tabs.
   return RemainingTokenColumns + 1 - Split.second;
 }
 
-unsigned BreakableSingleLineToken::getLineCount() const { return 1; }
+unsigned BreakableStringLiteral::getLineCount() const { return 1; }
 
-unsigned BreakableSingleLineToken::getLineLengthAfterSplit(
-unsigned LineIndex, unsigned TailOffset,
-StringRef::size_type Length) const {
-  return StartColumn + Prefix.size() + Postfix.size() +
- encoding::columnWidthWithTabs(Line.substr(TailOffset, Length),
-   StartColumn + Prefix.size(),
-   Style.TabWidth, Encoding);
+unsigned BreakableStringLiteral::getRangeLength(unsigned LineIndex,
+unsigned Offset,
+StringRef::size_type Length,
+unsigned StartColumn) const {
+  assert(false &&
+ "Getting the length of a part of the string literal indicates that "
+ "the code tries to reflow it.");
 }
 
-BreakableSingleLineToken::BreakableSingleLineToken(
+unsigned
+BreakableStringLiteral::getRemainingLength(unsigned LineIndex, unsigned Offset,
+   unsigned StartColumn) const {
+  return UnbreakableTailLength + Postfix.size() +
+ encoding::columnWidthWithTabs(Line.substr(Offset, StringRef::npos),
+   StartColumn, Style.TabWidth, Encoding);
+}
+
+unsigned BreakableStringLiteral::getContentStartColumn(unsigned LineIndex,
+   bool Break) const {
+  return StartColumn + Prefix.size();
+}
+
+BreakableStringLiteral::BreakableStringLiteral(
 const FormatToken , unsigned StartColumn, StringRef Prefix,
 StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding,
 const FormatStyle )
 : BreakableToken(Tok, InPPDirective, Encoding, Style),
-  StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix) {
+  StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix),
+  UnbreakableTailLength(Tok.UnbreakableTailLength) {
   assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix));
   Line = Tok.TokenText.substr(
   Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
 }
 
-BreakableStringLiteral::BreakableStringLiteral(
-const FormatToken , unsigned StartColumn, StringRef Prefix,
-StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding,
-const FormatStyle )
-: BreakableSingleLineToken(Tok, StartColumn, Prefix, Postfix, InPPDirective,
-   Encoding, Style) {}
-
-BreakableToken::Split
-BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
- unsigned ColumnLimit,
- llvm::Regex ) const {
-  return getStringSplit(Line.substr(TailOffset),
-StartColumn + Prefix.size() + Postfix.size(),
-ColumnLimit, Style.TabWidth, Encoding);
+BreakableToken::Split BreakableStringLiteral::getSplit(
+unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
+unsigned ContentStartColumn, llvm::Regex ) const {
+  return 

[PATCH] D40310: Restructure how we break tokens.

2017-11-29 Thread Krasimir Georgiev via Phabricator via cfe-commits
krasimir accepted this revision.
krasimir added a comment.
This revision is now accepted and ready to land.

LGTM


https://reviews.llvm.org/D40310



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D40310: Restructure how we break tokens.

2017-11-29 Thread Manuel Klimek via Phabricator via cfe-commits
klimek added inline comments.



Comment at: lib/Format/ContinuationIndenter.cpp:1749
+  }
+  if (!Reflow) {
+// If we didn't reflow into the next line, the only space to consider 
is

krasimir wrote:
> nit: Maybe change this to `if (Reflow)` and switch the if-else bodies.
I had that first, but found that harder to follow when re-reading the code - if 
you feel strongly, I'm also happy to turn it around again :)



Comment at: lib/Format/ContinuationIndenter.cpp:1777
+  assert(Penalty >= NewBreakPenalty);
+  Penalty -= NewBreakPenalty;
+}

krasimir wrote:
> Shouldn't we be resetting `NewBreakBefore` to `false` here?
NewBreakBefore is always reset at the start of the loop, so resetting it here 
wouldn't matter.


https://reviews.llvm.org/D40310



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D40310: Restructure how we break tokens.

2017-11-29 Thread Krasimir Georgiev via Phabricator via cfe-commits
krasimir added inline comments.



Comment at: lib/Format/ContinuationIndenter.cpp:1749
+  }
+  if (!Reflow) {
+// If we didn't reflow into the next line, the only space to consider 
is

nit: Maybe change this to `if (Reflow)` and switch the if-else bodies.



Comment at: lib/Format/ContinuationIndenter.cpp:1777
+  assert(Penalty >= NewBreakPenalty);
+  Penalty -= NewBreakPenalty;
+}

Shouldn't we be resetting `NewBreakBefore` to `false` here?


https://reviews.llvm.org/D40310



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D40310: Restructure how we break tokens.

2017-11-29 Thread Manuel Klimek via Phabricator via cfe-commits
klimek added inline comments.



Comment at: lib/Format/ContinuationIndenter.cpp:1707
+  RemainingTokenColumns = Token->getRemainingLength(
+  NextLineIndex, TailOffset, ContentStartColumn);
+  Reflow = true;

krasimir wrote:
> When we're reflowing we replace the reflow split with a space, so IMO this 
> should be:
> ```
> RemainingTokenColumns = Token->getRemainingLength(
>   NextLineIndex, TailOffset, ContentStartColumn + 1);
> ```
Actually, ContentStartColumn is just calculated incorrectly above. Added tests, 
and added the +1 above, which makes it +1 for all code below.



Comment at: lib/Format/ContinuationIndenter.cpp:1721
+Token->getSplit(NextLineIndex, TailOffset, ColumnLimit,
+ContentStartColumn, CommentPragmasRegex);
+if (Split.first == StringRef::npos) {

krasimir wrote:
> Looks like `ContentStartColumn` is consistently used as the start column of 
> the reflown content on the next line.
> I suggest to `++ContentStartColumn` at the beginning of the body of this if 
> statement (and adjust below appropriately).
Yep, that's what I also figured out - that also led to removing 
++ContentStartColumn in the reflow case below, which was what made this work at 
all.
Added tests to ReflowsCommentsPrecise, which flow through all of the corner 
cases of the if's here.


https://reviews.llvm.org/D40310



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D40310: Restructure how we break tokens.

2017-11-29 Thread Manuel Klimek via Phabricator via cfe-commits
klimek updated this revision to Diff 124709.
klimek marked 4 inline comments as done.
klimek added a comment.

Address review comments.


https://reviews.llvm.org/D40310

Files:
  lib/Format/BreakableToken.cpp
  lib/Format/BreakableToken.h
  lib/Format/ContinuationIndenter.cpp
  unittests/Format/FormatTest.cpp
  unittests/Format/FormatTestComments.cpp

Index: unittests/Format/FormatTestComments.cpp
===
--- unittests/Format/FormatTestComments.cpp
+++ unittests/Format/FormatTestComments.cpp
@@ -1096,11 +1096,12 @@
 }
 
 TEST_F(FormatTestComments, SplitsLongLinesInComments) {
+  // FIXME: Do we need to fix up the "  */" at the end?
+  // It doesn't look like any of our current logic triggers this.
   EXPECT_EQ("/* This is a long\n"
 " * comment that\n"
-" * doesn't\n"
-" * fit on one line.\n"
-" */",
+" * doesn't fit on\n"
+" * one line.  */",
 format("/* "
"This is a long "
"comment that "
@@ -2102,6 +2103,85 @@
   EXPECT_EQ("///", format(" ///  ", getLLVMStyleWithColumns(20)));
 }
 
+TEST_F(FormatTestComments, ReflowsCommentsPrecise) {
+  // FIXME: This assumes we do not continue compressing whitespace once we are
+  // in reflow mode. Consider compressing whitespace.
+
+  // Test that we stop reflowing precisely at the column limit.
+  // After reflowing, "// reflows into   foo" does not fit the column limit,
+  // so we compress the whitespace.
+  EXPECT_EQ("// some text that\n"
+"// reflows into foo\n",
+format("// some text that reflows\n"
+   "// into   foo\n",
+   getLLVMStyleWithColumns(20)));
+  // Given one more column, "// reflows into   foo" does fit the limit, so we
+  // do not compress the whitespace.
+  EXPECT_EQ("// some text that\n"
+"// reflows into   foo\n",
+format("// some text that reflows\n"
+   "// into   foo\n",
+   getLLVMStyleWithColumns(21)));
+
+  // Make sure that we correctly account for the space added in the reflow case
+  // when making the reflowing decision.
+  // First, when the next line ends precisely one column over the limit, do not
+  // reflow.
+  EXPECT_EQ("// some text that\n"
+"// reflows\n"
+"// into1234567\n",
+format("// some text that reflows\n"
+   "// into1234567\n",
+   getLLVMStyleWithColumns(21)));
+  // Secondly, when the next line ends later, but the first word in that line
+  // is precisely one column over the limit, do not reflow.
+  EXPECT_EQ("// some text that\n"
+"// reflows\n"
+"// into1234567 f\n",
+format("// some text that reflows\n"
+   "// into1234567 f\n",
+   getLLVMStyleWithColumns(21)));
+}
+
+TEST_F(FormatTestComments, ReflowsCommentsWithExtraWhitespace) {
+  // Baseline.
+  EXPECT_EQ("// some text\n"
+"// that re flows\n",
+format("// some text that\n"
+   "// re flows\n",
+   getLLVMStyleWithColumns(16)));
+  EXPECT_EQ("// some text\n"
+"// that re flows\n",
+format("// some text that\n"
+   "// reflows\n",
+   getLLVMStyleWithColumns(16)));
+  EXPECT_EQ("/* some text\n"
+" * that re flows\n"
+" */\n",
+format("/* some text that\n"
+   "*  re   flows\n"
+   "*/\n",
+   getLLVMStyleWithColumns(16)));
+  // FIXME: We do not reflow if the indent of two subsequent lines differs;
+  // given that this is different behavior from block comments, do we want
+  // to keep this?
+  EXPECT_EQ("// some text\n"
+"// that\n"
+"// re flows\n",
+format("// some text that\n"
+   "// re   flows\n",
+   getLLVMStyleWithColumns(16)));
+  // Space within parts of a line that fit.
+  // FIXME: Use the earliest possible split while reflowing to compress the
+  // whitespace within the line.
+  EXPECT_EQ("// some text that\n"
+"// does re   flow\n"
+"// more  here\n",
+format("// some text that does\n"
+   "// re   flow  more  here\n",
+   getLLVMStyleWithColumns(21)));
+}
+
 TEST_F(FormatTestComments, IgnoresIf0Contents) {
   EXPECT_EQ("#if 0\n"
 "}{)(&*(^%%#%@! fsadj f;ldjs ,:;| <<<>>>][)(][\n"
@@ -2484,6 +2564,7 @@
   " long */\n"
   "  b);",
   format("a = f(a, /* long long */ b);", getLLVMStyleWithColumns(16)));
+
   EXPECT_EQ(
   "a = f(\n"
   "a,\n"
@@ -2888,16 +2969,15 @@
getLLVMStyleWithColumns(20)));
 }
 
-TEST_F(FormatTestComments, NoCrush_Bug34236) {

[PATCH] D40310: Restructure how we break tokens.

2017-11-28 Thread Krasimir Georgiev via Phabricator via cfe-commits
krasimir added a comment.

Re: "I tried to write a test for this, and convinced myself that while +1 is 
correct, it is currently impossible to change behavior based on the missing 
+1.":
In order to have different outcome based on the start column, you could use 
tabs. Consider the content `"aaa\tb"` with 4 spaces of tabstop. This takes up 5 
columns (say, under the column limit) if it starts at column 0, and 8 columns 
(say, over the column limit) if it starts at column 1.


https://reviews.llvm.org/D40310



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D40310: Restructure how we break tokens.

2017-11-28 Thread Krasimir Georgiev via Phabricator via cfe-commits
krasimir added inline comments.



Comment at: lib/Format/BreakableToken.cpp:178
   Split Split) const {
   // Example: consider the content
   // lala  lala

Offtopic: Should add a FIXME that this doesn't really work in the presence of 
tabs.



Comment at: lib/Format/BreakableToken.h:154
   /// \p Split has been compressed into a single space.
   unsigned getLineLengthAfterCompression(unsigned RemainingTokenColumns,
  Split Split) const;

nit: the comment doesn't make sense anymore.



Comment at: lib/Format/ContinuationIndenter.cpp:1625
+  if (ExcessCharactersPenalty < NewBreakPenalty) {
+ContinueOnLine = true;
+  }

nit: no braces around single-statement if body



Comment at: lib/Format/ContinuationIndenter.cpp:1707
+  RemainingTokenColumns = Token->getRemainingLength(
+  NextLineIndex, TailOffset, ContentStartColumn);
+  Reflow = true;

When we're reflowing we replace the reflow split with a space, so IMO this 
should be:
```
RemainingTokenColumns = Token->getRemainingLength(
  NextLineIndex, TailOffset, ContentStartColumn + 1);
```



Comment at: lib/Format/ContinuationIndenter.cpp:1709
+  Reflow = true;
+  if (ContentStartColumn + RemainingTokenColumns > ColumnLimit) {
+DEBUG(llvm::dbgs() << "Over limit after reflow, need: "

IMO this should be `ContentStartColumn + RemainingTokenColumns + 1`, accounting 
for the space that replaces the reflow split.



Comment at: lib/Format/ContinuationIndenter.cpp:1721
+Token->getSplit(NextLineIndex, TailOffset, ColumnLimit,
+ContentStartColumn, CommentPragmasRegex);
+if (Split.first == StringRef::npos) {

Looks like `ContentStartColumn` is consistently used as the start column of the 
reflown content on the next line.
I suggest to `++ContentStartColumn` at the beginning of the body of this if 
statement (and adjust below appropriately).


https://reviews.llvm.org/D40310



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D40310: Restructure how we break tokens.

2017-11-28 Thread Manuel Klimek via Phabricator via cfe-commits
klimek added inline comments.



Comment at: lib/Format/ContinuationIndenter.cpp:1525
+  if (!DryRun)
+Token->adaptStartOfLine(0, Whitespaces);
+

krasimir wrote:
> If we indent here, shouldn't that also change ContentStartColumn?
Nope, this will exactly adapt the start of the line to ContentStartColumn. 
ContentStartColumn is where the line thinks it wants to be, not where it is.



Comment at: lib/Format/ContinuationIndenter.cpp:1557
 if (LineIndex < EndIndex - 1)
+  // The last line's penalty is handled in addNextStateToQueue().
   Penalty += Style.PenaltyExcessCharacter *

krasimir wrote:
> How does the last line's penalty get handled in addNextStateToQueue()?
By the State's Column being above the column limit.



Comment at: lib/Format/ContinuationIndenter.cpp:1578
+LineIndex, TailOffset + Split.first + Split.second, ColumnLimit,
+ContentStartColumn + ToSplitColumns, CommentPragmasRegex);
+// Compute the comlumns necessary to fit the next non-breakable 
sequence

krasimir wrote:
> krasimir wrote:
> > nit: `BreakableToken::Split NextSplit = Token->getSplit(...)`
> Hm, right now `ContentStartColumn + ToSplitColumns` points to the column 
> where the character at offset `TailOffset + Split.first` is supposed to be. 
> Then `NextSplit` is relative to the offset `TailOffset + Split.first + 
> Split.second`, so IMO it shouldn't use `ContentStartColumn + ToSplitColumns` 
> as a start column. I think that `ToSplitColumns` needs to be computed as 
> follows:
> ```
> unsigned ToSplitColumns = Token->getRangeLength(LineIndex, TailOffset, 
> Split.first + Split.second, ContentStartColumn);
> ```
> Also, a comment of the intended meaning of `ToSplitColumns` would be helpful.
Good catch, but the solution is differnet. Using ContentStartColumn + 
ToSplitColumns is incorrect, we need ContentStartColumn + ToSplitColumns + 1 
(as Split.second just contains the whitespace, but we want to consider that 
whitespace compressed).

I tried to write a test for this, and convinced myself that while +1 is 
correct, it is currently impossible to change behavior based on the missing +1.


https://reviews.llvm.org/D40310



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D40310: Restructure how we break tokens.

2017-11-28 Thread Manuel Klimek via Phabricator via cfe-commits
klimek updated this revision to Diff 124581.
klimek marked 3 inline comments as done.
klimek added a comment.

Address review comments.


https://reviews.llvm.org/D40310

Files:
  lib/Format/BreakableToken.cpp
  lib/Format/BreakableToken.h
  lib/Format/ContinuationIndenter.cpp
  unittests/Format/FormatTest.cpp
  unittests/Format/FormatTestComments.cpp

Index: unittests/Format/FormatTestComments.cpp
===
--- unittests/Format/FormatTestComments.cpp
+++ unittests/Format/FormatTestComments.cpp
@@ -1096,11 +1096,12 @@
 }
 
 TEST_F(FormatTestComments, SplitsLongLinesInComments) {
+  // FIXME: Do we need to fix up the "  */" at the end?
+  // It doesn't look like any of our current logic triggers this.
   EXPECT_EQ("/* This is a long\n"
 " * comment that\n"
-" * doesn't\n"
-" * fit on one line.\n"
-" */",
+" * doesn't fit on\n"
+" * one line.  */",
 format("/* "
"This is a long "
"comment that "
@@ -2102,6 +2103,66 @@
   EXPECT_EQ("///", format(" ///  ", getLLVMStyleWithColumns(20)));
 }
 
+TEST_F(FormatTestComments, ReflowsCommentsPrecise) {
+  // FIXME: This assumes we do not continue compressing whitespace once we are
+  // in reflow mode. Consider compressing whitespace.
+
+  // Test that we stop reflowing precisely at the column limit.
+  // After reflowing, "// reflows into   foo" does not fit the column limit,
+  // so we compress the whitespace.
+  EXPECT_EQ("// some text that\n"
+"// reflows into foo\n",
+format("// some text that reflows\n"
+   "// into   foo\n",
+   getLLVMStyleWithColumns(20)));
+  // Given one more column, "// reflows into   foo" does fit the limit, so we
+  // do not compress the whitespace.
+  EXPECT_EQ("// some text that\n"
+"// reflows into   foo\n",
+format("// some text that reflows\n"
+   "// into   foo\n",
+   getLLVMStyleWithColumns(21)));
+}
+
+TEST_F(FormatTestComments, ReflowsCommentsWithExtraWhitespace) {
+  // Baseline.
+  EXPECT_EQ("// some text\n"
+"// that re flows\n",
+format("// some text that\n"
+   "// re flows\n",
+   getLLVMStyleWithColumns(16)));
+  EXPECT_EQ("// some text\n"
+"// that re flows\n",
+format("// some text that\n"
+   "// reflows\n",
+   getLLVMStyleWithColumns(16)));
+  EXPECT_EQ("/* some text\n"
+" * that re flows\n"
+" */\n",
+format("/* some text that\n"
+   "*  re   flows\n"
+   "*/\n",
+   getLLVMStyleWithColumns(16)));
+  // FIXME: We do not reflow if the indent of two subsequent lines differs;
+  // given that this is different behavior from block comments, do we want
+  // to keep this?
+  EXPECT_EQ("// some text\n"
+"// that\n"
+"// re flows\n",
+format("// some text that\n"
+   "// re   flows\n",
+   getLLVMStyleWithColumns(16)));
+  // Space within parts of a line that fit.
+  // FIXME: Use the earliest possible split while reflowing to compress the
+  // whitespace within the line.
+  EXPECT_EQ("// some text that\n"
+"// does re   flow\n"
+"// more  here\n",
+format("// some text that does\n"
+   "// re   flow  more  here\n",
+   getLLVMStyleWithColumns(21)));
+}
+
 TEST_F(FormatTestComments, IgnoresIf0Contents) {
   EXPECT_EQ("#if 0\n"
 "}{)(&*(^%%#%@! fsadj f;ldjs ,:;| <<<>>>][)(][\n"
@@ -2484,6 +2545,7 @@
   " long */\n"
   "  b);",
   format("a = f(a, /* long long */ b);", getLLVMStyleWithColumns(16)));
+
   EXPECT_EQ(
   "a = f(\n"
   "a,\n"
@@ -2888,16 +2950,15 @@
getLLVMStyleWithColumns(20)));
 }
 
-TEST_F(FormatTestComments, NoCrush_Bug34236) {
+TEST_F(FormatTestComments, NoCrash_Bug34236) {
   // This is a test case from a crasher reported in:
   // https://bugs.llvm.org/show_bug.cgi?id=34236
   // Temporarily disable formatting for readability.
   // clang-format off
   EXPECT_EQ(
 "/**/ /*\n"
 "  *   a\n"
-"  * b c\n"
-"  * d*/",
+"  * b c d*/",
   format(
 "/**/ /*\n"
 " *   a b\n"
Index: unittests/Format/FormatTest.cpp

[PATCH] D40310: Restructure how we break tokens.

2017-11-28 Thread Krasimir Georgiev via Phabricator via cfe-commits
krasimir added inline comments.



Comment at: lib/Format/ContinuationIndenter.cpp:1525
+  if (!DryRun)
+Token->adaptStartOfLine(0, Whitespaces);
+

If we indent here, shouldn't that also change ContentStartColumn?



Comment at: lib/Format/ContinuationIndenter.cpp:1557
 if (LineIndex < EndIndex - 1)
+  // The last line's penalty is handled in addNextStateToQueue().
   Penalty += Style.PenaltyExcessCharacter *

How does the last line's penalty get handled in addNextStateToQueue()?



Comment at: lib/Format/ContinuationIndenter.cpp:1565
 
-  // Check if compressing the whitespace range will bring the line length
-  // under the limit. If that is the case, we perform whitespace 
compression
-  // instead of inserting a line break.
-  unsigned RemainingTokenColumnsAfterCompression =
-  Token->getLineLengthAfterCompression(RemainingTokenColumns, Split);
-  if (RemainingTokenColumnsAfterCompression <= RemainingSpace) {
-RemainingTokenColumns = RemainingTokenColumnsAfterCompression;
-ReflowInProgress = true;
-if (!DryRun)
-  Token->compressWhitespace(LineIndex, TailOffset, Split, Whitespaces);
-DEBUG(llvm::dbgs() << "Compressing below limit.\n");
-break;
-  }
-
-  // Compute both the penalties for:
-  // - not breaking, and leaving excess characters
-  // - adding a new line break
-  assert(RemainingTokenColumnsAfterCompression > RemainingSpace);
-  unsigned ExcessCharactersPenalty =
-  (RemainingTokenColumnsAfterCompression - RemainingSpace) *
-  Style.PenaltyExcessCharacter;
-
-  unsigned BreakPenalty = NewBreakPenalty;
-  unsigned ColumnsUsed =
-  Token->getLineLengthAfterSplit(LineIndex, TailOffset, Split.first);
-  if (ColumnsUsed > ColumnLimit)
-BreakPenalty +=
-Style.PenaltyExcessCharacter * (ColumnsUsed - ColumnLimit);
-
-  DEBUG(llvm::dbgs() << "Penalty excess: " << ExcessCharactersPenalty
- << "\nbreak : " << BreakPenalty << "\n");
-  // Only continue to add the line break if the penalty of the excess
-  // characters is larger than the penalty of the line break.
-  // FIXME: This does not take into account when we can later remove the
-  // line break again due to a reflow.
-  if (ExcessCharactersPenalty < BreakPenalty) {
-if (!DryRun)
-  Token->compressWhitespace(LineIndex, TailOffset, Split, Whitespaces);
-// Do not set ReflowInProgress: we do not have any space left to
-// reflow into.
-Penalty += ExcessCharactersPenalty;
-break;
+  if (!Current.isStringLiteral()) {
+// Check whether the next natural split point after the current one

I really dislike this: we shouldn't have the reflow control flow depend on the 
specific type of the token. Better introduce a new virtual method that enables 
this branch and override it appropriately.



Comment at: lib/Format/ContinuationIndenter.cpp:1578
+LineIndex, TailOffset + Split.first + Split.second, ColumnLimit,
+ContentStartColumn + ToSplitColumns, CommentPragmasRegex);
+// Compute the comlumns necessary to fit the next non-breakable 
sequence

nit: `BreakableToken::Split NextSplit = Token->getSplit(...)`



Comment at: lib/Format/ContinuationIndenter.cpp:1578
+LineIndex, TailOffset + Split.first + Split.second, ColumnLimit,
+ContentStartColumn + ToSplitColumns, CommentPragmasRegex);
+// Compute the comlumns necessary to fit the next non-breakable 
sequence

krasimir wrote:
> nit: `BreakableToken::Split NextSplit = Token->getSplit(...)`
Hm, right now `ContentStartColumn + ToSplitColumns` points to the column where 
the character at offset `TailOffset + Split.first` is supposed to be. Then 
`NextSplit` is relative to the offset `TailOffset + Split.first + 
Split.second`, so IMO it shouldn't use `ContentStartColumn + ToSplitColumns` as 
a start column. I think that `ToSplitColumns` needs to be computed as follows:
```
unsigned ToSplitColumns = Token->getRangeLength(LineIndex, TailOffset, 
Split.first + Split.second, ContentStartColumn);
```
Also, a comment of the intended meaning of `ToSplitColumns` would be helpful.



Comment at: lib/Format/ContinuationIndenter.cpp:1579
+ContentStartColumn + ToSplitColumns, CommentPragmasRegex);
+// Compute the comlumns necessary to fit the next non-breakable 
sequence
+// into the current line.

nit: `comlumns`


https://reviews.llvm.org/D40310



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D40310: Restructure how we break tokens.

2017-11-27 Thread Manuel Klimek via Phabricator via cfe-commits
klimek added a comment.

Restructured to make the invariants clearer based on a chat with Krasimir.


https://reviews.llvm.org/D40310



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D40310: Restructure how we break tokens.

2017-11-27 Thread Manuel Klimek via Phabricator via cfe-commits
klimek updated this revision to Diff 124381.
klimek added a comment.

Restructure based on code review feedback.


https://reviews.llvm.org/D40310

Files:
  lib/Format/BreakableToken.cpp
  lib/Format/BreakableToken.h
  lib/Format/ContinuationIndenter.cpp
  unittests/Format/FormatTest.cpp
  unittests/Format/FormatTestComments.cpp

Index: unittests/Format/FormatTestComments.cpp
===
--- unittests/Format/FormatTestComments.cpp
+++ unittests/Format/FormatTestComments.cpp
@@ -1096,11 +1096,12 @@
 }
 
 TEST_F(FormatTestComments, SplitsLongLinesInComments) {
+  // FIXME: Do we need to fix up the "  */" at the end?
+  // It doesn't look like any of our current logic triggers this.
   EXPECT_EQ("/* This is a long\n"
 " * comment that\n"
-" * doesn't\n"
-" * fit on one line.\n"
-" */",
+" * doesn't fit on\n"
+" * one line.  */",
 format("/* "
"This is a long "
"comment that "
@@ -2102,6 +2103,66 @@
   EXPECT_EQ("///", format(" ///  ", getLLVMStyleWithColumns(20)));
 }
 
+TEST_F(FormatTestComments, ReflowsCommentsPrecise) {
+  // FIXME: This assumes we do not continue compressing whitespace once we are
+  // in reflow mode. Consider compressing whitespace.
+
+  // Test that we stop reflowing precisely at the column limit.
+  // After reflowing, "// reflows into   foo" does not fit the column limit,
+  // so we compress the whitespace.
+  EXPECT_EQ("// some text that\n"
+"// reflows into foo\n",
+format("// some text that reflows\n"
+   "// into   foo\n",
+   getLLVMStyleWithColumns(20)));
+  // Given one more column, "// reflows into   foo" does fit the limit, so we
+  // do not compress the whitespace.
+  EXPECT_EQ("// some text that\n"
+"// reflows into   foo\n",
+format("// some text that reflows\n"
+   "// into   foo\n",
+   getLLVMStyleWithColumns(21)));
+}
+
+TEST_F(FormatTestComments, ReflowsCommentsWithExtraWhitespace) {
+  // Baseline.
+  EXPECT_EQ("// some text\n"
+"// that re flows\n",
+format("// some text that\n"
+   "// re flows\n",
+   getLLVMStyleWithColumns(16)));
+  EXPECT_EQ("// some text\n"
+"// that re flows\n",
+format("// some text that\n"
+   "// reflows\n",
+   getLLVMStyleWithColumns(16)));
+  EXPECT_EQ("/* some text\n"
+" * that re flows\n"
+" */\n",
+format("/* some text that\n"
+   "*  re   flows\n"
+   "*/\n",
+   getLLVMStyleWithColumns(16)));
+  // FIXME: We do not reflow if the indent of two subsequent lines differs;
+  // given that this is different behavior from block comments, do we want
+  // to keep this?
+  EXPECT_EQ("// some text\n"
+"// that\n"
+"// re flows\n",
+format("// some text that\n"
+   "// re   flows\n",
+   getLLVMStyleWithColumns(16)));
+  // Space within parts of a line that fit.
+  // FIXME: Use the earliest possible split while reflowing to compress the
+  // whitespace within the line.
+  EXPECT_EQ("// some text that\n"
+"// does re   flow\n"
+"// more  here\n",
+format("// some text that does\n"
+   "// re   flow  more  here\n",
+   getLLVMStyleWithColumns(21)));
+}
+
 TEST_F(FormatTestComments, IgnoresIf0Contents) {
   EXPECT_EQ("#if 0\n"
 "}{)(&*(^%%#%@! fsadj f;ldjs ,:;| <<<>>>][)(][\n"
@@ -2484,6 +2545,7 @@
   " long */\n"
   "  b);",
   format("a = f(a, /* long long */ b);", getLLVMStyleWithColumns(16)));
+
   EXPECT_EQ(
   "a = f(\n"
   "a,\n"
@@ -2888,16 +2950,15 @@
getLLVMStyleWithColumns(20)));
 }
 
-TEST_F(FormatTestComments, NoCrush_Bug34236) {
+TEST_F(FormatTestComments, NoCrash_Bug34236) {
   // This is a test case from a crasher reported in:
   // https://bugs.llvm.org/show_bug.cgi?id=34236
   // Temporarily disable formatting for readability.
   // clang-format off
   EXPECT_EQ(
 "/**/ /*\n"
 "  *   a\n"
-"  * b c\n"
-"  * d*/",
+"  * b c d*/",
   format(
 "/**/ /*\n"
 " *   a b\n"
Index: unittests/Format/FormatTest.cpp
===
--- 

[PATCH] D40310: Restructure how we break tokens.

2017-11-27 Thread Manuel Klimek via Phabricator via cfe-commits
klimek added inline comments.



Comment at: lib/Format/ContinuationIndenter.cpp:1518
+  unsigned RemainingTokenColumns = 0;
+  // The column number we're currently at.
+  unsigned ContentStartColumn = 0;

krasimir wrote:
> Could you please spell out the invariants that we maintain about 
> `TailOffset`, `RemainingTokenColumns` and `ContentStartColumn` (at least) at 
> the beginning of every main loop iteration below? That would surely make it 
> easier to review.
I'm not sure what you mean - I would literally write what I already wrote in 
the comment. Can you formulate questions so I can make sure I answer them?



Comment at: lib/Format/ContinuationIndenter.cpp:1533
+  // ContentStartColumn is either
+  // - at the start of the line, directly after a break
+  // - the end of the last line +1, when continuing a reflow over multiple

krasimir wrote:
> If the previous iteration requested that we try to reflow, how can 
> `ContentStartColumn` be at the start of the (current) line?
After a line break (that was already in the original text), ContentStartColumn 
is set to where we want the first character of the broken line to start.


https://reviews.llvm.org/D40310



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D40310: Restructure how we break tokens.

2017-11-24 Thread Krasimir Georgiev via Phabricator via cfe-commits
krasimir added a comment.

Started the review. It would take a few cycles 




Comment at: lib/Format/ContinuationIndenter.cpp:1518
+  unsigned RemainingTokenColumns = 0;
+  // The column number we're currently at.
+  unsigned ContentStartColumn = 0;

Could you please spell out the invariants that we maintain about `TailOffset`, 
`RemainingTokenColumns` and `ContentStartColumn` (at least) at the beginning of 
every main loop iteration below? That would surely make it easier to review.



Comment at: lib/Format/ContinuationIndenter.cpp:1533
+  // ContentStartColumn is either
+  // - at the start of the line, directly after a break
+  // - the end of the last line +1, when continuing a reflow over multiple

If the previous iteration requested that we try to reflow, how can 
`ContentStartColumn` be at the start of the (current) line?


https://reviews.llvm.org/D40310



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D40310: Restructure how we break tokens.

2017-11-23 Thread Manuel Klimek via Phabricator via cfe-commits
klimek marked an inline comment as done.
klimek added inline comments.



Comment at: lib/Format/ContinuationIndenter.cpp:1504
  : Style.PenaltyBreakComment;
-  unsigned RemainingSpace = ColumnLimit - Current.UnbreakableTailLength;
+  // Stores whether we introduce a break anywhere in the token.
   bool BreakInserted = Token->introducesBreakBeforeToken();

krasimir wrote:
> klimek wrote:
> > krasimir wrote:
> > > Does a reflow count as a break?
> > I do believe so (well, the break in the reflow counts, the reflow itself is 
> > not a break, but removing a break :)
> So, if we add a break then remove a break while reflowing, will 
> `BreakInserted` be true or false?
True.


https://reviews.llvm.org/D40310



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D40310: Restructure how we break tokens.

2017-11-23 Thread Manuel Klimek via Phabricator via cfe-commits
klimek updated this revision to Diff 124095.
klimek added a comment.

Pull out getRemainingLength.


https://reviews.llvm.org/D40310

Files:
  lib/Format/BreakableToken.cpp
  lib/Format/BreakableToken.h
  lib/Format/ContinuationIndenter.cpp
  unittests/Format/FormatTest.cpp
  unittests/Format/FormatTestComments.cpp

Index: unittests/Format/FormatTestComments.cpp
===
--- unittests/Format/FormatTestComments.cpp
+++ unittests/Format/FormatTestComments.cpp
@@ -1096,11 +1096,12 @@
 }
 
 TEST_F(FormatTestComments, SplitsLongLinesInComments) {
+  // FIXME: Do we need to fix up the "  */" at the end?
+  // It doesn't look like any of our current logic triggers this.
   EXPECT_EQ("/* This is a long\n"
 " * comment that\n"
-" * doesn't\n"
-" * fit on one line.\n"
-" */",
+" * doesn't fit on\n"
+" * one line.  */",
 format("/* "
"This is a long "
"comment that "
@@ -2102,6 +2103,66 @@
   EXPECT_EQ("///", format(" ///  ", getLLVMStyleWithColumns(20)));
 }
 
+TEST_F(FormatTestComments, ReflowsCommentsPrecise) {
+  // FIXME: This assumes we do not continue compressing whitespace once we are
+  // in reflow mode. Consider compressing whitespace.
+
+  // Test that we stop reflowing precisely at the column limit.
+  // After reflowing, "// reflows into   foo" does not fit the column limit,
+  // so we compress the whitespace.
+  EXPECT_EQ("// some text that\n"
+"// reflows into foo\n",
+format("// some text that reflows\n"
+   "// into   foo\n",
+   getLLVMStyleWithColumns(20)));
+  // Given one more column, "// reflows into   foo" does fit the limit, so we
+  // do not compress the whitespace.
+  EXPECT_EQ("// some text that\n"
+"// reflows into   foo\n",
+format("// some text that reflows\n"
+   "// into   foo\n",
+   getLLVMStyleWithColumns(21)));
+}
+
+TEST_F(FormatTestComments, ReflowsCommentsWithExtraWhitespace) {
+  // Baseline.
+  EXPECT_EQ("// some text\n"
+"// that re flows\n",
+format("// some text that\n"
+   "// re flows\n",
+   getLLVMStyleWithColumns(16)));
+  EXPECT_EQ("// some text\n"
+"// that re flows\n",
+format("// some text that\n"
+   "// reflows\n",
+   getLLVMStyleWithColumns(16)));
+  EXPECT_EQ("/* some text\n"
+" * that re flows\n"
+" */\n",
+format("/* some text that\n"
+   "*  re   flows\n"
+   "*/\n",
+   getLLVMStyleWithColumns(16)));
+  // FIXME: We do not reflow if the indent of two subsequent lines differs;
+  // given that this is different behavior from block comments, do we want
+  // to keep this?
+  EXPECT_EQ("// some text\n"
+"// that\n"
+"// re flows\n",
+format("// some text that\n"
+   "// re   flows\n",
+   getLLVMStyleWithColumns(16)));
+  // Space within parts of a line that fit.
+  // FIXME: Use the earliest possible split while reflowing to compress the
+  // whitespace within the line.
+  EXPECT_EQ("// some text that\n"
+"// does re   flow\n"
+"// more  here\n",
+format("// some text that does\n"
+   "// re   flow  more  here\n",
+   getLLVMStyleWithColumns(21)));
+}
+
 TEST_F(FormatTestComments, IgnoresIf0Contents) {
   EXPECT_EQ("#if 0\n"
 "}{)(&*(^%%#%@! fsadj f;ldjs ,:;| <<<>>>][)(][\n"
@@ -2484,6 +2545,7 @@
   " long */\n"
   "  b);",
   format("a = f(a, /* long long */ b);", getLLVMStyleWithColumns(16)));
+
   EXPECT_EQ(
   "a = f(\n"
   "a,\n"
@@ -2888,16 +2950,15 @@
getLLVMStyleWithColumns(20)));
 }
 
-TEST_F(FormatTestComments, NoCrush_Bug34236) {
+TEST_F(FormatTestComments, NoCrash_Bug34236) {
   // This is a test case from a crasher reported in:
   // https://bugs.llvm.org/show_bug.cgi?id=34236
   // Temporarily disable formatting for readability.
   // clang-format off
   EXPECT_EQ(
 "/**/ /*\n"
 "  *   a\n"
-"  * b c\n"
-"  * d*/",
+"  * b c d*/",
   format(
 "/**/ /*\n"
 " *   a b\n"
Index: unittests/Format/FormatTest.cpp
===
--- 

[PATCH] D40310: Restructure how we break tokens.

2017-11-23 Thread Krasimir Georgiev via Phabricator via cfe-commits
krasimir added inline comments.



Comment at: lib/Format/BreakableToken.cpp:198
+ "Getting the length of a part of the string literal indicates that "
+ "the code tries to reflow it.");
+  return UnbreakableTailLength + Postfix.size() +

klimek wrote:
> krasimir wrote:
> > How about clients that explicitly pass `Length = Line.size() - Offset`?
> That is different (I now also went and updated the comment for getRangeLength 
> to explain that).
> 
> Generally, Length == Line.size() - Offset is still a portion of the content, 
> as opposed to npos, which has a special meaning. I'm wondering whether I 
> should just pull out a differently named method for it, now that I'm thinking 
> about it.
Yes please!



Comment at: lib/Format/ContinuationIndenter.cpp:1504
  : Style.PenaltyBreakComment;
-  unsigned RemainingSpace = ColumnLimit - Current.UnbreakableTailLength;
+  // Stores whether we introduce a break anywhere in the token.
   bool BreakInserted = Token->introducesBreakBeforeToken();

klimek wrote:
> krasimir wrote:
> > Does a reflow count as a break?
> I do believe so (well, the break in the reflow counts, the reflow itself is 
> not a break, but removing a break :)
So, if we add a break then remove a break while reflowing, will `BreakInserted` 
be true or false?



Comment at: unittests/Format/FormatTestComments.cpp:2149
+  // to keep this?
+  EXPECT_EQ("// some text\n"
+"// that\n"

klimek wrote:
> krasimir wrote:
> > This is like this for cases like lists in comments:
> > ```
> > blah-blah-blah:
> >   1. blah
> >   2. blah-blah
> > ```
> > I think here the block comments behavior might be wrong.
> Note that on the doc I shared you voted the reverse ;)
Then I should consider re-voting :)


https://reviews.llvm.org/D40310



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D40310: Restructure how we break tokens.

2017-11-23 Thread Manuel Klimek via Phabricator via cfe-commits
klimek added inline comments.



Comment at: lib/Format/BreakableToken.cpp:198
+ "Getting the length of a part of the string literal indicates that "
+ "the code tries to reflow it.");
+  return UnbreakableTailLength + Postfix.size() +

krasimir wrote:
> How about clients that explicitly pass `Length = Line.size() - Offset`?
That is different (I now also went and updated the comment for getRangeLength 
to explain that).

Generally, Length == Line.size() - Offset is still a portion of the content, as 
opposed to npos, which has a special meaning. I'm wondering whether I should 
just pull out a differently named method for it, now that I'm thinking about it.



Comment at: lib/Format/BreakableToken.h:100
+  /// \brief Returns the number of columns required to format the range at 
bytes
+  /// \p Offset to \p Offset \c + \p Length.
+  ///

krasimir wrote:
> Does this include the byte `Offset + Length`?
Reworded.



Comment at: lib/Format/BreakableToken.h:107
   ///
-  /// Note that previous breaks are not taken into account. \p TailOffset is
-  /// always specified from the start of the (original) line.
-  /// \p Length can be set to StringRef::npos, which means "to the end of 
line".
-  virtual unsigned
-  getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,
-  StringRef::size_type Length) const = 0;
+  /// \p StartColumn is the column at which the text starts, needed to compute
+  ///tab stops correctly.

krasimir wrote:
> `text` is ambiguous here: does it refer to the content of the line or to the 
> range defined by the offset and length?
Introduced 'text' in the first paragraph and reworded.



Comment at: lib/Format/BreakableToken.h:120
   /// \p LineIndex, if previously broken at \p TailOffset. If possible, do not
   /// violate \p ColumnLimit.
   virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,

krasimir wrote:
> What is `ReflownColumn` used for?
Argh, actually not called ReflownColumn in the implementations :)
Changed and commented.



Comment at: lib/Format/BreakableToken.h:142
   /// \brief Returns a whitespace range (offset, length) of the content at
-  /// \p LineIndex such that the content preceding this range needs to be
-  /// reformatted before any breaks are made to this line.
+  /// \p LineIndex such that the content of the current line is reflown to the
+  /// end of the previous one.

krasimir wrote:
> Does the current line refer to the line at LineIndex?
Reworded.



Comment at: lib/Format/ContinuationIndenter.cpp:1504
  : Style.PenaltyBreakComment;
-  unsigned RemainingSpace = ColumnLimit - Current.UnbreakableTailLength;
+  // Stores whether we introduce a break anywhere in the token.
   bool BreakInserted = Token->introducesBreakBeforeToken();

krasimir wrote:
> Does a reflow count as a break?
I do believe so (well, the break in the reflow counts, the reflow itself is not 
a break, but removing a break :)



Comment at: unittests/Format/FormatTest.cpp:10603
 format("\"一\t二 \t三 四 五\t六 \t七 八九十\tqq\"",
getLLVMStyleWithColumns(11)));
 

krasimir wrote:
> What happened here?
It was wrong, it's now correct :)
Those characters are printed in double-width, and llvm's encoding library 
correctly returns this. Reflowing previously did not correctly count that, but 
used the character count instead.
At offset 8 (after a \t), C " pokes one column over the limit (C == 
double-width character, space == 1, " == 1 -> 4 -> 12 columns.



Comment at: unittests/Format/FormatTestComments.cpp:1104
+" * doesn't fit on\n"
+" * one line.  */",
 format("/* "

krasimir wrote:
> I think this is desirable. The jsdoc folks really wanted to fix-up the `*/` 
> at the end. I think it has to do with `DelimitersOnNewline`.
> If this already works in Java and js mode, that could be good enough.
I don't know whether it does - I only know I didn't break any tests :)



Comment at: unittests/Format/FormatTestComments.cpp:2108
+  // FIXME: This assumes we do not continue compressing whitespace once we are
+  // in reflow mode. Consider compressing whitespace.
+

krasimir wrote:
> I thinks that we should be compressing whitespace in reflow mode too.
Correct, but in a different patch - this patch is a strict improvement 
regarding whitespace compression, and adding more would make it more complex.



Comment at: unittests/Format/FormatTestComments.cpp:2149
+  // to keep this?
+  EXPECT_EQ("// some text\n"
+"// that\n"

krasimir wrote:
> This is like this for cases like lists in comments:
> ```
> blah-blah-blah:
>   1. blah
>   2. 

[PATCH] D40310: Restructure how we break tokens.

2017-11-23 Thread Manuel Klimek via Phabricator via cfe-commits
klimek updated this revision to Diff 124075.
klimek marked 10 inline comments as done.
klimek added a comment.

Address review comments.


https://reviews.llvm.org/D40310

Files:
  lib/Format/BreakableToken.cpp
  lib/Format/BreakableToken.h
  lib/Format/ContinuationIndenter.cpp
  unittests/Format/FormatTest.cpp
  unittests/Format/FormatTestComments.cpp

Index: unittests/Format/FormatTestComments.cpp
===
--- unittests/Format/FormatTestComments.cpp
+++ unittests/Format/FormatTestComments.cpp
@@ -1096,11 +1096,12 @@
 }
 
 TEST_F(FormatTestComments, SplitsLongLinesInComments) {
+  // FIXME: Do we need to fix up the "  */" at the end?
+  // It doesn't look like any of our current logic triggers this.
   EXPECT_EQ("/* This is a long\n"
 " * comment that\n"
-" * doesn't\n"
-" * fit on one line.\n"
-" */",
+" * doesn't fit on\n"
+" * one line.  */",
 format("/* "
"This is a long "
"comment that "
@@ -2102,6 +2103,66 @@
   EXPECT_EQ("///", format(" ///  ", getLLVMStyleWithColumns(20)));
 }
 
+TEST_F(FormatTestComments, ReflowsCommentsPrecise) {
+  // FIXME: This assumes we do not continue compressing whitespace once we are
+  // in reflow mode. Consider compressing whitespace.
+
+  // Test that we stop reflowing precisely at the column limit.
+  // After reflowing, "// reflows into   foo" does not fit the column limit,
+  // so we compress the whitespace.
+  EXPECT_EQ("// some text that\n"
+"// reflows into foo\n",
+format("// some text that reflows\n"
+   "// into   foo\n",
+   getLLVMStyleWithColumns(20)));
+  // Given one more column, "// reflows into   foo" does fit the limit, so we
+  // do not compress the whitespace.
+  EXPECT_EQ("// some text that\n"
+"// reflows into   foo\n",
+format("// some text that reflows\n"
+   "// into   foo\n",
+   getLLVMStyleWithColumns(21)));
+}
+
+TEST_F(FormatTestComments, ReflowsCommentsWithExtraWhitespace) {
+  // Baseline.
+  EXPECT_EQ("// some text\n"
+"// that re flows\n",
+format("// some text that\n"
+   "// re flows\n",
+   getLLVMStyleWithColumns(16)));
+  EXPECT_EQ("// some text\n"
+"// that re flows\n",
+format("// some text that\n"
+   "// reflows\n",
+   getLLVMStyleWithColumns(16)));
+  EXPECT_EQ("/* some text\n"
+" * that re flows\n"
+" */\n",
+format("/* some text that\n"
+   "*  re   flows\n"
+   "*/\n",
+   getLLVMStyleWithColumns(16)));
+  // FIXME: We do not reflow if the indent of two subsequent lines differs;
+  // given that this is different behavior from block comments, do we want
+  // to keep this?
+  EXPECT_EQ("// some text\n"
+"// that\n"
+"// re flows\n",
+format("// some text that\n"
+   "// re   flows\n",
+   getLLVMStyleWithColumns(16)));
+  // Space within parts of a line that fit.
+  // FIXME: Use the earliest possible split while reflowing to compress the
+  // whitespace within the line.
+  EXPECT_EQ("// some text that\n"
+"// does re   flow\n"
+"// more  here\n",
+format("// some text that does\n"
+   "// re   flow  more  here\n",
+   getLLVMStyleWithColumns(21)));
+}
+
 TEST_F(FormatTestComments, IgnoresIf0Contents) {
   EXPECT_EQ("#if 0\n"
 "}{)(&*(^%%#%@! fsadj f;ldjs ,:;| <<<>>>][)(][\n"
@@ -2484,6 +2545,7 @@
   " long */\n"
   "  b);",
   format("a = f(a, /* long long */ b);", getLLVMStyleWithColumns(16)));
+
   EXPECT_EQ(
   "a = f(\n"
   "a,\n"
@@ -2888,16 +2950,15 @@
getLLVMStyleWithColumns(20)));
 }
 
-TEST_F(FormatTestComments, NoCrush_Bug34236) {
+TEST_F(FormatTestComments, NoCrash_Bug34236) {
   // This is a test case from a crasher reported in:
   // https://bugs.llvm.org/show_bug.cgi?id=34236
   // Temporarily disable formatting for readability.
   // clang-format off
   EXPECT_EQ(
 "/**/ /*\n"
 "  *   a\n"
-"  * b c\n"
-"  * d*/",
+"  * b c d*/",
   format(
 "/**/ /*\n"
 " *   a b\n"
Index: unittests/Format/FormatTest.cpp

[PATCH] D40310: Restructure how we break tokens.

2017-11-22 Thread Krasimir Georgiev via Phabricator via cfe-commits
krasimir added a comment.

Here're a few nits. I'll need an evening to review the meaty part :)




Comment at: lib/Format/BreakableToken.cpp:73
+  if (ColumnLimit <= ContentStartColumn + 1) {
 return BreakableToken::Split(StringRef::npos, 0);
+  }

nit: no braces around single-statement if body.



Comment at: lib/Format/BreakableToken.cpp:198
+ "Getting the length of a part of the string literal indicates that "
+ "the code tries to reflow it.");
+  return UnbreakableTailLength + Postfix.size() +

How about clients that explicitly pass `Length = Line.size() - Offset`?



Comment at: lib/Format/BreakableToken.cpp:476
+StartColumn, Style.TabWidth, Encoding);
+  // FIXME: Test that this is handled correctly for Length != npos!
+  // The last line gets a "*/" postfAix.

Could you elaborate?



Comment at: lib/Format/BreakableToken.cpp:477
+  // FIXME: Test that this is handled correctly for Length != npos!
+  // The last line gets a "*/" postfAix.
   if (LineIndex + 1 == Lines.size()) {

nit: postfix.



Comment at: lib/Format/BreakableToken.cpp:566
 if (DelimitersOnNewline) {
   // Since we're breaking af index 1 below, the break position and the
   // break length are the same.

nit: af -> at



Comment at: lib/Format/BreakableToken.cpp:570
   if (BreakLength != StringRef::npos) {
 insertBreak(LineIndex, 0, Split(1, BreakLength), Whitespaces);
   }

nit: no braces around single-statement if bodies.



Comment at: lib/Format/BreakableToken.cpp:851
   }
+  // FIXME: Decide whether we want to reflow non-regular indents.
   return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) &&

could you give an example of what's a non-regular indent?



Comment at: lib/Format/BreakableToken.h:52
+///
+/// The mechanism to adapt the layout of the breakable token are organised
+/// around the concept of a \c Split, which is a whitespace range that 
signifies

Replace `are` with `is`.



Comment at: lib/Format/BreakableToken.h:100
+  /// \brief Returns the number of columns required to format the range at 
bytes
+  /// \p Offset to \p Offset \c + \p Length.
+  ///

Does this include the byte `Offset + Length`?



Comment at: lib/Format/BreakableToken.h:107
   ///
-  /// Note that previous breaks are not taken into account. \p TailOffset is
-  /// always specified from the start of the (original) line.
-  /// \p Length can be set to StringRef::npos, which means "to the end of 
line".
-  virtual unsigned
-  getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,
-  StringRef::size_type Length) const = 0;
+  /// \p StartColumn is the column at which the text starts, needed to compute
+  ///tab stops correctly.

`text` is ambiguous here: does it refer to the content of the line or to the 
range defined by the offset and length?



Comment at: lib/Format/BreakableToken.h:114
+  /// \brief Returns the column at which content in line \p LineIndex starts,
+  /// assuming no reflow.
+  virtual unsigned getContentStartColumn(unsigned LineIndex,

What is `Break` used for?



Comment at: lib/Format/BreakableToken.h:120
   /// \p LineIndex, if previously broken at \p TailOffset. If possible, do not
   /// violate \p ColumnLimit.
   virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,

What is `ReflownColumn` used for?



Comment at: lib/Format/BreakableToken.h:142
   /// \brief Returns a whitespace range (offset, length) of the content at
-  /// \p LineIndex such that the content preceding this range needs to be
-  /// reformatted before any breaks are made to this line.
+  /// \p LineIndex such that the content of the current line is reflown to the
+  /// end of the previous one.

Does the current line refer to the line at LineIndex?



Comment at: lib/Format/ContinuationIndenter.cpp:1504
  : Style.PenaltyBreakComment;
-  unsigned RemainingSpace = ColumnLimit - Current.UnbreakableTailLength;
+  // Stores whether we introduce a break anywhere in the token.
   bool BreakInserted = Token->introducesBreakBeforeToken();

Does a reflow count as a break?



Comment at: unittests/Format/FormatTest.cpp:7735
 
+TEST_F(FormatTest, BreaksStringLiteralsTODO) {
+  EXPECT_EQ("C a = \"some more \"\n"

TODO?



Comment at: unittests/Format/FormatTest.cpp:10603
 format("\"一\t二 \t三 四 五\t六 \t七 八九十\tqq\"",
getLLVMStyleWithColumns(11)));
 

What 

[PATCH] D40310: Restructure how we break tokens.

2017-11-21 Thread Manuel Klimek via Phabricator via cfe-commits
klimek created this revision.

This fixes some bugs in the reflowing logic and splits out the concerns
of reflowing from BreakableToken.

Things to do after this patch:

- Refactor the breakProtrudingToken function possibly into a class, so we can 
split it up into methods that operate on the common state.
- Optimize whitespace compression when reflowing by using the next possible 
split point instead of the latest possible split point.


https://reviews.llvm.org/D40310

Files:
  lib/Format/BreakableToken.cpp
  lib/Format/BreakableToken.h
  lib/Format/ContinuationIndenter.cpp
  unittests/Format/FormatTest.cpp
  unittests/Format/FormatTestComments.cpp

Index: unittests/Format/FormatTestComments.cpp
===
--- unittests/Format/FormatTestComments.cpp
+++ unittests/Format/FormatTestComments.cpp
@@ -1096,11 +1096,12 @@
 }
 
 TEST_F(FormatTestComments, SplitsLongLinesInComments) {
+  // FIXME: Do we need to fix up the "  */" at the end?
+  // It doesn't look like any of our current logic triggers this.
   EXPECT_EQ("/* This is a long\n"
 " * comment that\n"
-" * doesn't\n"
-" * fit on one line.\n"
-" */",
+" * doesn't fit on\n"
+" * one line.  */",
 format("/* "
"This is a long "
"comment that "
@@ -2102,6 +2103,66 @@
   EXPECT_EQ("///", format(" ///  ", getLLVMStyleWithColumns(20)));
 }
 
+TEST_F(FormatTestComments, ReflowsCommentsPrecise) {
+  // FIXME: This assumes we do not continue compressing whitespace once we are
+  // in reflow mode. Consider compressing whitespace.
+
+  // Test that we stop reflowing precisely at the column limit.
+  // After reflowing, "// reflows into   foo" does not fit the column limit,
+  // so we compress the whitespace.
+  EXPECT_EQ("// some text that\n"
+"// reflows into foo\n",
+format("// some text that reflows\n"
+   "// into   foo\n",
+   getLLVMStyleWithColumns(20)));
+  // Given one more column, "// reflows into   foo" does fit the limit, so we
+  // do not compress the whitespace.
+  EXPECT_EQ("// some text that\n"
+"// reflows into   foo\n",
+format("// some text that reflows\n"
+   "// into   foo\n",
+   getLLVMStyleWithColumns(21)));
+}
+
+TEST_F(FormatTestComments, ReflowsCommentsWithExtraWhitespace) {
+  // Baseline.
+  EXPECT_EQ("// some text\n"
+"// that re flows\n",
+format("// some text that\n"
+   "// re flows\n",
+   getLLVMStyleWithColumns(16)));
+  EXPECT_EQ("// some text\n"
+"// that re flows\n",
+format("// some text that\n"
+   "// reflows\n",
+   getLLVMStyleWithColumns(16)));
+  EXPECT_EQ("/* some text\n"
+" * that re flows\n"
+" */\n",
+format("/* some text that\n"
+   "*  re   flows\n"
+   "*/\n",
+   getLLVMStyleWithColumns(16)));
+  // FIXME: We do not reflow if the indent of two subsequent lines differs;
+  // given that this is different behavior from block comments, do we want
+  // to keep this?
+  EXPECT_EQ("// some text\n"
+"// that\n"
+"// re flows\n",
+format("// some text that\n"
+   "// re   flows\n",
+   getLLVMStyleWithColumns(16)));
+  // Space within parts of a line that fit.
+  // FIXME: Use the earliest possible split while reflowing to compress the
+  // whitespace within the line.
+  EXPECT_EQ("// some text that\n"
+"// does re   flow\n"
+"// more  here\n",
+format("// some text that does\n"
+   "// re   flow  more  here\n",
+   getLLVMStyleWithColumns(21)));
+}
+
 TEST_F(FormatTestComments, IgnoresIf0Contents) {
   EXPECT_EQ("#if 0\n"
 "}{)(&*(^%%#%@! fsadj f;ldjs ,:;| <<<>>>][)(][\n"
@@ -2484,6 +2545,7 @@
   " long */\n"
   "  b);",
   format("a = f(a, /* long long */ b);", getLLVMStyleWithColumns(16)));
+
   EXPECT_EQ(
   "a = f(\n"
   "a,\n"
@@ -2888,16 +2950,15 @@
getLLVMStyleWithColumns(20)));
 }
 
-TEST_F(FormatTestComments, NoCrush_Bug34236) {
+TEST_F(FormatTestComments, NoCrash_Bug34236) {
   // This is a test case from a crasher reported in:
   // https://bugs.llvm.org/show_bug.cgi?id=34236
   // Temporarily disable formatting for readability.
   // clang-format off
   EXPECT_EQ(
 "/**/ /*\n"
 "  *   a\n"
-"  * b c\n"
-"