[PATCH] D33285: clang-format: do not reflow bullet lists
This revision was automatically updated to reflect the committed changes. Closed by commit rL303556: clang-format: do not reflow bullet lists (authored by Typz). Changed prior to commit: https://reviews.llvm.org/D33285?vs=99436=99764#toc Repository: rL LLVM https://reviews.llvm.org/D33285 Files: cfe/trunk/lib/Format/BreakableToken.cpp cfe/trunk/unittests/Format/FormatTestComments.cpp Index: cfe/trunk/unittests/Format/FormatTestComments.cpp === --- cfe/trunk/unittests/Format/FormatTestComments.cpp +++ cfe/trunk/unittests/Format/FormatTestComments.cpp @@ -1577,7 +1577,7 @@ " *\n" " * long */", getLLVMStyleWithColumns(20))); - + // Don't reflow lines having content that is a single character. EXPECT_EQ("// long long long\n" "// long\n" @@ -1602,7 +1602,7 @@ format("// long long long long\n" "// @param arg", getLLVMStyleWithColumns(20))); - + // Don't reflow lines starting with 'TODO'. EXPECT_EQ("// long long long\n" "// long\n" @@ -1671,6 +1671,69 @@ "// long", getLLVMStyleWithColumns(20))); + // Don't reflow separate bullets in list + EXPECT_EQ("// - long long long\n" +"// long\n" +"// - long", +format("// - long long long long\n" + "// - long", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("// * long long long\n" +"// long\n" +"// * long", +format("// * long long long long\n" + "// * long", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("// + long long long\n" +"// long\n" +"// + long", +format("// + long long long long\n" + "// + long", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("// 1. long long long\n" +"// long\n" +"// 2. long", +format("// 1. long long long long\n" + "// 2. long", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("// -# long long long\n" +"// long\n" +"// -# long", +format("// -# long long long long\n" + "// -# long", + getLLVMStyleWithColumns(20))); + + EXPECT_EQ("// - long long long\n" +"// long long long\n" +"// - long", +format("// - long long long long\n" + "// long long\n" + "// - long", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("// - long long long\n" +"// long long long\n" +"// long\n" +"// - long", +format("// - long long long long\n" + "// long long long\n" + "// - long", + getLLVMStyleWithColumns(20))); + + // Large number (>2 digits) are not list items + EXPECT_EQ("// long long long\n" +"// long 1024. long.", +format("// long long long long\n" + "// 1024. long.", + getLLVMStyleWithColumns(20))); + + // Do not break before number, to avoid introducing a non-reflowable doxygen + // list item. + EXPECT_EQ("// long long\n" +"// long 10. long.", +format("// long long long 10.\n" + "// long.", + getLLVMStyleWithColumns(20))); + // Don't break or reflow after implicit string literals. verifyFormat("#include // l l l\n" " // l", Index: cfe/trunk/lib/Format/BreakableToken.cpp === --- cfe/trunk/lib/Format/BreakableToken.cpp +++ cfe/trunk/lib/Format/BreakableToken.cpp @@ -78,6 +78,14 @@ } StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes); + + // Do not split before a number followed by a dot: this would be interpreted + // as a numbered list, which would prevent re-flowing in subsequent passes. + static llvm::Regex kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\."); + if (SpaceOffset != StringRef::npos && + kNumberedListRegexp.match(Text.substr(SpaceOffset).ltrim(Blanks))) +SpaceOffset = Text.find_last_of(Blanks, SpaceOffset); + if (SpaceOffset == StringRef::npos || // Don't break at leading whitespace. Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) { @@ -299,15 +307,24 @@ static bool mayReflowContent(StringRef Content) { Content = Content.trim(Blanks); // Lines starting with '@' commonly have special meaning. - static const SmallVectorkSpecialMeaningPrefixes = { - "@", "TODO", "FIXME", "XXX"}; + // Lines starting with '-', '-#', '+' or '*' are bulleted/numbered lists. + static const SmallVector kSpecialMeaningPrefixes = { +
[PATCH] D33285: clang-format: do not reflow bullet lists
krasimir accepted this revision. krasimir added a comment. This revision is now accepted and ready to land. Looks great! https://reviews.llvm.org/D33285 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D33285: clang-format: do not reflow bullet lists
Typz updated this revision to Diff 99436. Typz marked an inline comment as done. Typz added a comment. Limit to 2 digits and not break before a matching numbered list sequence followed by a fullstop, to avoid interpreting numbers at the end of sentence as numbered bullets (and thus preventing reflow). https://reviews.llvm.org/D33285 Files: lib/Format/BreakableToken.cpp unittests/Format/FormatTestComments.cpp Index: unittests/Format/FormatTestComments.cpp === --- unittests/Format/FormatTestComments.cpp +++ unittests/Format/FormatTestComments.cpp @@ -1534,7 +1534,7 @@ " *\n" " * long */", getLLVMStyleWithColumns(20))); - + // Don't reflow lines having content that is a single character. EXPECT_EQ("// long long long\n" "// long\n" @@ -1559,7 +1559,7 @@ format("// long long long long\n" "// @param arg", getLLVMStyleWithColumns(20))); - + // Don't reflow lines starting with 'TODO'. EXPECT_EQ("// long long long\n" "// long\n" @@ -1628,6 +1628,69 @@ "// long", getLLVMStyleWithColumns(20))); + // Don't reflow separate bullets in list + EXPECT_EQ("// - long long long\n" +"// long\n" +"// - long", +format("// - long long long long\n" + "// - long", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("// * long long long\n" +"// long\n" +"// * long", +format("// * long long long long\n" + "// * long", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("// + long long long\n" +"// long\n" +"// + long", +format("// + long long long long\n" + "// + long", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("// 1. long long long\n" +"// long\n" +"// 2. long", +format("// 1. long long long long\n" + "// 2. long", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("// -# long long long\n" +"// long\n" +"// -# long", +format("// -# long long long long\n" + "// -# long", + getLLVMStyleWithColumns(20))); + + EXPECT_EQ("// - long long long\n" +"// long long long\n" +"// - long", +format("// - long long long long\n" + "// long long\n" + "// - long", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("// - long long long\n" +"// long long long\n" +"// long\n" +"// - long", +format("// - long long long long\n" + "// long long long\n" + "// - long", + getLLVMStyleWithColumns(20))); + + // Large number (>2 digits) are not list items + EXPECT_EQ("// long long long\n" +"// long 1024. long.", +format("// long long long long\n" + "// 1024. long.", + getLLVMStyleWithColumns(20))); + + // Do not break before number, to avoid introducing a non-reflowable doxygen + // list item. + EXPECT_EQ("// long long\n" +"// long 10. long.", +format("// long long long 10.\n" + "// long.", + getLLVMStyleWithColumns(20))); + // Don't break or reflow after implicit string literals. verifyFormat("#include // l l l\n" " // l", Index: lib/Format/BreakableToken.cpp === --- lib/Format/BreakableToken.cpp +++ lib/Format/BreakableToken.cpp @@ -77,6 +77,14 @@ } StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes); + + // Do not split before a number followed by a dot: this would be interpreted + // as a numbered list, which would prevent re-flowing in subsequent passes. + static llvm::Regex kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\."); + if (SpaceOffset != StringRef::npos && + kNumberedListRegexp.match(Text.substr(SpaceOffset).ltrim(Blanks))) +SpaceOffset = Text.find_last_of(Blanks, SpaceOffset); + if (SpaceOffset == StringRef::npos || // Don't break at leading whitespace. Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) { @@ -298,15 +306,24 @@ static bool mayReflowContent(StringRef Content) { Content = Content.trim(Blanks); // Lines starting with '@' commonly have special meaning. - static const SmallVectorkSpecialMeaningPrefixes = { - "@", "TODO", "FIXME", "XXX"}; + // Lines starting with '-', '-#', '+' or '*' are bulleted/numbered lists. + static const SmallVector kSpecialMeaningPrefixes = { + "@", "TODO", "FIXME", "XXX", "-#
[PATCH] D33285: clang-format: do not reflow bullet lists
Typz marked 3 inline comments as done. Typz added inline comments. Comment at: lib/Format/BreakableToken.cpp:313 + // Numbered lists may also start with a number followed by '.' + static const char *kNumberedListPattern = "^[0-9]+\\. "; + hasSpecialMeaningPrefix = hasSpecialMeaningPrefix || krasimir wrote: > Typz wrote: > > krasimir wrote: > > > A problem with this is that sometimes you have a sentence ending with a > > > number, like this one, in **2016.** If this sentence also happens to just > > > go over the column width, its last part would be reflown and during > > > subsequent passes it will be seen as a numbered list, which is super > > > unfortunate. I'd like us to come up with a more refined strategy of > > > handling this case. Maybe we should look at how others are doing it? > > Looking at doxygen, it seems there is no extra protection: just a number > > followed by a dot... > > So it means: > > * We should never break before a such a sequence, to avoid the issue. > > * We may also limit the expression to limit the size of the number: I am > > not sure there are cases where bullet lists with hundreds of items are > > used, esp. with explicit values (uses the auto-numbering -# would be much > > simpler in that case). Maybe a limit of 2 digits? The same limit would be > > applied to prevent breaking before a number followed by a dot. > > > > What do you think? > I like the combination of the two options: let's limit to 2 digits and not > break before a matching numbered list sequence followed by a fullstop. That > would require also a little change to `BreakableToken::getCommentSplit`. Done, but I could find a use-case where this would break subsequent passes, apart from re-running clang-format ; but in this case it is fine, since the comments are already formatted to fit, and will thus not be reflowed... https://reviews.llvm.org/D33285 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D33285: clang-format: do not reflow bullet lists
krasimir added inline comments. Comment at: lib/Format/BreakableToken.cpp:313 + // Numbered lists may also start with a number followed by '.' + static const char *kNumberedListPattern = "^[0-9]+\\. "; + hasSpecialMeaningPrefix = hasSpecialMeaningPrefix || Typz wrote: > krasimir wrote: > > A problem with this is that sometimes you have a sentence ending with a > > number, like this one, in **2016.** If this sentence also happens to just > > go over the column width, its last part would be reflown and during > > subsequent passes it will be seen as a numbered list, which is super > > unfortunate. I'd like us to come up with a more refined strategy of > > handling this case. Maybe we should look at how others are doing it? > Looking at doxygen, it seems there is no extra protection: just a number > followed by a dot... > So it means: > * We should never break before a such a sequence, to avoid the issue. > * We may also limit the expression to limit the size of the number: I am > not sure there are cases where bullet lists with hundreds of items are used, > esp. with explicit values (uses the auto-numbering -# would be much simpler > in that case). Maybe a limit of 2 digits? The same limit would be applied to > prevent breaking before a number followed by a dot. > > What do you think? I like the combination of the two options: let's limit to 2 digits and not break before a matching numbered list sequence followed by a fullstop. That would require also a little change to `BreakableToken::getCommentSplit`. https://reviews.llvm.org/D33285 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D33285: clang-format: do not reflow bullet lists
Typz updated this revision to Diff 99427. Typz added a comment. - Use static regex to avoid recreating it each time - Add more tests https://reviews.llvm.org/D33285 Files: lib/Format/BreakableToken.cpp unittests/Format/FormatTestComments.cpp Index: unittests/Format/FormatTestComments.cpp === --- unittests/Format/FormatTestComments.cpp +++ unittests/Format/FormatTestComments.cpp @@ -1534,7 +1534,7 @@ " *\n" " * long */", getLLVMStyleWithColumns(20))); - + // Don't reflow lines having content that is a single character. EXPECT_EQ("// long long long\n" "// long\n" @@ -1559,7 +1559,7 @@ format("// long long long long\n" "// @param arg", getLLVMStyleWithColumns(20))); - + // Don't reflow lines starting with 'TODO'. EXPECT_EQ("// long long long\n" "// long\n" @@ -1628,6 +1628,54 @@ "// long", getLLVMStyleWithColumns(20))); + // Don't reflow separate bullets in list + EXPECT_EQ("// - long long long\n" +"// long\n" +"// - long", +format("// - long long long long\n" + "// - long", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("// * long long long\n" +"// long\n" +"// * long", +format("// * long long long long\n" + "// * long", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("// + long long long\n" +"// long\n" +"// + long", +format("// + long long long long\n" + "// + long", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("// 1. long long long\n" +"// long\n" +"// 2. long", +format("// 1. long long long long\n" + "// 2. long", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("// -# long long long\n" +"// long\n" +"// -# long", +format("// -# long long long long\n" + "// -# long", + getLLVMStyleWithColumns(20))); + + EXPECT_EQ("// - long long long\n" +"// long long long\n" +"// - long", +format("// - long long long long\n" + "// long long\n" + "// - long", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("// - long long long\n" +"// long long long\n" +"// long\n" +"// - long", +format("// - long long long long\n" + "// long long long\n" + "// - long", + getLLVMStyleWithColumns(20))); + // Don't break or reflow after implicit string literals. verifyFormat("#include // l l l\n" " // l", Index: lib/Format/BreakableToken.cpp === --- lib/Format/BreakableToken.cpp +++ lib/Format/BreakableToken.cpp @@ -298,15 +298,22 @@ static bool mayReflowContent(StringRef Content) { Content = Content.trim(Blanks); // Lines starting with '@' commonly have special meaning. - static const SmallVectorkSpecialMeaningPrefixes = { - "@", "TODO", "FIXME", "XXX"}; + // Lines starting with '-', '-#', '+' or '*' are bulleted/numbered lists. + static const SmallVector kSpecialMeaningPrefixes = { + "@", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* " }; bool hasSpecialMeaningPrefix = false; for (StringRef Prefix : kSpecialMeaningPrefixes) { if (Content.startswith(Prefix)) { hasSpecialMeaningPrefix = true; break; } } + + // Numbered lists may also start with a number followed by '.' + static llvm::Regex kNumberedListRegexp = llvm::Regex("^[1-9][0-9]*\\. "); + hasSpecialMeaningPrefix = hasSpecialMeaningPrefix || +kNumberedListRegexp.match(Content); + // Simple heuristic for what to reflow: content should contain at least two // characters and either the first or second character must be // non-punctuation. ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D33285: clang-format: do not reflow bullet lists
Typz marked an inline comment as done. Typz added inline comments. Comment at: lib/Format/BreakableToken.cpp:313 + // Numbered lists may also start with a number followed by '.' + static const char *kNumberedListPattern = "^[0-9]+\\. "; + hasSpecialMeaningPrefix = hasSpecialMeaningPrefix || krasimir wrote: > A problem with this is that sometimes you have a sentence ending with a > number, like this one, in **2016.** If this sentence also happens to just go > over the column width, its last part would be reflown and during subsequent > passes it will be seen as a numbered list, which is super unfortunate. I'd > like us to come up with a more refined strategy of handling this case. Maybe > we should look at how others are doing it? Looking at doxygen, it seems there is no extra protection: just a number followed by a dot... So it means: * We should never break before a such a sequence, to avoid the issue. * We may also limit the expression to limit the size of the number: I am not sure there are cases where bullet lists with hundreds of items are used, esp. with explicit values (uses the auto-numbering -# would be much simpler in that case). Maybe a limit of 2 digits? The same limit would be applied to prevent breaking before a number followed by a dot. What do you think? Comment at: lib/Format/BreakableToken.cpp:315 + hasSpecialMeaningPrefix = hasSpecialMeaningPrefix || +llvm::Regex(kNumberedListPattern).match(Content); + krasimir wrote: > This builds an `llvm::Regex` on each invocation, which is wasteful. I did this to keep the function re-entrant; but since the code is not multi-thread I can use a static variable instead. https://reviews.llvm.org/D33285 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D33285: clang-format: do not reflow bullet lists
krasimir added inline comments. Comment at: lib/Format/BreakableToken.cpp:313 + // Numbered lists may also start with a number followed by '.' + static const char *kNumberedListPattern = "^[0-9]+\\. "; + hasSpecialMeaningPrefix = hasSpecialMeaningPrefix || A problem with this is that sometimes you have a sentence ending with a number, like this one, in **2016.** If this sentence also happens to just go over the column width, its last part would be reflown and during subsequent passes it will be seen as a numbered list, which is super unfortunate. I'd like us to come up with a more refined strategy of handling this case. Maybe we should look at how others are doing it? Comment at: lib/Format/BreakableToken.cpp:315 + hasSpecialMeaningPrefix = hasSpecialMeaningPrefix || +llvm::Regex(kNumberedListPattern).match(Content); + This builds an `llvm::Regex` on each invocation, which is wasteful. Comment at: unittests/Format/FormatTestComments.cpp:1663 + getLLVMStyleWithColumns(20))); + // Don't break or reflow after implicit string literals. I'd also like to see tests where we correctly reflow lists with multiline entries. https://reviews.llvm.org/D33285 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D33285: clang-format: do not reflow bullet lists
Typz created this revision. Herald added a subscriber: klimek. This patch prevents reflowing bullet lists in block comments. It handles all lists supported by doxygen and markdown, e.g. bullet lists starting with '-', '*', '+', as well as numbered lists starting with -# or a number followed by a dot. https://reviews.llvm.org/D33285 Files: lib/Format/BreakableToken.cpp unittests/Format/FormatTestComments.cpp Index: unittests/Format/FormatTestComments.cpp === --- unittests/Format/FormatTestComments.cpp +++ unittests/Format/FormatTestComments.cpp @@ -1629,6 +1629,38 @@ "// long", getLLVMStyleWithColumns(20))); + // Don't reflow separate bullets in list + EXPECT_EQ("// - long long long\n" +"// long\n" +"// - long", +format("// - long long long long\n" + "// - long", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("// * long long long\n" +"// long\n" +"// * long", +format("// * long long long long\n" + "// * long", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("// + long long long\n" +"// long\n" +"// + long", +format("// + long long long long\n" + "// + long", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("// 1. long long long\n" +"// long\n" +"// 2. long", +format("// 1. long long long long\n" + "// 2. long", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("// -# long long long\n" +"// long\n" +"// -# long", +format("// -# long long long long\n" + "// -# long", + getLLVMStyleWithColumns(20))); + // Don't break or reflow after implicit string literals. verifyFormat("#include // l l l\n" " // l", Index: lib/Format/BreakableToken.cpp === --- lib/Format/BreakableToken.cpp +++ lib/Format/BreakableToken.cpp @@ -298,15 +298,22 @@ static bool mayReflowContent(StringRef Content) { Content = Content.trim(Blanks); // Lines starting with '@' commonly have special meaning. - static const SmallVectorkSpecialMeaningPrefixes = { - "@", "TODO", "FIXME", "XXX"}; + // Lines starting with '-', '-#', '+' or '*' are bulleted/numbered lists. + static const SmallVector kSpecialMeaningPrefixes = { + "@", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* " }; bool hasSpecialMeaningPrefix = false; for (StringRef Prefix : kSpecialMeaningPrefixes) { if (Content.startswith(Prefix)) { hasSpecialMeaningPrefix = true; break; } } + + // Numbered lists may also start with a number followed by '.' + static const char *kNumberedListPattern = "^[0-9]+\\. "; + hasSpecialMeaningPrefix = hasSpecialMeaningPrefix || +llvm::Regex(kNumberedListPattern).match(Content); + // Simple heuristic for what to reflow: content should contain at least two // characters and either the first or second character must be // non-punctuation. Index: unittests/Format/FormatTestComments.cpp === --- unittests/Format/FormatTestComments.cpp +++ unittests/Format/FormatTestComments.cpp @@ -1629,6 +1629,38 @@ "// long", getLLVMStyleWithColumns(20))); + // Don't reflow separate bullets in list + EXPECT_EQ("// - long long long\n" +"// long\n" +"// - long", +format("// - long long long long\n" + "// - long", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("// * long long long\n" +"// long\n" +"// * long", +format("// * long long long long\n" + "// * long", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("// + long long long\n" +"// long\n" +"// + long", +format("// + long long long long\n" + "// + long", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("// 1. long long long\n" +"// long\n" +"// 2. long", +format("// 1. long long long long\n" + "// 2. long", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("// -# long long long\n" +"// long\n" +"// -# long", +format("// -# long long long long\n" + "// -# long", + getLLVMStyleWithColumns(20))); + // Don't break or reflow after implicit string literals. verifyFormat("#include // l l l\n" " // l", Index: lib/Format/BreakableToken.cpp