sstwcw updated this revision to Diff 553729.
sstwcw added a comment.

Stop using `SmallString` in the tests.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154093/new/

https://reviews.llvm.org/D154093

Files:
  clang/docs/ClangFormatStyleOptions.rst
  clang/include/clang/Format/Format.h
  clang/lib/Format/BreakableToken.cpp
  clang/lib/Format/BreakableToken.h
  clang/lib/Format/ContinuationIndenter.cpp
  clang/lib/Format/FormatToken.h
  clang/lib/Format/TokenAnnotator.cpp
  clang/lib/Format/WhitespaceManager.cpp
  clang/unittests/Format/FormatTestCSharp.cpp
  clang/unittests/Format/FormatTestJS.cpp
  clang/unittests/Format/FormatTestJava.cpp
  clang/unittests/Format/FormatTestVerilog.cpp
  clang/unittests/Format/TokenAnnotatorTest.cpp

Index: clang/unittests/Format/TokenAnnotatorTest.cpp
===================================================================
--- clang/unittests/Format/TokenAnnotatorTest.cpp
+++ clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -1863,6 +1863,30 @@
   EXPECT_TOKEN(Tokens[2], tok::l_paren, TT_Unknown);
   EXPECT_TOKEN(Tokens[5], tok::comma, TT_Unknown);
   EXPECT_TOKEN(Tokens[8], tok::r_paren, TT_Unknown);
+
+  // String literals in concatenation.
+  Tokens = Annotate("x = {\"\"};");
+  ASSERT_EQ(Tokens.size(), 7u) << Tokens;
+  EXPECT_TOKEN(Tokens[3], tok::string_literal, TT_StringInConcatenation);
+  Tokens = Annotate("x = {\"\", \"\"};");
+  ASSERT_EQ(Tokens.size(), 9u) << Tokens;
+  EXPECT_TOKEN(Tokens[3], tok::string_literal, TT_StringInConcatenation);
+  EXPECT_TOKEN(Tokens[5], tok::string_literal, TT_StringInConcatenation);
+  Tokens = Annotate("x = '{{\"\"}};");
+  ASSERT_EQ(Tokens.size(), 10u) << Tokens;
+  EXPECT_TOKEN(Tokens[5], tok::string_literal, TT_StringInConcatenation);
+  // Cases where the string should not be annotated that type.  Fix the
+  // `TT_Unknown` if needed in the future.
+  Tokens = Annotate("x = {\"\" == \"\"};");
+  ASSERT_EQ(Tokens.size(), 9u) << Tokens;
+  EXPECT_TOKEN(Tokens[3], tok::string_literal, TT_Unknown);
+  EXPECT_TOKEN(Tokens[5], tok::string_literal, TT_Unknown);
+  Tokens = Annotate("x = {(\"\")};");
+  ASSERT_EQ(Tokens.size(), 9u) << Tokens;
+  EXPECT_TOKEN(Tokens[4], tok::string_literal, TT_Unknown);
+  Tokens = Annotate("x = '{\"\"};");
+  ASSERT_EQ(Tokens.size(), 8u) << Tokens;
+  EXPECT_TOKEN(Tokens[4], tok::string_literal, TT_Unknown);
 }
 
 TEST_F(TokenAnnotatorTest, UnderstandConstructors) {
Index: clang/unittests/Format/FormatTestVerilog.cpp
===================================================================
--- clang/unittests/Format/FormatTestVerilog.cpp
+++ clang/unittests/Format/FormatTestVerilog.cpp
@@ -1157,6 +1157,66 @@
   verifyFormat("{<<byte{j}} = x;");
 }
 
+TEST_F(FormatTestVerilog, StringLiteral) {
+  // Long strings should be broken.
+  verifyFormat(R"(x({"xxxxxxxxxxxxxxxx ",
+   "xxxx"});)",
+               R"(x({"xxxxxxxxxxxxxxxx xxxx"});)",
+               getStyleWithColumns(getDefaultStyle(), 23));
+  verifyFormat(R"(x({"xxxxxxxxxxxxxxxx",
+   " xxxx"});)",
+               R"(x({"xxxxxxxxxxxxxxxx xxxx"});)",
+               getStyleWithColumns(getDefaultStyle(), 22));
+  // Braces should be added when they don't already exist.
+  verifyFormat(R"(x({"xxxxxxxxxxxxxxxx ",
+   "xxxx"});)",
+               R"(x("xxxxxxxxxxxxxxxx xxxx");)",
+               getStyleWithColumns(getDefaultStyle(), 23));
+  verifyFormat(R"(x({"xxxxxxxxxxxxxxxx",
+   " xxxx"});)",
+               R"(x("xxxxxxxxxxxxxxxx xxxx");)",
+               getStyleWithColumns(getDefaultStyle(), 22));
+  verifyFormat(R"(x({{"xxxxxxxxxxxxxxxx ",
+    "xxxx"} == x});)",
+               R"(x({"xxxxxxxxxxxxxxxx xxxx" == x});)",
+               getStyleWithColumns(getDefaultStyle(), 24));
+  verifyFormat(R"(string x = {"xxxxxxxxxxxxxxxx ",
+            "xxxxxxxx"};)",
+               R"(string x = "xxxxxxxxxxxxxxxx xxxxxxxx";)",
+               getStyleWithColumns(getDefaultStyle(), 32));
+  // Space around braces should be correct.
+  auto Style = getStyleWithColumns(getDefaultStyle(), 24);
+  Style.Cpp11BracedListStyle = false;
+  verifyFormat(R"(x({ "xxxxxxxxxxxxxxxx ",
+    "xxxx" });)",
+               R"(x("xxxxxxxxxxxxxxxx xxxx");)", Style);
+  // Braces should not be added twice.
+  verifyFormat(R"(x({"xxxxxxxx",
+   "xxxxxxxx",
+   "xxxxxx"});)",
+               R"(x("xxxxxxxxxxxxxxxxxxxxxx");)",
+               getStyleWithColumns(getDefaultStyle(), 14));
+  verifyFormat(R"(x({"xxxxxxxxxxxxxxxx ",
+   "xxxxxxxxxxxxxxxx ",
+   "xxxx"});)",
+               R"(x("xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx xxxx");)",
+               getStyleWithColumns(getDefaultStyle(), 23));
+  verifyFormat(R"(x({"xxxxxxxxxxxxxxxx ",
+   "xxxxxxxxxxxxxxxx ",
+   "xxxx"});)",
+               R"(x({"xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx ", "xxxx"});)",
+               getStyleWithColumns(getDefaultStyle(), 23));
+  // These kinds of strings don't exist in Verilog.
+  verifyNoCrash(R"(x(@"xxxxxxxxxxxxxxxx xxxx");)",
+                getStyleWithColumns(getDefaultStyle(), 23));
+  verifyNoCrash(R"(x(u"xxxxxxxxxxxxxxxx xxxx");)",
+                getStyleWithColumns(getDefaultStyle(), 23));
+  verifyNoCrash(R"(x(u8"xxxxxxxxxxxxxxxx xxxx");)",
+                getStyleWithColumns(getDefaultStyle(), 23));
+  verifyNoCrash(R"(x(_T("xxxxxxxxxxxxxxxx xxxx"));)",
+                getStyleWithColumns(getDefaultStyle(), 23));
+}
+
 TEST_F(FormatTestVerilog, StructLiteral) {
   verifyFormat("c = '{0, 0.0};");
   verifyFormat("c = '{'{1, 1.0}, '{2, 2.0}};");
Index: clang/unittests/Format/FormatTestJava.cpp
===================================================================
--- clang/unittests/Format/FormatTestJava.cpp
+++ clang/unittests/Format/FormatTestJava.cpp
@@ -6,34 +6,19 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "FormatTestUtils.h"
-#include "clang/Format/Format.h"
-#include "llvm/Support/Debug.h"
-#include "gtest/gtest.h"
+#include "FormatTestBase.h"
 
 #define DEBUG_TYPE "format-test"
 
 namespace clang {
 namespace format {
+namespace test {
+namespace {
 
-class FormatTestJava : public ::testing::Test {
+class FormatTestJava : public test::FormatTestBase {
 protected:
-  static std::string format(llvm::StringRef Code, unsigned Offset,
-                            unsigned Length, const FormatStyle &Style) {
-    LLVM_DEBUG(llvm::errs() << "---\n");
-    LLVM_DEBUG(llvm::errs() << Code << "\n\n");
-    std::vector<tooling::Range> Ranges(1, tooling::Range(Offset, Length));
-    tooling::Replacements Replaces = reformat(Style, Code, Ranges);
-    auto Result = applyAllReplacements(Code, Replaces);
-    EXPECT_TRUE(static_cast<bool>(Result));
-    LLVM_DEBUG(llvm::errs() << "\n" << *Result << "\n\n");
-    return *Result;
-  }
-
-  static std::string
-  format(llvm::StringRef Code,
-         const FormatStyle &Style = getGoogleStyle(FormatStyle::LK_Java)) {
-    return format(Code, 0, Code.size(), Style);
+  FormatStyle getDefaultStyle() const override {
+    return getGoogleStyle(FormatStyle::LK_Java);
   }
 
   static FormatStyle getStyleWithColumns(unsigned ColumnLimit) {
@@ -41,13 +26,6 @@
     Style.ColumnLimit = ColumnLimit;
     return Style;
   }
-
-  static void verifyFormat(
-      llvm::StringRef Code,
-      const FormatStyle &Style = getGoogleStyle(FormatStyle::LK_Java)) {
-    EXPECT_EQ(Code.str(), format(Code, Style)) << "Expected code is not stable";
-    EXPECT_EQ(Code.str(), format(test::messUp(Code), Style));
-  }
 };
 
 TEST_F(FormatTestJava, NoAlternativeOperatorNames) {
@@ -565,9 +543,9 @@
 }
 
 TEST_F(FormatTestJava, BreaksStringLiterals) {
-  // FIXME: String literal breaking is currently disabled for Java and JS, as it
-  // requires strings to be merged using "+" which we don't support.
-  verifyFormat("\"some text other\";", getStyleWithColumns(14));
+  verifyFormat("x = \"some text \"\n"
+               "    + \"other\";",
+               "x = \"some text other\";", getStyleWithColumns(18));
 }
 
 TEST_F(FormatTestJava, AlignsBlockComments) {
@@ -625,5 +603,7 @@
                Style);
 }
 
+} // namespace
+} // namespace test
 } // namespace format
-} // end namespace clang
+} // namespace clang
Index: clang/unittests/Format/FormatTestJS.cpp
===================================================================
--- clang/unittests/Format/FormatTestJS.cpp
+++ clang/unittests/Format/FormatTestJS.cpp
@@ -1505,6 +1505,97 @@
 TEST_F(FormatTestJS, StringLiteralConcatenation) {
   verifyFormat("var literal = 'hello ' +\n"
                "    'world';");
+
+  // Long strings should be broken.
+  verifyFormat("var literal =\n"
+               "    'xxxxxxxx ' +\n"
+               "    'xxxxxxxx';",
+               "var literal = 'xxxxxxxx xxxxxxxx';",
+               getGoogleJSStyleWithColumns(17));
+  verifyFormat("var literal =\n"
+               "    'xxxxxxxx ' +\n"
+               "    'xxxxxxxx';",
+               "var literal = 'xxxxxxxx xxxxxxxx';",
+               getGoogleJSStyleWithColumns(18));
+  verifyFormat("var literal =\n"
+               "    'xxxxxxxx' +\n"
+               "    ' xxxxxxxx';",
+               "var literal = 'xxxxxxxx xxxxxxxx';",
+               getGoogleJSStyleWithColumns(16));
+  // The quotes should be correct.
+  for (char OriginalQuote : {'\'', '"'}) {
+    auto VerifyQuotes = [=](FormatStyle::JavaScriptQuoteStyle StyleQuote,
+                            char TargetQuote) {
+      auto Style = getGoogleJSStyleWithColumns(17);
+      Style.JavaScriptQuotes = StyleQuote;
+      std::string Target{"var literal =\n"
+                         "    \"xxxxxxxx \" +\n"
+                         "    \"xxxxxxxx\";"};
+      std::string Original{"var literal = \"xxxxxxxx xxxxxxxx\";"};
+      std::replace(Target.begin(), Target.end(), '"', TargetQuote);
+      std::replace(Original.begin(), Original.end(), '"', OriginalQuote);
+      verifyFormat(Target, Original, Style);
+    };
+    VerifyQuotes(FormatStyle::JSQS_Leave, OriginalQuote);
+    VerifyQuotes(FormatStyle::JSQS_Single, '\'');
+    VerifyQuotes(FormatStyle::JSQS_Double, '"');
+  }
+  // Parentheses should be added when necessary.
+  verifyFormat("var literal =\n"
+               "    ('xxxxxxxx ' +\n"
+               "     'xxxxxx')[0];",
+               "var literal = 'xxxxxxxx xxxxxx'[0];",
+               getGoogleJSStyleWithColumns(18));
+  auto Style = getGoogleJSStyleWithColumns(20);
+  Style.SpacesInParens = FormatStyle::SIPO_Custom;
+  Style.SpacesInParensOptions.Other = true;
+  verifyFormat("var literal =\n"
+               "    ( 'xxxxxxxx ' +\n"
+               "      'xxxxxx' )[0];",
+               "var literal = 'xxxxxxxx xxxxxx'[0];", Style);
+  // FIXME: When the part before the string literal is shorter than the
+  // continuation indentation, and the option AlignAfterOpenBracket is set to
+  // AlwaysBreak which is the default for the Google style, the unbroken string
+  // does not get to a new line while the broken string does due to the added
+  // parentheses. The formatter does not do it in one pass.
+  EXPECT_EQ(
+      "x = ('xxxxxxxx ' +\n"
+      "     'xxxxxx')[0];",
+      format("x = 'xxxxxxxx xxxxxx'[0];", getGoogleJSStyleWithColumns(18)));
+  verifyFormat("x =\n"
+               "    ('xxxxxxxx ' +\n"
+               "     'xxxxxx')[0];",
+               getGoogleJSStyleWithColumns(18));
+  // Breaking of template strings ans regular expressions is not implemented.
+  verifyFormat("var literal =\n"
+               "    `xxxxxxxx xxxxxxxx`;",
+               getGoogleJSStyleWithColumns(18));
+  verifyFormat("var literal =\n"
+               "    /xxxxxxxx xxxxxxxx/;",
+               getGoogleJSStyleWithColumns(18));
+  // There can be breaks in the code inside a template string.
+  verifyFormat("var literal = `xxxxxx ${\n"
+               "    xxxxxxxxxx} xxxxxx`;",
+               "var literal = `xxxxxx ${xxxxxxxxxx} xxxxxx`;",
+               getGoogleJSStyleWithColumns(14));
+  verifyFormat("var literal = `xxxxxx ${\n"
+               "    xxxxxxxxxx} xxxxxx`;",
+               "var literal = `xxxxxx ${xxxxxxxxxx} xxxxxx`;",
+               getGoogleJSStyleWithColumns(15));
+  // Identifiers inside the code inside a template string should not be broken
+  // even if the column limit is exceeded.  This following behavior is not
+  // optimal.  The part after the closing brace which exceeds the column limit
+  // can be put on a new line.  Change this test when it is implemented.
+  verifyFormat("var literal = `xxxxxx ${\n"
+               "    xxxxxxxxxxxxxxxxxxxxxx} xxxxxx`;",
+               "var literal = `xxxxxx ${xxxxxxxxxxxxxxxxxxxxxx} xxxxxx`;",
+               getGoogleJSStyleWithColumns(14));
+  verifyFormat("var literal = `xxxxxx ${\n"
+               "    xxxxxxxxxxxxxxxxxxxxxx +\n"
+               "    xxxxxxxxxxxxxxxxxxxxxx} xxxxxx`;",
+               "var literal = `xxxxxx ${xxxxxxxxxxxxxxxxxxxxxx + "
+               "xxxxxxxxxxxxxxxxxxxxxx} xxxxxx`;",
+               getGoogleJSStyleWithColumns(14));
 }
 
 TEST_F(FormatTestJS, RegexLiteralClassification) {
Index: clang/unittests/Format/FormatTestCSharp.cpp
===================================================================
--- clang/unittests/Format/FormatTestCSharp.cpp
+++ clang/unittests/Format/FormatTestCSharp.cpp
@@ -6,18 +6,21 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "FormatTestUtils.h"
-#include "clang/Format/Format.h"
-#include "llvm/Support/Debug.h"
-#include "gtest/gtest.h"
+#include "FormatTestBase.h"
 
 #define DEBUG_TYPE "format-test"
 
 namespace clang {
 namespace format {
+namespace test {
+namespace {
 
-class FormatTestCSharp : public ::testing::Test {
+class FormatTestCSharp : public test::FormatTestBase {
 protected:
+  FormatStyle getDefaultStyle() const override {
+    return getMicrosoftStyle(FormatStyle::LK_CSharp);
+  }
+
   static std::string format(llvm::StringRef Code, unsigned Offset,
                             unsigned Length, const FormatStyle &Style) {
     LLVM_DEBUG(llvm::errs() << "---\n");
@@ -41,13 +44,6 @@
     Style.ColumnLimit = ColumnLimit;
     return Style;
   }
-
-  static void verifyFormat(
-      llvm::StringRef Code,
-      const FormatStyle &Style = getMicrosoftStyle(FormatStyle::LK_CSharp)) {
-    EXPECT_EQ(Code.str(), format(Code, Style)) << "Expected code is not stable";
-    EXPECT_EQ(Code.str(), format(test::messUp(Code), Style));
-  }
 };
 
 TEST_F(FormatTestCSharp, CSharpClass) {
@@ -129,9 +125,65 @@
 }
 
 TEST_F(FormatTestCSharp, NoStringLiteralBreaks) {
+  // Breaking of interpolated strings is not implemented.
+  auto Style = getDefaultStyle();
+  Style.ColumnLimit = 40;
+  Style.BreakStringLiterals = true;
+  verifyFormat("foo("
+               "$\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+               "aaaaaaa\");",
+               Style);
+}
+
+TEST_F(FormatTestCSharp, StringLiteralBreaks) {
+  // The line is 75 characters long.  The default limit for the Microsoft style
+  // is 120.
+  auto Style = getDefaultStyle();
+  Style.BreakStringLiterals = true;
   verifyFormat("foo("
                "\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-               "aaaaaa\");");
+               "aaaaaa\");",
+               Style);
+  // When the column limit is smaller, the string should get broken.
+  Style.ColumnLimit = 40;
+  verifyFormat(R"(foo("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
+    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
+    "aaa");)",
+               "foo("
+               "\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+               "aaaaaa\");",
+               Style);
+  // The new quotes should be the same as the original.
+  verifyFormat(R"(foo(@"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
+    @"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
+    @"aaaaa");)",
+               "foo("
+               "@\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+               "aaaaaaa\");",
+               Style);
+  // The operators can be on either line.
+  Style.BreakBeforeBinaryOperators = FormatStyle::BOS_NonAssignment;
+  verifyFormat(R"(foo("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+    + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+    + "a");)",
+               "foo("
+               "\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+               "aaaaaa\");",
+               Style);
+  Style.AlignOperands = FormatStyle::OAS_AlignAfterOperator;
+  verifyFormat(R"(foo("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+    + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+    + "a");)",
+               "foo("
+               "\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+               "aaaaaa\");",
+               Style);
+  verifyFormat(R"(x = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+  + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";)",
+               "x = "
+               "\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+               "aaaaaa\";",
+               Style);
 }
 
 TEST_F(FormatTestCSharp, CSharpVerbatiumStringLiterals) {
@@ -166,7 +218,7 @@
 }
 
 TEST_F(FormatTestCSharp, CSharpFatArrows) {
-  verifyFormat("Task serverTask = Task.Run(async() => {");
+  verifyIncompleteFormat("Task serverTask = Task.Run(async() => {");
   verifyFormat("public override string ToString() => \"{Name}\\{Age}\";");
 }
 
@@ -282,7 +334,7 @@
                "listening on provided host\")]\n"
                "public string Host { set; get; }");
 
-  verifyFormat(
+  verifyIncompleteFormat(
       "[DllImport(\"Hello\", EntryPoint = \"hello_world\")]\n"
       "// The const char* returned by hello_world must not be deleted.\n"
       "private static extern IntPtr HelloFromCpp();)");
@@ -1138,7 +1190,8 @@
                Style);
   verifyFormat(R"(Apply(x => x.Name, x => () => x.ID);)", Style);
   verifyFormat(R"(bool[] xs = { true, true };)", Style);
-  verifyFormat(R"(taskContext.Factory.Run(async () => doThing(args);)", Style);
+  verifyIncompleteFormat(
+      R"(taskContext.Factory.Run(async () => doThing(args);)", Style);
   verifyFormat(R"(catch (TestException) when (innerFinallyExecuted))", Style);
   verifyFormat(R"(private float[,] Values;)", Style);
   verifyFormat(R"(Result this[Index x] => Foo(x);)", Style);
@@ -1612,5 +1665,7 @@
   EXPECT_NE("", format("int where b <")); // reduced from crasher
 }
 
+} // namespace
+} // namespace test
 } // namespace format
-} // end namespace clang
+} // namespace clang
Index: clang/lib/Format/WhitespaceManager.cpp
===================================================================
--- clang/lib/Format/WhitespaceManager.cpp
+++ clang/lib/Format/WhitespaceManager.cpp
@@ -22,8 +22,13 @@
 bool WhitespaceManager::Change::IsBeforeInFile::operator()(
     const Change &C1, const Change &C2) const {
   return SourceMgr.isBeforeInTranslationUnit(
-      C1.OriginalWhitespaceRange.getBegin(),
-      C2.OriginalWhitespaceRange.getBegin());
+             C1.OriginalWhitespaceRange.getBegin(),
+             C2.OriginalWhitespaceRange.getBegin()) ||
+         (C1.OriginalWhitespaceRange.getBegin() ==
+              C2.OriginalWhitespaceRange.getBegin() &&
+          SourceMgr.isBeforeInTranslationUnit(
+              C1.OriginalWhitespaceRange.getEnd(),
+              C2.OriginalWhitespaceRange.getEnd()));
 }
 
 WhitespaceManager::Change::Change(const FormatToken &Tok,
@@ -1509,10 +1514,55 @@
 void WhitespaceManager::generateChanges() {
   for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
     const Change &C = Changes[i];
-    if (i > 0 && Changes[i - 1].OriginalWhitespaceRange.getBegin() ==
-                     C.OriginalWhitespaceRange.getBegin()) {
-      // Do not generate two replacements for the same location.
-      continue;
+    if (i > 0) {
+      auto Last = Changes[i - 1].OriginalWhitespaceRange;
+      auto New = Changes[i].OriginalWhitespaceRange;
+      // Do not generate two replacements for the same location.  As a special
+      // case, it is allowed if there is a replacement for the empty range
+      // between 2 tokens and another non-empty range at the start of the second
+      // token.  We didn't implement logic to combine replacements for 2
+      // consecutive source ranges into a single replacement, because the
+      // program works fine without it.
+      //
+      // We can't eliminate empty original whitespace ranges.  They appear when
+      // 2 tokens have no whitespace in between in the input.  It does not
+      // matter whether whitespace is to be added.  If no whitespace is to be
+      // added, the replacement will be empty, and it gets eliminated after this
+      // step in storeReplacement.  For example, if the input is `foo();`,
+      // there will be a replacement for the range between every consecutive
+      // pair of tokens.
+      //
+      // A replacement at the start of a token can be added by
+      // BreakableStringLiteralUsingOperators::insertBreak when it adds braces
+      // around the string literal.  Say Verilog code is being formatted and the
+      // first line is to become the next 2 lines.
+      //     x("long string");
+      //     x({"long ",
+      //        "string"});
+      // There will be a replacement for the empty range between the parenthesis
+      // and the string and another replacement for the quote character.  The
+      // replacement for the empty range between the parenthesis and the quote
+      // comes from ContinuationIndenter::addTokenOnCurrentLine when it changes
+      // the original empty range between the parenthesis and the string to
+      // another empty one.  The replacement for the quote character comes from
+      // BreakableStringLiteralUsingOperators::insertBreak when it adds the
+      // brace.  In the example, the replacement for the empty range is the same
+      // as the original text.  However, eliminating replacements that are same
+      // as the original does not help in general.  For example, a newline can
+      // be inserted, causing the first line to become the next 3 lines.
+      //     xxxxxxxxxxx("long string");
+      //     xxxxxxxxxxx(
+      //         {"long ",
+      //          "string"});
+      // In that case, the empty range between the parenthesis and the string
+      // will be replaced by a newline and 4 spaces.  So we will still have to
+      // deal with a replacement for an empty source range followed by a
+      // replacement for a non-empty source range.
+      if (Last.getBegin() == New.getBegin() &&
+          (Last.getEnd() != Last.getBegin() ||
+           New.getEnd() == New.getBegin())) {
+        continue;
+      }
     }
     if (C.CreateReplacement) {
       std::string ReplacementText = C.PreviousLinePostfix;
Index: clang/lib/Format/TokenAnnotator.cpp
===================================================================
--- clang/lib/Format/TokenAnnotator.cpp
+++ clang/lib/Format/TokenAnnotator.cpp
@@ -863,6 +863,11 @@
         OpeningBrace.Previous->is(TT_JsTypeColon)) {
       Contexts.back().IsExpression = false;
     }
+    if (Style.isVerilog() &&
+        (!OpeningBrace.getPreviousNonComment() ||
+         OpeningBrace.getPreviousNonComment()->isNot(Keywords.kw_apostrophe))) {
+      Contexts.back().VerilogMayBeConcatenation = true;
+    }
 
     unsigned CommaCount = 0;
     while (CurrentToken) {
@@ -1737,6 +1742,9 @@
     bool InCpp11AttributeSpecifier = false;
     bool InCSharpAttributeSpecifier = false;
     bool VerilogAssignmentFound = false;
+    // Whether the braces may mean concatenation instead of structure or array
+    // literal.
+    bool VerilogMayBeConcatenation = false;
     enum {
       Unknown,
       // Like the part after `:` in a constructor.
@@ -2070,6 +2078,14 @@
       } else {
         Current.setType(TT_LineComment);
       }
+    } else if (Current.is(tok::string_literal)) {
+      if (Style.isVerilog() && Contexts.back().VerilogMayBeConcatenation &&
+          Current.getPreviousNonComment() &&
+          Current.getPreviousNonComment()->isOneOf(tok::comma, tok::l_brace) &&
+          Current.getNextNonComment() &&
+          Current.getNextNonComment()->isOneOf(tok::comma, tok::r_brace)) {
+        Current.setType(TT_StringInConcatenation);
+      }
     } else if (Current.is(tok::l_paren)) {
       if (lParenStartsCppCast(Current))
         Current.setType(TT_CppCastLParen);
@@ -2740,6 +2756,19 @@
         Start = Current;
       }
 
+      if ((Style.isCSharp() || Style.isJavaScript() ||
+           Style.Language == FormatStyle::LK_Java) &&
+          Precedence == prec::Additive && Current) {
+        // A string can be broken without parentheses around it when it is
+        // already in a sequence of strings joined by `+` signs.
+        FormatToken *Prev = Current->getPreviousNonComment();
+        if (Prev && Prev->is(tok::string_literal) &&
+            (Prev == Start || Prev->endsSequence(tok::string_literal, tok::plus,
+                                                 TT_StringInConcatenation))) {
+          Prev->setType(TT_StringInConcatenation);
+        }
+      }
+
       // At the end of the line or when an operator with lower precedence is
       // found, insert fake parenthesis and return.
       if (!Current ||
Index: clang/lib/Format/FormatToken.h
===================================================================
--- clang/lib/Format/FormatToken.h
+++ clang/lib/Format/FormatToken.h
@@ -134,6 +134,11 @@
   TYPE(StartOfName)                                                            \
   TYPE(StatementAttributeLikeMacro)                                            \
   TYPE(StatementMacro)                                                         \
+  /* A string that is part of a string concatenation. For C#, JavaScript, and  \
+   * Java, it is used for marking whether a string needs parentheses around it \
+   * if it is to be split into parts joined by `+`. For Verilog, whether       \
+   * braces need to be added to split it. Not used for other languages. */     \
+  TYPE(StringInConcatenation)                                                  \
   TYPE(StructLBrace)                                                           \
   TYPE(StructuredBindingLSquare)                                               \
   TYPE(TemplateCloser)                                                         \
Index: clang/lib/Format/ContinuationIndenter.cpp
===================================================================
--- clang/lib/Format/ContinuationIndenter.cpp
+++ clang/lib/Format/ContinuationIndenter.cpp
@@ -36,6 +36,14 @@
   return Style.IndentWrappedFunctionNames || LineType == LT_ObjCMethodDecl;
 }
 
+// Returns true if a binary operator following \p Tok should be unindented when
+// the style permits it.
+static bool shouldUnindentNextOperator(const FormatToken &Tok) {
+  const FormatToken *Previous = Tok.getPreviousNonComment();
+  return Previous && (Previous->getPrecedence() == prec::Assignment ||
+                      Previous->isOneOf(tok::kw_return, TT_RequiresClause));
+}
+
 // Returns the length of everything up to the first possible line break after
 // the ), ], } or > matching \c Tok.
 static unsigned getLengthToMatchingParen(const FormatToken &Tok,
@@ -1618,11 +1626,10 @@
     if (Previous && Previous->endsSequence(tok::l_paren, tok::kw__Generic))
       NewParenState.Indent = CurrentState.LastSpace;
 
-    if (Previous &&
-        (Previous->getPrecedence() == prec::Assignment ||
-         Previous->isOneOf(tok::kw_return, TT_RequiresClause) ||
-         (PrecedenceLevel == prec::Conditional && Previous->is(tok::question) &&
-          Previous->is(TT_ConditionalExpr))) &&
+    if ((shouldUnindentNextOperator(Current) ||
+         (Previous &&
+          (PrecedenceLevel == prec::Conditional &&
+           Previous->is(tok::question) && Previous->is(TT_ConditionalExpr)))) &&
         !Newline) {
       // If BreakBeforeBinaryOperators is set, un-indent a bit to account for
       // the operator and keep the operands aligned.
@@ -2186,14 +2193,9 @@
                                            LineState &State, bool AllowBreak) {
   unsigned StartColumn = State.Column - Current.ColumnWidth;
   if (Current.isStringLiteral()) {
-    // FIXME: String literal breaking is currently disabled for C#, Java, Json
-    // and JavaScript, as it requires strings to be merged using "+" which we
-    // don't support.
-    if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
-        Style.isCSharp() || Style.isJson() || !Style.BreakStringLiterals ||
-        !AllowBreak) {
+    // Strings in JSON can not be broken.
+    if (Style.isJson() || !Style.BreakStringLiterals || !AllowBreak)
       return nullptr;
-    }
 
     // Don't break string literals inside preprocessor directives (except for
     // #define directives, as their contents are stored in separate lines and
@@ -2212,6 +2214,33 @@
       return nullptr;
 
     StringRef Text = Current.TokenText;
+    // We need this to address the case where there is an unbreakable tail only
+    // if certain other formatting decisions have been taken. The
+    // UnbreakableTailLength of Current is an overapproximation is that case and
+    // we need to be correct here.
+    unsigned UnbreakableTailLength = (State.NextToken && canBreak(State))
+                                         ? 0
+                                         : Current.UnbreakableTailLength;
+
+    if (Style.isVerilog() || Style.Language == FormatStyle::LK_Java ||
+        Style.isJavaScript() || Style.isCSharp()) {
+      BreakableStringLiteralUsingOperators::QuoteStyleType QuoteStyle;
+      if (Style.isJavaScript() && Text.startswith("'") && Text.endswith("'")) {
+        QuoteStyle = BreakableStringLiteralUsingOperators::SingleQuotes;
+      } else if (Style.isCSharp() && Text.startswith("@\"") &&
+                 Text.endswith("\"")) {
+        QuoteStyle = BreakableStringLiteralUsingOperators::AtDoubleQuotes;
+      } else if (Text.startswith("\"") && Text.endswith("\"")) {
+        QuoteStyle = BreakableStringLiteralUsingOperators::DoubleQuotes;
+      } else {
+        return nullptr;
+      }
+      return std::make_unique<BreakableStringLiteralUsingOperators>(
+          Current, QuoteStyle,
+          /*UnindentPlus=*/shouldUnindentNextOperator(Current), StartColumn,
+          UnbreakableTailLength, State.Line->InPPDirective, Encoding, Style);
+    }
+
     StringRef Prefix;
     StringRef Postfix;
     // FIXME: Handle whitespace between '_T', '(', '"..."', and ')'.
@@ -2224,13 +2253,6 @@
           Text.startswith(Prefix = "u8\"") ||
           Text.startswith(Prefix = "L\""))) ||
         (Text.startswith(Prefix = "_T(\"") && Text.endswith(Postfix = "\")"))) {
-      // We need this to address the case where there is an unbreakable tail
-      // only if certain other formatting decisions have been taken. The
-      // UnbreakableTailLength of Current is an overapproximation is that case
-      // and we need to be correct here.
-      unsigned UnbreakableTailLength = (State.NextToken && canBreak(State))
-                                           ? 0
-                                           : Current.UnbreakableTailLength;
       return std::make_unique<BreakableStringLiteral>(
           Current, StartColumn, Prefix, Postfix, UnbreakableTailLength,
           State.Line->InPPDirective, Encoding, Style);
@@ -2631,6 +2653,9 @@
                  Current.UnbreakableTailLength;
 
   if (BreakInserted) {
+    if (!DryRun)
+      Token->updateAfterBroken(Whitespaces);
+
     // If we break the token inside a parameter list, we need to break before
     // the next parameter on all levels, so that the next parameter is clearly
     // visible. Line comments already introduce a break.
Index: clang/lib/Format/BreakableToken.h
===================================================================
--- clang/lib/Format/BreakableToken.h
+++ clang/lib/Format/BreakableToken.h
@@ -230,6 +230,11 @@
   /// as a unit and is responsible for the formatting of the them.
   virtual void updateNextToken(LineState &State) const {}
 
+  /// Adds replacements that are needed when the token is broken. Such as
+  /// wrapping a JavaScript string in parentheses after it gets broken with plus
+  /// signs.
+  virtual void updateAfterBroken(WhitespaceManager &Whitespaces) const {}
+
 protected:
   BreakableToken(const FormatToken &Tok, bool InPPDirective,
                  encoding::Encoding Encoding, const FormatStyle &Style)
@@ -283,6 +288,44 @@
   unsigned UnbreakableTailLength;
 };
 
+class BreakableStringLiteralUsingOperators : public BreakableStringLiteral {
+public:
+  enum QuoteStyleType {
+    DoubleQuotes,   // The string is quoted with double quotes.
+    SingleQuotes,   // The JavaScript string is quoted with single quotes.
+    AtDoubleQuotes, // The C# verbatim string is quoted with the at sign and
+                    // double quotes.
+  };
+  /// Creates a breakable token for a single line string literal for C#, Java,
+  /// JavaScript, or Verilog.
+  ///
+  /// \p StartColumn specifies the column in which the token will start
+  /// after formatting.
+  BreakableStringLiteralUsingOperators(
+      const FormatToken &Tok, QuoteStyleType QuoteStyle, bool UnindentPlus,
+      unsigned StartColumn, unsigned UnbreakableTailLength, bool InPPDirective,
+      encoding::Encoding Encoding, const FormatStyle &Style);
+  unsigned getRemainingLength(unsigned LineIndex, unsigned Offset,
+                              unsigned StartColumn) const override;
+  unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override;
+  void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
+                   unsigned ContentIndent,
+                   WhitespaceManager &Whitespaces) const override;
+  void updateAfterBroken(WhitespaceManager &Whitespaces) const override;
+
+protected:
+  // Whether braces or parentheses should be inserted around the string to form
+  // a concatenation.
+  bool BracesNeeded;
+  QuoteStyleType QuoteStyle;
+  // The braces or parentheses along with the first character which they
+  // replace, either a quote or at sign.
+  StringRef LeftBraceQuote;
+  StringRef RightBraceQuote;
+  // Width added to the left due to the added. Does not apply to the first line.
+  int ContinuationIndent;
+};
+
 class BreakableComment : public BreakableToken {
 protected:
   /// Creates a breakable token for a comment.
Index: clang/lib/Format/BreakableToken.cpp
===================================================================
--- clang/lib/Format/BreakableToken.cpp
+++ clang/lib/Format/BreakableToken.cpp
@@ -292,6 +292,120 @@
       Prefix, InPPDirective, 1, StartColumn);
 }
 
+BreakableStringLiteralUsingOperators::BreakableStringLiteralUsingOperators(
+    const FormatToken &Tok, QuoteStyleType QuoteStyle, bool UnindentPlus,
+    unsigned StartColumn, unsigned UnbreakableTailLength, bool InPPDirective,
+    encoding::Encoding Encoding, const FormatStyle &Style)
+    : BreakableStringLiteral(
+          Tok, StartColumn, /*Prefix=*/QuoteStyle == SingleQuotes ? "'"
+                            : QuoteStyle == AtDoubleQuotes        ? "@\""
+                                                                  : "\"",
+          /*Postfix=*/QuoteStyle == SingleQuotes ? "'" : "\"",
+          UnbreakableTailLength, InPPDirective, Encoding, Style),
+      BracesNeeded(Tok.isNot(TT_StringInConcatenation)),
+      QuoteStyle(QuoteStyle) {
+  // Find the replacement text for inserting braces and quotes and line breaks.
+  // We don't create an allocated string concatenated from parts here because it
+  // has to outlive the BreakableStringliteral object.  The brace replacements
+  // include a quote so that WhitespaceManager can tell it apart from whitespace
+  // replacements between the string and surrounding tokens.
+
+  // The option is not implemented in JavaScript.
+  bool SignOnNewLine =
+      !Style.isJavaScript() &&
+      Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None;
+
+  if (Style.isVerilog()) {
+    // In Verilog, all strings are quoted by double quotes, joined by commas,
+    // and wrapped in braces.  The comma is always before the newline.
+    assert(QuoteStyle == DoubleQuotes);
+    LeftBraceQuote = Style.Cpp11BracedListStyle ? "{\"" : "{ \"";
+    RightBraceQuote = Style.Cpp11BracedListStyle ? "\"}" : "\" }";
+    Postfix = "\",";
+    Prefix = "\"";
+  } else {
+    // The plus sign may be on either line.  And also C# and JavaScript have
+    // different quoting styles.
+    if (QuoteStyle == SingleQuotes) {
+      LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( '" : "('";
+      RightBraceQuote = Style.SpacesInParensOptions.Other ? "' )" : "')";
+      Postfix = SignOnNewLine ? "'" : "' +";
+      Prefix = SignOnNewLine ? "+ '" : "'";
+    } else {
+      if (QuoteStyle == AtDoubleQuotes) {
+        LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( @" : "(@";
+        Prefix = SignOnNewLine ? "+ @\"" : "@\"";
+      } else {
+        LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( \"" : "(\"";
+        Prefix = SignOnNewLine ? "+ \"" : "\"";
+      }
+      RightBraceQuote = Style.SpacesInParensOptions.Other ? "\" )" : "\")";
+      Postfix = SignOnNewLine ? "\"" : "\" +";
+    }
+  }
+
+  // Following lines are indented by the width of the brace and space if any.
+  ContinuationIndent = BracesNeeded ? LeftBraceQuote.size() - 1 : 0;
+  // The plus sign may need to be unindented depending on the style.
+  // FIXME: Add support for DontAlign.
+  if (!Style.isVerilog() && SignOnNewLine && !BracesNeeded && UnindentPlus &&
+      Style.AlignOperands == FormatStyle::OAS_AlignAfterOperator) {
+    ContinuationIndent -= 2;
+  }
+}
+
+unsigned BreakableStringLiteralUsingOperators::getRemainingLength(
+    unsigned LineIndex, unsigned Offset, unsigned StartColumn) const {
+  return UnbreakableTailLength + (BracesNeeded ? RightBraceQuote.size() : 1) +
+         encoding::columnWidthWithTabs(Line.substr(Offset), StartColumn,
+                                       Style.TabWidth, Encoding);
+}
+
+unsigned
+BreakableStringLiteralUsingOperators::getContentStartColumn(unsigned LineIndex,
+                                                            bool Break) const {
+  return std::max(
+      0,
+      static_cast<int>(StartColumn) +
+          (Break ? ContinuationIndent + static_cast<int>(Prefix.size())
+                 : (BracesNeeded ? static_cast<int>(LeftBraceQuote.size()) - 1
+                                 : 0) +
+                       (QuoteStyle == AtDoubleQuotes ? 2 : 1)));
+}
+
+void BreakableStringLiteralUsingOperators::insertBreak(
+    unsigned LineIndex, unsigned TailOffset, Split Split,
+    unsigned ContentIndent, WhitespaceManager &Whitespaces) const {
+  Whitespaces.replaceWhitespaceInToken(
+      Tok, /*Offset=*/(QuoteStyle == AtDoubleQuotes ? 2 : 1) + TailOffset +
+               Split.first,
+      /*ReplaceChars=*/Split.second, /*PreviousPostfix=*/Postfix,
+      /*CurrentPrefix=*/Prefix, InPPDirective, /*NewLines=*/1,
+      /*Spaces=*/
+      std::max(0, static_cast<int>(StartColumn) + ContinuationIndent));
+}
+
+void BreakableStringLiteralUsingOperators::updateAfterBroken(
+    WhitespaceManager &Whitespaces) const {
+  // Add the braces required for breaking the token if they are needed.
+  if (!BracesNeeded)
+    return;
+
+  // To add a brace or parenthesis, we replace the quote (or the at sign) with a
+  // brace and another quote.  This is because the rest of the program requires
+  // one replacement for each source range.  If we replace the empty strings
+  // around the string, it may conflict with whitespace replacements between the
+  // string and adjacent tokens.
+  Whitespaces.replaceWhitespaceInToken(
+      Tok, /*Offset=*/0, /*ReplaceChars=*/1, /*PreviousPostfix=*/"",
+      /*CurrentPrefix=*/LeftBraceQuote, InPPDirective, /*NewLines=*/0,
+      /*Spaces=*/0);
+  Whitespaces.replaceWhitespaceInToken(
+      Tok, /*Offset=*/Tok.TokenText.size() - 1, /*ReplaceChars=*/1,
+      /*PreviousPostfix=*/RightBraceQuote,
+      /*CurrentPrefix=*/"", InPPDirective, /*NewLines=*/0, /*Spaces=*/0);
+}
+
 BreakableComment::BreakableComment(const FormatToken &Token,
                                    unsigned StartColumn, bool InPPDirective,
                                    encoding::Encoding Encoding,
Index: clang/include/clang/Format/Format.h
===================================================================
--- clang/include/clang/Format/Format.h
+++ clang/include/clang/Format/Format.h
@@ -2008,6 +2008,8 @@
   bool BreakAfterJavaFieldAnnotations;
 
   /// Allow breaking string literals when formatting.
+  ///
+  /// In C, C++, and Objective-C:
   /// \code
   ///    true:
   ///    const char* x = "veryVeryVeryVeryVeryVe"
@@ -2016,8 +2018,34 @@
   ///
   ///    false:
   ///    const char* x =
-  ///      "veryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongString";
+  ///        "veryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongString";
+  /// \endcode
+  ///
+  /// In C#, Java, and JavaScript:
+  /// \code
+  ///    true:
+  ///    var x = "veryVeryVeryVeryVeryVe" +
+  ///            "ryVeryVeryVeryVeryVery" +
+  ///            "VeryLongString";
+  ///
+  ///    false:
+  ///    var x =
+  ///        "veryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongString";
   /// \endcode
+  /// C# and JavaScript interpolated strings are not broken.
+  ///
+  /// In Verilog:
+  /// \code
+  ///    true:
+  ///    string x = {"veryVeryVeryVeryVeryVe",
+  ///                "ryVeryVeryVeryVeryVery",
+  ///                "VeryLongString"};
+  ///
+  ///    false:
+  ///    string x =
+  ///        "veryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongString";
+  /// \endcode
+  ///
   /// \version 3.9
   bool BreakStringLiterals;
 
Index: clang/docs/ClangFormatStyleOptions.rst
===================================================================
--- clang/docs/ClangFormatStyleOptions.rst
+++ clang/docs/ClangFormatStyleOptions.rst
@@ -2722,6 +2722,8 @@
 **BreakStringLiterals** (``Boolean``) :versionbadge:`clang-format 3.9` :ref:`¶ <BreakStringLiterals>`
   Allow breaking string literals when formatting.
 
+  In C, C++, and Objective-C:
+
   .. code-block:: c++
 
      true:
@@ -2731,7 +2733,34 @@
 
      false:
      const char* x =
-       "veryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongString";
+         "veryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongString";
+
+  In C#, Java, and JavaScript:
+
+  .. code-block:: c++
+
+     true:
+     var x = "veryVeryVeryVeryVeryVe" +
+             "ryVeryVeryVeryVeryVery" +
+             "VeryLongString";
+
+     false:
+     var x =
+         "veryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongString";
+  C# and JavaScript interpolated strings are not broken.
+
+  In Verilog:
+
+  .. code-block:: c++
+
+     true:
+     string x = {"veryVeryVeryVeryVeryVe",
+                 "ryVeryVeryVeryVeryVery",
+                 "VeryLongString"};
+
+     false:
+     string x =
+         "veryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongString";
 
 .. _ColumnLimit:
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to