https://github.com/owenca updated 
https://github.com/llvm/llvm-project/pull/141433

>From d32d17a56f6d53cebc6299c0474fc95abeb141ed Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpi...@gmail.com>
Date: Sun, 25 May 2025 15:40:45 -0700
Subject: [PATCH 1/3] [clang-format] Handle Java text blocks (#141334)

Fix #61954

(cherry picked from commit b7f5950bb3b97eac979925a3bbf015530c26962e)
---
 clang/lib/Format/FormatTokenLexer.cpp     | 32 +++++++++++++
 clang/lib/Format/FormatTokenLexer.h       |  2 +
 clang/unittests/Format/FormatTestJava.cpp | 57 +++++++++++++++++++++++
 3 files changed, 91 insertions(+)

diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index 16f0a76f3a954..d8ad3a1d52115 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -636,6 +636,36 @@ bool FormatTokenLexer::canPrecedeRegexLiteral(FormatToken *Prev) {
   return true;
 }
 
+void FormatTokenLexer::tryParseJavaTextBlock() {
+  if (FormatTok->TokenText != "\"\"")
+    return;
+
+  const auto *S = Lex->getBufferLocation();
+  const auto *End = Lex->getBuffer().end();
+
+  if (S == End || *S != '\"')
+    return;
+
+  ++S; // Skip the `"""` that begins a text block.
+
+  // Find the `"""` that ends the text block.
+  for (int Count = 0; Count < 3 && S < End; ++S) {
+    switch (*S) {
+    case '\\':
+      Count = -1;
+      break;
+    case '\"':
+      ++Count;
+      break;
+    default:
+      Count = 0;
+    }
+  }
+
+  // Ignore the possibly invalid text block.
+  resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(S)));
+}
+
 // Tries to parse a JavaScript Regex literal starting at the current token,
 // if that begins with a slash and is in a location where JavaScript allows
 // regex literals. Changes the current token to a regex literal and updates
@@ -1326,6 +1356,8 @@ FormatToken *FormatTokenLexer::getNextToken() {
     FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
     ++Column;
     StateStack.push(LexerState::TOKEN_STASHED);
+  } else if (Style.isJava() && FormatTok->is(tok::string_literal)) {
+    tryParseJavaTextBlock();
   }
 
   if (Style.isVerilog() && Tokens.size() > 0 &&
diff --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h
index 61474a3f9ada8..d9a25c8ef3538 100644
--- a/clang/lib/Format/FormatTokenLexer.h
+++ b/clang/lib/Format/FormatTokenLexer.h
@@ -71,6 +71,8 @@ class FormatTokenLexer {
 
   bool canPrecedeRegexLiteral(FormatToken *Prev);
 
+  void tryParseJavaTextBlock();
+
   // Tries to parse a JavaScript Regex literal starting at the current token,
   // if that begins with a slash and is in a location where JavaScript allows
   // regex literals. Changes the current token to a regex literal and updates
diff --git a/clang/unittests/Format/FormatTestJava.cpp b/clang/unittests/Format/FormatTestJava.cpp
index 33998bc7ff858..d0a3b4eb96d69 100644
--- a/clang/unittests/Format/FormatTestJava.cpp
+++ b/clang/unittests/Format/FormatTestJava.cpp
@@ -789,6 +789,63 @@ TEST_F(FormatTestJava, AlignCaseArrows) {
                Style);
 }
 
+TEST_F(FormatTestJava, TextBlock) {
+  verifyNoChange("String myStr = \"\"\"\n"
+                 "hello\n"
+                 "there\n"
+                 "\"\"\";");
+
+  verifyNoChange("String tb = \"\"\"\n"
+                 "            the new\"\"\";");
+
+  verifyNoChange("System.out.println(\"\"\"\n"
+                 "    This is the first line\n"
+                 "    This is the second line\n"
+                 "    \"\"\");");
+
+  verifyNoChange("void writeHTML() {\n"
+                 "  String html = \"\"\" \n"
+                 "                <html>\n"
+                 "                    <p>Hello World.</p>\n"
+                 "                </html>\n"
+                 "\"\"\";\n"
+                 "  writeOutput(html);\n"
+                 "}");
+
+  verifyNoChange("String colors = \"\"\"\t\n"
+                 "    red\n"
+                 "    green\n"
+                 "    blue\"\"\".indent(4);");
+
+  verifyNoChange("String code = \"\"\"\n"
+                 "    String source = \\\"\"\"\n"
+                 "        String message = \"Hello, World!\";\n"
+                 "        System.out.println(message);\n"
+                 "        \\\"\"\";\n"
+                 "    \"\"\";");
+
+  verifyNoChange(
+      "class Outer {\n"
+      "  void printPoetry() {\n"
+      "    String lilacs = \"\"\"\n"
+      "Passing the apple-tree blows of white and pink in the orchards\n"
+      "\"\"\";\n"
+      "    System.out.println(lilacs);\n"
+      "  }\n"
+      "}");
+
+  verifyNoChange("String name = \"\"\"\r\n"
+                 "        red\n"
+                 "        green\n"
+                 "        blue\\\n"
+                 "    \"\"\";");
+
+  verifyFormat("String name = \"\"\"Pat Q. Smith\"\"\";");
+
+  verifyNoChange("String name = \"\"\"\n"
+                 "              Pat Q. Smith");
+}
+
 } // namespace
 } // namespace test
 } // namespace format

>From def14516c2b0997656d6f87947217e6b5b6e9984 Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpi...@gmail.com>
Date: Sun, 25 May 2025 16:56:04 -0700
Subject: [PATCH 2/3] Update FormatTokenLexer.cpp

---
 clang/lib/Format/FormatTokenLexer.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index d8ad3a1d52115..9fdce7b4aa7fc 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -1356,8 +1356,8 @@ FormatToken *FormatTokenLexer::getNextToken() {
     FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
     ++Column;
     StateStack.push(LexerState::TOKEN_STASHED);
-  } else if (Style.isJava() && FormatTok->is(tok::string_literal)) {
-    tryParseJavaTextBlock();
+  } else if (Style.Language == FormatStyle::LK_Java &&
+             FormatTok->is(tok::string_literal)) {
   }
 
   if (Style.isVerilog() && Tokens.size() > 0 &&

>From c70ec3e86493c1ba7ac28a27ceb4c8cb04a92f64 Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpi...@gmail.com>
Date: Sun, 25 May 2025 16:58:12 -0700
Subject: [PATCH 3/3] Update FormatTokenLexer.cpp

---
 clang/lib/Format/FormatTokenLexer.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index 9fdce7b4aa7fc..0755a5d355394 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -1358,6 +1358,7 @@ FormatToken *FormatTokenLexer::getNextToken() {
     StateStack.push(LexerState::TOKEN_STASHED);
   } else if (Style.Language == FormatStyle::LK_Java &&
              FormatTok->is(tok::string_literal)) {
+    tryParseJavaTextBlock();
   }
 
   if (Style.isVerilog() && Tokens.size() > 0 &&

_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to