https://github.com/owenca updated https://github.com/llvm/llvm-project/pull/141433
>From d32d17a56f6d53cebc6299c0474fc95abeb141ed Mon Sep 17 00:00:00 2001 From: Owen Pan <owenpi...@gmail.com> Date: Sun, 25 May 2025 15:40:45 -0700 Subject: [PATCH 1/3] [clang-format] Handle Java text blocks (#141334) Fix #61954 (cherry picked from commit b7f5950bb3b97eac979925a3bbf015530c26962e) --- clang/lib/Format/FormatTokenLexer.cpp | 32 +++++++++++++ clang/lib/Format/FormatTokenLexer.h | 2 + clang/unittests/Format/FormatTestJava.cpp | 57 +++++++++++++++++++++++ 3 files changed, 91 insertions(+) diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index 16f0a76f3a954..d8ad3a1d52115 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -636,6 +636,36 @@ bool FormatTokenLexer::canPrecedeRegexLiteral(FormatToken *Prev) { return true; } +void FormatTokenLexer::tryParseJavaTextBlock() { + if (FormatTok->TokenText != "\"\"") + return; + + const auto *S = Lex->getBufferLocation(); + const auto *End = Lex->getBuffer().end(); + + if (S == End || *S != '\"') + return; + + ++S; // Skip the `"""` that begins a text block. + + // Find the `"""` that ends the text block. + for (int Count = 0; Count < 3 && S < End; ++S) { + switch (*S) { + case '\\': + Count = -1; + break; + case '\"': + ++Count; + break; + default: + Count = 0; + } + } + + // Ignore the possibly invalid text block. + resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(S))); +} + // Tries to parse a JavaScript Regex literal starting at the current token, // if that begins with a slash and is in a location where JavaScript allows // regex literals. Changes the current token to a regex literal and updates @@ -1326,6 +1356,8 @@ FormatToken *FormatTokenLexer::getNextToken() { FormatTok->TokenText = FormatTok->TokenText.substr(0, 1); ++Column; StateStack.push(LexerState::TOKEN_STASHED); + } else if (Style.isJava() && FormatTok->is(tok::string_literal)) { + tryParseJavaTextBlock(); } if (Style.isVerilog() && Tokens.size() > 0 && diff --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h index 61474a3f9ada8..d9a25c8ef3538 100644 --- a/clang/lib/Format/FormatTokenLexer.h +++ b/clang/lib/Format/FormatTokenLexer.h @@ -71,6 +71,8 @@ class FormatTokenLexer { bool canPrecedeRegexLiteral(FormatToken *Prev); + void tryParseJavaTextBlock(); + // Tries to parse a JavaScript Regex literal starting at the current token, // if that begins with a slash and is in a location where JavaScript allows // regex literals. Changes the current token to a regex literal and updates diff --git a/clang/unittests/Format/FormatTestJava.cpp b/clang/unittests/Format/FormatTestJava.cpp index 33998bc7ff858..d0a3b4eb96d69 100644 --- a/clang/unittests/Format/FormatTestJava.cpp +++ b/clang/unittests/Format/FormatTestJava.cpp @@ -789,6 +789,63 @@ TEST_F(FormatTestJava, AlignCaseArrows) { Style); } +TEST_F(FormatTestJava, TextBlock) { + verifyNoChange("String myStr = \"\"\"\n" + "hello\n" + "there\n" + "\"\"\";"); + + verifyNoChange("String tb = \"\"\"\n" + " the new\"\"\";"); + + verifyNoChange("System.out.println(\"\"\"\n" + " This is the first line\n" + " This is the second line\n" + " \"\"\");"); + + verifyNoChange("void writeHTML() {\n" + " String html = \"\"\" \n" + " <html>\n" + " <p>Hello World.</p>\n" + " </html>\n" + "\"\"\";\n" + " writeOutput(html);\n" + "}"); + + verifyNoChange("String colors = \"\"\"\t\n" + " red\n" + " green\n" + " blue\"\"\".indent(4);"); + + verifyNoChange("String code = \"\"\"\n" + " String source = \\\"\"\"\n" + " String message = \"Hello, World!\";\n" + " System.out.println(message);\n" + " \\\"\"\";\n" + " \"\"\";"); + + verifyNoChange( + "class Outer {\n" + " void printPoetry() {\n" + " String lilacs = \"\"\"\n" + "Passing the apple-tree blows of white and pink in the orchards\n" + "\"\"\";\n" + " System.out.println(lilacs);\n" + " }\n" + "}"); + + verifyNoChange("String name = \"\"\"\r\n" + " red\n" + " green\n" + " blue\\\n" + " \"\"\";"); + + verifyFormat("String name = \"\"\"Pat Q. Smith\"\"\";"); + + verifyNoChange("String name = \"\"\"\n" + " Pat Q. Smith"); +} + } // namespace } // namespace test } // namespace format >From def14516c2b0997656d6f87947217e6b5b6e9984 Mon Sep 17 00:00:00 2001 From: Owen Pan <owenpi...@gmail.com> Date: Sun, 25 May 2025 16:56:04 -0700 Subject: [PATCH 2/3] Update FormatTokenLexer.cpp --- clang/lib/Format/FormatTokenLexer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index d8ad3a1d52115..9fdce7b4aa7fc 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -1356,8 +1356,8 @@ FormatToken *FormatTokenLexer::getNextToken() { FormatTok->TokenText = FormatTok->TokenText.substr(0, 1); ++Column; StateStack.push(LexerState::TOKEN_STASHED); - } else if (Style.isJava() && FormatTok->is(tok::string_literal)) { - tryParseJavaTextBlock(); + } else if (Style.Language == FormatStyle::LK_Java && + FormatTok->is(tok::string_literal)) { } if (Style.isVerilog() && Tokens.size() > 0 && >From c70ec3e86493c1ba7ac28a27ceb4c8cb04a92f64 Mon Sep 17 00:00:00 2001 From: Owen Pan <owenpi...@gmail.com> Date: Sun, 25 May 2025 16:58:12 -0700 Subject: [PATCH 3/3] Update FormatTokenLexer.cpp --- clang/lib/Format/FormatTokenLexer.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index 9fdce7b4aa7fc..0755a5d355394 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -1358,6 +1358,7 @@ FormatToken *FormatTokenLexer::getNextToken() { StateStack.push(LexerState::TOKEN_STASHED); } else if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::string_literal)) { + tryParseJavaTextBlock(); } if (Style.isVerilog() && Tokens.size() > 0 && _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits