https://github.com/hbatagelo updated https://github.com/llvm/llvm-project/pull/196861

>From a740c3c7b75ef1c00e4ae974be4a3227e0b5d1ec Mon Sep 17 00:00:00 2001
From: Harlen Batagelo <[email protected]>
Date: Sun, 10 May 2026 21:49:41 -0300
Subject: [PATCH 1/2] Synthesize missing eof token

---
 clang/lib/Tooling/Syntax/Tokens.cpp           | 10 +++++
 clang/unittests/Tooling/Syntax/TokensTest.cpp | 42 +++++++++++++++++--
 2 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp
index 260654a0701fd..e6ca70e0cfb29 100644
--- a/clang/lib/Tooling/Syntax/Tokens.cpp
+++ b/clang/lib/Tooling/Syntax/Tokens.cpp
@@ -712,6 +712,16 @@ class TokenCollector::Builder {
 
   TokenBuffer build() && {
     assert(!Result.ExpandedTokens.empty());
+
+    // When the parser hits a hard limit (e.g. bracket depth or function scope
+    // depth), it halts prematurely and leaves the expanded token stream
+    // truncated with no final `eof` token. To keep the invariant, synthesize an
+    // `eof` at the location of the last collected token.
+    if (Result.ExpandedTokens.back().kind() != tok::eof) {
+      SourceLocation Loc = Result.ExpandedTokens.back().location();
+      Result.ExpandedTokens.emplace_back(Loc, 0, tok::eof);
+    }
+
     assert(Result.ExpandedTokens.back().kind() == tok::eof);
 
     // Tokenize every file that contributed tokens to the expanded stream.
diff --git a/clang/unittests/Tooling/Syntax/TokensTest.cpp b/clang/unittests/Tooling/Syntax/TokensTest.cpp
index 468ca5ddd2c75..ae84bda5b228b 100644
--- a/clang/unittests/Tooling/Syntax/TokensTest.cpp
+++ b/clang/unittests/Tooling/Syntax/TokensTest.cpp
@@ -92,7 +92,8 @@ class TokenCollectorTest : public ::testing::Test {
   /// Run the clang frontend, collect the preprocessed tokens from the frontend
   /// invocation and store them in this->Buffer.
   /// This also clears SourceManager before running the compiler.
-  void recordTokens(llvm::StringRef Code) {
+  void recordTokens(llvm::StringRef Code,
+                    llvm::ArrayRef<const char *> ExtraArgs = {}) {
     class RecordTokens : public ASTFrontendAction {
     public:
       explicit RecordTokens(TokenBuffer &Result) : Result(Result) {}
@@ -123,8 +124,10 @@ class TokenCollectorTest : public ::testing::Test {
     // Prepare to run a compiler.
     if (!Diags->getClient())
       Diags->setClient(new IgnoringDiagConsumer);
-    std::vector<const char *> Args = {"tok-test", "-std=c++03", "-fsyntax-only",
-                                      FileName};
+    std::vector<const char *> Args = {"tok-test", "-std=c++03",
+                                      "-fsyntax-only"};
+    Args.insert(Args.end(), ExtraArgs.begin(), ExtraArgs.end());
+    Args.push_back(FileName);
     CreateInvocationOptions CIOpts;
     CIOpts.Diags = Diags;
     CIOpts.VFS = FS;
@@ -1148,4 +1151,37 @@ TEST_F(TokenCollectorTest, Pragmas) {
     }
   )cpp");
 }
+
+TEST_F(TokenBufferTest, EofTokenOnFunctionScopeDepthLimit) {
+  static_assert(ParmVarDecl::getMaxFunctionScopeDepth() == 127,
+                "Test input relies on a max depth of 127");
+
+  // Force parser to bail out due to exceeding the function scope depth limit.
+  // https://github.com/llvm/llvm-project/issues/196244
+  recordTokens(R"cpp(
+    #define L [](int=
+    #define L4 L L L L
+    #define L16 L4 L4 L4 L4
+    #define L64 L16 L16 L16 L16
+
+    void foo() {
+      L64 L64 L
+    }
+  )cpp");
+
+  ASSERT_GE(Buffer.expandedTokens().size(), 2u);
+  // The stream is truncated but ends with an `eof`.
+  EXPECT_EQ(Buffer.expandedTokens().back().kind(), tok::eof);
+  EXPECT_EQ(Buffer.expandedTokens().drop_back().back().kind(), tok::kw_int);
+}
+
+TEST_F(TokenBufferTest, EofTokenOnBracketDepthLimit) {
+  // Force parser to bail out due to exceeding the bracket depth limit.
+  recordTokens("((;", {"-fbracket-depth=1"});
+
+  ASSERT_GE(Buffer.expandedTokens().size(), 2u);
+  // The stream is truncated but ends with an `eof`.
+  EXPECT_EQ(Buffer.expandedTokens().back().kind(), tok::eof);
+  EXPECT_EQ(Buffer.expandedTokens().drop_back().back().kind(), tok::l_paren);
+}
 } // namespace

>From 81e668b1ec63858884237789342604f55d4d76de Mon Sep 17 00:00:00 2001
From: Harlen Batagelo <[email protected]>
Date: Mon, 11 May 2026 10:30:10 -0300
Subject: [PATCH 2/2] Remove redundant test case and assertion

---
 clang/lib/Tooling/Syntax/Tokens.cpp           |  2 --
 clang/unittests/Tooling/Syntax/TokensTest.cpp | 23 -------------------
 2 files changed, 25 deletions(-)

diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp
index e6ca70e0cfb29..9ad8a149675d9 100644
--- a/clang/lib/Tooling/Syntax/Tokens.cpp
+++ b/clang/lib/Tooling/Syntax/Tokens.cpp
@@ -722,8 +722,6 @@ class TokenCollector::Builder {
       Result.ExpandedTokens.emplace_back(Loc, 0, tok::eof);
     }
 
-    assert(Result.ExpandedTokens.back().kind() == tok::eof);
-
     // Tokenize every file that contributed tokens to the expanded stream.
     buildSpelledTokens();
 
diff --git a/clang/unittests/Tooling/Syntax/TokensTest.cpp b/clang/unittests/Tooling/Syntax/TokensTest.cpp
index ae84bda5b228b..8af9308828c28 100644
--- a/clang/unittests/Tooling/Syntax/TokensTest.cpp
+++ b/clang/unittests/Tooling/Syntax/TokensTest.cpp
@@ -1152,29 +1152,6 @@ TEST_F(TokenCollectorTest, Pragmas) {
   )cpp");
 }
 
-TEST_F(TokenBufferTest, EofTokenOnFunctionScopeDepthLimit) {
-  static_assert(ParmVarDecl::getMaxFunctionScopeDepth() == 127,
-                "Test input relies on a max depth of 127");
-
-  // Force parser to bail out due to exceeding the function scope depth limit.
-  // https://github.com/llvm/llvm-project/issues/196244
-  recordTokens(R"cpp(
-    #define L [](int=
-    #define L4 L L L L
-    #define L16 L4 L4 L4 L4
-    #define L64 L16 L16 L16 L16
-
-    void foo() {
-      L64 L64 L
-    }
-  )cpp");
-
-  ASSERT_GE(Buffer.expandedTokens().size(), 2u);
-  // The stream is truncated but ends with an `eof`.
-  EXPECT_EQ(Buffer.expandedTokens().back().kind(), tok::eof);
-  EXPECT_EQ(Buffer.expandedTokens().drop_back().back().kind(), tok::kw_int);
-}
-
 TEST_F(TokenBufferTest, EofTokenOnBracketDepthLimit) {
   // Force parser to bail out due to exceeding the bracket depth limit.
   recordTokens("((;", {"-fbracket-depth=1"});

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to