llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-tools-extra

@llvm/pr-subscribers-clangd

Author: None (yronglin)

<details>
<summary>Changes</summary>

- Since https://github.com/llvm/llvm-project/pull/173130, clang converts the module 
name pp-token sequence into an `annot_module_name` token for C++20 module/import 
directives. This PR follows that change and corrects the module name handling in 
clangd.
- Propagate and record the EOF token when the parser hits a module load fatal 
error and tries to cut off parsing. (This is the root cause of the clangd crash.)

Fixes https://github.com/llvm/llvm-project/issues/181358.

---
Full diff: https://github.com/llvm/llvm-project/pull/187858.diff


5 Files Affected:

- (added) clang-tools-extra/clangd/test/non-existent.test (+82) 
- (modified) clang/include/clang/Lex/Preprocessor.h (+9) 
- (modified) clang/lib/Parse/Parser.cpp (+2) 
- (modified) clang/lib/Tooling/Syntax/Tokens.cpp (+11-2) 
- (modified) clang/unittests/Tooling/Syntax/TokensTest.cpp (+34-4) 


``````````diff
diff --git a/clang-tools-extra/clangd/test/non-existent.test 
b/clang-tools-extra/clangd/test/non-existent.test
new file mode 100644
index 0000000000000..dd46ce25bc884
--- /dev/null
+++ b/clang-tools-extra/clangd/test/non-existent.test
@@ -0,0 +1,82 @@
+#
+## Reproduce a crash in module processing: importing a non-existent module
+## as the last statement in a file causes clangd to crash.
+## modified from modules.test
+#
+# Windows has different escaping modes.
+# FIXME: We should add one for windows.
+# UNSUPPORTED: system-windows
+#
+# RUN: rm -fr %t
+# RUN: mkdir -p %t
+# RUN: split-file %s %t
+#
+# RUN: sed -e "s|DIR|%/t|g" %t/compile_commands.json.tmpl > 
%t/compile_commands.json.tmp
+# RUN: sed -e "s|CLANG_CC|%clang|g" %t/compile_commands.json.tmp > 
%t/compile_commands.json
+# RUN: sed -e "s|DIR|%/t|g" %t/definition.jsonrpc.tmpl > %t/definition.jsonrpc
+#
+# RUN: clangd -experimental-modules-support -lit-test < %t/definition.jsonrpc
+
+#--- A.cppm
+module;
+export module A;
+
+#--- Use.cpp
+module;
+export module Use;
+import A;
+import NonExistent;
+
+#--- compile_commands.json.tmpl
+[
+    {
+      "directory": "DIR",
+      "command": "CLANG_CC -fprebuilt-module-path=DIR -std=c++20 -o 
DIR/main.cpp.o  DIR/Use.cpp  -fmodule-file=A=DIR/A.pcm",
+      "file": "DIR/Use.cpp"
+      "output": "DIR/main.cpp.o"
+    },
+    {
+      "directory": "DIR",
+      "command": "CLANG_CC -fprebuilt-module-path=DIR --std=c++20 DIR/A.cppm 
--precompile -o DIR/A.pcm",
+      "file": "DIR/A.cppm"
+      "output": "DIR/A.pcm"
+    }
+]
+
+#--- definition.jsonrpc.tmpl
+{
+  "jsonrpc": "2.0",
+  "id": 0,
+  "method": "initialize",
+  "params": {
+    "processId": 123,
+    "rootPath": "clangd",
+    "capabilities": {
+      "textDocument": {
+        "completion": {
+          "completionItem": {
+            "snippetSupport": true
+          }
+        }
+      }
+    },
+    "trace": "off"
+  }
+}
+---
+{
+  "jsonrpc": "2.0",
+  "method": "textDocument/didOpen",
+  "params": {
+    "textDocument": {
+      "uri": "file://DIR/Use.cpp",
+      "languageId": "cpp",
+      "version": 1,
+      "text": "module;\nexport module Use;\nimport A;\nimport NonExistent;\n"
+    }
+  }
+}
+---
+{"jsonrpc":"2.0","id":2,"method":"shutdown"}
+---
+{"jsonrpc":"2.0","method":"exit"}
diff --git a/clang/include/clang/Lex/Preprocessor.h 
b/clang/include/clang/Lex/Preprocessor.h
index c7e152a75f51f..8e7fc9a5d92ef 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -339,6 +339,9 @@ class Preprocessor {
   /// True to avoid tearing down the lexer etc on EOF
   bool IncrementalProcessing = false;
 
+  /// True if cutOffLexing was called.
+  bool IsCutOffLexing = false;
+
 public:
   /// The kind of translation unit we are processing.
   const TranslationUnitKind TUKind;
@@ -1331,6 +1334,12 @@ class Preprocessor {
   /// false if it is producing tokens to be consumed by Parse and Sema.
   bool isPreprocessedOutput() const { return PreprocessedOutput; }
 
+  void cutOffLexing() {
    assert(LexLevel == 0 && "Must be called at top level");
+    if (CurLexer)
+      CurLexer->cutOffLexing();
+  }
+
   /// Return true if we are lexing directly from the specified lexer.
   bool isCurrentLexer(const PreprocessorLexer *L) const {
     return CurPPLexer == L;
diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp
index 5d18414b1a746..e8343b619785f 100644
--- a/clang/lib/Parse/Parser.cpp
+++ b/clang/lib/Parse/Parser.cpp
@@ -2447,6 +2447,8 @@ Decl *Parser::ParseModuleImport(SourceLocation AtLoc,
   if (PP.hadModuleLoaderFatalFailure()) {
     // With a fatal failure in the module loader, we abort parsing.
     cutOffParsing();
+    PP.cutOffLexing();
+    ConsumeAnyToken();
     return nullptr;
   }
 
diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp 
b/clang/lib/Tooling/Syntax/Tokens.cpp
index 260654a0701fd..863e25fbd2d6f 100644
--- a/clang/lib/Tooling/Syntax/Tokens.cpp
+++ b/clang/lib/Tooling/Syntax/Tokens.cpp
@@ -681,8 +681,18 @@ class TokenCollector::CollectPPExpansions : public 
PPCallbacks {
 TokenCollector::TokenCollector(Preprocessor &PP) : PP(PP) {
   // Collect the expanded token stream during preprocessing.
   PP.setTokenWatcher([this](const clang::Token &T) {
-    if (T.isAnnotation())
+    if (T.is(tok::annot_module_name)) {
+      auto &SM = this->PP.getSourceManager();
+      StringRef Text = Lexer::getSourceText(
+          CharSourceRange::getTokenRange(T.getAnnotationRange()), SM,
+          this->PP.getLangOpts());
+      Expanded.push_back(
+          syntax::Token(T.getLocation(), Text.size(), tok::annot_module_name));
+    } else if (T.isAnnotation()) {
       return;
+    } else {
+      Expanded.push_back(syntax::Token(T));
+    }
     DEBUG_WITH_TYPE("collect-tokens", llvm::dbgs()
                                           << "Token: "
                                           << syntax::Token(T).dumpForTests(
@@ -690,7 +700,6 @@ TokenCollector::TokenCollector(Preprocessor &PP) : PP(PP) {
                                           << "\n"
 
     );
-    Expanded.push_back(syntax::Token(T));
   });
   // And locations of macro calls, to properly recover boundaries of those in
   // case of empty expansions.
diff --git a/clang/unittests/Tooling/Syntax/TokensTest.cpp 
b/clang/unittests/Tooling/Syntax/TokensTest.cpp
index 468ca5ddd2c75..a43d71142dd59 100644
--- a/clang/unittests/Tooling/Syntax/TokensTest.cpp
+++ b/clang/unittests/Tooling/Syntax/TokensTest.cpp
@@ -16,6 +16,7 @@
 #include "clang/Basic/FileSystemOptions.h"
 #include "clang/Basic/LLVM.h"
 #include "clang/Basic/LangOptions.h"
+#include "clang/Basic/LangStandard.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/TokenKinds.def"
@@ -39,6 +40,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Testing/Annotations/Annotations.h"
 #include "llvm/Testing/Support/SupportHelpers.h"
+#include "gmock/gmock.h"
 #include <cassert>
 #include <cstdlib>
 #include <gmock/gmock.h>
@@ -123,8 +125,8 @@ class TokenCollectorTest : public ::testing::Test {
     // Prepare to run a compiler.
     if (!Diags->getClient())
       Diags->setClient(new IgnoringDiagConsumer);
-    std::vector<const char *> Args = {"tok-test", "-std=c++03", 
"-fsyntax-only",
-                                      FileName};
+    std::vector<const char *> Args = {"tok-test", LangStandard.c_str(),
+                                      "-fsyntax-only", FileName};
     CreateInvocationOptions CIOpts;
     CIOpts.Diags = Diags;
     CIOpts.VFS = FS;
@@ -141,8 +143,11 @@ class TokenCollectorTest : public ::testing::Test {
 
     this->Buffer = TokenBuffer(*SourceMgr);
     RecordTokens Recorder(this->Buffer);
-    ASSERT_TRUE(Compiler.ExecuteAction(Recorder))
-        << "failed to run the frontend";
+    if (AllowErrors)
+      Compiler.ExecuteAction(Recorder);
+    else
+      ASSERT_TRUE(Compiler.ExecuteAction(Recorder))
+          << "failed to run the frontend";
   }
 
   /// Record the tokens and return a test dump of the resulting buffer.
@@ -250,6 +255,8 @@ class TokenCollectorTest : public ::testing::Test {
   }
 
   // Data fields.
+  std::string LangStandard = "-std=c++03";
+  bool AllowErrors = false;
   DiagnosticOptions DiagOpts;
   llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
       llvm::makeIntrusiveRefCnt<DiagnosticsEngine>(DiagnosticIDs::create(),
@@ -1148,4 +1155,27 @@ TEST_F(TokenCollectorTest, Pragmas) {
     }
   )cpp");
 }
+
+TEST_F(TokenCollectorTest, CXX20ModuleImportModule) {
+  LangStandard = "-std=c++20";
+  AllowErrors = true;
+
+  recordTokens("import Non.Existent;\n");
+  EXPECT_THAT(Buffer.expandedTokens(),
+              ElementsAre(Kind(tok::kw_import), Kind(tok::annot_module_name),
+                          Kind(tok::semi), Kind(tok::eof)));
+}
+
+TEST_F(TokenCollectorTest, CXX20ModuleImportPartition) {
+  LangStandard = "-std=c++20";
+  AllowErrors = true;
+
+  recordTokens("export module M.N;\nimport :Non.Existent;\n");
+  EXPECT_THAT(Buffer.expandedTokens(),
+              ElementsAre(Kind(tok::kw_export), Kind(tok::kw_module),
+                          Kind(tok::annot_module_name), Kind(tok::semi),
+                          Kind(tok::kw_import), Kind(tok::colon),
+                          Kind(tok::annot_module_name), Kind(tok::semi),
+                          Kind(tok::eof)));
+}
 } // namespace

``````````

</details>


https://github.com/llvm/llvm-project/pull/187858
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to