[clang] [C++][Modules][Preprocessor] Clang should not convert a import preprocessing token to contextual keyword if a digraph character following import (PR #191004)

via cfe-commits Wed, 08 Apr 2026 15:20:10 -0700

llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-modules

@llvm/pr-subscribers-clang

Author: yronglin

<details>
<summary>Changes</summary>

Clang should not convert an `import` preprocessing token to a contextual keyword
if it is immediately followed by a digraph (`<:` or `<%`).

```cpp
// expected-no-diagnostics
int
import <:10
:>;

void foo() {
    for (int i = 0; i < 10; ++i)
        import[i] = i;
}
```

```cpp
// expected-no-diagnostics
using import = int;

void bar(int);

void foo(int val =
import <%%>
) {
   bar(val);
}
```

Fixes https://github.com/llvm/llvm-project/issues/190693.

---
Full diff: https://github.com/llvm/llvm-project/pull/191004.diff


5 Files Affected:

- (modified) clang/docs/ReleaseNotes.rst (+1) 
- (modified) clang/include/clang/Lex/Lexer.h (+4) 
- (modified) clang/lib/Lex/Lexer.cpp (+17-9) 
- (modified) clang/lib/Lex/Preprocessor.cpp (+27-6) 
- (modified) clang/test/CXX/module/cpp.pre/p1.cpp (+26) 


``````````diff
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 2da7175b51ea3..9c0155265874b 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -406,6 +406,7 @@ Bug Fixes in This Version
 - Fixed a crash on _BitInt(N) arrays where 129 ≤ N ≤ 192 due to incorrect 
array filler lowering. (#GH189643)
 - Fixed the behavior in C23 of ``auto``, by emitting an error when an array 
type is specified for a ``char *``. (#GH162694)
 - Fixed incorrect rejection of ``auto`` with reordered declaration specifiers 
in C23. (#GH164121)
+- Fixed incorrect handling of C++ import preprocessing token when a digraph 
character after import. (#GH190693)
 
 Bug Fixes to Compiler Builtins
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h
index 0459a863bc08d..8e4cc7a95b327 100644
--- a/clang/include/clang/Lex/Lexer.h
+++ b/clang/include/clang/Lex/Lexer.h
@@ -732,6 +732,10 @@ class Lexer : public PreprocessorLexer {
   /// otherwise return P.
   static const char *SkipEscapedNewLines(const char *P);
 
+  /// SkipHorizontalWhitespace - Skip the horizontak whitespace characters and
+  /// returns the advanced pointer.
+  static const char *SkipHorizontalWhitespace(const char *Ptr);
+
   /// getCharAndSizeSlowNoWarn - Same as getCharAndSizeSlow, but never emits a
   /// diagnostic.
   static SizedChar getCharAndSizeSlowNoWarn(const char *Ptr,
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 10246552bb13d..29caeb943e3df 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -1336,6 +1336,18 @@ const char *Lexer::SkipEscapedNewLines(const char *P) {
   }
 }
 
+const char *Lexer::SkipHorizontalWhitespace(const char *Ptr) {
+  // Small amounts of horizontal whitespace is very common between tokens.
+  // Check for space character separately to skip the expensive
+  // isHorizontalWhitespace() check
+  if (*Ptr == ' ' || isHorizontalWhitespace(*Ptr)) {
+    do {
+      ++Ptr;
+    } while (*Ptr == ' ' || isHorizontalWhitespace(*Ptr));
+  }
+  return Ptr;
+}
+
 std::optional<Token> Lexer::findNextToken(SourceLocation Loc,
                                           const SourceManager &SM,
                                           const LangOptions &LangOpts,
@@ -3764,16 +3776,12 @@ bool Lexer::LexTokenInternal(Token &Result) {
   assert(!Result.hasPtrData() && "Result has not been reset");
 
   // CurPtr - Cache BufferPtr in an automatic variable.
-  const char *CurPtr = BufferPtr;
-
-  // Small amounts of horizontal whitespace is very common between tokens.
-  // Check for space character separately to skip the expensive
-  // isHorizontalWhitespace() check
-  if (*CurPtr == ' ' || isHorizontalWhitespace(*CurPtr)) {
-    do {
-      ++CurPtr;
-    } while (*CurPtr == ' ' || isHorizontalWhitespace(*CurPtr));
+  const char *CurPtr = SkipHorizontalWhitespace(BufferPtr);
 
+  /// CurPtr has been advanced forward, indicating that a horizontal whitespace
+  /// character has been encountered. Check if the Lexer is in keep whitespace
+  /// mode.
+  if (CurPtr != BufferPtr) {
     // If we are keeping whitespace and other tokens, just return what we just
     // skipped.  The next lexer invocation will return the token after the
     // whitespace.
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index c430da67c1469..4130e64be855e 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -1380,13 +1380,34 @@ bool Preprocessor::HandleModuleContextualKeyword(Token 
&Result) {
   llvm::SaveAndRestore<bool> SavedParsingPreprocessorDirective(
       CurPPLexer->ParsingPreprocessorDirective, true);
 
-  // The next token may be an angled string literal after import keyword.
-  llvm::SaveAndRestore<bool> SavedParsingFilemame(
-      CurPPLexer->ParsingFilename,
-      Result.getIdentifierInfo()->isImportKeyword());
+  bool ParsingFilename = false;
+  if (Result.getIdentifierInfo()->isImportKeyword()) {
+    if (getLangOpts().Digraphs && CurLexer &&
+        CurLexer->getCurrentBufferOffset() + 2 < CurLexer->getBuffer().size()) 
{
+      // If the import preprocessing token folled by a digraph character '<:',
+      // the import preprocessing should not traited as a import contextual
+      // keyword. Eg.
+      //    int
+      //    import <:10
+      //    :>;
+      //
+      // This is a array definition, and equivalent to:
+      //
+      //    int import[10];
+      const char *CurPtr = CurLexer->getBufferLocation();
+      CurPtr = Lexer::SkipHorizontalWhitespace(CurPtr);
+      auto C0 = Lexer::getCharAndSizeNoWarn(CurPtr, getLangOpts());
+      auto C1 = Lexer::getCharAndSizeNoWarn(CurPtr + C0.Size, getLangOpts());
+      if (C0.Char == '<' && (C1.Char == ':' || C1.Char == '%'))
+        return false;
+    }
+    ParsingFilename = true;
+  }
 
-  std::optional<Token> NextTok =
-      CurLexer ? CurLexer->peekNextPPToken() : 
CurTokenLexer->peekNextPPToken();
+  // The next token may be an angled string literal after import keyword.
+  llvm::SaveAndRestore<bool> SavedParsingFilemame(CurPPLexer->ParsingFilename,
+                                                  ParsingFilename);
+  std::optional<Token> NextTok = peekNextPPToken();
   if (!NextTok)
     return false;
 
diff --git a/clang/test/CXX/module/cpp.pre/p1.cpp 
b/clang/test/CXX/module/cpp.pre/p1.cpp
index 989915004ff57..0e2fb65390e99 100644
--- a/clang/test/CXX/module/cpp.pre/p1.cpp
+++ b/clang/test/CXX/module/cpp.pre/p1.cpp
@@ -38,6 +38,8 @@
 // RUN: %clang_cc1 -std=c++20 %t/func_like_macro.cpp -D'm(x)=x' -fsyntax-only 
-verify
 // RUN: %clang_cc1 -std=c++20 %t/lparen.cpp -D'm(x)=x' -D'LPAREN=(' 
-fsyntax-only -verify
 // RUN: %clang_cc1 -std=c++20 %t/control_line.cpp -fsyntax-only -verify
+// RUN: %clang_cc1 -std=c++20 %t/digraph.cpp -fsyntax-only -verify
+// RUN: %clang_cc1 -std=c++20 %t/digraph2.cpp -fsyntax-only -verify
 
 
 //--- hash.cpp
@@ -205,3 +207,27 @@ export module m; // expected-error {{module directive 
lines are not allowed on l
                  // expected-error {{module declaration must occur at the 
start of the translation unit}} \
                  // expected-note@#1 {{add 'module;'}}
 #endif
+
+//--- digraph.cpp
+// expected-no-diagnostics
+int
+import <:10
+:>;
+
+void foo() {
+    for (int i = 0; i < 10; ++i)
+        import[i] = i;
+}
+
+//--- digraph2.cpp
+// expected-no-diagnostics
+using import = int;
+
+void bar(int);
+
+void foo(int val =
+import <%%>
+) {
+   bar(val);
+}
+

``````````

</details>


https://github.com/llvm/llvm-project/pull/191004
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [C++][Modules][Preprocessor] Clang should not convert a import preprocessing token to contextual keyword if a digraph character following import (PR #191004)

Reply via email to