https://github.com/yronglin created
https://github.com/llvm/llvm-project/pull/191004
Clang should not convert an `import` preprocessing token to a contextual keyword
if it is immediately followed by a digraph (`<:` or `<%`).
```cpp
// expected-no-diagnostics
int
import <:10
:>;
void foo() {
for (int i = 0; i < 10; ++i)
import[i] = i;
}
```
```cpp
// expected-no-diagnostics
using import = int;
void bar(int);
void foo(int val =
import <%%>
) {
bar(val);
}
```
Fixes https://github.com/llvm/llvm-project/issues/190693.
>From 373880aed203efd8521dfb76a3f52fedee2592dc Mon Sep 17 00:00:00 2001
From: yronglin <[email protected]>
Date: Thu, 9 Apr 2026 00:19:55 +0800
Subject: [PATCH] [C++][Modules][Preprocessor] Clang should not convert a
import preprocessing token to contextual keyword if a digraph character
following import
Signed-off-by: yronglin <[email protected]>
---
clang/docs/ReleaseNotes.rst | 1 +
clang/include/clang/Lex/Lexer.h | 4 ++++
clang/lib/Lex/Lexer.cpp | 26 ++++++++++++++--------
clang/lib/Lex/Preprocessor.cpp | 33 +++++++++++++++++++++++-----
clang/test/CXX/module/cpp.pre/p1.cpp | 26 ++++++++++++++++++++++
5 files changed, 75 insertions(+), 15 deletions(-)
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 2da7175b51ea3..9c0155265874b 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -406,6 +406,7 @@ Bug Fixes in This Version
- Fixed a crash on _BitInt(N) arrays where 129 ≤ N ≤ 192 due to incorrect
array filler lowering. (#GH189643)
- Fixed the behavior in C23 of ``auto``, by emitting an error when an array
type is specified for a ``char *``. (#GH162694)
- Fixed incorrect rejection of ``auto`` with reordered declaration specifiers
in C23. (#GH164121)
+- Fixed incorrect handling of the C++ ``import`` preprocessing token when it is
followed by a digraph. (#GH190693)
Bug Fixes to Compiler Builtins
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h
index 0459a863bc08d..8e4cc7a95b327 100644
--- a/clang/include/clang/Lex/Lexer.h
+++ b/clang/include/clang/Lex/Lexer.h
@@ -732,6 +732,10 @@ class Lexer : public PreprocessorLexer {
/// otherwise return P.
static const char *SkipEscapedNewLines(const char *P);
+ /// SkipHorizontalWhitespace - Skip any horizontal whitespace characters
+ /// starting at Ptr and return the advanced pointer.
+ static const char *SkipHorizontalWhitespace(const char *Ptr);
+
/// getCharAndSizeSlowNoWarn - Same as getCharAndSizeSlow, but never emits a
/// diagnostic.
static SizedChar getCharAndSizeSlowNoWarn(const char *Ptr,
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 10246552bb13d..29caeb943e3df 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -1336,6 +1336,18 @@ const char *Lexer::SkipEscapedNewLines(const char *P) {
}
}
+const char *Lexer::SkipHorizontalWhitespace(const char *Ptr) {
+ // Small amounts of horizontal whitespace is very common between tokens.
+ // Check for space character separately to skip the expensive
+ // isHorizontalWhitespace() check
+ if (*Ptr == ' ' || isHorizontalWhitespace(*Ptr)) {
+ do {
+ ++Ptr;
+ } while (*Ptr == ' ' || isHorizontalWhitespace(*Ptr));
+ }
+ return Ptr;
+}
+
std::optional<Token> Lexer::findNextToken(SourceLocation Loc,
const SourceManager &SM,
const LangOptions &LangOpts,
@@ -3764,16 +3776,12 @@ bool Lexer::LexTokenInternal(Token &Result) {
assert(!Result.hasPtrData() && "Result has not been reset");
// CurPtr - Cache BufferPtr in an automatic variable.
- const char *CurPtr = BufferPtr;
-
- // Small amounts of horizontal whitespace is very common between tokens.
- // Check for space character separately to skip the expensive
- // isHorizontalWhitespace() check
- if (*CurPtr == ' ' || isHorizontalWhitespace(*CurPtr)) {
- do {
- ++CurPtr;
- } while (*CurPtr == ' ' || isHorizontalWhitespace(*CurPtr));
+ const char *CurPtr = SkipHorizontalWhitespace(BufferPtr);
+ /// CurPtr has been advanced forward, indicating that a horizontal whitespace
+ /// character has been encountered. Check if the Lexer is in keep whitespace
+ /// mode.
+ if (CurPtr != BufferPtr) {
// If we are keeping whitespace and other tokens, just return what we just
// skipped. The next lexer invocation will return the token after the
// whitespace.
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index c430da67c1469..4130e64be855e 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -1380,13 +1380,34 @@ bool Preprocessor::HandleModuleContextualKeyword(Token
&Result) {
llvm::SaveAndRestore<bool> SavedParsingPreprocessorDirective(
CurPPLexer->ParsingPreprocessorDirective, true);
- // The next token may be an angled string literal after import keyword.
- llvm::SaveAndRestore<bool> SavedParsingFilemame(
- CurPPLexer->ParsingFilename,
- Result.getIdentifierInfo()->isImportKeyword());
+ bool ParsingFilename = false;
+ if (Result.getIdentifierInfo()->isImportKeyword()) {
+ if (getLangOpts().Digraphs && CurLexer &&
+ CurLexer->getCurrentBufferOffset() + 2 < CurLexer->getBuffer().size())
{
+ // If the import preprocessing token is followed by a digraph ('<:' or
+ // '<%'), it should not be treated as an import contextual keyword.
+ // E.g.:
+ // int
+ // import <:10
+ // :>;
+ //
+ // This is an array definition, equivalent to:
+ //
+ // int import[10];
+ const char *CurPtr = CurLexer->getBufferLocation();
+ CurPtr = Lexer::SkipHorizontalWhitespace(CurPtr);
+ auto C0 = Lexer::getCharAndSizeNoWarn(CurPtr, getLangOpts());
+ auto C1 = Lexer::getCharAndSizeNoWarn(CurPtr + C0.Size, getLangOpts());
+ if (C0.Char == '<' && (C1.Char == ':' || C1.Char == '%'))
+ return false;
+ }
+ ParsingFilename = true;
+ }
- std::optional<Token> NextTok =
- CurLexer ? CurLexer->peekNextPPToken() :
CurTokenLexer->peekNextPPToken();
+ // The next token may be an angled string literal after import keyword.
+ llvm::SaveAndRestore<bool> SavedParsingFilemame(CurPPLexer->ParsingFilename,
+ ParsingFilename);
+ std::optional<Token> NextTok = peekNextPPToken();
if (!NextTok)
return false;
diff --git a/clang/test/CXX/module/cpp.pre/p1.cpp
b/clang/test/CXX/module/cpp.pre/p1.cpp
index 989915004ff57..0e2fb65390e99 100644
--- a/clang/test/CXX/module/cpp.pre/p1.cpp
+++ b/clang/test/CXX/module/cpp.pre/p1.cpp
@@ -38,6 +38,8 @@
// RUN: %clang_cc1 -std=c++20 %t/func_like_macro.cpp -D'm(x)=x' -fsyntax-only
-verify
// RUN: %clang_cc1 -std=c++20 %t/lparen.cpp -D'm(x)=x' -D'LPAREN=('
-fsyntax-only -verify
// RUN: %clang_cc1 -std=c++20 %t/control_line.cpp -fsyntax-only -verify
+// RUN: %clang_cc1 -std=c++20 %t/digraph.cpp -fsyntax-only -verify
+// RUN: %clang_cc1 -std=c++20 %t/digraph2.cpp -fsyntax-only -verify
//--- hash.cpp
@@ -205,3 +207,27 @@ export module m; // expected-error {{module directive
lines are not allowed on l
// expected-error {{module declaration must occur at the
start of the translation unit}} \
// expected-note@#1 {{add 'module;'}}
#endif
+
+//--- digraph.cpp
+// expected-no-diagnostics
+int
+import <:10
+:>;
+
+void foo() {
+ for (int i = 0; i < 10; ++i)
+ import[i] = i;
+}
+
+//--- digraph2.cpp
+// expected-no-diagnostics
+using import = int;
+
+void bar(int);
+
+void foo(int val =
+import <%%>
+) {
+ bar(val);
+}
+
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits