https://github.com/Tsche updated https://github.com/llvm/llvm-project/pull/200614
>From 4f0d13f79cfe3af808609a85ca3c8ff9df8b8f88 Mon Sep 17 00:00:00 2001 From: Matthias Wippich <[email protected]> Date: Fri, 22 May 2026 04:17:22 +0200 Subject: [PATCH 1/4] initial attempt at allowing $ in macros unconditionally --- .../include/clang/Basic/DiagnosticLexKinds.td | 3 ++ clang/include/clang/Basic/IdentifierTable.h | 15 +++++++- clang/include/clang/Lex/Lexer.h | 3 +- clang/include/clang/Lex/Preprocessor.h | 2 ++ clang/lib/Lex/Lexer.cpp | 34 +++++++++---------- clang/lib/Lex/Preprocessor.cpp | 21 ++++++++++-- clang/test/Lexer/dollar-macros.cpp | 27 +++++++++++++++ 7 files changed, 83 insertions(+), 22 deletions(-) create mode 100644 clang/test/Lexer/dollar-macros.cpp diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index 383bf1a7fdb3f..2eb90f788b122 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -55,6 +55,9 @@ def ext_line_comment : Extension< def warn_no_newline_eof : Warning<"no newline at end of file">, InGroup<NewlineEOF>, DefaultIgnore; +def err_dollar_in_identifier : Error< + "identifier '%0' is not supported with the current configuration">; + def ext_dollar_in_identifier : Extension<"'$' in identifier">, InGroup<DiagGroup<"dollar-in-identifier-extension">>; def ext_charize_microsoft : Extension< diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h index ff5864bc31927..b7b950910d9b0 100644 --- a/clang/include/clang/Basic/IdentifierTable.h +++ b/clang/include/clang/Basic/IdentifierTable.h @@ -256,7 +256,11 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { LLVM_PREFERRED_TYPE(bool) unsigned IsKeywordInCpp : 1; - // 21 bits left in a 64-bit word. + // True if this identifier contains a $ + LLVM_PREFERRED_TYPE(bool) + unsigned IsDollarIdentifier : 1; + + // 20 bits left in a 64-bit word. // Managed by the language front-end. 
void *FETokenInfo = nullptr; @@ -472,6 +476,15 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { RecomputeNeedsHandleIdentifier(); } + bool isDollarIdentifier() const { return IsDollarIdentifier; } + void setDollarIdentifier(bool Val) { + IsDollarIdentifier = Val; + if (Val) + NeedsHandleIdentifier = true; + else + RecomputeNeedsHandleIdentifier(); + } + /// is/setIsFutureCompatKeyword - Initialize information about whether or not /// this language token is a keyword in a newer or proposed Standard. This /// controls compatibility warnings, and is only true when not parsing the diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h index b042e5fb088fa..d3f8ea3720d34 100644 --- a/clang/include/clang/Lex/Lexer.h +++ b/clang/include/clang/Lex/Lexer.h @@ -759,7 +759,8 @@ class Lexer : public PreprocessorLexer { // This function handles both ASCII and Unicode identifiers after // the first codepoint of the identifyier has been parsed. - bool LexIdentifierContinue(Token &Result, const char *CurPtr); + bool LexIdentifierContinue(Token &Result, const char *CurPtr, + bool HasDollar = false); bool LexNumericConstant (Token &Result, const char *CurPtr); bool LexStringLiteral (Token &Result, const char *CurPtr, diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 8b684e85eb1c1..cf3a04fd24e38 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -2398,6 +2398,8 @@ class Preprocessor { public: void PoisonSEHIdentifiers(bool Poison = true); // Borland + bool HandleDollarIdentifier(Token &Identifier); + /// Callback invoked when the lexer reads an identifier and has /// filled in the tokens IdentifierInfo member. 
/// diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 2797212c229f5..ccbc981cad84f 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -1182,7 +1182,7 @@ StringRef Lexer::getImmediateMacroNameForDiagnostics( } bool Lexer::isAsciiIdentifierContinueChar(char c, const LangOptions &LangOpts) { - return isAsciiIdentifierContinue(c, LangOpts.DollarIdents); + return isAsciiIdentifierContinue(c, true); } bool Lexer::isNewLineEscaped(const char *BufferStart, const char *Str) { @@ -1633,7 +1633,7 @@ static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts, bool &IsExtension) { if (LangOpts.AsmPreprocessor) { return false; - } else if (LangOpts.DollarIdents && '$' == C) { + } else if ('$' == C) { return true; } else if (LangOpts.CPlusPlus || LangOpts.C23) { // A non-leading codepoint must have the XID_Continue property. @@ -2005,8 +2005,9 @@ fastParseASCIIIdentifier(const char *CurPtr, return CurPtr; } -bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr) { - // Match [_A-Za-z0-9]*, we have already matched an identifier start. +bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr, + bool HasDollar) { + // Match [_A-Za-z0-9$]*, we have already matched an identifier start. while (true) { @@ -2020,12 +2021,10 @@ bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr) { continue; } if (C == '$') { - // If we hit a $ and they are not supported in identifiers, we are done. - if (!LangOpts.DollarIdents) - break; - // Otherwise, emit a diagnostic and continue. if (!isLexingRawMode()) Diag(CurPtr, diag::ext_dollar_in_identifier); + + HasDollar = true; CurPtr = ConsumeChar(CurPtr, Size, Result); continue; } @@ -2048,10 +2047,13 @@ bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr) { // Fill in Result.IdentifierInfo and update the token kind, // looking up the identifier in the identifier table. 
- const IdentifierInfo *II = PP->LookUpIdentifierInfo(Result); + IdentifierInfo *II = PP->LookUpIdentifierInfo(Result); // Note that we have to call PP->LookUpIdentifierInfo() even for code // completion, it writes IdentifierInfo into Result, and callers rely on it. + if (HasDollar) + II->setDollarIdentifier(true); + // If the completion point is at the end of an identifier, we want to treat // the identifier as incomplete even if it resolves to a macro or a keyword. // This allows e.g. 'class^' to complete to 'classifier'. @@ -4084,16 +4086,12 @@ bool Lexer::LexTokenInternal(Token &Result) { MIOpt.ReadToken(); return LexIdentifierContinue(Result, CurPtr); case '$': // $ in identifiers. - if (LangOpts.DollarIdents) { - if (!isLexingRawMode()) - Diag(CurPtr-1, diag::ext_dollar_in_identifier); - // Notify MIOpt that we read a non-whitespace/non-comment token. - MIOpt.ReadToken(); - return LexIdentifierContinue(Result, CurPtr); - } + if (!isLexingRawMode()) + Diag(CurPtr - 1, diag::ext_dollar_in_identifier); - Kind = tok::unknown; - break; + // Notify MIOpt that we read a non-whitespace/non-comment token. + MIOpt.ReadToken(); + return LexIdentifierContinue(Result, CurPtr, true); // C99 6.4.4: Character Constants. 
case '\'': diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 1e21b4a94cea3..62c24f0cc0232 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -822,6 +822,22 @@ void Preprocessor::updateOutOfDateIdentifier(const IdentifierInfo &II) const { getExternalSource()->updateOutOfDateIdentifier(II); } +bool Preprocessor::HandleDollarIdentifier(Token &Identifier) { + // skip validation in macros + if (InMacroArgs || (CurLexer && (CurLexer->ParsingPreprocessorDirective || + CurLexer->isLexingRawMode()))) + return true; + + IdentifierInfo *II = Identifier.getIdentifierInfo(); + + // Emit error for $identifiers in configurations that do not allow them to + // persist after phase 6 + if (II && II->isDollarIdentifier() && !getLangOpts().DollarIdents) + Diag(Identifier, diag::err_dollar_in_identifier) << II->getName(); + + return true; +} + /// HandleIdentifier - This callback is invoked when the lexer reads an /// identifier. This callback looks up the identifier in the map and/or /// potentially macro expands it or turns it into a named token (like 'for'). @@ -870,7 +886,8 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) { // C99 6.10.3p10: If the preprocessing token immediately after the // macro name isn't a '(', this macro should not be expanded. 
if (!MI->isFunctionLike() || isNextPPTokenOneOf(tok::l_paren)) - return HandleMacroExpandedIdentifier(Identifier, MD); + return HandleMacroExpandedIdentifier(Identifier, MD) && + HandleDollarIdentifier(Identifier); } else { // C99 6.10.3.4p2 says that a disabled macro may never again be // expanded, even if it's in a context where it could be expanded in the @@ -917,7 +934,7 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) { return hadModuleLoaderFatalFailure(); } - return true; + return HandleDollarIdentifier(Identifier); } void Preprocessor::Lex(Token &Result) { diff --git a/clang/test/Lexer/dollar-macros.cpp b/clang/test/Lexer/dollar-macros.cpp new file mode 100644 index 0000000000000..cd91815d409ba --- /dev/null +++ b/clang/test/Lexer/dollar-macros.cpp @@ -0,0 +1,27 @@ +// RUN: %clang_cc1 %s -verify -fsyntax-only -fno-dollar-idents + +#define f(x) #x +#define g(x) f(x) +#define foo bar + +constexpr auto expanded = g($foo); +static_assert(expanded[0] == '$' + && expanded[1] == 'f' + && expanded[2] == 'o' + && expanded[3] == 'o'); + +#define $test 1 +int a = $test; + +int $b = 2; // expected-error {{identifier '$b' is not supported with the current configuration}} + +#define name $name +int name = 2; +// expected-error@-1 {{identifier '$name' is not supported with the current configuration}} + +int $zoinks() { // expected-error {{identifier '$zoinks' is not supported with the current configuration}} + return $y + name; + // expected-error@-1 {{identifier '$y' is not supported with the current configuration}} \ + // expected-error@-1 {{use of undeclared identifier '$y'}} \ + // expected-error@-1 {{identifier '$name' is not supported with the current configuration}} +} >From 05dbdd6c4e0bbe23d3178037befbbaf05fbc7697 Mon Sep 17 00:00:00 2001 From: Matthias Wippich <[email protected]> Date: Mon, 25 May 2026 23:13:08 +0200 Subject: [PATCH 2/4] add option for old behavior --- clang/include/clang/Basic/LangOptions.def | 1 + clang/include/clang/Options/Options.td 
| 5 +++++ clang/lib/Driver/ToolChains/Clang.cpp | 2 ++ clang/lib/Lex/Lexer.cpp | 13 +++++++++++-- clang/test/Lexer/dollar-macros.cpp | 2 +- 5 files changed, 20 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 4a3e3b7c04822..59bef7daa330a 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -76,6 +76,7 @@ LANGOPT(Char8 , 1, 0, NotCompatible, "char8_t keyword") LANGOPT(IEEE128 , 1, 0, NotCompatible, "__ieee128 keyword") LANGOPT(DeclSpecKeyword , 1, 0, NotCompatible, "__declspec keyword") LANGOPT(DollarIdents , 1, 1, Benign, "'$' in identifiers") +LANGOPT(DollarMacros , 1, 1, Benign, "'$' in macros") LANGOPT(AsmPreprocessor , 1, 0, Benign, "preprocessor in asm mode") LANGOPT(GNUMode , 1, 1, NotCompatible, "GNU extensions") LANGOPT(GNUKeywords , 1, 1, NotCompatible, "GNU keywords") diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index 025e8e7d7d761..78b736a59cd75 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -2274,6 +2274,11 @@ defm dollars_in_identifiers : BoolFOption<"dollars-in-identifiers", PosFlag<SetTrue, [], [ClangOption], "Allow">, NegFlag<SetFalse, [], [ClangOption], "Disallow">, BothFlags<[], [ClangOption, CC1Option], " '$' in identifiers">>; +defm dollars_in_macros : BoolFOption<"dollars-in-macros", + LangOpts<"DollarMacros">, DefaultTrue, + PosFlag<SetTrue, [], [CC1Option], "Allow">, + NegFlag<SetFalse, [], [CC1Option], "Disallow">, + BothFlags<[], [CC1Option], " '$' in macros">>; defm dwarf2_cfi_asm : BoolFOption<"dwarf2-cfi-asm", CodeGenOpts<"Dwarf2CFIAsm">, DefaultFalse, diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 7657afb14f077..de8e89ca0e4e3 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -7855,6 +7855,8 @@ void 
Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-fno-dollars-in-identifiers"); } + + Args.addOptInFlag(CmdArgs, options::OPT_fapple_pragma_pack, options::OPT_fno_apple_pragma_pack); diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index ccbc981cad84f..b0266f21ade02 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -1182,7 +1182,7 @@ StringRef Lexer::getImmediateMacroNameForDiagnostics( } bool Lexer::isAsciiIdentifierContinueChar(char c, const LangOptions &LangOpts) { - return isAsciiIdentifierContinue(c, true); + return isAsciiIdentifierContinue(c, LangOpts.DollarMacros); } bool Lexer::isNewLineEscaped(const char *BufferStart, const char *Str) { @@ -1633,7 +1633,7 @@ static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts, bool &IsExtension) { if (LangOpts.AsmPreprocessor) { return false; - } else if ('$' == C) { + } else if (LangOpts.DollarMacros && '$' == C) { return true; } else if (LangOpts.CPlusPlus || LangOpts.C23) { // A non-leading codepoint must have the XID_Continue property. @@ -2021,6 +2021,10 @@ bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr, continue; } if (C == '$') { + // If we hit a $ and they are not supported in macros, we are done. + if (!LangOpts.DollarMacros) + break; + // Otherwise, emit a diagnostic and continue. if (!isLexingRawMode()) Diag(CurPtr, diag::ext_dollar_in_identifier); @@ -4086,6 +4090,11 @@ bool Lexer::LexTokenInternal(Token &Result) { MIOpt.ReadToken(); return LexIdentifierContinue(Result, CurPtr); case '$': // $ in identifiers. 
+ if (!LangOpts.DollarMacros) { + Kind = tok::unknown; + break; + } + if (!isLexingRawMode()) Diag(CurPtr - 1, diag::ext_dollar_in_identifier); diff --git a/clang/test/Lexer/dollar-macros.cpp b/clang/test/Lexer/dollar-macros.cpp index cd91815d409ba..6ed042fbdc719 100644 --- a/clang/test/Lexer/dollar-macros.cpp +++ b/clang/test/Lexer/dollar-macros.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 %s -verify -fsyntax-only -fno-dollar-idents +// RUN: %clang_cc1 %s -verify -fsyntax-only -fno-dollars-in-identifiers #define f(x) #x #define g(x) f(x) >From 8d26f31924a7ad024569637c19784c4e2f178673 Mon Sep 17 00:00:00 2001 From: Matthias Wippich <[email protected]> Date: Sat, 30 May 2026 23:23:53 +0200 Subject: [PATCH 3/4] add release note --- clang/docs/ReleaseNotes.rst | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index fc0a1d2d4c926..953a0a9f9e072 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -71,6 +71,28 @@ C/C++ Language Potentially Breaking Changes Clang would previously ``break`` out of the ``while`` loop, whereas GCC (since version 9) would ``break`` out of the ``for`` loop here. Now, Clang and GCC both break out of the ``for`` loop. + - Clang now allows `$` to appear in identifiers during preprocessing, even when they are not + otherwise supported in identifiers (e.g. when compiling with ``-fno-dollars-in-identifiers``). + + Previously, preprocessing behavior depended on whether `$` was supported in identifiers. + This changed whether a `$` was parsed as (part of) an identifier token or a separate token. + + For example, consider: + + .. code-block:: c++ + + #define f(x) #x + #define g(x) f(x) + #define foo bar + g($foo) + + Before this change, preprocessing behavior would silently change - if `$` is allowed in identifiers, + the code above would expand to `"$foo"` and `"$bar"` otherwise. 
Now, it will always expand to `"$foo"`, + regardless of support for `$` in identifiers. + + The previous behavior can be restored by passing the option ``-fno-dollars-in-macros`` to the clang frontend (in + conjunction with ``-fno-dollars-in-identifiers``). + C++ Specific Potentially Breaking Changes ----------------------------------------- >From 7c88a53b72d47cd0e40d4ee1768dca80a4a47434 Mon Sep 17 00:00:00 2001 From: Matthias Wippich <[email protected]> Date: Sun, 31 May 2026 01:39:21 +0200 Subject: [PATCH 4/4] fix formatting --- clang/lib/Driver/ToolChains/Clang.cpp | 2 -- clang/lib/Lex/Lexer.cpp | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index de8e89ca0e4e3..7657afb14f077 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -7855,8 +7855,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-fno-dollars-in-identifiers"); } - - Args.addOptInFlag(CmdArgs, options::OPT_fapple_pragma_pack, options::OPT_fno_apple_pragma_pack); diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index b0266f21ade02..57f4b4807076d 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -2023,7 +2023,7 @@ bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr, if (C == '$') { // If we hit a $ and they are not supported in macros, we are done. if (!LangOpts.DollarMacros) - break; + break; // Otherwise, emit a diagnostic and continue. if (!isLexingRawMode()) Diag(CurPtr, diag::ext_dollar_in_identifier); _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
