https://github.com/dzbarsky created https://github.com/llvm/llvm-project/pull/203259
This replaces the generated `AddKeyword` calls with one pointer-free spelling blob and 8-byte scalar records containing the token kind, language flags, and spelling length, while preserving the original `CXX_KEYWORD_OPERATOR` registration position and the existing Objective-C and notable-identifier passes. On an arm64 Release build, standalone clang decreases by 16,400 bytes raw and 16,496 bytes stripped, the multicall driver decreases by 32,928 bytes raw and 33,024 bytes stripped, and clangd decreases by 32,928 bytes raw and 33,040 bytes stripped. Fifteen keyword tests pass, linked fixups are unchanged, and a 200-pair tiny-translation-unit startup benchmark is neutral within its 95% confidence intervals. Work towards #202616. AI tool disclosure: Co-authored with OpenAI Codex. From ee85c486ef9209c3d80bc0957c9ad44e3695b6ef Mon Sep 17 00:00:00 2001 From: David Zbarsky <[email protected]> Date: Thu, 11 Jun 2026 09:15:11 -0400 Subject: [PATCH] [clang][Basic] Table-drive keyword registration Replace the generated AddKeyword calls with one pointer-free spelling blob and 8-byte scalar records containing the token kind, language flags, and spelling length. Keep CXX_KEYWORD_OPERATOR registration at its original TokenKinds.def position, and leave Objective-C and notable identifiers in their existing generated passes. On an arm64 Release build, IdentifierTable.cpp.o decreases by 45,048 bytes, __TEXT,__text decreases by 36,592 bytes, __TEXT,__const increases by 8,988 bytes, __TEXT,__cstring decreases by 688 bytes, and object relocations decrease by 1,491. Standalone clang decreases by 16,400 bytes raw and 16,496 bytes stripped. clangd decreases by 32,928 bytes raw and 33,040 bytes stripped. The multicall driver decreases by 32,928 bytes raw and 33,024 bytes stripped. Linked fixups are unchanged. Fifteen keyword lexer, driver, preprocessor, parser, and Sema tests pass. 
In a 200-pair tiny-translation-unit startup benchmark, wall time changes by +0.287% with a 95% bootstrap confidence interval of [-0.674%, +1.253%], and user time changes by -0.044% with an interval of [-0.424%, +0.340%]. --- clang/lib/Basic/IdentifierTable.cpp | 97 ++++++++++++++++++++++++++--- 1 file changed, 88 insertions(+), 9 deletions(-) diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp index a2e9316e4e372..b9efd5bb4fa54 100644 --- a/clang/lib/Basic/IdentifierTable.cpp +++ b/clang/lib/Basic/IdentifierTable.cpp @@ -19,6 +19,7 @@ #include "clang/Basic/Specifiers.h" #include "clang/Basic/TargetBuiltins.h" #include "clang/Basic/TokenKinds.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/StringMap.h" @@ -26,8 +27,10 @@ #include "llvm/Support/Allocator.h" #include "llvm/Support/raw_ostream.h" #include <cassert> +#include <cstdint> #include <cstdio> #include <cstring> +#include <limits> #include <string> using namespace clang; @@ -267,27 +270,103 @@ static void AddNotableIdentifier(StringRef Name, } } +namespace { + +struct KeywordTableEntry { + uint32_t Flags; + uint16_t TokenCode; + uint8_t SpellingLength; +}; + +template <size_t Size> +constexpr uint8_t keywordSpellingLength(const char (&)[Size]) { + static_assert(Size - 1 <= std::numeric_limits<uint8_t>::max()); + return Size - 1; +} + +constexpr char KeywordSpellings[] = +#define KEYWORD(NAME, FLAGS) #NAME "\0" +#define ALIAS(NAME, TOK, FLAGS) NAME "\0" +#define TESTING_KEYWORD(NAME, FLAGS) +#include "clang/Basic/TokenKinds.def" + ""; + +constexpr KeywordTableEntry KeywordTable[] = { +#define KEYWORD(NAME, FLAGS) \ + {FLAGS, static_cast<uint16_t>(tok::kw_##NAME), keywordSpellingLength(#NAME)}, +#define ALIAS(NAME, TOK, FLAGS) \ + {FLAGS, static_cast<uint16_t>(tok::kw_##TOK), keywordSpellingLength(NAME)}, +#define TESTING_KEYWORD(NAME, FLAGS) +#include "clang/Basic/TokenKinds.def" +}; + +constexpr size_t 
keywordSpellingBytes() { + size_t Size = 0; + for (const KeywordTableEntry &Keyword : KeywordTable) + Size += Keyword.SpellingLength + 1; + return Size; +} + +static_assert(sizeof(KeywordTableEntry) == 8); +static_assert(tok::NUM_TOKENS <= std::numeric_limits<uint16_t>::max()); +static_assert((KEYMAX | (KEYMAX - 1)) <= std::numeric_limits<uint32_t>::max()); +constexpr size_t KeywordTokenCount = tok::kw___unknown_anytype - tok::kw_auto; +constexpr size_t AliasCount = +#define KEYWORD(NAME, FLAGS) +#define ALIAS(NAME, TOK, FLAGS) 1 + +#define TESTING_KEYWORD(NAME, FLAGS) +#include "clang/Basic/TokenKinds.def" + 0; +static_assert(sizeof(KeywordTable) / sizeof(KeywordTable[0]) == + KeywordTokenCount + AliasCount); +static_assert(sizeof(KeywordSpellings) == keywordSpellingBytes() + 1); +// CXX_KEYWORD_OPERATOR entries precede restrict in TokenKinds.def. +constexpr size_t CXXOperatorPosition = tok::kw_restrict - tok::kw_auto; +static_assert(KeywordTable[CXXOperatorPosition].TokenCode == tok::kw_restrict); + +} // namespace + /// AddKeywords - Add all keywords to the symbol table. /// void IdentifierTable::AddKeywords(const LangOptions &LangOpts) { // Add keywords and tokens for the current language. 
-#define KEYWORD(NAME, FLAGS) \ - AddKeyword(StringRef(#NAME), tok::kw_ ## NAME, \ - FLAGS, LangOpts, *this); -#define ALIAS(NAME, TOK, FLAGS) \ - AddKeyword(StringRef(NAME), tok::kw_ ## TOK, \ - FLAGS, LangOpts, *this); + const char *KeywordSpelling = KeywordSpellings; + auto AddKeywordRange = [&](size_t Begin, size_t End) { + for (const KeywordTableEntry &Keyword : + llvm::ArrayRef(KeywordTable).slice(Begin, End - Begin)) { + AddKeyword(StringRef(KeywordSpelling, Keyword.SpellingLength), + static_cast<tok::TokenKind>(Keyword.TokenCode), Keyword.Flags, + LangOpts, *this); + KeywordSpelling += Keyword.SpellingLength + 1; + } + }; + + AddKeywordRange(0, CXXOperatorPosition); + +#define KEYWORD(NAME, FLAGS) +#define ALIAS(NAME, TOK, FLAGS) #define CXX_KEYWORD_OPERATOR(NAME, ALIAS) \ if (LangOpts.CXXOperatorNames) \ AddCXXOperatorKeyword(StringRef(#NAME), tok::ALIAS, *this); \ else \ MarkIdentifierAsKeywordInCpp(*this, StringRef(#NAME)); -#define OBJC_AT_KEYWORD(NAME) \ - if (LangOpts.ObjC) \ +#define OBJC_AT_KEYWORD(NAME) +#define NOTABLE_IDENTIFIER(NAME) +#define TESTING_KEYWORD(NAME, FLAGS) +#include "clang/Basic/TokenKinds.def" + + AddKeywordRange(CXXOperatorPosition, + sizeof(KeywordTable) / sizeof(KeywordTable[0])); + assert(KeywordSpelling == KeywordSpellings + sizeof(KeywordSpellings) - 1); + +#define KEYWORD(NAME, FLAGS) +#define ALIAS(NAME, TOK, FLAGS) +#define CXX_KEYWORD_OPERATOR(NAME, ALIAS) +#define OBJC_AT_KEYWORD(NAME) \ + if (LangOpts.ObjC) \ AddObjCKeyword(StringRef(#NAME), tok::objc_##NAME, *this); #define NOTABLE_IDENTIFIER(NAME) \ AddNotableIdentifier(StringRef(#NAME), tok::NAME, *this); - #define TESTING_KEYWORD(NAME, FLAGS) #include "clang/Basic/TokenKinds.def" _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
