https://github.com/yronglin updated https://github.com/llvm/llvm-project/pull/177153
>From 0e6ee2c9a8f6abf273375e79e673c450005f8969 Mon Sep 17 00:00:00 2001 From: yronglin <[email protected]> Date: Wed, 11 Feb 2026 19:18:04 -0800 Subject: [PATCH 1/3] [clang] Introduce TokenFlag::PhysicalStartOfLine and process module contextual keyword in HandleIdentifier Signed-off-by: yronglin <[email protected]> --- clang/include/clang/Lex/Lexer.h | 11 ++-- clang/include/clang/Lex/Preprocessor.h | 29 +-------- clang/include/clang/Lex/Token.h | 5 ++ clang/lib/Lex/Lexer.cpp | 83 ++++++++++---------------- clang/lib/Lex/PPDirectives.cpp | 18 +++--- clang/lib/Lex/Preprocessor.cpp | 27 ++++++--- 6 files changed, 71 insertions(+), 102 deletions(-) diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h index 423f2ffe2f852..f843880153f94 100644 --- a/clang/include/clang/Lex/Lexer.h +++ b/clang/include/clang/Lex/Lexer.h @@ -627,7 +627,7 @@ class Lexer : public PreprocessorLexer { /// LexTokenInternal - Internal interface to lex a preprocessing token. Called /// by Lex. /// - bool LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine); + bool LexTokenInternal(Token &Result); bool CheckUnicodeWhitespace(Token &Result, uint32_t C, const char *CurPtr); @@ -762,12 +762,9 @@ class Lexer : public PreprocessorLexer { bool LexCharConstant (Token &Result, const char *CurPtr, tok::TokenKind Kind); bool LexEndOfFile (Token &Result, const char *CurPtr); - bool SkipWhitespace (Token &Result, const char *CurPtr, - bool &TokAtPhysicalStartOfLine); - bool SkipLineComment (Token &Result, const char *CurPtr, - bool &TokAtPhysicalStartOfLine); - bool SkipBlockComment (Token &Result, const char *CurPtr, - bool &TokAtPhysicalStartOfLine); + bool SkipWhitespace (Token &Result, const char *CurPtr); + bool SkipLineComment (Token &Result, const char *CurPtr); + bool SkipBlockComment (Token &Result, const char *CurPtr); bool SaveLineComment (Token &Result, const char *CurPtr); bool IsStartOfConflictMarker(const char *CurPtr); diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 5fb83eafc6b2a..b6e42a6151ac3 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -137,30 +137,6 @@ struct CXXStandardLibraryVersionInfo { std::uint64_t Version; }; -/// Record the previous 'export' keyword info. -/// -/// Since P1857R3, the standard introduced several rules to determine whether -/// the 'module', 'export module', 'import', 'export import' is a valid -/// directive introducer. This class is used to record the previous 'export' -/// keyword token, and then handle 'export module' and 'export import'. -class ExportContextualKeywordInfo { - Token ExportTok; - bool AtPhysicalStartOfLine = false; - -public: - ExportContextualKeywordInfo() = default; - ExportContextualKeywordInfo(const Token &Tok, bool AtPhysicalStartOfLine) - : ExportTok(Tok), AtPhysicalStartOfLine(AtPhysicalStartOfLine) {} - - bool isValid() const { return ExportTok.is(tok::kw_export); } - bool isAtPhysicalStartOfLine() const { return AtPhysicalStartOfLine; } - Token getExportTok() const { return ExportTok; } - void reset() { - ExportTok.startToken(); - AtPhysicalStartOfLine = false; - } -}; - class ModuleNameLoc final : llvm::TrailingObjects<ModuleNameLoc, IdentifierLoc> { friend TrailingObjects; @@ -415,7 +391,7 @@ class Preprocessor { bool ImportingCXXNamedModules = false; /// Whether the last token we lexed was an 'export' keyword. 
- ExportContextualKeywordInfo LastTokenWasExportKeyword; + Token LastExportKeyword; /// First pp-token source location in current translation unit. SourceLocation FirstPPTokenLoc; @@ -1869,8 +1845,7 @@ class Preprocessor { /// This consumes the import/module directive, modifies the /// lexer/preprocessor state, and advances the lexer(s) so that the next token /// read is the correct one. - bool HandleModuleContextualKeyword(Token &Result, - bool TokAtPhysicalStartOfLine); + bool HandleModuleContextualKeyword(Token &Result); /// Get the start location of the first pp-token in main file. SourceLocation getMainFileFirstPPTokenLoc() const { diff --git a/clang/include/clang/Lex/Token.h b/clang/include/clang/Lex/Token.h index d09e951908129..886fbfb8af029 100644 --- a/clang/include/clang/Lex/Token.h +++ b/clang/include/clang/Lex/Token.h @@ -92,6 +92,7 @@ class Token { HasSeenNoTrivialPPDirective = 0x1000, // Whether we've seen any 'no-trivial' pp-directives before // current position. + PhysicalStartOfLine = 0x2000, // This token is at the start of a physical line. }; tok::TokenKind getKind() const { return Kind; } @@ -283,6 +284,10 @@ class Token { /// bool isAtStartOfLine() const { return getFlag(StartOfLine); } + /// isAtPhysicalStartOfLine - Return true if this token is at the start of a physical line. + /// + bool isAtPhysicalStartOfLine() const { return getFlag(PhysicalStartOfLine); } + /// Return true if this token has whitespace before it. /// bool hasLeadingSpace() const { return getFlag(LeadingSpace); } diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 1498657047bd6..f54484d6c5f87 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -2034,7 +2034,8 @@ bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr) { // Finally, now that we know we have an identifier, pass this off to the // preprocessor, which may macro expand it or something. - if (II->isHandleIdentifierCase()) + if (II->isHandleIdentifierCase() || II->isModuleKeyword() || + II->isImportKeyword() || II->getTokenID() == tok::kw_export) return PP->HandleIdentifier(Result); return true; @@ -2515,8 +2516,7 @@ bool Lexer::LexCharConstant(Token &Result, const char *CurPtr, /// Update BufferPtr to point to the next non-whitespace character and return. /// /// This method forms a token and returns true if KeepWhitespaceMode is enabled. -bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr, - bool &TokAtPhysicalStartOfLine) { +bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) { // Whitespace - Skip it, then return the token after the whitespace. bool SawNewline = isVerticalWhitespace(CurPtr[-1]); @@ -2572,7 +2572,7 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr, Result.setFlagValue(Token::LeadingSpace, HasLeadingSpace); if (SawNewline) { Result.setFlag(Token::StartOfLine); - TokAtPhysicalStartOfLine = true; + Result.setFlag(Token::PhysicalStartOfLine); if (NewLinePtr && lastNewLine && NewLinePtr != lastNewLine && PP) { if (auto *Handler = PP->getEmptylineHandler()) @@ -2591,8 +2591,7 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr, /// /// If we're in KeepCommentMode or any CommentHandler has inserted /// some tokens, this will store the first token and return true. -bool Lexer::SkipLineComment(Token &Result, const char *CurPtr, - bool &TokAtPhysicalStartOfLine) { +bool Lexer::SkipLineComment(Token &Result, const char *CurPtr) { // If Line comments aren't explicitly enabled for this language, emit an // extension warning. 
if (!LineComment) { @@ -2748,7 +2747,7 @@ bool Lexer::SkipLineComment(Token &Result, const char *CurPtr, // The next returned token is at the start of the line. Result.setFlag(Token::StartOfLine); - TokAtPhysicalStartOfLine = true; + Result.setFlag(Token::PhysicalStartOfLine); // No leading whitespace seen so far. Result.clearFlag(Token::LeadingSpace); BufferPtr = CurPtr; @@ -2873,8 +2872,7 @@ static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, Lexer *L, /// /// If we're in KeepCommentMode or any CommentHandler has inserted /// some tokens, this will store the first token and return true. -bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr, - bool &TokAtPhysicalStartOfLine) { +bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) { // Scan one character past where we should, looking for a '/' character. Once // we find it, check to see if it was preceded by a *. This common // optimization helps people who like to put a lot of * characters in their @@ -3077,7 +3075,7 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr, // efficiently now. This is safe even in KeepWhitespaceMode because we would // have already returned above with the comment as a token. if (isHorizontalWhitespace(*CurPtr)) { - SkipWhitespace(Result, CurPtr+1, TokAtPhysicalStartOfLine); + SkipWhitespace(Result, CurPtr+1); return false; } @@ -3722,6 +3720,11 @@ bool Lexer::Lex(Token &Result) { IsAtStartOfLine = false; } + if (IsAtPhysicalStartOfLine) { + Result.setFlag(Token::PhysicalStartOfLine); + IsAtPhysicalStartOfLine = false; + } + if (HasLeadingSpace) { Result.setFlag(Token::LeadingSpace); HasLeadingSpace = false; @@ -3732,11 +3735,9 @@ bool Lexer::Lex(Token &Result) { HasLeadingEmptyMacro = false; } - bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine; - IsAtPhysicalStartOfLine = false; bool isRawLex = isLexingRawMode(); (void) isRawLex; - bool returnedToken = LexTokenInternal(Result, atPhysicalStartOfLine); + bool returnedToken = LexTokenInternal(Result); // (After the LexTokenInternal call, the lexer might be destroyed.) assert((returnedToken || !isRawLex) && "Raw lex must succeed"); return returnedToken; @@ -3747,7 +3748,7 @@ bool Lexer::Lex(Token &Result) { /// has a null character at the end of the file. This returns a preprocessing /// token, not a normal token, as such, it is an internal interface. It assumes /// that the Flags of result have been cleared before calling this. -bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { +bool Lexer::LexTokenInternal(Token &Result) { LexStart: assert(!Result.needsCleaning() && "Result needs cleaning"); assert(!Result.hasPtrData() && "Result has not been reset"); @@ -3800,7 +3801,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { if (!isLexingRawMode()) Diag(CurPtr-1, diag::null_in_file); Result.setFlag(Token::LeadingSpace); - if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) + if (SkipWhitespace(Result, CurPtr)) return true; // KeepWhitespaceMode // We know the lexer hasn't changed, so just try again with this lexer. @@ -3846,7 +3847,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { // No leading whitespace seen so far. Result.clearFlag(Token::LeadingSpace); - if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) + if (SkipWhitespace(Result, CurPtr)) return true; // KeepWhitespaceMode // We only saw whitespace, so just try again with this lexer. 
@@ -3858,7 +3859,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { case '\v': SkipHorizontalWhitespace: Result.setFlag(Token::LeadingSpace); - if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) + if (SkipWhitespace(Result, CurPtr)) return true; // KeepWhitespaceMode SkipIgnoredUnits: @@ -3868,11 +3869,11 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { // too (without going through the big switch stmt). if (CurPtr[0] == '/' && CurPtr[1] == '/' && !inKeepCommentMode() && LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) { - if (SkipLineComment(Result, CurPtr+2, TokAtPhysicalStartOfLine)) + if (SkipLineComment(Result, CurPtr+2)) return true; // There is a token to return. goto SkipIgnoredUnits; } else if (CurPtr[0] == '/' && CurPtr[1] == '*' && !inKeepCommentMode()) { - if (SkipBlockComment(Result, CurPtr+2, TokAtPhysicalStartOfLine)) + if (SkipBlockComment(Result, CurPtr+2)) return true; // There is a token to return. goto SkipIgnoredUnits; } else if (isHorizontalWhitespace(*CurPtr)) { @@ -4030,23 +4031,10 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': /*'u'*/ case 'v': case 'w': case 'x': case 'y': case 'z': - case '_': { + case '_': // Notify MIOpt that we read a non-whitespace/non-comment token. MIOpt.ReadToken(); - - // LexIdentifierContinue may trigger HandleEndOfFile which would - // normally destroy this Lexer. However, the Preprocessor now defers - // lexer destruction until the stack of Lexer unwinds (LexLevel == 0), - // so it's safe to access member variables after this call returns. - bool returnedToken = LexIdentifierContinue(Result, CurPtr); - - if (returnedToken && !LexingRawMode && !Is_PragmaLexer && - !ParsingPreprocessorDirective && LangOpts.CPlusPlusModules && - Result.isModuleContextualKeyword() && - PP->HandleModuleContextualKeyword(Result, TokAtPhysicalStartOfLine)) - goto HandleDirective; - return returnedToken; - } + return LexIdentifierContinue(Result, CurPtr); case '$': // $ in identifiers. if (LangOpts.DollarIdents) { if (!isLexingRawMode()) @@ -4196,8 +4184,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) != '*'; if (TreatAsComment) { - if (SkipLineComment(Result, ConsumeChar(CurPtr, SizeTmp, Result), - TokAtPhysicalStartOfLine)) + if (SkipLineComment(Result, ConsumeChar(CurPtr, SizeTmp, Result))) return true; // There is a token to return. // It is common for the tokens immediately after a // comment to be @@ -4208,8 +4195,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { } if (Char == '*') { // /**/ comment. - if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result), - TokAtPhysicalStartOfLine)) + if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result))) return true; // There is a token to return. // We only saw whitespace, so just try again with this lexer. @@ -4249,12 +4235,8 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { // it's actually the start of a preprocessing directive. Callback to // the preprocessor to handle it. // TODO: -fpreprocessed mode?? - if (TokAtPhysicalStartOfLine && !LexingRawMode && !Is_PragmaLexer) { - // We parsed a # character and it's the start of a preprocessing - // directive. 
- FormTokenWithChars(Result, CurPtr, tok::hash); + if (Result.isAtPhysicalStartOfLine() && !LexingRawMode && !Is_PragmaLexer) goto HandleDirective; - } Kind = tok::hash; } @@ -4444,12 +4426,8 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { // it's actually the start of a preprocessing directive. Callback to // the preprocessor to handle it. // TODO: -fpreprocessed mode?? - if (TokAtPhysicalStartOfLine && !LexingRawMode && !Is_PragmaLexer) { - // We parsed a # character and it's the start of a preprocessing - // directive. - FormTokenWithChars(Result, CurPtr, tok::hash); + if (Result.isAtPhysicalStartOfLine() && !LexingRawMode && !Is_PragmaLexer) goto HandleDirective; - } Kind = tok::hash; } @@ -4468,7 +4446,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { if (!LangOpts.AsmPreprocessor) { if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) { if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) { - if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) + if (SkipWhitespace(Result, CurPtr)) return true; // KeepWhitespaceMode // We only saw whitespace, so just try again with this lexer. @@ -4501,7 +4479,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { llvm::strictConversion); if (Status == llvm::conversionOK) { if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) { - if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) + if (SkipWhitespace(Result, CurPtr)) return true; // KeepWhitespaceMode // We only saw whitespace, so just try again with this lexer. @@ -4539,6 +4517,9 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { return true; HandleDirective: + // We parsed a # character and it's the start of a preprocessing directive. 
+ + FormTokenWithChars(Result, CurPtr, tok::hash); PP->HandleDirective(Result); if (PP->hadModuleLoaderFatalFailure()) @@ -4623,7 +4604,7 @@ bool Lexer::LexDependencyDirectiveToken(Token &Result) { if (!isLexingRawMode()) { const IdentifierInfo *II = PP->LookUpIdentifierInfo(Result); if (LangOpts.CPlusPlusModules && Result.isModuleContextualKeyword() && - PP->HandleModuleContextualKeyword(Result, Result.isAtStartOfLine())) { + PP->HandleModuleContextualKeyword(Result)) { PP->HandleDirective(Result); return false; } diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index 85edbabf09ed3..f37215653b23c 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -642,13 +642,13 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc, (Tok.getRawIdentifier() == "export" || Tok.getRawIdentifier() == "module")) { llvm::SaveAndRestore ModuleDirectiveSkipping( - LastTokenWasExportKeyword); - LastTokenWasExportKeyword.reset(); + LastExportKeyword); + LastExportKeyword.startToken(); LookUpIdentifierInfo(Tok); IdentifierInfo *II = Tok.getIdentifierInfo(); if (II->getName()[0] == 'e') { // export - HandleModuleContextualKeyword(Tok, Tok.isAtStartOfLine()); + HandleModuleContextualKeyword(Tok); CurLexer->Lex(Tok); if (Tok.is(tok::raw_identifier)) { LookUpIdentifierInfo(Tok); @@ -661,7 +661,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc, // to save RawLexingMode llvm::SaveAndRestore RestoreLexingRawMode(CurPPLexer->LexingRawMode, false); - if (HandleModuleContextualKeyword(Tok, Tok.isAtStartOfLine())) { + if (HandleModuleContextualKeyword(Tok)) { // We just parsed a # character at the start of a line, so we're // in directive mode. Tell the lexer this so any newlines we see // will be converted into an EOD token (this terminates the @@ -4193,8 +4193,8 @@ void Preprocessor::HandleCXXImportDirective(Token ImportTok) { llvm::SaveAndRestore<bool> SaveImportingCXXModules( this->ImportingCXXNamedModules, true); - if (LastTokenWasExportKeyword.isValid()) - LastTokenWasExportKeyword.reset(); + if (LastExportKeyword.is(tok::kw_export)) + LastExportKeyword.startToken(); Token Tok; if (LexHeaderName(Tok)) { @@ -4352,9 +4352,9 @@ void Preprocessor::HandleCXXImportDirective(Token ImportTok) { void Preprocessor::HandleCXXModuleDirective(Token ModuleTok) { assert(getLangOpts().CPlusPlusModules && ModuleTok.is(tok::kw_module)); Token Introducer = ModuleTok; - if (LastTokenWasExportKeyword.isValid()) { - Introducer = LastTokenWasExportKeyword.getExportTok(); - LastTokenWasExportKeyword.reset(); + if (LastExportKeyword.is(tok::kw_export)) { + Introducer = LastExportKeyword; + LastExportKeyword.startToken(); } SourceLocation StartLoc = Introducer.getLocation(); diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 791a9644b6e85..726c59cd194d5 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -119,7 +119,7 @@ Preprocessor::Preprocessor(const PreprocessorOptions &PPOpts, // We haven't read anything from the external source. ReadMacrosFromExternalSource = false; - LastTokenWasExportKeyword.reset(); + LastExportKeyword.startToken(); BuiltinInfo = std::make_unique<Builtin::Context>(); @@ -878,6 +878,17 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) { if (II.isExtensionToken() && !DisableMacroExpansion) Diag(Identifier, diag::ext_token_used); + // Handle module contextual keyword. 
+ if (getLangOpts().CPlusPlusModules && CurLexer && + !CurLexer->isLexingRawMode() && !CurLexer->isPragmaLexer() && + !CurLexer->ParsingPreprocessorDirective && + Identifier.isModuleContextualKeyword() && + HandleModuleContextualKeyword(Identifier)) { + HandleDirective(Identifier); + // With a fatal failure in the module loader, we abort parsing. + return hadModuleLoaderFatalFailure(); + } + // If this is the 'import' contextual keyword following an '@', note // that the next token indicates a module name. // @@ -996,7 +1007,7 @@ void Preprocessor::Lex(Token &Result) { LastTokenWasAt = Result.is(tok::at); if (Result.isNot(tok::kw_export)) - LastTokenWasExportKeyword.reset(); + LastExportKeyword.startToken(); --LexLevel; @@ -1259,12 +1270,12 @@ bool Preprocessor::LexModuleNameContinue(Token &Tok, SourceLocation UseLoc, /// - ; for 'module' /// Otherwise the token is treated as an identifier. bool Preprocessor::HandleModuleContextualKeyword( - Token &Result, bool TokAtPhysicalStartOfLine) { + Token &Result) { if (!getLangOpts().CPlusPlusModules || !Result.isModuleContextualKeyword()) return false; if (Result.is(tok::kw_export)) { - LastTokenWasExportKeyword = {Result, TokAtPhysicalStartOfLine}; + LastExportKeyword = Result; return false; } @@ -1277,17 +1288,17 @@ bool Preprocessor::HandleModuleContextualKeyword( II->isStr(tok::getKeywordSpelling(tok::kw_module)))) return false; - if (LastTokenWasExportKeyword.isValid()) { + if (LastExportKeyword.is(tok::kw_export)) { // The export keyword was not at the start of line, it's not a // directive-introducing token. - if (!LastTokenWasExportKeyword.isAtPhysicalStartOfLine()) + if (!LastExportKeyword.isAtPhysicalStartOfLine()) return false; // [cpp.pre]/1.4 // export // not a preprocessing directive // import foo; // preprocessing directive (ill-formed at phase7) - if (TokAtPhysicalStartOfLine) + if (Result.isAtPhysicalStartOfLine()) return false; - } else if (!TokAtPhysicalStartOfLine) + } else if (!Result.isAtPhysicalStartOfLine()) return false; llvm::SaveAndRestore<bool> SavedParsingPreprocessorDirective( >From ba9e5c0c4c8ee7ba38617ae5731242751dbd9e3c Mon Sep 17 00:00:00 2001 From: yronglin <[email protected]> Date: Wed, 11 Feb 2026 19:34:22 -0800 Subject: [PATCH 2/3] Format Signed-off-by: yronglin <[email protected]> --- clang/include/clang/Lex/Lexer.h | 6 +++--- clang/include/clang/Lex/Token.h | 6 ++++-- clang/lib/Lex/Lexer.cpp | 9 +++++---- clang/lib/Lex/PPDirectives.cpp | 3 +-- clang/lib/Lex/Preprocessor.cpp | 3 +-- 5 files changed, 14 insertions(+), 13 deletions(-) diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h index f843880153f94..0459a863bc08d 100644 --- a/clang/include/clang/Lex/Lexer.h +++ b/clang/include/clang/Lex/Lexer.h @@ -762,9 +762,9 @@ class Lexer : public PreprocessorLexer { bool LexCharConstant (Token &Result, const char *CurPtr, tok::TokenKind Kind); bool LexEndOfFile (Token &Result, const char *CurPtr); - bool SkipWhitespace (Token &Result, const char *CurPtr); - bool SkipLineComment (Token &Result, const char *CurPtr); - bool SkipBlockComment (Token &Result, const char *CurPtr); + bool SkipWhitespace(Token &Result, const char *CurPtr); + bool SkipLineComment(Token &Result, const char *CurPtr); + bool SkipBlockComment(Token &Result, const char *CurPtr); bool SaveLineComment (Token &Result, const char *CurPtr); bool IsStartOfConflictMarker(const char *CurPtr); diff --git a/clang/include/clang/Lex/Token.h b/clang/include/clang/Lex/Token.h index 886fbfb8af029..e19e04e78e3b7 100644 --- 
a/clang/include/clang/Lex/Token.h +++ b/clang/include/clang/Lex/Token.h @@ -92,7 +92,8 @@ class Token { HasSeenNoTrivialPPDirective = 0x1000, // Whether we've seen any 'no-trivial' pp-directives before // current position. - PhysicalStartOfLine = 0x2000, // This token is at the start of a physical line. + PhysicalStartOfLine = + 0x2000, // This token is at the start of a physical line. }; tok::TokenKind getKind() const { return Kind; } @@ -284,7 +285,8 @@ class Token { /// bool isAtStartOfLine() const { return getFlag(StartOfLine); } - /// isAtPhysicalStartOfLine - Return true if this token is at the start of a physical line. + /// isAtPhysicalStartOfLine - Return true if this token is at the start of a + /// physical line. /// bool isAtPhysicalStartOfLine() const { return getFlag(PhysicalStartOfLine); } diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index f54484d6c5f87..0d89703e5c538 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -3075,7 +3075,7 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) { // efficiently now. This is safe even in KeepWhitespaceMode because we would // have already returned above with the comment as a token. if (isHorizontalWhitespace(*CurPtr)) { - SkipWhitespace(Result, CurPtr+1); + SkipWhitespace(Result, CurPtr + 1); return false; } @@ -3869,11 +3869,11 @@ bool Lexer::LexTokenInternal(Token &Result) { // too (without going through the big switch stmt). if (CurPtr[0] == '/' && CurPtr[1] == '/' && !inKeepCommentMode() && LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) { - if (SkipLineComment(Result, CurPtr+2)) + if (SkipLineComment(Result, CurPtr + 2)) return true; // There is a token to return. goto SkipIgnoredUnits; } else if (CurPtr[0] == '/' && CurPtr[1] == '*' && !inKeepCommentMode()) { - if (SkipBlockComment(Result, CurPtr+2)) + if (SkipBlockComment(Result, CurPtr + 2)) return true; // There is a token to return. goto SkipIgnoredUnits; } else if (isHorizontalWhitespace(*CurPtr)) { @@ -4235,7 +4235,8 @@ bool Lexer::LexTokenInternal(Token &Result) { // it's actually the start of a preprocessing directive. Callback to // the preprocessor to handle it. // TODO: -fpreprocessed mode?? - if (Result.isAtPhysicalStartOfLine() && !LexingRawMode && !Is_PragmaLexer) + if (Result.isAtPhysicalStartOfLine() && !LexingRawMode && + !Is_PragmaLexer) goto HandleDirective; Kind = tok::hash; diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index f37215653b23c..4a854c213926b 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -641,8 +641,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc, Tok.is(tok::raw_identifier) && (Tok.getRawIdentifier() == "export" || Tok.getRawIdentifier() == "module")) { - llvm::SaveAndRestore ModuleDirectiveSkipping( - LastExportKeyword); + llvm::SaveAndRestore ModuleDirectiveSkipping(LastExportKeyword); LastExportKeyword.startToken(); LookUpIdentifierInfo(Tok); IdentifierInfo *II = Tok.getIdentifierInfo(); diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 726c59cd194d5..2d8fe688a5002 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -1269,8 +1269,7 @@ bool Preprocessor::LexModuleNameContinue(Token &Tok, SourceLocation UseLoc, /// - <, ", or : (but not ::) pp tokens for 'import', or /// - ; for 'module' /// Otherwise the token is treated as an identifier. 
-bool Preprocessor::HandleModuleContextualKeyword( - Token &Result) { +bool Preprocessor::HandleModuleContextualKeyword(Token &Result) { if (!getLangOpts().CPlusPlusModules || !Result.isModuleContextualKeyword()) return false; >From 69d60ec0dcb1ee37945b84e998443e4284e65e89 Mon Sep 17 00:00:00 2001 From: yronglin <[email protected]> Date: Sun, 15 Feb 2026 17:10:03 -0800 Subject: [PATCH 3/3] Refine comments Signed-off-by: yronglin <[email protected]> --- clang/include/clang/Lex/Token.h | 1 - clang/lib/Lex/Lexer.cpp | 2 +- clang/lib/Lex/Preprocessor.cpp | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/Lex/Token.h b/clang/include/clang/Lex/Token.h index e19e04e78e3b7..9904b271c200e 100644 --- a/clang/include/clang/Lex/Token.h +++ b/clang/include/clang/Lex/Token.h @@ -287,7 +287,6 @@ class Token { /// isAtPhysicalStartOfLine - Return true if this token is at the start of a /// physical line. - /// bool isAtPhysicalStartOfLine() const { return getFlag(PhysicalStartOfLine); } /// Return true if this token has whitespace before it. diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 0d89703e5c538..92c3046a6fd19 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -4518,8 +4518,8 @@ bool Lexer::LexTokenInternal(Token &Result) { return true; HandleDirective: - // We parsed a # character and it's the start of a preprocessing directive. + // We parsed a # character and it's the start of a preprocessing directive. FormTokenWithChars(Result, CurPtr, tok::hash); PP->HandleDirective(Result); diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 2d8fe688a5002..035007ad4cd73 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -878,7 +878,7 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) { if (II.isExtensionToken() && !DisableMacroExpansion) Diag(Identifier, diag::ext_token_used); - // Handle module contextual keyword. + // Handle module contextual keywords. if (getLangOpts().CPlusPlusModules && CurLexer && !CurLexer->isLexingRawMode() && !CurLexer->isPragmaLexer() && !CurLexer->ParsingPreprocessorDirective &&
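
For readers skimming the patch: the behavior being implemented comes from P1857R3 ([cpp.pre]/1). A 'module' or 'import' token introduces a preprocessing directive only when the token itself, or an 'export' immediately preceding it, is the first token on a line, and when it is followed on the same line by a suitable token (module name, header-name, ';', ':', and so on). A condensed version of the standard's own example, whose classifications the new HandleModuleContextualKeyword / isAtPhysicalStartOfLine logic is expected to reproduce:

    module ;                  // preprocessing directive
    export module leftpad;    // preprocessing directive
    import <string>;          // preprocessing directive
    export import "squee";    // preprocessing directive
    import rightpad;          // preprocessing directive

    module                    // not a preprocessing directive
    ;                         // not a preprocessing directive

    export                    // not a preprocessing directive
    import foo;               // preprocessing directive (ill-formed at translation phase 7)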

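A minimal usage sketch of the new token flag (a fragment, not a complete program; 'RawLexer' stands for any already-constructed clang::Lexer, and only isAtPhysicalStartOfLine is new API, Lexer::Lex and Token::isNot already exist): the point of the change is that the physical-start-of-line bit now travels on the Token itself instead of being threaded through LexTokenInternal and HandleModuleContextualKeyword as a separate bool parameter.

    clang::Token Tok;
    do {
      RawLexer.Lex(Tok);
      // PhysicalStartOfLine (0x2000) records that the token begins a physical
      // source line; this is the condition directive detection checks for '#',
      // 'module', 'import' and 'export'.
      if (Tok.isAtPhysicalStartOfLine()) {
        // candidate directive-introducing position
      }
    } while (Tok.isNot(clang::tok::eof));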