https://github.com/yronglin updated https://github.com/llvm/llvm-project/pull/177153
>From 0e6ee2c9a8f6abf273375e79e673c450005f8969 Mon Sep 17 00:00:00 2001 From: yronglin <[email protected]> Date: Wed, 11 Feb 2026 19:18:04 -0800 Subject: [PATCH 1/3] [clang] Introduce TokenFlag::PhysicalStartOfLine and process module contextual keyword in HandleIdentifier Signed-off-by: yronglin <[email protected]> --- clang/include/clang/Lex/Lexer.h | 11 ++-- clang/include/clang/Lex/Preprocessor.h | 29 +-------- clang/include/clang/Lex/Token.h | 5 ++ clang/lib/Lex/Lexer.cpp | 83 ++++++++++---------------- clang/lib/Lex/PPDirectives.cpp | 18 +++--- clang/lib/Lex/Preprocessor.cpp | 27 ++++++--- 6 files changed, 71 insertions(+), 102 deletions(-) diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h index 423f2ffe2f852..f843880153f94 100644 --- a/clang/include/clang/Lex/Lexer.h +++ b/clang/include/clang/Lex/Lexer.h @@ -627,7 +627,7 @@ class Lexer : public PreprocessorLexer { /// LexTokenInternal - Internal interface to lex a preprocessing token. Called /// by Lex. /// - bool LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine); + bool LexTokenInternal(Token &Result); bool CheckUnicodeWhitespace(Token &Result, uint32_t C, const char *CurPtr); @@ -762,12 +762,9 @@ class Lexer : public PreprocessorLexer { bool LexCharConstant (Token &Result, const char *CurPtr, tok::TokenKind Kind); bool LexEndOfFile (Token &Result, const char *CurPtr); - bool SkipWhitespace (Token &Result, const char *CurPtr, - bool &TokAtPhysicalStartOfLine); - bool SkipLineComment (Token &Result, const char *CurPtr, - bool &TokAtPhysicalStartOfLine); - bool SkipBlockComment (Token &Result, const char *CurPtr, - bool &TokAtPhysicalStartOfLine); + bool SkipWhitespace (Token &Result, const char *CurPtr); + bool SkipLineComment (Token &Result, const char *CurPtr); + bool SkipBlockComment (Token &Result, const char *CurPtr); bool SaveLineComment (Token &Result, const char *CurPtr); bool IsStartOfConflictMarker(const char *CurPtr); diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 5fb83eafc6b2a..b6e42a6151ac3 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -137,30 +137,6 @@ struct CXXStandardLibraryVersionInfo { std::uint64_t Version; }; -/// Record the previous 'export' keyword info. -/// -/// Since P1857R3, the standard introduced several rules to determine whether -/// the 'module', 'export module', 'import', 'export import' is a valid -/// directive introducer. This class is used to record the previous 'export' -/// keyword token, and then handle 'export module' and 'export import'. -class ExportContextualKeywordInfo { - Token ExportTok; - bool AtPhysicalStartOfLine = false; - -public: - ExportContextualKeywordInfo() = default; - ExportContextualKeywordInfo(const Token &Tok, bool AtPhysicalStartOfLine) - : ExportTok(Tok), AtPhysicalStartOfLine(AtPhysicalStartOfLine) {} - - bool isValid() const { return ExportTok.is(tok::kw_export); } - bool isAtPhysicalStartOfLine() const { return AtPhysicalStartOfLine; } - Token getExportTok() const { return ExportTok; } - void reset() { - ExportTok.startToken(); - AtPhysicalStartOfLine = false; - } -}; - class ModuleNameLoc final : llvm::TrailingObjects<ModuleNameLoc, IdentifierLoc> { friend TrailingObjects; @@ -415,7 +391,7 @@ class Preprocessor { bool ImportingCXXNamedModules = false; /// Whether the last token we lexed was an 'export' keyword. 
- ExportContextualKeywordInfo LastTokenWasExportKeyword; + Token LastExportKeyword; /// First pp-token source location in current translation unit. SourceLocation FirstPPTokenLoc; @@ -1869,8 +1845,7 @@ class Preprocessor { /// This consumes the import/module directive, modifies the /// lexer/preprocessor state, and advances the lexer(s) so that the next token /// read is the correct one. - bool HandleModuleContextualKeyword(Token &Result, - bool TokAtPhysicalStartOfLine); + bool HandleModuleContextualKeyword(Token &Result); /// Get the start location of the first pp-token in main file. SourceLocation getMainFileFirstPPTokenLoc() const { diff --git a/clang/include/clang/Lex/Token.h b/clang/include/clang/Lex/Token.h index d09e951908129..886fbfb8af029 100644 --- a/clang/include/clang/Lex/Token.h +++ b/clang/include/clang/Lex/Token.h @@ -92,6 +92,7 @@ class Token { HasSeenNoTrivialPPDirective = 0x1000, // Whether we've seen any 'no-trivial' pp-directives before // current position. + PhysicalStartOfLine = 0x2000, // This token is at the start of a physical line. }; tok::TokenKind getKind() const { return Kind; } @@ -283,6 +284,10 @@ class Token { /// bool isAtStartOfLine() const { return getFlag(StartOfLine); } + /// isAtPhysicalStartOfLine - Return true if this token is at the start of a physical line. + /// + bool isAtPhysicalStartOfLine() const { return getFlag(PhysicalStartOfLine); } + /// Return true if this token has whitespace before it. /// bool hasLeadingSpace() const { return getFlag(LeadingSpace); } diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 1498657047bd6..f54484d6c5f87 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -2034,7 +2034,8 @@ bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr) { // Finally, now that we know we have an identifier, pass this off to the // preprocessor, which may macro expand it or something. - if (II->isHandleIdentifierCase()) + if (II->isHandleIdentifierCase() || II->isModuleKeyword() || + II->isImportKeyword() || II->getTokenID() == tok::kw_export) return PP->HandleIdentifier(Result); return true; @@ -2515,8 +2516,7 @@ bool Lexer::LexCharConstant(Token &Result, const char *CurPtr, /// Update BufferPtr to point to the next non-whitespace character and return. /// /// This method forms a token and returns true if KeepWhitespaceMode is enabled. -bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr, - bool &TokAtPhysicalStartOfLine) { +bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) { // Whitespace - Skip it, then return the token after the whitespace. bool SawNewline = isVerticalWhitespace(CurPtr[-1]); @@ -2572,7 +2572,7 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr, Result.setFlagValue(Token::LeadingSpace, HasLeadingSpace); if (SawNewline) { Result.setFlag(Token::StartOfLine); - TokAtPhysicalStartOfLine = true; + Result.setFlag(Token::PhysicalStartOfLine); if (NewLinePtr && lastNewLine && NewLinePtr != lastNewLine && PP) { if (auto *Handler = PP->getEmptylineHandler()) @@ -2591,8 +2591,7 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr, /// /// If we're in KeepCommentMode or any CommentHandler has inserted /// some tokens, this will store the first token and return true. -bool Lexer::SkipLineComment(Token &Result, const char *CurPtr, - bool &TokAtPhysicalStartOfLine) { +bool Lexer::SkipLineComment(Token &Result, const char *CurPtr) { // If Line comments aren't explicitly enabled for this language, emit an // extension warning. 
if (!LineComment) { @@ -2748,7 +2747,7 @@ bool Lexer::SkipLineComment(Token &Result, const char *CurPtr, // The next returned token is at the start of the line. Result.setFlag(Token::StartOfLine); - TokAtPhysicalStartOfLine = true; + Result.setFlag(Token::PhysicalStartOfLine); // No leading whitespace seen so far. Result.clearFlag(Token::LeadingSpace); BufferPtr = CurPtr; @@ -2873,8 +2872,7 @@ static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, Lexer *L, /// /// If we're in KeepCommentMode or any CommentHandler has inserted /// some tokens, this will store the first token and return true. -bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr, - bool &TokAtPhysicalStartOfLine) { +bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) { // Scan one character past where we should, looking for a '/' character. Once // we find it, check to see if it was preceded by a *. This common // optimization helps people who like to put a lot of * characters in their @@ -3077,7 +3075,7 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr, // efficiently now. This is safe even in KeepWhitespaceMode because we would // have already returned above with the comment as a token. if (isHorizontalWhitespace(*CurPtr)) { - SkipWhitespace(Result, CurPtr+1, TokAtPhysicalStartOfLine); + SkipWhitespace(Result, CurPtr+1); return false; } @@ -3722,6 +3720,11 @@ bool Lexer::Lex(Token &Result) { IsAtStartOfLine = false; } + if (IsAtPhysicalStartOfLine) { + Result.setFlag(Token::PhysicalStartOfLine); + IsAtPhysicalStartOfLine = false; + } + if (HasLeadingSpace) { Result.setFlag(Token::LeadingSpace); HasLeadingSpace = false; @@ -3732,11 +3735,9 @@ bool Lexer::Lex(Token &Result) { HasLeadingEmptyMacro = false; } - bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine; - IsAtPhysicalStartOfLine = false; bool isRawLex = isLexingRawMode(); (void) isRawLex; - bool returnedToken = LexTokenInternal(Result, atPhysicalStartOfLine); + bool returnedToken = LexTokenInternal(Result); // (After the LexTokenInternal call, the lexer might be destroyed.) assert((returnedToken || !isRawLex) && "Raw lex must succeed"); return returnedToken; @@ -3747,7 +3748,7 @@ bool Lexer::Lex(Token &Result) { /// has a null character at the end of the file. This returns a preprocessing /// token, not a normal token, as such, it is an internal interface. It assumes /// that the Flags of result have been cleared before calling this. -bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { +bool Lexer::LexTokenInternal(Token &Result) { LexStart: assert(!Result.needsCleaning() && "Result needs cleaning"); assert(!Result.hasPtrData() && "Result has not been reset"); @@ -3800,7 +3801,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { if (!isLexingRawMode()) Diag(CurPtr-1, diag::null_in_file); Result.setFlag(Token::LeadingSpace); - if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) + if (SkipWhitespace(Result, CurPtr)) return true; // KeepWhitespaceMode // We know the lexer hasn't changed, so just try again with this lexer. @@ -3846,7 +3847,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { // No leading whitespace seen so far. Result.clearFlag(Token::LeadingSpace); - if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) + if (SkipWhitespace(Result, CurPtr)) return true; // KeepWhitespaceMode // We only saw whitespace, so just try again with this lexer. 
@@ -3858,7 +3859,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { case '\v': SkipHorizontalWhitespace: Result.setFlag(Token::LeadingSpace); - if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) + if (SkipWhitespace(Result, CurPtr)) return true; // KeepWhitespaceMode SkipIgnoredUnits: @@ -3868,11 +3869,11 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { // too (without going through the big switch stmt). if (CurPtr[0] == '/' && CurPtr[1] == '/' && !inKeepCommentMode() && LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) { - if (SkipLineComment(Result, CurPtr+2, TokAtPhysicalStartOfLine)) + if (SkipLineComment(Result, CurPtr+2)) return true; // There is a token to return. goto SkipIgnoredUnits; } else if (CurPtr[0] == '/' && CurPtr[1] == '*' && !inKeepCommentMode()) { - if (SkipBlockComment(Result, CurPtr+2, TokAtPhysicalStartOfLine)) + if (SkipBlockComment(Result, CurPtr+2)) return true; // There is a token to return. goto SkipIgnoredUnits; } else if (isHorizontalWhitespace(*CurPtr)) { @@ -4030,23 +4031,10 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': /*'u'*/ case 'v': case 'w': case 'x': case 'y': case 'z': - case '_': { + case '_': // Notify MIOpt that we read a non-whitespace/non-comment token. MIOpt.ReadToken(); - - // LexIdentifierContinue may trigger HandleEndOfFile which would - // normally destroy this Lexer. However, the Preprocessor now defers - // lexer destruction until the stack of Lexer unwinds (LexLevel == 0), - // so it's safe to access member variables after this call returns. - bool returnedToken = LexIdentifierContinue(Result, CurPtr); - - if (returnedToken && !LexingRawMode && !Is_PragmaLexer && - !ParsingPreprocessorDirective && LangOpts.CPlusPlusModules && - Result.isModuleContextualKeyword() && - PP->HandleModuleContextualKeyword(Result, TokAtPhysicalStartOfLine)) - goto HandleDirective; - return returnedToken; - } + return LexIdentifierContinue(Result, CurPtr); case '$': // $ in identifiers. if (LangOpts.DollarIdents) { if (!isLexingRawMode()) @@ -4196,8 +4184,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) != '*'; if (TreatAsComment) { - if (SkipLineComment(Result, ConsumeChar(CurPtr, SizeTmp, Result), - TokAtPhysicalStartOfLine)) + if (SkipLineComment(Result, ConsumeChar(CurPtr, SizeTmp, Result))) return true; // There is a token to return. // It is common for the tokens immediately after a // comment to be @@ -4208,8 +4195,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { } if (Char == '*') { // /**/ comment. - if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result), - TokAtPhysicalStartOfLine)) + if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result))) return true; // There is a token to return. // We only saw whitespace, so just try again with this lexer. @@ -4249,12 +4235,8 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { // it's actually the start of a preprocessing directive. Callback to // the preprocessor to handle it. // TODO: -fpreprocessed mode?? - if (TokAtPhysicalStartOfLine && !LexingRawMode && !Is_PragmaLexer) { - // We parsed a # character and it's the start of a preprocessing - // directive. 
- FormTokenWithChars(Result, CurPtr, tok::hash); + if (Result.isAtPhysicalStartOfLine() && !LexingRawMode && !Is_PragmaLexer) goto HandleDirective; - } Kind = tok::hash; } @@ -4444,12 +4426,8 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { // it's actually the start of a preprocessing directive. Callback to // the preprocessor to handle it. // TODO: -fpreprocessed mode?? - if (TokAtPhysicalStartOfLine && !LexingRawMode && !Is_PragmaLexer) { - // We parsed a # character and it's the start of a preprocessing - // directive. - FormTokenWithChars(Result, CurPtr, tok::hash); + if (Result.isAtPhysicalStartOfLine() && !LexingRawMode && !Is_PragmaLexer) goto HandleDirective; - } Kind = tok::hash; } @@ -4468,7 +4446,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { if (!LangOpts.AsmPreprocessor) { if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) { if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) { - if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) + if (SkipWhitespace(Result, CurPtr)) return true; // KeepWhitespaceMode // We only saw whitespace, so just try again with this lexer. @@ -4501,7 +4479,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { llvm::strictConversion); if (Status == llvm::conversionOK) { if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) { - if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) + if (SkipWhitespace(Result, CurPtr)) return true; // KeepWhitespaceMode // We only saw whitespace, so just try again with this lexer. @@ -4539,6 +4517,9 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { return true; HandleDirective: + // We parsed a # character and it's the start of a preprocessing directive. 
+ + FormTokenWithChars(Result, CurPtr, tok::hash); PP->HandleDirective(Result); if (PP->hadModuleLoaderFatalFailure()) @@ -4623,7 +4604,7 @@ bool Lexer::LexDependencyDirectiveToken(Token &Result) { if (!isLexingRawMode()) { const IdentifierInfo *II = PP->LookUpIdentifierInfo(Result); if (LangOpts.CPlusPlusModules && Result.isModuleContextualKeyword() && - PP->HandleModuleContextualKeyword(Result, Result.isAtStartOfLine())) { + PP->HandleModuleContextualKeyword(Result)) { PP->HandleDirective(Result); return false; } diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index 85edbabf09ed3..f37215653b23c 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -642,13 +642,13 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc, (Tok.getRawIdentifier() == "export" || Tok.getRawIdentifier() == "module")) { llvm::SaveAndRestore ModuleDirectiveSkipping( - LastTokenWasExportKeyword); - LastTokenWasExportKeyword.reset(); + LastExportKeyword); + LastExportKeyword.startToken(); LookUpIdentifierInfo(Tok); IdentifierInfo *II = Tok.getIdentifierInfo(); if (II->getName()[0] == 'e') { // export - HandleModuleContextualKeyword(Tok, Tok.isAtStartOfLine()); + HandleModuleContextualKeyword(Tok); CurLexer->Lex(Tok); if (Tok.is(tok::raw_identifier)) { LookUpIdentifierInfo(Tok); @@ -661,7 +661,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc, // to save RawLexingMode llvm::SaveAndRestore RestoreLexingRawMode(CurPPLexer->LexingRawMode, false); - if (HandleModuleContextualKeyword(Tok, Tok.isAtStartOfLine())) { + if (HandleModuleContextualKeyword(Tok)) { // We just parsed a # character at the start of a line, so we're // in directive mode. Tell the lexer this so any newlines we see // will be converted into an EOD token (this terminates the @@ -4193,8 +4193,8 @@ void Preprocessor::HandleCXXImportDirective(Token ImportTok) { llvm::SaveAndRestore<bool> SaveImportingCXXModules( this->ImportingCXXNamedModules, true); - if (LastTokenWasExportKeyword.isValid()) - LastTokenWasExportKeyword.reset(); + if (LastExportKeyword.is(tok::kw_export)) + LastExportKeyword.startToken(); Token Tok; if (LexHeaderName(Tok)) { @@ -4352,9 +4352,9 @@ void Preprocessor::HandleCXXImportDirective(Token ImportTok) { void Preprocessor::HandleCXXModuleDirective(Token ModuleTok) { assert(getLangOpts().CPlusPlusModules && ModuleTok.is(tok::kw_module)); Token Introducer = ModuleTok; - if (LastTokenWasExportKeyword.isValid()) { - Introducer = LastTokenWasExportKeyword.getExportTok(); - LastTokenWasExportKeyword.reset(); + if (LastExportKeyword.is(tok::kw_export)) { + Introducer = LastExportKeyword; + LastExportKeyword.startToken(); } SourceLocation StartLoc = Introducer.getLocation(); diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 791a9644b6e85..726c59cd194d5 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -119,7 +119,7 @@ Preprocessor::Preprocessor(const PreprocessorOptions &PPOpts, // We haven't read anything from the external source. ReadMacrosFromExternalSource = false; - LastTokenWasExportKeyword.reset(); + LastExportKeyword.startToken(); BuiltinInfo = std::make_unique<Builtin::Context>(); @@ -878,6 +878,17 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) { if (II.isExtensionToken() && !DisableMacroExpansion) Diag(Identifier, diag::ext_token_used); + // Handle module contextual keyword. 
+ if (getLangOpts().CPlusPlusModules && CurLexer && + !CurLexer->isLexingRawMode() && !CurLexer->isPragmaLexer() && + !CurLexer->ParsingPreprocessorDirective && + Identifier.isModuleContextualKeyword() && + HandleModuleContextualKeyword(Identifier)) { + HandleDirective(Identifier); + // With a fatal failure in the module loader, we abort parsing. + return hadModuleLoaderFatalFailure(); + } + // If this is the 'import' contextual keyword following an '@', note // that the next token indicates a module name. // @@ -996,7 +1007,7 @@ void Preprocessor::Lex(Token &Result) { LastTokenWasAt = Result.is(tok::at); if (Result.isNot(tok::kw_export)) - LastTokenWasExportKeyword.reset(); + LastExportKeyword.startToken(); --LexLevel; @@ -1259,12 +1270,12 @@ bool Preprocessor::LexModuleNameContinue(Token &Tok, SourceLocation UseLoc, /// - ; for 'module' /// Otherwise the token is treated as an identifier. bool Preprocessor::HandleModuleContextualKeyword( - Token &Result, bool TokAtPhysicalStartOfLine) { + Token &Result) { if (!getLangOpts().CPlusPlusModules || !Result.isModuleContextualKeyword()) return false; if (Result.is(tok::kw_export)) { - LastTokenWasExportKeyword = {Result, TokAtPhysicalStartOfLine}; + LastExportKeyword = Result; return false; } @@ -1277,17 +1288,17 @@ bool Preprocessor::HandleModuleContextualKeyword( II->isStr(tok::getKeywordSpelling(tok::kw_module)))) return false; - if (LastTokenWasExportKeyword.isValid()) { + if (LastExportKeyword.is(tok::kw_export)) { // The export keyword was not at the start of line, it's not a // directive-introducing token. - if (!LastTokenWasExportKeyword.isAtPhysicalStartOfLine()) + if (!LastExportKeyword.isAtPhysicalStartOfLine()) return false; // [cpp.pre]/1.4 // export // not a preprocessing directive // import foo; // preprocessing directive (ill-formed at phase7) - if (TokAtPhysicalStartOfLine) + if (Result.isAtPhysicalStartOfLine()) return false; - } else if (!TokAtPhysicalStartOfLine) + } else if (!Result.isAtPhysicalStartOfLine()) return false; llvm::SaveAndRestore<bool> SavedParsingPreprocessorDirective( >From ba9e5c0c4c8ee7ba38617ae5731242751dbd9e3c Mon Sep 17 00:00:00 2001 From: yronglin <[email protected]> Date: Wed, 11 Feb 2026 19:34:22 -0800 Subject: [PATCH 2/3] Format Signed-off-by: yronglin <[email protected]> --- clang/include/clang/Lex/Lexer.h | 6 +++--- clang/include/clang/Lex/Token.h | 6 ++++-- clang/lib/Lex/Lexer.cpp | 9 +++++---- clang/lib/Lex/PPDirectives.cpp | 3 +-- clang/lib/Lex/Preprocessor.cpp | 3 +-- 5 files changed, 14 insertions(+), 13 deletions(-) diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h index f843880153f94..0459a863bc08d 100644 --- a/clang/include/clang/Lex/Lexer.h +++ b/clang/include/clang/Lex/Lexer.h @@ -762,9 +762,9 @@ class Lexer : public PreprocessorLexer { bool LexCharConstant (Token &Result, const char *CurPtr, tok::TokenKind Kind); bool LexEndOfFile (Token &Result, const char *CurPtr); - bool SkipWhitespace (Token &Result, const char *CurPtr); - bool SkipLineComment (Token &Result, const char *CurPtr); - bool SkipBlockComment (Token &Result, const char *CurPtr); + bool SkipWhitespace(Token &Result, const char *CurPtr); + bool SkipLineComment(Token &Result, const char *CurPtr); + bool SkipBlockComment(Token &Result, const char *CurPtr); bool SaveLineComment (Token &Result, const char *CurPtr); bool IsStartOfConflictMarker(const char *CurPtr); diff --git a/clang/include/clang/Lex/Token.h b/clang/include/clang/Lex/Token.h index 886fbfb8af029..e19e04e78e3b7 100644 --- 
a/clang/include/clang/Lex/Token.h +++ b/clang/include/clang/Lex/Token.h @@ -92,7 +92,8 @@ class Token { HasSeenNoTrivialPPDirective = 0x1000, // Whether we've seen any 'no-trivial' pp-directives before // current position. - PhysicalStartOfLine = 0x2000, // This token is at the start of a physical line. + PhysicalStartOfLine = + 0x2000, // This token is at the start of a physical line. }; tok::TokenKind getKind() const { return Kind; } @@ -284,7 +285,8 @@ class Token { /// bool isAtStartOfLine() const { return getFlag(StartOfLine); } - /// isAtPhysicalStartOfLine - Return true if this token is at the start of a physical line. + /// isAtPhysicalStartOfLine - Return true if this token is at the start of a + /// physical line. /// bool isAtPhysicalStartOfLine() const { return getFlag(PhysicalStartOfLine); } diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index f54484d6c5f87..0d89703e5c538 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -3075,7 +3075,7 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) { // efficiently now. This is safe even in KeepWhitespaceMode because we would // have already returned above with the comment as a token. if (isHorizontalWhitespace(*CurPtr)) { - SkipWhitespace(Result, CurPtr+1); + SkipWhitespace(Result, CurPtr + 1); return false; } @@ -3869,11 +3869,11 @@ bool Lexer::LexTokenInternal(Token &Result) { // too (without going through the big switch stmt). if (CurPtr[0] == '/' && CurPtr[1] == '/' && !inKeepCommentMode() && LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) { - if (SkipLineComment(Result, CurPtr+2)) + if (SkipLineComment(Result, CurPtr + 2)) return true; // There is a token to return. goto SkipIgnoredUnits; } else if (CurPtr[0] == '/' && CurPtr[1] == '*' && !inKeepCommentMode()) { - if (SkipBlockComment(Result, CurPtr+2)) + if (SkipBlockComment(Result, CurPtr + 2)) return true; // There is a token to return. goto SkipIgnoredUnits; } else if (isHorizontalWhitespace(*CurPtr)) { @@ -4235,7 +4235,8 @@ bool Lexer::LexTokenInternal(Token &Result) { // it's actually the start of a preprocessing directive. Callback to // the preprocessor to handle it. // TODO: -fpreprocessed mode?? - if (Result.isAtPhysicalStartOfLine() && !LexingRawMode && !Is_PragmaLexer) + if (Result.isAtPhysicalStartOfLine() && !LexingRawMode && + !Is_PragmaLexer) goto HandleDirective; Kind = tok::hash; diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index f37215653b23c..4a854c213926b 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -641,8 +641,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc, Tok.is(tok::raw_identifier) && (Tok.getRawIdentifier() == "export" || Tok.getRawIdentifier() == "module")) { - llvm::SaveAndRestore ModuleDirectiveSkipping( - LastExportKeyword); + llvm::SaveAndRestore ModuleDirectiveSkipping(LastExportKeyword); LastExportKeyword.startToken(); LookUpIdentifierInfo(Tok); IdentifierInfo *II = Tok.getIdentifierInfo(); diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 726c59cd194d5..2d8fe688a5002 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -1269,8 +1269,7 @@ bool Preprocessor::LexModuleNameContinue(Token &Tok, SourceLocation UseLoc, /// - <, ", or : (but not ::) pp tokens for 'import', or /// - ; for 'module' /// Otherwise the token is treated as an identifier. 
-bool Preprocessor::HandleModuleContextualKeyword( - Token &Result) { +bool Preprocessor::HandleModuleContextualKeyword(Token &Result) { if (!getLangOpts().CPlusPlusModules || !Result.isModuleContextualKeyword()) return false; >From 69d60ec0dcb1ee37945b84e998443e4284e65e89 Mon Sep 17 00:00:00 2001 From: yronglin <[email protected]> Date: Sun, 15 Feb 2026 17:10:03 -0800 Subject: [PATCH 3/3] Refine comments Signed-off-by: yronglin <[email protected]> --- clang/include/clang/Lex/Token.h | 1 - clang/lib/Lex/Lexer.cpp | 2 +- clang/lib/Lex/Preprocessor.cpp | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/Lex/Token.h b/clang/include/clang/Lex/Token.h index e19e04e78e3b7..9904b271c200e 100644 --- a/clang/include/clang/Lex/Token.h +++ b/clang/include/clang/Lex/Token.h @@ -287,7 +287,6 @@ class Token { /// isAtPhysicalStartOfLine - Return true if this token is at the start of a /// physical line. - /// bool isAtPhysicalStartOfLine() const { return getFlag(PhysicalStartOfLine); } /// Return true if this token has whitespace before it. diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 0d89703e5c538..92c3046a6fd19 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -4518,8 +4518,8 @@ bool Lexer::LexTokenInternal(Token &Result) { return true; HandleDirective: - // We parsed a # character and it's the start of a preprocessing directive. + // We parsed a # character and it's the start of a preprocessing directive. FormTokenWithChars(Result, CurPtr, tok::hash); PP->HandleDirective(Result); diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 2d8fe688a5002..035007ad4cd73 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -878,7 +878,7 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) { if (II.isExtensionToken() && !DisableMacroExpansion) Diag(Identifier, diag::ext_token_used); - // Handle module contextual keyword. + // Handle module contextual keywords. if (getLangOpts().CPlusPlusModules && CurLexer && !CurLexer->isLexingRawMode() && !CurLexer->isPragmaLexer() && !CurLexer->ParsingPreprocessorDirective &&
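
For readers skimming the patch: the behavior being implemented comes from P1857R3 ([cpp.pre]/1). A 'module' or 'import' token introduces a preprocessing directive only when the token itself, or an 'export' immediately preceding it, is the first token on a line, and when it is followed on the same line by a suitable token (module name, header-name, ';', ':', and so on). A condensed version of the standard's own example, whose classifications the new HandleModuleContextualKeyword / isAtPhysicalStartOfLine logic is expected to reproduce:

    module ;                  // preprocessing directive
    export module leftpad;    // preprocessing directive
    import <string>;          // preprocessing directive
    export import "squee";    // preprocessing directive
    import rightpad;          // preprocessing directive

    module                    // not a preprocessing directive
    ;                         // not a preprocessing directive

    export                    // not a preprocessing directive
    import foo;               // preprocessing directive (ill-formed at translation phase 7)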

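A minimal usage sketch of the new token flag (a fragment, not a complete program; 'RawLexer' stands for any already-constructed clang::Lexer, and only isAtPhysicalStartOfLine is new API, Lexer::Lex and Token::isNot already exist): the point of the change is that the physical-start-of-line bit now travels on the Token itself instead of being threaded through LexTokenInternal and HandleModuleContextualKeyword as a separate bool parameter.

    clang::Token Tok;
    do {
      RawLexer.Lex(Tok);
      // PhysicalStartOfLine (0x2000) records that the token begins a physical
      // source line; this is the condition directive detection checks for '#',
      // 'module', 'import' and 'export'.
      if (Tok.isAtPhysicalStartOfLine()) {
        // candidate directive-introducing position
      }
    } while (Tok.isNot(clang::tok::eof));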