llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-modules Author: None (llvmbot) <details> <summary>Changes</summary> Backport e6e874ce8f055f5b8c5d7f8c7fb0afe764d1d350 Requested by: @<!-- -->yronglin --- Patch is 49.91 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/154077.diff 14 Files Affected: - (modified) clang/include/clang/Lex/Lexer.h (-3) - (added) clang/include/clang/Lex/NoTrivialPPDirectiveTracer.h (+310) - (modified) clang/include/clang/Lex/Preprocessor.h (+12) - (modified) clang/include/clang/Lex/Token.h (+9-8) - (modified) clang/include/clang/Sema/Sema.h (+1-1) - (modified) clang/lib/Lex/Lexer.cpp (-9) - (modified) clang/lib/Lex/Preprocessor.cpp (+37-3) - (modified) clang/lib/Parse/Parser.cpp (+5-3) - (modified) clang/lib/Sema/SemaModule.cpp (+4-2) - (modified) clang/test/CXX/module/cpp.pre/module_decl.cpp (+140-1) - (modified) clang/unittests/Lex/CMakeLists.txt (+1) - (modified) clang/unittests/Lex/LexerTest.cpp (+2-2) - (modified) clang/unittests/Lex/ModuleDeclStateTest.cpp (+59-69) - (added) clang/unittests/Lex/NoTrivialPPDirectiveTracerTest.cpp (+182) ``````````diff diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h index 06971ff87ab96..423f2ffe2f852 100644 --- a/clang/include/clang/Lex/Lexer.h +++ b/clang/include/clang/Lex/Lexer.h @@ -143,9 +143,6 @@ class Lexer : public PreprocessorLexer { /// True if this is the first time we're lexing the input file. bool IsFirstTimeLexingFile; - /// True if current lexing token is the first pp-token. - bool IsFirstPPToken; - // NewLinePtr - A pointer to new line character '\n' being lexed. For '\r\n', // it also points to '\n.' const char *NewLinePtr; diff --git a/clang/include/clang/Lex/NoTrivialPPDirectiveTracer.h b/clang/include/clang/Lex/NoTrivialPPDirectiveTracer.h new file mode 100644 index 0000000000000..9ab3c6a528a1a --- /dev/null +++ b/clang/include/clang/Lex/NoTrivialPPDirectiveTracer.h @@ -0,0 +1,310 @@ +//===--- NoTrivialPPDirectiveTracer.h ---------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the NoTrivialPPDirectiveTracer interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LEX_NO_TRIVIAL_PPDIRECTIVE_TRACER_H +#define LLVM_CLANG_LEX_NO_TRIVIAL_PPDIRECTIVE_TRACER_H + +#include "clang/Lex/PPCallbacks.h" + +namespace clang { +class Preprocessor; + +/// Consider the following code: +/// +/// # 1 __FILE__ 1 3 +/// export module a; +/// +/// According to the wording in +/// [P1857R3](https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p1857r3.html): +/// +/// A module directive may only appear as the first preprocessing tokens in a +/// file (excluding the global module fragment.) +/// +/// and the wording in +/// [[cpp.pre]](https://eel.is/c++draft/cpp.pre#nt:module-file): +/// module-file: +/// pp-global-module-fragment[opt] pp-module group[opt] +/// pp-private-module-fragment[opt] +/// +/// `#` is the first pp-token in the translation unit, and it was rejected by +/// clang, but they really should be exempted from this rule. The goal is to not +/// allow any preprocessor conditionals or most state changes, but these don't +/// fit that. +/// +/// State change would mean most semantically observable preprocessor state, +/// particularly anything that is order dependent. Global flags like being a +/// system header/module shouldn't matter. +/// +/// We should exempt a brunch of directives, even though it violates the current +/// standard wording. +/// +/// This class used to trace 'no-trivial' pp-directives in main file, which may +/// change the preprocessing state. +/// +/// FIXME: Once the wording of the standard is revised, we need to follow the +/// wording of the standard. Currently this is just a workaround +class NoTrivialPPDirectiveTracer : public PPCallbacks { + Preprocessor &PP; + + /// Whether preprocessing main file. We only focus on the main file. + bool InMainFile = true; + + /// Whether one or more conditional, include or other 'no-trivial' + /// pp-directives has seen before. + bool SeenNoTrivialPPDirective = false; + + void setSeenNoTrivialPPDirective(); + +public: + NoTrivialPPDirectiveTracer(Preprocessor &P) : PP(P) {} + + bool hasSeenNoTrivialPPDirective() const; + + /// Callback invoked whenever the \p Lexer moves to a different file for + /// lexing. Unlike \p FileChanged line number directives and other related + /// pragmas do not trigger callbacks to \p LexedFileChanged. + /// + /// \param FID The \p FileID that the \p Lexer moved to. + /// + /// \param Reason Whether the \p Lexer entered a new file or exited one. + /// + /// \param FileType The \p CharacteristicKind of the file the \p Lexer moved + /// to. + /// + /// \param PrevFID The \p FileID the \p Lexer was using before the change. + /// + /// \param Loc The location where the \p Lexer entered a new file from or the + /// location that the \p Lexer moved into after exiting a file. + void LexedFileChanged(FileID FID, LexedFileChangeReason Reason, + SrcMgr::CharacteristicKind FileType, FileID PrevFID, + SourceLocation Loc) override; + + /// Callback invoked whenever an embed directive has been processed, + /// regardless of whether the embed will actually find a file. + /// + /// \param HashLoc The location of the '#' that starts the embed directive. + /// + /// \param FileName The name of the file being included, as written in the + /// source code. + /// + /// \param IsAngled Whether the file name was enclosed in angle brackets; + /// otherwise, it was enclosed in quotes. + /// + /// \param File The actual file that may be included by this embed directive. + /// + /// \param Params The parameters used by the directive. + void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled, + OptionalFileEntryRef File, + const LexEmbedParametersResult &Params) override { + setSeenNoTrivialPPDirective(); + } + + /// Callback invoked whenever an inclusion directive of + /// any kind (\c \#include, \c \#import, etc.) has been processed, regardless + /// of whether the inclusion will actually result in an inclusion. + /// + /// \param HashLoc The location of the '#' that starts the inclusion + /// directive. + /// + /// \param IncludeTok The token that indicates the kind of inclusion + /// directive, e.g., 'include' or 'import'. + /// + /// \param FileName The name of the file being included, as written in the + /// source code. + /// + /// \param IsAngled Whether the file name was enclosed in angle brackets; + /// otherwise, it was enclosed in quotes. + /// + /// \param FilenameRange The character range of the quotes or angle brackets + /// for the written file name. + /// + /// \param File The actual file that may be included by this inclusion + /// directive. + /// + /// \param SearchPath Contains the search path which was used to find the file + /// in the file system. If the file was found via an absolute include path, + /// SearchPath will be empty. For framework includes, the SearchPath and + /// RelativePath will be split up. For example, if an include of "Some/Some.h" + /// is found via the framework path + /// "path/to/Frameworks/Some.framework/Headers/Some.h", SearchPath will be + /// "path/to/Frameworks/Some.framework/Headers" and RelativePath will be + /// "Some.h". + /// + /// \param RelativePath The path relative to SearchPath, at which the include + /// file was found. This is equal to FileName except for framework includes. + /// + /// \param SuggestedModule The module suggested for this header, if any. + /// + /// \param ModuleImported Whether this include was translated into import of + /// \p SuggestedModule. + /// + /// \param FileType The characteristic kind, indicates whether a file or + /// directory holds normal user code, system code, or system code which is + /// implicitly 'extern "C"' in C++ mode. + /// + void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, + StringRef FileName, bool IsAngled, + CharSourceRange FilenameRange, + OptionalFileEntryRef File, StringRef SearchPath, + StringRef RelativePath, const Module *SuggestedModule, + bool ModuleImported, + SrcMgr::CharacteristicKind FileType) override { + setSeenNoTrivialPPDirective(); + } + + /// Callback invoked whenever there was an explicit module-import + /// syntax. + /// + /// \param ImportLoc The location of import directive token. + /// + /// \param Path The identifiers (and their locations) of the module + /// "path", e.g., "std.vector" would be split into "std" and "vector". + /// + /// \param Imported The imported module; can be null if importing failed. + /// + void moduleImport(SourceLocation ImportLoc, ModuleIdPath Path, + const Module *Imported) override { + setSeenNoTrivialPPDirective(); + } + + /// Callback invoked when the end of the main file is reached. + /// + /// No subsequent callbacks will be made. + void EndOfMainFile() override { setSeenNoTrivialPPDirective(); } + + /// Callback invoked when start reading any pragma directive. + void PragmaDirective(SourceLocation Loc, + PragmaIntroducerKind Introducer) override {} + + /// Called by Preprocessor::HandleMacroExpandedIdentifier when a + /// macro invocation is found. + void MacroExpands(const Token &MacroNameTok, const MacroDefinition &MD, + SourceRange Range, const MacroArgs *Args) override; + + /// Hook called whenever a macro definition is seen. + void MacroDefined(const Token &MacroNameTok, + const MacroDirective *MD) override { + setSeenNoTrivialPPDirective(); + } + + /// Hook called whenever a macro \#undef is seen. + /// \param MacroNameTok The active Token + /// \param MD A MacroDefinition for the named macro. + /// \param Undef New MacroDirective if the macro was defined, null otherwise. + /// + /// MD is released immediately following this callback. + void MacroUndefined(const Token &MacroNameTok, const MacroDefinition &MD, + const MacroDirective *Undef) override { + setSeenNoTrivialPPDirective(); + } + + /// Hook called whenever the 'defined' operator is seen. + /// \param MD The MacroDirective if the name was a macro, null otherwise. + void Defined(const Token &MacroNameTok, const MacroDefinition &MD, + SourceRange Range) override { + setSeenNoTrivialPPDirective(); + } + + /// Hook called whenever an \#if is seen. + /// \param Loc the source location of the directive. + /// \param ConditionRange The SourceRange of the expression being tested. + /// \param ConditionValue The evaluated value of the condition. + /// + // FIXME: better to pass in a list (or tree!) of Tokens. + void If(SourceLocation Loc, SourceRange ConditionRange, + ConditionValueKind ConditionValue) override { + setSeenNoTrivialPPDirective(); + } + + /// Hook called whenever an \#elif is seen. + /// \param Loc the source location of the directive. + /// \param ConditionRange The SourceRange of the expression being tested. + /// \param ConditionValue The evaluated value of the condition. + /// \param IfLoc the source location of the \#if/\#ifdef/\#ifndef directive. + // FIXME: better to pass in a list (or tree!) of Tokens. + void Elif(SourceLocation Loc, SourceRange ConditionRange, + ConditionValueKind ConditionValue, SourceLocation IfLoc) override { + setSeenNoTrivialPPDirective(); + } + + /// Hook called whenever an \#ifdef is seen. + /// \param Loc the source location of the directive. + /// \param MacroNameTok Information on the token being tested. + /// \param MD The MacroDefinition if the name was a macro, null otherwise. + void Ifdef(SourceLocation Loc, const Token &MacroNameTok, + const MacroDefinition &MD) override { + setSeenNoTrivialPPDirective(); + } + + /// Hook called whenever an \#elifdef branch is taken. + /// \param Loc the source location of the directive. + /// \param MacroNameTok Information on the token being tested. + /// \param MD The MacroDefinition if the name was a macro, null otherwise. + void Elifdef(SourceLocation Loc, const Token &MacroNameTok, + const MacroDefinition &MD) override { + setSeenNoTrivialPPDirective(); + } + /// Hook called whenever an \#elifdef is skipped. + /// \param Loc the source location of the directive. + /// \param ConditionRange The SourceRange of the expression being tested. + /// \param IfLoc the source location of the \#if/\#ifdef/\#ifndef directive. + // FIXME: better to pass in a list (or tree!) of Tokens. + void Elifdef(SourceLocation Loc, SourceRange ConditionRange, + SourceLocation IfLoc) override { + setSeenNoTrivialPPDirective(); + } + + /// Hook called whenever an \#ifndef is seen. + /// \param Loc the source location of the directive. + /// \param MacroNameTok Information on the token being tested. + /// \param MD The MacroDefiniton if the name was a macro, null otherwise. + void Ifndef(SourceLocation Loc, const Token &MacroNameTok, + const MacroDefinition &MD) override { + setSeenNoTrivialPPDirective(); + } + + /// Hook called whenever an \#elifndef branch is taken. + /// \param Loc the source location of the directive. + /// \param MacroNameTok Information on the token being tested. + /// \param MD The MacroDefinition if the name was a macro, null otherwise. + void Elifndef(SourceLocation Loc, const Token &MacroNameTok, + const MacroDefinition &MD) override { + setSeenNoTrivialPPDirective(); + } + /// Hook called whenever an \#elifndef is skipped. + /// \param Loc the source location of the directive. + /// \param ConditionRange The SourceRange of the expression being tested. + /// \param IfLoc the source location of the \#if/\#ifdef/\#ifndef directive. + // FIXME: better to pass in a list (or tree!) of Tokens. + void Elifndef(SourceLocation Loc, SourceRange ConditionRange, + SourceLocation IfLoc) override { + setSeenNoTrivialPPDirective(); + } + + /// Hook called whenever an \#else is seen. + /// \param Loc the source location of the directive. + /// \param IfLoc the source location of the \#if/\#ifdef/\#ifndef directive. + void Else(SourceLocation Loc, SourceLocation IfLoc) override { + setSeenNoTrivialPPDirective(); + } + + /// Hook called whenever an \#endif is seen. + /// \param Loc the source location of the directive. + /// \param IfLoc the source location of the \#if/\#ifdef/\#ifndef directive. + void Endif(SourceLocation Loc, SourceLocation IfLoc) override { + setSeenNoTrivialPPDirective(); + } +}; + +} // namespace clang + +#endif // LLVM_CLANG_LEX_NO_TRIVIAL_PPDIRECTIVE_TRACER_H diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 4d82e20e5d4f3..e90564a9739a5 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -82,6 +82,7 @@ class PreprocessorLexer; class PreprocessorOptions; class ScratchBuffer; class TargetInfo; +class NoTrivialPPDirectiveTracer; namespace Builtin { class Context; @@ -353,6 +354,11 @@ class Preprocessor { /// First pp-token source location in current translation unit. SourceLocation FirstPPTokenLoc; + /// A preprocessor directive tracer to trace whether the preprocessing + /// state changed. These changes would mean most semantically observable + /// preprocessor state, particularly anything that is order dependent. + NoTrivialPPDirectiveTracer *DirTracer = nullptr; + /// A position within a C++20 import-seq. class StdCXXImportSeq { public: @@ -609,6 +615,8 @@ class Preprocessor { return State == NamedModuleImplementation && !getName().contains(':'); } + bool isNotAModuleDecl() const { return State == NotAModuleDecl; } + StringRef getName() const { assert(isNamedModule() && "Can't get name from a non named module"); return Name; @@ -3087,6 +3095,10 @@ class Preprocessor { bool setDeserializedSafeBufferOptOutMap( const SmallVectorImpl<SourceLocation> &SrcLocSeqs); + /// Whether we've seen pp-directives which may have changed the preprocessing + /// state. + bool hasSeenNoTrivialPPDirective() const; + private: /// Helper functions to forward lexing to the actual lexer. They all share the /// same signature. diff --git a/clang/include/clang/Lex/Token.h b/clang/include/clang/Lex/Token.h index fc43e72593b94..d9dc5a562d802 100644 --- a/clang/include/clang/Lex/Token.h +++ b/clang/include/clang/Lex/Token.h @@ -86,12 +86,12 @@ class Token { // macro stringizing or charizing operator. CommaAfterElided = 0x200, // The comma following this token was elided (MS). IsEditorPlaceholder = 0x400, // This identifier is a placeholder. - - IsReinjected = 0x800, // A phase 4 token that was produced before and - // re-added, e.g. via EnterTokenStream. Annotation - // tokens are *not* reinjected. - FirstPPToken = 0x1000, // This token is the first pp token in the - // translation unit. + IsReinjected = 0x800, // A phase 4 token that was produced before and + // re-added, e.g. via EnterTokenStream. Annotation + // tokens are *not* reinjected. + HasSeenNoTrivialPPDirective = + 0x1000, // Whether we've seen any 'no-trivial' pp-directives before + // current position. }; tok::TokenKind getKind() const { return Kind; } @@ -321,8 +321,9 @@ class Token { /// lexer uses identifier tokens to represent placeholders. bool isEditorPlaceholder() const { return getFlag(IsEditorPlaceholder); } - /// Returns true if this token is the first pp-token. - bool isFirstPPToken() const { return getFlag(FirstPPToken); } + bool hasSeenNoTrivialPPDirective() const { + return getFlag(HasSeenNoTrivialPPDirective); + } }; /// Information about the conditional stack (\#if directives) diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index b331acbe606b7..7b0be368d67f9 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -9836,7 +9836,7 @@ class Sema final : public SemaBase { SourceLocation ModuleLoc, ModuleDeclKind MDK, ModuleIdPath Path, ModuleIdPath Partition, ModuleImportState &ImportState, - bool IntroducerIsFirstPPToken); + bool SeenNoTrivialPPDirective); /// The parser has processed a global-module-fragment declaration that begins /// the definition of the global module fragment of the current module unit. diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 1f695b4a8676c..b282a600c0e56 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -174,8 +174,6 @@ void Lexer::InitLexer(const char *BufStart, const char *BufPtr, ExtendedTokenMode = 0; NewLinePtr = nullptr; - - IsFirstPPToken = true; } /// Lexer constructor - Create a new lexer object for the specified buffer @@ -3225,7 +3223,6 @@ std::optional<Token> Lexer::peekNextPPToken() { bool atStartOfLine = IsAtStartOfLine; bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine; bool leadingSpace = HasLeadingSpace; - bool isFirstPPToken = IsFirstPPToken; Token Tok; Lex(Tok); @@ -3236,7 +3233,6 @@ std::optional<Token> Lexer::peekNextPPToken() { HasLeadingSpace = leadingSpace; IsAtStartOfLine = atStartOfLine; IsAtPhysicalStartOfLine = atPhysicalStartOfLine; - IsFirstPPToken = isFirstPPToken; // Restore the lexer back to non-skipping mode. LexingRawMode = false; @@ -3726,11 +3722,6 @@ bool Lexer::Lex(Token &Result) { HasLeadingEmptyMacro = false; } - if (IsFirstPPToken) { - Result.setFlag(Token::FirstPPToken); - IsFirstPPToken = false; - } - bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine; IsAtPhysicalStartOfLine = false; bool isRawLex = isLexingRawMode(); diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index bcd3ea60ce3da..2120e45dd8f8a 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -43,6 +43,7 @@ #include "clang/Lex/MacroArgs.h" #include "clang/Lex/MacroInfo.h" #include "clang/Lex/ModuleLoader.h" +#include "clang/Lex/NoTrivialPPDirectiveTracer.h" #include "clang/Lex/Pragma.h" #include "clang/Lex/PreprocessingRecord.h" #include "clang/Lex/PreprocessorLexer.h" @@ -247,8 +248,6 @@ void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const { llvm::errs() << " [LeadingSpace]"; if (Tok.isExpandDisabled()) llvm::errs() << " [ExpandDisabled]"; - if (Tok.isFirstPPToken()) - llvm::errs() << " [First pp-token]"; if (To... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/154077 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits