llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-tools-extra Author: Daniil Dudkin (unterumarmung) <details> <summary>Changes</summary> --- Patch is 30.10 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/196764.diff 4 Files Affected: - (modified) clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h (+29) - (modified) clang-tools-extra/include-cleaner/lib/Analysis.cpp (+219-59) - (modified) clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp (+1-1) - (modified) clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp (+318-7) ``````````diff diff --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h index 8e4912fa7bd84..1d28d87c025ca 100644 --- a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h +++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h @@ -62,24 +62,53 @@ void walkUsed(llvm::ArrayRef<Decl *> ASTRoots, const PragmaIncludes *PI, const Preprocessor &PP, UsedSymbolCB CB); +/// A location kind where a symbol reference is observed. +enum class SymbolReferenceOrigin { + MainFile, + Preamble, + Fragment, +}; + /// A missing include insertion candidate. struct MissingInclude { std::string Spelling; Header Provider; }; +/// A missing include finding with per-reference provenance. +struct MissingIncludeRef { + SymbolReference Ref; + llvm::SmallVector<Header> Providers; + SymbolReferenceOrigin Origin = SymbolReferenceOrigin::MainFile; + /// Null if the fragment file has multiple direct include sites. + const Include *FragmentInclude = nullptr; +}; + +/// An include kept alive only by fragment usage. +struct FragmentDependency { + const Include *Preserved = nullptr; + llvm::SmallVector<const Include *> Fragments; +}; + /// The result of include-cleaner analysis for one main file. struct AnalysisResults { std::vector<const Include *> Unused; /// Deduplicated insertion plan, e.g. "<vector>" paired with the chosen /// provider Header. std::vector<MissingInclude> MissingIncludes; + /// Per-reference provenance for consumers that need richer diagnostics. + std::vector<MissingIncludeRef> MissingRefs; + /// Include-preservation provenance for fragment-only uses. + std::vector<FragmentDependency> FragmentDependencies; }; /// Analysis configuration shared by include-cleaner consumers. struct AnalysisOptions { /// No analysis will be performed for headers that satisfy the predicate. std::function<bool(const Header &)> HeaderFilter; + /// A predicate matched against normalized resolved paths, and normalized + /// spelled paths as a fallback, to identify direct include fragments. + std::function<bool(llvm::StringRef)> FragmentHeaderFilter; }; /// Determine which headers should be inserted or removed from the main file. diff --git a/clang-tools-extra/include-cleaner/lib/Analysis.cpp b/clang-tools-extra/include-cleaner/lib/Analysis.cpp index cd0b3396bf418..321b67a5787d3 100644 --- a/clang-tools-extra/include-cleaner/lib/Analysis.cpp +++ b/clang-tools-extra/include-cleaner/lib/Analysis.cpp @@ -8,6 +8,7 @@ #include "clang-include-cleaner/Analysis.h" #include "AnalysisInternal.h" +#include "TypesInternal.h" #include "clang-include-cleaner/IncludeSpeller.h" #include "clang-include-cleaner/Record.h" #include "clang-include-cleaner/Types.h" @@ -22,9 +23,11 @@ #include "clang/Tooling/Core/Replacement.h" #include "clang/Tooling/Inclusions/StandardLibrary.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/STLFunctionalExtras.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" @@ -32,13 +35,91 @@ #include "llvm/Support/ErrorHandling.h" #include <cassert> #include <climits> +#include <optional> #include <string> namespace clang::include_cleaner { namespace { + +struct ClassifiedReference { + SymbolReferenceOrigin Origin; + // Null if the fragment file has multiple direct include sites. + const Include *FragmentInclude = nullptr; +}; + +class FragmentTracker { +public: + FragmentTracker(const Includes &Inc, const SourceManager &SM, + const std::function<bool(llvm::StringRef)> &HeaderFilter) + : SM(SM) { + if (!HeaderFilter) + return; + + for (const Include &I : Inc.all()) { + if (!I.Resolved || + locateInMainFile(I.HashLocation, SM) != MainFileLocation::MainFile) { + continue; + } + + // Match the canonical path first, but fall back to the spelled include + // so generated paths can still be configured even when resolution loses + // that spelling detail. + const llvm::SmallString<128> ResolvedPath = + normalizePath(I.Resolved->getName()); + bool IsFragment = HeaderFilter(ResolvedPath); + if (!IsFragment) { + const llvm::SmallString<128> SpelledPath = normalizePath(I.Spelled); + if (!SpelledPath.empty()) + IsFragment = HeaderFilter(SpelledPath); + } + if (!IsFragment) + continue; + + // Fragments are intentionally direct-includes-only for now. If a + // fragment includes another file, that nested include keeps normal + // header semantics. + IncludeSitesByFile[&I.Resolved->getFileEntry()].push_back(&I); + DirectIncludes.insert(&I); + } + } + + std::optional<ClassifiedReference> classify(FileID FID) const { + if (FID == SM.getMainFileID()) + return ClassifiedReference{SymbolReferenceOrigin::MainFile}; + if (FID == SM.getPreambleFileID()) + return ClassifiedReference{SymbolReferenceOrigin::Preamble}; + auto FE = SM.getFileEntryRefForID(FID); + if (!FE) + return std::nullopt; + auto It = IncludeSitesByFile.find(&FE->getFileEntry()); + if (It == IncludeSitesByFile.end()) + return std::nullopt; + if (It->second.size() != 1) + return ClassifiedReference{SymbolReferenceOrigin::Fragment}; + return ClassifiedReference{SymbolReferenceOrigin::Fragment, + It->second.front()}; + } + + bool isDirectInclude(const Include *I) const { + return DirectIncludes.contains(I); + } + +private: + const SourceManager &SM; + llvm::DenseMap<const FileEntry *, llvm::SmallVector<const Include *>> + IncludeSitesByFile; + llvm::DenseSet<const Include *> DirectIncludes; +}; + +using UsedSymbolWithOriginCB = llvm::function_ref<void( + const SymbolReference &Ref, llvm::ArrayRef<Header> Providers, + const ClassifiedReference &Info)>; + bool shouldIgnoreMacroReference(const Preprocessor &PP, const Macro &M) { - auto *MI = PP.getMacroInfo(M.Name); + auto &MutablePP = const_cast<Preprocessor &>(PP); + auto MD = MutablePP.getMacroDefinitionAtLoc(M.Name, M.Definition); + auto *MI = MD ? MD.getMacroInfo() : PP.getMacroInfo(M.Name); // Macros that expand to themselves are confusing from user's point of view. // They usually aspect the usage to be attributed to the underlying decl and // not the macro definition. So ignore such macros (e.g. std{in,out,err} are @@ -47,15 +128,12 @@ bool shouldIgnoreMacroReference(const Preprocessor &PP, const Macro &M) { return MI && MI->getNumTokens() == 1 && MI->isObjectLike() && MI->getReplacementToken(0).getIdentifierInfo() == M.Name; } -} // namespace -void walkUsed(llvm::ArrayRef<Decl *> ASTRoots, - llvm::ArrayRef<SymbolReference> MacroRefs, - const PragmaIncludes *PI, const Preprocessor &PP, - UsedSymbolCB CB) { +void walkUsedWithOrigins( + llvm::ArrayRef<Decl *> ASTRoots, llvm::ArrayRef<SymbolReference> MacroRefs, + const PragmaIncludes *PI, const Preprocessor &PP, UsedSymbolWithOriginCB CB, + llvm::function_ref<std::optional<ClassifiedReference>(FileID)> Classify) { const auto &SM = PP.getSourceManager(); - // This is duplicated in writeHTMLReport, changes should be mirrored there. - tooling::stdlib::Recognizer Recognizer; for (auto *Root : ASTRoots) { walkAST(*Root, [&](SourceLocation Loc, NamedDecl &ND, RefType RT) { auto SpellLoc = SM.getSpellingLoc(Loc); @@ -71,25 +149,28 @@ void walkUsed(llvm::ArrayRef<Decl *> ASTRoots, SpellLoc = SM.getSpellingLoc(Loc); } auto FID = SM.getFileID(SpellLoc); - if (FID != SM.getMainFileID() && FID != SM.getPreambleFileID()) + auto Info = Classify(FID); + if (!Info) return; // FIXME: Most of the work done here is repetitive. It might be useful to // have a cache/batching. SymbolReference SymRef{ND, Loc, RT}; - return CB(SymRef, headersForSymbol(ND, PP, PI)); + return CB(SymRef, headersForSymbol(ND, PP, PI), *Info); }); } + for (const SymbolReference &MacroRef : MacroRefs) { assert(MacroRef.Target.kind() == Symbol::Macro); - if (!SM.isWrittenInMainFile(SM.getSpellingLoc(MacroRef.RefLocation)) || - shouldIgnoreMacroReference(PP, MacroRef.Target.macro())) + if (shouldIgnoreMacroReference(PP, MacroRef.Target.macro())) continue; - CB(MacroRef, headersForSymbol(MacroRef.Target, PP, PI)); + auto FID = SM.getFileID(SM.getSpellingLoc(MacroRef.RefLocation)); + auto Info = Classify(FID); + if (!Info) + continue; + CB(MacroRef, headersForSymbol(MacroRef.Target, PP, PI), *Info); } } -namespace { - bool isFilteredInclude(const Include &I, llvm::function_ref<bool(const Header &)> HeaderFilter, const Preprocessor &PP) { @@ -97,8 +178,9 @@ bool isFilteredInclude(const Include &I, auto Lang = PP.getLangOpts().CPlusPlus ? tooling::stdlib::Lang::CXX : tooling::stdlib::Lang::C; if (auto StdHeader = tooling::stdlib::Header::named(I.quote(), Lang); - StdHeader && HeaderFilter(*StdHeader)) + StdHeader && HeaderFilter(*StdHeader)) { return true; + } } return I.Resolved && HeaderFilter(*I.Resolved); } @@ -109,32 +191,89 @@ bool shouldSuppressIncludeDiagnostic( const PragmaIncludes *PI, const Preprocessor &PP, OptionalDirectoryEntryRef ResourceDir) { if (!I.Resolved || I.Resolved->getDir() == ResourceDir || - isFilteredInclude(I, HeaderFilter, PP)) + isFilteredInclude(I, HeaderFilter, PP)) { return true; + } if (!PI) return false; if (PI->shouldKeep(*I.Resolved)) return true; // Check if main file is the public interface for a private header. If so - // we shouldn't diagnose it as unused. + // we shouldn't diagnose it as unused or record fragment dependencies. if (auto PHeader = PI->getPublic(*I.Resolved); !PHeader.empty()) { PHeader = PHeader.trim("<>\""); - // Since most private -> public mappings happen in a verbatim way, we - // check textually here. This might go wrong in presence of symlinks or - // header mappings. But that's not different than rest of the places. if (MainFile.getName().ends_with(PHeader)) return true; } return false; } +void walkUsedInFiles(llvm::ArrayRef<Decl *> ASTRoots, + llvm::ArrayRef<SymbolReference> MacroRefs, + const PragmaIncludes *PI, const Preprocessor &PP, + UsedSymbolCB CB, + llvm::function_ref<bool(FileID)> IsMainFile) { + walkUsedWithOrigins( + ASTRoots, MacroRefs, PI, PP, + [&](const SymbolReference &Ref, llvm::ArrayRef<Header> Providers, + const ClassifiedReference &) { CB(Ref, Providers); }, + [&](FileID FID) -> std::optional<ClassifiedReference> { + if (!IsMainFile(FID)) + return std::nullopt; + return ClassifiedReference{SymbolReferenceOrigin::MainFile}; + }); +} + +} // namespace + +void walkUsed(llvm::ArrayRef<Decl *> ASTRoots, + llvm::ArrayRef<SymbolReference> MacroRefs, + const PragmaIncludes *PI, const Preprocessor &PP, + UsedSymbolCB CB) { + const auto &SM = PP.getSourceManager(); + walkUsedInFiles(ASTRoots, MacroRefs, PI, PP, CB, [&](FileID FID) { + return FID == SM.getMainFileID() || FID == SM.getPreambleFileID(); + }); +} + +namespace { + class IncludeUsage { public: - void mark(const Include *I) { Used.insert(I); } + void mark(const Include *I, const ClassifiedReference &Info) { + Used.insert(I); + if (Info.Origin != SymbolReferenceOrigin::Fragment) { + UsedByMainOrPreamble.insert(I); + return; + } + + UsedByFragment.insert(I); + if (!Info.FragmentInclude) + return; + auto &Reasons = ByPreserved[I]; + if (!llvm::is_contained(Reasons, Info.FragmentInclude)) + Reasons.push_back(Info.FragmentInclude); + } + bool contains(const Include *I) const { return Used.contains(I); } + bool isFragmentOnly(const Include *I) const { + return UsedByFragment.contains(I) && !UsedByMainOrPreamble.contains(I); + } + + llvm::SmallVector<const Include *> fragmentSites(const Include *I) const { + auto It = ByPreserved.find(I); + if (It == ByPreserved.end()) + return {}; + return It->second; + } + private: llvm::DenseSet<const Include *> Used; + llvm::DenseSet<const Include *> UsedByMainOrPreamble; + llvm::DenseSet<const Include *> UsedByFragment; + llvm::DenseMap<const Include *, llvm::SmallVector<const Include *>> + ByPreserved; }; class MissingIncludeCollector { @@ -170,48 +309,69 @@ AnalysisResults analyze(llvm::ArrayRef<Decl *> ASTRoots, auto HeaderFilter = [&](const Header &H) { return Options.HeaderFilter && Options.HeaderFilter(H); }; + FragmentTracker Fragments(Inc, SM, Options.FragmentHeaderFilter); IncludeUsage Usage; MissingIncludeCollector MissingIncludes; + std::vector<MissingIncludeRef> MissingRefs; OptionalDirectoryEntryRef ResourceDir = PP.getHeaderSearchInfo().getModuleMap().getBuiltinDir(); - walkUsed(ASTRoots, MacroRefs, PI, PP, - [&](const SymbolReference &Ref, llvm::ArrayRef<Header> Providers) { - bool Satisfied = false; - for (const Header &H : Providers) { - if (H.kind() == Header::Physical && - (H.physical() == MainFile || - H.physical().getDir() == ResourceDir)) { - Satisfied = true; - } - for (const Include *I : Inc.match(H)) { - Usage.mark(I); - Satisfied = true; - } - } - // Bail out if we can't (or need not) insert an include. - if (Satisfied || Providers.empty() || Ref.RT != RefType::Explicit) - return; - if (HeaderFilter(Providers.front())) - return; - // Check if we have any headers with the same spelling, in edge - // cases like `#include_next "foo.h"`, the user can't ever - // include the physical foo.h, but can have a spelling that - // refers to it. - auto Spelling = spellHeader( - {Providers.front(), PP.getHeaderSearchInfo(), MainFile}); - for (const Include *I : Inc.match(Header{Spelling})) { - Usage.mark(I); - Satisfied = true; - } - if (!Satisfied) - MissingIncludes.add(Spelling, Providers.front()); - }); - - AnalysisResults Results; + + walkUsedWithOrigins( + ASTRoots, MacroRefs, PI, PP, + [&](const SymbolReference &Ref, llvm::ArrayRef<Header> Providers, + const ClassifiedReference &Info) { + bool Satisfied = false; + for (const Header &H : Providers) { + if (H.kind() == Header::Physical && + (H.physical() == MainFile || + H.physical().getDir() == ResourceDir)) { + Satisfied = true; + } + for (const Include *I : Inc.match(H)) { + Usage.mark(I, Info); + Satisfied = true; + } + } + + if (Satisfied || Providers.empty() || Ref.RT != RefType::Explicit) + return; + if (HeaderFilter(Providers.front())) + return; + + auto Spelling = spellHeader( + {Providers.front(), PP.getHeaderSearchInfo(), MainFile}); + for (const Include *I : Inc.match(Header{Spelling})) { + Usage.mark(I, Info); + Satisfied = true; + } + if (Satisfied) + return; + + // MissingIncludes drives edits, while MissingRefs preserves where the + // unsatisfied use came from for higher-level diagnostics. + MissingIncludes.add(Spelling, Providers.front()); + MissingRefs.push_back( + MissingIncludeRef{Ref, llvm::SmallVector<Header>(Providers), + Info.Origin, Info.FragmentInclude}); + }, + [&](FileID FID) { return Fragments.classify(FID); }); + + AnalysisResults Results{{}, {}, std::move(MissingRefs), {}}; for (const Include &I : Inc.all()) { - if (Usage.contains(&I) || - shouldSuppressIncludeDiagnostic(I, MainFile, HeaderFilter, PI, PP, - ResourceDir)) + bool Suppressed = shouldSuppressIncludeDiagnostic(I, MainFile, HeaderFilter, + PI, PP, ResourceDir); + if (Usage.contains(&I)) { + const llvm::SmallVector<const Include *> FragmentSites = + Usage.fragmentSites(&I); + // Ambiguous fragment sites still preserve the header, but get no comment. + if (!Suppressed && Usage.isFragmentOnly(&I) && + !Fragments.isDirectInclude(&I) && !FragmentSites.empty()) { + Results.FragmentDependencies.push_back( + FragmentDependency{&I, FragmentSites}); + } + continue; + } + if (Suppressed) continue; Results.Unused.push_back(&I); } diff --git a/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp b/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp index d72d6adf33c9b..49bd5495bcc13 100644 --- a/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp +++ b/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp @@ -192,7 +192,7 @@ class Action : public clang::ASTFrontendAction { SM.getFileManager().makeAbsolutePath(AbsPath); llvm::StringRef Code = SM.getBufferData(SM.getMainFileID()); - AnalysisOptions AnalyzeOptions{HeaderFilter}; + AnalysisOptions AnalyzeOptions{HeaderFilter, {}}; auto Results = analyze(AST.Roots, PP.MacroReferences, PP.Includes, &PI, getCompilerInstance().getPreprocessor(), AnalyzeOptions); diff --git a/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp b/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp index 3fcf82251d14b..0c68fb58032fb 100644 --- a/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp +++ b/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp @@ -39,6 +39,14 @@ #include <vector> namespace clang::include_cleaner { + +static std::vector<Decl *> topLevelDecls(TestAST &AST) { + std::vector<Decl *> Decls; + for (auto *D : AST.context().getTranslationUnitDecl()->decls()) + Decls.push_back(D); + return Decls; +} + namespace { using testing::_; using testing::AllOf; @@ -375,9 +383,7 @@ TEST_F(AnalyzeTest, SpellingIncludesWithSymlinks) { /*ModificationTime=*/{}); TestAST AST(Inputs); - std::vector<Decl *> DeclsInTU; - for (auto *D : AST.context().getTranslationUnitDecl()->decls()) - DeclsInTU.push_back(D); + std::vector<Decl *> DeclsInTU = topLevelDecls(AST); auto Results = analyze(DeclsInTU, {}, PP.Includes, &PI, AST.preprocessor()); // Check that we're spelling header using the symlink, and not underlying // path. @@ -388,8 +394,10 @@ TEST_F(AnalyzeTest, SpellingIncludesWithSymlinks) { { // Make sure filtering is also applied to symlink, not underlying file. - AnalysisOptions Options{ - [](const Header &H) { return H.resolvedPath() == "inner.h"; }}; + AnalysisOptions Options; + Options.HeaderFilter = [](const Header &H) { + return H.resolvedPath() == "inner.h"; + }; Results = analyze(DeclsInTU, {}, PP.Includes, &PI, AST.preprocessor(), Options); EXPECT_THAT(Results.MissingIncludes, @@ -398,8 +406,10 @@ TEST_F(AnalyzeTest, SpellingIncludesWithSymlinks) { EXPECT_THAT(Results.Unused, Not(testing::IsEmpty())); } { - AnalysisOptions Options{ - [](const Header &H) { return H.resolvedPath() == "header.h"; }}; + AnalysisOptions Options; + Options.HeaderFilter = [](const Header &H) { + return H.resolvedPath() == "header.h"; + }; Results = analyze(DeclsInTU, {}, PP.Includes, &PI, AST.preprocessor(), Options); // header.h should be ignored now. @@ -409,6 +419,307 @@ TEST_F(AnalyzeTest, SpellingIn... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/196764 _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
