https://github.com/unterumarmung created https://github.com/llvm/llvm-project/pull/196763
None >From 6e146f087045416aad4ead1c35a38db9392f469d Mon Sep 17 00:00:00 2001 From: Daniil Dudkin <[email protected]> Date: Sun, 10 May 2026 01:01:20 +0300 Subject: [PATCH] [include-cleaner][NFC] factor analysis bookkeeping --- .../include/clang-include-cleaner/Analysis.h | 35 +++-- .../include-cleaner/lib/Analysis.cpp | 130 +++++++++++++----- .../include-cleaner/tool/IncludeCleaner.cpp | 32 +++-- .../unittests/AnalysisTest.cpp | 50 ++++--- 4 files changed, 164 insertions(+), 83 deletions(-) diff --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h index c3241763237d1..8e4912fa7bd84 100644 --- a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h +++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h @@ -20,8 +20,9 @@ #include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include <functional> #include <string> -#include <utility> +#include <vector> namespace clang { class SourceLocation; @@ -61,23 +62,33 @@ void walkUsed(llvm::ArrayRef<Decl *> ASTRoots, const PragmaIncludes *PI, const Preprocessor &PP, UsedSymbolCB CB); +/// A missing include insertion candidate. +struct MissingInclude { + std::string Spelling; + Header Provider; +}; + +/// The result of include-cleaner analysis for one main file. struct AnalysisResults { std::vector<const Include *> Unused; - // Spellings, like "<vector>" paired with the Header that generated it. - std::vector<std::pair<std::string, Header>> Missing; + /// Deduplicated insertion plan, e.g. "<vector>" paired with the chosen + /// provider Header. + std::vector<MissingInclude> MissingIncludes; +}; + +/// Analysis configuration shared by include-cleaner consumers. +struct AnalysisOptions { + /// No analysis will be performed for headers that satisfy the predicate. + std::function<bool(const Header &)> HeaderFilter; }; /// Determine which headers should be inserted or removed from the main file. /// This exposes conclusions but not reasons: use lower-level walkUsed for that. -/// -/// The HeaderFilter is a predicate that receives absolute path or spelling -/// without quotes/brackets, when a phyiscal file doesn't exist. -/// No analysis will be performed for headers that satisfy the predicate. -AnalysisResults -analyze(llvm::ArrayRef<Decl *> ASTRoots, - llvm::ArrayRef<SymbolReference> MacroRefs, const Includes &I, - const PragmaIncludes *PI, const Preprocessor &PP, - llvm::function_ref<bool(llvm::StringRef)> HeaderFilter = nullptr); +AnalysisResults analyze(llvm::ArrayRef<Decl *> ASTRoots, + llvm::ArrayRef<SymbolReference> MacroRefs, + const Includes &I, const PragmaIncludes *PI, + const Preprocessor &PP, + const AnalysisOptions &Options = {}); /// Removes unused includes and inserts missing ones in the main file. /// Returns the modified main-file code. diff --git a/clang-tools-extra/include-cleaner/lib/Analysis.cpp b/clang-tools-extra/include-cleaner/lib/Analysis.cpp index e48a380211af0..cd0b3396bf418 100644 --- a/clang-tools-extra/include-cleaner/lib/Analysis.cpp +++ b/clang-tools-extra/include-cleaner/lib/Analysis.cpp @@ -88,18 +88,90 @@ void walkUsed(llvm::ArrayRef<Decl *> ASTRoots, } } -AnalysisResults -analyze(llvm::ArrayRef<Decl *> ASTRoots, - llvm::ArrayRef<SymbolReference> MacroRefs, const Includes &Inc, - const PragmaIncludes *PI, const Preprocessor &PP, - llvm::function_ref<bool(llvm::StringRef)> HeaderFilter) { - auto &SM = PP.getSourceManager(); - const auto MainFile = *SM.getFileEntryRefForID(SM.getMainFileID()); +namespace { + +bool isFilteredInclude(const Include &I, + llvm::function_ref<bool(const Header &)> HeaderFilter, + const Preprocessor &PP) { + if (I.Angled) { + auto Lang = PP.getLangOpts().CPlusPlus ? tooling::stdlib::Lang::CXX + : tooling::stdlib::Lang::C; + if (auto StdHeader = tooling::stdlib::Header::named(I.quote(), Lang); + StdHeader && HeaderFilter(*StdHeader)) + return true; + } + return I.Resolved && HeaderFilter(*I.Resolved); +} + +bool shouldSuppressIncludeDiagnostic( + const Include &I, FileEntryRef MainFile, + llvm::function_ref<bool(const Header &)> HeaderFilter, + const PragmaIncludes *PI, const Preprocessor &PP, + OptionalDirectoryEntryRef ResourceDir) { + if (!I.Resolved || I.Resolved->getDir() == ResourceDir || + isFilteredInclude(I, HeaderFilter, PP)) + return true; + if (!PI) + return false; + if (PI->shouldKeep(*I.Resolved)) + return true; + // Check if main file is the public interface for a private header. If so + // we shouldn't diagnose it as unused. + if (auto PHeader = PI->getPublic(*I.Resolved); !PHeader.empty()) { + PHeader = PHeader.trim("<>\""); + // Since most private -> public mappings happen in a verbatim way, we + // check textually here. This might go wrong in presence of symlinks or + // header mappings. But that's not different than rest of the places. + if (MainFile.getName().ends_with(PHeader)) + return true; + } + return false; +} + +class IncludeUsage { +public: + void mark(const Include *I) { Used.insert(I); } + bool contains(const Include *I) const { return Used.contains(I); } + +private: llvm::DenseSet<const Include *> Used; +}; + +class MissingIncludeCollector { +public: + void add(llvm::StringRef Spelling, const Header &Provider) { + Missing.try_emplace(Spelling, Provider); + } + + std::vector<MissingInclude> take() && { + std::vector<MissingInclude> Result; + Result.reserve(Missing.size()); + for (auto &E : Missing) + Result.push_back(MissingInclude{E.first().str(), E.second}); + llvm::sort(Result, [](const MissingInclude &L, const MissingInclude &R) { + return L.Spelling < R.Spelling; + }); + return Result; + } + +private: llvm::StringMap<Header> Missing; - constexpr auto DefaultHeaderFilter = [](llvm::StringRef) { return false; }; - if (!HeaderFilter) - HeaderFilter = DefaultHeaderFilter; +}; + +} // namespace + +AnalysisResults analyze(llvm::ArrayRef<Decl *> ASTRoots, + llvm::ArrayRef<SymbolReference> MacroRefs, + const Includes &Inc, const PragmaIncludes *PI, + const Preprocessor &PP, + const AnalysisOptions &Options) { + auto &SM = PP.getSourceManager(); + const auto MainFile = *SM.getFileEntryRefForID(SM.getMainFileID()); + auto HeaderFilter = [&](const Header &H) { + return Options.HeaderFilter && Options.HeaderFilter(H); + }; + IncludeUsage Usage; + MissingIncludeCollector MissingIncludes; OptionalDirectoryEntryRef ResourceDir = PP.getHeaderSearchInfo().getModuleMap().getBuiltinDir(); walkUsed(ASTRoots, MacroRefs, PI, PP, @@ -112,14 +184,14 @@ analyze(llvm::ArrayRef<Decl *> ASTRoots, Satisfied = true; } for (const Include *I : Inc.match(H)) { - Used.insert(I); + Usage.mark(I); Satisfied = true; } } // Bail out if we can't (or need not) insert an include. if (Satisfied || Providers.empty() || Ref.RT != RefType::Explicit) return; - if (HeaderFilter(Providers.front().resolvedPath())) + if (HeaderFilter(Providers.front())) return; // Check if we have any headers with the same spelling, in edge // cases like `#include_next "foo.h"`, the user can't ever @@ -128,38 +200,22 @@ analyze(llvm::ArrayRef<Decl *> ASTRoots, auto Spelling = spellHeader( {Providers.front(), PP.getHeaderSearchInfo(), MainFile}); for (const Include *I : Inc.match(Header{Spelling})) { - Used.insert(I); + Usage.mark(I); Satisfied = true; } if (!Satisfied) - Missing.try_emplace(std::move(Spelling), Providers.front()); + MissingIncludes.add(Spelling, Providers.front()); }); AnalysisResults Results; for (const Include &I : Inc.all()) { - if (Used.contains(&I) || !I.Resolved || - HeaderFilter(I.Resolved->getName()) || - I.Resolved->getDir() == ResourceDir) + if (Usage.contains(&I) || + shouldSuppressIncludeDiagnostic(I, MainFile, HeaderFilter, PI, PP, + ResourceDir)) continue; - if (PI) { - if (PI->shouldKeep(*I.Resolved)) - continue; - // Check if main file is the public interface for a private header. If so - // we shouldn't diagnose it as unused. - if (auto PHeader = PI->getPublic(*I.Resolved); !PHeader.empty()) { - PHeader = PHeader.trim("<>\""); - // Since most private -> public mappings happen in a verbatim way, we - // check textually here. This might go wrong in presence of symlinks or - // header mappings. But that's not different than rest of the places. - if (MainFile.getName().ends_with(PHeader)) - continue; - } - } Results.Unused.push_back(&I); } - for (auto &E : Missing) - Results.Missing.emplace_back(E.first().str(), E.second); - llvm::sort(Results.Missing); + Results.MissingIncludes = std::move(MissingIncludes).take(); return Results; } @@ -171,9 +227,9 @@ std::string fixIncludes(const AnalysisResults &Results, // Encode insertions/deletions in the magic way clang-format understands. for (const Include *I : Results.Unused) cantFail(R.add(tooling::Replacement(FileName, UINT_MAX, 1, I->quote()))); - for (auto &[Spelled, _] : Results.Missing) - cantFail(R.add( - tooling::Replacement(FileName, UINT_MAX, 0, "#include " + Spelled))); + for (const MissingInclude &Missing : Results.MissingIncludes) + cantFail(R.add(tooling::Replacement(FileName, UINT_MAX, 0, + "#include " + Missing.Spelling))); // "cleanup" actually turns the UINT_MAX replacements into concrete edits. auto Positioned = cantFail(format::cleanupAroundReplacements(Code, R, Style)); return cantFail(tooling::applyAllReplacements(Code, Positioned)); diff --git a/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp b/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp index fefbfc3a9614d..d72d6adf33c9b 100644 --- a/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp +++ b/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp @@ -130,15 +130,15 @@ format::FormatStyle getStyle(llvm::StringRef Filename) { class Action : public clang::ASTFrontendAction { public: - Action(llvm::function_ref<bool(llvm::StringRef)> HeaderFilter, + Action(std::function<bool(const Header &)> HeaderFilter, llvm::StringMap<std::string> &EditedFiles) - : HeaderFilter(HeaderFilter), EditedFiles(EditedFiles) {} + : HeaderFilter(std::move(HeaderFilter)), EditedFiles(EditedFiles) {} private: RecordedAST AST; RecordedPP PP; PragmaIncludes PI; - llvm::function_ref<bool(llvm::StringRef)> HeaderFilter; + std::function<bool(const Header &)> HeaderFilter; llvm::StringMap<std::string> &EditedFiles; bool BeginInvocation(CompilerInstance &CI) override { @@ -192,9 +192,10 @@ class Action : public clang::ASTFrontendAction { SM.getFileManager().makeAbsolutePath(AbsPath); llvm::StringRef Code = SM.getBufferData(SM.getMainFileID()); + AnalysisOptions AnalyzeOptions{HeaderFilter}; auto Results = analyze(AST.Roots, PP.MacroReferences, PP.Includes, &PI, - getCompilerInstance().getPreprocessor(), HeaderFilter); + getCompilerInstance().getPreprocessor(), AnalyzeOptions); if (!Insert) { llvm::errs() @@ -213,7 +214,7 @@ class Action : public clang::ASTFrontendAction { } if (!Insert || DisableInsert) - Results.Missing.clear(); + Results.MissingIncludes.clear(); if (!Remove || DisableRemove) Results.Unused.clear(); std::string Final = fixIncludes(Results, AbsPath, Code, getStyle(AbsPath)); @@ -223,8 +224,8 @@ class Action : public clang::ASTFrontendAction { case PrintStyle::Changes: for (const Include *I : Results.Unused) llvm::outs() << "- " << I->quote() << " @Line:" << I->Line << "\n"; - for (const auto &[I, _] : Results.Missing) - llvm::outs() << "+ " << I << "\n"; + for (const MissingInclude &I : Results.MissingIncludes) + llvm::outs() << "+ " << I.Spelling << "\n"; break; case PrintStyle::Final: llvm::outs() << Final; @@ -232,7 +233,7 @@ class Action : public clang::ASTFrontendAction { } } - if (!Results.Missing.empty() || !Results.Unused.empty()) + if (!Results.MissingIncludes.empty() || !Results.Unused.empty()) EditedFiles.try_emplace(AbsPath, Final); } @@ -252,8 +253,8 @@ class Action : public clang::ASTFrontendAction { }; class ActionFactory : public tooling::FrontendActionFactory { public: - ActionFactory(llvm::function_ref<bool(llvm::StringRef)> HeaderFilter) - : HeaderFilter(HeaderFilter) {} + ActionFactory(std::function<bool(const Header &)> HeaderFilter) + : HeaderFilter(std::move(HeaderFilter)) {} std::unique_ptr<clang::FrontendAction> create() override { return std::make_unique<Action>(HeaderFilter, EditedFiles); @@ -264,7 +265,7 @@ class ActionFactory : public tooling::FrontendActionFactory { } private: - llvm::function_ref<bool(llvm::StringRef)> HeaderFilter; + std::function<bool(const Header &)> HeaderFilter; // Map from file name to final code with the include edits applied. llvm::StringMap<std::string> EditedFiles; }; @@ -295,16 +296,17 @@ std::function<bool(llvm::StringRef)> matchesAny(llvm::StringRef RegexFlag) { }; } -std::function<bool(llvm::StringRef)> headerFilter() { +std::function<bool(const Header &)> headerFilter() { auto OnlyMatches = matchesAny(OnlyHeaders); auto IgnoreMatches = matchesAny(IgnoreHeaders); if (!OnlyMatches || !IgnoreMatches) return nullptr; - return [OnlyMatches, IgnoreMatches](llvm::StringRef Header) { - if (!OnlyHeaders.empty() && !OnlyMatches(Header)) + return [OnlyMatches, IgnoreMatches](const Header &H) { + llvm::StringRef Path = H.resolvedPath(); + if (!OnlyHeaders.empty() && !OnlyMatches(Path)) return true; - if (!IgnoreHeaders.empty() && IgnoreMatches(Header)) + if (!IgnoreHeaders.empty() && IgnoreMatches(Path)) return true; return false; }; diff --git a/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp b/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp index ba5a3fbbcaeb2..3fcf82251d14b 100644 --- a/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp +++ b/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp @@ -51,6 +51,12 @@ std::string guard(llvm::StringRef Code) { return "#pragma once\n" + Code.str(); } +MATCHER_P2(missingInclude, Spelling, Provider, "") { + return arg.Spelling == Spelling && arg.Provider == Provider; +} + +MATCHER_P(missingSpelling, Spelling, "") { return arg.Spelling == Spelling; } + class WalkUsedTest : public testing::Test { protected: TestInputs Inputs; @@ -269,7 +275,8 @@ int x = a + c; const Include *B = PP.Includes.atLine(3); ASSERT_EQ(B->Spelled, "b.h"); - EXPECT_THAT(Results.Missing, ElementsAre(Pair("\"c.h\"", Header(CHeader)))); + EXPECT_THAT(Results.MissingIncludes, + ElementsAre(missingInclude("\"c.h\"", Header(CHeader)))); EXPECT_THAT(Results.Unused, ElementsAre(B)); } @@ -317,7 +324,7 @@ TEST_F(AnalyzeTest, ResourceDirIsIgnored) { TestAST AST(Inputs); auto Results = analyze({}, {}, PP.Includes, &PI, AST.preprocessor()); EXPECT_THAT(Results.Unused, testing::IsEmpty()); - EXPECT_THAT(Results.Missing, testing::IsEmpty()); + EXPECT_THAT(Results.MissingIncludes, testing::IsEmpty()); } TEST_F(AnalyzeTest, DifferentHeaderSameSpelling) { @@ -342,7 +349,7 @@ TEST_F(AnalyzeTest, DifferentHeaderSameSpelling) { DeclsInTU.push_back(D); auto Results = analyze(DeclsInTU, {}, PP.Includes, &PI, AST.preprocessor()); EXPECT_THAT(Results.Unused, testing::IsEmpty()); - EXPECT_THAT(Results.Missing, testing::IsEmpty()); + EXPECT_THAT(Results.MissingIncludes, testing::IsEmpty()); } TEST_F(AnalyzeTest, SpellingIncludesWithSymlinks) { @@ -374,26 +381,31 @@ TEST_F(AnalyzeTest, SpellingIncludesWithSymlinks) { auto Results = analyze(DeclsInTU, {}, PP.Includes, &PI, AST.preprocessor()); // Check that we're spelling header using the symlink, and not underlying // path. - EXPECT_THAT(Results.Missing, testing::ElementsAre(Pair("\"inner.h\"", _))); + EXPECT_THAT(Results.MissingIncludes, + testing::ElementsAre(missingSpelling("\"inner.h\""))); // header.h should be unused. EXPECT_THAT(Results.Unused, Not(testing::IsEmpty())); { // Make sure filtering is also applied to symlink, not underlying file. - auto HeaderFilter = [](llvm::StringRef Path) { return Path == "inner.h"; }; - Results = analyze(DeclsInTU, {}, PP.Includes, &PI, AST.preprocessor(), - HeaderFilter); - EXPECT_THAT(Results.Missing, testing::ElementsAre(Pair("\"inner.h\"", _))); + AnalysisOptions Options{ + [](const Header &H) { return H.resolvedPath() == "inner.h"; }}; + Results = + analyze(DeclsInTU, {}, PP.Includes, &PI, AST.preprocessor(), Options); + EXPECT_THAT(Results.MissingIncludes, + testing::ElementsAre(missingSpelling("\"inner.h\""))); // header.h should be unused. EXPECT_THAT(Results.Unused, Not(testing::IsEmpty())); } { - auto HeaderFilter = [](llvm::StringRef Path) { return Path == "header.h"; }; - Results = analyze(DeclsInTU, {}, PP.Includes, &PI, AST.preprocessor(), - HeaderFilter); + AnalysisOptions Options{ + [](const Header &H) { return H.resolvedPath() == "header.h"; }}; + Results = + analyze(DeclsInTU, {}, PP.Includes, &PI, AST.preprocessor(), Options); // header.h should be ignored now. EXPECT_THAT(Results.Unused, Not(testing::IsEmpty())); - EXPECT_THAT(Results.Missing, testing::ElementsAre(Pair("\"inner.h\"", _))); + EXPECT_THAT(Results.MissingIncludes, + testing::ElementsAre(missingSpelling("\"inner.h\""))); } } @@ -422,7 +434,7 @@ TEST_F(AnalyzeTest, ImplicitOperatorNewDeleteNotMissing) { for (auto *D : AST.context().getTranslationUnitDecl()->decls()) DeclsInTU.push_back(D); auto Results = analyze(DeclsInTU, {}, PP.Includes, &PI, AST.preprocessor()); - EXPECT_THAT(Results.Missing, testing::IsEmpty()); + EXPECT_THAT(Results.MissingIncludes, testing::IsEmpty()); } TEST_F(AnalyzeTest, ImplicitOperatorNewDeleteNotUnused) { @@ -467,14 +479,14 @@ TEST(FixIncludes, Basic) { Inc.add(I); AnalysisResults Results; - Results.Missing.emplace_back("\"aa.h\"", Header("")); - Results.Missing.emplace_back("\"ab.h\"", Header("")); - Results.Missing.emplace_back("<e.h>", Header("")); + Results.MissingIncludes.push_back(MissingInclude{"\"aa.h\"", Header("")}); + Results.MissingIncludes.push_back(MissingInclude{"\"ab.h\"", Header("")}); + Results.MissingIncludes.push_back(MissingInclude{"<e.h>", Header("")}); Results.Unused.push_back(Inc.atLine(3)); Results.Unused.push_back(Inc.atLine(4)); EXPECT_EQ(fixIncludes(Results, "d.cc", Code, format::getLLVMStyle()), -R"cpp(#include "d.h" + R"cpp(#include "d.h" #include "a.h" #include "aa.h" #include "ab.h" @@ -482,10 +494,10 @@ R"cpp(#include "d.h" )cpp"); Results = {}; - Results.Missing.emplace_back("\"d.h\"", Header("")); + Results.MissingIncludes.push_back(MissingInclude{"\"d.h\"", Header("")}); Code = R"cpp(#include "a.h")cpp"; EXPECT_EQ(fixIncludes(Results, "d.cc", Code, format::getLLVMStyle()), -R"cpp(#include "d.h" + R"cpp(#include "d.h" #include "a.h")cpp"); } _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
