https://github.com/unterumarmung updated https://github.com/llvm/llvm-project/pull/189458
>From 8f6ff6cbfb39e0b187eee423601a3cda048e2ab2 Mon Sep 17 00:00:00 2001 From: Daniil Dudkin <[email protected]> Date: Mon, 30 Mar 2026 14:24:55 +0300 Subject: [PATCH 1/3] [include-cleaner][NFC] expose and test `norlmalizePath` Also fixes a bug where the root `/` path would become an empty string. --- clang-tools-extra/include-cleaner/lib/Types.cpp | 4 +--- .../include-cleaner/lib/TypesInternal.h | 2 ++ .../include-cleaner/unittests/TypesTest.cpp | 16 +++++++++++++++- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/clang-tools-extra/include-cleaner/lib/Types.cpp b/clang-tools-extra/include-cleaner/lib/Types.cpp index 7a637639edf8b..d7635c258e522 100644 --- a/clang-tools-extra/include-cleaner/lib/Types.cpp +++ b/clang-tools-extra/include-cleaner/lib/Types.cpp @@ -100,14 +100,12 @@ std::string Include::quote() const { .str(); } -static llvm::SmallString<128> normalizePath(llvm::StringRef Path) { +llvm::SmallString<128> normalizePath(llvm::StringRef Path) { namespace path = llvm::sys::path; llvm::SmallString<128> P = Path; path::remove_dots(P, /*remove_dot_dot=*/true); path::native(P, path::Style::posix); - while (!P.empty() && P.back() == '/') - P.pop_back(); return P; } diff --git a/clang-tools-extra/include-cleaner/lib/TypesInternal.h b/clang-tools-extra/include-cleaner/lib/TypesInternal.h index 0bd2766c900ab..60c28956c13c9 100644 --- a/clang-tools-extra/include-cleaner/lib/TypesInternal.h +++ b/clang-tools-extra/include-cleaner/lib/TypesInternal.h @@ -96,6 +96,8 @@ template <typename T> struct Hinted : public T { } }; +llvm::SmallString<128> normalizePath(llvm::StringRef Path); + } // namespace clang::include_cleaner #endif diff --git a/clang-tools-extra/include-cleaner/unittests/TypesTest.cpp b/clang-tools-extra/include-cleaner/unittests/TypesTest.cpp index 609563da488e3..cdc719d620b4c 100644 --- a/clang-tools-extra/include-cleaner/unittests/TypesTest.cpp +++ b/clang-tools-extra/include-cleaner/unittests/TypesTest.cpp @@ -1,4 +1,4 @@ -//===-- RecordTest.cpp ----------------------------------------------------===// +//===-- TypesTest.cpp -----------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "clang-include-cleaner/Types.h" +#include "TypesInternal.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/FileSystemOptions.h" #include "clang/Tooling/Inclusions/StandardLibrary.h" @@ -98,5 +99,18 @@ TEST(RecordedIncludesTest, MatchVerbatimMixedAbsoluteRelative) { EXPECT_THAT(Inc.match(Header("<bar.h>")), IsEmpty()); } +TEST(NormalizePathTest, RemovesDotSegments) { + EXPECT_EQ(normalizePath("foo/./bar/../baz").str(), "foo/baz"); + EXPECT_EQ(normalizePath("foo/bar/").str(), "foo/bar"); +} + +TEST(NormalizePathTest, CanonicalizesSeparators) { + EXPECT_EQ(normalizePath("foo\\bar").str(), "foo/bar"); +} + +TEST(NormalizePathTest, PreservesRootPath) { + EXPECT_EQ(normalizePath("/").str(), "/"); +} + } // namespace } // namespace clang::include_cleaner >From 47a1dde26f791d1afb35d19ca6bbb3e1b83bb7e1 Mon Sep 17 00:00:00 2001 From: Daniil Dudkin <[email protected]> Date: Mon, 30 Mar 2026 14:25:56 +0300 Subject: [PATCH 2/3] [include-cleaner][NFC] record macro refs in direct includes --- .../include/clang-include-cleaner/Record.h | 5 +- .../include-cleaner/lib/Record.cpp | 26 ++++--- .../include-cleaner/unittests/RecordTest.cpp | 71 ++++++++++++++++++- 3 files changed, 90 insertions(+), 12 deletions(-) diff --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Record.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Record.h index 2dcb5ea2555c5..98c2b63489f41 100644 --- a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Record.h +++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Record.h @@ -132,7 +132,7 @@ struct RecordedAST { std::vector<Decl *> Roots; }; -/// Recorded main-file preprocessor events relevant to include-cleaner. +/// Recorded preprocessor events relevant to include-cleaner. /// /// This doesn't include facts that we record globally for the whole TU, even /// when they occur in the main file (e.g. IWYU pragmas). @@ -140,7 +140,8 @@ struct RecordedPP { /// The callback (when installed into clang) tracks macros/includes in this. std::unique_ptr<PPCallbacks> record(const Preprocessor &PP); - /// Describes where macros were used in the main file. + /// Describes where macros were used in the main file or in files directly + /// included by the main file. std::vector<SymbolReference> MacroReferences; /// The include directives seen in the main file. diff --git a/clang-tools-extra/include-cleaner/lib/Record.cpp b/clang-tools-extra/include-cleaner/lib/Record.cpp index 0284d6842e2d2..a19fcf525919d 100644 --- a/clang-tools-extra/include-cleaner/lib/Record.cpp +++ b/clang-tools-extra/include-cleaner/lib/Record.cpp @@ -84,13 +84,13 @@ class PPRecorder : public PPCallbacks { void MacroExpands(const Token &MacroName, const MacroDefinition &MD, SourceRange Range, const MacroArgs *Args) override { - if (!Active) + if (!shouldRecordMacroRef(MacroName.getLocation())) return; recordMacroRef(MacroName, *MD.getMacroInfo()); } void MacroDefined(const Token &MacroName, const MacroDirective *MD) override { - if (!Active) + if (!shouldRecordMacroRef(MacroName.getLocation())) return; const auto *MI = MD->getMacroInfo(); @@ -110,7 +110,7 @@ class PPRecorder : public PPCallbacks { void MacroUndefined(const Token &MacroName, const MacroDefinition &MD, const MacroDirective *) override { - if (!Active) + if (!shouldRecordMacroRef(MacroName.getLocation())) return; if (const auto *MI = MD.getMacroInfo()) recordMacroRef(MacroName, *MI); @@ -118,7 +118,7 @@ class PPRecorder : public PPCallbacks { void Ifdef(SourceLocation Loc, const Token &MacroNameTok, const MacroDefinition &MD) override { - if (!Active) + if (!shouldRecordMacroRef(MacroNameTok.getLocation())) return; if (const auto *MI = MD.getMacroInfo()) recordMacroRef(MacroNameTok, *MI, RefType::Ambiguous); @@ -126,7 +126,7 @@ class PPRecorder : public PPCallbacks { void Ifndef(SourceLocation Loc, const Token &MacroNameTok, const MacroDefinition &MD) override { - if (!Active) + if (!shouldRecordMacroRef(MacroNameTok.getLocation())) return; if (const auto *MI = MD.getMacroInfo()) recordMacroRef(MacroNameTok, *MI, RefType::Ambiguous); @@ -136,14 +136,14 @@ class PPRecorder : public PPCallbacks { using PPCallbacks::Elifndef; void Elifdef(SourceLocation Loc, const Token &MacroNameTok, const MacroDefinition &MD) override { - if (!Active) + if (!shouldRecordMacroRef(MacroNameTok.getLocation())) return; if (const auto *MI = MD.getMacroInfo()) recordMacroRef(MacroNameTok, *MI, RefType::Ambiguous); } void Elifndef(SourceLocation Loc, const Token &MacroNameTok, const MacroDefinition &MD) override { - if (!Active) + if (!shouldRecordMacroRef(MacroNameTok.getLocation())) return; if (const auto *MI = MD.getMacroInfo()) recordMacroRef(MacroNameTok, *MI, RefType::Ambiguous); @@ -151,13 +151,23 @@ class PPRecorder : public PPCallbacks { void Defined(const Token &MacroNameTok, const MacroDefinition &MD, SourceRange Range) override { - if (!Active) + if (!shouldRecordMacroRef(MacroNameTok.getLocation())) return; if (const auto *MI = MD.getMacroInfo()) recordMacroRef(MacroNameTok, *MI, RefType::Ambiguous); } private: + bool shouldRecordMacroRef(SourceLocation Loc) const { + const SourceLocation ExpandedLoc = SM.getExpansionLoc(Loc); + const FileID FID = SM.getFileID(ExpandedLoc); + if (FID == SM.getMainFileID()) + return true; + const SourceLocation IncludeLoc = SM.getIncludeLoc(FID); + return IncludeLoc.isValid() && + SM.getFileID(IncludeLoc) == SM.getMainFileID(); + } + void recordMacroRef(const Token &Tok, const MacroInfo &MI, RefType RT = RefType::Explicit) { if (MI.isBuiltinMacro()) diff --git a/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp b/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp index cbf7bae23b365..c4b785f763750 100644 --- a/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp +++ b/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp @@ -209,8 +209,8 @@ TEST_F(RecordPPTest, CapturesMacroRefs) { #define $def^X 1 // Refs, but not in main file. - #define Y X - int one = X; + #define Y $hdr^X + int one = $hdr^X; )cpp"); llvm::Annotations MainFile(R"cpp( #define EARLY X // not a ref, no definition @@ -244,11 +244,15 @@ TEST_F(RecordPPTest, CapturesMacroRefs) { std::vector<unsigned> RefOffsets; std::vector<unsigned> ExpOffsets; // Expansion locs of refs in macro locs. + std::vector<unsigned> HeaderOffsets; for (const auto &Ref : Recorded.MacroReferences) { if (Ref.Target == OrigX) { auto [FID, Off] = SM.getDecomposedLoc(Ref.RefLocation); if (FID == SM.getMainFileID()) { RefOffsets.push_back(Off); + } else if (FID == SM.translateFile(*AST.fileManager().getOptionalFileRef( + "header.h"))) { + HeaderOffsets.push_back(Off); } else if (Ref.RefLocation.isMacroID() && SM.isWrittenInMainFile(SM.getExpansionLoc(Ref.RefLocation))) { ExpOffsets.push_back( @@ -260,6 +264,7 @@ TEST_F(RecordPPTest, CapturesMacroRefs) { } EXPECT_THAT(RefOffsets, ElementsAreArray(MainFile.points())); EXPECT_THAT(ExpOffsets, ElementsAreArray(MainFile.points("exp"))); + EXPECT_THAT(HeaderOffsets, ElementsAreArray(Header.points("hdr"))); } TEST_F(RecordPPTest, CapturesConditionalMacroRefs) { @@ -300,6 +305,68 @@ TEST_F(RecordPPTest, CapturesConditionalMacroRefs) { EXPECT_THAT(RefOffsets, ElementsAreArray(MainFile.points())); } +TEST_F(RecordPPTest, CapturesConditionalMacroRefsInDirectIncludes) { + llvm::Annotations Header(R"cpp( + #ifdef ^X + #endif + + #if defined(^X) + #endif + + #ifndef ^X + #endif + + #ifdef Y + #elifdef ^X + #endif + + #ifndef ^X + #elifndef ^X + #endif + )cpp"); + + Inputs.Code = R"cpp( + #define X 1 + #include "header.h" + )cpp"; + Inputs.ExtraFiles["header.h"] = Header.code(); + Inputs.ExtraArgs.push_back("-std=c++2b"); + auto AST = build(); + + std::vector<unsigned> RefOffsets; + SourceManager &SM = AST.sourceManager(); + FileID HeaderID = + SM.translateFile(*AST.fileManager().getOptionalFileRef("header.h")); + for (const auto &Ref : Recorded.MacroReferences) { + auto [FID, Off] = SM.getDecomposedLoc(Ref.RefLocation); + ASSERT_EQ(FID, HeaderID); + EXPECT_EQ(Ref.RT, RefType::Ambiguous); + EXPECT_EQ("X", Ref.Target.macro().Name->getName()); + RefOffsets.push_back(Off); + } + EXPECT_THAT(RefOffsets, ElementsAreArray(Header.points())); +} + +TEST_F(RecordPPTest, DoesNotCaptureMacroRefsInTransitiveIncludes) { + Inputs.Code = R"cpp( + #define X 1 + #include "header.h" + )cpp"; + Inputs.ExtraFiles["header.h"] = R"cpp( + #include "nested.h" + )cpp"; + Inputs.ExtraFiles["nested.h"] = R"cpp( + int one = X; + + #ifdef X + #endif + )cpp"; + Inputs.ExtraArgs.push_back("-std=c++2b"); + build(); + + EXPECT_THAT(Recorded.MacroReferences, IsEmpty()); +} + class PragmaIncludeTest : public ::testing::Test { protected: // We don't build an AST, we just run a preprocessor action! >From dd878075111c2c7d4f276dcf5c5fe850114d77d2 Mon Sep 17 00:00:00 2001 From: Daniil Dudkin <[email protected]> Date: Mon, 30 Mar 2026 14:26:42 +0300 Subject: [PATCH 3/3] [include-cleaner] add fragment header support --- clang-tools-extra/docs/ReleaseNotes.rst | 8 + .../include/clang-include-cleaner/Analysis.h | 107 +++- .../include-cleaner/lib/Analysis.cpp | 524 +++++++++++++--- .../include-cleaner/lib/Record.cpp | 9 +- .../include-cleaner/lib/Types.cpp | 17 + .../include-cleaner/lib/TypesInternal.h | 10 + .../include-cleaner/test/Inputs/a.inc | 4 + .../include-cleaner/test/Inputs/b.inc | 4 + .../include-cleaner/test/Inputs/gen.inc | 4 + .../test/Inputs/generated/gen.inc | 4 + .../include-cleaner/test/Inputs/inner.inc | 4 + .../include-cleaner/test/Inputs/outer.inc | 2 + .../include-cleaner/test/Inputs/vector | 4 + .../test/fragments-block-comment.cpp | 6 + .../test/fragments-empty-comment.cpp | 6 + .../include-cleaner/test/fragments-multi.cpp | 10 + .../test/fragments-nonrecursive.cpp | 5 + .../test/fragments-spelled-path.cpp | 8 + .../include-cleaner/test/fragments.cpp | 16 + .../include-cleaner/tool/IncludeCleaner.cpp | 127 +++- .../unittests/AnalysisTest.cpp | 578 +++++++++++++++++- 21 files changed, 1306 insertions(+), 151 deletions(-) create mode 100644 clang-tools-extra/include-cleaner/test/Inputs/a.inc create mode 100644 clang-tools-extra/include-cleaner/test/Inputs/b.inc create mode 100644 clang-tools-extra/include-cleaner/test/Inputs/gen.inc create mode 100644 clang-tools-extra/include-cleaner/test/Inputs/generated/gen.inc create mode 100644 clang-tools-extra/include-cleaner/test/Inputs/inner.inc create mode 100644 clang-tools-extra/include-cleaner/test/Inputs/outer.inc create mode 100644 clang-tools-extra/include-cleaner/test/Inputs/vector create mode 100644 clang-tools-extra/include-cleaner/test/fragments-block-comment.cpp create mode 100644 clang-tools-extra/include-cleaner/test/fragments-empty-comment.cpp create mode 100644 clang-tools-extra/include-cleaner/test/fragments-multi.cpp create mode 100644 clang-tools-extra/include-cleaner/test/fragments-nonrecursive.cpp create mode 100644 clang-tools-extra/include-cleaner/test/fragments-spelled-path.cpp create mode 100644 clang-tools-extra/include-cleaner/test/fragments.cpp diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index eb735e6e62ee4..9f733f7f09065 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -450,6 +450,14 @@ Miscellaneous Improvements to include-fixer ----------------------------- +Improvements to clang-include-cleaner +------------------------------------- + +- Added :program:`clang-include-cleaner` support for treating matching direct + includes as fragments of the main file with ``--fragment-headers`` and for + optionally annotating preserved includes with + ``--fragment-dependency-comment-format``. + Improvements to clang-include-fixer ----------------------------------- diff --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h index c3241763237d1..d53796c659384 100644 --- a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h +++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h @@ -16,21 +16,24 @@ #include "clang/Format/Format.h" #include "clang/Lex/HeaderSearch.h" #include "clang/Lex/Preprocessor.h" +#include "clang/Tooling/Core/Replacement.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include <functional> +#include <optional> #include <string> #include <utility> namespace clang { class SourceLocation; +class FileID; class SourceManager; class Decl; class FileEntry; class HeaderSearch; namespace tooling { -class Replacements; struct IncludeStyle; } // namespace tooling namespace include_cleaner { @@ -60,24 +63,100 @@ void walkUsed(llvm::ArrayRef<Decl *> ASTRoots, llvm::ArrayRef<SymbolReference> MacroRefs, const PragmaIncludes *PI, const Preprocessor &PP, UsedSymbolCB CB); +/// Overload that allows customizing which FileIDs are treated as "main file". +/// The predicate is evaluated on the expansion file of a reference. +void walkUsed(llvm::ArrayRef<Decl *> ASTRoots, + llvm::ArrayRef<SymbolReference> MacroRefs, + const PragmaIncludes *PI, const Preprocessor &PP, UsedSymbolCB CB, + llvm::function_ref<bool(FileID)> IsMainFile); + +/// A location kind where a symbol reference is observed. +enum class SymbolReferenceOrigin { + MainFile, + Preamble, + Fragment, +}; + +/// A missing include finding with per-reference provenance. +struct MissingIncludeRef { + SymbolReference Ref; + llvm::SmallVector<Header> Providers; + SymbolReferenceOrigin Origin = SymbolReferenceOrigin::MainFile; + // Null if the fragment file has multiple direct include sites. + const Include *FragmentInclude = nullptr; +}; +/// An include kept alive only by fragment usage. +struct FragmentDependency { + const Include *Preserved = nullptr; + llvm::SmallVector<const Include *> Fragments; +}; + +/// The result of include-cleaner analysis for one main file. struct AnalysisResults { std::vector<const Include *> Unused; - // Spellings, like "<vector>" paired with the Header that generated it. - std::vector<std::pair<std::string, Header>> Missing; + // Deduplicated insertion plan, e.g. "<vector>" paired with the chosen + // provider Header. + std::vector<std::pair<std::string, Header>> MissingIncludes; + // Per-reference provenance for consumers that need richer diagnostics. + std::vector<MissingIncludeRef> MissingRefs; + std::vector<FragmentDependency> FragmentDependencies; +}; + +/// Analysis configuration shared by include-cleaner consumers. +struct AnalysisOptions { + /// No analysis will be performed for headers that satisfy the predicate. + std::function<bool(const Header &)> HeaderFilter; + /// A predicate matched against normalized resolved paths, and normalized + /// spelled paths as a fallback, to identify direct include fragments. + std::function<bool(llvm::StringRef)> FragmentHeaderFilter; }; /// Determine which headers should be inserted or removed from the main file. /// This exposes conclusions but not reasons: use lower-level walkUsed for that. -/// -/// The HeaderFilter is a predicate that receives absolute path or spelling -/// without quotes/brackets, when a phyiscal file doesn't exist. -/// No analysis will be performed for headers that satisfy the predicate. -AnalysisResults -analyze(llvm::ArrayRef<Decl *> ASTRoots, - llvm::ArrayRef<SymbolReference> MacroRefs, const Includes &I, - const PragmaIncludes *PI, const Preprocessor &PP, - llvm::function_ref<bool(llvm::StringRef)> HeaderFilter = nullptr); +AnalysisResults analyze(llvm::ArrayRef<Decl *> ASTRoots, + llvm::ArrayRef<SymbolReference> MacroRefs, + const Includes &I, const PragmaIncludes *PI, + const Preprocessor &PP, + const AnalysisOptions &Options = {}); + +enum class FragmentDependencyCommentStatus { + CanInsert, + AlreadyPresent, + ConflictingComment, +}; + +/// Planned comment state for an include kept alive by fragments. +struct FragmentDependencyComment { + const Include *Preserved = nullptr; + llvm::SmallVector<const Include *> Fragments; + std::string Text; + FragmentDependencyCommentStatus Status = + FragmentDependencyCommentStatus::CanInsert; + std::optional<tooling::Replacement> Replacement; +}; + +/// Replacements computed from include-cleaner findings. +struct IncludeFixes { + tooling::Replacements Replacements; + std::vector<FragmentDependencyComment> FragmentComments; +}; + +/// Options for turning analysis results into source edits. +struct FixIncludesOptions { + /// Raw trailing comment text without the leading //. + /// + /// When it contains `{0}`, that placeholder is replaced with a comma- + /// separated list of direct fragment include spellings that keep the include + /// alive. + llvm::StringRef FragmentDependencyCommentFormat; +}; + +/// Computes replacements to apply include-cleaner findings to the main file. +IncludeFixes computeIncludeFixes(const AnalysisResults &Results, + llvm::StringRef FileName, llvm::StringRef Code, + const format::FormatStyle &IncludeStyle, + const FixIncludesOptions &Options = {}); /// Removes unused includes and inserts missing ones in the main file. /// Returns the modified main-file code. @@ -85,6 +164,10 @@ analyze(llvm::ArrayRef<Decl *> ASTRoots, std::string fixIncludes(const AnalysisResults &Results, llvm::StringRef FileName, llvm::StringRef Code, const format::FormatStyle &IncludeStyle); +std::string fixIncludes(const AnalysisResults &Results, + llvm::StringRef FileName, llvm::StringRef Code, + const format::FormatStyle &IncludeStyle, + const FixIncludesOptions &Options); /// Gets all the providers for a symbol by traversing each location. /// Returned headers are sorted by relevance, first element is the most diff --git a/clang-tools-extra/include-cleaner/lib/Analysis.cpp b/clang-tools-extra/include-cleaner/lib/Analysis.cpp index e48a380211af0..fa64a9dea13c3 100644 --- a/clang-tools-extra/include-cleaner/lib/Analysis.cpp +++ b/clang-tools-extra/include-cleaner/lib/Analysis.cpp @@ -8,6 +8,7 @@ #include "clang-include-cleaner/Analysis.h" #include "AnalysisInternal.h" +#include "TypesInternal.h" #include "clang-include-cleaner/IncludeSpeller.h" #include "clang-include-cleaner/Record.h" #include "clang-include-cleaner/Types.h" @@ -22,9 +23,11 @@ #include "clang/Tooling/Core/Replacement.h" #include "clang/Tooling/Inclusions/StandardLibrary.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/STLFunctionalExtras.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" @@ -32,13 +35,91 @@ #include "llvm/Support/ErrorHandling.h" #include <cassert> #include <climits> +#include <optional> #include <string> namespace clang::include_cleaner { namespace { + +struct ClassifiedReference { + SymbolReferenceOrigin Origin; + // Null if the fragment file has multiple direct include sites. + const Include *FragmentInclude = nullptr; +}; + +class FragmentTracker { +public: + FragmentTracker(const Includes &Inc, const SourceManager &SM, + const std::function<bool(llvm::StringRef)> &HeaderFilter) + : SM(SM) { + if (!HeaderFilter) + return; + + for (const Include &I : Inc.all()) { + if (!I.Resolved || + locateInMainFile(I.HashLocation, SM) != MainFileLocation::MainFile) { + continue; + } + + // Match the canonical path first, but fall back to the spelled include + // so generated paths can still be configured even when resolution loses + // that spelling detail. + const llvm::SmallString<128> ResolvedPath = + normalizePath(I.Resolved->getName()); + bool IsFragment = HeaderFilter(ResolvedPath); + if (!IsFragment) { + const llvm::SmallString<128> SpelledPath = normalizePath(I.Spelled); + if (!SpelledPath.empty()) + IsFragment = HeaderFilter(SpelledPath); + } + if (!IsFragment) + continue; + + // Fragments are intentionally direct-includes-only for now. If a + // fragment includes another file, that nested include keeps normal + // header semantics. + IncludeSitesByFile[&I.Resolved->getFileEntry()].push_back(&I); + DirectIncludes.insert(&I); + } + } + + std::optional<ClassifiedReference> classify(FileID FID) const { + if (FID == SM.getMainFileID()) + return ClassifiedReference{SymbolReferenceOrigin::MainFile}; + if (FID == SM.getPreambleFileID()) + return ClassifiedReference{SymbolReferenceOrigin::Preamble}; + auto FE = SM.getFileEntryRefForID(FID); + if (!FE) + return std::nullopt; + auto It = IncludeSitesByFile.find(&FE->getFileEntry()); + if (It == IncludeSitesByFile.end()) + return std::nullopt; + if (It->second.size() != 1) + return ClassifiedReference{SymbolReferenceOrigin::Fragment}; + return ClassifiedReference{SymbolReferenceOrigin::Fragment, + It->second.front()}; + } + + bool isDirectInclude(const Include *I) const { + return DirectIncludes.contains(I); + } + +private: + const SourceManager &SM; + llvm::DenseMap<const FileEntry *, llvm::SmallVector<const Include *>> + IncludeSitesByFile; + llvm::DenseSet<const Include *> DirectIncludes; +}; + +using UsedSymbolWithOriginCB = llvm::function_ref<void( + const SymbolReference &Ref, llvm::ArrayRef<Header> Providers, + const ClassifiedReference &Info)>; + bool shouldIgnoreMacroReference(const Preprocessor &PP, const Macro &M) { - auto *MI = PP.getMacroInfo(M.Name); + auto &MutablePP = const_cast<Preprocessor &>(PP); + auto MD = MutablePP.getMacroDefinitionAtLoc(M.Name, M.Definition); + auto *MI = MD ? MD.getMacroInfo() : PP.getMacroInfo(M.Name); // Macros that expand to themselves are confusing from user's point of view. // They usually aspect the usage to be attributed to the underlying decl and // not the macro definition. So ignore such macros (e.g. std{in,out,err} are @@ -47,15 +128,12 @@ bool shouldIgnoreMacroReference(const Preprocessor &PP, const Macro &M) { return MI && MI->getNumTokens() == 1 && MI->isObjectLike() && MI->getReplacementToken(0).getIdentifierInfo() == M.Name; } -} // namespace -void walkUsed(llvm::ArrayRef<Decl *> ASTRoots, - llvm::ArrayRef<SymbolReference> MacroRefs, - const PragmaIncludes *PI, const Preprocessor &PP, - UsedSymbolCB CB) { +void walkUsedWithOrigins( + llvm::ArrayRef<Decl *> ASTRoots, llvm::ArrayRef<SymbolReference> MacroRefs, + const PragmaIncludes *PI, const Preprocessor &PP, UsedSymbolWithOriginCB CB, + llvm::function_ref<std::optional<ClassifiedReference>(FileID)> Classify) { const auto &SM = PP.getSourceManager(); - // This is duplicated in writeHTMLReport, changes should be mirrored there. - tooling::stdlib::Recognizer Recognizer; for (auto *Root : ASTRoots) { walkAST(*Root, [&](SourceLocation Loc, NamedDecl &ND, RefType RT) { auto SpellLoc = SM.getSpellingLoc(Loc); @@ -71,111 +149,385 @@ void walkUsed(llvm::ArrayRef<Decl *> ASTRoots, SpellLoc = SM.getSpellingLoc(Loc); } auto FID = SM.getFileID(SpellLoc); - if (FID != SM.getMainFileID() && FID != SM.getPreambleFileID()) + auto Info = Classify(FID); + if (!Info) return; // FIXME: Most of the work done here is repetitive. It might be useful to // have a cache/batching. SymbolReference SymRef{ND, Loc, RT}; - return CB(SymRef, headersForSymbol(ND, PP, PI)); + return CB(SymRef, headersForSymbol(ND, PP, PI), *Info); }); } + for (const SymbolReference &MacroRef : MacroRefs) { assert(MacroRef.Target.kind() == Symbol::Macro); - if (!SM.isWrittenInMainFile(SM.getSpellingLoc(MacroRef.RefLocation)) || - shouldIgnoreMacroReference(PP, MacroRef.Target.macro())) + if (shouldIgnoreMacroReference(PP, MacroRef.Target.macro())) continue; - CB(MacroRef, headersForSymbol(MacroRef.Target, PP, PI)); + auto FID = SM.getFileID(SM.getSpellingLoc(MacroRef.RefLocation)); + auto Info = Classify(FID); + if (!Info) + continue; + CB(MacroRef, headersForSymbol(MacroRef.Target, PP, PI), *Info); } } -AnalysisResults -analyze(llvm::ArrayRef<Decl *> ASTRoots, - llvm::ArrayRef<SymbolReference> MacroRefs, const Includes &Inc, - const PragmaIncludes *PI, const Preprocessor &PP, - llvm::function_ref<bool(llvm::StringRef)> HeaderFilter) { - auto &SM = PP.getSourceManager(); - const auto MainFile = *SM.getFileEntryRefForID(SM.getMainFileID()); +bool isFilteredInclude(const Include &I, + llvm::function_ref<bool(const Header &)> HeaderFilter, + const Preprocessor &PP) { + if (I.Angled) { + auto Lang = PP.getLangOpts().CPlusPlus ? tooling::stdlib::Lang::CXX + : tooling::stdlib::Lang::C; + if (auto StdHeader = tooling::stdlib::Header::named(I.quote(), Lang); + StdHeader && HeaderFilter(*StdHeader)) { + return true; + } + } + return I.Resolved && HeaderFilter(*I.Resolved); +} + +bool shouldSuppressIncludeDiagnostic( + const Include &I, FileEntryRef MainFile, + llvm::function_ref<bool(const Header &)> HeaderFilter, + const PragmaIncludes *PI, const Preprocessor &PP, + OptionalDirectoryEntryRef ResourceDir) { + if (!I.Resolved || I.Resolved->getDir() == ResourceDir || + isFilteredInclude(I, HeaderFilter, PP)) { + return true; + } + if (!PI) + return false; + if (PI->shouldKeep(*I.Resolved)) + return true; + // Check if main file is the public interface for a private header. If so + // we shouldn't diagnose it as unused, or add fragment comments for it. + if (auto PHeader = PI->getPublic(*I.Resolved); !PHeader.empty()) { + PHeader = PHeader.trim("<>\""); + if (MainFile.getName().ends_with(PHeader)) + return true; + } + return false; +} + +} // namespace + +void walkUsed(llvm::ArrayRef<Decl *> ASTRoots, + llvm::ArrayRef<SymbolReference> MacroRefs, + const PragmaIncludes *PI, const Preprocessor &PP, + UsedSymbolCB CB) { + const auto &SM = PP.getSourceManager(); + walkUsed(ASTRoots, MacroRefs, PI, PP, CB, [&](FileID FID) { + return FID == SM.getMainFileID() || FID == SM.getPreambleFileID(); + }); +} + +void walkUsed(llvm::ArrayRef<Decl *> ASTRoots, + llvm::ArrayRef<SymbolReference> MacroRefs, + const PragmaIncludes *PI, const Preprocessor &PP, UsedSymbolCB CB, + llvm::function_ref<bool(FileID)> IsMainFile) { + walkUsedWithOrigins( + ASTRoots, MacroRefs, PI, PP, + [&](const SymbolReference &Ref, llvm::ArrayRef<Header> Providers, + const ClassifiedReference &) { CB(Ref, Providers); }, + [&](FileID FID) -> std::optional<ClassifiedReference> { + if (!IsMainFile(FID)) + return std::nullopt; + return ClassifiedReference{SymbolReferenceOrigin::MainFile}; + }); +} + +namespace { + +class IncludeUsage { +public: + void mark(const Include *I, const ClassifiedReference &Info) { + Used.insert(I); + if (Info.Origin != SymbolReferenceOrigin::Fragment) { + UsedByMainOrPreamble.insert(I); + return; + } + + UsedByFragment.insert(I); + if (!Info.FragmentInclude) + return; + auto &Reasons = ByPreserved[I]; + if (!llvm::is_contained(Reasons, Info.FragmentInclude)) + Reasons.push_back(Info.FragmentInclude); + } + + bool contains(const Include *I) const { return Used.contains(I); } + + bool isFragmentOnly(const Include *I) const { + return UsedByFragment.contains(I) && !UsedByMainOrPreamble.contains(I); + } + + llvm::SmallVector<const Include *> fragmentSites(const Include *I) const { + auto It = ByPreserved.find(I); + if (It == ByPreserved.end()) + return {}; + return It->second; + } + +private: llvm::DenseSet<const Include *> Used; + llvm::DenseSet<const Include *> UsedByMainOrPreamble; + llvm::DenseSet<const Include *> UsedByFragment; + llvm::DenseMap<const Include *, llvm::SmallVector<const Include *>> + ByPreserved; +}; + +class MissingIncludeCollector { +public: + void add(llvm::StringRef Spelling, const Header &Provider) { + Missing.try_emplace(Spelling, Provider); + } + + std::vector<std::pair<std::string, Header>> take() && { + std::vector<std::pair<std::string, Header>> Result; + Result.reserve(Missing.size()); + for (auto &E : Missing) + Result.emplace_back(E.first().str(), E.second); + llvm::sort(Result); + return Result; + } + +private: llvm::StringMap<Header> Missing; - constexpr auto DefaultHeaderFilter = [](llvm::StringRef) { return false; }; - if (!HeaderFilter) - HeaderFilter = DefaultHeaderFilter; +}; + +} // namespace + +AnalysisResults analyze(llvm::ArrayRef<Decl *> ASTRoots, + llvm::ArrayRef<SymbolReference> MacroRefs, + const Includes &Inc, const PragmaIncludes *PI, + const Preprocessor &PP, + const AnalysisOptions &Options) { + auto &SM = PP.getSourceManager(); + const auto MainFile = *SM.getFileEntryRefForID(SM.getMainFileID()); + auto HeaderFilter = [&](const Header &H) { + return Options.HeaderFilter && Options.HeaderFilter(H); + }; + FragmentTracker Fragments(Inc, SM, Options.FragmentHeaderFilter); + IncludeUsage Usage; + MissingIncludeCollector MissingIncludes; + std::vector<MissingIncludeRef> MissingRefs; OptionalDirectoryEntryRef ResourceDir = PP.getHeaderSearchInfo().getModuleMap().getBuiltinDir(); - walkUsed(ASTRoots, MacroRefs, PI, PP, - [&](const SymbolReference &Ref, llvm::ArrayRef<Header> Providers) { - bool Satisfied = false; - for (const Header &H : Providers) { - if (H.kind() == Header::Physical && - (H.physical() == MainFile || - H.physical().getDir() == ResourceDir)) { - Satisfied = true; - } - for (const Include *I : Inc.match(H)) { - Used.insert(I); - Satisfied = true; - } - } - // Bail out if we can't (or need not) insert an include. - if (Satisfied || Providers.empty() || Ref.RT != RefType::Explicit) - return; - if (HeaderFilter(Providers.front().resolvedPath())) - return; - // Check if we have any headers with the same spelling, in edge - // cases like `#include_next "foo.h"`, the user can't ever - // include the physical foo.h, but can have a spelling that - // refers to it. - auto Spelling = spellHeader( - {Providers.front(), PP.getHeaderSearchInfo(), MainFile}); - for (const Include *I : Inc.match(Header{Spelling})) { - Used.insert(I); - Satisfied = true; - } - if (!Satisfied) - Missing.try_emplace(std::move(Spelling), Providers.front()); - }); - - AnalysisResults Results; + + walkUsedWithOrigins( + ASTRoots, MacroRefs, PI, PP, + [&](const SymbolReference &Ref, llvm::ArrayRef<Header> Providers, + const ClassifiedReference &Info) { + bool Satisfied = false; + for (const Header &H : Providers) { + if (H.kind() == Header::Physical && + (H.physical() == MainFile || + H.physical().getDir() == ResourceDir)) { + Satisfied = true; + } + for (const Include *I : Inc.match(H)) { + Usage.mark(I, Info); + Satisfied = true; + } + } + + if (Satisfied || Providers.empty() || Ref.RT != RefType::Explicit) + return; + if (HeaderFilter(Providers.front())) + return; + + auto Spelling = spellHeader( + {Providers.front(), PP.getHeaderSearchInfo(), MainFile}); + for (const Include *I : Inc.match(Header{Spelling})) { + Usage.mark(I, Info); + Satisfied = true; + } + if (Satisfied) + return; + + // MissingIncludes drives edits, while MissingRefs preserves where the + // unsatisfied use came from for higher-level diagnostics. + MissingIncludes.add(Spelling, Providers.front()); + MissingRefs.push_back(MissingIncludeRef{ + Ref, llvm::SmallVector<Header>(Providers.begin(), Providers.end()), + Info.Origin, Info.FragmentInclude}); + }, + [&](FileID FID) { return Fragments.classify(FID); }); + + AnalysisResults Results{{}, {}, std::move(MissingRefs), {}}; for (const Include &I : Inc.all()) { - if (Used.contains(&I) || !I.Resolved || - HeaderFilter(I.Resolved->getName()) || - I.Resolved->getDir() == ResourceDir) - continue; - if (PI) { - if (PI->shouldKeep(*I.Resolved)) - continue; - // Check if main file is the public interface for a private header. If so - // we shouldn't diagnose it as unused. - if (auto PHeader = PI->getPublic(*I.Resolved); !PHeader.empty()) { - PHeader = PHeader.trim("<>\""); - // Since most private -> public mappings happen in a verbatim way, we - // check textually here. This might go wrong in presence of symlinks or - // header mappings. But that's not different than rest of the places. - if (MainFile.getName().ends_with(PHeader)) - continue; + bool Suppressed = shouldSuppressIncludeDiagnostic(I, MainFile, HeaderFilter, + PI, PP, ResourceDir); + if (Usage.contains(&I)) { + const llvm::SmallVector<const Include *> FragmentSites = + Usage.fragmentSites(&I); + // Ambiguous fragment sites still preserve the header, but get no comment. + if (!Suppressed && Usage.isFragmentOnly(&I) && + !Fragments.isDirectInclude(&I) && !FragmentSites.empty()) { + Results.FragmentDependencies.push_back( + FragmentDependency{&I, FragmentSites}); } + continue; } + if (Suppressed) + continue; Results.Unused.push_back(&I); } - for (auto &E : Missing) - Results.Missing.emplace_back(E.first().str(), E.second); - llvm::sort(Results.Missing); + Results.MissingIncludes = std::move(MissingIncludes).take(); return Results; } -std::string fixIncludes(const AnalysisResults &Results, - llvm::StringRef FileName, llvm::StringRef Code, - const format::FormatStyle &Style) { +namespace { + +// computeIncludeFixes works on plain text, so comment planning needs its own +// lightweight line lookup rather than SourceManager-based offsets. +class LineIndex { +public: + explicit LineIndex(llvm::StringRef Code) : Code(Code) { + Starts.push_back(0); + for (unsigned I = 0; I < Code.size(); ++I) { + if (Code[I] == '\n') + Starts.push_back(I + 1); + } + Starts.push_back(Code.size()); + } + + llvm::StringRef line(unsigned OneBasedLine) const { + auto [Start, End] = bounds(OneBasedLine); + return Code.slice(Start, End); + } + + unsigned trimmedLineEnd(unsigned OneBasedLine) const { + auto [Start, End] = bounds(OneBasedLine); + while (End > Start && (Code[End - 1] == ' ' || Code[End - 1] == '\t')) + --End; + return End; + } + +private: + std::pair<unsigned, unsigned> bounds(unsigned OneBasedLine) const { + if (OneBasedLine == 0 || OneBasedLine >= Starts.size()) + return {Code.size(), Code.size()}; + unsigned Start = Starts[OneBasedLine - 1]; + unsigned End = + Starts[OneBasedLine] == 0 ? Code.size() : Starts[OneBasedLine] - 1; + if (End > Start && Code[End - 1] == '\r') + --End; + return {Start, End}; + } + + llvm::StringRef Code; + llvm::SmallVector<unsigned> Starts; +}; + +std::string formatFragmentDependencyComment( + llvm::StringRef Format, llvm::ArrayRef<const Include *> FragmentIncludes) { + if (Format.empty()) + return {}; + + std::string FragmentList; + for (const Include *Fragment : FragmentIncludes) { + if (!FragmentList.empty()) + FragmentList += ", "; + FragmentList += Fragment->quote(); + } + + std::string Result; + llvm::StringRef Remaining = Format; + while (true) { + auto Pos = Remaining.find("{0}"); + if (Pos == llvm::StringRef::npos) { + Result += Remaining.str(); + return Result; + } + Result += Remaining.take_front(Pos).str(); + Result += FragmentList; + Remaining = Remaining.drop_front(Pos + 3); + } +} + +FragmentDependencyComment inspectFragmentDependencyComment( + const FragmentDependency &Dependency, llvm::StringRef FileName, + const LineIndex &Lines, llvm::StringRef CommentFormat) { + FragmentDependencyComment Comment{ + Dependency.Preserved, Dependency.Fragments, + formatFragmentDependencyComment(CommentFormat, Dependency.Fragments), + FragmentDependencyCommentStatus::CanInsert, std::nullopt}; + if (Comment.Text.empty()) + return Comment; + + llvm::StringRef Line = Lines.line(Dependency.Preserved->Line); + size_t IncludePos = Line.find(Dependency.Preserved->quote()); + if (IncludePos == llvm::StringRef::npos) { + Comment.Status = FragmentDependencyCommentStatus::ConflictingComment; + return Comment; + } + + llvm::StringRef Tail = + Line.drop_front(IncludePos + Dependency.Preserved->quote().size()); + llvm::StringRef TrimmedTail = Tail.ltrim(" \t"); + if (TrimmedTail.empty()) { + Comment.Status = FragmentDependencyCommentStatus::CanInsert; + Comment.Replacement = tooling::Replacement( + FileName, Lines.trimmedLineEnd(Dependency.Preserved->Line), 0, + " // " + Comment.Text); + return Comment; + } + + if (TrimmedTail.starts_with("//")) { + llvm::StringRef Existing = TrimmedTail.drop_front(2).trim(); + Comment.Status = Existing == Comment.Text + ? FragmentDependencyCommentStatus::AlreadyPresent + : FragmentDependencyCommentStatus::ConflictingComment; + return Comment; + } + + Comment.Status = FragmentDependencyCommentStatus::ConflictingComment; + return Comment; +} + +} // namespace + +IncludeFixes computeIncludeFixes(const AnalysisResults &Results, + llvm::StringRef FileName, llvm::StringRef Code, + const format::FormatStyle &Style, + const FixIncludesOptions &Options) { assert(Style.isCpp() && "Only C++ style supports include insertions!"); - tooling::Replacements R; - // Encode insertions/deletions in the magic way clang-format understands. + IncludeFixes Fixes; for (const Include *I : Results.Unused) - cantFail(R.add(tooling::Replacement(FileName, UINT_MAX, 1, I->quote()))); - for (auto &[Spelled, _] : Results.Missing) - cantFail(R.add( + cantFail(Fixes.Replacements.add( + tooling::Replacement(FileName, UINT_MAX, 1, I->quote()))); + for (const auto &[Spelled, _] : Results.MissingIncludes) + cantFail(Fixes.Replacements.add( tooling::Replacement(FileName, UINT_MAX, 0, "#include " + Spelled))); - // "cleanup" actually turns the UINT_MAX replacements into concrete edits. - auto Positioned = cantFail(format::cleanupAroundReplacements(Code, R, Style)); + + if (Options.FragmentDependencyCommentFormat.empty()) + return Fixes; + + LineIndex Lines(Code); + for (const FragmentDependency &Dependency : Results.FragmentDependencies) { + FragmentDependencyComment Comment = inspectFragmentDependencyComment( + Dependency, FileName, Lines, Options.FragmentDependencyCommentFormat); + if (Comment.Replacement) + cantFail(Fixes.Replacements.add(*Comment.Replacement)); + Fixes.FragmentComments.push_back(std::move(Comment)); + } + return Fixes; +} + +std::string fixIncludes(const AnalysisResults &Results, + llvm::StringRef FileName, llvm::StringRef Code, + const format::FormatStyle &Style) { + return fixIncludes(Results, FileName, Code, Style, {}); +} + +std::string fixIncludes(const AnalysisResults &Results, + llvm::StringRef FileName, llvm::StringRef Code, + const format::FormatStyle &Style, + const FixIncludesOptions &Options) { + IncludeFixes Fixes = + computeIncludeFixes(Results, FileName, Code, Style, Options); + auto Positioned = cantFail( + format::cleanupAroundReplacements(Code, Fixes.Replacements, Style)); return cantFail(tooling::applyAllReplacements(Code, Positioned)); } diff --git a/clang-tools-extra/include-cleaner/lib/Record.cpp b/clang-tools-extra/include-cleaner/lib/Record.cpp index a19fcf525919d..2632f58ba8c9d 100644 --- a/clang-tools-extra/include-cleaner/lib/Record.cpp +++ b/clang-tools-extra/include-cleaner/lib/Record.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "clang-include-cleaner/Record.h" +#include "TypesInternal.h" #include "clang-include-cleaner/Types.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" @@ -159,13 +160,7 @@ class PPRecorder : public PPCallbacks { private: bool shouldRecordMacroRef(SourceLocation Loc) const { - const SourceLocation ExpandedLoc = SM.getExpansionLoc(Loc); - const FileID FID = SM.getFileID(ExpandedLoc); - if (FID == SM.getMainFileID()) - return true; - const SourceLocation IncludeLoc = SM.getIncludeLoc(FID); - return IncludeLoc.isValid() && - SM.getFileID(IncludeLoc) == SM.getMainFileID(); + return locateInMainFile(Loc, SM) != MainFileLocation::Other; } void recordMacroRef(const Token &Tok, const MacroInfo &MI, diff --git a/clang-tools-extra/include-cleaner/lib/Types.cpp b/clang-tools-extra/include-cleaner/lib/Types.cpp index d7635c258e522..8a102cc2ab848 100644 --- a/clang-tools-extra/include-cleaner/lib/Types.cpp +++ b/clang-tools-extra/include-cleaner/lib/Types.cpp @@ -10,6 +10,7 @@ #include "TypesInternal.h" #include "clang/AST/Decl.h" #include "clang/Basic/FileEntry.h" +#include "clang/Basic/SourceManager.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -109,6 +110,22 @@ llvm::SmallString<128> normalizePath(llvm::StringRef Path) { return P; } +MainFileLocation locateInMainFile(SourceLocation Loc, const SourceManager &SM) { + const SourceLocation ExpandedLoc = SM.getExpansionLoc(Loc); + const FileID FID = SM.getFileID(ExpandedLoc); + if (FID == SM.getMainFileID() || FID == SM.getPreambleFileID()) + return MainFileLocation::MainFile; + const SourceLocation IncludeLoc = SM.getIncludeLoc(FID); + if (!IncludeLoc.isValid()) + return MainFileLocation::Other; + const FileID IncludeFID = SM.getFileID(SM.getExpansionLoc(IncludeLoc)); + if (IncludeFID == SM.getMainFileID() || + IncludeFID == SM.getPreambleFileID()) { + return MainFileLocation::DirectInclude; + } + return MainFileLocation::Other; +} + void Includes::addSearchDirectory(llvm::StringRef Path) { SearchPath.try_emplace(normalizePath(Path)); } diff --git a/clang-tools-extra/include-cleaner/lib/TypesInternal.h b/clang-tools-extra/include-cleaner/lib/TypesInternal.h index 60c28956c13c9..df53deaebf72f 100644 --- a/clang-tools-extra/include-cleaner/lib/TypesInternal.h +++ b/clang-tools-extra/include-cleaner/lib/TypesInternal.h @@ -19,6 +19,9 @@ namespace llvm { class raw_ostream; } +namespace clang { +class SourceManager; +} namespace clang::include_cleaner { /// A place where a symbol can be provided. /// It is either a physical file of the TU (SourceLocation) or a logical @@ -49,6 +52,12 @@ struct SymbolLocation { }; llvm::raw_ostream &operator<<(llvm::raw_ostream &, const SymbolLocation &); +enum class MainFileLocation { + MainFile, + DirectInclude, + Other, +}; + /// Represents properties of a symbol provider. /// /// Hints represents the properties of the edges traversed when finding headers @@ -97,6 +106,7 @@ template <typename T> struct Hinted : public T { }; llvm::SmallString<128> normalizePath(llvm::StringRef Path); +MainFileLocation locateInMainFile(SourceLocation Loc, const SourceManager &SM); } // namespace clang::include_cleaner diff --git a/clang-tools-extra/include-cleaner/test/Inputs/a.inc b/clang-tools-extra/include-cleaner/test/Inputs/a.inc new file mode 100644 index 0000000000000..1d44a1554bcb8 --- /dev/null +++ b/clang-tools-extra/include-cleaner/test/Inputs/a.inc @@ -0,0 +1,4 @@ +#pragma once +struct A { + std::vector<int> Values; +}; diff --git a/clang-tools-extra/include-cleaner/test/Inputs/b.inc b/clang-tools-extra/include-cleaner/test/Inputs/b.inc new file mode 100644 index 0000000000000..a36c95d1eb829 --- /dev/null +++ b/clang-tools-extra/include-cleaner/test/Inputs/b.inc @@ -0,0 +1,4 @@ +#pragma once +struct B { + std::vector<int> Values; +}; diff --git a/clang-tools-extra/include-cleaner/test/Inputs/gen.inc b/clang-tools-extra/include-cleaner/test/Inputs/gen.inc new file mode 100644 index 0000000000000..966fff546162e --- /dev/null +++ b/clang-tools-extra/include-cleaner/test/Inputs/gen.inc @@ -0,0 +1,4 @@ +#pragma once +struct Gen { + std::vector<int> Values; +}; diff --git a/clang-tools-extra/include-cleaner/test/Inputs/generated/gen.inc b/clang-tools-extra/include-cleaner/test/Inputs/generated/gen.inc new file mode 100644 index 0000000000000..051180bcc7a83 --- /dev/null +++ b/clang-tools-extra/include-cleaner/test/Inputs/generated/gen.inc @@ -0,0 +1,4 @@ +#pragma once +struct SpelledGen { + std::vector<int> Values; +}; diff --git a/clang-tools-extra/include-cleaner/test/Inputs/inner.inc b/clang-tools-extra/include-cleaner/test/Inputs/inner.inc new file mode 100644 index 0000000000000..2f52678c091ca --- /dev/null +++ b/clang-tools-extra/include-cleaner/test/Inputs/inner.inc @@ -0,0 +1,4 @@ +#pragma once +struct Inner { + std::vector<int> Values; +}; diff --git a/clang-tools-extra/include-cleaner/test/Inputs/outer.inc b/clang-tools-extra/include-cleaner/test/Inputs/outer.inc new file mode 100644 index 0000000000000..4f68e88d1730a --- /dev/null +++ b/clang-tools-extra/include-cleaner/test/Inputs/outer.inc @@ -0,0 +1,2 @@ +#pragma once +#include "inner.inc" diff --git a/clang-tools-extra/include-cleaner/test/Inputs/vector b/clang-tools-extra/include-cleaner/test/Inputs/vector new file mode 100644 index 0000000000000..a2956ae8e54da --- /dev/null +++ b/clang-tools-extra/include-cleaner/test/Inputs/vector @@ -0,0 +1,4 @@ +#pragma once +namespace std { +template <typename T> struct vector {}; +} // namespace std diff --git a/clang-tools-extra/include-cleaner/test/fragments-block-comment.cpp b/clang-tools-extra/include-cleaner/test/fragments-block-comment.cpp new file mode 100644 index 0000000000000..c5a8e9b73a3ef --- /dev/null +++ b/clang-tools-extra/include-cleaner/test/fragments-block-comment.cpp @@ -0,0 +1,6 @@ +#include <vector> /* keep me */ +#include "gen.inc" + +Gen G; + +// RUN: clang-include-cleaner -print=changes %s --fragment-headers=.*\.inc$ --fragment-dependency-comment-format='needed by {0}' -- -I%S/Inputs/ | count 0 diff --git a/clang-tools-extra/include-cleaner/test/fragments-empty-comment.cpp b/clang-tools-extra/include-cleaner/test/fragments-empty-comment.cpp new file mode 100644 index 0000000000000..50ffc87c10969 --- /dev/null +++ b/clang-tools-extra/include-cleaner/test/fragments-empty-comment.cpp @@ -0,0 +1,6 @@ +#include <vector> +#include "gen.inc" + +Gen G; + +// RUN: clang-include-cleaner -print=changes %s --fragment-headers=.*\.inc$ -- -I%S/Inputs/ | count 0 diff --git a/clang-tools-extra/include-cleaner/test/fragments-multi.cpp b/clang-tools-extra/include-cleaner/test/fragments-multi.cpp new file mode 100644 index 0000000000000..8b29a475ce151 --- /dev/null +++ b/clang-tools-extra/include-cleaner/test/fragments-multi.cpp @@ -0,0 +1,10 @@ +#include <vector> +#include "a.inc" +#include "b.inc" + +A AValue; +B BValue; + +// RUN: clang-include-cleaner -print=changes %s --fragment-headers=.*\.inc$ --fragment-dependency-comment-format='needed by {0}' -- -I%S/Inputs/ | FileCheck --check-prefix=CHANGES %s +// CHANGES: ~ <vector> @Line:1 // needed by "a.inc", "b.inc" +// CHANGES-NOT: - <vector> diff --git a/clang-tools-extra/include-cleaner/test/fragments-nonrecursive.cpp b/clang-tools-extra/include-cleaner/test/fragments-nonrecursive.cpp new file mode 100644 index 0000000000000..a34bb34623c24 --- /dev/null +++ b/clang-tools-extra/include-cleaner/test/fragments-nonrecursive.cpp @@ -0,0 +1,5 @@ +#include <vector> +#include "outer.inc" + +// RUN: clang-include-cleaner -print=changes %s --fragment-headers=.*\.inc$ -- -I%S/Inputs/ | FileCheck --check-prefix=CHANGES %s +// CHANGES: - <vector> @Line:1 diff --git a/clang-tools-extra/include-cleaner/test/fragments-spelled-path.cpp b/clang-tools-extra/include-cleaner/test/fragments-spelled-path.cpp new file mode 100644 index 0000000000000..45174240ab498 --- /dev/null +++ b/clang-tools-extra/include-cleaner/test/fragments-spelled-path.cpp @@ -0,0 +1,8 @@ +#include <vector> +#include "generated/gen.inc" + +SpelledGen G; + +// RUN: clang-include-cleaner -print=changes %s --fragment-headers='generated/gen\.inc' --fragment-dependency-comment-format='needed by {0}' -- -I%S/Inputs/ | FileCheck --check-prefix=CHANGES %s +// CHANGES: ~ <vector> @Line:1 // needed by "generated/gen.inc" +// CHANGES-NOT: - <vector> diff --git a/clang-tools-extra/include-cleaner/test/fragments.cpp b/clang-tools-extra/include-cleaner/test/fragments.cpp new file mode 100644 index 0000000000000..efa1dd52b4f48 --- /dev/null +++ b/clang-tools-extra/include-cleaner/test/fragments.cpp @@ -0,0 +1,16 @@ +#include <vector> +#include "gen.inc" + +Gen G; + +// RUN: clang-include-cleaner -print=changes %s --fragment-headers=.*\\.inc$ --fragment-dependency-comment-format='needed by {0}' -- -I%S/Inputs/ | FileCheck --check-prefix=CHANGES %s +// CHANGES: ~ <vector> @Line:1 // needed by "gen.inc" +// CHANGES-NOT: - <vector> + +// RUN: clang-include-cleaner -print %s --fragment-headers=.*\\.inc$ --fragment-dependency-comment-format='needed by {0}' -- -I%S/Inputs/ | FileCheck --check-prefix=PRINT %s +// PRINT: #include <vector> // needed by "gen.inc" + +// RUN: cp %s %t.cpp +// RUN: clang-include-cleaner -edit %t.cpp --fragment-headers=.*\\.inc$ --fragment-dependency-comment-format='IWYU pragma: keep' -- -I%S/Inputs/ +// RUN: FileCheck --check-prefix=EDIT %s < %t.cpp +// EDIT: #include <vector> // IWYU pragma: keep diff --git a/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp b/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp index fefbfc3a9614d..cfd29ce8d6a65 100644 --- a/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp +++ b/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp @@ -31,9 +31,16 @@ namespace clang { namespace include_cleaner { + namespace { namespace cl = llvm::cl; +llvm::SmallVector<Decl *> topLevelDecls(ASTContext &Ctx); +std::function<bool(llvm::StringRef)> matchesAny(llvm::StringRef RegexFlag, + bool AnchorToSuffix); +std::function<bool(const Header &)> headerFilter(); +std::function<bool(llvm::StringRef)> fragmentHeaderFilter(); + llvm::StringRef Overview = llvm::StringLiteral(R"( clang-include-cleaner analyzes the #include directives in source code. @@ -73,6 +80,24 @@ cl::opt<std::string> IgnoreHeaders{ cl::cat(IncludeCleaner), }; +cl::opt<std::string> FragmentHeaders{ + "fragment-headers", + cl::desc("A comma-separated list of regular expressions matched against " + "normalized include paths. Matching direct includes are treated " + "as fragments of the main file."), + cl::init(""), + cl::cat(IncludeCleaner), +}; + +cl::opt<std::string> FragmentDependencyCommentFormat{ + "fragment-dependency-comment-format", + cl::desc("Append this trailing comment to includes that are used only by " + "fragment headers. Use {0} for the matched fragment include " + "spellings."), + cl::init(""), + cl::cat(IncludeCleaner), +}; + enum class PrintStyle { Changes, Final }; cl::opt<PrintStyle> Print{ "print", @@ -130,15 +155,23 @@ format::FormatStyle getStyle(llvm::StringRef Filename) { class Action : public clang::ASTFrontendAction { public: - Action(llvm::function_ref<bool(llvm::StringRef)> HeaderFilter, + Action(std::function<bool(const Header &)> HeaderFilter, + std::function<bool(llvm::StringRef)> FragmentHeaderFilter, + std::string FragmentDependencyCommentFormat, llvm::StringMap<std::string> &EditedFiles) - : HeaderFilter(HeaderFilter), EditedFiles(EditedFiles) {} + : HeaderFilter(std::move(HeaderFilter)), + FragmentHeaderFilter(std::move(FragmentHeaderFilter)), + FragmentDependencyCommentFormat( + std::move(FragmentDependencyCommentFormat)), + EditedFiles(EditedFiles) {} private: RecordedAST AST; RecordedPP PP; PragmaIncludes PI; - llvm::function_ref<bool(llvm::StringRef)> HeaderFilter; + std::function<bool(const Header &)> HeaderFilter; + std::function<bool(llvm::StringRef)> FragmentHeaderFilter; + std::string FragmentDependencyCommentFormat; llvm::StringMap<std::string> &EditedFiles; bool BeginInvocation(CompilerInstance &CI) override { @@ -192,9 +225,13 @@ class Action : public clang::ASTFrontendAction { SM.getFileManager().makeAbsolutePath(AbsPath); llvm::StringRef Code = SM.getBufferData(SM.getMainFileID()); + AnalysisOptions AnalyzeOptions; + AnalyzeOptions.HeaderFilter = HeaderFilter; + AnalyzeOptions.FragmentHeaderFilter = FragmentHeaderFilter; + llvm::SmallVector<Decl *> RootDecls = topLevelDecls(*AST.Ctx); auto Results = - analyze(AST.Roots, PP.MacroReferences, PP.Includes, &PI, - getCompilerInstance().getPreprocessor(), HeaderFilter); + analyze(RootDecls, PP.MacroReferences, PP.Includes, &PI, + getCompilerInstance().getPreprocessor(), AnalyzeOptions); if (!Insert) { llvm::errs() @@ -213,18 +250,34 @@ class Action : public clang::ASTFrontendAction { } if (!Insert || DisableInsert) - Results.Missing.clear(); + Results.MissingIncludes.clear(); if (!Remove || DisableRemove) Results.Unused.clear(); - std::string Final = fixIncludes(Results, AbsPath, Code, getStyle(AbsPath)); + format::FormatStyle Style = getStyle(AbsPath); + FixIncludesOptions FixOptions; + FixOptions.FragmentDependencyCommentFormat = + FragmentDependencyCommentFormat; + IncludeFixes Fixes = + computeIncludeFixes(Results, AbsPath, Code, Style, FixOptions); + auto Positioned = cantFail( + format::cleanupAroundReplacements(Code, Fixes.Replacements, Style)); + std::string Final = + cantFail(tooling::applyAllReplacements(Code, Positioned)); if (Print.getNumOccurrences()) { switch (Print) { case PrintStyle::Changes: for (const Include *I : Results.Unused) llvm::outs() << "- " << I->quote() << " @Line:" << I->Line << "\n"; - for (const auto &[I, _] : Results.Missing) + for (const auto &[I, _] : Results.MissingIncludes) llvm::outs() << "+ " << I << "\n"; + for (const auto &Comment : Fixes.FragmentComments) { + if (!Comment.Replacement) + continue; + llvm::outs() << "~ " << Comment.Preserved->quote() + << " @Line:" << Comment.Preserved->Line << " // " + << Comment.Text << "\n"; + } break; case PrintStyle::Final: llvm::outs() << Final; @@ -232,7 +285,7 @@ class Action : public clang::ASTFrontendAction { } } - if (!Results.Missing.empty() || !Results.Unused.empty()) + if (!Fixes.Replacements.empty()) EditedFiles.try_emplace(AbsPath, Final); } @@ -246,17 +299,24 @@ class Action : public clang::ASTFrontendAction { return; } writeHTMLReport(AST.Ctx->getSourceManager().getMainFileID(), PP.Includes, - AST.Roots, PP.MacroReferences, *AST.Ctx, + topLevelDecls(*AST.Ctx), PP.MacroReferences, *AST.Ctx, getCompilerInstance().getPreprocessor(), &PI, OS); } }; class ActionFactory : public tooling::FrontendActionFactory { public: - ActionFactory(llvm::function_ref<bool(llvm::StringRef)> HeaderFilter) - : HeaderFilter(HeaderFilter) {} + ActionFactory(std::function<bool(const Header &)> HeaderFilter, + std::function<bool(llvm::StringRef)> FragmentHeaderFilter, + std::string FragmentDependencyCommentFormat) + : HeaderFilter(std::move(HeaderFilter)), + FragmentHeaderFilter(std::move(FragmentHeaderFilter)), + FragmentDependencyCommentFormat( + std::move(FragmentDependencyCommentFormat)) {} std::unique_ptr<clang::FrontendAction> create() override { - return std::make_unique<Action>(HeaderFilter, EditedFiles); + return std::make_unique<Action>(HeaderFilter, FragmentHeaderFilter, + FragmentDependencyCommentFormat, + EditedFiles); } const llvm::StringMap<std::string> &editedFiles() const { @@ -264,19 +324,31 @@ class ActionFactory : public tooling::FrontendActionFactory { } private: - llvm::function_ref<bool(llvm::StringRef)> HeaderFilter; + std::function<bool(const Header &)> HeaderFilter; + std::function<bool(llvm::StringRef)> FragmentHeaderFilter; + std::string FragmentDependencyCommentFormat; // Map from file name to final code with the include edits applied. llvm::StringMap<std::string> EditedFiles; }; +llvm::SmallVector<Decl *> topLevelDecls(ASTContext &Ctx) { + llvm::SmallVector<Decl *> Decls; + for (Decl *D : Ctx.getTranslationUnitDecl()->decls()) + Decls.push_back(D); + return Decls; +} + // Compiles a regex list into a function that return true if any match a header. // Prints and returns nullptr if any regexes are invalid. -std::function<bool(llvm::StringRef)> matchesAny(llvm::StringRef RegexFlag) { +std::function<bool(llvm::StringRef)> matchesAny(llvm::StringRef RegexFlag, + bool AnchorToSuffix) { auto FilterRegs = std::make_shared<std::vector<llvm::Regex>>(); llvm::SmallVector<llvm::StringRef> Headers; RegexFlag.split(Headers, ',', -1, /*KeepEmpty=*/false); for (auto HeaderPattern : Headers) { - std::string AnchoredPattern = "(" + HeaderPattern.str() + ")$"; + std::string AnchoredPattern = HeaderPattern.str(); + if (AnchorToSuffix) + AnchoredPattern = "(" + AnchoredPattern + ")$"; llvm::Regex CompiledRegex(AnchoredPattern); std::string RegexError; if (!CompiledRegex.isValid(RegexError)) { @@ -295,21 +367,26 @@ std::function<bool(llvm::StringRef)> matchesAny(llvm::StringRef RegexFlag) { }; } -std::function<bool(llvm::StringRef)> headerFilter() { - auto OnlyMatches = matchesAny(OnlyHeaders); - auto IgnoreMatches = matchesAny(IgnoreHeaders); +std::function<bool(const Header &)> headerFilter() { + auto OnlyMatches = matchesAny(OnlyHeaders, /*AnchorToSuffix=*/true); + auto IgnoreMatches = matchesAny(IgnoreHeaders, /*AnchorToSuffix=*/true); if (!OnlyMatches || !IgnoreMatches) return nullptr; - return [OnlyMatches, IgnoreMatches](llvm::StringRef Header) { - if (!OnlyHeaders.empty() && !OnlyMatches(Header)) + return [OnlyMatches, IgnoreMatches](const Header &Header) { + llvm::StringRef Path = Header.resolvedPath(); + if (!OnlyHeaders.empty() && !OnlyMatches(Path)) return true; - if (!IgnoreHeaders.empty() && IgnoreMatches(Header)) + if (!IgnoreHeaders.empty() && IgnoreMatches(Path)) return true; return false; }; } +std::function<bool(llvm::StringRef)> fragmentHeaderFilter() { + return matchesAny(FragmentHeaders, /*AnchorToSuffix=*/false); +} + // Maps absolute path of each files of each compilation commands to the // absolute path of the input file. llvm::Expected<std::map<std::string, std::string, std::less<>>> @@ -388,9 +465,11 @@ int main(int argc, const char **argv) { clang::tooling::ClangTool Tool(CDB, OptionsParser->getSourcePathList()); auto HeaderFilter = headerFilter(); - if (!HeaderFilter) + auto FragmentHeaderFilter = fragmentHeaderFilter(); + if (!HeaderFilter || !FragmentHeaderFilter) return 1; // error already reported. - ActionFactory Factory(HeaderFilter); + ActionFactory Factory(HeaderFilter, FragmentHeaderFilter, + FragmentDependencyCommentFormat); auto ErrorCode = Tool.run(&Factory); if (Edit) { for (const auto &NameAndContent : Factory.editedFiles()) { diff --git a/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp b/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp index ba5a3fbbcaeb2..7fa7317294586 100644 --- a/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp +++ b/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp @@ -39,6 +39,14 @@ #include <vector> namespace clang::include_cleaner { + +static std::vector<Decl *> topLevelDecls(TestAST &AST) { + std::vector<Decl *> Decls; + for (auto *D : AST.context().getTranslationUnitDecl()->decls()) + Decls.push_back(D); + return Decls; +} + namespace { using testing::_; using testing::AllOf; @@ -269,7 +277,8 @@ int x = a + c; const Include *B = PP.Includes.atLine(3); ASSERT_EQ(B->Spelled, "b.h"); - EXPECT_THAT(Results.Missing, ElementsAre(Pair("\"c.h\"", Header(CHeader)))); + EXPECT_THAT(Results.MissingIncludes, + ElementsAre(Pair("\"c.h\"", Header(CHeader)))); EXPECT_THAT(Results.Unused, ElementsAre(B)); } @@ -317,7 +326,7 @@ TEST_F(AnalyzeTest, ResourceDirIsIgnored) { TestAST AST(Inputs); auto Results = analyze({}, {}, PP.Includes, &PI, AST.preprocessor()); EXPECT_THAT(Results.Unused, testing::IsEmpty()); - EXPECT_THAT(Results.Missing, testing::IsEmpty()); + EXPECT_THAT(Results.MissingIncludes, testing::IsEmpty()); } TEST_F(AnalyzeTest, DifferentHeaderSameSpelling) { @@ -342,7 +351,7 @@ TEST_F(AnalyzeTest, DifferentHeaderSameSpelling) { DeclsInTU.push_back(D); auto Results = analyze(DeclsInTU, {}, PP.Includes, &PI, AST.preprocessor()); EXPECT_THAT(Results.Unused, testing::IsEmpty()); - EXPECT_THAT(Results.Missing, testing::IsEmpty()); + EXPECT_THAT(Results.MissingIncludes, testing::IsEmpty()); } TEST_F(AnalyzeTest, SpellingIncludesWithSymlinks) { @@ -368,33 +377,361 @@ TEST_F(AnalyzeTest, SpellingIncludesWithSymlinks) { /*ModificationTime=*/{}); TestAST AST(Inputs); - std::vector<Decl *> DeclsInTU; - for (auto *D : AST.context().getTranslationUnitDecl()->decls()) - DeclsInTU.push_back(D); + std::vector<Decl *> DeclsInTU = topLevelDecls(AST); auto Results = analyze(DeclsInTU, {}, PP.Includes, &PI, AST.preprocessor()); // Check that we're spelling header using the symlink, and not underlying // path. - EXPECT_THAT(Results.Missing, testing::ElementsAre(Pair("\"inner.h\"", _))); + EXPECT_THAT(Results.MissingIncludes, + testing::ElementsAre(Pair("\"inner.h\"", _))); // header.h should be unused. EXPECT_THAT(Results.Unused, Not(testing::IsEmpty())); { // Make sure filtering is also applied to symlink, not underlying file. - auto HeaderFilter = [](llvm::StringRef Path) { return Path == "inner.h"; }; - Results = analyze(DeclsInTU, {}, PP.Includes, &PI, AST.preprocessor(), - HeaderFilter); - EXPECT_THAT(Results.Missing, testing::ElementsAre(Pair("\"inner.h\"", _))); + AnalysisOptions Options; + Options.HeaderFilter = [](const Header &H) { + return H.resolvedPath() == "inner.h"; + }; + Results = + analyze(DeclsInTU, {}, PP.Includes, &PI, AST.preprocessor(), Options); + EXPECT_THAT(Results.MissingIncludes, + testing::ElementsAre(Pair("\"inner.h\"", _))); // header.h should be unused. EXPECT_THAT(Results.Unused, Not(testing::IsEmpty())); } { - auto HeaderFilter = [](llvm::StringRef Path) { return Path == "header.h"; }; - Results = analyze(DeclsInTU, {}, PP.Includes, &PI, AST.preprocessor(), - HeaderFilter); + AnalysisOptions Options; + Options.HeaderFilter = [](const Header &H) { + return H.resolvedPath() == "header.h"; + }; + Results = + analyze(DeclsInTU, {}, PP.Includes, &PI, AST.preprocessor(), Options); // header.h should be ignored now. EXPECT_THAT(Results.Unused, Not(testing::IsEmpty())); - EXPECT_THAT(Results.Missing, testing::ElementsAre(Pair("\"inner.h\"", _))); + EXPECT_THAT(Results.MissingIncludes, + testing::ElementsAre(Pair("\"inner.h\"", _))); + } +} + +TEST_F(AnalyzeTest, FragmentDeclUsePreservesInclude) { + Inputs.Code = R"cpp( +#include "support.h" +#include "gen.inc" +Holder H; +)cpp"; + Inputs.ExtraFiles["support.h"] = guard(R"cpp( + namespace support { + template <typename> struct vector {}; + } + )cpp"); + Inputs.ExtraFiles["gen.inc"] = guard(R"cpp( + struct Holder { + support::vector<int> Values; + }; + )cpp"); + + TestAST AST(Inputs); + AnalysisOptions Options; + Options.FragmentHeaderFilter = [](llvm::StringRef Path) { + return Path.ends_with(".inc"); + }; + auto Results = analyze(topLevelDecls(AST), PP.MacroReferences, PP.Includes, + &PI, AST.preprocessor(), Options); + + const Include *Vector = PP.Includes.atLine(2); + const Include *Fragment = PP.Includes.atLine(3); + ASSERT_NE(Vector, nullptr); + ASSERT_NE(Fragment, nullptr); + EXPECT_THAT(Results.Unused, testing::IsEmpty()); + ASSERT_THAT(Results.FragmentDependencies, testing::SizeIs(1)); + EXPECT_EQ(Results.FragmentDependencies.front().Preserved, Vector); + EXPECT_THAT(Results.FragmentDependencies.front().Fragments, + ElementsAre(Fragment)); +} + +TEST_F(AnalyzeTest, FragmentDependencyExcludedWhenAlsoUsedInMainFile) { + Inputs.Code = R"cpp( +#include "support.h" +#include "gen.inc" +support::vector<int> MainValues; +)cpp"; + Inputs.ExtraFiles["support.h"] = guard(R"cpp( + namespace support { + template <typename> struct vector {}; } + )cpp"); + Inputs.ExtraFiles["gen.inc"] = guard(R"cpp( + struct Holder { + support::vector<int> Values; + }; + )cpp"); + + TestAST AST(Inputs); + AnalysisOptions Options; + Options.FragmentHeaderFilter = [](llvm::StringRef Path) { + return Path.ends_with(".inc"); + }; + auto Results = analyze(topLevelDecls(AST), PP.MacroReferences, PP.Includes, + &PI, AST.preprocessor(), Options); + + const Include *Support = PP.Includes.atLine(2); + ASSERT_NE(Support, nullptr); + EXPECT_THAT(Results.Unused, testing::Not(Contains(Support))); + EXPECT_THAT(Results.FragmentDependencies, testing::IsEmpty()); +} + +TEST_F(AnalyzeTest, MultipleFragmentsPreserveSingleInclude) { + Inputs.Code = R"cpp( +#include "support.h" +#include "a.inc" +#include "b.inc" +A AValue; +B BValue; +)cpp"; + Inputs.ExtraFiles["support.h"] = guard(R"cpp( + namespace support { + template <typename> struct vector {}; + } + )cpp"); + Inputs.ExtraFiles["a.inc"] = guard(R"cpp( + struct A { + support::vector<int> Values; + }; + )cpp"); + Inputs.ExtraFiles["b.inc"] = guard(R"cpp( + struct B { + support::vector<int> Values; + }; + )cpp"); + + TestAST AST(Inputs); + AnalysisOptions Options; + Options.FragmentHeaderFilter = [](llvm::StringRef Path) { + return Path.ends_with(".inc"); + }; + auto Results = analyze(topLevelDecls(AST), PP.MacroReferences, PP.Includes, + &PI, AST.preprocessor(), Options); + + const Include *Support = PP.Includes.atLine(2); + const Include *FragmentA = PP.Includes.atLine(3); + const Include *FragmentB = PP.Includes.atLine(4); + ASSERT_NE(Support, nullptr); + ASSERT_NE(FragmentA, nullptr); + ASSERT_NE(FragmentB, nullptr); + EXPECT_THAT(Results.Unused, testing::IsEmpty()); + ASSERT_THAT(Results.FragmentDependencies, testing::SizeIs(1)); + EXPECT_EQ(Results.FragmentDependencies.front().Preserved, Support); + EXPECT_THAT(Results.FragmentDependencies.front().Fragments, + ElementsAre(FragmentA, FragmentB)); +} + +TEST_F(AnalyzeTest, FragmentDependencyDeduplicatesFragmentReason) { + Inputs.Code = R"cpp( +#include "support.h" +#include "gen.inc" +Holder H; +)cpp"; + Inputs.ExtraFiles["support.h"] = guard(R"cpp( + namespace support { + template <typename> struct vector {}; + } + )cpp"); + Inputs.ExtraFiles["gen.inc"] = guard(R"cpp( + struct Holder { + support::vector<int> First; + support::vector<int> Second; + }; + )cpp"); + + TestAST AST(Inputs); + AnalysisOptions Options; + Options.FragmentHeaderFilter = [](llvm::StringRef Path) { + return Path.ends_with(".inc"); + }; + auto Results = analyze(topLevelDecls(AST), PP.MacroReferences, PP.Includes, + &PI, AST.preprocessor(), Options); + + const Include *Fragment = PP.Includes.atLine(3); + ASSERT_NE(Fragment, nullptr); + ASSERT_THAT(Results.FragmentDependencies, testing::SizeIs(1)); + EXPECT_THAT(Results.FragmentDependencies.front().Fragments, + ElementsAre(Fragment)); +} + +TEST_F(AnalyzeTest, DuplicateFragmentIncludesPreserveIncludeWithoutProvenance) { + Inputs.Code = R"cpp( +#include "support.h" +#include "gen.inc" +#include "./gen.inc" +)cpp"; + Inputs.ExtraFiles["support.h"] = guard("struct SupportType {};"); + Inputs.ExtraFiles["gen.inc"] = "void use(SupportType);\n"; + + TestAST AST(Inputs); + AnalysisOptions Options; + Options.FragmentHeaderFilter = [](llvm::StringRef Path) { + return Path.ends_with(".inc"); + }; + auto Results = analyze(topLevelDecls(AST), PP.MacroReferences, PP.Includes, + &PI, AST.preprocessor(), Options); + + const Include *Support = PP.Includes.atLine(2); + ASSERT_NE(Support, nullptr); + EXPECT_THAT(Results.Unused, testing::Not(Contains(Support))); + EXPECT_THAT(Results.FragmentDependencies, testing::IsEmpty()); +} + +TEST_F(AnalyzeTest, FragmentMacroUsePreservesInclude) { + Inputs.Code = R"cpp( +#include "macro.h" +#include "gen.inc" +)cpp"; + Inputs.ExtraFiles["macro.h"] = guard("#define FOO 42"); + Inputs.ExtraFiles["gen.inc"] = guard("int Value = FOO;"); + + TestAST AST(Inputs); + AnalysisOptions Options; + Options.FragmentHeaderFilter = [](llvm::StringRef Path) { + return Path.ends_with(".inc"); + }; + auto Results = analyze(topLevelDecls(AST), PP.MacroReferences, PP.Includes, + &PI, AST.preprocessor(), Options); + + const Include *Macro = PP.Includes.atLine(2); + const Include *Fragment = PP.Includes.atLine(3); + ASSERT_NE(Macro, nullptr); + ASSERT_NE(Fragment, nullptr); + EXPECT_THAT(Results.Unused, testing::IsEmpty()); + ASSERT_THAT(Results.FragmentDependencies, testing::SizeIs(1)); + EXPECT_EQ(Results.FragmentDependencies.front().Preserved, Macro); + EXPECT_THAT(Results.FragmentDependencies.front().Fragments, + ElementsAre(Fragment)); +} + +TEST_F(AnalyzeTest, FragmentHeadersAreNotRecursive) { + Inputs.Code = R"cpp( +#include "support.h" +#include "outer.inc" +)cpp"; + Inputs.ExtraFiles["support.h"] = guard(R"cpp( + namespace support { + template <typename> struct vector {}; + } + )cpp"); + Inputs.ExtraFiles["outer.inc"] = guard(R"cpp( + #include "inner.inc" + )cpp"); + Inputs.ExtraFiles["inner.inc"] = guard(R"cpp( + struct Inner { + support::vector<int> Values; + }; + )cpp"); + + TestAST AST(Inputs); + AnalysisOptions Options; + Options.FragmentHeaderFilter = [](llvm::StringRef Path) { + return Path.ends_with(".inc"); + }; + auto Results = analyze(topLevelDecls(AST), PP.MacroReferences, PP.Includes, + &PI, AST.preprocessor(), Options); + + const Include *Support = PP.Includes.atLine(2); + ASSERT_NE(Support, nullptr); + EXPECT_THAT(Results.Unused, Contains(Support)); + EXPECT_THAT(Results.FragmentDependencies, testing::IsEmpty()); +} + +TEST_F(AnalyzeTest, FragmentHeaderMatcherFallsBackToSpelledPath) { + Inputs.ExtraArgs.push_back("-I/headers"); + Inputs.Code = R"cpp( +#include "support.h" +#include "generated/gen.inc" +Holder H; +)cpp"; + Inputs.ExtraFiles["/headers/support.h"] = guard(R"cpp( + namespace support { + template <typename> struct vector {}; + } + )cpp"); + Inputs.ExtraFiles["/headers/generated/gen.inc"] = guard(R"cpp( + struct Holder { + support::vector<int> Values; + }; + )cpp"); + + TestAST AST(Inputs); + const Include *Fragment = PP.Includes.atLine(3); + ASSERT_NE(Fragment, nullptr); + ASSERT_TRUE(Fragment->Resolved.has_value()); + EXPECT_NE(normalizePath(Fragment->Resolved->getName()).str(), + "generated/gen.inc"); + + AnalysisOptions Options; + Options.FragmentHeaderFilter = [](llvm::StringRef Path) { + return Path == "generated/gen.inc"; + }; + auto Results = analyze(topLevelDecls(AST), PP.MacroReferences, PP.Includes, + &PI, AST.preprocessor(), Options); + + const Include *Support = PP.Includes.atLine(2); + ASSERT_NE(Support, nullptr); + ASSERT_THAT(Results.FragmentDependencies, testing::SizeIs(1)); + EXPECT_EQ(Results.FragmentDependencies.front().Preserved, Support); + EXPECT_THAT(Results.FragmentDependencies.front().Fragments, + ElementsAre(Fragment)); +} + +TEST_F(AnalyzeTest, FragmentMissingIncludeRecordsOrigin) { + Inputs.Code = R"cpp( +#include "gen.inc" +)cpp"; + Inputs.ExtraFiles["foo.h"] = guard("int foo();"); + Inputs.ExtraFiles["gen.inc"] = guard(R"cpp( + #include "foo.h" + int Value = foo(); + )cpp"); + + TestAST AST(Inputs); + AnalysisOptions Options; + Options.FragmentHeaderFilter = [](llvm::StringRef Path) { + return Path.ends_with(".inc"); + }; + auto Results = analyze(topLevelDecls(AST), PP.MacroReferences, PP.Includes, + &PI, AST.preprocessor(), Options); + + const Include *Fragment = PP.Includes.atLine(2); + ASSERT_NE(Fragment, nullptr); + ASSERT_THAT(Results.MissingRefs, testing::SizeIs(1)); + EXPECT_EQ(Results.MissingRefs.front().Origin, + SymbolReferenceOrigin::Fragment); + EXPECT_EQ(Results.MissingRefs.front().FragmentInclude, Fragment); + EXPECT_EQ(Results.MissingIncludes.front().first, "\"foo.h\""); +} + +TEST_F(AnalyzeTest, DuplicateFragmentIncludesSuppressMissingProvenance) { + Inputs.Code = R"cpp( +#include "gen.inc" +#include "./gen.inc" +)cpp"; + Inputs.ExtraFiles["foo.h"] = guard("int foo();"); + Inputs.ExtraFiles["gen.inc"] = R"cpp( + #include "foo.h" + auto useFoo() -> decltype(foo()); + )cpp"; + + TestAST AST(Inputs); + AnalysisOptions Options; + Options.FragmentHeaderFilter = [](llvm::StringRef Path) { + return Path.ends_with(".inc"); + }; + auto Results = analyze(topLevelDecls(AST), PP.MacroReferences, PP.Includes, + &PI, AST.preprocessor(), Options); + + ASSERT_FALSE(Results.MissingRefs.empty()); + EXPECT_EQ(Results.MissingRefs.front().Origin, + SymbolReferenceOrigin::Fragment); + EXPECT_EQ(Results.MissingRefs.front().FragmentInclude, nullptr); + EXPECT_EQ(Results.MissingIncludes.front().first, "\"foo.h\""); } // Make sure that the references to implicit operator new/delete are reported as @@ -422,7 +759,7 @@ TEST_F(AnalyzeTest, ImplicitOperatorNewDeleteNotMissing) { for (auto *D : AST.context().getTranslationUnitDecl()->decls()) DeclsInTU.push_back(D); auto Results = analyze(DeclsInTU, {}, PP.Includes, &PI, AST.preprocessor()); - EXPECT_THAT(Results.Missing, testing::IsEmpty()); + EXPECT_THAT(Results.MissingIncludes, testing::IsEmpty()); } TEST_F(AnalyzeTest, ImplicitOperatorNewDeleteNotUnused) { @@ -467,14 +804,14 @@ TEST(FixIncludes, Basic) { Inc.add(I); AnalysisResults Results; - Results.Missing.emplace_back("\"aa.h\"", Header("")); - Results.Missing.emplace_back("\"ab.h\"", Header("")); - Results.Missing.emplace_back("<e.h>", Header("")); + Results.MissingIncludes.emplace_back("\"aa.h\"", Header("")); + Results.MissingIncludes.emplace_back("\"ab.h\"", Header("")); + Results.MissingIncludes.emplace_back("<e.h>", Header("")); Results.Unused.push_back(Inc.atLine(3)); Results.Unused.push_back(Inc.atLine(4)); EXPECT_EQ(fixIncludes(Results, "d.cc", Code, format::getLLVMStyle()), -R"cpp(#include "d.h" + R"cpp(#include "d.h" #include "a.h" #include "aa.h" #include "ab.h" @@ -482,13 +819,210 @@ R"cpp(#include "d.h" )cpp"); Results = {}; - Results.Missing.emplace_back("\"d.h\"", Header("")); + Results.MissingIncludes.emplace_back("\"d.h\"", Header("")); Code = R"cpp(#include "a.h")cpp"; EXPECT_EQ(fixIncludes(Results, "d.cc", Code, format::getLLVMStyle()), -R"cpp(#include "d.h" + R"cpp(#include "d.h" #include "a.h")cpp"); } +TEST(FixIncludes, FragmentDependencyComments) { + llvm::StringRef Code = R"cpp(#include "a.h" +#include "gen.inc" +#include "existing.h" // needed by "gen.inc" +#include "conflict.h" // keep me +)cpp"; + + Includes Inc; + Include A; + A.Spelled = "a.h"; + A.Line = 1; + Inc.add(A); + Include Gen; + Gen.Spelled = "gen.inc"; + Gen.Line = 2; + Inc.add(Gen); + Include Existing; + Existing.Spelled = "existing.h"; + Existing.Line = 3; + Inc.add(Existing); + Include Conflict; + Conflict.Spelled = "conflict.h"; + Conflict.Line = 4; + Inc.add(Conflict); + + AnalysisResults Results; + Results.FragmentDependencies.push_back({Inc.atLine(1), {Inc.atLine(2)}}); + Results.FragmentDependencies.push_back({Inc.atLine(3), {Inc.atLine(2)}}); + Results.FragmentDependencies.push_back({Inc.atLine(4), {Inc.atLine(2)}}); + + FixIncludesOptions Options; + Options.FragmentDependencyCommentFormat = "needed by {0}"; + IncludeFixes Fixes = computeIncludeFixes(Results, "d.cc", Code, + format::getLLVMStyle(), Options); + + ASSERT_THAT(Fixes.FragmentComments, testing::SizeIs(3)); + EXPECT_EQ(Fixes.FragmentComments[0].Status, + FragmentDependencyCommentStatus::CanInsert); + ASSERT_TRUE(Fixes.FragmentComments[0].Replacement.has_value()); + EXPECT_EQ(Fixes.FragmentComments[1].Status, + FragmentDependencyCommentStatus::AlreadyPresent); + EXPECT_FALSE(Fixes.FragmentComments[1].Replacement.has_value()); + EXPECT_EQ(Fixes.FragmentComments[2].Status, + FragmentDependencyCommentStatus::ConflictingComment); + EXPECT_FALSE(Fixes.FragmentComments[2].Replacement.has_value()); + + EXPECT_EQ(fixIncludes(Results, "d.cc", Code, format::getLLVMStyle(), Options), + R"cpp(#include "a.h" // needed by "gen.inc" +#include "gen.inc" +#include "existing.h" // needed by "gen.inc" +#include "conflict.h" // keep me +)cpp"); +} + +TEST(FixIncludes, FragmentDependencyCommentsEmptyFormat) { + llvm::StringRef Code = R"cpp(#include "a.h" +#include "gen.inc" +)cpp"; + + Includes Inc; + Include A; + A.Spelled = "a.h"; + A.Line = 1; + Inc.add(A); + Include Gen; + Gen.Spelled = "gen.inc"; + Gen.Line = 2; + Inc.add(Gen); + + AnalysisResults Results; + Results.FragmentDependencies.push_back({Inc.atLine(1), {Inc.atLine(2)}}); + + IncludeFixes Fixes = + computeIncludeFixes(Results, "d.cc", Code, format::getLLVMStyle(), {}); + EXPECT_THAT(Fixes.FragmentComments, testing::IsEmpty()); + EXPECT_EQ(fixIncludes(Results, "d.cc", Code, format::getLLVMStyle()), Code); +} + +TEST(FixIncludes, FragmentDependencyCommentsLiteralFormat) { + llvm::StringRef Code = R"cpp(#include "a.h" +#include "gen.inc" +)cpp"; + + Includes Inc; + Include A; + A.Spelled = "a.h"; + A.Line = 1; + Inc.add(A); + Include Gen; + Gen.Spelled = "gen.inc"; + Gen.Line = 2; + Inc.add(Gen); + + AnalysisResults Results; + Results.FragmentDependencies.push_back({Inc.atLine(1), {Inc.atLine(2)}}); + + FixIncludesOptions Options; + Options.FragmentDependencyCommentFormat = "IWYU pragma: keep"; + IncludeFixes Fixes = computeIncludeFixes(Results, "d.cc", Code, + format::getLLVMStyle(), Options); + ASSERT_THAT(Fixes.FragmentComments, testing::SizeIs(1)); + EXPECT_EQ(Fixes.FragmentComments.front().Text, "IWYU pragma: keep"); + EXPECT_EQ(fixIncludes(Results, "d.cc", Code, format::getLLVMStyle(), Options), + R"cpp(#include "a.h" // IWYU pragma: keep +#include "gen.inc" +)cpp"); +} + +TEST(FixIncludes, FragmentDependencyCommentsMultipleFragments) { + llvm::StringRef Code = R"cpp(#include "a.h" +#include "a.inc" +#include "b.inc" +)cpp"; + + Includes Inc; + Include A; + A.Spelled = "a.h"; + A.Line = 1; + Inc.add(A); + Include FragmentA; + FragmentA.Spelled = "a.inc"; + FragmentA.Line = 2; + Inc.add(FragmentA); + Include FragmentB; + FragmentB.Spelled = "b.inc"; + FragmentB.Line = 3; + Inc.add(FragmentB); + + AnalysisResults Results; + Results.FragmentDependencies.push_back( + {Inc.atLine(1), {Inc.atLine(2), Inc.atLine(3)}}); + + FixIncludesOptions Options; + Options.FragmentDependencyCommentFormat = "needed by {0}"; + IncludeFixes Fixes = computeIncludeFixes(Results, "d.cc", Code, + format::getLLVMStyle(), Options); + ASSERT_THAT(Fixes.FragmentComments, testing::SizeIs(1)); + EXPECT_EQ(Fixes.FragmentComments.front().Text, + "needed by \"a.inc\", \"b.inc\""); +} + +TEST(FixIncludes, FragmentDependencyCommentsBlockCommentConflict) { + llvm::StringRef Code = R"cpp(#include "a.h" /* keep me */ +#include "gen.inc" +)cpp"; + + Includes Inc; + Include A; + A.Spelled = "a.h"; + A.Line = 1; + Inc.add(A); + Include Gen; + Gen.Spelled = "gen.inc"; + Gen.Line = 2; + Inc.add(Gen); + + AnalysisResults Results; + Results.FragmentDependencies.push_back({Inc.atLine(1), {Inc.atLine(2)}}); + + FixIncludesOptions Options; + Options.FragmentDependencyCommentFormat = "needed by {0}"; + IncludeFixes Fixes = computeIncludeFixes(Results, "d.cc", Code, + format::getLLVMStyle(), Options); + ASSERT_THAT(Fixes.FragmentComments, testing::SizeIs(1)); + EXPECT_EQ(Fixes.FragmentComments.front().Status, + FragmentDependencyCommentStatus::ConflictingComment); + EXPECT_FALSE(Fixes.FragmentComments.front().Replacement.has_value()); +} + +TEST(FixIncludes, FragmentDependencyCommentsCRLFAndTrailingWhitespace) { + llvm::StringRef Code = "#include \"a.h\" \r\n#include \"gen.inc\"\r\n"; + + Includes Inc; + Include A; + A.Spelled = "a.h"; + A.Line = 1; + Inc.add(A); + Include Gen; + Gen.Spelled = "gen.inc"; + Gen.Line = 2; + Inc.add(Gen); + + AnalysisResults Results; + Results.FragmentDependencies.push_back({Inc.atLine(1), {Inc.atLine(2)}}); + + FixIncludesOptions Options; + Options.FragmentDependencyCommentFormat = "needed by {0}"; + IncludeFixes Fixes = computeIncludeFixes(Results, "d.cc", Code, + format::getLLVMStyle(), Options); + ASSERT_THAT(Fixes.FragmentComments, testing::SizeIs(1)); + ASSERT_TRUE(Fixes.FragmentComments.front().Replacement.has_value()); + size_t ExpectedOffset = Code.find(" \r\n"); + ASSERT_NE(ExpectedOffset, llvm::StringRef::npos); + EXPECT_EQ(Fixes.FragmentComments.front().Replacement->getOffset(), + ExpectedOffset); +} + MATCHER_P3(expandedAt, FileID, Offset, SM, "") { auto [ExpanedFileID, ExpandedOffset] = SM->getDecomposedExpansionLoc(arg); return ExpanedFileID == FileID && ExpandedOffset == Offset; _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
