llvmorg-github-actions[bot] wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang-tools-extra

Author: Daniil Dudkin (unterumarmung)

<details>
<summary>Changes</summary>



---

Patch is 30.10 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/196764.diff


4 Files Affected:

- (modified) 
clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h 
(+29) 
- (modified) clang-tools-extra/include-cleaner/lib/Analysis.cpp (+219-59) 
- (modified) clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp (+1-1) 
- (modified) clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp 
(+318-7) 


``````````diff
diff --git 
a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h 
b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h
index 8e4912fa7bd84..1d28d87c025ca 100644
--- a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h
+++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h
@@ -62,24 +62,53 @@ void walkUsed(llvm::ArrayRef<Decl *> ASTRoots,
               const PragmaIncludes *PI, const Preprocessor &PP,
               UsedSymbolCB CB);
 
+/// A location kind where a symbol reference is observed.
+enum class SymbolReferenceOrigin {
+  MainFile,
+  Preamble,
+  Fragment,
+};
+
 /// A missing include insertion candidate.
 struct MissingInclude {
   std::string Spelling;
   Header Provider;
 };
 
+/// A missing include finding with per-reference provenance.
+struct MissingIncludeRef {
+  SymbolReference Ref;
+  llvm::SmallVector<Header> Providers;
+  SymbolReferenceOrigin Origin = SymbolReferenceOrigin::MainFile;
+  /// Null if the fragment file has multiple direct include sites.
+  const Include *FragmentInclude = nullptr;
+};
+
+/// An include kept alive only by fragment usage.
+struct FragmentDependency {
+  const Include *Preserved = nullptr;
+  llvm::SmallVector<const Include *> Fragments;
+};
+
 /// The result of include-cleaner analysis for one main file.
 struct AnalysisResults {
   std::vector<const Include *> Unused;
   /// Deduplicated insertion plan, e.g. "<vector>" paired with the chosen
   /// provider Header.
   std::vector<MissingInclude> MissingIncludes;
+  /// Per-reference provenance for consumers that need richer diagnostics.
+  std::vector<MissingIncludeRef> MissingRefs;
+  /// Include-preservation provenance for fragment-only uses.
+  std::vector<FragmentDependency> FragmentDependencies;
 };
 
 /// Analysis configuration shared by include-cleaner consumers.
 struct AnalysisOptions {
   /// No analysis will be performed for headers that satisfy the predicate.
   std::function<bool(const Header &)> HeaderFilter;
+  /// A predicate matched against normalized resolved paths, and normalized
+  /// spelled paths as a fallback, to identify direct include fragments.
+  std::function<bool(llvm::StringRef)> FragmentHeaderFilter;
 };
 
 /// Determine which headers should be inserted or removed from the main file.
diff --git a/clang-tools-extra/include-cleaner/lib/Analysis.cpp 
b/clang-tools-extra/include-cleaner/lib/Analysis.cpp
index cd0b3396bf418..321b67a5787d3 100644
--- a/clang-tools-extra/include-cleaner/lib/Analysis.cpp
+++ b/clang-tools-extra/include-cleaner/lib/Analysis.cpp
@@ -8,6 +8,7 @@
 
 #include "clang-include-cleaner/Analysis.h"
 #include "AnalysisInternal.h"
+#include "TypesInternal.h"
 #include "clang-include-cleaner/IncludeSpeller.h"
 #include "clang-include-cleaner/Record.h"
 #include "clang-include-cleaner/Types.h"
@@ -22,9 +23,11 @@
 #include "clang/Tooling/Core/Replacement.h"
 #include "clang/Tooling/Inclusions/StandardLibrary.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/STLFunctionalExtras.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
@@ -32,13 +35,91 @@
 #include "llvm/Support/ErrorHandling.h"
 #include <cassert>
 #include <climits>
+#include <optional>
 #include <string>
 
 namespace clang::include_cleaner {
 
 namespace {
+
+struct ClassifiedReference {
+  SymbolReferenceOrigin Origin;
+  // Null if the fragment file has multiple direct include sites.
+  const Include *FragmentInclude = nullptr;
+};
+
+class FragmentTracker {
+public:
+  FragmentTracker(const Includes &Inc, const SourceManager &SM,
+                  const std::function<bool(llvm::StringRef)> &HeaderFilter)
+      : SM(SM) {
+    if (!HeaderFilter)
+      return;
+
+    for (const Include &I : Inc.all()) {
+      if (!I.Resolved ||
+          locateInMainFile(I.HashLocation, SM) != MainFileLocation::MainFile) {
+        continue;
+      }
+
+      // Match the canonical path first, but fall back to the spelled include
+      // so generated paths can still be configured even when resolution loses
+      // that spelling detail.
+      const llvm::SmallString<128> ResolvedPath =
+          normalizePath(I.Resolved->getName());
+      bool IsFragment = HeaderFilter(ResolvedPath);
+      if (!IsFragment) {
+        const llvm::SmallString<128> SpelledPath = normalizePath(I.Spelled);
+        if (!SpelledPath.empty())
+          IsFragment = HeaderFilter(SpelledPath);
+      }
+      if (!IsFragment)
+        continue;
+
+      // Fragments are intentionally direct-includes-only for now. If a
+      // fragment includes another file, that nested include keeps normal
+      // header semantics.
+      IncludeSitesByFile[&I.Resolved->getFileEntry()].push_back(&I);
+      DirectIncludes.insert(&I);
+    }
+  }
+
+  std::optional<ClassifiedReference> classify(FileID FID) const {
+    if (FID == SM.getMainFileID())
+      return ClassifiedReference{SymbolReferenceOrigin::MainFile};
+    if (FID == SM.getPreambleFileID())
+      return ClassifiedReference{SymbolReferenceOrigin::Preamble};
+    auto FE = SM.getFileEntryRefForID(FID);
+    if (!FE)
+      return std::nullopt;
+    auto It = IncludeSitesByFile.find(&FE->getFileEntry());
+    if (It == IncludeSitesByFile.end())
+      return std::nullopt;
+    if (It->second.size() != 1)
+      return ClassifiedReference{SymbolReferenceOrigin::Fragment};
+    return ClassifiedReference{SymbolReferenceOrigin::Fragment,
+                               It->second.front()};
+  }
+
+  bool isDirectInclude(const Include *I) const {
+    return DirectIncludes.contains(I);
+  }
+
+private:
+  const SourceManager &SM;
+  llvm::DenseMap<const FileEntry *, llvm::SmallVector<const Include *>>
+      IncludeSitesByFile;
+  llvm::DenseSet<const Include *> DirectIncludes;
+};
+
+using UsedSymbolWithOriginCB = llvm::function_ref<void(
+    const SymbolReference &Ref, llvm::ArrayRef<Header> Providers,
+    const ClassifiedReference &Info)>;
+
 bool shouldIgnoreMacroReference(const Preprocessor &PP, const Macro &M) {
-  auto *MI = PP.getMacroInfo(M.Name);
+  auto &MutablePP = const_cast<Preprocessor &>(PP);
+  auto MD = MutablePP.getMacroDefinitionAtLoc(M.Name, M.Definition);
+  auto *MI = MD ? MD.getMacroInfo() : PP.getMacroInfo(M.Name);
   // Macros that expand to themselves are confusing from user's point of view.
   // They usually aspect the usage to be attributed to the underlying decl and
   // not the macro definition. So ignore such macros (e.g. std{in,out,err} are
@@ -47,15 +128,12 @@ bool shouldIgnoreMacroReference(const Preprocessor &PP, 
const Macro &M) {
   return MI && MI->getNumTokens() == 1 && MI->isObjectLike() &&
          MI->getReplacementToken(0).getIdentifierInfo() == M.Name;
 }
-} // namespace
 
-void walkUsed(llvm::ArrayRef<Decl *> ASTRoots,
-              llvm::ArrayRef<SymbolReference> MacroRefs,
-              const PragmaIncludes *PI, const Preprocessor &PP,
-              UsedSymbolCB CB) {
+void walkUsedWithOrigins(
+    llvm::ArrayRef<Decl *> ASTRoots, llvm::ArrayRef<SymbolReference> MacroRefs,
+    const PragmaIncludes *PI, const Preprocessor &PP, UsedSymbolWithOriginCB 
CB,
+    llvm::function_ref<std::optional<ClassifiedReference>(FileID)> Classify) {
   const auto &SM = PP.getSourceManager();
-  // This is duplicated in writeHTMLReport, changes should be mirrored there.
-  tooling::stdlib::Recognizer Recognizer;
   for (auto *Root : ASTRoots) {
     walkAST(*Root, [&](SourceLocation Loc, NamedDecl &ND, RefType RT) {
       auto SpellLoc = SM.getSpellingLoc(Loc);
@@ -71,25 +149,28 @@ void walkUsed(llvm::ArrayRef<Decl *> ASTRoots,
         SpellLoc = SM.getSpellingLoc(Loc);
       }
       auto FID = SM.getFileID(SpellLoc);
-      if (FID != SM.getMainFileID() && FID != SM.getPreambleFileID())
+      auto Info = Classify(FID);
+      if (!Info)
         return;
       // FIXME: Most of the work done here is repetitive. It might be useful to
       // have a cache/batching.
       SymbolReference SymRef{ND, Loc, RT};
-      return CB(SymRef, headersForSymbol(ND, PP, PI));
+      return CB(SymRef, headersForSymbol(ND, PP, PI), *Info);
     });
   }
+
   for (const SymbolReference &MacroRef : MacroRefs) {
     assert(MacroRef.Target.kind() == Symbol::Macro);
-    if (!SM.isWrittenInMainFile(SM.getSpellingLoc(MacroRef.RefLocation)) ||
-        shouldIgnoreMacroReference(PP, MacroRef.Target.macro()))
+    if (shouldIgnoreMacroReference(PP, MacroRef.Target.macro()))
       continue;
-    CB(MacroRef, headersForSymbol(MacroRef.Target, PP, PI));
+    auto FID = SM.getFileID(SM.getSpellingLoc(MacroRef.RefLocation));
+    auto Info = Classify(FID);
+    if (!Info)
+      continue;
+    CB(MacroRef, headersForSymbol(MacroRef.Target, PP, PI), *Info);
   }
 }
 
-namespace {
-
 bool isFilteredInclude(const Include &I,
                        llvm::function_ref<bool(const Header &)> HeaderFilter,
                        const Preprocessor &PP) {
@@ -97,8 +178,9 @@ bool isFilteredInclude(const Include &I,
     auto Lang = PP.getLangOpts().CPlusPlus ? tooling::stdlib::Lang::CXX
                                            : tooling::stdlib::Lang::C;
     if (auto StdHeader = tooling::stdlib::Header::named(I.quote(), Lang);
-        StdHeader && HeaderFilter(*StdHeader))
+        StdHeader && HeaderFilter(*StdHeader)) {
       return true;
+    }
   }
   return I.Resolved && HeaderFilter(*I.Resolved);
 }
@@ -109,32 +191,89 @@ bool shouldSuppressIncludeDiagnostic(
     const PragmaIncludes *PI, const Preprocessor &PP,
     OptionalDirectoryEntryRef ResourceDir) {
   if (!I.Resolved || I.Resolved->getDir() == ResourceDir ||
-      isFilteredInclude(I, HeaderFilter, PP))
+      isFilteredInclude(I, HeaderFilter, PP)) {
     return true;
+  }
   if (!PI)
     return false;
   if (PI->shouldKeep(*I.Resolved))
     return true;
   // Check if main file is the public interface for a private header. If so
-  // we shouldn't diagnose it as unused.
+  // we shouldn't diagnose it as unused or record fragment dependencies.
   if (auto PHeader = PI->getPublic(*I.Resolved); !PHeader.empty()) {
     PHeader = PHeader.trim("<>\"");
-    // Since most private -> public mappings happen in a verbatim way, we
-    // check textually here. This might go wrong in presence of symlinks or
-    // header mappings. But that's not different than rest of the places.
     if (MainFile.getName().ends_with(PHeader))
       return true;
   }
   return false;
 }
 
+void walkUsedInFiles(llvm::ArrayRef<Decl *> ASTRoots,
+                     llvm::ArrayRef<SymbolReference> MacroRefs,
+                     const PragmaIncludes *PI, const Preprocessor &PP,
+                     UsedSymbolCB CB,
+                     llvm::function_ref<bool(FileID)> IsMainFile) {
+  walkUsedWithOrigins(
+      ASTRoots, MacroRefs, PI, PP,
+      [&](const SymbolReference &Ref, llvm::ArrayRef<Header> Providers,
+          const ClassifiedReference &) { CB(Ref, Providers); },
+      [&](FileID FID) -> std::optional<ClassifiedReference> {
+        if (!IsMainFile(FID))
+          return std::nullopt;
+        return ClassifiedReference{SymbolReferenceOrigin::MainFile};
+      });
+}
+
+} // namespace
+
+void walkUsed(llvm::ArrayRef<Decl *> ASTRoots,
+              llvm::ArrayRef<SymbolReference> MacroRefs,
+              const PragmaIncludes *PI, const Preprocessor &PP,
+              UsedSymbolCB CB) {
+  const auto &SM = PP.getSourceManager();
+  walkUsedInFiles(ASTRoots, MacroRefs, PI, PP, CB, [&](FileID FID) {
+    return FID == SM.getMainFileID() || FID == SM.getPreambleFileID();
+  });
+}
+
+namespace {
+
 class IncludeUsage {
 public:
-  void mark(const Include *I) { Used.insert(I); }
+  void mark(const Include *I, const ClassifiedReference &Info) {
+    Used.insert(I);
+    if (Info.Origin != SymbolReferenceOrigin::Fragment) {
+      UsedByMainOrPreamble.insert(I);
+      return;
+    }
+
+    UsedByFragment.insert(I);
+    if (!Info.FragmentInclude)
+      return;
+    auto &Reasons = ByPreserved[I];
+    if (!llvm::is_contained(Reasons, Info.FragmentInclude))
+      Reasons.push_back(Info.FragmentInclude);
+  }
+
   bool contains(const Include *I) const { return Used.contains(I); }
 
+  bool isFragmentOnly(const Include *I) const {
+    return UsedByFragment.contains(I) && !UsedByMainOrPreamble.contains(I);
+  }
+
+  llvm::SmallVector<const Include *> fragmentSites(const Include *I) const {
+    auto It = ByPreserved.find(I);
+    if (It == ByPreserved.end())
+      return {};
+    return It->second;
+  }
+
 private:
   llvm::DenseSet<const Include *> Used;
+  llvm::DenseSet<const Include *> UsedByMainOrPreamble;
+  llvm::DenseSet<const Include *> UsedByFragment;
+  llvm::DenseMap<const Include *, llvm::SmallVector<const Include *>>
+      ByPreserved;
 };
 
 class MissingIncludeCollector {
@@ -170,48 +309,69 @@ AnalysisResults analyze(llvm::ArrayRef<Decl *> ASTRoots,
   auto HeaderFilter = [&](const Header &H) {
     return Options.HeaderFilter && Options.HeaderFilter(H);
   };
+  FragmentTracker Fragments(Inc, SM, Options.FragmentHeaderFilter);
   IncludeUsage Usage;
   MissingIncludeCollector MissingIncludes;
+  std::vector<MissingIncludeRef> MissingRefs;
   OptionalDirectoryEntryRef ResourceDir =
       PP.getHeaderSearchInfo().getModuleMap().getBuiltinDir();
-  walkUsed(ASTRoots, MacroRefs, PI, PP,
-           [&](const SymbolReference &Ref, llvm::ArrayRef<Header> Providers) {
-             bool Satisfied = false;
-             for (const Header &H : Providers) {
-               if (H.kind() == Header::Physical &&
-                   (H.physical() == MainFile ||
-                    H.physical().getDir() == ResourceDir)) {
-                 Satisfied = true;
-               }
-               for (const Include *I : Inc.match(H)) {
-                 Usage.mark(I);
-                 Satisfied = true;
-               }
-             }
-             // Bail out if we can't (or need not) insert an include.
-             if (Satisfied || Providers.empty() || Ref.RT != RefType::Explicit)
-               return;
-             if (HeaderFilter(Providers.front()))
-               return;
-             // Check if we have any headers with the same spelling, in edge
-             // cases like `#include_next "foo.h"`, the user can't ever
-             // include the physical foo.h, but can have a spelling that
-             // refers to it.
-             auto Spelling = spellHeader(
-                 {Providers.front(), PP.getHeaderSearchInfo(), MainFile});
-             for (const Include *I : Inc.match(Header{Spelling})) {
-               Usage.mark(I);
-               Satisfied = true;
-             }
-             if (!Satisfied)
-               MissingIncludes.add(Spelling, Providers.front());
-           });
-
-  AnalysisResults Results;
+
+  walkUsedWithOrigins(
+      ASTRoots, MacroRefs, PI, PP,
+      [&](const SymbolReference &Ref, llvm::ArrayRef<Header> Providers,
+          const ClassifiedReference &Info) {
+        bool Satisfied = false;
+        for (const Header &H : Providers) {
+          if (H.kind() == Header::Physical &&
+              (H.physical() == MainFile ||
+               H.physical().getDir() == ResourceDir)) {
+            Satisfied = true;
+          }
+          for (const Include *I : Inc.match(H)) {
+            Usage.mark(I, Info);
+            Satisfied = true;
+          }
+        }
+
+        if (Satisfied || Providers.empty() || Ref.RT != RefType::Explicit)
+          return;
+        if (HeaderFilter(Providers.front()))
+          return;
+
+        auto Spelling = spellHeader(
+            {Providers.front(), PP.getHeaderSearchInfo(), MainFile});
+        for (const Include *I : Inc.match(Header{Spelling})) {
+          Usage.mark(I, Info);
+          Satisfied = true;
+        }
+        if (Satisfied)
+          return;
+
+        // MissingIncludes drives edits, while MissingRefs preserves where the
+        // unsatisfied use came from for higher-level diagnostics.
+        MissingIncludes.add(Spelling, Providers.front());
+        MissingRefs.push_back(
+            MissingIncludeRef{Ref, llvm::SmallVector<Header>(Providers),
+                              Info.Origin, Info.FragmentInclude});
+      },
+      [&](FileID FID) { return Fragments.classify(FID); });
+
+  AnalysisResults Results{{}, {}, std::move(MissingRefs), {}};
   for (const Include &I : Inc.all()) {
-    if (Usage.contains(&I) ||
-        shouldSuppressIncludeDiagnostic(I, MainFile, HeaderFilter, PI, PP,
-                                        ResourceDir))
+    bool Suppressed = shouldSuppressIncludeDiagnostic(I, MainFile, 
HeaderFilter,
+                                                      PI, PP, ResourceDir);
+    if (Usage.contains(&I)) {
+      const llvm::SmallVector<const Include *> FragmentSites =
+          Usage.fragmentSites(&I);
+      // Ambiguous fragment sites still preserve the header, but get no 
comment.
+      if (!Suppressed && Usage.isFragmentOnly(&I) &&
+          !Fragments.isDirectInclude(&I) && !FragmentSites.empty()) {
+        Results.FragmentDependencies.push_back(
+            FragmentDependency{&I, FragmentSites});
+      }
+      continue;
+    }
+    if (Suppressed)
       continue;
     Results.Unused.push_back(&I);
   }
diff --git a/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp 
b/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp
index d72d6adf33c9b..49bd5495bcc13 100644
--- a/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp
+++ b/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp
@@ -192,7 +192,7 @@ class Action : public clang::ASTFrontendAction {
     SM.getFileManager().makeAbsolutePath(AbsPath);
     llvm::StringRef Code = SM.getBufferData(SM.getMainFileID());
 
-    AnalysisOptions AnalyzeOptions{HeaderFilter};
+    AnalysisOptions AnalyzeOptions{HeaderFilter, {}};
     auto Results =
         analyze(AST.Roots, PP.MacroReferences, PP.Includes, &PI,
                 getCompilerInstance().getPreprocessor(), AnalyzeOptions);
diff --git a/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp 
b/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp
index 3fcf82251d14b..0c68fb58032fb 100644
--- a/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp
+++ b/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp
@@ -39,6 +39,14 @@
 #include <vector>
 
 namespace clang::include_cleaner {
+
+static std::vector<Decl *> topLevelDecls(TestAST &AST) {
+  std::vector<Decl *> Decls;
+  for (auto *D : AST.context().getTranslationUnitDecl()->decls())
+    Decls.push_back(D);
+  return Decls;
+}
+
 namespace {
 using testing::_;
 using testing::AllOf;
@@ -375,9 +383,7 @@ TEST_F(AnalyzeTest, SpellingIncludesWithSymlinks) {
                            /*ModificationTime=*/{});
 
   TestAST AST(Inputs);
-  std::vector<Decl *> DeclsInTU;
-  for (auto *D : AST.context().getTranslationUnitDecl()->decls())
-    DeclsInTU.push_back(D);
+  std::vector<Decl *> DeclsInTU = topLevelDecls(AST);
   auto Results = analyze(DeclsInTU, {}, PP.Includes, &PI, AST.preprocessor());
   // Check that we're spelling header using the symlink, and not underlying
   // path.
@@ -388,8 +394,10 @@ TEST_F(AnalyzeTest, SpellingIncludesWithSymlinks) {
 
   {
     // Make sure filtering is also applied to symlink, not underlying file.
-    AnalysisOptions Options{
-        [](const Header &H) { return H.resolvedPath() == "inner.h"; }};
+    AnalysisOptions Options;
+    Options.HeaderFilter = [](const Header &H) {
+      return H.resolvedPath() == "inner.h";
+    };
     Results =
         analyze(DeclsInTU, {}, PP.Includes, &PI, AST.preprocessor(), Options);
     EXPECT_THAT(Results.MissingIncludes,
@@ -398,8 +406,10 @@ TEST_F(AnalyzeTest, SpellingIncludesWithSymlinks) {
     EXPECT_THAT(Results.Unused, Not(testing::IsEmpty()));
   }
   {
-    AnalysisOptions Options{
-        [](const Header &H) { return H.resolvedPath() == "header.h"; }};
+    AnalysisOptions Options;
+    Options.HeaderFilter = [](const Header &H) {
+      return H.resolvedPath() == "header.h";
+    };
     Results =
         analyze(DeclsInTU, {}, PP.Includes, &PI, AST.preprocessor(), Options);
     // header.h should be ignored now.
@@ -409,6 +419,307 @@ TEST_F(AnalyzeTest, SpellingIn...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/196764
_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to