Author: Jan Svoboda
Date: 2026-06-04T10:49:49-07:00
New Revision: f357a470de89660b6a887abc95f6a15aae205e64

URL: 
https://github.com/llvm/llvm-project/commit/f357a470de89660b6a887abc95f6a15aae205e64
DIFF: 
https://github.com/llvm/llvm-project/commit/f357a470de89660b6a887abc95f6a15aae205e64.diff

LOG: [clang][lex] Store `HeaderFileInfo` in a `DenseMap` (#200968)

Calling `FileManager::GetUniqueIDMapping()` during modular builds gets
very expensive if the `FileManager` has seen lots of files. This
function is used in two places in the `ASTWriter` to look up
`HeaderFileInfo` in `HeaderSearch`.

This PR changes the storage of `HeaderFileInfo` from
`FileEntry::getUID()`-indexed `std::vector<T>` to
`llvm::DenseMap<FileEntryRef, T>`, improving scanning performance by
~2.5%.

Added: 
    

Modified: 
    clang/include/clang/Basic/FileManager.h
    clang/include/clang/Lex/HeaderSearch.h
    clang/lib/Basic/FileManager.cpp
    clang/lib/Lex/HeaderSearch.cpp
    clang/lib/Serialization/ASTWriter.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/FileManager.h 
b/clang/include/clang/Basic/FileManager.h
index 8ba56c2d778d8..f328969b1033a 100644
--- a/clang/include/clang/Basic/FileManager.h
+++ b/clang/include/clang/Basic/FileManager.h
@@ -306,11 +306,6 @@ class FileManager : public RefCountedBase<FileManager> {
   bool makeAbsolutePath(SmallVectorImpl<char> &Path,
                         bool Canonicalize = false) const;
 
-  /// Produce an array mapping from the unique IDs assigned to each
-  /// file to the corresponding FileEntryRef.
-  void
-  GetUniqueIDMapping(SmallVectorImpl<OptionalFileEntryRef> &UIDToFiles) const;
-
   /// Retrieve the canonical name for a given directory.
   ///
   /// This is a very expensive operation, despite its results being cached,

diff  --git a/clang/include/clang/Lex/HeaderSearch.h 
b/clang/include/clang/Lex/HeaderSearch.h
index 0baf07e95ef83..5e4abbff83665 100644
--- a/clang/include/clang/Lex/HeaderSearch.h
+++ b/clang/include/clang/Lex/HeaderSearch.h
@@ -286,9 +286,8 @@ class HeaderSearch {
   /// SpecificModuleCachePath.
   size_t NormalizedModuleCachePathLen = 0;
 
-  /// All of the preprocessor-specific data about files that are
-  /// included, indexed by the FileEntry's UID.
-  mutable std::vector<HeaderFileInfo> FileInfo;
+  /// All the preprocessor-specific data about files that are included.
+  mutable llvm::DenseMap<FileEntryRef, HeaderFileInfo> FileInfo;
 
   /// Keeps track of each lookup performed by LookupFile.
   struct LookupFileCacheInfo {
@@ -899,8 +898,6 @@ class HeaderSearch {
   /// Retrieve the module map.
   const ModuleMap &getModuleMap() const { return ModMap; }
 
-  unsigned header_file_size() const { return FileInfo.size(); }
-
   /// Return the HeaderFileInfo structure for the specified FileEntry, in
   /// preparation for updating it in some way.
   HeaderFileInfo &getFileInfo(FileEntryRef FE);
@@ -909,9 +906,10 @@ class HeaderSearch {
   /// ever been filled in (either locally or externally).
   const HeaderFileInfo *getExistingFileInfo(FileEntryRef FE) const;
 
-  /// Return the headerFileInfo structure for the specified FileEntry, if it 
has
-  /// ever been filled in locally.
-  const HeaderFileInfo *getExistingLocalFileInfo(FileEntryRef FE) const;
+  /// Iterate HeaderFileInfo structures and their corresponding FileEntryRef, 
if
+  /// they have ever been filled in locally.
+  void forEachExistingLocalFileInfo(
+      llvm::function_ref<void(FileEntryRef, const HeaderFileInfo &)> Fn) const;
 
   SearchDirIterator search_dir_begin() { return {*this, 0}; }
   SearchDirIterator search_dir_end() { return {*this, SearchDirs.size()}; }

diff  --git a/clang/lib/Basic/FileManager.cpp b/clang/lib/Basic/FileManager.cpp
index efa4707731b8e..8fb3ba0a27aad 100644
--- a/clang/lib/Basic/FileManager.cpp
+++ b/clang/lib/Basic/FileManager.cpp
@@ -642,24 +642,6 @@ std::error_code FileManager::getStatValue(StringRef Path,
   return std::error_code();
 }
 
-void FileManager::GetUniqueIDMapping(
-    SmallVectorImpl<OptionalFileEntryRef> &UIDToFiles) const {
-  UIDToFiles.clear();
-  UIDToFiles.resize(NextFileUID);
-
-  for (const auto &Entry : SeenFileEntries) {
-    // Only return files that exist and are not redirected.
-    if (!Entry.getValue() || !isa<FileEntry *>(Entry.getValue()->V))
-      continue;
-    FileEntryRef FE(Entry);
-    // Add this file if it's the first one with the UID, or if its name is
-    // better than the existing one.
-    OptionalFileEntryRef &ExistingFE = UIDToFiles[FE.getUID()];
-    if (!ExistingFE || FE.getName() < ExistingFE->getName())
-      ExistingFE = FE;
-  }
-}
-
 StringRef FileManager::getCanonicalName(DirectoryEntryRef Dir) {
   return getCanonicalName(Dir, Dir.getName());
 }

diff  --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp
index 782b789f9260b..ecd80db10f2bd 100644
--- a/clang/lib/Lex/HeaderSearch.cpp
+++ b/clang/lib/Lex/HeaderSearch.cpp
@@ -91,8 +91,8 @@ void HeaderSearch::PrintStats() {
   llvm::errs() << "\n*** HeaderSearch Stats:\n"
                << FileInfo.size() << " files tracked.\n";
   unsigned NumOnceOnlyFiles = 0;
-  for (unsigned i = 0, e = FileInfo.size(); i != e; ++i)
-    NumOnceOnlyFiles += (FileInfo[i].isPragmaOnce || FileInfo[i].isImport);
+  for (const auto &[FE, HFI] : FileInfo)
+    NumOnceOnlyFiles += (HFI.isPragmaOnce || HFI.isImport);
   llvm::errs() << "  " << NumOnceOnlyFiles << " #import/#pragma once files.\n";
 
   llvm::errs() << "  " << NumIncluded << " #include/#include_next/#import.\n"
@@ -1376,10 +1376,7 @@ static void mergeHeaderFileInfo(HeaderFileInfo &HFI,
 }
 
 HeaderFileInfo &HeaderSearch::getFileInfo(FileEntryRef FE) {
-  if (FE.getUID() >= FileInfo.size())
-    FileInfo.resize(FE.getUID() + 1);
-
-  HeaderFileInfo *HFI = &FileInfo[FE.getUID()];
+  HeaderFileInfo *HFI = &FileInfo[FE];
   // FIXME: Use a generation count to check whether this is really up to date.
   if (ExternalSource && !HFI->Resolved) {
     auto ExternalHFI = ExternalSource->GetHeaderFileInfo(FE);
@@ -1400,10 +1397,7 @@ HeaderFileInfo &HeaderSearch::getFileInfo(FileEntryRef 
FE) {
 const HeaderFileInfo *HeaderSearch::getExistingFileInfo(FileEntryRef FE) const 
{
   HeaderFileInfo *HFI;
   if (ExternalSource) {
-    if (FE.getUID() >= FileInfo.size())
-      FileInfo.resize(FE.getUID() + 1);
-
-    HFI = &FileInfo[FE.getUID()];
+    HFI = &FileInfo[FE];
     // FIXME: Use a generation count to check whether this is really up to 
date.
     if (!HFI->Resolved) {
       auto ExternalHFI = ExternalSource->GetHeaderFileInfo(FE);
@@ -1413,8 +1407,8 @@ const HeaderFileInfo 
*HeaderSearch::getExistingFileInfo(FileEntryRef FE) const {
           mergeHeaderFileInfo(*HFI, ExternalHFI);
       }
     }
-  } else if (FE.getUID() < FileInfo.size()) {
-    HFI = &FileInfo[FE.getUID()];
+  } else if (auto It = FileInfo.find(FE); It != FileInfo.end()) {
+    HFI = &It->second;
   } else {
     HFI = nullptr;
   }
@@ -1422,16 +1416,11 @@ const HeaderFileInfo 
*HeaderSearch::getExistingFileInfo(FileEntryRef FE) const {
   return (HFI && HFI->IsValid) ? HFI : nullptr;
 }
 
-const HeaderFileInfo *
-HeaderSearch::getExistingLocalFileInfo(FileEntryRef FE) const {
-  HeaderFileInfo *HFI;
-  if (FE.getUID() < FileInfo.size()) {
-    HFI = &FileInfo[FE.getUID()];
-  } else {
-    HFI = nullptr;
-  }
-
-  return (HFI && HFI->IsValid && !HFI->External) ? HFI : nullptr;
+void HeaderSearch::forEachExistingLocalFileInfo(
+    llvm::function_ref<void(FileEntryRef, const HeaderFileInfo &)> Fn) const {
+  for (const auto &[FE, HFI] : FileInfo)
+    if (HFI.IsValid && !HFI.External)
+      Fn(FE, HFI);
 }
 
 bool HeaderSearch::isFileMultipleIncludeGuarded(FileEntryRef File) const {

diff  --git a/clang/lib/Serialization/ASTWriter.cpp 
b/clang/lib/Serialization/ASTWriter.cpp
index 074b0fccdb65d..6497c22a762ae 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -246,30 +246,17 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module 
*RootModule) {
   }
 
   // Handle textually-included headers that belong to other modules.
-
-  SmallVector<OptionalFileEntryRef, 16> FilesByUID;
-  HS.getFileMgr().GetUniqueIDMapping(FilesByUID);
-
-  if (FilesByUID.size() > HS.header_file_size())
-    FilesByUID.resize(HS.header_file_size());
-
-  for (unsigned UID = 0, LastUID = FilesByUID.size(); UID != LastUID; ++UID) {
-    OptionalFileEntryRef File = FilesByUID[UID];
-    if (!File)
-      continue;
-
-    const HeaderFileInfo *HFI = HS.getExistingLocalFileInfo(*File);
-    if (!HFI)
-      continue; // We have no information on this being a header file.
-    if (!HFI->isCompilingModuleHeader && HFI->isModuleHeader)
-      continue; // Modular header, handled in the above module-based loop.
-    if (!HFI->isCompilingModuleHeader && !HFI->IsLocallyIncluded)
-      continue; // Non-modular header not included locally is not affecting.
-
-    for (const auto &KH : HS.findResolvedModulesForHeader(*File))
-      if (const Module *M = KH.getModule())
-        CollectModuleMapsForHierarchy(M, AR_TextualHeader);
-  }
+  HS.forEachExistingLocalFileInfo(
+      [&](FileEntryRef File, const HeaderFileInfo &HFI) {
+        if (!HFI.isCompilingModuleHeader && HFI.isModuleHeader)
+          return; // Modular header, handled in the above module-based loop.
+        if (!HFI.isCompilingModuleHeader && !HFI.IsLocallyIncluded)
+          return; // Non-modular header not included locally is not affecting.
+
+        for (const auto &KH : HS.findResolvedModulesForHeader(File))
+          if (const Module *M = KH.getModule())
+            CollectModuleMapsForHierarchy(M, AR_TextualHeader);
+      });
 
   // FIXME: This algorithm is not correct for module map hierarchies where
   // module map file defining a (sub)module of a top-level module X includes
@@ -2252,46 +2239,36 @@ void ASTWriter::WriteHeaderSearch(const HeaderSearch 
&HS) {
     }
   }
 
-  SmallVector<OptionalFileEntryRef, 16> FilesByUID;
-  HS.getFileMgr().GetUniqueIDMapping(FilesByUID);
-
-  if (FilesByUID.size() > HS.header_file_size())
-    FilesByUID.resize(HS.header_file_size());
-
-  for (unsigned UID = 0, LastUID = FilesByUID.size(); UID != LastUID; ++UID) {
-    OptionalFileEntryRef File = FilesByUID[UID];
-    if (!File)
-      continue;
-
-    const HeaderFileInfo *HFI = HS.getExistingLocalFileInfo(*File);
-    if (!HFI)
-      continue; // We have no information on this being a header file.
-    if (!HFI->isCompilingModuleHeader && HFI->isModuleHeader)
-      continue; // Header file info is tracked by the owning module file.
-    if (!HFI->isCompilingModuleHeader && !HFI->IsLocallyIncluded)
-      continue; // Header file info is tracked by the including module file.
-
-    // Massage the file path into an appropriate form.
-    StringRef Filename = File->getName();
-    SmallString<128> FilenameTmp(Filename);
-    if (PreparePathForOutput(FilenameTmp)) {
-      // If we performed any translation on the file name at all, we need to
-      // save this string, since the generator will refer to it later.
-      Filename = StringRef(strdup(FilenameTmp.c_str()));
-      SavedStrings.push_back(Filename.data());
-    }
+  HS.forEachExistingLocalFileInfo(
+      [&](FileEntryRef File, const HeaderFileInfo &HFI) {
+        if (!HFI.isCompilingModuleHeader && HFI.isModuleHeader)
+          return; // Header file info is tracked by the owning module file.
+        if (!HFI.isCompilingModuleHeader && !HFI.IsLocallyIncluded)
+          return; // Header file info is tracked by the including module file.
+
+        // Massage the file path into an appropriate form.
+        StringRef Filename = File.getName();
+        SmallString<128> FilenameTmp(Filename);
+        if (PreparePathForOutput(FilenameTmp)) {
+          // If we performed any translation on the file name at all, we need 
to
+          // save this string, since the generator will refer to it later.
+          Filename = StringRef(strdup(FilenameTmp.c_str()));
+          SavedStrings.push_back(Filename.data());
+        }
 
-    bool Included = HFI->IsLocallyIncluded || PP->alreadyIncluded(*File);
+        bool Included = HFI.IsLocallyIncluded || PP->alreadyIncluded(File);
 
-    HeaderFileInfoTrait::key_type Key = {
-        Filename, File->getSize(),
-        getTimestampForOutput(File->getModificationTime())};
-    HeaderFileInfoTrait::data_type Data = {
-      *HFI, Included, HS.getModuleMap().findResolvedModulesForHeader(*File), {}
-    };
-    Generator.insert(Key, Data, GeneratorTrait);
-    ++NumHeaderSearchEntries;
-  }
+        HeaderFileInfoTrait::key_type Key = {
+            Filename, File.getSize(),
+            getTimestampForOutput(File.getModificationTime())};
+        HeaderFileInfoTrait::data_type Data = {
+            HFI,
+            Included,
+            HS.getModuleMap().findResolvedModulesForHeader(File),
+            {}};
+        Generator.insert(Key, Data, GeneratorTrait);
+        ++NumHeaderSearchEntries;
+      });
 
   // Create the on-disk hash table in a buffer.
   SmallString<4096> TableData;


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to