Author: Jan Svoboda Date: 2026-06-04T10:49:49-07:00 New Revision: f357a470de89660b6a887abc95f6a15aae205e64
URL: https://github.com/llvm/llvm-project/commit/f357a470de89660b6a887abc95f6a15aae205e64 DIFF: https://github.com/llvm/llvm-project/commit/f357a470de89660b6a887abc95f6a15aae205e64.diff LOG: [clang][lex] Store `HeaderFileInfo` in a `DenseMap` (#200968) Calling `FileManager::GetUniqueIDMapping()` during modular builds gets very expensive if the `FileManager` has seen lots of files. This function is used in two places in the `ASTWriter` to look up `HeaderFileInfo` in `HeaderSearch`. This PR changes the storage of `HeaderFileInfo` from `FileEntry::getUID()`-indexed `std::vector<T>` to `llvm::DenseMap<FileEntryRef, T>`, improving scanning performance by ~2.5%. Added: Modified: clang/include/clang/Basic/FileManager.h clang/include/clang/Lex/HeaderSearch.h clang/lib/Basic/FileManager.cpp clang/lib/Lex/HeaderSearch.cpp clang/lib/Serialization/ASTWriter.cpp Removed: ################################################################################ diff --git a/clang/include/clang/Basic/FileManager.h b/clang/include/clang/Basic/FileManager.h index 8ba56c2d778d8..f328969b1033a 100644 --- a/clang/include/clang/Basic/FileManager.h +++ b/clang/include/clang/Basic/FileManager.h @@ -306,11 +306,6 @@ class FileManager : public RefCountedBase<FileManager> { bool makeAbsolutePath(SmallVectorImpl<char> &Path, bool Canonicalize = false) const; - /// Produce an array mapping from the unique IDs assigned to each - /// file to the corresponding FileEntryRef. - void - GetUniqueIDMapping(SmallVectorImpl<OptionalFileEntryRef> &UIDToFiles) const; - /// Retrieve the canonical name for a given directory. /// /// This is a very expensive operation, despite its results being cached, diff --git a/clang/include/clang/Lex/HeaderSearch.h b/clang/include/clang/Lex/HeaderSearch.h index 0baf07e95ef83..5e4abbff83665 100644 --- a/clang/include/clang/Lex/HeaderSearch.h +++ b/clang/include/clang/Lex/HeaderSearch.h @@ -286,9 +286,8 @@ class HeaderSearch { /// SpecificModuleCachePath. size_t NormalizedModuleCachePathLen = 0; - /// All of the preprocessor-specific data about files that are - /// included, indexed by the FileEntry's UID. - mutable std::vector<HeaderFileInfo> FileInfo; + /// All the preprocessor-specific data about files that are included. + mutable llvm::DenseMap<FileEntryRef, HeaderFileInfo> FileInfo; /// Keeps track of each lookup performed by LookupFile. struct LookupFileCacheInfo { @@ -899,8 +898,6 @@ class HeaderSearch { /// Retrieve the module map. const ModuleMap &getModuleMap() const { return ModMap; } - unsigned header_file_size() const { return FileInfo.size(); } - /// Return the HeaderFileInfo structure for the specified FileEntry, in /// preparation for updating it in some way. HeaderFileInfo &getFileInfo(FileEntryRef FE); @@ -909,9 +906,10 @@ class HeaderSearch { /// ever been filled in (either locally or externally). const HeaderFileInfo *getExistingFileInfo(FileEntryRef FE) const; - /// Return the headerFileInfo structure for the specified FileEntry, if it has - /// ever been filled in locally. - const HeaderFileInfo *getExistingLocalFileInfo(FileEntryRef FE) const; + /// Iterate HeaderFileInfo structures and their corresponding FileEntryRef, if + /// they have ever been filled in locally. + void forEachExistingLocalFileInfo( + llvm::function_ref<void(FileEntryRef, const HeaderFileInfo &)> Fn) const; SearchDirIterator search_dir_begin() { return {*this, 0}; } SearchDirIterator search_dir_end() { return {*this, SearchDirs.size()}; } diff --git a/clang/lib/Basic/FileManager.cpp b/clang/lib/Basic/FileManager.cpp index efa4707731b8e..8fb3ba0a27aad 100644 --- a/clang/lib/Basic/FileManager.cpp +++ b/clang/lib/Basic/FileManager.cpp @@ -642,24 +642,6 @@ std::error_code FileManager::getStatValue(StringRef Path, return std::error_code(); } -void FileManager::GetUniqueIDMapping( - SmallVectorImpl<OptionalFileEntryRef> &UIDToFiles) const { - UIDToFiles.clear(); - UIDToFiles.resize(NextFileUID); - - for (const auto &Entry : SeenFileEntries) { - // Only return files that exist and are not redirected. - if (!Entry.getValue() || !isa<FileEntry *>(Entry.getValue()->V)) - continue; - FileEntryRef FE(Entry); - // Add this file if it's the first one with the UID, or if its name is - // better than the existing one. - OptionalFileEntryRef &ExistingFE = UIDToFiles[FE.getUID()]; - if (!ExistingFE || FE.getName() < ExistingFE->getName()) - ExistingFE = FE; - } -} - StringRef FileManager::getCanonicalName(DirectoryEntryRef Dir) { return getCanonicalName(Dir, Dir.getName()); } diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp index 782b789f9260b..ecd80db10f2bd 100644 --- a/clang/lib/Lex/HeaderSearch.cpp +++ b/clang/lib/Lex/HeaderSearch.cpp @@ -91,8 +91,8 @@ void HeaderSearch::PrintStats() { llvm::errs() << "\n*** HeaderSearch Stats:\n" << FileInfo.size() << " files tracked.\n"; unsigned NumOnceOnlyFiles = 0; - for (unsigned i = 0, e = FileInfo.size(); i != e; ++i) - NumOnceOnlyFiles += (FileInfo[i].isPragmaOnce || FileInfo[i].isImport); + for (const auto &[FE, HFI] : FileInfo) + NumOnceOnlyFiles += (HFI.isPragmaOnce || HFI.isImport); llvm::errs() << " " << NumOnceOnlyFiles << " #import/#pragma once files.\n"; llvm::errs() << " " << NumIncluded << " #include/#include_next/#import.\n" @@ -1376,10 +1376,7 @@ static void mergeHeaderFileInfo(HeaderFileInfo &HFI, } HeaderFileInfo &HeaderSearch::getFileInfo(FileEntryRef FE) { - if (FE.getUID() >= FileInfo.size()) - FileInfo.resize(FE.getUID() + 1); - - HeaderFileInfo *HFI = &FileInfo[FE.getUID()]; + HeaderFileInfo *HFI = &FileInfo[FE]; // FIXME: Use a generation count to check whether this is really up to date. if (ExternalSource && !HFI->Resolved) { auto ExternalHFI = ExternalSource->GetHeaderFileInfo(FE); @@ -1400,10 +1397,7 @@ HeaderFileInfo &HeaderSearch::getFileInfo(FileEntryRef FE) { const HeaderFileInfo *HeaderSearch::getExistingFileInfo(FileEntryRef FE) const { HeaderFileInfo *HFI; if (ExternalSource) { - if (FE.getUID() >= FileInfo.size()) - FileInfo.resize(FE.getUID() + 1); - - HFI = &FileInfo[FE.getUID()]; + HFI = &FileInfo[FE]; // FIXME: Use a generation count to check whether this is really up to date. if (!HFI->Resolved) { auto ExternalHFI = ExternalSource->GetHeaderFileInfo(FE); @@ -1413,8 +1407,8 @@ const HeaderFileInfo *HeaderSearch::getExistingFileInfo(FileEntryRef FE) const { mergeHeaderFileInfo(*HFI, ExternalHFI); } } - } else if (FE.getUID() < FileInfo.size()) { - HFI = &FileInfo[FE.getUID()]; + } else if (auto It = FileInfo.find(FE); It != FileInfo.end()) { + HFI = &It->second; } else { HFI = nullptr; } @@ -1422,16 +1416,11 @@ const HeaderFileInfo *HeaderSearch::getExistingFileInfo(FileEntryRef FE) const { return (HFI && HFI->IsValid) ? HFI : nullptr; } -const HeaderFileInfo * -HeaderSearch::getExistingLocalFileInfo(FileEntryRef FE) const { - HeaderFileInfo *HFI; - if (FE.getUID() < FileInfo.size()) { - HFI = &FileInfo[FE.getUID()]; - } else { - HFI = nullptr; - } - - return (HFI && HFI->IsValid && !HFI->External) ? HFI : nullptr; +void HeaderSearch::forEachExistingLocalFileInfo( + llvm::function_ref<void(FileEntryRef, const HeaderFileInfo &)> Fn) const { + for (const auto &[FE, HFI] : FileInfo) + if (HFI.IsValid && !HFI.External) + Fn(FE, HFI); } bool HeaderSearch::isFileMultipleIncludeGuarded(FileEntryRef File) const { diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 074b0fccdb65d..6497c22a762ae 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -246,30 +246,17 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) { } // Handle textually-included headers that belong to other modules. - - SmallVector<OptionalFileEntryRef, 16> FilesByUID; - HS.getFileMgr().GetUniqueIDMapping(FilesByUID); - - if (FilesByUID.size() > HS.header_file_size()) - FilesByUID.resize(HS.header_file_size()); - - for (unsigned UID = 0, LastUID = FilesByUID.size(); UID != LastUID; ++UID) { - OptionalFileEntryRef File = FilesByUID[UID]; - if (!File) - continue; - - const HeaderFileInfo *HFI = HS.getExistingLocalFileInfo(*File); - if (!HFI) - continue; // We have no information on this being a header file. - if (!HFI->isCompilingModuleHeader && HFI->isModuleHeader) - continue; // Modular header, handled in the above module-based loop. - if (!HFI->isCompilingModuleHeader && !HFI->IsLocallyIncluded) - continue; // Non-modular header not included locally is not affecting. - - for (const auto &KH : HS.findResolvedModulesForHeader(*File)) - if (const Module *M = KH.getModule()) - CollectModuleMapsForHierarchy(M, AR_TextualHeader); - } + HS.forEachExistingLocalFileInfo( + [&](FileEntryRef File, const HeaderFileInfo &HFI) { + if (!HFI.isCompilingModuleHeader && HFI.isModuleHeader) + return; // Modular header, handled in the above module-based loop. + if (!HFI.isCompilingModuleHeader && !HFI.IsLocallyIncluded) + return; // Non-modular header not included locally is not affecting. + + for (const auto &KH : HS.findResolvedModulesForHeader(File)) + if (const Module *M = KH.getModule()) + CollectModuleMapsForHierarchy(M, AR_TextualHeader); + }); // FIXME: This algorithm is not correct for module map hierarchies where // module map file defining a (sub)module of a top-level module X includes @@ -2252,46 +2239,36 @@ void ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) { } } - SmallVector<OptionalFileEntryRef, 16> FilesByUID; - HS.getFileMgr().GetUniqueIDMapping(FilesByUID); - - if (FilesByUID.size() > HS.header_file_size()) - FilesByUID.resize(HS.header_file_size()); - - for (unsigned UID = 0, LastUID = FilesByUID.size(); UID != LastUID; ++UID) { - OptionalFileEntryRef File = FilesByUID[UID]; - if (!File) - continue; - - const HeaderFileInfo *HFI = HS.getExistingLocalFileInfo(*File); - if (!HFI) - continue; // We have no information on this being a header file. - if (!HFI->isCompilingModuleHeader && HFI->isModuleHeader) - continue; // Header file info is tracked by the owning module file. - if (!HFI->isCompilingModuleHeader && !HFI->IsLocallyIncluded) - continue; // Header file info is tracked by the including module file. - - // Massage the file path into an appropriate form. - StringRef Filename = File->getName(); - SmallString<128> FilenameTmp(Filename); - if (PreparePathForOutput(FilenameTmp)) { - // If we performed any translation on the file name at all, we need to - // save this string, since the generator will refer to it later. - Filename = StringRef(strdup(FilenameTmp.c_str())); - SavedStrings.push_back(Filename.data()); - } + HS.forEachExistingLocalFileInfo( + [&](FileEntryRef File, const HeaderFileInfo &HFI) { + if (!HFI.isCompilingModuleHeader && HFI.isModuleHeader) + return; // Header file info is tracked by the owning module file. + if (!HFI.isCompilingModuleHeader && !HFI.IsLocallyIncluded) + return; // Header file info is tracked by the including module file. + + // Massage the file path into an appropriate form. + StringRef Filename = File.getName(); + SmallString<128> FilenameTmp(Filename); + if (PreparePathForOutput(FilenameTmp)) { + // If we performed any translation on the file name at all, we need to + // save this string, since the generator will refer to it later. + Filename = StringRef(strdup(FilenameTmp.c_str())); + SavedStrings.push_back(Filename.data()); + } - bool Included = HFI->IsLocallyIncluded || PP->alreadyIncluded(*File); + bool Included = HFI.IsLocallyIncluded || PP->alreadyIncluded(File); - HeaderFileInfoTrait::key_type Key = { - Filename, File->getSize(), - getTimestampForOutput(File->getModificationTime())}; - HeaderFileInfoTrait::data_type Data = { - *HFI, Included, HS.getModuleMap().findResolvedModulesForHeader(*File), {} - }; - Generator.insert(Key, Data, GeneratorTrait); - ++NumHeaderSearchEntries; - } + HeaderFileInfoTrait::key_type Key = { + Filename, File.getSize(), + getTimestampForOutput(File.getModificationTime())}; + HeaderFileInfoTrait::data_type Data = { + HFI, + Included, + HS.getModuleMap().findResolvedModulesForHeader(File), + {}}; + Generator.insert(Key, Data, GeneratorTrait); + ++NumHeaderSearchEntries; + }); // Create the on-disk hash table in a buffer. SmallString<4096> TableData; _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
