Author: Jan Svoboda
Date: 2023-07-13T15:00:11-07:00
New Revision: 6504d87fc0c89fc584c2128355a14a07ed385c5b

URL: 
https://github.com/llvm/llvm-project/commit/6504d87fc0c89fc584c2128355a14a07ed385c5b
DIFF: 
https://github.com/llvm/llvm-project/commit/6504d87fc0c89fc584c2128355a14a07ed385c5b.diff

LOG: [clang][modules] Deserialize included files lazily

In D114095, `HeaderFileInfo::NumIncludes` was moved into `Preprocessor`. This 
still makes sense, because we want to track this on the granularity of 
submodules (D112915, D114173), but the way this information is serialized is 
not ideal. In `ASTReader`, the set of included files gets deserialized eagerly, 
issuing lots of calls to `FileManager::getFile()` for input files the PCM 
consumer might not be interested in.

This patch makes the information part of the header file info table, taking 
advantage of its lazy deserialization which typically happens when a file is 
about to be included.

Reviewed By: benlangmuir

Differential Revision: https://reviews.llvm.org/D155131

Added: 
    

Modified: 
    clang/include/clang/Lex/Preprocessor.h
    clang/include/clang/Serialization/ASTBitCodes.h
    clang/include/clang/Serialization/ASTReader.h
    clang/include/clang/Serialization/ASTWriter.h
    clang/lib/Serialization/ASTReader.cpp
    clang/lib/Serialization/ASTReaderInternals.h
    clang/lib/Serialization/ASTWriter.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Lex/Preprocessor.h 
b/clang/include/clang/Lex/Preprocessor.h
index 8fbc002059a86b..9efe439bc5f219 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -1486,6 +1486,7 @@ class Preprocessor {
 
   /// Return true if this header has already been included.
   bool alreadyIncluded(const FileEntry *File) const {
+    HeaderInfo.getFileInfo(File);
     return IncludedFiles.count(File);
   }
 

diff  --git a/clang/include/clang/Serialization/ASTBitCodes.h 
b/clang/include/clang/Serialization/ASTBitCodes.h
index 7019bc5922ebcb..074d1002913084 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -41,7 +41,7 @@ namespace serialization {
 /// Version 4 of AST files also requires that the version control branch and
 /// revision match exactly, since there is no backward compatibility of
 /// AST files at this time.
-const unsigned VERSION_MAJOR = 25;
+const unsigned VERSION_MAJOR = 26;
 
 /// AST file minor version number supported by this version of
 /// Clang.
@@ -696,8 +696,7 @@ enum ASTRecordTypes {
   /// Record code for \#pragma float_control options.
   FLOAT_CONTROL_PRAGMA_OPTIONS = 65,
 
-  /// Record code for included files.
-  PP_INCLUDED_FILES = 66,
+  /// ID 66 used to be the list of included files.
 
   /// Record code for an unterminated \#pragma clang assume_nonnull begin
   /// recorded in a preamble.

diff  --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index 85f31ed22aca9d..d56e2117a53f0d 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -1391,7 +1391,6 @@ class ASTReader
   void ParseLineTable(ModuleFile &F, const RecordData &Record);
   llvm::Error ReadSourceManagerBlock(ModuleFile &F);
   SourceLocation getImportLocation(ModuleFile *F);
-  void readIncludedFiles(ModuleFile &F, StringRef Blob, Preprocessor &PP);
   ASTReadResult ReadModuleMapFileBlock(RecordData &Record, ModuleFile &F,
                                        const ModuleFile *ImportedBy,
                                        unsigned ClientLoadCapabilities);

diff  --git a/clang/include/clang/Serialization/ASTWriter.h 
b/clang/include/clang/Serialization/ASTWriter.h
index 09ee1744e8945f..e328dd0cd5577e 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -500,7 +500,6 @@ class ASTWriter : public ASTDeserializationListener,
   void WriteInputFiles(SourceManager &SourceMgr, HeaderSearchOptions &HSOpts);
   void WriteSourceManagerBlock(SourceManager &SourceMgr,
                                const Preprocessor &PP);
-  void writeIncludedFiles(raw_ostream &Out, const Preprocessor &PP);
   void WritePreprocessor(const Preprocessor &PP, bool IsModule);
   void WriteHeaderSearch(const HeaderSearch &HS);
   void WritePreprocessorDetail(PreprocessingRecord &PPRec,

diff  --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index b989ff2a9c95c4..380d117acc4973 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -1875,6 +1875,21 @@ ASTReader::getGlobalPreprocessedEntityID(ModuleFile &M,
   return LocalID + I->second;
 }
 
+const FileEntry *HeaderFileInfoTrait::getFile(const internal_key_type &Key) {
+  FileManager &FileMgr = Reader.getFileManager();
+  if (!Key.Imported) {
+    if (auto File = FileMgr.getFile(Key.Filename))
+      return *File;
+    return nullptr;
+  }
+
+  std::string Resolved = std::string(Key.Filename);
+  Reader.ResolveImportedPath(M, Resolved);
+  if (auto File = FileMgr.getFile(Resolved))
+    return *File;
+  return nullptr;
+}
+
 unsigned HeaderFileInfoTrait::ComputeHash(internal_key_ref ikey) {
   return llvm::hash_combine(ikey.Size, ikey.ModTime);
 }
@@ -1895,23 +1910,8 @@ bool HeaderFileInfoTrait::EqualKey(internal_key_ref a, 
internal_key_ref b) {
     return true;
 
   // Determine whether the actual files are equivalent.
-  FileManager &FileMgr = Reader.getFileManager();
-  auto GetFile = [&](const internal_key_type &Key) -> const FileEntry* {
-    if (!Key.Imported) {
-      if (auto File = FileMgr.getFile(Key.Filename))
-        return *File;
-      return nullptr;
-    }
-
-    std::string Resolved = std::string(Key.Filename);
-    Reader.ResolveImportedPath(M, Resolved);
-    if (auto File = FileMgr.getFile(Resolved))
-      return *File;
-    return nullptr;
-  };
-
-  const FileEntry *FEA = GetFile(a);
-  const FileEntry *FEB = GetFile(b);
+  const FileEntry *FEA = getFile(a);
+  const FileEntry *FEB = getFile(b);
   return FEA && FEA == FEB;
 }
 
@@ -1940,6 +1940,14 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, 
const unsigned char *d,
   const unsigned char *End = d + DataLen;
   HeaderFileInfo HFI;
   unsigned Flags = *d++;
+
+  bool Included = (Flags >> 6) & 0x01;
+  if (Included)
+    if (const FileEntry *FE = getFile(key))
+      // Not using \c Preprocessor::markIncluded(), since that would attempt to
+      // deserialize this header file info again.
+      Reader.getPreprocessor().getIncludedFiles().insert(FE);
+
   // FIXME: Refactor with mergeHeaderFileInfo in HeaderSearch.cpp.
   HFI.isImport |= (Flags >> 5) & 0x01;
   HFI.isPragmaOnce |= (Flags >> 4) & 0x01;
@@ -3028,22 +3036,6 @@ ASTReader::ReadControlBlock(ModuleFile &F,
   }
 }
 
-void ASTReader::readIncludedFiles(ModuleFile &F, StringRef Blob,
-                                  Preprocessor &PP) {
-  using namespace llvm::support;
-
-  const unsigned char *D = (const unsigned char *)Blob.data();
-  unsigned FileCount = endian::readNext<uint32_t, little, unaligned>(D);
-
-  for (unsigned I = 0; I < FileCount; ++I) {
-    size_t ID = endian::readNext<uint32_t, little, unaligned>(D);
-    InputFileInfo IFI = getInputFileInfo(F, ID);
-    if (llvm::ErrorOr<const FileEntry *> File =
-            PP.getFileManager().getFile(IFI.Filename))
-      PP.getIncludedFiles().insert(*File);
-  }
-}
-
 llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
                                     unsigned ClientLoadCapabilities) {
   BitstreamCursor &Stream = F.Stream;
@@ -3795,10 +3787,6 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
       break;
     }
 
-    case PP_INCLUDED_FILES:
-      readIncludedFiles(F, Blob, PP);
-      break;
-
     case LATE_PARSED_TEMPLATE:
       LateParsedTemplates.emplace_back(
           std::piecewise_construct, std::forward_as_tuple(&F),

diff  --git a/clang/lib/Serialization/ASTReaderInternals.h 
b/clang/lib/Serialization/ASTReaderInternals.h
index 4a4cfcce156d52..b906cc6c58a240 100644
--- a/clang/lib/Serialization/ASTReaderInternals.h
+++ b/clang/lib/Serialization/ASTReaderInternals.h
@@ -276,6 +276,9 @@ class HeaderFileInfoTrait {
   static internal_key_type ReadKey(const unsigned char *d, unsigned);
 
   data_type ReadData(internal_key_ref,const unsigned char *d, unsigned 
DataLen);
+
+private:
+  const FileEntry *getFile(const internal_key_type &Key);
 };
 
 /// The on-disk hash table used for known header files.

diff  --git a/clang/lib/Serialization/ASTWriter.cpp 
b/clang/lib/Serialization/ASTWriter.cpp
index f4389ecd7629a1..2ef0d09f359ac1 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -866,7 +866,6 @@ void ASTWriter::WriteBlockInfoBlock() {
   RECORD(CUDA_PRAGMA_FORCE_HOST_DEVICE_DEPTH);
   RECORD(PP_CONDITIONAL_STACK);
   RECORD(DECLS_TO_CHECK_FOR_DEFERRED_DIAGS);
-  RECORD(PP_INCLUDED_FILES);
   RECORD(PP_ASSUME_NONNULL_LOC);
 
   // SourceManager Block.
@@ -1763,6 +1762,7 @@ namespace {
 
     struct data_type {
       const HeaderFileInfo &HFI;
+      bool AlreadyIncluded;
       ArrayRef<ModuleMap::KnownHeader> KnownHeaders;
       UnresolvedModule Unresolved;
     };
@@ -1808,7 +1808,8 @@ namespace {
       endian::Writer LE(Out, little);
       uint64_t Start = Out.tell(); (void)Start;
 
-      unsigned char Flags = (Data.HFI.isImport << 5)
+      unsigned char Flags = (Data.AlreadyIncluded << 6)
+                          | (Data.HFI.isImport << 5)
                           | (Data.HFI.isPragmaOnce << 4)
                           | (Data.HFI.DirInfo << 1)
                           | Data.HFI.IndexHeaderMapHeader;
@@ -1909,7 +1910,7 @@ void ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) 
{
         HeaderFileInfoTrait::key_type Key = {
             FilenameDup, *U.Size, IncludeTimestamps ? *U.ModTime : 0};
         HeaderFileInfoTrait::data_type Data = {
-            Empty, {}, {M, ModuleMap::headerKindToRole(U.Kind)}};
+            Empty, false, {}, {M, ModuleMap::headerKindToRole(U.Kind)}};
         // FIXME: Deal with cases where there are multiple unresolved header
         // directives in different submodules for the same header.
         Generator.insert(Key, Data, GeneratorTrait);
@@ -1952,11 +1953,13 @@ void ASTWriter::WriteHeaderSearch(const HeaderSearch 
&HS) {
       SavedStrings.push_back(Filename.data());
     }
 
+    bool Included = PP->alreadyIncluded(File);
+
     HeaderFileInfoTrait::key_type Key = {
       Filename, File->getSize(), getTimestampForOutput(File)
     };
     HeaderFileInfoTrait::data_type Data = {
-      *HFI, HS.getModuleMap().findResolvedModulesForHeader(File), {}
+      *HFI, Included, HS.getModuleMap().findResolvedModulesForHeader(File), {}
     };
     Generator.insert(Key, Data, GeneratorTrait);
     ++NumHeaderSearchEntries;
@@ -2262,29 +2265,6 @@ static bool shouldIgnoreMacro(MacroDirective *MD, bool 
IsModule,
   return false;
 }
 
-void ASTWriter::writeIncludedFiles(raw_ostream &Out, const Preprocessor &PP) {
-  using namespace llvm::support;
-
-  const Preprocessor::IncludedFilesSet &IncludedFiles = PP.getIncludedFiles();
-
-  std::vector<uint32_t> IncludedInputFileIDs;
-  IncludedInputFileIDs.reserve(IncludedFiles.size());
-
-  for (const FileEntry *File : IncludedFiles) {
-    auto InputFileIt = InputFileIDs.find(File);
-    if (InputFileIt == InputFileIDs.end())
-      continue;
-    IncludedInputFileIDs.push_back(InputFileIt->second);
-  }
-
-  llvm::sort(IncludedInputFileIDs);
-
-  endian::Writer LE(Out, little);
-  LE.write<uint32_t>(IncludedInputFileIDs.size());
-  for (uint32_t ID : IncludedInputFileIDs)
-    LE.write<uint32_t>(ID);
-}
-
 /// Writes the block containing the serialized form of the
 /// preprocessor.
 void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) {
@@ -2533,20 +2513,6 @@ void ASTWriter::WritePreprocessor(const Preprocessor 
&PP, bool IsModule) {
                                        MacroOffsetsBase - ASTBlockStartOffset};
     Stream.EmitRecordWithBlob(MacroOffsetAbbrev, Record, bytes(MacroOffsets));
   }
-
-  {
-    auto Abbrev = std::make_shared<BitCodeAbbrev>();
-    Abbrev->Add(BitCodeAbbrevOp(PP_INCLUDED_FILES));
-    Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
-    unsigned IncludedFilesAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
-
-    SmallString<2048> Buffer;
-    raw_svector_ostream Out(Buffer);
-    writeIncludedFiles(Out, PP);
-    RecordData::value_type Record[] = {PP_INCLUDED_FILES};
-    Stream.EmitRecordWithBlob(IncludedFilesAbbrev, Record, Buffer.data(),
-                              Buffer.size());
-  }
 }
 
 void ASTWriter::WritePreprocessorDetail(PreprocessingRecord &PPRec,


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to