[PATCH] D155131: [clang][modules] Deserialize included files lazily

2023-07-13 Thread Jan Svoboda via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG6504d87fc0c8: [clang][modules] Deserialize included files 
lazily (authored by jansvoboda11).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155131/new/

https://reviews.llvm.org/D155131

Files:
  clang/include/clang/Lex/Preprocessor.h
  clang/include/clang/Serialization/ASTBitCodes.h
  clang/include/clang/Serialization/ASTReader.h
  clang/include/clang/Serialization/ASTWriter.h
  clang/lib/Serialization/ASTReader.cpp
  clang/lib/Serialization/ASTReaderInternals.h
  clang/lib/Serialization/ASTWriter.cpp

Index: clang/lib/Serialization/ASTWriter.cpp
===
--- clang/lib/Serialization/ASTWriter.cpp
+++ clang/lib/Serialization/ASTWriter.cpp
@@ -866,7 +866,6 @@
   RECORD(CUDA_PRAGMA_FORCE_HOST_DEVICE_DEPTH);
   RECORD(PP_CONDITIONAL_STACK);
   RECORD(DECLS_TO_CHECK_FOR_DEFERRED_DIAGS);
-  RECORD(PP_INCLUDED_FILES);
   RECORD(PP_ASSUME_NONNULL_LOC);
 
   // SourceManager Block.
@@ -1763,6 +1762,7 @@
 
 struct data_type {
   const HeaderFileInfo 
+  bool AlreadyIncluded;
   ArrayRef KnownHeaders;
   UnresolvedModule Unresolved;
 };
@@ -1808,7 +1808,8 @@
   endian::Writer LE(Out, little);
   uint64_t Start = Out.tell(); (void)Start;
 
-  unsigned char Flags = (Data.HFI.isImport << 5)
+  unsigned char Flags = (Data.AlreadyIncluded << 6)
+  | (Data.HFI.isImport << 5)
   | (Data.HFI.isPragmaOnce << 4)
   | (Data.HFI.DirInfo << 1)
   | Data.HFI.IndexHeaderMapHeader;
@@ -1909,7 +1910,7 @@
 HeaderFileInfoTrait::key_type Key = {
 FilenameDup, *U.Size, IncludeTimestamps ? *U.ModTime : 0};
 HeaderFileInfoTrait::data_type Data = {
-Empty, {}, {M, ModuleMap::headerKindToRole(U.Kind)}};
+Empty, false, {}, {M, ModuleMap::headerKindToRole(U.Kind)}};
 // FIXME: Deal with cases where there are multiple unresolved header
 // directives in different submodules for the same header.
 Generator.insert(Key, Data, GeneratorTrait);
@@ -1952,11 +1953,13 @@
   SavedStrings.push_back(Filename.data());
 }
 
+bool Included = PP->alreadyIncluded(File);
+
 HeaderFileInfoTrait::key_type Key = {
   Filename, File->getSize(), getTimestampForOutput(File)
 };
 HeaderFileInfoTrait::data_type Data = {
-  *HFI, HS.getModuleMap().findResolvedModulesForHeader(File), {}
+  *HFI, Included, HS.getModuleMap().findResolvedModulesForHeader(File), {}
 };
 Generator.insert(Key, Data, GeneratorTrait);
 ++NumHeaderSearchEntries;
@@ -2262,29 +2265,6 @@
   return false;
 }
 
-void ASTWriter::writeIncludedFiles(raw_ostream , const Preprocessor ) {
-  using namespace llvm::support;
-
-  const Preprocessor::IncludedFilesSet  = PP.getIncludedFiles();
-
-  std::vector IncludedInputFileIDs;
-  IncludedInputFileIDs.reserve(IncludedFiles.size());
-
-  for (const FileEntry *File : IncludedFiles) {
-auto InputFileIt = InputFileIDs.find(File);
-if (InputFileIt == InputFileIDs.end())
-  continue;
-IncludedInputFileIDs.push_back(InputFileIt->second);
-  }
-
-  llvm::sort(IncludedInputFileIDs);
-
-  endian::Writer LE(Out, little);
-  LE.write(IncludedInputFileIDs.size());
-  for (uint32_t ID : IncludedInputFileIDs)
-LE.write(ID);
-}
-
 /// Writes the block containing the serialized form of the
 /// preprocessor.
 void ASTWriter::WritePreprocessor(const Preprocessor , bool IsModule) {
@@ -2533,20 +2513,6 @@
MacroOffsetsBase - ASTBlockStartOffset};
 Stream.EmitRecordWithBlob(MacroOffsetAbbrev, Record, bytes(MacroOffsets));
   }
-
-  {
-auto Abbrev = std::make_shared();
-Abbrev->Add(BitCodeAbbrevOp(PP_INCLUDED_FILES));
-Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
-unsigned IncludedFilesAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
-
-SmallString<2048> Buffer;
-raw_svector_ostream Out(Buffer);
-writeIncludedFiles(Out, PP);
-RecordData::value_type Record[] = {PP_INCLUDED_FILES};
-Stream.EmitRecordWithBlob(IncludedFilesAbbrev, Record, Buffer.data(),
-  Buffer.size());
-  }
 }
 
 void ASTWriter::WritePreprocessorDetail(PreprocessingRecord ,
Index: clang/lib/Serialization/ASTReaderInternals.h
===
--- clang/lib/Serialization/ASTReaderInternals.h
+++ clang/lib/Serialization/ASTReaderInternals.h
@@ -276,6 +276,9 @@
   static internal_key_type ReadKey(const unsigned char *d, unsigned);
 
   data_type ReadData(internal_key_ref,const unsigned char *d, unsigned DataLen);
+
+private:
+  const FileEntry *getFile(const internal_key_type );
 };
 
 /// The on-disk hash table used for known 

[PATCH] D155131: [clang][modules] Deserialize included files lazily

2023-07-13 Thread Jan Svoboda via Phabricator via cfe-commits
jansvoboda11 updated this revision to Diff 540174.
jansvoboda11 added a comment.
Herald added a subscriber: mgrang.

Remove dead code, make sure `getFileInfo()` is called in `alreadyIncluded()`.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155131/new/

https://reviews.llvm.org/D155131

Files:
  clang/include/clang/Lex/Preprocessor.h
  clang/include/clang/Serialization/ASTBitCodes.h
  clang/include/clang/Serialization/ASTReader.h
  clang/include/clang/Serialization/ASTWriter.h
  clang/lib/Serialization/ASTReader.cpp
  clang/lib/Serialization/ASTReaderInternals.h
  clang/lib/Serialization/ASTWriter.cpp

Index: clang/lib/Serialization/ASTWriter.cpp
===
--- clang/lib/Serialization/ASTWriter.cpp
+++ clang/lib/Serialization/ASTWriter.cpp
@@ -866,7 +866,6 @@
   RECORD(CUDA_PRAGMA_FORCE_HOST_DEVICE_DEPTH);
   RECORD(PP_CONDITIONAL_STACK);
   RECORD(DECLS_TO_CHECK_FOR_DEFERRED_DIAGS);
-  RECORD(PP_INCLUDED_FILES);
   RECORD(PP_ASSUME_NONNULL_LOC);
 
   // SourceManager Block.
@@ -1763,6 +1762,7 @@
 
 struct data_type {
   const HeaderFileInfo 
+  bool AlreadyIncluded;
   ArrayRef KnownHeaders;
   UnresolvedModule Unresolved;
 };
@@ -1808,7 +1808,8 @@
   endian::Writer LE(Out, little);
   uint64_t Start = Out.tell(); (void)Start;
 
-  unsigned char Flags = (Data.HFI.isImport << 5)
+  unsigned char Flags = (Data.AlreadyIncluded << 6)
+  | (Data.HFI.isImport << 5)
   | (Data.HFI.isPragmaOnce << 4)
   | (Data.HFI.DirInfo << 1)
   | Data.HFI.IndexHeaderMapHeader;
@@ -1909,7 +1910,7 @@
 HeaderFileInfoTrait::key_type Key = {
 FilenameDup, *U.Size, IncludeTimestamps ? *U.ModTime : 0};
 HeaderFileInfoTrait::data_type Data = {
-Empty, {}, {M, ModuleMap::headerKindToRole(U.Kind)}};
+Empty, false, {}, {M, ModuleMap::headerKindToRole(U.Kind)}};
 // FIXME: Deal with cases where there are multiple unresolved header
 // directives in different submodules for the same header.
 Generator.insert(Key, Data, GeneratorTrait);
@@ -1952,11 +1953,13 @@
   SavedStrings.push_back(Filename.data());
 }
 
+bool Included = PP->alreadyIncluded(File);
+
 HeaderFileInfoTrait::key_type Key = {
   Filename, File->getSize(), getTimestampForOutput(File)
 };
 HeaderFileInfoTrait::data_type Data = {
-  *HFI, HS.getModuleMap().findResolvedModulesForHeader(File), {}
+  *HFI, Included, HS.getModuleMap().findResolvedModulesForHeader(File), {}
 };
 Generator.insert(Key, Data, GeneratorTrait);
 ++NumHeaderSearchEntries;
@@ -2262,29 +2265,6 @@
   return false;
 }
 
-void ASTWriter::writeIncludedFiles(raw_ostream , const Preprocessor ) {
-  using namespace llvm::support;
-
-  const Preprocessor::IncludedFilesSet  = PP.getIncludedFiles();
-
-  std::vector IncludedInputFileIDs;
-  IncludedInputFileIDs.reserve(IncludedFiles.size());
-
-  for (const FileEntry *File : IncludedFiles) {
-auto InputFileIt = InputFileIDs.find(File);
-if (InputFileIt == InputFileIDs.end())
-  continue;
-IncludedInputFileIDs.push_back(InputFileIt->second);
-  }
-
-  llvm::sort(IncludedInputFileIDs);
-
-  endian::Writer LE(Out, little);
-  LE.write(IncludedInputFileIDs.size());
-  for (uint32_t ID : IncludedInputFileIDs)
-LE.write(ID);
-}
-
 /// Writes the block containing the serialized form of the
 /// preprocessor.
 void ASTWriter::WritePreprocessor(const Preprocessor , bool IsModule) {
@@ -2533,20 +2513,6 @@
MacroOffsetsBase - ASTBlockStartOffset};
 Stream.EmitRecordWithBlob(MacroOffsetAbbrev, Record, bytes(MacroOffsets));
   }
-
-  {
-auto Abbrev = std::make_shared();
-Abbrev->Add(BitCodeAbbrevOp(PP_INCLUDED_FILES));
-Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
-unsigned IncludedFilesAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
-
-SmallString<2048> Buffer;
-raw_svector_ostream Out(Buffer);
-writeIncludedFiles(Out, PP);
-RecordData::value_type Record[] = {PP_INCLUDED_FILES};
-Stream.EmitRecordWithBlob(IncludedFilesAbbrev, Record, Buffer.data(),
-  Buffer.size());
-  }
 }
 
 void ASTWriter::WritePreprocessorDetail(PreprocessingRecord ,
Index: clang/lib/Serialization/ASTReaderInternals.h
===
--- clang/lib/Serialization/ASTReaderInternals.h
+++ clang/lib/Serialization/ASTReaderInternals.h
@@ -276,6 +276,9 @@
   static internal_key_type ReadKey(const unsigned char *d, unsigned);
 
   data_type ReadData(internal_key_ref,const unsigned char *d, unsigned DataLen);
+
+private:
+  const FileEntry *getFile(const internal_key_type );
 };
 
 /// The on-disk hash table used for known header files.
Index: 

[PATCH] D155131: [clang][modules] Deserialize included files lazily

2023-07-13 Thread Jan Svoboda via Phabricator via cfe-commits
jansvoboda11 added a comment.

In D155131#4495489 , @benlangmuir 
wrote:

> Now that it's not eagerly deserialized, should 
> `Preprocessor::alreadyIncluded` call `HeaderInfo.getFileInfo(File)` to ensure 
> the information is up to date?

It should, but `Preprocessor::alreadyIncluded()` is only called from 
`HeaderSearch::ShouldEnterIncludeFile()` and 
`Preprocessor::HandleHeaderIncludeOrImport()`, where 
`HeaderSearch::getFileInfo(File)` has already been called. But I agree it would 
be better to ensure that within `Preprocessor::alreadyIncluded()` itself. I'll 
try to include that in the next revision.

> Similarly, we expose the list of files in `Preprocessor::getIncludedFiles` -- 
> is it okay if this list is incomplete?

That should be okay. This function only needs to return files included in the 
current module compilation, not all transitive includes.




Comment at: clang/lib/Serialization/ASTReader.cpp:1947
+if (const FileEntry *FE = getFile(key))
+  Reader.getPreprocessor().getIncludedFiles().insert(FE);
+

benlangmuir wrote:
> `Reader.getPreprocessor().markIncluded`?
That would trigger infinite recursion, since that calls `getFileInfo()` which 
attempts to deserialize.



Comment at: clang/lib/Serialization/ASTWriter.cpp:2545
-raw_svector_ostream Out(Buffer);
-writeIncludedFiles(Out, PP);
-RecordData::value_type Record[] = {PP_INCLUDED_FILES};

benlangmuir wrote:
> Can we remove `ASTWriter::writeIncludedFiles` now?
Yes, forgot about that, thanks.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155131/new/

https://reviews.llvm.org/D155131

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155131: [clang][modules] Deserialize included files lazily

2023-07-12 Thread Ben Langmuir via Phabricator via cfe-commits
benlangmuir added a comment.

Now that it's not eagerly deserialized, should `Preprocessor::alreadyIncluded` 
call `HeaderInfo.getFileInfo(File)` to ensure the information is up to date?  
Similarly, we expose the list of files in `Preprocessor::getIncludedFiles` -- 
is it okay if this list is incomplete?




Comment at: clang/lib/Serialization/ASTReader.cpp:1947
+if (const FileEntry *FE = getFile(key))
+  Reader.getPreprocessor().getIncludedFiles().insert(FE);
+

`Reader.getPreprocessor().markIncluded`?



Comment at: clang/lib/Serialization/ASTWriter.cpp:2545
-raw_svector_ostream Out(Buffer);
-writeIncludedFiles(Out, PP);
-RecordData::value_type Record[] = {PP_INCLUDED_FILES};

Can we remove `ASTWriter::writeIncludedFiles` now?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155131/new/

https://reviews.llvm.org/D155131

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155131: [clang][modules] Deserialize included files lazily

2023-07-12 Thread Jan Svoboda via Phabricator via cfe-commits
jansvoboda11 created this revision.
jansvoboda11 added reviewers: benlangmuir, vsapsai, Bigcheese.
Herald added a subscriber: ributzka.
Herald added a project: All.
jansvoboda11 requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

In D114095 , `HeaderFileInfo::NumIncludes` 
was moved into `Preprocessor`. This still makes sense, because we want to track 
this on the granularity of submodules (D112915 
, D114173 
), but the way this information is serialized 
is not ideal. In `ASTWriter`, the set of included files gets deserialized 
eagerly, issuing lots of calls to `FileManager::getFile()` for input files the 
PCM consumer might not be interested in.

This patch makes the information part of the header file info table, taking 
advantage of its lazy deserialization which typically happens when a file is 
about to be included.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D155131

Files:
  clang/include/clang/Serialization/ASTBitCodes.h
  clang/include/clang/Serialization/ASTReader.h
  clang/lib/Serialization/ASTReader.cpp
  clang/lib/Serialization/ASTReaderInternals.h
  clang/lib/Serialization/ASTWriter.cpp

Index: clang/lib/Serialization/ASTWriter.cpp
===
--- clang/lib/Serialization/ASTWriter.cpp
+++ clang/lib/Serialization/ASTWriter.cpp
@@ -866,7 +866,6 @@
   RECORD(CUDA_PRAGMA_FORCE_HOST_DEVICE_DEPTH);
   RECORD(PP_CONDITIONAL_STACK);
   RECORD(DECLS_TO_CHECK_FOR_DEFERRED_DIAGS);
-  RECORD(PP_INCLUDED_FILES);
   RECORD(PP_ASSUME_NONNULL_LOC);
 
   // SourceManager Block.
@@ -1763,6 +1762,7 @@
 
 struct data_type {
   const HeaderFileInfo 
+  bool AlreadyIncluded;
   ArrayRef KnownHeaders;
   UnresolvedModule Unresolved;
 };
@@ -1808,7 +1808,8 @@
   endian::Writer LE(Out, little);
   uint64_t Start = Out.tell(); (void)Start;
 
-  unsigned char Flags = (Data.HFI.isImport << 5)
+  unsigned char Flags = (Data.AlreadyIncluded << 6)
+  | (Data.HFI.isImport << 5)
   | (Data.HFI.isPragmaOnce << 4)
   | (Data.HFI.DirInfo << 1)
   | Data.HFI.IndexHeaderMapHeader;
@@ -1909,7 +1910,7 @@
 HeaderFileInfoTrait::key_type Key = {
 FilenameDup, *U.Size, IncludeTimestamps ? *U.ModTime : 0};
 HeaderFileInfoTrait::data_type Data = {
-Empty, {}, {M, ModuleMap::headerKindToRole(U.Kind)}};
+Empty, false, {}, {M, ModuleMap::headerKindToRole(U.Kind)}};
 // FIXME: Deal with cases where there are multiple unresolved header
 // directives in different submodules for the same header.
 Generator.insert(Key, Data, GeneratorTrait);
@@ -1952,11 +1953,13 @@
   SavedStrings.push_back(Filename.data());
 }
 
+bool Included = PP->alreadyIncluded(File);
+
 HeaderFileInfoTrait::key_type Key = {
   Filename, File->getSize(), getTimestampForOutput(File)
 };
 HeaderFileInfoTrait::data_type Data = {
-  *HFI, HS.getModuleMap().findResolvedModulesForHeader(File), {}
+  *HFI, Included, HS.getModuleMap().findResolvedModulesForHeader(File), {}
 };
 Generator.insert(Key, Data, GeneratorTrait);
 ++NumHeaderSearchEntries;
@@ -2533,20 +2536,6 @@
MacroOffsetsBase - ASTBlockStartOffset};
 Stream.EmitRecordWithBlob(MacroOffsetAbbrev, Record, bytes(MacroOffsets));
   }
-
-  {
-auto Abbrev = std::make_shared();
-Abbrev->Add(BitCodeAbbrevOp(PP_INCLUDED_FILES));
-Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
-unsigned IncludedFilesAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
-
-SmallString<2048> Buffer;
-raw_svector_ostream Out(Buffer);
-writeIncludedFiles(Out, PP);
-RecordData::value_type Record[] = {PP_INCLUDED_FILES};
-Stream.EmitRecordWithBlob(IncludedFilesAbbrev, Record, Buffer.data(),
-  Buffer.size());
-  }
 }
 
 void ASTWriter::WritePreprocessorDetail(PreprocessingRecord ,
Index: clang/lib/Serialization/ASTReaderInternals.h
===
--- clang/lib/Serialization/ASTReaderInternals.h
+++ clang/lib/Serialization/ASTReaderInternals.h
@@ -276,6 +276,9 @@
   static internal_key_type ReadKey(const unsigned char *d, unsigned);
 
   data_type ReadData(internal_key_ref,const unsigned char *d, unsigned DataLen);
+
+private:
+  const FileEntry *getFile(const internal_key_type );
 };
 
 /// The on-disk hash table used for known header files.
Index: clang/lib/Serialization/ASTReader.cpp
===
--- clang/lib/Serialization/ASTReader.cpp
+++ clang/lib/Serialization/ASTReader.cpp
@@ -1875,6 +1875,21 @@
   return LocalID + I->second;
 }
 
+const