llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang-modules

<details>
<summary>Changes</summary>

In `getFileID()` the `SourceManager` ends up doing a binary search over its 
buffer of `SLocEntries`. For modules, this binary search fully deserializes the 
entire `SLocEntry` block for visited each entry. This shows up in profiles of 
the dependency scanner, since that operation includes decompressing buffers 
associated with some entries.

This patch moves the binary search over loaded entries into `ASTReader`, which 
now only performs partial deserialization during the binary search, speeding up 
the scanner by ~3.3%.


---
Full diff: https://github.com/llvm/llvm-project/pull/66966.diff


4 Files Affected:

- (modified) clang/include/clang/Basic/SourceManager.h (+3) 
- (modified) clang/include/clang/Serialization/ASTReader.h (+6) 
- (modified) clang/lib/Basic/SourceManager.cpp (+3-67) 
- (modified) clang/lib/Serialization/ASTReader.cpp (+63) 


``````````diff
diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index 2f846502d6f3327..a4c7facddd53d64 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -533,6 +533,9 @@ class ExternalSLocEntrySource {
   /// entry from being loaded.
   virtual bool ReadSLocEntry(int ID) = 0;
 
+  /// Get the index ID for the loaded SourceLocation offset.
+  virtual int getSLocEntryID(SourceLocation::UIntTy SLocOffset) = 0;
+
   /// Retrieve the module import location and name for the given ID, if
   /// in fact it was loaded from a module (rather than, say, a precompiled
   /// header).
diff --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index dc1eb21c27801fe..e643fcf4c930f09 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -2153,6 +2153,12 @@ class ASTReader
 
   /// Read the source location entry with index ID.
   bool ReadSLocEntry(int ID) override;
+  /// Get the index ID for the loaded SourceLocation offset.
+  int getSLocEntryID(SourceLocation::UIntTy SLocOffset) override;
+  /// Read the offset of the SLocEntry at the given index in the given module
+  /// file.
+  std::optional<SourceLocation::UIntTy> readSLocOffset(ModuleFile *F,
+                                                       unsigned Index);
 
   /// Retrieve the module import location and module name for the
   /// given source manager entry ID.
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 0521ac7b30339ab..f881afc2e46c5c6 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -864,74 +864,10 @@ FileID 
SourceManager::getFileIDLocal(SourceLocation::UIntTy SLocOffset) const {
 /// This function knows that the SourceLocation is in a loaded buffer, not a
 /// local one.
 FileID SourceManager::getFileIDLoaded(SourceLocation::UIntTy SLocOffset) const 
{
-  if (SLocOffset < CurrentLoadedOffset) {
-    assert(0 && "Invalid SLocOffset or bad function choice");
-    return FileID();
-  }
-
-  // Essentially the same as the local case, but the loaded array is sorted
-  // in the other direction (decreasing order).
-  // GreaterIndex is the one where the offset is greater, which is actually a
-  // lower index!
-  unsigned GreaterIndex = 0;
-  unsigned LessIndex = LoadedSLocEntryTable.size();
-  if (LastFileIDLookup.ID < 0) {
-    // Prune the search space.
-    int LastID = LastFileIDLookup.ID;
-    if (getLoadedSLocEntryByID(LastID).getOffset() > SLocOffset)
-      GreaterIndex =
-          (-LastID - 2) + 1; // Exclude LastID, else we would have hit the 
cache
-    else
-      LessIndex = -LastID - 2;
-  }
-
-  // First do a linear scan from the last lookup position, if possible.
-  unsigned NumProbes;
+  int ID = ExternalSLocEntries->getSLocEntryID(SLocOffset);
   bool Invalid = false;
-  for (NumProbes = 0; NumProbes < 8; ++NumProbes, ++GreaterIndex) {
-    // Make sure the entry is loaded!
-    const SrcMgr::SLocEntry &E = getLoadedSLocEntry(GreaterIndex, &Invalid);
-    if (Invalid)
-      return FileID(); // invalid entry.
-    if (E.getOffset() <= SLocOffset) {
-      FileID Res = FileID::get(-int(GreaterIndex) - 2);
-      LastFileIDLookup = Res;
-      NumLinearScans += NumProbes + 1;
-      return Res;
-    }
-  }
-
-  // Linear scan failed. Do the binary search.
-  NumProbes = 0;
-  while (true) {
-    ++NumProbes;
-    unsigned MiddleIndex = (LessIndex - GreaterIndex) / 2 + GreaterIndex;
-    const SrcMgr::SLocEntry &E = getLoadedSLocEntry(MiddleIndex, &Invalid);
-    if (Invalid)
-      return FileID(); // invalid entry.
-
-    if (E.getOffset() > SLocOffset) {
-      if (GreaterIndex == MiddleIndex) {
-        assert(0 && "binary search missed the entry");
-        return FileID();
-      }
-      GreaterIndex = MiddleIndex;
-      continue;
-    }
-
-    if (isOffsetInFileID(FileID::get(-int(MiddleIndex) - 2), SLocOffset)) {
-      FileID Res = FileID::get(-int(MiddleIndex) - 2);
-      LastFileIDLookup = Res;
-      NumBinaryProbes += NumProbes;
-      return Res;
-    }
-
-    if (LessIndex == MiddleIndex) {
-      assert(0 && "binary search missed the entry");
-      return FileID();
-    }
-    LessIndex = MiddleIndex;
-  }
+  (void)getLoadedSLocEntryByID(ID, &Invalid);
+  return Invalid ? FileID() : FileID::get(ID);
 }
 
 SourceLocation SourceManager::
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 0952244d037a77c..fdf89dce41aab4d 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -1444,6 +1444,69 @@ llvm::Error ASTReader::ReadSourceManagerBlock(ModuleFile 
&F) {
   }
 }
 
+std::optional<SourceLocation::UIntTy>
+ASTReader::readSLocOffset(ModuleFile *F, unsigned Index) {
+  BitstreamCursor &Cursor = F->SLocEntryCursor;
+  SavedStreamPosition SavedPosition(Cursor);
+  if (llvm::Error Err = Cursor.JumpToBit(F->SLocEntryOffsetsBase +
+                                         F->SLocEntryOffsets[Index])) {
+    Error(std::move(Err));
+    return std::nullopt;
+  }
+
+  Expected<llvm::BitstreamEntry> MaybeEntry = Cursor.advance();
+  if (!MaybeEntry) {
+    Error(MaybeEntry.takeError());
+    return std::nullopt;
+  }
+  llvm::BitstreamEntry Entry = MaybeEntry.get();
+
+  if (Entry.Kind != llvm::BitstreamEntry::Record) {
+    Error("incorrectly-formatted source location entry in AST file");
+    return std::nullopt;
+  }
+
+  RecordData Record;
+  StringRef Blob;
+  Expected<unsigned> MaybeSLOC = Cursor.readRecord(Entry.ID, Record, &Blob);
+  if (!MaybeSLOC) {
+    Error(MaybeSLOC.takeError());
+    return std::nullopt;
+  }
+  switch (MaybeSLOC.get()) {
+  default:
+    Error("incorrectly-formatted source location entry in AST file");
+    return std::nullopt;
+  case SM_SLOC_FILE_ENTRY:
+  case SM_SLOC_BUFFER_ENTRY:
+  case SM_SLOC_EXPANSION_ENTRY:
+    return F->SLocEntryBaseOffset + Record[0];
+  }
+}
+
+int ASTReader::getSLocEntryID(SourceLocation::UIntTy SLocOffset) {
+  auto SLocMapI =
+      GlobalSLocOffsetMap.find(SourceManager::MaxLoadedOffset - SLocOffset - 
1);
+  assert(SLocMapI != GlobalSLocOffsetMap.end() &&
+         "Corrupted global sloc offset map");
+  ModuleFile *F = SLocMapI->second;
+
+  std::vector<unsigned> Indices(F->LocalNumSLocEntries);
+  for (unsigned I = 0; I != F->LocalNumSLocEntries; ++I)
+    Indices[I] = I;
+
+  auto It = llvm::upper_bound(Indices, SLocOffset,
+                    [&](SourceLocation::UIntTy Offset, unsigned Index) {
+                      auto EntryOffset = readSLocOffset(F, Index);
+                      assert(EntryOffset && "Corrupted AST file");
+                      return Offset < *EntryOffset;
+                    });
+  // The iterator points to the first entry with start offset greater than the
+  // offset of interest. The previous entry must contain the offset of 
interest.
+  It = std::prev(It);
+  return F->SLocEntryBaseID + *It;
+}
+
 bool ASTReader::ReadSLocEntry(int ID) {
   if (ID == 0)
     return false;

``````````

</details>


https://github.com/llvm/llvm-project/pull/66966
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to