[clang] [clang][modules] Move `SLocEntry` search into `ASTReader` (PR #66966)
https://github.com/jansvoboda11 closed https://github.com/llvm/llvm-project/pull/66966 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][modules] Move `SLocEntry` search into `ASTReader` (PR #66966)
https://github.com/benlangmuir approved this pull request. Latest change to shrink memory use LGTM. https://github.com/llvm/llvm-project/pull/66966 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][modules] Move `SLocEntry` search into `ASTReader` (PR #66966)
@@ -1444,6 +1444,74 @@ llvm::Error ASTReader::ReadSourceManagerBlock(ModuleFile ) { } } +llvm::Expected +ASTReader::readSLocOffset(ModuleFile *F, unsigned Index) { + BitstreamCursor = F->SLocEntryCursor; + SavedStreamPosition SavedPosition(Cursor); + if (llvm::Error Err = Cursor.JumpToBit(F->SLocEntryOffsetsBase + + F->SLocEntryOffsets[Index])) +return Err; jansvoboda11 wrote: Thanks for the heads-up, I'll keep that in mind if bots start failing. I think we recently bumped compiler requirements, so we might get away with this. https://github.com/llvm/llvm-project/pull/66966 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][modules] Move `SLocEntry` search into `ASTReader` (PR #66966)
https://github.com/jansvoboda11 updated https://github.com/llvm/llvm-project/pull/66966 >From 4edf9d8559339a12108d9c4d1e2f3bb062a5a768 Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Wed, 20 Sep 2023 17:30:45 -0700 Subject: [PATCH 1/9] [clang][modules] Move `SLocEntry` search into `ASTReader` In `getFileID()` the `SourceManager` ends up doing a binary search over its buffer of `SLocEntries`. For modules, this binary search fully deserializes the entire `SLocEntry` block for visited each entry. This shows up in profiles of the dependency scanner, since that operation includes decompressing buffers associated with some entries. This patch moves the binary search over loaded entries into `ASTReader`, which now only performs partial deserialization during the binary search, speeding up the scanner by ~3.3%. --- clang/include/clang/Basic/SourceManager.h | 3 + clang/include/clang/Serialization/ASTReader.h | 6 ++ clang/lib/Basic/SourceManager.cpp | 70 +-- clang/lib/Serialization/ASTReader.cpp | 63 + 4 files changed, 75 insertions(+), 67 deletions(-) diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h index 2f846502d6f3327..a4c7facddd53d64 100644 --- a/clang/include/clang/Basic/SourceManager.h +++ b/clang/include/clang/Basic/SourceManager.h @@ -533,6 +533,9 @@ class ExternalSLocEntrySource { /// entry from being loaded. virtual bool ReadSLocEntry(int ID) = 0; + /// Get the index ID for the loaded SourceLocation offset. + virtual int getSLocEntryID(SourceLocation::UIntTy SLocOffset) = 0; + /// Retrieve the module import location and name for the given ID, if /// in fact it was loaded from a module (rather than, say, a precompiled /// header). diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index dc1eb21c27801fe..e643fcf4c930f09 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -2153,6 +2153,12 @@ class ASTReader /// Read the source location entry with index ID. bool ReadSLocEntry(int ID) override; + /// Get the index ID for the loaded SourceLocation offset. + int getSLocEntryID(SourceLocation::UIntTy SLocOffset) override; + /// Read the offset of the SLocEntry at the given index in the given module + /// file. + std::optional readSLocOffset(ModuleFile *F, + unsigned Index); /// Retrieve the module import location and module name for the /// given source manager entry ID. diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp index 0521ac7b30339ab..f881afc2e46c5c6 100644 --- a/clang/lib/Basic/SourceManager.cpp +++ b/clang/lib/Basic/SourceManager.cpp @@ -864,74 +864,10 @@ FileID SourceManager::getFileIDLocal(SourceLocation::UIntTy SLocOffset) const { /// This function knows that the SourceLocation is in a loaded buffer, not a /// local one. FileID SourceManager::getFileIDLoaded(SourceLocation::UIntTy SLocOffset) const { - if (SLocOffset < CurrentLoadedOffset) { -assert(0 && "Invalid SLocOffset or bad function choice"); -return FileID(); - } - - // Essentially the same as the local case, but the loaded array is sorted - // in the other direction (decreasing order). - // GreaterIndex is the one where the offset is greater, which is actually a - // lower index! - unsigned GreaterIndex = 0; - unsigned LessIndex = LoadedSLocEntryTable.size(); - if (LastFileIDLookup.ID < 0) { -// Prune the search space. -int LastID = LastFileIDLookup.ID; -if (getLoadedSLocEntryByID(LastID).getOffset() > SLocOffset) - GreaterIndex = - (-LastID - 2) + 1; // Exclude LastID, else we would have hit the cache -else - LessIndex = -LastID - 2; - } - - // First do a linear scan from the last lookup position, if possible. - unsigned NumProbes; + int ID = ExternalSLocEntries->getSLocEntryID(SLocOffset); bool Invalid = false; - for (NumProbes = 0; NumProbes < 8; ++NumProbes, ++GreaterIndex) { -// Make sure the entry is loaded! -const SrcMgr::SLocEntry = getLoadedSLocEntry(GreaterIndex, ); -if (Invalid) - return FileID(); // invalid entry. -if (E.getOffset() <= SLocOffset) { - FileID Res = FileID::get(-int(GreaterIndex) - 2); - LastFileIDLookup = Res; - NumLinearScans += NumProbes + 1; - return Res; -} - } - - // Linear scan failed. Do the binary search. - NumProbes = 0; - while (true) { -++NumProbes; -unsigned MiddleIndex = (LessIndex - GreaterIndex) / 2 + GreaterIndex; -const SrcMgr::SLocEntry = getLoadedSLocEntry(MiddleIndex, ); -if (Invalid) - return FileID(); // invalid entry. - -if (E.getOffset() > SLocOffset) { - if (GreaterIndex == MiddleIndex) { -assert(0 && "binary search missed the entry"); -return FileID(); -
[clang] [clang][modules] Move `SLocEntry` search into `ASTReader` (PR #66966)
@@ -288,10 +288,12 @@ class ModuleFile { /// for the entry is SLocEntryOffsetsBase + SLocEntryOffsets[i]. uint64_t SLocEntryOffsetsBase = 0; - /// Offsets for all of the source location entries in the - /// AST file. + /// Stream bit offsets for all of the source location entries in the AST file. const uint32_t *SLocEntryOffsets = nullptr; + /// SLocEntry offsets that have been loaded from the AST file. + std::vector SLocEntryOffsetLoaded; benlangmuir wrote: I don't feel that strongly about 10 MB in a large TU; your suggestion seems reasonable, but could be a follow up. https://github.com/llvm/llvm-project/pull/66966 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][modules] Move `SLocEntry` search into `ASTReader` (PR #66966)
https://github.com/benlangmuir approved this pull request. LGTM; you might need a `std::move` in `readSLocOffset` to appease an older compiler. https://github.com/llvm/llvm-project/pull/66966 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][modules] Move `SLocEntry` search into `ASTReader` (PR #66966)
@@ -1444,6 +1444,74 @@ llvm::Error ASTReader::ReadSourceManagerBlock(ModuleFile ) { } } +llvm::Expected +ASTReader::readSLocOffset(ModuleFile *F, unsigned Index) { + BitstreamCursor = F->SLocEntryCursor; + SavedStreamPosition SavedPosition(Cursor); + if (llvm::Error Err = Cursor.JumpToBit(F->SLocEntryOffsetsBase + + F->SLocEntryOffsets[Index])) +return Err; benlangmuir wrote: I seem to recall that the Error & -> Expected conversion trips a compiler bug in some of our supported compilers. I think you need `std::move(Err)` when returning an l-value. https://github.com/llvm/llvm-project/pull/66966 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][modules] Move `SLocEntry` search into `ASTReader` (PR #66966)
https://github.com/benlangmuir edited https://github.com/llvm/llvm-project/pull/66966 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][modules] Move `SLocEntry` search into `ASTReader` (PR #66966)
https://github.com/benlangmuir edited https://github.com/llvm/llvm-project/pull/66966 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][modules] Move `SLocEntry` search into `ASTReader` (PR #66966)
https://github.com/benlangmuir approved this pull request. Thanks for iterating! I find the current implementation much clearer. The only thing I might quibble about is the "child" vs. "parent" terminology you changed: I think it's fairly ambiguous either way, because the node is the "child" from the perspective of a top-down include hierarchy, but it's the "parent" from the perspective of the bottom-up search. You could maybe change it to `IncludedFile` or something, but I don't feel very strongly about it. Child is no worse than parent so if you prefer child I don't think you need to change it. https://github.com/llvm/llvm-project/pull/66966 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][modules] Move `SLocEntry` search into `ASTReader` (PR #66966)
https://github.com/jansvoboda11 updated https://github.com/llvm/llvm-project/pull/66966 >From 4edf9d8559339a12108d9c4d1e2f3bb062a5a768 Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Wed, 20 Sep 2023 17:30:45 -0700 Subject: [PATCH 1/7] [clang][modules] Move `SLocEntry` search into `ASTReader` In `getFileID()` the `SourceManager` ends up doing a binary search over its buffer of `SLocEntries`. For modules, this binary search fully deserializes the entire `SLocEntry` block for visited each entry. This shows up in profiles of the dependency scanner, since that operation includes decompressing buffers associated with some entries. This patch moves the binary search over loaded entries into `ASTReader`, which now only performs partial deserialization during the binary search, speeding up the scanner by ~3.3%. --- clang/include/clang/Basic/SourceManager.h | 3 + clang/include/clang/Serialization/ASTReader.h | 6 ++ clang/lib/Basic/SourceManager.cpp | 70 +-- clang/lib/Serialization/ASTReader.cpp | 63 + 4 files changed, 75 insertions(+), 67 deletions(-) diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h index 2f846502d6f3327..a4c7facddd53d64 100644 --- a/clang/include/clang/Basic/SourceManager.h +++ b/clang/include/clang/Basic/SourceManager.h @@ -533,6 +533,9 @@ class ExternalSLocEntrySource { /// entry from being loaded. virtual bool ReadSLocEntry(int ID) = 0; + /// Get the index ID for the loaded SourceLocation offset. + virtual int getSLocEntryID(SourceLocation::UIntTy SLocOffset) = 0; + /// Retrieve the module import location and name for the given ID, if /// in fact it was loaded from a module (rather than, say, a precompiled /// header). diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index dc1eb21c27801fe..e643fcf4c930f09 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -2153,6 +2153,12 @@ class ASTReader /// Read the source location entry with index ID. bool ReadSLocEntry(int ID) override; + /// Get the index ID for the loaded SourceLocation offset. + int getSLocEntryID(SourceLocation::UIntTy SLocOffset) override; + /// Read the offset of the SLocEntry at the given index in the given module + /// file. + std::optional readSLocOffset(ModuleFile *F, + unsigned Index); /// Retrieve the module import location and module name for the /// given source manager entry ID. diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp index 0521ac7b30339ab..f881afc2e46c5c6 100644 --- a/clang/lib/Basic/SourceManager.cpp +++ b/clang/lib/Basic/SourceManager.cpp @@ -864,74 +864,10 @@ FileID SourceManager::getFileIDLocal(SourceLocation::UIntTy SLocOffset) const { /// This function knows that the SourceLocation is in a loaded buffer, not a /// local one. FileID SourceManager::getFileIDLoaded(SourceLocation::UIntTy SLocOffset) const { - if (SLocOffset < CurrentLoadedOffset) { -assert(0 && "Invalid SLocOffset or bad function choice"); -return FileID(); - } - - // Essentially the same as the local case, but the loaded array is sorted - // in the other direction (decreasing order). - // GreaterIndex is the one where the offset is greater, which is actually a - // lower index! - unsigned GreaterIndex = 0; - unsigned LessIndex = LoadedSLocEntryTable.size(); - if (LastFileIDLookup.ID < 0) { -// Prune the search space. -int LastID = LastFileIDLookup.ID; -if (getLoadedSLocEntryByID(LastID).getOffset() > SLocOffset) - GreaterIndex = - (-LastID - 2) + 1; // Exclude LastID, else we would have hit the cache -else - LessIndex = -LastID - 2; - } - - // First do a linear scan from the last lookup position, if possible. - unsigned NumProbes; + int ID = ExternalSLocEntries->getSLocEntryID(SLocOffset); bool Invalid = false; - for (NumProbes = 0; NumProbes < 8; ++NumProbes, ++GreaterIndex) { -// Make sure the entry is loaded! -const SrcMgr::SLocEntry = getLoadedSLocEntry(GreaterIndex, ); -if (Invalid) - return FileID(); // invalid entry. -if (E.getOffset() <= SLocOffset) { - FileID Res = FileID::get(-int(GreaterIndex) - 2); - LastFileIDLookup = Res; - NumLinearScans += NumProbes + 1; - return Res; -} - } - - // Linear scan failed. Do the binary search. - NumProbes = 0; - while (true) { -++NumProbes; -unsigned MiddleIndex = (LessIndex - GreaterIndex) / 2 + GreaterIndex; -const SrcMgr::SLocEntry = getLoadedSLocEntry(MiddleIndex, ); -if (Invalid) - return FileID(); // invalid entry. - -if (E.getOffset() > SLocOffset) { - if (GreaterIndex == MiddleIndex) { -assert(0 && "binary search missed the entry"); -return FileID(); -
[clang] [clang][modules] Move `SLocEntry` search into `ASTReader` (PR #66966)
@@ -50,6 +50,7 @@ int y = a2; // CHECK: In module 'a': // CHECK-NEXT: a.h:1:45: error: +int z = b; // MISSING-B: could not find file '{{.*}}b.h' // MISSING-B-NOT: please delete the module cache jansvoboda11 wrote: I'm not sure. To me, this seems like an implementation detail we shouldn't test for. https://github.com/llvm/llvm-project/pull/66966 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][modules] Move `SLocEntry` search into `ASTReader` (PR #66966)
@@ -288,10 +288,12 @@ class ModuleFile { /// for the entry is SLocEntryOffsetsBase + SLocEntryOffsets[i]. uint64_t SLocEntryOffsetsBase = 0; - /// Offsets for all of the source location entries in the - /// AST file. + /// Stream bit offsets for all of the source location entries in the AST file. const uint32_t *SLocEntryOffsets = nullptr; + /// SLocEntry offsets that have been loaded from the AST file. + std::vector SLocEntryOffsetLoaded; jansvoboda11 wrote: This ends up being ~10MB for single TU using 37 modules from the Darwin SDK. I'm not thrilled about that, what do you think? We could store these offsets in `SourceManager::LoadedSLocEntryTable` and mark the fact that the entry has not been fully deserialized in a counterpart to `llvm::BitVector SLocEntryLoaded`. https://github.com/llvm/llvm-project/pull/66966 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][modules] Move `SLocEntry` search into `ASTReader` (PR #66966)
@@ -864,74 +864,7 @@ FileID SourceManager::getFileIDLocal(SourceLocation::UIntTy SLocOffset) const { /// This function knows that the SourceLocation is in a loaded buffer, not a /// local one. FileID SourceManager::getFileIDLoaded(SourceLocation::UIntTy SLocOffset) const { - if (SLocOffset < CurrentLoadedOffset) { -assert(0 && "Invalid SLocOffset or bad function choice"); jansvoboda11 wrote: It would probably fail to find the offset in `ASTReader::GlobalSLocOffsetMap` and fail with more generic error. I'll add this back. https://github.com/llvm/llvm-project/pull/66966 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][modules] Move `SLocEntry` search into `ASTReader` (PR #66966)
@@ -1444,6 +1444,77 @@ llvm::Error ASTReader::ReadSourceManagerBlock(ModuleFile ) { } } +std::optional +ASTReader::readSLocOffset(ModuleFile *F, unsigned Index) { benlangmuir wrote: Does every path that returns `std::nullopt` report an error? If so I would recommend returning `Expected<...>` instead of `optional` and then report the error in the caller. https://github.com/llvm/llvm-project/pull/66966 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][modules] Move `SLocEntry` search into `ASTReader` (PR #66966)
@@ -2261,43 +2261,68 @@ template struct enumerator_result { mutable range_reference_tuple Storage; }; -/// Infinite stream of increasing 0-based `size_t` indices. -struct index_stream { - struct iterator : iterator_facade_base { -iterator ++() { - assert(Index != std::numeric_limits::max() && - "Attempting to increment end iterator"); - ++Index; - return *this; -} +struct index_iterator +: llvm::iterator_facade_base { benlangmuir wrote: ptrdiff_t is the default https://github.com/llvm/llvm-project/pull/66966 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][modules] Move `SLocEntry` search into `ASTReader` (PR #66966)
@@ -288,10 +288,12 @@ class ModuleFile { /// for the entry is SLocEntryOffsetsBase + SLocEntryOffsets[i]. uint64_t SLocEntryOffsetsBase = 0; - /// Offsets for all of the source location entries in the - /// AST file. + /// Stream bit offsets for all of the source location entries in the AST file. const uint32_t *SLocEntryOffsets = nullptr; + /// SLocEntry offsets that have been loaded from the AST file. + std::vector SLocEntryOffsetLoaded; benlangmuir wrote: I assume this is relatively small memory use in practice? https://github.com/llvm/llvm-project/pull/66966 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][modules] Move `SLocEntry` search into `ASTReader` (PR #66966)
@@ -50,6 +50,7 @@ int y = a2; // CHECK: In module 'a': // CHECK-NEXT: a.h:1:45: error: +int z = b; // MISSING-B: could not find file '{{.*}}b.h' // MISSING-B-NOT: please delete the module cache benlangmuir wrote: Can we use this approach to test the current change? ie. create a version of this test that doesn't deserialize b and therefore doesn't get the diagnostic? You could maybe add more files that don't get deserialized and check that none of them show up to improve the robustness of the test? https://github.com/llvm/llvm-project/pull/66966 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][modules] Move `SLocEntry` search into `ASTReader` (PR #66966)
@@ -864,74 +864,7 @@ FileID SourceManager::getFileIDLocal(SourceLocation::UIntTy SLocOffset) const { /// This function knows that the SourceLocation is in a loaded buffer, not a /// local one. FileID SourceManager::getFileIDLoaded(SourceLocation::UIntTy SLocOffset) const { - if (SLocOffset < CurrentLoadedOffset) { -assert(0 && "Invalid SLocOffset or bad function choice"); benlangmuir wrote: Where would this case end up failing in the new implementation? https://github.com/llvm/llvm-project/pull/66966 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][modules] Move `SLocEntry` search into `ASTReader` (PR #66966)
@@ -50,6 +50,7 @@ int y = a2; // CHECK: In module 'a': // CHECK-NEXT: a.h:1:45: error: +int z = b; // MISSING-B: could not find file '{{.*}}b.h' // MISSING-B-NOT: please delete the module cache jansvoboda11 wrote: This test relied on the fact that when diagnosing line 49, the `SLocEntry` for "b.h" got deserialized as a side-effect of the binary search for "a.h", thus generating the `MISSING-B` error message. Since we no longer fully deserialize the "b.h" `SLocEntry` during the binary search for "a.h", let's force its full deserialization by triggering new diagnostics pointing directly into "b.h". https://github.com/llvm/llvm-project/pull/66966 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][modules] Move `SLocEntry` search into `ASTReader` (PR #66966)
https://github.com/jansvoboda11 updated https://github.com/llvm/llvm-project/pull/66966 >From 4edf9d8559339a12108d9c4d1e2f3bb062a5a768 Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Wed, 20 Sep 2023 17:30:45 -0700 Subject: [PATCH 1/6] [clang][modules] Move `SLocEntry` search into `ASTReader` In `getFileID()` the `SourceManager` ends up doing a binary search over its buffer of `SLocEntries`. For modules, this binary search fully deserializes the entire `SLocEntry` block for visited each entry. This shows up in profiles of the dependency scanner, since that operation includes decompressing buffers associated with some entries. This patch moves the binary search over loaded entries into `ASTReader`, which now only performs partial deserialization during the binary search, speeding up the scanner by ~3.3%. --- clang/include/clang/Basic/SourceManager.h | 3 + clang/include/clang/Serialization/ASTReader.h | 6 ++ clang/lib/Basic/SourceManager.cpp | 70 +-- clang/lib/Serialization/ASTReader.cpp | 63 + 4 files changed, 75 insertions(+), 67 deletions(-) diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h index 2f846502d6f3327..a4c7facddd53d64 100644 --- a/clang/include/clang/Basic/SourceManager.h +++ b/clang/include/clang/Basic/SourceManager.h @@ -533,6 +533,9 @@ class ExternalSLocEntrySource { /// entry from being loaded. virtual bool ReadSLocEntry(int ID) = 0; + /// Get the index ID for the loaded SourceLocation offset. + virtual int getSLocEntryID(SourceLocation::UIntTy SLocOffset) = 0; + /// Retrieve the module import location and name for the given ID, if /// in fact it was loaded from a module (rather than, say, a precompiled /// header). diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index dc1eb21c27801fe..e643fcf4c930f09 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -2153,6 +2153,12 @@ class ASTReader /// Read the source location entry with index ID. bool ReadSLocEntry(int ID) override; + /// Get the index ID for the loaded SourceLocation offset. + int getSLocEntryID(SourceLocation::UIntTy SLocOffset) override; + /// Read the offset of the SLocEntry at the given index in the given module + /// file. + std::optional readSLocOffset(ModuleFile *F, + unsigned Index); /// Retrieve the module import location and module name for the /// given source manager entry ID. diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp index 0521ac7b30339ab..f881afc2e46c5c6 100644 --- a/clang/lib/Basic/SourceManager.cpp +++ b/clang/lib/Basic/SourceManager.cpp @@ -864,74 +864,10 @@ FileID SourceManager::getFileIDLocal(SourceLocation::UIntTy SLocOffset) const { /// This function knows that the SourceLocation is in a loaded buffer, not a /// local one. FileID SourceManager::getFileIDLoaded(SourceLocation::UIntTy SLocOffset) const { - if (SLocOffset < CurrentLoadedOffset) { -assert(0 && "Invalid SLocOffset or bad function choice"); -return FileID(); - } - - // Essentially the same as the local case, but the loaded array is sorted - // in the other direction (decreasing order). - // GreaterIndex is the one where the offset is greater, which is actually a - // lower index! - unsigned GreaterIndex = 0; - unsigned LessIndex = LoadedSLocEntryTable.size(); - if (LastFileIDLookup.ID < 0) { -// Prune the search space. -int LastID = LastFileIDLookup.ID; -if (getLoadedSLocEntryByID(LastID).getOffset() > SLocOffset) - GreaterIndex = - (-LastID - 2) + 1; // Exclude LastID, else we would have hit the cache -else - LessIndex = -LastID - 2; - } - - // First do a linear scan from the last lookup position, if possible. - unsigned NumProbes; + int ID = ExternalSLocEntries->getSLocEntryID(SLocOffset); bool Invalid = false; - for (NumProbes = 0; NumProbes < 8; ++NumProbes, ++GreaterIndex) { -// Make sure the entry is loaded! -const SrcMgr::SLocEntry = getLoadedSLocEntry(GreaterIndex, ); -if (Invalid) - return FileID(); // invalid entry. -if (E.getOffset() <= SLocOffset) { - FileID Res = FileID::get(-int(GreaterIndex) - 2); - LastFileIDLookup = Res; - NumLinearScans += NumProbes + 1; - return Res; -} - } - - // Linear scan failed. Do the binary search. - NumProbes = 0; - while (true) { -++NumProbes; -unsigned MiddleIndex = (LessIndex - GreaterIndex) / 2 + GreaterIndex; -const SrcMgr::SLocEntry = getLoadedSLocEntry(MiddleIndex, ); -if (Invalid) - return FileID(); // invalid entry. - -if (E.getOffset() > SLocOffset) { - if (GreaterIndex == MiddleIndex) { -assert(0 && "binary search missed the entry"); -return FileID(); -
[clang] [clang][modules] Move `SLocEntry` search into `ASTReader` (PR #66966)
https://github.com/jansvoboda11 ready_for_review https://github.com/llvm/llvm-project/pull/66966 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][modules] Move `SLocEntry` search into `ASTReader` (PR #66966)
https://github.com/jansvoboda11 edited https://github.com/llvm/llvm-project/pull/66966 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][modules] Move `SLocEntry` search into `ASTReader` (PR #66966)
https://github.com/jansvoboda11 edited https://github.com/llvm/llvm-project/pull/66966 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][modules] Move `SLocEntry` search into `ASTReader` (PR #66966)
https://github.com/jansvoboda11 updated https://github.com/llvm/llvm-project/pull/66966 >From 4edf9d8559339a12108d9c4d1e2f3bb062a5a768 Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Wed, 20 Sep 2023 17:30:45 -0700 Subject: [PATCH 1/5] [clang][modules] Move `SLocEntry` search into `ASTReader` In `getFileID()` the `SourceManager` ends up doing a binary search over its buffer of `SLocEntries`. For modules, this binary search fully deserializes the entire `SLocEntry` block for visited each entry. This shows up in profiles of the dependency scanner, since that operation includes decompressing buffers associated with some entries. This patch moves the binary search over loaded entries into `ASTReader`, which now only performs partial deserialization during the binary search, speeding up the scanner by ~3.3%. --- clang/include/clang/Basic/SourceManager.h | 3 + clang/include/clang/Serialization/ASTReader.h | 6 ++ clang/lib/Basic/SourceManager.cpp | 70 +-- clang/lib/Serialization/ASTReader.cpp | 63 + 4 files changed, 75 insertions(+), 67 deletions(-) diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h index 2f846502d6f3327..a4c7facddd53d64 100644 --- a/clang/include/clang/Basic/SourceManager.h +++ b/clang/include/clang/Basic/SourceManager.h @@ -533,6 +533,9 @@ class ExternalSLocEntrySource { /// entry from being loaded. virtual bool ReadSLocEntry(int ID) = 0; + /// Get the index ID for the loaded SourceLocation offset. + virtual int getSLocEntryID(SourceLocation::UIntTy SLocOffset) = 0; + /// Retrieve the module import location and name for the given ID, if /// in fact it was loaded from a module (rather than, say, a precompiled /// header). diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index dc1eb21c27801fe..e643fcf4c930f09 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -2153,6 +2153,12 @@ class ASTReader /// Read the source location entry with index ID. bool ReadSLocEntry(int ID) override; + /// Get the index ID for the loaded SourceLocation offset. + int getSLocEntryID(SourceLocation::UIntTy SLocOffset) override; + /// Read the offset of the SLocEntry at the given index in the given module + /// file. + std::optional readSLocOffset(ModuleFile *F, + unsigned Index); /// Retrieve the module import location and module name for the /// given source manager entry ID. diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp index 0521ac7b30339ab..f881afc2e46c5c6 100644 --- a/clang/lib/Basic/SourceManager.cpp +++ b/clang/lib/Basic/SourceManager.cpp @@ -864,74 +864,10 @@ FileID SourceManager::getFileIDLocal(SourceLocation::UIntTy SLocOffset) const { /// This function knows that the SourceLocation is in a loaded buffer, not a /// local one. FileID SourceManager::getFileIDLoaded(SourceLocation::UIntTy SLocOffset) const { - if (SLocOffset < CurrentLoadedOffset) { -assert(0 && "Invalid SLocOffset or bad function choice"); -return FileID(); - } - - // Essentially the same as the local case, but the loaded array is sorted - // in the other direction (decreasing order). - // GreaterIndex is the one where the offset is greater, which is actually a - // lower index! - unsigned GreaterIndex = 0; - unsigned LessIndex = LoadedSLocEntryTable.size(); - if (LastFileIDLookup.ID < 0) { -// Prune the search space. -int LastID = LastFileIDLookup.ID; -if (getLoadedSLocEntryByID(LastID).getOffset() > SLocOffset) - GreaterIndex = - (-LastID - 2) + 1; // Exclude LastID, else we would have hit the cache -else - LessIndex = -LastID - 2; - } - - // First do a linear scan from the last lookup position, if possible. - unsigned NumProbes; + int ID = ExternalSLocEntries->getSLocEntryID(SLocOffset); bool Invalid = false; - for (NumProbes = 0; NumProbes < 8; ++NumProbes, ++GreaterIndex) { -// Make sure the entry is loaded! -const SrcMgr::SLocEntry = getLoadedSLocEntry(GreaterIndex, ); -if (Invalid) - return FileID(); // invalid entry. -if (E.getOffset() <= SLocOffset) { - FileID Res = FileID::get(-int(GreaterIndex) - 2); - LastFileIDLookup = Res; - NumLinearScans += NumProbes + 1; - return Res; -} - } - - // Linear scan failed. Do the binary search. - NumProbes = 0; - while (true) { -++NumProbes; -unsigned MiddleIndex = (LessIndex - GreaterIndex) / 2 + GreaterIndex; -const SrcMgr::SLocEntry = getLoadedSLocEntry(MiddleIndex, ); -if (Invalid) - return FileID(); // invalid entry. - -if (E.getOffset() > SLocOffset) { - if (GreaterIndex == MiddleIndex) { -assert(0 && "binary search missed the entry"); -return FileID(); -
[clang] [clang][modules] Move `SLocEntry` search into `ASTReader` (PR #66966)
https://github.com/jansvoboda11 edited https://github.com/llvm/llvm-project/pull/66966 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][modules] Move `SLocEntry` search into `ASTReader` (PR #66966)
https://github.com/jansvoboda11 updated https://github.com/llvm/llvm-project/pull/66966 >From 4edf9d8559339a12108d9c4d1e2f3bb062a5a768 Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Wed, 20 Sep 2023 17:30:45 -0700 Subject: [PATCH 1/4] [clang][modules] Move `SLocEntry` search into `ASTReader` In `getFileID()` the `SourceManager` ends up doing a binary search over its buffer of `SLocEntries`. For modules, this binary search fully deserializes the entire `SLocEntry` block for visited each entry. This shows up in profiles of the dependency scanner, since that operation includes decompressing buffers associated with some entries. This patch moves the binary search over loaded entries into `ASTReader`, which now only performs partial deserialization during the binary search, speeding up the scanner by ~3.3%. --- clang/include/clang/Basic/SourceManager.h | 3 + clang/include/clang/Serialization/ASTReader.h | 6 ++ clang/lib/Basic/SourceManager.cpp | 70 +-- clang/lib/Serialization/ASTReader.cpp | 63 + 4 files changed, 75 insertions(+), 67 deletions(-) diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h index 2f846502d6f3327..a4c7facddd53d64 100644 --- a/clang/include/clang/Basic/SourceManager.h +++ b/clang/include/clang/Basic/SourceManager.h @@ -533,6 +533,9 @@ class ExternalSLocEntrySource { /// entry from being loaded. virtual bool ReadSLocEntry(int ID) = 0; + /// Get the index ID for the loaded SourceLocation offset. + virtual int getSLocEntryID(SourceLocation::UIntTy SLocOffset) = 0; + /// Retrieve the module import location and name for the given ID, if /// in fact it was loaded from a module (rather than, say, a precompiled /// header). diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index dc1eb21c27801fe..e643fcf4c930f09 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -2153,6 +2153,12 @@ class ASTReader /// Read the source location entry with index ID. bool ReadSLocEntry(int ID) override; + /// Get the index ID for the loaded SourceLocation offset. + int getSLocEntryID(SourceLocation::UIntTy SLocOffset) override; + /// Read the offset of the SLocEntry at the given index in the given module + /// file. + std::optional readSLocOffset(ModuleFile *F, + unsigned Index); /// Retrieve the module import location and module name for the /// given source manager entry ID. diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp index 0521ac7b30339ab..f881afc2e46c5c6 100644 --- a/clang/lib/Basic/SourceManager.cpp +++ b/clang/lib/Basic/SourceManager.cpp @@ -864,74 +864,10 @@ FileID SourceManager::getFileIDLocal(SourceLocation::UIntTy SLocOffset) const { /// This function knows that the SourceLocation is in a loaded buffer, not a /// local one. FileID SourceManager::getFileIDLoaded(SourceLocation::UIntTy SLocOffset) const { - if (SLocOffset < CurrentLoadedOffset) { -assert(0 && "Invalid SLocOffset or bad function choice"); -return FileID(); - } - - // Essentially the same as the local case, but the loaded array is sorted - // in the other direction (decreasing order). - // GreaterIndex is the one where the offset is greater, which is actually a - // lower index! - unsigned GreaterIndex = 0; - unsigned LessIndex = LoadedSLocEntryTable.size(); - if (LastFileIDLookup.ID < 0) { -// Prune the search space. -int LastID = LastFileIDLookup.ID; -if (getLoadedSLocEntryByID(LastID).getOffset() > SLocOffset) - GreaterIndex = - (-LastID - 2) + 1; // Exclude LastID, else we would have hit the cache -else - LessIndex = -LastID - 2; - } - - // First do a linear scan from the last lookup position, if possible. - unsigned NumProbes; + int ID = ExternalSLocEntries->getSLocEntryID(SLocOffset); bool Invalid = false; - for (NumProbes = 0; NumProbes < 8; ++NumProbes, ++GreaterIndex) { -// Make sure the entry is loaded! -const SrcMgr::SLocEntry = getLoadedSLocEntry(GreaterIndex, ); -if (Invalid) - return FileID(); // invalid entry. -if (E.getOffset() <= SLocOffset) { - FileID Res = FileID::get(-int(GreaterIndex) - 2); - LastFileIDLookup = Res; - NumLinearScans += NumProbes + 1; - return Res; -} - } - - // Linear scan failed. Do the binary search. - NumProbes = 0; - while (true) { -++NumProbes; -unsigned MiddleIndex = (LessIndex - GreaterIndex) / 2 + GreaterIndex; -const SrcMgr::SLocEntry = getLoadedSLocEntry(MiddleIndex, ); -if (Invalid) - return FileID(); // invalid entry. - -if (E.getOffset() > SLocOffset) { - if (GreaterIndex == MiddleIndex) { -assert(0 && "binary search missed the entry"); -return FileID(); -
[clang] [clang][modules] Move `SLocEntry` search into `ASTReader` (PR #66966)
https://github.com/jansvoboda11 edited https://github.com/llvm/llvm-project/pull/66966 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][modules] Move `SLocEntry` search into `ASTReader` (PR #66966)
https://github.com/jansvoboda11 updated https://github.com/llvm/llvm-project/pull/66966 >From d26f9487dedf53ac419cbd9dfb8c29214ba0e09f Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Wed, 20 Sep 2023 17:30:45 -0700 Subject: [PATCH 1/2] [clang][modules] Move `SLocEntry` search into `ASTReader` In `getFileID()` the `SourceManager` ends up doing a binary search over its buffer of `SLocEntries`. For modules, this binary search fully deserializes the entire `SLocEntry` block for visited each entry. This shows up in profiles of the dependency scanner, since that operation includes decompressing buffers associated with some entries. This patch moves the binary search over loaded entries into `ASTReader`, which now only performs partial deserialization during the binary search, speeding up the scanner by ~3.3%. --- clang/include/clang/Basic/SourceManager.h | 3 + clang/include/clang/Serialization/ASTReader.h | 6 ++ clang/lib/Basic/SourceManager.cpp | 70 +-- clang/lib/Serialization/ASTReader.cpp | 63 + 4 files changed, 75 insertions(+), 67 deletions(-) diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h index 2f846502d6f3327..a4c7facddd53d64 100644 --- a/clang/include/clang/Basic/SourceManager.h +++ b/clang/include/clang/Basic/SourceManager.h @@ -533,6 +533,9 @@ class ExternalSLocEntrySource { /// entry from being loaded. virtual bool ReadSLocEntry(int ID) = 0; + /// Get the index ID for the loaded SourceLocation offset. + virtual int getSLocEntryID(SourceLocation::UIntTy SLocOffset) = 0; + /// Retrieve the module import location and name for the given ID, if /// in fact it was loaded from a module (rather than, say, a precompiled /// header). diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index dc1eb21c27801fe..e643fcf4c930f09 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -2153,6 +2153,12 @@ class ASTReader /// Read the source location entry with index ID. bool ReadSLocEntry(int ID) override; + /// Get the index ID for the loaded SourceLocation offset. + int getSLocEntryID(SourceLocation::UIntTy SLocOffset) override; + /// Read the offset of the SLocEntry at the given index in the given module + /// file. + std::optional readSLocOffset(ModuleFile *F, + unsigned Index); /// Retrieve the module import location and module name for the /// given source manager entry ID. diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp index 0521ac7b30339ab..f881afc2e46c5c6 100644 --- a/clang/lib/Basic/SourceManager.cpp +++ b/clang/lib/Basic/SourceManager.cpp @@ -864,74 +864,10 @@ FileID SourceManager::getFileIDLocal(SourceLocation::UIntTy SLocOffset) const { /// This function knows that the SourceLocation is in a loaded buffer, not a /// local one. FileID SourceManager::getFileIDLoaded(SourceLocation::UIntTy SLocOffset) const { - if (SLocOffset < CurrentLoadedOffset) { -assert(0 && "Invalid SLocOffset or bad function choice"); -return FileID(); - } - - // Essentially the same as the local case, but the loaded array is sorted - // in the other direction (decreasing order). - // GreaterIndex is the one where the offset is greater, which is actually a - // lower index! - unsigned GreaterIndex = 0; - unsigned LessIndex = LoadedSLocEntryTable.size(); - if (LastFileIDLookup.ID < 0) { -// Prune the search space. -int LastID = LastFileIDLookup.ID; -if (getLoadedSLocEntryByID(LastID).getOffset() > SLocOffset) - GreaterIndex = - (-LastID - 2) + 1; // Exclude LastID, else we would have hit the cache -else - LessIndex = -LastID - 2; - } - - // First do a linear scan from the last lookup position, if possible. - unsigned NumProbes; + int ID = ExternalSLocEntries->getSLocEntryID(SLocOffset); bool Invalid = false; - for (NumProbes = 0; NumProbes < 8; ++NumProbes, ++GreaterIndex) { -// Make sure the entry is loaded! -const SrcMgr::SLocEntry = getLoadedSLocEntry(GreaterIndex, ); -if (Invalid) - return FileID(); // invalid entry. -if (E.getOffset() <= SLocOffset) { - FileID Res = FileID::get(-int(GreaterIndex) - 2); - LastFileIDLookup = Res; - NumLinearScans += NumProbes + 1; - return Res; -} - } - - // Linear scan failed. Do the binary search. - NumProbes = 0; - while (true) { -++NumProbes; -unsigned MiddleIndex = (LessIndex - GreaterIndex) / 2 + GreaterIndex; -const SrcMgr::SLocEntry = getLoadedSLocEntry(MiddleIndex, ); -if (Invalid) - return FileID(); // invalid entry. - -if (E.getOffset() > SLocOffset) { - if (GreaterIndex == MiddleIndex) { -assert(0 && "binary search missed the entry"); -return FileID(); -
[clang] [clang][modules] Move `SLocEntry` search into `ASTReader` (PR #66966)
jansvoboda11 wrote: Marking as draft for now. We still need to cache the lightweight offset deserialization. While this patch speeds up the dependency scanner as it stands, implicit modules compile slows down by 50%. https://github.com/llvm/llvm-project/pull/66966 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][modules] Move `SLocEntry` search into `ASTReader` (PR #66966)
llvmbot wrote: @llvm/pr-subscribers-clang-modules Changes In `getFileID()` the `SourceManager` ends up doing a binary search over its buffer of `SLocEntries`. For modules, this binary search fully deserializes the entire `SLocEntry` block for visited each entry. This shows up in profiles of the dependency scanner, since that operation includes decompressing buffers associated with some entries. This patch moves the binary search over loaded entries into `ASTReader`, which now only performs partial deserialization during the binary search, speeding up the scanner by ~3.3%. --- Full diff: https://github.com/llvm/llvm-project/pull/66966.diff 4 Files Affected: - (modified) clang/include/clang/Basic/SourceManager.h (+3) - (modified) clang/include/clang/Serialization/ASTReader.h (+6) - (modified) clang/lib/Basic/SourceManager.cpp (+3-67) - (modified) clang/lib/Serialization/ASTReader.cpp (+63) ``diff diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h index 2f846502d6f3327..a4c7facddd53d64 100644 --- a/clang/include/clang/Basic/SourceManager.h +++ b/clang/include/clang/Basic/SourceManager.h @@ -533,6 +533,9 @@ class ExternalSLocEntrySource { /// entry from being loaded. virtual bool ReadSLocEntry(int ID) = 0; + /// Get the index ID for the loaded SourceLocation offset. + virtual int getSLocEntryID(SourceLocation::UIntTy SLocOffset) = 0; + /// Retrieve the module import location and name for the given ID, if /// in fact it was loaded from a module (rather than, say, a precompiled /// header). diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index dc1eb21c27801fe..e643fcf4c930f09 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -2153,6 +2153,12 @@ class ASTReader /// Read the source location entry with index ID. bool ReadSLocEntry(int ID) override; + /// Get the index ID for the loaded SourceLocation offset. + int getSLocEntryID(SourceLocation::UIntTy SLocOffset) override; + /// Read the offset of the SLocEntry at the given index in the given module + /// file. + std::optional readSLocOffset(ModuleFile *F, + unsigned Index); /// Retrieve the module import location and module name for the /// given source manager entry ID. diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp index 0521ac7b30339ab..f881afc2e46c5c6 100644 --- a/clang/lib/Basic/SourceManager.cpp +++ b/clang/lib/Basic/SourceManager.cpp @@ -864,74 +864,10 @@ FileID SourceManager::getFileIDLocal(SourceLocation::UIntTy SLocOffset) const { /// This function knows that the SourceLocation is in a loaded buffer, not a /// local one. FileID SourceManager::getFileIDLoaded(SourceLocation::UIntTy SLocOffset) const { - if (SLocOffset < CurrentLoadedOffset) { -assert(0 && "Invalid SLocOffset or bad function choice"); -return FileID(); - } - - // Essentially the same as the local case, but the loaded array is sorted - // in the other direction (decreasing order). - // GreaterIndex is the one where the offset is greater, which is actually a - // lower index! - unsigned GreaterIndex = 0; - unsigned LessIndex = LoadedSLocEntryTable.size(); - if (LastFileIDLookup.ID < 0) { -// Prune the search space. -int LastID = LastFileIDLookup.ID; -if (getLoadedSLocEntryByID(LastID).getOffset() > SLocOffset) - GreaterIndex = - (-LastID - 2) + 1; // Exclude LastID, else we would have hit the cache -else - LessIndex = -LastID - 2; - } - - // First do a linear scan from the last lookup position, if possible. - unsigned NumProbes; + int ID = ExternalSLocEntries->getSLocEntryID(SLocOffset); bool Invalid = false; - for (NumProbes = 0; NumProbes < 8; ++NumProbes, ++GreaterIndex) { -// Make sure the entry is loaded! -const SrcMgr::SLocEntry = getLoadedSLocEntry(GreaterIndex, ); -if (Invalid) - return FileID(); // invalid entry. -if (E.getOffset() <= SLocOffset) { - FileID Res = FileID::get(-int(GreaterIndex) - 2); - LastFileIDLookup = Res; - NumLinearScans += NumProbes + 1; - return Res; -} - } - - // Linear scan failed. Do the binary search. - NumProbes = 0; - while (true) { -++NumProbes; -unsigned MiddleIndex = (LessIndex - GreaterIndex) / 2 + GreaterIndex; -const SrcMgr::SLocEntry = getLoadedSLocEntry(MiddleIndex, ); -if (Invalid) - return FileID(); // invalid entry. - -if (E.getOffset() > SLocOffset) { - if (GreaterIndex == MiddleIndex) { -assert(0 && "binary search missed the entry"); -return FileID(); - } - GreaterIndex = MiddleIndex; - continue; -} - -if (isOffsetInFileID(FileID::get(-int(MiddleIndex) - 2), SLocOffset)) { - FileID Res = FileID::get(-int(MiddleIndex) -
[clang] [clang][modules] Move `SLocEntry` search into `ASTReader` (PR #66966)
https://github.com/jansvoboda11 created https://github.com/llvm/llvm-project/pull/66966 In `getFileID()` the `SourceManager` ends up doing a binary search over its buffer of `SLocEntries`. For modules, this binary search fully deserializes the entire `SLocEntry` block for visited each entry. This shows up in profiles of the dependency scanner, since that operation includes decompressing buffers associated with some entries. This patch moves the binary search over loaded entries into `ASTReader`, which now only performs partial deserialization during the binary search, speeding up the scanner by ~3.3%. >From d26f9487dedf53ac419cbd9dfb8c29214ba0e09f Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Wed, 20 Sep 2023 17:30:45 -0700 Subject: [PATCH] [clang][modules] Move `SLocEntry` search into `ASTReader` In `getFileID()` the `SourceManager` ends up doing a binary search over its buffer of `SLocEntries`. For modules, this binary search fully deserializes the entire `SLocEntry` block for visited each entry. This shows up in profiles of the dependency scanner, since that operation includes decompressing buffers associated with some entries. This patch moves the binary search over loaded entries into `ASTReader`, which now only performs partial deserialization during the binary search, speeding up the scanner by ~3.3%. --- clang/include/clang/Basic/SourceManager.h | 3 + clang/include/clang/Serialization/ASTReader.h | 6 ++ clang/lib/Basic/SourceManager.cpp | 70 +-- clang/lib/Serialization/ASTReader.cpp | 63 + 4 files changed, 75 insertions(+), 67 deletions(-) diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h index 2f846502d6f3327..a4c7facddd53d64 100644 --- a/clang/include/clang/Basic/SourceManager.h +++ b/clang/include/clang/Basic/SourceManager.h @@ -533,6 +533,9 @@ class ExternalSLocEntrySource { /// entry from being loaded. virtual bool ReadSLocEntry(int ID) = 0; + /// Get the index ID for the loaded SourceLocation offset. + virtual int getSLocEntryID(SourceLocation::UIntTy SLocOffset) = 0; + /// Retrieve the module import location and name for the given ID, if /// in fact it was loaded from a module (rather than, say, a precompiled /// header). diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index dc1eb21c27801fe..e643fcf4c930f09 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -2153,6 +2153,12 @@ class ASTReader /// Read the source location entry with index ID. bool ReadSLocEntry(int ID) override; + /// Get the index ID for the loaded SourceLocation offset. + int getSLocEntryID(SourceLocation::UIntTy SLocOffset) override; + /// Read the offset of the SLocEntry at the given index in the given module + /// file. + std::optional readSLocOffset(ModuleFile *F, + unsigned Index); /// Retrieve the module import location and module name for the /// given source manager entry ID. diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp index 0521ac7b30339ab..f881afc2e46c5c6 100644 --- a/clang/lib/Basic/SourceManager.cpp +++ b/clang/lib/Basic/SourceManager.cpp @@ -864,74 +864,10 @@ FileID SourceManager::getFileIDLocal(SourceLocation::UIntTy SLocOffset) const { /// This function knows that the SourceLocation is in a loaded buffer, not a /// local one. FileID SourceManager::getFileIDLoaded(SourceLocation::UIntTy SLocOffset) const { - if (SLocOffset < CurrentLoadedOffset) { -assert(0 && "Invalid SLocOffset or bad function choice"); -return FileID(); - } - - // Essentially the same as the local case, but the loaded array is sorted - // in the other direction (decreasing order). - // GreaterIndex is the one where the offset is greater, which is actually a - // lower index! - unsigned GreaterIndex = 0; - unsigned LessIndex = LoadedSLocEntryTable.size(); - if (LastFileIDLookup.ID < 0) { -// Prune the search space. -int LastID = LastFileIDLookup.ID; -if (getLoadedSLocEntryByID(LastID).getOffset() > SLocOffset) - GreaterIndex = - (-LastID - 2) + 1; // Exclude LastID, else we would have hit the cache -else - LessIndex = -LastID - 2; - } - - // First do a linear scan from the last lookup position, if possible. - unsigned NumProbes; + int ID = ExternalSLocEntries->getSLocEntryID(SLocOffset); bool Invalid = false; - for (NumProbes = 0; NumProbes < 8; ++NumProbes, ++GreaterIndex) { -// Make sure the entry is loaded! -const SrcMgr::SLocEntry = getLoadedSLocEntry(GreaterIndex, ); -if (Invalid) - return FileID(); // invalid entry. -if (E.getOffset() <= SLocOffset) { - FileID Res = FileID::get(-int(GreaterIndex) - 2); - LastFileIDLookup = Res; - NumLinearScans