[llvm] [clang] Avoid need for SLocEntryLoaded BitVector (PR #67960)

2023-12-22 Thread Giulio Eulisse via cfe-commits

ktf wrote:

 I was carried away by the heavy ion run and other priorities. i can try to
have a look again after Christmas. Feel free to close if you prefer.

Ciao,
Giulio

On Fri, Dec 22 2023 at 9:32 AM, Vassil Vassilev ***@***.***>
wrote:

> @ktf  what is the fate of this PR?
>
> —
> Reply to this email directly, view it on GitHub
> ,
> or unsubscribe
> 
> .
> You are receiving this because you were mentioned.Message ID:
> ***@***.***>
>


https://github.com/llvm/llvm-project/pull/67960
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Avoid need for SLocEntryLoaded BitVector (PR #67960)

2023-10-03 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/67960

>From 9fde224de6baa5b1fb3713d810ce835d4456b457 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Fri, 29 Sep 2023 08:37:57 +0200
Subject: [PATCH 1/9] Avoid need for SLocEntryLoaded BitVector

The BitVector is currently used to keep track of which entries of
LoadedSLocEntryTable have been loaded. Such information can be stored in
the SLocEntry itself. Moreover, thanks to the fact that
LoadedSLocEntryTable is now a PagedVector, we can simply consider
elements of unmaterialised pages as not loaded.

This trades reducing the number of OffsetBits by one for reducing memory
usage of the SourceManager by LoadedSLocEntryTable.size()/8 bytes.
---
 clang/include/clang/Basic/SourceManager.h | 16 ++
 clang/lib/Basic/SourceManager.cpp | 27 ++-
 llvm/include/llvm/ADT/PagedVector.h   |  6 +
 3 files changed, 29 insertions(+), 20 deletions(-)

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index c1b24eec2759c71..d450280303d6785 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -474,9 +474,10 @@ static_assert(sizeof(FileInfo) <= sizeof(ExpansionInfo),
 /// SourceManager keeps an array of these objects, and they are uniquely
 /// identified by the FileID datatype.
 class SLocEntry {
-  static constexpr int OffsetBits = 8 * sizeof(SourceLocation::UIntTy) - 1;
+  static constexpr int OffsetBits = 8 * sizeof(SourceLocation::UIntTy) - 2;
   SourceLocation::UIntTy Offset : OffsetBits;
   SourceLocation::UIntTy IsExpansion : 1;
+  SourceLocation::UIntTy Loaded : 1;
   union {
 FileInfo File;
 ExpansionInfo Expansion;
@@ -489,6 +490,8 @@ class SLocEntry {
 
   bool isExpansion() const { return IsExpansion; }
   bool isFile() const { return !isExpansion(); }
+  [[nodiscard]] bool isLoaded() const { return Loaded; }
+  void setLoaded(bool Value) { Loaded = Value; }
 
   const FileInfo () const {
 assert(isFile() && "Not a file SLocEntry!");
@@ -716,13 +719,7 @@ class SourceManager : public RefCountedBase 
{
   /// The highest possible offset is 2^31-1 (2^63-1 for 64-bit source
   /// locations), so CurrentLoadedOffset starts at 2^31 (2^63 resp.).
   static const SourceLocation::UIntTy MaxLoadedOffset =
-  1ULL << (8 * sizeof(SourceLocation::UIntTy) - 1);
-
-  /// A bitmap that indicates whether the entries of LoadedSLocEntryTable
-  /// have already been loaded from the external source.
-  ///
-  /// Same indexing as LoadedSLocEntryTable.
-  llvm::BitVector SLocEntryLoaded;
+  1ULL << (8 * sizeof(SourceLocation::UIntTy) - 2);
 
   /// An external source for source location entries.
   ExternalSLocEntrySource *ExternalSLocEntries = nullptr;
@@ -1710,7 +1707,8 @@ class SourceManager : public 
RefCountedBase {
   const SrcMgr::SLocEntry (unsigned Index,
   bool *Invalid = nullptr) const {
 assert(Index < LoadedSLocEntryTable.size() && "Invalid index");
-if (SLocEntryLoaded[Index])
+if (LoadedSLocEntryTable.isMaterialized(Index) &&
+LoadedSLocEntryTable[Index].isLoaded())
   return LoadedSLocEntryTable[Index];
 return loadSLocEntry(Index, Invalid);
   }
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 3066cc53dbfd878..20e3f1252ddf077 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -336,7 +336,6 @@ void SourceManager::clearIDTables() {
   MainFileID = FileID();
   LocalSLocEntryTable.clear();
   LoadedSLocEntryTable.clear();
-  SLocEntryLoaded.clear();
   LastLineNoFileIDQuery = FileID();
   LastLineNoContentCache = nullptr;
   LastFileIDLookup = FileID();
@@ -373,7 +372,8 @@ void SourceManager::initializeForReplay(const SourceManager 
) {
 
   // Ensure all SLocEntries are loaded from the external source.
   for (unsigned I = 0, N = Old.LoadedSLocEntryTable.size(); I != N; ++I)
-if (!Old.SLocEntryLoaded[I])
+if (!Old.LoadedSLocEntryTable.isMaterialized(I) ||
+!Old.LoadedSLocEntryTable[I].isLoaded())
   Old.loadSLocEntry(I, nullptr);
 
   // Inherit any content cache data from the old source manager.
@@ -430,12 +430,14 @@ ContentCache ::createMemBufferContentCache(
 
 const SrcMgr::SLocEntry ::loadSLocEntry(unsigned Index,
   bool *Invalid) const {
-  assert(!SLocEntryLoaded[Index]);
+  assert(!LoadedSLocEntryTable.isMaterialized(Index) ||
+ !LoadedSLocEntryTable[Index].isLoaded());
   if (ExternalSLocEntries->ReadSLocEntry(-(static_cast(Index) + 2))) {
 if (Invalid)
   *Invalid = true;
 // If the file of the SLocEntry changed we could still have loaded it.
-if (!SLocEntryLoaded[Index]) {
+if (!LoadedSLocEntryTable.isMaterialized(Index) ||
+!LoadedSLocEntryTable[Index].isLoaded()) {
 

[clang] Avoid need for SLocEntryLoaded BitVector (PR #67960)

2023-10-03 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/67960

>From 9fde224de6baa5b1fb3713d810ce835d4456b457 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Fri, 29 Sep 2023 08:37:57 +0200
Subject: [PATCH 1/8] Avoid need for SLocEntryLoaded BitVector

The BitVector is currently used to keep track of which entries of
LoadedSLocEntryTable have been loaded. Such information can be stored in
the SLocEntry itself. Moreover, thanks to the fact that
LoadedSLocEntryTable is now a PagedVector, we can simply consider
elements of unmaterialised pages as not loaded.

This trades reducing the number of OffsetBits by one for reducing memory
usage of the SourceManager by LoadedSLocEntryTable.size()/8 bytes.
---
 clang/include/clang/Basic/SourceManager.h | 16 ++
 clang/lib/Basic/SourceManager.cpp | 27 ++-
 llvm/include/llvm/ADT/PagedVector.h   |  6 +
 3 files changed, 29 insertions(+), 20 deletions(-)

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index c1b24eec2759c71..d450280303d6785 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -474,9 +474,10 @@ static_assert(sizeof(FileInfo) <= sizeof(ExpansionInfo),
 /// SourceManager keeps an array of these objects, and they are uniquely
 /// identified by the FileID datatype.
 class SLocEntry {
-  static constexpr int OffsetBits = 8 * sizeof(SourceLocation::UIntTy) - 1;
+  static constexpr int OffsetBits = 8 * sizeof(SourceLocation::UIntTy) - 2;
   SourceLocation::UIntTy Offset : OffsetBits;
   SourceLocation::UIntTy IsExpansion : 1;
+  SourceLocation::UIntTy Loaded : 1;
   union {
 FileInfo File;
 ExpansionInfo Expansion;
@@ -489,6 +490,8 @@ class SLocEntry {
 
   bool isExpansion() const { return IsExpansion; }
   bool isFile() const { return !isExpansion(); }
+  [[nodiscard]] bool isLoaded() const { return Loaded; }
+  void setLoaded(bool Value) { Loaded = Value; }
 
   const FileInfo () const {
 assert(isFile() && "Not a file SLocEntry!");
@@ -716,13 +719,7 @@ class SourceManager : public RefCountedBase 
{
   /// The highest possible offset is 2^31-1 (2^63-1 for 64-bit source
   /// locations), so CurrentLoadedOffset starts at 2^31 (2^63 resp.).
   static const SourceLocation::UIntTy MaxLoadedOffset =
-  1ULL << (8 * sizeof(SourceLocation::UIntTy) - 1);
-
-  /// A bitmap that indicates whether the entries of LoadedSLocEntryTable
-  /// have already been loaded from the external source.
-  ///
-  /// Same indexing as LoadedSLocEntryTable.
-  llvm::BitVector SLocEntryLoaded;
+  1ULL << (8 * sizeof(SourceLocation::UIntTy) - 2);
 
   /// An external source for source location entries.
   ExternalSLocEntrySource *ExternalSLocEntries = nullptr;
@@ -1710,7 +1707,8 @@ class SourceManager : public 
RefCountedBase {
   const SrcMgr::SLocEntry (unsigned Index,
   bool *Invalid = nullptr) const {
 assert(Index < LoadedSLocEntryTable.size() && "Invalid index");
-if (SLocEntryLoaded[Index])
+if (LoadedSLocEntryTable.isMaterialized(Index) &&
+LoadedSLocEntryTable[Index].isLoaded())
   return LoadedSLocEntryTable[Index];
 return loadSLocEntry(Index, Invalid);
   }
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 3066cc53dbfd878..20e3f1252ddf077 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -336,7 +336,6 @@ void SourceManager::clearIDTables() {
   MainFileID = FileID();
   LocalSLocEntryTable.clear();
   LoadedSLocEntryTable.clear();
-  SLocEntryLoaded.clear();
   LastLineNoFileIDQuery = FileID();
   LastLineNoContentCache = nullptr;
   LastFileIDLookup = FileID();
@@ -373,7 +372,8 @@ void SourceManager::initializeForReplay(const SourceManager 
) {
 
   // Ensure all SLocEntries are loaded from the external source.
   for (unsigned I = 0, N = Old.LoadedSLocEntryTable.size(); I != N; ++I)
-if (!Old.SLocEntryLoaded[I])
+if (!Old.LoadedSLocEntryTable.isMaterialized(I) ||
+!Old.LoadedSLocEntryTable[I].isLoaded())
   Old.loadSLocEntry(I, nullptr);
 
   // Inherit any content cache data from the old source manager.
@@ -430,12 +430,14 @@ ContentCache ::createMemBufferContentCache(
 
 const SrcMgr::SLocEntry ::loadSLocEntry(unsigned Index,
   bool *Invalid) const {
-  assert(!SLocEntryLoaded[Index]);
+  assert(!LoadedSLocEntryTable.isMaterialized(Index) ||
+ !LoadedSLocEntryTable[Index].isLoaded());
   if (ExternalSLocEntries->ReadSLocEntry(-(static_cast(Index) + 2))) {
 if (Invalid)
   *Invalid = true;
 // If the file of the SLocEntry changed we could still have loaded it.
-if (!SLocEntryLoaded[Index]) {
+if (!LoadedSLocEntryTable.isMaterialized(Index) ||
+!LoadedSLocEntryTable[Index].isLoaded()) {
 

[clang] Avoid need for SLocEntryLoaded BitVector (PR #67960)

2023-10-03 Thread Vassil Vassilev via cfe-commits


@@ -489,6 +490,8 @@ class SLocEntry {
 
   bool isExpansion() const { return IsExpansion; }
   bool isFile() const { return !isExpansion(); }
+  bool isLoaded() const { return Loaded; }
+  void setLoaded(bool Value) { Loaded = Value; }

vgvassilev wrote:

```suggestion
  bool isLoaded() const { return IsLoaded; }
  void setLoaded(bool Value) { IsLoaded = Value; }
```

https://github.com/llvm/llvm-project/pull/67960
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Avoid need for SLocEntryLoaded BitVector (PR #67960)

2023-10-03 Thread Vassil Vassilev via cfe-commits


@@ -474,9 +474,10 @@ static_assert(sizeof(FileInfo) <= sizeof(ExpansionInfo),
 /// SourceManager keeps an array of these objects, and they are uniquely
 /// identified by the FileID datatype.
 class SLocEntry {
-  static constexpr int OffsetBits = 8 * sizeof(SourceLocation::UIntTy) - 1;
+  static constexpr int OffsetBits = 8 * sizeof(SourceLocation::UIntTy) - 2;
   SourceLocation::UIntTy Offset : OffsetBits;
   SourceLocation::UIntTy IsExpansion : 1;
+  SourceLocation::UIntTy Loaded : 1;

vgvassilev wrote:

```suggestion
  SourceLocation::UIntTy IsLoaded : 1;
```

https://github.com/llvm/llvm-project/pull/67960
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Avoid need for SLocEntryLoaded BitVector (PR #67960)

2023-10-03 Thread Jakub Kuderski via cfe-commits

https://github.com/kuhar commented:

The ADT change looks good to me, I'm not familiar with the clang code though.

https://github.com/llvm/llvm-project/pull/67960
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Avoid need for SLocEntryLoaded BitVector (PR #67960)

2023-10-03 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/67960

>From 9fde224de6baa5b1fb3713d810ce835d4456b457 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Fri, 29 Sep 2023 08:37:57 +0200
Subject: [PATCH 1/7] Avoid need for SLocEntryLoaded BitVector

The BitVector is currently used to keep track of which entries of
LoadedSLocEntryTable have been loaded. Such information can be stored in
the SLocEntry itself. Moreover, thanks to the fact that
LoadedSLocEntryTable is now a PagedVector, we can simply consider
elements of unmaterialised pages as not loaded.

This trades reducing the number of OffsetBits by one for reducing memory
usage of the SourceManager by LoadedSLocEntryTable.size()/8 bytes.
---
 clang/include/clang/Basic/SourceManager.h | 16 ++
 clang/lib/Basic/SourceManager.cpp | 27 ++-
 llvm/include/llvm/ADT/PagedVector.h   |  6 +
 3 files changed, 29 insertions(+), 20 deletions(-)

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index c1b24eec2759c71..d450280303d6785 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -474,9 +474,10 @@ static_assert(sizeof(FileInfo) <= sizeof(ExpansionInfo),
 /// SourceManager keeps an array of these objects, and they are uniquely
 /// identified by the FileID datatype.
 class SLocEntry {
-  static constexpr int OffsetBits = 8 * sizeof(SourceLocation::UIntTy) - 1;
+  static constexpr int OffsetBits = 8 * sizeof(SourceLocation::UIntTy) - 2;
   SourceLocation::UIntTy Offset : OffsetBits;
   SourceLocation::UIntTy IsExpansion : 1;
+  SourceLocation::UIntTy Loaded : 1;
   union {
 FileInfo File;
 ExpansionInfo Expansion;
@@ -489,6 +490,8 @@ class SLocEntry {
 
   bool isExpansion() const { return IsExpansion; }
   bool isFile() const { return !isExpansion(); }
+  [[nodiscard]] bool isLoaded() const { return Loaded; }
+  void setLoaded(bool Value) { Loaded = Value; }
 
   const FileInfo () const {
 assert(isFile() && "Not a file SLocEntry!");
@@ -716,13 +719,7 @@ class SourceManager : public RefCountedBase 
{
   /// The highest possible offset is 2^31-1 (2^63-1 for 64-bit source
   /// locations), so CurrentLoadedOffset starts at 2^31 (2^63 resp.).
   static const SourceLocation::UIntTy MaxLoadedOffset =
-  1ULL << (8 * sizeof(SourceLocation::UIntTy) - 1);
-
-  /// A bitmap that indicates whether the entries of LoadedSLocEntryTable
-  /// have already been loaded from the external source.
-  ///
-  /// Same indexing as LoadedSLocEntryTable.
-  llvm::BitVector SLocEntryLoaded;
+  1ULL << (8 * sizeof(SourceLocation::UIntTy) - 2);
 
   /// An external source for source location entries.
   ExternalSLocEntrySource *ExternalSLocEntries = nullptr;
@@ -1710,7 +1707,8 @@ class SourceManager : public 
RefCountedBase {
   const SrcMgr::SLocEntry (unsigned Index,
   bool *Invalid = nullptr) const {
 assert(Index < LoadedSLocEntryTable.size() && "Invalid index");
-if (SLocEntryLoaded[Index])
+if (LoadedSLocEntryTable.isMaterialized(Index) &&
+LoadedSLocEntryTable[Index].isLoaded())
   return LoadedSLocEntryTable[Index];
 return loadSLocEntry(Index, Invalid);
   }
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 3066cc53dbfd878..20e3f1252ddf077 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -336,7 +336,6 @@ void SourceManager::clearIDTables() {
   MainFileID = FileID();
   LocalSLocEntryTable.clear();
   LoadedSLocEntryTable.clear();
-  SLocEntryLoaded.clear();
   LastLineNoFileIDQuery = FileID();
   LastLineNoContentCache = nullptr;
   LastFileIDLookup = FileID();
@@ -373,7 +372,8 @@ void SourceManager::initializeForReplay(const SourceManager 
) {
 
   // Ensure all SLocEntries are loaded from the external source.
   for (unsigned I = 0, N = Old.LoadedSLocEntryTable.size(); I != N; ++I)
-if (!Old.SLocEntryLoaded[I])
+if (!Old.LoadedSLocEntryTable.isMaterialized(I) ||
+!Old.LoadedSLocEntryTable[I].isLoaded())
   Old.loadSLocEntry(I, nullptr);
 
   // Inherit any content cache data from the old source manager.
@@ -430,12 +430,14 @@ ContentCache ::createMemBufferContentCache(
 
 const SrcMgr::SLocEntry ::loadSLocEntry(unsigned Index,
   bool *Invalid) const {
-  assert(!SLocEntryLoaded[Index]);
+  assert(!LoadedSLocEntryTable.isMaterialized(Index) ||
+ !LoadedSLocEntryTable[Index].isLoaded());
   if (ExternalSLocEntries->ReadSLocEntry(-(static_cast(Index) + 2))) {
 if (Invalid)
   *Invalid = true;
 // If the file of the SLocEntry changed we could still have loaded it.
-if (!SLocEntryLoaded[Index]) {
+if (!LoadedSLocEntryTable.isMaterialized(Index) ||
+!LoadedSLocEntryTable[Index].isLoaded()) {
 

[clang] Avoid need for SLocEntryLoaded BitVector (PR #67960)

2023-10-02 Thread Jakub Kuderski via cfe-commits

https://github.com/kuhar edited https://github.com/llvm/llvm-project/pull/67960
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Avoid need for SLocEntryLoaded BitVector (PR #67960)

2023-10-02 Thread Jakub Kuderski via cfe-commits


@@ -180,6 +180,20 @@ TEST(PagedVectorTest, FillNonTrivialConstructor) {
   EXPECT_EQ(std::distance(V.materialized_begin(), V.materialized_end()), 10LL);
 }
 
+// Test that isMaterialized returns true for all the elements
+// of the page, not only the one that was accessed.
+TEST(PagedVectorTest, IsMaterialized) {
+  PagedVector V;
+  V.resize(20);
+  EXPECT_EQ(V.isMaterialized(0), false);

kuhar wrote:

We can Uluse `EXPECT_TRUE`/`FALSE`.

https://github.com/llvm/llvm-project/pull/67960
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Avoid need for SLocEntryLoaded BitVector (PR #67960)

2023-10-02 Thread Giulio Eulisse via cfe-commits

ktf wrote:

IIRC I saw a ~0.3 MB reduction in memory usage in my usual test. I do not see 
any performance change, although that is not particularly the focus of my 
investigation here, so I cannot exclude it (hopefully for the better).

https://github.com/llvm/llvm-project/pull/67960
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Avoid need for SLocEntryLoaded BitVector (PR #67960)

2023-10-02 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/67960

>From 9fde224de6baa5b1fb3713d810ce835d4456b457 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Fri, 29 Sep 2023 08:37:57 +0200
Subject: [PATCH 1/6] Avoid need for SLocEntryLoaded BitVector

The BitVector is currently used to keep track of which entries of
LoadedSLocEntryTable have been loaded. Such information can be stored in
the SLocEntry itself. Moreover, thanks to the fact that
LoadedSLocEntryTable is now a PagedVector, we can simply consider
elements of unmaterialised pages as not loaded.

This trades reducing the number of OffsetBits by one for reducing memory
usage of the SourceManager by LoadedSLocEntryTable.size()/8 bytes.
---
 clang/include/clang/Basic/SourceManager.h | 16 ++
 clang/lib/Basic/SourceManager.cpp | 27 ++-
 llvm/include/llvm/ADT/PagedVector.h   |  6 +
 3 files changed, 29 insertions(+), 20 deletions(-)

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index c1b24eec2759c71..d450280303d6785 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -474,9 +474,10 @@ static_assert(sizeof(FileInfo) <= sizeof(ExpansionInfo),
 /// SourceManager keeps an array of these objects, and they are uniquely
 /// identified by the FileID datatype.
 class SLocEntry {
-  static constexpr int OffsetBits = 8 * sizeof(SourceLocation::UIntTy) - 1;
+  static constexpr int OffsetBits = 8 * sizeof(SourceLocation::UIntTy) - 2;
   SourceLocation::UIntTy Offset : OffsetBits;
   SourceLocation::UIntTy IsExpansion : 1;
+  SourceLocation::UIntTy Loaded : 1;
   union {
 FileInfo File;
 ExpansionInfo Expansion;
@@ -489,6 +490,8 @@ class SLocEntry {
 
   bool isExpansion() const { return IsExpansion; }
   bool isFile() const { return !isExpansion(); }
+  [[nodiscard]] bool isLoaded() const { return Loaded; }
+  void setLoaded(bool Value) { Loaded = Value; }
 
   const FileInfo () const {
 assert(isFile() && "Not a file SLocEntry!");
@@ -716,13 +719,7 @@ class SourceManager : public RefCountedBase 
{
   /// The highest possible offset is 2^31-1 (2^63-1 for 64-bit source
   /// locations), so CurrentLoadedOffset starts at 2^31 (2^63 resp.).
   static const SourceLocation::UIntTy MaxLoadedOffset =
-  1ULL << (8 * sizeof(SourceLocation::UIntTy) - 1);
-
-  /// A bitmap that indicates whether the entries of LoadedSLocEntryTable
-  /// have already been loaded from the external source.
-  ///
-  /// Same indexing as LoadedSLocEntryTable.
-  llvm::BitVector SLocEntryLoaded;
+  1ULL << (8 * sizeof(SourceLocation::UIntTy) - 2);
 
   /// An external source for source location entries.
   ExternalSLocEntrySource *ExternalSLocEntries = nullptr;
@@ -1710,7 +1707,8 @@ class SourceManager : public 
RefCountedBase {
   const SrcMgr::SLocEntry (unsigned Index,
   bool *Invalid = nullptr) const {
 assert(Index < LoadedSLocEntryTable.size() && "Invalid index");
-if (SLocEntryLoaded[Index])
+if (LoadedSLocEntryTable.isMaterialized(Index) &&
+LoadedSLocEntryTable[Index].isLoaded())
   return LoadedSLocEntryTable[Index];
 return loadSLocEntry(Index, Invalid);
   }
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 3066cc53dbfd878..20e3f1252ddf077 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -336,7 +336,6 @@ void SourceManager::clearIDTables() {
   MainFileID = FileID();
   LocalSLocEntryTable.clear();
   LoadedSLocEntryTable.clear();
-  SLocEntryLoaded.clear();
   LastLineNoFileIDQuery = FileID();
   LastLineNoContentCache = nullptr;
   LastFileIDLookup = FileID();
@@ -373,7 +372,8 @@ void SourceManager::initializeForReplay(const SourceManager 
) {
 
   // Ensure all SLocEntries are loaded from the external source.
   for (unsigned I = 0, N = Old.LoadedSLocEntryTable.size(); I != N; ++I)
-if (!Old.SLocEntryLoaded[I])
+if (!Old.LoadedSLocEntryTable.isMaterialized(I) ||
+!Old.LoadedSLocEntryTable[I].isLoaded())
   Old.loadSLocEntry(I, nullptr);
 
   // Inherit any content cache data from the old source manager.
@@ -430,12 +430,14 @@ ContentCache ::createMemBufferContentCache(
 
 const SrcMgr::SLocEntry ::loadSLocEntry(unsigned Index,
   bool *Invalid) const {
-  assert(!SLocEntryLoaded[Index]);
+  assert(!LoadedSLocEntryTable.isMaterialized(Index) ||
+ !LoadedSLocEntryTable[Index].isLoaded());
   if (ExternalSLocEntries->ReadSLocEntry(-(static_cast(Index) + 2))) {
 if (Invalid)
   *Invalid = true;
 // If the file of the SLocEntry changed we could still have loaded it.
-if (!SLocEntryLoaded[Index]) {
+if (!LoadedSLocEntryTable.isMaterialized(Index) ||
+!LoadedSLocEntryTable[Index].isLoaded()) {
 

[clang] Avoid need for SLocEntryLoaded BitVector (PR #67960)

2023-10-02 Thread Giulio Eulisse via cfe-commits


@@ -489,6 +490,8 @@ class SLocEntry {
 
   bool isExpansion() const { return IsExpansion; }
   bool isFile() const { return !isExpansion(); }
+  [[nodiscard]] bool isLoaded() const { return Loaded; }

ktf wrote:

No, I guess not. It's just my clang-tidy integration being overzealous, I guess.

```suggestion
  bool isLoaded() const { return Loaded; }
```

https://github.com/llvm/llvm-project/pull/67960
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Avoid need for SLocEntryLoaded BitVector (PR #67960)

2023-10-02 Thread Vassil Vassilev via cfe-commits

https://github.com/vgvassilev commented:

That's indeed what I was proposing. Do we observe some performance benefits 
from this PR on the downstream codebase?

https://github.com/llvm/llvm-project/pull/67960
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Avoid need for SLocEntryLoaded BitVector (PR #67960)

2023-10-02 Thread Vassil Vassilev via cfe-commits


@@ -489,6 +490,8 @@ class SLocEntry {
 
   bool isExpansion() const { return IsExpansion; }
   bool isFile() const { return !isExpansion(); }
+  [[nodiscard]] bool isLoaded() const { return Loaded; }

vgvassilev wrote:

Do we need to add `[[nodiscard]]` to the interfaces in this PR? I'd remove them.

https://github.com/llvm/llvm-project/pull/67960
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Avoid need for SLocEntryLoaded BitVector (PR #67960)

2023-10-02 Thread Vassil Vassilev via cfe-commits

https://github.com/vgvassilev edited 
https://github.com/llvm/llvm-project/pull/67960
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Avoid need for SLocEntryLoaded BitVector (PR #67960)

2023-10-02 Thread Giulio Eulisse via cfe-commits


@@ -103,6 +103,14 @@ template  
class PagedVector {
   /// Return the size of the vector.
   [[nodiscard]] size_t size() const { return Size; }
 
+  /// Return true if the element at `Index` belongs to a page which was already
+  /// materialized, i.e., had at least one element accessed.
+  [[nodiscard]] bool isMaterialized(size_t Index) const {

ktf wrote:

Done

https://github.com/llvm/llvm-project/pull/67960
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Avoid need for SLocEntryLoaded BitVector (PR #67960)

2023-10-02 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/67960

>From 9fde224de6baa5b1fb3713d810ce835d4456b457 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Fri, 29 Sep 2023 08:37:57 +0200
Subject: [PATCH 1/5] Avoid need for SLocEntryLoaded BitVector

The BitVector is currently used to keep track of which entries of
LoadedSLocEntryTable have been loaded. Such information can be stored in
the SLocEntry itself. Moreover, thanks to the fact that
LoadedSLocEntryTable is now a PagedVector, we can simply consider
elements of unmaterialised pages as not loaded.

This trades reducing the number of OffsetBits by one for reducing memory
usage of the SourceManager by LoadedSLocEntryTable.size()/8 bytes.
---
 clang/include/clang/Basic/SourceManager.h | 16 ++
 clang/lib/Basic/SourceManager.cpp | 27 ++-
 llvm/include/llvm/ADT/PagedVector.h   |  6 +
 3 files changed, 29 insertions(+), 20 deletions(-)

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index c1b24eec2759c71..d450280303d6785 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -474,9 +474,10 @@ static_assert(sizeof(FileInfo) <= sizeof(ExpansionInfo),
 /// SourceManager keeps an array of these objects, and they are uniquely
 /// identified by the FileID datatype.
 class SLocEntry {
-  static constexpr int OffsetBits = 8 * sizeof(SourceLocation::UIntTy) - 1;
+  static constexpr int OffsetBits = 8 * sizeof(SourceLocation::UIntTy) - 2;
   SourceLocation::UIntTy Offset : OffsetBits;
   SourceLocation::UIntTy IsExpansion : 1;
+  SourceLocation::UIntTy Loaded : 1;
   union {
 FileInfo File;
 ExpansionInfo Expansion;
@@ -489,6 +490,8 @@ class SLocEntry {
 
   bool isExpansion() const { return IsExpansion; }
   bool isFile() const { return !isExpansion(); }
+  [[nodiscard]] bool isLoaded() const { return Loaded; }
+  void setLoaded(bool Value) { Loaded = Value; }
 
   const FileInfo () const {
 assert(isFile() && "Not a file SLocEntry!");
@@ -716,13 +719,7 @@ class SourceManager : public RefCountedBase 
{
   /// The highest possible offset is 2^31-1 (2^63-1 for 64-bit source
   /// locations), so CurrentLoadedOffset starts at 2^31 (2^63 resp.).
   static const SourceLocation::UIntTy MaxLoadedOffset =
-  1ULL << (8 * sizeof(SourceLocation::UIntTy) - 1);
-
-  /// A bitmap that indicates whether the entries of LoadedSLocEntryTable
-  /// have already been loaded from the external source.
-  ///
-  /// Same indexing as LoadedSLocEntryTable.
-  llvm::BitVector SLocEntryLoaded;
+  1ULL << (8 * sizeof(SourceLocation::UIntTy) - 2);
 
   /// An external source for source location entries.
   ExternalSLocEntrySource *ExternalSLocEntries = nullptr;
@@ -1710,7 +1707,8 @@ class SourceManager : public 
RefCountedBase {
   const SrcMgr::SLocEntry (unsigned Index,
   bool *Invalid = nullptr) const {
 assert(Index < LoadedSLocEntryTable.size() && "Invalid index");
-if (SLocEntryLoaded[Index])
+if (LoadedSLocEntryTable.isMaterialized(Index) &&
+LoadedSLocEntryTable[Index].isLoaded())
   return LoadedSLocEntryTable[Index];
 return loadSLocEntry(Index, Invalid);
   }
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 3066cc53dbfd878..20e3f1252ddf077 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -336,7 +336,6 @@ void SourceManager::clearIDTables() {
   MainFileID = FileID();
   LocalSLocEntryTable.clear();
   LoadedSLocEntryTable.clear();
-  SLocEntryLoaded.clear();
   LastLineNoFileIDQuery = FileID();
   LastLineNoContentCache = nullptr;
   LastFileIDLookup = FileID();
@@ -373,7 +372,8 @@ void SourceManager::initializeForReplay(const SourceManager 
) {
 
   // Ensure all SLocEntries are loaded from the external source.
   for (unsigned I = 0, N = Old.LoadedSLocEntryTable.size(); I != N; ++I)
-if (!Old.SLocEntryLoaded[I])
+if (!Old.LoadedSLocEntryTable.isMaterialized(I) ||
+!Old.LoadedSLocEntryTable[I].isLoaded())
   Old.loadSLocEntry(I, nullptr);
 
   // Inherit any content cache data from the old source manager.
@@ -430,12 +430,14 @@ ContentCache ::createMemBufferContentCache(
 
 const SrcMgr::SLocEntry ::loadSLocEntry(unsigned Index,
   bool *Invalid) const {
-  assert(!SLocEntryLoaded[Index]);
+  assert(!LoadedSLocEntryTable.isMaterialized(Index) ||
+ !LoadedSLocEntryTable[Index].isLoaded());
   if (ExternalSLocEntries->ReadSLocEntry(-(static_cast(Index) + 2))) {
 if (Invalid)
   *Invalid = true;
 // If the file of the SLocEntry changed we could still have loaded it.
-if (!SLocEntryLoaded[Index]) {
+if (!LoadedSLocEntryTable.isMaterialized(Index) ||
+!LoadedSLocEntryTable[Index].isLoaded()) {
 

[clang] Avoid need for SLocEntryLoaded BitVector (PR #67960)

2023-10-02 Thread Vassil Vassilev via cfe-commits


@@ -103,6 +103,14 @@ template  
class PagedVector {
   /// Return the size of the vector.
   [[nodiscard]] size_t size() const { return Size; }
 
+  /// Return true if the element at `Index` belongs to a page which was already
+  /// materialized, i.e., had at least one element accessed.
+  [[nodiscard]] bool isMaterialized(size_t Index) const {

vgvassilev wrote:

We should add a test case for this function.

https://github.com/llvm/llvm-project/pull/67960
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Avoid need for SLocEntryLoaded BitVector (PR #67960)

2023-10-02 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/67960

>From 9fde224de6baa5b1fb3713d810ce835d4456b457 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Fri, 29 Sep 2023 08:37:57 +0200
Subject: [PATCH 1/4] Avoid need for SLocEntryLoaded BitVector

The BitVector is currently used to keep track of which entries of
LoadedSLocEntryTable have been loaded. Such information can be stored in
the SLocEntry itself. Moreover, thanks to the fact that
LoadedSLocEntryTable is now a PagedVector, we can simply consider
elements of unmaterialised pages as not loaded.

This trades reducing the number of OffsetBits by one for reducing memory
usage of the SourceManager by LoadedSLocEntryTable.size()/8 bytes.
---
 clang/include/clang/Basic/SourceManager.h | 16 ++
 clang/lib/Basic/SourceManager.cpp | 27 ++-
 llvm/include/llvm/ADT/PagedVector.h   |  6 +
 3 files changed, 29 insertions(+), 20 deletions(-)

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index c1b24eec2759c71..d450280303d6785 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -474,9 +474,10 @@ static_assert(sizeof(FileInfo) <= sizeof(ExpansionInfo),
 /// SourceManager keeps an array of these objects, and they are uniquely
 /// identified by the FileID datatype.
 class SLocEntry {
-  static constexpr int OffsetBits = 8 * sizeof(SourceLocation::UIntTy) - 1;
+  static constexpr int OffsetBits = 8 * sizeof(SourceLocation::UIntTy) - 2;
   SourceLocation::UIntTy Offset : OffsetBits;
   SourceLocation::UIntTy IsExpansion : 1;
+  SourceLocation::UIntTy Loaded : 1;
   union {
 FileInfo File;
 ExpansionInfo Expansion;
@@ -489,6 +490,8 @@ class SLocEntry {
 
   bool isExpansion() const { return IsExpansion; }
   bool isFile() const { return !isExpansion(); }
+  [[nodiscard]] bool isLoaded() const { return Loaded; }
+  void setLoaded(bool Value) { Loaded = Value; }
 
   const FileInfo () const {
 assert(isFile() && "Not a file SLocEntry!");
@@ -716,13 +719,7 @@ class SourceManager : public RefCountedBase 
{
   /// The highest possible offset is 2^31-1 (2^63-1 for 64-bit source
   /// locations), so CurrentLoadedOffset starts at 2^31 (2^63 resp.).
   static const SourceLocation::UIntTy MaxLoadedOffset =
-  1ULL << (8 * sizeof(SourceLocation::UIntTy) - 1);
-
-  /// A bitmap that indicates whether the entries of LoadedSLocEntryTable
-  /// have already been loaded from the external source.
-  ///
-  /// Same indexing as LoadedSLocEntryTable.
-  llvm::BitVector SLocEntryLoaded;
+  1ULL << (8 * sizeof(SourceLocation::UIntTy) - 2);
 
   /// An external source for source location entries.
   ExternalSLocEntrySource *ExternalSLocEntries = nullptr;
@@ -1710,7 +1707,8 @@ class SourceManager : public 
RefCountedBase {
   const SrcMgr::SLocEntry (unsigned Index,
   bool *Invalid = nullptr) const {
 assert(Index < LoadedSLocEntryTable.size() && "Invalid index");
-if (SLocEntryLoaded[Index])
+if (LoadedSLocEntryTable.isMaterialized(Index) &&
+LoadedSLocEntryTable[Index].isLoaded())
   return LoadedSLocEntryTable[Index];
 return loadSLocEntry(Index, Invalid);
   }
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 3066cc53dbfd878..20e3f1252ddf077 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -336,7 +336,6 @@ void SourceManager::clearIDTables() {
   MainFileID = FileID();
   LocalSLocEntryTable.clear();
   LoadedSLocEntryTable.clear();
-  SLocEntryLoaded.clear();
   LastLineNoFileIDQuery = FileID();
   LastLineNoContentCache = nullptr;
   LastFileIDLookup = FileID();
@@ -373,7 +372,8 @@ void SourceManager::initializeForReplay(const SourceManager 
) {
 
   // Ensure all SLocEntries are loaded from the external source.
   for (unsigned I = 0, N = Old.LoadedSLocEntryTable.size(); I != N; ++I)
-if (!Old.SLocEntryLoaded[I])
+if (!Old.LoadedSLocEntryTable.isMaterialized(I) ||
+!Old.LoadedSLocEntryTable[I].isLoaded())
   Old.loadSLocEntry(I, nullptr);
 
   // Inherit any content cache data from the old source manager.
@@ -430,12 +430,14 @@ ContentCache ::createMemBufferContentCache(
 
 const SrcMgr::SLocEntry ::loadSLocEntry(unsigned Index,
   bool *Invalid) const {
-  assert(!SLocEntryLoaded[Index]);
+  assert(!LoadedSLocEntryTable.isMaterialized(Index) ||
+ !LoadedSLocEntryTable[Index].isLoaded());
   if (ExternalSLocEntries->ReadSLocEntry(-(static_cast(Index) + 2))) {
 if (Invalid)
   *Invalid = true;
 // If the file of the SLocEntry changed we could still have loaded it.
-if (!SLocEntryLoaded[Index]) {
+if (!LoadedSLocEntryTable.isMaterialized(Index) ||
+!LoadedSLocEntryTable[Index].isLoaded()) {
 

[clang] Avoid need for SLocEntryLoaded BitVector (PR #67960)

2023-10-02 Thread Jakub Kuderski via cfe-commits


@@ -103,6 +103,14 @@ template  
class PagedVector {
   /// Return the size of the vector.
   [[nodiscard]] size_t size() const { return Size; }
 
+  /// @return true in case the element at index @a Index belongs to a page 
which
+  /// was already materialised.

kuhar wrote:

nit: This uses a different style than all the other function documentation in 
this file. For consistency, shouldn't this be something like:
```suggestion
  /// Return true if the element at `Index` belongs to a page which was already
  /// materialized, i.e., had at least one element accessed.
```

https://github.com/llvm/llvm-project/pull/67960
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Avoid need for SLocEntryLoaded BitVector (PR #67960)

2023-10-02 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/67960

>From 9fde224de6baa5b1fb3713d810ce835d4456b457 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Fri, 29 Sep 2023 08:37:57 +0200
Subject: [PATCH 1/3] Avoid need for SLocEntryLoaded BitVector

The BitVector is currently used to keep track of which entries of
LoadedSLocEntryTable have been loaded. Such information can be stored in
the SLocEntry itself. Moreover, thanks to the fact that
LoadedSLocEntryTable is now a PagedVector, we can simply consider
elements of unmaterialised pages as not loaded.

This trades reducing the number of OffsetBits by one for reducing memory
usage of the SourceManager by LoadedSLocEntryTable.size()/8 bytes.
---
 clang/include/clang/Basic/SourceManager.h | 16 ++
 clang/lib/Basic/SourceManager.cpp | 27 ++-
 llvm/include/llvm/ADT/PagedVector.h   |  6 +
 3 files changed, 29 insertions(+), 20 deletions(-)

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index c1b24eec2759c71..d450280303d6785 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -474,9 +474,10 @@ static_assert(sizeof(FileInfo) <= sizeof(ExpansionInfo),
 /// SourceManager keeps an array of these objects, and they are uniquely
 /// identified by the FileID datatype.
 class SLocEntry {
-  static constexpr int OffsetBits = 8 * sizeof(SourceLocation::UIntTy) - 1;
+  static constexpr int OffsetBits = 8 * sizeof(SourceLocation::UIntTy) - 2;
   SourceLocation::UIntTy Offset : OffsetBits;
   SourceLocation::UIntTy IsExpansion : 1;
+  SourceLocation::UIntTy Loaded : 1;
   union {
 FileInfo File;
 ExpansionInfo Expansion;
@@ -489,6 +490,8 @@ class SLocEntry {
 
   bool isExpansion() const { return IsExpansion; }
   bool isFile() const { return !isExpansion(); }
+  [[nodiscard]] bool isLoaded() const { return Loaded; }
+  void setLoaded(bool Value) { Loaded = Value; }
 
   const FileInfo () const {
 assert(isFile() && "Not a file SLocEntry!");
@@ -716,13 +719,7 @@ class SourceManager : public RefCountedBase 
{
   /// The highest possible offset is 2^31-1 (2^63-1 for 64-bit source
   /// locations), so CurrentLoadedOffset starts at 2^31 (2^63 resp.).
   static const SourceLocation::UIntTy MaxLoadedOffset =
-  1ULL << (8 * sizeof(SourceLocation::UIntTy) - 1);
-
-  /// A bitmap that indicates whether the entries of LoadedSLocEntryTable
-  /// have already been loaded from the external source.
-  ///
-  /// Same indexing as LoadedSLocEntryTable.
-  llvm::BitVector SLocEntryLoaded;
+  1ULL << (8 * sizeof(SourceLocation::UIntTy) - 2);
 
   /// An external source for source location entries.
   ExternalSLocEntrySource *ExternalSLocEntries = nullptr;
@@ -1710,7 +1707,8 @@ class SourceManager : public 
RefCountedBase {
   const SrcMgr::SLocEntry (unsigned Index,
   bool *Invalid = nullptr) const {
 assert(Index < LoadedSLocEntryTable.size() && "Invalid index");
-if (SLocEntryLoaded[Index])
+if (LoadedSLocEntryTable.isMaterialized(Index) &&
+LoadedSLocEntryTable[Index].isLoaded())
   return LoadedSLocEntryTable[Index];
 return loadSLocEntry(Index, Invalid);
   }
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 3066cc53dbfd878..20e3f1252ddf077 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -336,7 +336,6 @@ void SourceManager::clearIDTables() {
   MainFileID = FileID();
   LocalSLocEntryTable.clear();
   LoadedSLocEntryTable.clear();
-  SLocEntryLoaded.clear();
   LastLineNoFileIDQuery = FileID();
   LastLineNoContentCache = nullptr;
   LastFileIDLookup = FileID();
@@ -373,7 +372,8 @@ void SourceManager::initializeForReplay(const SourceManager 
) {
 
   // Ensure all SLocEntries are loaded from the external source.
   for (unsigned I = 0, N = Old.LoadedSLocEntryTable.size(); I != N; ++I)
-if (!Old.SLocEntryLoaded[I])
+if (!Old.LoadedSLocEntryTable.isMaterialized(I) ||
+!Old.LoadedSLocEntryTable[I].isLoaded())
   Old.loadSLocEntry(I, nullptr);
 
   // Inherit any content cache data from the old source manager.
@@ -430,12 +430,14 @@ ContentCache ::createMemBufferContentCache(
 
 const SrcMgr::SLocEntry ::loadSLocEntry(unsigned Index,
   bool *Invalid) const {
-  assert(!SLocEntryLoaded[Index]);
+  assert(!LoadedSLocEntryTable.isMaterialized(Index) ||
+ !LoadedSLocEntryTable[Index].isLoaded());
   if (ExternalSLocEntries->ReadSLocEntry(-(static_cast(Index) + 2))) {
 if (Invalid)
   *Invalid = true;
 // If the file of the SLocEntry changed we could still have loaded it.
-if (!SLocEntryLoaded[Index]) {
+if (!LoadedSLocEntryTable.isMaterialized(Index) ||
+!LoadedSLocEntryTable[Index].isLoaded()) {
 

[clang] Avoid need for SLocEntryLoaded BitVector (PR #67960)

2023-10-02 Thread Giulio Eulisse via cfe-commits


@@ -103,6 +103,12 @@ template  
class PagedVector {
   /// Return the size of the vector.
   [[nodiscard]] size_t size() const { return Size; }
 
+  [[nodiscard]] bool isMaterialized(size_t Index) const {

ktf wrote:

```suggestion
  /// @return true in case the element at index @a Index belongs to a page which
  /// was already materialised.
  [[nodiscard]] bool isMaterialized(size_t Index) const {
```

https://github.com/llvm/llvm-project/pull/67960
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Avoid need for SLocEntryLoaded BitVector (PR #67960)

2023-10-02 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/67960

>From 9fde224de6baa5b1fb3713d810ce835d4456b457 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Fri, 29 Sep 2023 08:37:57 +0200
Subject: [PATCH 1/2] Avoid need for SLocEntryLoaded BitVector

The BitVector is currently used to keep track of which entries of
LoadedSLocEntryTable have been loaded. Such information can be stored in
the SLocEntry itself. Moreover, thanks to the fact that
LoadedSLocEntryTable is now a PagedVector, we can simply consider
elements of unmaterialised pages as not loaded.

This trades reducing the number of OffsetBits by one for reducing memory
usage of the SourceManager by LoadedSLocEntryTable.size()/8 bytes.
---
 clang/include/clang/Basic/SourceManager.h | 16 ++
 clang/lib/Basic/SourceManager.cpp | 27 ++-
 llvm/include/llvm/ADT/PagedVector.h   |  6 +
 3 files changed, 29 insertions(+), 20 deletions(-)

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index c1b24eec2759c71..d450280303d6785 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -474,9 +474,10 @@ static_assert(sizeof(FileInfo) <= sizeof(ExpansionInfo),
 /// SourceManager keeps an array of these objects, and they are uniquely
 /// identified by the FileID datatype.
 class SLocEntry {
-  static constexpr int OffsetBits = 8 * sizeof(SourceLocation::UIntTy) - 1;
+  static constexpr int OffsetBits = 8 * sizeof(SourceLocation::UIntTy) - 2;
   SourceLocation::UIntTy Offset : OffsetBits;
   SourceLocation::UIntTy IsExpansion : 1;
+  SourceLocation::UIntTy Loaded : 1;
   union {
 FileInfo File;
 ExpansionInfo Expansion;
@@ -489,6 +490,8 @@ class SLocEntry {
 
   bool isExpansion() const { return IsExpansion; }
   bool isFile() const { return !isExpansion(); }
+  [[nodiscard]] bool isLoaded() const { return Loaded; }
+  void setLoaded(bool Value) { Loaded = Value; }
 
   const FileInfo () const {
 assert(isFile() && "Not a file SLocEntry!");
@@ -716,13 +719,7 @@ class SourceManager : public RefCountedBase 
{
   /// The highest possible offset is 2^31-1 (2^63-1 for 64-bit source
   /// locations), so CurrentLoadedOffset starts at 2^31 (2^63 resp.).
   static const SourceLocation::UIntTy MaxLoadedOffset =
-  1ULL << (8 * sizeof(SourceLocation::UIntTy) - 1);
-
-  /// A bitmap that indicates whether the entries of LoadedSLocEntryTable
-  /// have already been loaded from the external source.
-  ///
-  /// Same indexing as LoadedSLocEntryTable.
-  llvm::BitVector SLocEntryLoaded;
+  1ULL << (8 * sizeof(SourceLocation::UIntTy) - 2);
 
   /// An external source for source location entries.
   ExternalSLocEntrySource *ExternalSLocEntries = nullptr;
@@ -1710,7 +1707,8 @@ class SourceManager : public 
RefCountedBase {
   const SrcMgr::SLocEntry (unsigned Index,
   bool *Invalid = nullptr) const {
 assert(Index < LoadedSLocEntryTable.size() && "Invalid index");
-if (SLocEntryLoaded[Index])
+if (LoadedSLocEntryTable.isMaterialized(Index) &&
+LoadedSLocEntryTable[Index].isLoaded())
   return LoadedSLocEntryTable[Index];
 return loadSLocEntry(Index, Invalid);
   }
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 3066cc53dbfd878..20e3f1252ddf077 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -336,7 +336,6 @@ void SourceManager::clearIDTables() {
   MainFileID = FileID();
   LocalSLocEntryTable.clear();
   LoadedSLocEntryTable.clear();
-  SLocEntryLoaded.clear();
   LastLineNoFileIDQuery = FileID();
   LastLineNoContentCache = nullptr;
   LastFileIDLookup = FileID();
@@ -373,7 +372,8 @@ void SourceManager::initializeForReplay(const SourceManager 
) {
 
   // Ensure all SLocEntries are loaded from the external source.
   for (unsigned I = 0, N = Old.LoadedSLocEntryTable.size(); I != N; ++I)
-if (!Old.SLocEntryLoaded[I])
+if (!Old.LoadedSLocEntryTable.isMaterialized(I) ||
+!Old.LoadedSLocEntryTable[I].isLoaded())
   Old.loadSLocEntry(I, nullptr);
 
   // Inherit any content cache data from the old source manager.
@@ -430,12 +430,14 @@ ContentCache ::createMemBufferContentCache(
 
 const SrcMgr::SLocEntry ::loadSLocEntry(unsigned Index,
   bool *Invalid) const {
-  assert(!SLocEntryLoaded[Index]);
+  assert(!LoadedSLocEntryTable.isMaterialized(Index) ||
+ !LoadedSLocEntryTable[Index].isLoaded());
   if (ExternalSLocEntries->ReadSLocEntry(-(static_cast(Index) + 2))) {
 if (Invalid)
   *Invalid = true;
 // If the file of the SLocEntry changed we could still have loaded it.
-if (!SLocEntryLoaded[Index]) {
+if (!LoadedSLocEntryTable.isMaterialized(Index) ||
+!LoadedSLocEntryTable[Index].isLoaded()) {
 

[clang] Avoid need for SLocEntryLoaded BitVector (PR #67960)

2023-10-02 Thread Jakub Kuderski via cfe-commits


@@ -103,6 +103,12 @@ template  
class PagedVector {
   /// Return the size of the vector.
   [[nodiscard]] size_t size() const { return Size; }
 
+  [[nodiscard]] bool isMaterialized(size_t Index) const {

kuhar wrote:

Could you add a documentation comment above? This is not a standard function 
found in most containers so it would be nice to introduce it properly.

https://github.com/llvm/llvm-project/pull/67960
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Avoid need for SLocEntryLoaded BitVector (PR #67960)

2023-10-02 Thread Jakub Kuderski via cfe-commits


@@ -103,6 +103,12 @@ template  
class PagedVector {
   /// Return the size of the vector.
   [[nodiscard]] size_t size() const { return Size; }
 
+  [[nodiscard]] bool isMaterialized(size_t Index) const {
+assert(Index < Size);
+assert(Index / PageSize < PageToDataPtrs.size());
+return PageToDataPtrs[Index / PageSize] != nullptr;

kuhar wrote:

```suggestion
return PageToDataPtrs[Index / PageSize];
```

https://github.com/llvm/llvm-project/pull/67960
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Avoid need for SLocEntryLoaded BitVector (PR #67960)

2023-10-02 Thread Giulio Eulisse via cfe-commits

ktf wrote:

@vgvassilev IIUC, this is what you were proposing in some comment in 
https://github.com/llvm/llvm-project/pull/66430.

https://github.com/llvm/llvm-project/pull/67960
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Avoid need for SLocEntryLoaded BitVector (PR #67960)

2023-10-02 Thread via cfe-commits

llvmbot wrote:




@llvm/pr-subscribers-clang


Changes

The BitVector is currently used to keep track of which entries of 
LoadedSLocEntryTable have been loaded. Such information can be stored in the 
SLocEntry itself. Moreover, thanks to the fact that LoadedSLocEntryTable is now 
a PagedVector, we can simply consider elements of unmaterialised pages as not 
loaded.

This trades reducing the number of OffsetBits by one for reducing memory usage 
of the SourceManager by LoadedSLocEntryTable.size()/8 bytes.

---
Full diff: https://github.com/llvm/llvm-project/pull/67960.diff


3 Files Affected:

- (modified) clang/include/clang/Basic/SourceManager.h (+7-9) 
- (modified) clang/lib/Basic/SourceManager.cpp (+16-11) 
- (modified) llvm/include/llvm/ADT/PagedVector.h (+6) 


``diff
diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index c1b24eec2759c71..d450280303d6785 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -474,9 +474,10 @@ static_assert(sizeof(FileInfo) <= sizeof(ExpansionInfo),
 /// SourceManager keeps an array of these objects, and they are uniquely
 /// identified by the FileID datatype.
 class SLocEntry {
-  static constexpr int OffsetBits = 8 * sizeof(SourceLocation::UIntTy) - 1;
+  static constexpr int OffsetBits = 8 * sizeof(SourceLocation::UIntTy) - 2;
   SourceLocation::UIntTy Offset : OffsetBits;
   SourceLocation::UIntTy IsExpansion : 1;
+  SourceLocation::UIntTy Loaded : 1;
   union {
 FileInfo File;
 ExpansionInfo Expansion;
@@ -489,6 +490,8 @@ class SLocEntry {
 
   bool isExpansion() const { return IsExpansion; }
   bool isFile() const { return !isExpansion(); }
+  [[nodiscard]] bool isLoaded() const { return Loaded; }
+  void setLoaded(bool Value) { Loaded = Value; }
 
   const FileInfo () const {
 assert(isFile() && "Not a file SLocEntry!");
@@ -716,13 +719,7 @@ class SourceManager : public RefCountedBase 
{
   /// The highest possible offset is 2^31-1 (2^63-1 for 64-bit source
   /// locations), so CurrentLoadedOffset starts at 2^31 (2^63 resp.).
   static const SourceLocation::UIntTy MaxLoadedOffset =
-  1ULL << (8 * sizeof(SourceLocation::UIntTy) - 1);
-
-  /// A bitmap that indicates whether the entries of LoadedSLocEntryTable
-  /// have already been loaded from the external source.
-  ///
-  /// Same indexing as LoadedSLocEntryTable.
-  llvm::BitVector SLocEntryLoaded;
+  1ULL << (8 * sizeof(SourceLocation::UIntTy) - 2);
 
   /// An external source for source location entries.
   ExternalSLocEntrySource *ExternalSLocEntries = nullptr;
@@ -1710,7 +1707,8 @@ class SourceManager : public 
RefCountedBase {
   const SrcMgr::SLocEntry (unsigned Index,
   bool *Invalid = nullptr) const {
 assert(Index < LoadedSLocEntryTable.size() && "Invalid index");
-if (SLocEntryLoaded[Index])
+if (LoadedSLocEntryTable.isMaterialized(Index) &&
+LoadedSLocEntryTable[Index].isLoaded())
   return LoadedSLocEntryTable[Index];
 return loadSLocEntry(Index, Invalid);
   }
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 3066cc53dbfd878..20e3f1252ddf077 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -336,7 +336,6 @@ void SourceManager::clearIDTables() {
   MainFileID = FileID();
   LocalSLocEntryTable.clear();
   LoadedSLocEntryTable.clear();
-  SLocEntryLoaded.clear();
   LastLineNoFileIDQuery = FileID();
   LastLineNoContentCache = nullptr;
   LastFileIDLookup = FileID();
@@ -373,7 +372,8 @@ void SourceManager::initializeForReplay(const SourceManager 
) {
 
   // Ensure all SLocEntries are loaded from the external source.
   for (unsigned I = 0, N = Old.LoadedSLocEntryTable.size(); I != N; ++I)
-if (!Old.SLocEntryLoaded[I])
+if (!Old.LoadedSLocEntryTable.isMaterialized(I) ||
+!Old.LoadedSLocEntryTable[I].isLoaded())
   Old.loadSLocEntry(I, nullptr);
 
   // Inherit any content cache data from the old source manager.
@@ -430,12 +430,14 @@ ContentCache ::createMemBufferContentCache(
 
 const SrcMgr::SLocEntry ::loadSLocEntry(unsigned Index,
   bool *Invalid) const {
-  assert(!SLocEntryLoaded[Index]);
+  assert(!LoadedSLocEntryTable.isMaterialized(Index) ||
+ !LoadedSLocEntryTable[Index].isLoaded());
   if (ExternalSLocEntries->ReadSLocEntry(-(static_cast(Index) + 2))) {
 if (Invalid)
   *Invalid = true;
 // If the file of the SLocEntry changed we could still have loaded it.
-if (!SLocEntryLoaded[Index]) {
+if (!LoadedSLocEntryTable.isMaterialized(Index) ||
+!LoadedSLocEntryTable[Index].isLoaded()) {
   // Try to recover; create a SLocEntry so the rest of clang can handle it.
   if (!FakeSLocEntryForRecovery)
 FakeSLocEntryForRecovery = std::make_unique(SLocEntry::get(
@@ -458,7 +460,6 @@ 

[clang] Avoid need for SLocEntryLoaded BitVector (PR #67960)

2023-10-02 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf created https://github.com/llvm/llvm-project/pull/67960

The BitVector is currently used to keep track of which entries of 
LoadedSLocEntryTable have been loaded. Such information can be stored in the 
SLocEntry itself. Moreover, thanks to the fact that LoadedSLocEntryTable is now 
a PagedVector, we can simply consider elements of unmaterialised pages as not 
loaded.

This trades reducing the number of OffsetBits by one for reducing memory usage 
of the SourceManager by LoadedSLocEntryTable.size()/8 bytes.

>From 9fde224de6baa5b1fb3713d810ce835d4456b457 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Fri, 29 Sep 2023 08:37:57 +0200
Subject: [PATCH] Avoid need for SLocEntryLoaded BitVector

The BitVector is currently used to keep track of which entries of
LoadedSLocEntryTable have been loaded. Such information can be stored in
the SLocEntry itself. Moreover, thanks to the fact that
LoadedSLocEntryTable is now a PagedVector, we can simply consider
elements of unmaterialised pages as not loaded.

This trades reducing the number of OffsetBits by one for reducing memory
usage of the SourceManager by LoadedSLocEntryTable.size()/8 bytes.
---
 clang/include/clang/Basic/SourceManager.h | 16 ++
 clang/lib/Basic/SourceManager.cpp | 27 ++-
 llvm/include/llvm/ADT/PagedVector.h   |  6 +
 3 files changed, 29 insertions(+), 20 deletions(-)

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index c1b24eec2759c71..d450280303d6785 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -474,9 +474,10 @@ static_assert(sizeof(FileInfo) <= sizeof(ExpansionInfo),
 /// SourceManager keeps an array of these objects, and they are uniquely
 /// identified by the FileID datatype.
 class SLocEntry {
-  static constexpr int OffsetBits = 8 * sizeof(SourceLocation::UIntTy) - 1;
+  static constexpr int OffsetBits = 8 * sizeof(SourceLocation::UIntTy) - 2;
   SourceLocation::UIntTy Offset : OffsetBits;
   SourceLocation::UIntTy IsExpansion : 1;
+  SourceLocation::UIntTy Loaded : 1;
   union {
 FileInfo File;
 ExpansionInfo Expansion;
@@ -489,6 +490,8 @@ class SLocEntry {
 
   bool isExpansion() const { return IsExpansion; }
   bool isFile() const { return !isExpansion(); }
+  [[nodiscard]] bool isLoaded() const { return Loaded; }
+  void setLoaded(bool Value) { Loaded = Value; }
 
   const FileInfo () const {
 assert(isFile() && "Not a file SLocEntry!");
@@ -716,13 +719,7 @@ class SourceManager : public RefCountedBase 
{
   /// The highest possible offset is 2^31-1 (2^63-1 for 64-bit source
   /// locations), so CurrentLoadedOffset starts at 2^31 (2^63 resp.).
   static const SourceLocation::UIntTy MaxLoadedOffset =
-  1ULL << (8 * sizeof(SourceLocation::UIntTy) - 1);
-
-  /// A bitmap that indicates whether the entries of LoadedSLocEntryTable
-  /// have already been loaded from the external source.
-  ///
-  /// Same indexing as LoadedSLocEntryTable.
-  llvm::BitVector SLocEntryLoaded;
+  1ULL << (8 * sizeof(SourceLocation::UIntTy) - 2);
 
   /// An external source for source location entries.
   ExternalSLocEntrySource *ExternalSLocEntries = nullptr;
@@ -1710,7 +1707,8 @@ class SourceManager : public 
RefCountedBase {
   const SrcMgr::SLocEntry (unsigned Index,
   bool *Invalid = nullptr) const {
 assert(Index < LoadedSLocEntryTable.size() && "Invalid index");
-if (SLocEntryLoaded[Index])
+if (LoadedSLocEntryTable.isMaterialized(Index) &&
+LoadedSLocEntryTable[Index].isLoaded())
   return LoadedSLocEntryTable[Index];
 return loadSLocEntry(Index, Invalid);
   }
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 3066cc53dbfd878..20e3f1252ddf077 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -336,7 +336,6 @@ void SourceManager::clearIDTables() {
   MainFileID = FileID();
   LocalSLocEntryTable.clear();
   LoadedSLocEntryTable.clear();
-  SLocEntryLoaded.clear();
   LastLineNoFileIDQuery = FileID();
   LastLineNoContentCache = nullptr;
   LastFileIDLookup = FileID();
@@ -373,7 +372,8 @@ void SourceManager::initializeForReplay(const SourceManager 
) {
 
   // Ensure all SLocEntries are loaded from the external source.
   for (unsigned I = 0, N = Old.LoadedSLocEntryTable.size(); I != N; ++I)
-if (!Old.SLocEntryLoaded[I])
+if (!Old.LoadedSLocEntryTable.isMaterialized(I) ||
+!Old.LoadedSLocEntryTable[I].isLoaded())
   Old.loadSLocEntry(I, nullptr);
 
   // Inherit any content cache data from the old source manager.
@@ -430,12 +430,14 @@ ContentCache ::createMemBufferContentCache(
 
 const SrcMgr::SLocEntry ::loadSLocEntry(unsigned Index,
   bool *Invalid) const {
-