kbobyrev updated this revision to Diff 163523.
kbobyrev added a comment.

Resolved the issues. Measurements show that the static Dex index for LLVM takes
~140 MB, ~80 MB of which is the size of the `SymbolSlab`.


https://reviews.llvm.org/D51539

Files:
  clang-tools-extra/clangd/index/FileIndex.cpp
  clang-tools-extra/clangd/index/FileIndex.h
  clang-tools-extra/clangd/index/MemIndex.cpp
  clang-tools-extra/clangd/index/MemIndex.h
  clang-tools-extra/clangd/index/dex/DexIndex.cpp
  clang-tools-extra/clangd/index/dex/DexIndex.h

Index: clang-tools-extra/clangd/index/dex/DexIndex.h
===================================================================
--- clang-tools-extra/clangd/index/dex/DexIndex.h
+++ clang-tools-extra/clangd/index/dex/DexIndex.h
@@ -41,7 +41,8 @@
 public:
   /// \brief (Re-)Build index for `Symbols`. All symbol pointers must remain
   /// accessible as long as `Symbols` is kept alive.
-  void build(std::shared_ptr<std::vector<const Symbol *>> Syms);
+  void build(std::shared_ptr<std::vector<const Symbol *>> Syms,
+             size_t SlabSize=0);
 
   /// \brief Build index from a symbol slab.
   static std::unique_ptr<SymbolIndex> build(SymbolSlab Slab);
@@ -73,6 +74,12 @@
   // Inverted index is used to retrieve posting lists which are processed during
   // the fuzzyFind process.
   llvm::DenseMap<Token, PostingList> InvertedIndex /*GUARDED_BY(Mutex)*/;
+
+  // Size in bytes of the symbol slab that backs the Symbols stored in Dex.
+  // Needed to estimate memory consumption: Dex stores only symbol pointers,
+  // so the link to the underlying slab is lost once the index has been
+  // built.
+  size_t PairedSlabSize;
 };
 
 } // namespace dex
Index: clang-tools-extra/clangd/index/dex/DexIndex.cpp
===================================================================
--- clang-tools-extra/clangd/index/dex/DexIndex.cpp
+++ clang-tools-extra/clangd/index/dex/DexIndex.cpp
@@ -36,7 +36,8 @@
 
 } // namespace
 
-void DexIndex::build(std::shared_ptr<std::vector<const Symbol *>> Syms) {
+void DexIndex::build(std::shared_ptr<std::vector<const Symbol *>> Syms,
+                     size_t SlabSize) {
   llvm::DenseMap<SymbolID, const Symbol *> TempLookupTable;
   llvm::DenseMap<const Symbol *, float> TempSymbolQuality;
   for (const Symbol *Sym : *Syms) {
@@ -66,15 +67,17 @@
     Symbols = std::move(Syms);
     InvertedIndex = std::move(TempInvertedIndex);
     SymbolQuality = std::move(TempSymbolQuality);
+    PairedSlabSize = SlabSize;
   }
 
   vlog("Built DexIndex with estimated memory usage {0} bytes.",
        estimateMemoryUsage());
 }
 
 std::unique_ptr<SymbolIndex> DexIndex::build(SymbolSlab Slab) {
   auto Idx = llvm::make_unique<DexIndex>();
-  Idx->build(getSymbolsFromSlab(std::move(Slab)));
+  size_t SlabSize = Slab.bytes();
+  Idx->build(getSymbolsFromSlab(std::move(Slab)), SlabSize);
   return std::move(Idx);
 }
 
@@ -177,14 +180,15 @@
 size_t DexIndex::estimateMemoryUsage() const {
   std::lock_guard<std::mutex> Lock(Mutex);
 
-  size_t Bytes =
-      LookupTable.size() * sizeof(std::pair<SymbolID, const Symbol *>);
+  size_t Bytes = PairedSlabSize;
+  Bytes += LookupTable.size() * sizeof(std::pair<SymbolID, const Symbol *>);
   Bytes += SymbolQuality.size() * sizeof(std::pair<const Symbol *, float>);
   Bytes += InvertedIndex.size() * sizeof(Token);
 
   for (const auto &P : InvertedIndex) {
     Bytes += P.second.size() * sizeof(DocID);
   }
+
   return Bytes;
 }
 
Index: clang-tools-extra/clangd/index/MemIndex.h
===================================================================
--- clang-tools-extra/clangd/index/MemIndex.h
+++ clang-tools-extra/clangd/index/MemIndex.h
@@ -22,7 +22,8 @@
 public:
   /// \brief (Re-)Build index for `Symbols`. All symbol pointers must remain
   /// accessible as long as `Symbols` is kept alive.
-  void build(std::shared_ptr<std::vector<const Symbol *>> Symbols);
+  void build(std::shared_ptr<std::vector<const Symbol *>> Symbols,
+             size_t SlabSize=0);
 
   /// \brief Build index from a symbol slab.
   static std::unique_ptr<SymbolIndex> build(SymbolSlab Slab);
@@ -47,6 +48,13 @@
   // Index is a set of symbols that are deduplicated by symbol IDs.
   // FIXME: build smarter index structure.
   llvm::DenseMap<SymbolID, const Symbol *> Index;
+
+  // Size in bytes of the symbol slab that backs the Symbols stored in Mem.
+  // Needed to estimate memory consumption: Mem stores only symbol pointers,
+  // so the link to the underlying slab is lost once the index has been
+  // built.
+  size_t PairedSlabSize;
+
   mutable std::mutex Mutex;
 };
 
Index: clang-tools-extra/clangd/index/MemIndex.cpp
===================================================================
--- clang-tools-extra/clangd/index/MemIndex.cpp
+++ clang-tools-extra/clangd/index/MemIndex.cpp
@@ -15,7 +15,8 @@
 namespace clang {
 namespace clangd {
 
-void MemIndex::build(std::shared_ptr<std::vector<const Symbol *>> Syms) {
+void MemIndex::build(std::shared_ptr<std::vector<const Symbol *>> Syms,
+                     size_t SlabSize) {
   llvm::DenseMap<SymbolID, const Symbol *> TempIndex;
   for (const Symbol *Sym : *Syms)
     TempIndex[Sym->ID] = Sym;
@@ -25,15 +26,17 @@
     std::lock_guard<std::mutex> Lock(Mutex);
     Index = std::move(TempIndex);
     Symbols = std::move(Syms); // Relase old symbols.
+    PairedSlabSize = SlabSize;
   }
 
   vlog("Built MemIndex with estimated memory usage {0} bytes.",
        estimateMemoryUsage());
 }
 
 std::unique_ptr<SymbolIndex> MemIndex::build(SymbolSlab Slab) {
   auto Idx = llvm::make_unique<MemIndex>();
-  Idx->build(getSymbolsFromSlab(std::move(Slab)));
+  size_t SlabSize = Slab.bytes();
+  Idx->build(getSymbolsFromSlab(std::move(Slab)), SlabSize);
   return std::move(Idx);
 }
 
@@ -103,7 +106,7 @@
 
 size_t MemIndex::estimateMemoryUsage() const {
   std::lock_guard<std::mutex> Lock(Mutex);
-  return Index.getMemorySize();
+  return Index.getMemorySize() + PairedSlabSize;
 }
 
 } // namespace clangd
Index: clang-tools-extra/clangd/index/FileIndex.h
===================================================================
--- clang-tools-extra/clangd/index/FileIndex.h
+++ clang-tools-extra/clangd/index/FileIndex.h
@@ -46,6 +46,8 @@
   // The shared_ptr keeps the symbols alive
   std::shared_ptr<std::vector<const Symbol *>> allSymbols();
 
+  size_t estimateMemoryUsage() const;
+
 private:
   mutable std::mutex Mutex;
 
Index: clang-tools-extra/clangd/index/FileIndex.cpp
===================================================================
--- clang-tools-extra/clangd/index/FileIndex.cpp
+++ clang-tools-extra/clangd/index/FileIndex.cpp
@@ -85,6 +85,13 @@
   return {std::move(Snap), Pointers};
 }
 
+size_t FileSymbols::estimateMemoryUsage() const {
+  size_t Result = 0; // Total bytes across all per-file slabs.
+  for (const auto &P : FileToSlabs)
+    Result += P.second->bytes(); // Bytes, not symbol count.
+  return Result;
+}
+
 void FileIndex::update(PathRef Path, ASTContext *AST,
                        std::shared_ptr<Preprocessor> PP,
                        llvm::Optional<llvm::ArrayRef<Decl *>> TopLevelDecls) {
@@ -119,7 +126,7 @@
 }
 
 size_t FileIndex::estimateMemoryUsage() const {
-  return Index.estimateMemoryUsage();
+  return Index.estimateMemoryUsage() + FSymbols.estimateMemoryUsage();
 }
 
 } // namespace clangd
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to