Petr Onderka has submitted this change and it was merged.

Change subject: Improved clearing of index caches
......................................................................


Improved clearing of index caches

Change-Id: If8a035b4c4ea5b43a6d0bd8b39940d50fa261017
---
M Indexes/Index.h
M Indexes/Index.tpp
M Indexes/IndexInnerNode.h
M Indexes/IndexInnerNode.tpp
M Indexes/IndexLeafNode.h
M Indexes/IndexLeafNode.tpp
M Indexes/IndexNode.h
M Indexes/IndexNode.tpp
M Indexes/Iterators/IndexInnerIterator.h
M Indexes/Iterators/IndexInnerIterator.tpp
M Indexes/Iterators/IndexIterator.h
M Indexes/Iterators/IndexIterator.tpp
M Indexes/Iterators/IndexLeafIterator.h
M Indexes/Iterators/IndexLeafIterator.tpp
M Indexes/Iterators/IndexNodeIterator.h
15 files changed, 157 insertions(+), 43 deletions(-)

Approvals:
  Petr Onderka: Verified; Looks good to me, approved



diff --git a/Indexes/Index.h b/Indexes/Index.h
index 8248081..1733340 100644
--- a/Indexes/Index.h
+++ b/Indexes/Index.h
@@ -17,9 +17,10 @@
     std::weak_ptr<WritableDump> dump;
     std::weak_ptr<Offset> fileHeaderOffset;
 
-    int recentChanges;
+    int recentAccesses;
 
     void AfterAdd();
+    void AfterAccess();
 public:
     // fileHeaderOffset is assumed to be part of dump
     Index(std::weak_ptr<WritableDump> dump, Offset* fileHeaderOffset, bool 
delaySave = false);
diff --git a/Indexes/Index.tpp b/Indexes/Index.tpp
index 2e9b80b..144060f 100644
--- a/Indexes/Index.tpp
+++ b/Indexes/Index.tpp
@@ -7,7 +7,7 @@
 
 template<typename TKey, typename TValue>
 Index<TKey, TValue>::Index(std::weak_ptr<WritableDump> dump, Offset 
*fileHeaderOffset, bool delaySave)
-    : dump(dump), fileHeaderOffset(std::shared_ptr<Offset>(dump.lock(), 
fileHeaderOffset)), recentChanges(0)
+    : dump(dump), fileHeaderOffset(std::shared_ptr<Offset>(dump.lock(), 
fileHeaderOffset)), recentAccesses(0)
 {
     rootNodeUnsaved = false;
 
@@ -34,7 +34,11 @@
 template<typename TKey, typename TValue>
 TValue Index<TKey, TValue>::Get(TKey key)
 {
-    return rootNode->Get(key);
+    auto result = rootNode->Get(key);
+
+    AfterAccess();
+
+    return result;
 }
 
 template<typename TKey, typename TValue>
@@ -63,12 +67,20 @@
         rootNodeUnsaved = false;
     }
 
-    recentChanges++;
+    AfterAccess();
+}
 
-    if (recentChanges >= 100000)
+template<typename TKey, typename TValue>
+void Index<TKey, TValue>::AfterAccess()
+{
+    recentAccesses++;
+
+    if (recentAccesses >= 10000)
     {
-        rootNode->ClearCached();
-        recentChanges = 0;
+        if (rootNode->NodesCount() >= 5000)
+            rootNode->ClearCached();
+
+        recentAccesses = 0;
     }
 }
 
diff --git a/Indexes/IndexInnerNode.h b/Indexes/IndexInnerNode.h
index 5ccc1e1..8626bd9 100644
--- a/Indexes/IndexInnerNode.h
+++ b/Indexes/IndexInnerNode.h
@@ -21,6 +21,7 @@
     std::uint16_t GetKeyIndex(TKey key);
 
     void AfterAdd(std::uint16_t updatedChildIndex);
+
 protected:
     virtual void WriteInternal() override;
 public:
@@ -41,7 +42,8 @@
     virtual std::uint32_t RealLength() override;
     virtual SplitResult Split() override;
 
-    virtual void ClearCached() override;
+    virtual std::uint32_t NodesCount() override;
+    virtual void ClearCachedInternal() override;
 
     virtual std::unique_ptr<IndexNodeIterator<TKey, TValue>> begin() override;
     virtual std::unique_ptr<IndexNodeIterator<TKey, TValue>> end() override;
diff --git a/Indexes/IndexInnerNode.tpp b/Indexes/IndexInnerNode.tpp
index 0890b2a..8eb2126 100644
--- a/Indexes/IndexInnerNode.tpp
+++ b/Indexes/IndexInnerNode.tpp
@@ -219,13 +219,31 @@
 }
 
 template<typename TKey, typename TValue>
-void IndexInnerNode<TKey, TValue>::ClearCached()
+std::uint32_t IndexInnerNode<TKey, TValue>::NodesCount()
 {
-    Write();
+    std::uint32_t result = 1;
 
-    for (unsigned i = 0; i < cachedChildren.size(); i++)
+    for (const auto& child : cachedChildren)
     {
-        cachedChildren.at(i) = nullptr;
+        if (child != nullptr)
+            result += child->NodesCount();
+    }
+
+    return result;
+}
+
+template<typename TKey, typename TValue>
+void IndexInnerNode<TKey, TValue>::ClearCachedInternal()
+{
+    for (auto &child : cachedChildren)
+    {
+        if (child != nullptr)
+        {
+            if (child->CanBeCleared())
+                child = nullptr;
+            else
+                child->ClearCachedInternal();
+        }
     }
 }
 
@@ -239,4 +257,4 @@
 std::unique_ptr<IndexNodeIterator<TKey, TValue>> IndexInnerNode<TKey, 
TValue>::end()
 {
     return std::unique_ptr<IndexNodeIterator<TKey, TValue>>(new 
IndexInnerIterator<TKey, TValue>(this, false));
-}
+}
\ No newline at end of file
diff --git a/Indexes/IndexLeafNode.h b/Indexes/IndexLeafNode.h
index ad069ac..78c74ef 100644
--- a/Indexes/IndexLeafNode.h
+++ b/Indexes/IndexLeafNode.h
@@ -3,18 +3,19 @@
 #include <map>
 #include "IndexNode.h"
 
-using std::map;
-
 template<typename TKey, typename TValue>
 class IndexLeafNode : public IndexNode<TKey, TValue>
 {
+    template<typename TIndexKey, typename TIndexValue>
+    friend class IndexLeafIterator;
+
     using typename IndexNode<TKey, TValue>::SplitResult;
 private:
-    map<TKey, TValue> indexMap;
+    std::map<TKey, TValue> indexMap;
 protected:
     virtual void WriteInternal() override;
 public:
-    static unique_ptr<IndexNode<TKey, TValue>> 
Read(std::weak_ptr<WritableDump> dump, std::istream &stream);
+    static std::unique_ptr<IndexNode<TKey, TValue>> 
Read(std::weak_ptr<WritableDump> dump, std::istream &stream);
 
     IndexLeafNode(std::weak_ptr<WritableDump> dump);
 
@@ -32,10 +33,11 @@
     virtual std::uint32_t RealLength() override;
     virtual SplitResult Split() override;
 
-    virtual void ClearCached() override;
+    virtual std::uint32_t NodesCount() override;
+    virtual void ClearCachedInternal() override;
 
-    virtual unique_ptr<IndexNodeIterator<TKey, TValue>> begin() override;
-    virtual unique_ptr<IndexNodeIterator<TKey, TValue>> end() override;
+    virtual std::unique_ptr<IndexNodeIterator<TKey, TValue>> begin() override;
+    virtual std::unique_ptr<IndexNodeIterator<TKey, TValue>> end() override;
 };
 
 #include "IndexLeafNode.tpp"
diff --git a/Indexes/IndexLeafNode.tpp b/Indexes/IndexLeafNode.tpp
index 2f373ed..099c0fe 100644
--- a/Indexes/IndexLeafNode.tpp
+++ b/Indexes/IndexLeafNode.tpp
@@ -115,17 +115,23 @@
 }
 
 template<typename TKey, typename TValue>
-void IndexLeafNode<TKey, TValue>::ClearCached()
+std::uint32_t IndexLeafNode<TKey, TValue>::NodesCount()
+{
+    return 1;
+}
+
+template<typename TKey, typename TValue>
+void IndexLeafNode<TKey, TValue>::ClearCachedInternal()
 {}
 
 template<typename TKey, typename TValue>
 std::unique_ptr<IndexNodeIterator<TKey, TValue>> IndexLeafNode<TKey, 
TValue>::begin()
 {
-    return std::unique_ptr<IndexNodeIterator<TKey, TValue>>(new 
IndexLeafIterator<TKey, TValue>(indexMap.begin()));
+    return std::unique_ptr<IndexNodeIterator<TKey, TValue>>(new 
IndexLeafIterator<TKey, TValue>(this, indexMap.begin()));
 }
 
 template<typename TKey, typename TValue>
 std::unique_ptr<IndexNodeIterator<TKey, TValue>> IndexLeafNode<TKey, 
TValue>::end()
 {
-    return std::unique_ptr<IndexNodeIterator<TKey, TValue>>(new 
IndexLeafIterator<TKey, TValue>(indexMap.end()));
+    return std::unique_ptr<IndexNodeIterator<TKey, TValue>>(new 
IndexLeafIterator<TKey, TValue>(this, indexMap.end()));
 }
diff --git a/Indexes/IndexNode.h b/Indexes/IndexNode.h
index ce2bec5..17cc833 100644
--- a/Indexes/IndexNode.h
+++ b/Indexes/IndexNode.h
@@ -16,6 +16,8 @@
 protected:
     static const int Size = 4096;
 
+    unsigned iterators;
+
     virtual void WriteInternal() = 0;
 public:
     struct SplitResult
@@ -54,7 +56,10 @@
     virtual std::uint32_t RealLength() = 0;
     virtual SplitResult Split() = 0;
 
-    virtual void ClearCached() = 0;
+    bool CanBeCleared();
+    virtual std::uint32_t NodesCount() = 0;
+    void ClearCached();
+    virtual void ClearCachedInternal() = 0;
 
     virtual unique_ptr<IndexNodeIterator<TKey, TValue>> begin() = 0;
     virtual unique_ptr<IndexNodeIterator<TKey, TValue>> end() = 0;
diff --git a/Indexes/IndexNode.tpp b/Indexes/IndexNode.tpp
index 9a3f7a3..32c0aab 100644
--- a/Indexes/IndexNode.tpp
+++ b/Indexes/IndexNode.tpp
@@ -7,7 +7,7 @@
 
 template<typename TKey, typename TValue>
 IndexNode<TKey, TValue>::IndexNode(std::weak_ptr<WritableDump> dump)
-    : DumpObject(dump)
+    : DumpObject(dump), iterators(0)
 {}
 
 template<typename TKey, typename TValue>
@@ -64,3 +64,17 @@
 {
     return RealLength() > NewLength();
 }
+
+template<typename TKey, typename TValue>
+bool IndexNode<TKey, TValue>::CanBeCleared()
+{
+    return iterators == 0;
+}
+
+template<typename TKey, typename TValue>
+void IndexNode<TKey, TValue>::ClearCached()
+{
+    Write();
+
+    ClearCachedInternal();
+}
\ No newline at end of file
diff --git a/Indexes/Iterators/IndexInnerIterator.h 
b/Indexes/Iterators/IndexInnerIterator.h
index bb6b659..71e1a4b 100644
--- a/Indexes/Iterators/IndexInnerIterator.h
+++ b/Indexes/Iterators/IndexInnerIterator.h
@@ -12,13 +12,17 @@
     uint16_t i;
     std::unique_ptr<IndexNodeIterator<TKey, TValue>> childIterator;
 
-    IndexInnerIterator(IndexInnerNode<TKey, TValue> *node, uint16_t i, 
std::unique_ptr<IndexNodeIterator<TKey, TValue>> childIterator);
     IndexInnerIterator(IndexInnerNode<TKey, TValue> *node, bool isBegin);
+    IndexInnerIterator(const IndexInnerIterator<TKey, TValue>& other);
 public:
     virtual const pair<TKey, TValue> operator *() const override;
     virtual IndexInnerIterator& operator ++() override;
     virtual bool Equals(const IndexNodeIterator<TKey, TValue> *other) const 
override;
     virtual std::unique_ptr<IndexNodeIterator<TKey, TValue>> Clone() const 
override;
+
+    virtual void ClearNodeCacheIfTooBig() override;
+
+    virtual ~IndexInnerIterator();
 };
 
 #include "IndexInnerIterator.tpp"
\ No newline at end of file
diff --git a/Indexes/Iterators/IndexInnerIterator.tpp 
b/Indexes/Iterators/IndexInnerIterator.tpp
index 0de3c3f..9be909b 100644
--- a/Indexes/Iterators/IndexInnerIterator.tpp
+++ b/Indexes/Iterators/IndexInnerIterator.tpp
@@ -1,20 +1,21 @@
 #include <vector>
 #include "IndexInnerIterator.h"
 
-using std::vector;
-
-template<typename TKey, typename TValue>
-IndexInnerIterator<TKey, TValue>::IndexInnerIterator(
-    IndexInnerNode<TKey, TValue> *node, uint16_t i, 
std::unique_ptr<IndexNodeIterator<TKey, TValue>> childIterator)
-    : node(node), i(i), childIterator(std::move(childIterator))
-{}
-
 template<typename TKey, typename TValue>
 IndexInnerIterator<TKey, TValue>::IndexInnerIterator(IndexInnerNode<TKey, 
TValue> *node, bool isBegin)
     : node(node),
       i(isBegin ? 0 : node->childOffsets.size()),
       childIterator(isBegin ? node->GetChildByIndex(0)->begin() : nullptr)
-{}
+{
+    node->iterators++;
+}
+
+template<typename TKey, typename TValue>
+IndexInnerIterator<TKey, TValue>::IndexInnerIterator(const 
IndexInnerIterator<TKey, TValue>& other)
+    : node(other.node), i(other.i), childIterator(other.childIterator == 
nullptr ? nullptr : other.childIterator->Clone())
+{
+    node->iterators++;
+}
 
 template<typename TKey, typename TValue>
 const pair<TKey, TValue> IndexInnerIterator<TKey, TValue>::operator *() const
@@ -63,6 +64,18 @@
 std::unique_ptr<IndexNodeIterator<TKey, TValue>> IndexInnerIterator<TKey, 
TValue>::Clone() const
 {
     return std::unique_ptr<IndexNodeIterator<TKey, TValue>>(
-        new IndexInnerIterator<TKey, TValue>(
-            node, i, childIterator == nullptr ? nullptr : 
childIterator->Clone()));
+        new IndexInnerIterator<TKey, TValue>(*this));
+}
+
+template<typename TKey, typename TValue>
+void IndexInnerIterator<TKey, TValue>::ClearNodeCacheIfTooBig()
+{
+    if (node->NodesCount() >= 5000)
+        node->ClearCached();
+}
+
+template<typename TKey, typename TValue>
+IndexInnerIterator<TKey, TValue>::~IndexInnerIterator()
+{
+    node->iterators--;
 }
\ No newline at end of file
diff --git a/Indexes/Iterators/IndexIterator.h 
b/Indexes/Iterators/IndexIterator.h
index 4d734e4..273644f 100644
--- a/Indexes/Iterators/IndexIterator.h
+++ b/Indexes/Iterators/IndexIterator.h
@@ -16,6 +16,9 @@
     friend class Index;
 private:
     std::unique_ptr<IndexNodeIterator<TKey, TValue>> nodeIterator;
+
+    std::uint32_t recentIncrements;
+
     IndexIterator(std::unique_ptr<IndexNodeIterator<TKey, TValue>> 
nodeIterator);
 public:
     IndexIterator(const IndexIterator &other);
diff --git a/Indexes/Iterators/IndexIterator.tpp 
b/Indexes/Iterators/IndexIterator.tpp
index cee9b7c..163b04a 100644
--- a/Indexes/Iterators/IndexIterator.tpp
+++ b/Indexes/Iterators/IndexIterator.tpp
@@ -4,7 +4,7 @@
 
 template<typename TKey, typename TValue>
 IndexIterator<TKey, TValue>::IndexIterator(unique_ptr<IndexNodeIterator<TKey, 
TValue>> nodeIterator)
-    : nodeIterator(std::move(nodeIterator))
+    : nodeIterator(std::move(nodeIterator)), recentIncrements(0)
 {}
 
 template<typename TKey, typename TValue>
@@ -37,6 +37,15 @@
 IndexIterator<TKey, TValue>& IndexIterator<TKey, TValue>::operator ++()
 {
     ++(*nodeIterator);
+
+    recentIncrements++;
+
+    if (recentIncrements >= 10000)
+    {
+        nodeIterator->ClearNodeCacheIfTooBig();
+        recentIncrements = 0;
+    }
+
     return *this;
 }
 
diff --git a/Indexes/Iterators/IndexLeafIterator.h 
b/Indexes/Iterators/IndexLeafIterator.h
index 4c3022a..ebba9c1 100644
--- a/Indexes/Iterators/IndexLeafIterator.h
+++ b/Indexes/Iterators/IndexLeafIterator.h
@@ -14,14 +14,20 @@
     template<typename TIndexKey, typename TIndexValue>
     friend class IndexLeafNode;
 private:
+    IndexLeafNode<TKey, TValue> *node;
     MapIterator mapIterator;
 
-    IndexLeafIterator(MapIterator mapIterator);
+    IndexLeafIterator(IndexLeafNode<TKey, TValue> *node, MapIterator 
mapIterator);
+    IndexLeafIterator(const IndexLeafIterator<TKey, TValue>& other);
 public:
     virtual const pair<TKey, TValue> operator *() const override;
     virtual IndexLeafIterator& operator ++() override;
     virtual bool Equals(const IndexNodeIterator<TKey, TValue> *other) const 
override;
     virtual std::unique_ptr<IndexNodeIterator<TKey, TValue>> Clone() const 
override;
+
+    virtual void ClearNodeCacheIfTooBig() override;
+
+    virtual ~IndexLeafIterator();
 };
 
 #include "IndexLeafIterator.tpp"
diff --git a/Indexes/Iterators/IndexLeafIterator.tpp 
b/Indexes/Iterators/IndexLeafIterator.tpp
index 343d236..59b94d2 100644
--- a/Indexes/Iterators/IndexLeafIterator.tpp
+++ b/Indexes/Iterators/IndexLeafIterator.tpp
@@ -1,12 +1,19 @@
 #include <vector>
 #include "IndexLeafIterator.h"
 
-using std::vector;
+template<typename TKey, typename TValue>
+IndexLeafIterator<TKey, TValue>::IndexLeafIterator(IndexLeafNode<TKey, TValue> 
*node, MapIterator mapIterator)
+    : node(node), mapIterator(mapIterator)
+{
+    node->iterators++;
+}
 
 template<typename TKey, typename TValue>
-IndexLeafIterator<TKey, TValue>::IndexLeafIterator(MapIterator mapIterator)
-    : mapIterator(mapIterator)
-{}
+IndexLeafIterator<TKey, TValue>::IndexLeafIterator(const 
IndexLeafIterator<TKey, TValue>& other)
+    : node(other.node), mapIterator(other.mapIterator)
+{
+    node->iterators++;
+}
 
 template<typename TKey, typename TValue>
 const pair<TKey, TValue> IndexLeafIterator<TKey, TValue>::operator *() const
@@ -35,4 +42,14 @@
 std::unique_ptr<IndexNodeIterator<TKey, TValue>> IndexLeafIterator<TKey, 
TValue>::Clone() const
 {
     return std::unique_ptr<IndexNodeIterator<TKey, TValue>>(new 
IndexLeafIterator<TKey, TValue>(*this));
+}
+
+template<typename TKey, typename TValue>
+void IndexLeafIterator<TKey, TValue>::ClearNodeCacheIfTooBig()
+{}
+
+template<typename TKey, typename TValue>
+IndexLeafIterator<TKey, TValue>::~IndexLeafIterator()
+{
+    node->iterators--;
 }
\ No newline at end of file
diff --git a/Indexes/Iterators/IndexNodeIterator.h 
b/Indexes/Iterators/IndexNodeIterator.h
index 6249e02..07656c6 100644
--- a/Indexes/Iterators/IndexNodeIterator.h
+++ b/Indexes/Iterators/IndexNodeIterator.h
@@ -13,5 +13,7 @@
     virtual bool Equals(const IndexNodeIterator *other) const = 0;
     virtual std::unique_ptr<IndexNodeIterator> Clone() const = 0;
 
+    virtual void ClearNodeCacheIfTooBig() = 0;
+
     virtual ~IndexNodeIterator() {};
 };
\ No newline at end of file

-- 
To view, visit https://gerrit.wikimedia.org/r/83998
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: If8a035b4c4ea5b43a6d0bd8b39940d50fa261017
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps/incremental
Gerrit-Branch: gsoc
Gerrit-Owner: Petr Onderka <[email protected]>
Gerrit-Reviewer: Petr Onderka <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to