Petr Onderka has uploaded a new change for review.
https://gerrit.wikimedia.org/r/83998
Change subject: Improved clearing of index caches
......................................................................
Improved clearing of index caches
Change-Id: If8a035b4c4ea5b43a6d0bd8b39940d50fa261017
---
M Indexes/Index.h
M Indexes/Index.tpp
M Indexes/IndexInnerNode.h
M Indexes/IndexInnerNode.tpp
M Indexes/IndexLeafNode.h
M Indexes/IndexLeafNode.tpp
M Indexes/IndexNode.h
M Indexes/IndexNode.tpp
M Indexes/Iterators/IndexInnerIterator.h
M Indexes/Iterators/IndexInnerIterator.tpp
M Indexes/Iterators/IndexIterator.h
M Indexes/Iterators/IndexIterator.tpp
M Indexes/Iterators/IndexLeafIterator.h
M Indexes/Iterators/IndexLeafIterator.tpp
M Indexes/Iterators/IndexNodeIterator.h
15 files changed, 157 insertions(+), 43 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/operations/dumps/incremental
refs/changes/98/83998/1
diff --git a/Indexes/Index.h b/Indexes/Index.h
index 8248081..1733340 100644
--- a/Indexes/Index.h
+++ b/Indexes/Index.h
@@ -17,9 +17,10 @@
std::weak_ptr<WritableDump> dump;
std::weak_ptr<Offset> fileHeaderOffset;
- int recentChanges;
+ int recentAccesses;
void AfterAdd();
+ void AfterAccess();
public:
// fileHeaderOffset is assumed to be part of dump
Index(std::weak_ptr<WritableDump> dump, Offset* fileHeaderOffset, bool
delaySave = false);
diff --git a/Indexes/Index.tpp b/Indexes/Index.tpp
index 2e9b80b..144060f 100644
--- a/Indexes/Index.tpp
+++ b/Indexes/Index.tpp
@@ -7,7 +7,7 @@
template<typename TKey, typename TValue>
Index<TKey, TValue>::Index(std::weak_ptr<WritableDump> dump, Offset
*fileHeaderOffset, bool delaySave)
- : dump(dump), fileHeaderOffset(std::shared_ptr<Offset>(dump.lock(),
fileHeaderOffset)), recentChanges(0)
+ : dump(dump), fileHeaderOffset(std::shared_ptr<Offset>(dump.lock(),
fileHeaderOffset)), recentAccesses(0)
{
rootNodeUnsaved = false;
@@ -34,7 +34,11 @@
template<typename TKey, typename TValue>
TValue Index<TKey, TValue>::Get(TKey key)
{
- return rootNode->Get(key);
+ auto result = rootNode->Get(key);
+
+ AfterAccess();
+
+ return result;
}
template<typename TKey, typename TValue>
@@ -63,12 +67,20 @@
rootNodeUnsaved = false;
}
- recentChanges++;
+ AfterAccess();
+}
- if (recentChanges >= 100000)
+template<typename TKey, typename TValue>
+void Index<TKey, TValue>::AfterAccess()
+{
+ recentAccesses++;
+
+ if (recentAccesses >= 10000)
{
- rootNode->ClearCached();
- recentChanges = 0;
+ if (rootNode->NodesCount() >= 5000)
+ rootNode->ClearCached();
+
+ recentAccesses = 0;
}
}
diff --git a/Indexes/IndexInnerNode.h b/Indexes/IndexInnerNode.h
index 5ccc1e1..8626bd9 100644
--- a/Indexes/IndexInnerNode.h
+++ b/Indexes/IndexInnerNode.h
@@ -21,6 +21,7 @@
std::uint16_t GetKeyIndex(TKey key);
void AfterAdd(std::uint16_t updatedChildIndex);
+
protected:
virtual void WriteInternal() override;
public:
@@ -41,7 +42,8 @@
virtual std::uint32_t RealLength() override;
virtual SplitResult Split() override;
- virtual void ClearCached() override;
+ virtual std::uint32_t NodesCount() override;
+ virtual void ClearCachedInternal() override;
virtual std::unique_ptr<IndexNodeIterator<TKey, TValue>> begin() override;
virtual std::unique_ptr<IndexNodeIterator<TKey, TValue>> end() override;
diff --git a/Indexes/IndexInnerNode.tpp b/Indexes/IndexInnerNode.tpp
index 0890b2a..8eb2126 100644
--- a/Indexes/IndexInnerNode.tpp
+++ b/Indexes/IndexInnerNode.tpp
@@ -219,13 +219,31 @@
}
template<typename TKey, typename TValue>
-void IndexInnerNode<TKey, TValue>::ClearCached()
+std::uint32_t IndexInnerNode<TKey, TValue>::NodesCount()
{
- Write();
+ std::uint32_t result = 1;
- for (unsigned i = 0; i < cachedChildren.size(); i++)
+ for (const auto& child : cachedChildren)
{
- cachedChildren.at(i) = nullptr;
+ if (child != nullptr)
+ result += child->NodesCount();
+ }
+
+ return result;
+}
+
+template<typename TKey, typename TValue>
+void IndexInnerNode<TKey, TValue>::ClearCachedInternal()
+{
+ for (auto &child : cachedChildren)
+ {
+ if (child != nullptr)
+ {
+ if (child->CanBeCleared())
+ child = nullptr;
+ else
+ child->ClearCachedInternal();
+ }
}
}
@@ -239,4 +257,4 @@
std::unique_ptr<IndexNodeIterator<TKey, TValue>> IndexInnerNode<TKey,
TValue>::end()
{
return std::unique_ptr<IndexNodeIterator<TKey, TValue>>(new
IndexInnerIterator<TKey, TValue>(this, false));
-}
+}
\ No newline at end of file
diff --git a/Indexes/IndexLeafNode.h b/Indexes/IndexLeafNode.h
index ad069ac..78c74ef 100644
--- a/Indexes/IndexLeafNode.h
+++ b/Indexes/IndexLeafNode.h
@@ -3,18 +3,19 @@
#include <map>
#include "IndexNode.h"
-using std::map;
-
template<typename TKey, typename TValue>
class IndexLeafNode : public IndexNode<TKey, TValue>
{
+ template<typename TIndexKey, typename TIndexValue>
+ friend class IndexLeafIterator;
+
using typename IndexNode<TKey, TValue>::SplitResult;
private:
- map<TKey, TValue> indexMap;
+ std::map<TKey, TValue> indexMap;
protected:
virtual void WriteInternal() override;
public:
- static unique_ptr<IndexNode<TKey, TValue>>
Read(std::weak_ptr<WritableDump> dump, std::istream &stream);
+ static std::unique_ptr<IndexNode<TKey, TValue>>
Read(std::weak_ptr<WritableDump> dump, std::istream &stream);
IndexLeafNode(std::weak_ptr<WritableDump> dump);
@@ -32,10 +33,11 @@
virtual std::uint32_t RealLength() override;
virtual SplitResult Split() override;
- virtual void ClearCached() override;
+ virtual std::uint32_t NodesCount() override;
+ virtual void ClearCachedInternal() override;
- virtual unique_ptr<IndexNodeIterator<TKey, TValue>> begin() override;
- virtual unique_ptr<IndexNodeIterator<TKey, TValue>> end() override;
+ virtual std::unique_ptr<IndexNodeIterator<TKey, TValue>> begin() override;
+ virtual std::unique_ptr<IndexNodeIterator<TKey, TValue>> end() override;
};
#include "IndexLeafNode.tpp"
diff --git a/Indexes/IndexLeafNode.tpp b/Indexes/IndexLeafNode.tpp
index 2f373ed..099c0fe 100644
--- a/Indexes/IndexLeafNode.tpp
+++ b/Indexes/IndexLeafNode.tpp
@@ -115,17 +115,23 @@
}
template<typename TKey, typename TValue>
-void IndexLeafNode<TKey, TValue>::ClearCached()
+std::uint32_t IndexLeafNode<TKey, TValue>::NodesCount()
+{
+ return 1;
+}
+
+template<typename TKey, typename TValue>
+void IndexLeafNode<TKey, TValue>::ClearCachedInternal()
{}
template<typename TKey, typename TValue>
std::unique_ptr<IndexNodeIterator<TKey, TValue>> IndexLeafNode<TKey,
TValue>::begin()
{
- return std::unique_ptr<IndexNodeIterator<TKey, TValue>>(new
IndexLeafIterator<TKey, TValue>(indexMap.begin()));
+ return std::unique_ptr<IndexNodeIterator<TKey, TValue>>(new
IndexLeafIterator<TKey, TValue>(this, indexMap.begin()));
}
template<typename TKey, typename TValue>
std::unique_ptr<IndexNodeIterator<TKey, TValue>> IndexLeafNode<TKey,
TValue>::end()
{
- return std::unique_ptr<IndexNodeIterator<TKey, TValue>>(new
IndexLeafIterator<TKey, TValue>(indexMap.end()));
+ return std::unique_ptr<IndexNodeIterator<TKey, TValue>>(new
IndexLeafIterator<TKey, TValue>(this, indexMap.end()));
}
diff --git a/Indexes/IndexNode.h b/Indexes/IndexNode.h
index ce2bec5..17cc833 100644
--- a/Indexes/IndexNode.h
+++ b/Indexes/IndexNode.h
@@ -16,6 +16,8 @@
protected:
static const int Size = 4096;
+ unsigned iterators;
+
virtual void WriteInternal() = 0;
public:
struct SplitResult
@@ -54,7 +56,10 @@
virtual std::uint32_t RealLength() = 0;
virtual SplitResult Split() = 0;
- virtual void ClearCached() = 0;
+ bool CanBeCleared();
+ virtual std::uint32_t NodesCount() = 0;
+ void ClearCached();
+ virtual void ClearCachedInternal() = 0;
virtual unique_ptr<IndexNodeIterator<TKey, TValue>> begin() = 0;
virtual unique_ptr<IndexNodeIterator<TKey, TValue>> end() = 0;
diff --git a/Indexes/IndexNode.tpp b/Indexes/IndexNode.tpp
index 9a3f7a3..32c0aab 100644
--- a/Indexes/IndexNode.tpp
+++ b/Indexes/IndexNode.tpp
@@ -7,7 +7,7 @@
template<typename TKey, typename TValue>
IndexNode<TKey, TValue>::IndexNode(std::weak_ptr<WritableDump> dump)
- : DumpObject(dump)
+ : DumpObject(dump), iterators(0)
{}
template<typename TKey, typename TValue>
@@ -64,3 +64,17 @@
{
return RealLength() > NewLength();
}
+
+template<typename TKey, typename TValue>
+bool IndexNode<TKey, TValue>::CanBeCleared()
+{
+ return iterators == 0;
+}
+
+template<typename TKey, typename TValue>
+void IndexNode<TKey, TValue>::ClearCached()
+{
+ Write();
+
+ ClearCachedInternal();
+}
\ No newline at end of file
diff --git a/Indexes/Iterators/IndexInnerIterator.h
b/Indexes/Iterators/IndexInnerIterator.h
index bb6b659..71e1a4b 100644
--- a/Indexes/Iterators/IndexInnerIterator.h
+++ b/Indexes/Iterators/IndexInnerIterator.h
@@ -12,13 +12,17 @@
uint16_t i;
std::unique_ptr<IndexNodeIterator<TKey, TValue>> childIterator;
- IndexInnerIterator(IndexInnerNode<TKey, TValue> *node, uint16_t i,
std::unique_ptr<IndexNodeIterator<TKey, TValue>> childIterator);
IndexInnerIterator(IndexInnerNode<TKey, TValue> *node, bool isBegin);
+ IndexInnerIterator(const IndexInnerIterator<TKey, TValue>& other);
public:
virtual const pair<TKey, TValue> operator *() const override;
virtual IndexInnerIterator& operator ++() override;
virtual bool Equals(const IndexNodeIterator<TKey, TValue> *other) const
override;
virtual std::unique_ptr<IndexNodeIterator<TKey, TValue>> Clone() const
override;
+
+ virtual void ClearNodeCacheIfTooBig() override;
+
+ virtual ~IndexInnerIterator();
};
#include "IndexInnerIterator.tpp"
\ No newline at end of file
diff --git a/Indexes/Iterators/IndexInnerIterator.tpp
b/Indexes/Iterators/IndexInnerIterator.tpp
index 0de3c3f..9be909b 100644
--- a/Indexes/Iterators/IndexInnerIterator.tpp
+++ b/Indexes/Iterators/IndexInnerIterator.tpp
@@ -1,20 +1,21 @@
#include <vector>
#include "IndexInnerIterator.h"
-using std::vector;
-
-template<typename TKey, typename TValue>
-IndexInnerIterator<TKey, TValue>::IndexInnerIterator(
- IndexInnerNode<TKey, TValue> *node, uint16_t i,
std::unique_ptr<IndexNodeIterator<TKey, TValue>> childIterator)
- : node(node), i(i), childIterator(std::move(childIterator))
-{}
-
template<typename TKey, typename TValue>
IndexInnerIterator<TKey, TValue>::IndexInnerIterator(IndexInnerNode<TKey,
TValue> *node, bool isBegin)
: node(node),
i(isBegin ? 0 : node->childOffsets.size()),
childIterator(isBegin ? node->GetChildByIndex(0)->begin() : nullptr)
-{}
+{
+ node->iterators++;
+}
+
+template<typename TKey, typename TValue>
+IndexInnerIterator<TKey, TValue>::IndexInnerIterator(const
IndexInnerIterator<TKey, TValue>& other)
+ : node(other.node), i(other.i), childIterator(other.childIterator ==
nullptr ? nullptr : other.childIterator->Clone())
+{
+ node->iterators++;
+}
template<typename TKey, typename TValue>
const pair<TKey, TValue> IndexInnerIterator<TKey, TValue>::operator *() const
@@ -63,6 +64,18 @@
std::unique_ptr<IndexNodeIterator<TKey, TValue>> IndexInnerIterator<TKey,
TValue>::Clone() const
{
return std::unique_ptr<IndexNodeIterator<TKey, TValue>>(
- new IndexInnerIterator<TKey, TValue>(
- node, i, childIterator == nullptr ? nullptr :
childIterator->Clone()));
+ new IndexInnerIterator<TKey, TValue>(*this));
+}
+
+template<typename TKey, typename TValue>
+void IndexInnerIterator<TKey, TValue>::ClearNodeCacheIfTooBig()
+{
+ if (node->NodesCount() >= 5000)
+ node->ClearCached();
+}
+
+template<typename TKey, typename TValue>
+IndexInnerIterator<TKey, TValue>::~IndexInnerIterator()
+{
+ node->iterators--;
}
\ No newline at end of file
diff --git a/Indexes/Iterators/IndexIterator.h
b/Indexes/Iterators/IndexIterator.h
index 4d734e4..273644f 100644
--- a/Indexes/Iterators/IndexIterator.h
+++ b/Indexes/Iterators/IndexIterator.h
@@ -16,6 +16,9 @@
friend class Index;
private:
std::unique_ptr<IndexNodeIterator<TKey, TValue>> nodeIterator;
+
+ std::uint32_t recentIncrements;
+
IndexIterator(std::unique_ptr<IndexNodeIterator<TKey, TValue>>
nodeIterator);
public:
IndexIterator(const IndexIterator &other);
diff --git a/Indexes/Iterators/IndexIterator.tpp
b/Indexes/Iterators/IndexIterator.tpp
index cee9b7c..163b04a 100644
--- a/Indexes/Iterators/IndexIterator.tpp
+++ b/Indexes/Iterators/IndexIterator.tpp
@@ -4,7 +4,7 @@
template<typename TKey, typename TValue>
IndexIterator<TKey, TValue>::IndexIterator(unique_ptr<IndexNodeIterator<TKey,
TValue>> nodeIterator)
- : nodeIterator(std::move(nodeIterator))
+ : nodeIterator(std::move(nodeIterator)), recentIncrements(0)
{}
template<typename TKey, typename TValue>
@@ -37,6 +37,15 @@
IndexIterator<TKey, TValue>& IndexIterator<TKey, TValue>::operator ++()
{
++(*nodeIterator);
+
+ recentIncrements++;
+
+ if (recentIncrements >= 10000)
+ {
+ nodeIterator->ClearNodeCacheIfTooBig();
+ recentIncrements = 0;
+ }
+
return *this;
}
diff --git a/Indexes/Iterators/IndexLeafIterator.h
b/Indexes/Iterators/IndexLeafIterator.h
index 4c3022a..ebba9c1 100644
--- a/Indexes/Iterators/IndexLeafIterator.h
+++ b/Indexes/Iterators/IndexLeafIterator.h
@@ -14,14 +14,20 @@
template<typename TIndexKey, typename TIndexValue>
friend class IndexLeafNode;
private:
+ IndexLeafNode<TKey, TValue> *node;
MapIterator mapIterator;
- IndexLeafIterator(MapIterator mapIterator);
+ IndexLeafIterator(IndexLeafNode<TKey, TValue> *node, MapIterator
mapIterator);
+ IndexLeafIterator(const IndexLeafIterator<TKey, TValue>& other);
public:
virtual const pair<TKey, TValue> operator *() const override;
virtual IndexLeafIterator& operator ++() override;
virtual bool Equals(const IndexNodeIterator<TKey, TValue> *other) const
override;
virtual std::unique_ptr<IndexNodeIterator<TKey, TValue>> Clone() const
override;
+
+ virtual void ClearNodeCacheIfTooBig() override;
+
+ virtual ~IndexLeafIterator();
};
#include "IndexLeafIterator.tpp"
diff --git a/Indexes/Iterators/IndexLeafIterator.tpp
b/Indexes/Iterators/IndexLeafIterator.tpp
index 343d236..59b94d2 100644
--- a/Indexes/Iterators/IndexLeafIterator.tpp
+++ b/Indexes/Iterators/IndexLeafIterator.tpp
@@ -1,12 +1,19 @@
#include <vector>
#include "IndexLeafIterator.h"
-using std::vector;
+template<typename TKey, typename TValue>
+IndexLeafIterator<TKey, TValue>::IndexLeafIterator(IndexLeafNode<TKey, TValue>
*node, MapIterator mapIterator)
+ : node(node), mapIterator(mapIterator)
+{
+ node->iterators++;
+}
template<typename TKey, typename TValue>
-IndexLeafIterator<TKey, TValue>::IndexLeafIterator(MapIterator mapIterator)
- : mapIterator(mapIterator)
-{}
+IndexLeafIterator<TKey, TValue>::IndexLeafIterator(const
IndexLeafIterator<TKey, TValue>& other)
+ : node(other.node), mapIterator(other.mapIterator)
+{
+ node->iterators++;
+}
template<typename TKey, typename TValue>
const pair<TKey, TValue> IndexLeafIterator<TKey, TValue>::operator *() const
@@ -35,4 +42,14 @@
std::unique_ptr<IndexNodeIterator<TKey, TValue>> IndexLeafIterator<TKey,
TValue>::Clone() const
{
return std::unique_ptr<IndexNodeIterator<TKey, TValue>>(new
IndexLeafIterator<TKey, TValue>(*this));
+}
+
+template<typename TKey, typename TValue>
+void IndexLeafIterator<TKey, TValue>::ClearNodeCacheIfTooBig()
+{}
+
+template<typename TKey, typename TValue>
+IndexLeafIterator<TKey, TValue>::~IndexLeafIterator()
+{
+ node->iterators--;
}
\ No newline at end of file
diff --git a/Indexes/Iterators/IndexNodeIterator.h
b/Indexes/Iterators/IndexNodeIterator.h
index 6249e02..07656c6 100644
--- a/Indexes/Iterators/IndexNodeIterator.h
+++ b/Indexes/Iterators/IndexNodeIterator.h
@@ -13,5 +13,7 @@
virtual bool Equals(const IndexNodeIterator *other) const = 0;
virtual std::unique_ptr<IndexNodeIterator> Clone() const = 0;
+ virtual void ClearNodeCacheIfTooBig() = 0;
+
virtual ~IndexNodeIterator() {};
};
\ No newline at end of file
--
To view, visit https://gerrit.wikimedia.org/r/83998
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: If8a035b4c4ea5b43a6d0bd8b39940d50fa261017
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps/incremental
Gerrit-Branch: gsoc
Gerrit-Owner: Petr Onderka <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits