Petr Onderka has submitted this change and it was merged.
Change subject: Don't write empty indexes
......................................................................
Don't write empty indexes
Change-Id: I94a394fd940b34a6654cc0f8ef841c191457cbf6
---
M DumpObjects/DumpSiteInfo.cpp
M Indexes/Index.tpp
M Indexes/IndexInnerNode.tpp
M Indexes/IndexLeafNode.tpp
M Indexes/IndexNode.h
M Indexes/IndexNode.tpp
M Indexes/Iterators/IndexInnerIterator.h
M Indexes/Iterators/IndexInnerIterator.tpp
M TODO.txt
9 files changed, 39 insertions(+), 29 deletions(-)
Approvals:
Petr Onderka: Verified; Looks good to me, approved
diff --git a/DumpObjects/DumpSiteInfo.cpp b/DumpObjects/DumpSiteInfo.cpp
index 28ce47d..9cec08b 100644
--- a/DumpObjects/DumpSiteInfo.cpp
+++ b/DumpObjects/DumpSiteInfo.cpp
@@ -15,7 +15,10 @@
void DumpSiteInfo::UpdateIndex(Offset offset, bool overwrite)
{
- dump.lock()->fileHeader.SiteInfo = offset;
+ auto dumpRef = dump.lock();
+
+ dumpRef->fileHeader.SiteInfo = offset;
+ dumpRef->fileHeader.Write();
}
DumpSiteInfo::DumpSiteInfo(std::weak_ptr<WritableDump> dump)
diff --git a/Indexes/Index.tpp b/Indexes/Index.tpp
index 144060f..d1f41d2 100644
--- a/Indexes/Index.tpp
+++ b/Indexes/Index.tpp
@@ -22,7 +22,7 @@
}
else
{
- rootNode->Write();
+ rootNode->Write(true);
fileHeaderOffset->value = rootNode->SavedOffset();
dump.lock()->fileHeader.Write();
}
@@ -59,7 +59,7 @@
if (rootNodeUnsaved)
{
- rootNode->Write();
+ rootNode->Write(true);
fileHeaderOffset.lock()->value = rootNode->SavedOffset();
dump.lock()->fileHeader.Write();
diff --git a/Indexes/IndexInnerNode.tpp b/Indexes/IndexInnerNode.tpp
index 8eb2126..3738937 100644
--- a/Indexes/IndexInnerNode.tpp
+++ b/Indexes/IndexInnerNode.tpp
@@ -53,6 +53,8 @@
insert_at(childOffsets, updatedChildIndex + 1,
splitted.RightNode->SavedOffset());
insert_at(cachedChildren, updatedChildIndex + 1,
std::move(splitted.RightNode));
+
+ this->modified = true;
}
}
@@ -82,7 +84,7 @@
void IndexInnerNode<TKey, TValue>::Remove(const TKey key)
{
auto index = GetKeyIndex(key);
- return GetChildByIndex(index)->Remove(key);
+ GetChildByIndex(index)->Remove(key);
}
template<typename TKey, typename TValue>
@@ -107,6 +109,8 @@
throw DumpException();
childOffsets.push_back(rightOffset);
cachedChildren.push_back(std::move(splitResult.RightNode));
+
+ this->modified = true;
}
template<typename TKey, typename TValue>
@@ -152,8 +156,6 @@
template<typename TKey, typename TValue>
void IndexInnerNode<TKey, TValue>::Write()
{
- // TODO: don't do anything when there are no changes
-
IndexNode<TKey, TValue>::Write();
for (auto &cachedChild : cachedChildren)
@@ -257,4 +259,4 @@
std::unique_ptr<IndexNodeIterator<TKey, TValue>> IndexInnerNode<TKey, TValue>::end()
{
return std::unique_ptr<IndexNodeIterator<TKey, TValue>>(new IndexInnerIterator<TKey, TValue>(this, false));
-}
\ No newline at end of file
+}
diff --git a/Indexes/IndexLeafNode.tpp b/Indexes/IndexLeafNode.tpp
index 099c0fe..c15fb27 100644
--- a/Indexes/IndexLeafNode.tpp
+++ b/Indexes/IndexLeafNode.tpp
@@ -26,27 +26,30 @@
throw new DumpException();
}
- indexMap.insert(pair<TKey, TValue>(key, value));
+ indexMap.insert(std::pair<TKey, TValue>(key, value));
+
+ this->modified = true;
}
template<typename TKey, typename TValue>
void IndexLeafNode<TKey, TValue>::AddOrUpdate(TKey key, TValue value)
{
auto pos = indexMap.find(key);
+
if (pos == indexMap.end())
- {
Add(key, value);
- }
else
- {
- pos->second = value; // will this work?
- }
+ pos->second = value;
+
+ this->modified = true;
}
template<typename TKey, typename TValue>
void IndexLeafNode<TKey, TValue>::Remove(const TKey key)
{
indexMap.erase(key);
+
+ this->modified = true;
}
template<typename TKey, typename TValue>
diff --git a/Indexes/IndexNode.h b/Indexes/IndexNode.h
index 17cc833..a0bbef6 100644
--- a/Indexes/IndexNode.h
+++ b/Indexes/IndexNode.h
@@ -16,6 +16,8 @@
protected:
static const int Size = 4096;
+ bool modified;
+
unsigned iterators;
virtual void WriteInternal() = 0;
@@ -43,9 +45,8 @@
IndexNode(std::weak_ptr<WritableDump> dump);
- using DumpObject::Write;
- // write to a pre-allocated offset
- void Write(Offset offset);
+ virtual void Write() override;
+ void Write(bool force);
virtual TValue Get(TKey key) = 0;
virtual void Add(TKey key, TValue value) = 0;
diff --git a/Indexes/IndexNode.tpp b/Indexes/IndexNode.tpp
index 32c0aab..0485aec 100644
--- a/Indexes/IndexNode.tpp
+++ b/Indexes/IndexNode.tpp
@@ -7,7 +7,7 @@
template<typename TKey, typename TValue>
IndexNode<TKey, TValue>::IndexNode(std::weak_ptr<WritableDump> dump)
- : DumpObject(dump), iterators(0)
+ : DumpObject(dump), modified(false), iterators(0)
{}
template<typename TKey, typename TValue>
@@ -44,19 +44,20 @@
}
template<typename TKey, typename TValue>
-void IndexNode<TKey, TValue>::Write(Offset offset)
+void IndexNode<TKey, TValue>::Write()
{
- auto dumpRef = dump.lock();
+ Write(false);
+}
- if (savedOffset != 0)
- throw DumpException();
+template<typename TKey, typename TValue>
+void IndexNode<TKey, TValue>::Write(bool force)
+{
+ if (!modified && !force)
+ return;
- stream = dumpRef->stream.get();
- stream->seekp(offset.value);
+ DumpObject::Write();
- WriteInternal();
-
- stream = nullptr;
+ modified = false;
}
template<typename TKey, typename TValue>
diff --git a/Indexes/Iterators/IndexInnerIterator.h b/Indexes/Iterators/IndexInnerIterator.h
index 71e1a4b..8ab6754 100644
--- a/Indexes/Iterators/IndexInnerIterator.h
+++ b/Indexes/Iterators/IndexInnerIterator.h
@@ -1,6 +1,7 @@
#pragma once
#include <utility>
+#include "IndexNodeIterator.h"
template<typename TKey, typename TValue>
class IndexInnerIterator : public IndexNodeIterator<TKey, TValue>
@@ -15,7 +16,7 @@
IndexInnerIterator(IndexInnerNode<TKey, TValue> *node, bool isBegin);
IndexInnerIterator(const IndexInnerIterator<TKey, TValue>& other);
public:
- virtual const pair<TKey, TValue> operator *() const override;
+ virtual const std::pair<TKey, TValue> operator *() const override;
virtual IndexInnerIterator& operator ++() override;
virtual bool Equals(const IndexNodeIterator<TKey, TValue> *other) const override;
virtual std::unique_ptr<IndexNodeIterator<TKey, TValue>> Clone() const override;
diff --git a/Indexes/Iterators/IndexInnerIterator.tpp b/Indexes/Iterators/IndexInnerIterator.tpp
index 9be909b..132a792 100644
--- a/Indexes/Iterators/IndexInnerIterator.tpp
+++ b/Indexes/Iterators/IndexInnerIterator.tpp
@@ -18,7 +18,7 @@
}
template<typename TKey, typename TValue>
-const pair<TKey, TValue> IndexInnerIterator<TKey, TValue>::operator *() const
+const std::pair<TKey, TValue> IndexInnerIterator<TKey, TValue>::operator *() const
{
return **childIterator;
}
diff --git a/TODO.txt b/TODO.txt
index 151e49f..6d6a169 100644
--- a/TODO.txt
+++ b/TODO.txt
@@ -1,5 +1,4 @@
short term:
-- don't write empty unsaved indexes
- compression of metadata and indexes
- better error messages (including bad filename)
- idumps r to stdout
--
To view, visit https://gerrit.wikimedia.org/r/84005
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I94a394fd940b34a6654cc0f8ef841c191457cbf6
Gerrit-PatchSet: 2
Gerrit-Project: operations/dumps/incremental
Gerrit-Branch: gsoc
Gerrit-Owner: Petr Onderka <[email protected]>
Gerrit-Reviewer: Petr Onderka <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits