Petr Onderka has submitted this change and it was merged.

Change subject: Don't write empty indexes
......................................................................


Don't write empty indexes

Change-Id: I94a394fd940b34a6654cc0f8ef841c191457cbf6
---
M DumpObjects/DumpSiteInfo.cpp
M Indexes/Index.tpp
M Indexes/IndexInnerNode.tpp
M Indexes/IndexLeafNode.tpp
M Indexes/IndexNode.h
M Indexes/IndexNode.tpp
M Indexes/Iterators/IndexInnerIterator.h
M Indexes/Iterators/IndexInnerIterator.tpp
M TODO.txt
9 files changed, 39 insertions(+), 29 deletions(-)

Approvals:
  Petr Onderka: Verified; Looks good to me, approved



diff --git a/DumpObjects/DumpSiteInfo.cpp b/DumpObjects/DumpSiteInfo.cpp
index 28ce47d..9cec08b 100644
--- a/DumpObjects/DumpSiteInfo.cpp
+++ b/DumpObjects/DumpSiteInfo.cpp
@@ -15,7 +15,10 @@
 
 void DumpSiteInfo::UpdateIndex(Offset offset, bool overwrite)
 {
-    dump.lock()->fileHeader.SiteInfo = offset;
+    auto dumpRef = dump.lock();
+
+    dumpRef->fileHeader.SiteInfo = offset;
+    dumpRef->fileHeader.Write();
 }
 
 DumpSiteInfo::DumpSiteInfo(std::weak_ptr<WritableDump> dump)
diff --git a/Indexes/Index.tpp b/Indexes/Index.tpp
index 144060f..d1f41d2 100644
--- a/Indexes/Index.tpp
+++ b/Indexes/Index.tpp
@@ -22,7 +22,7 @@
         }
         else
         {
-            rootNode->Write();
+            rootNode->Write(true);
             fileHeaderOffset->value = rootNode->SavedOffset();
             dump.lock()->fileHeader.Write();
         }
@@ -59,7 +59,7 @@
 
     if (rootNodeUnsaved)
     {
-        rootNode->Write();
+        rootNode->Write(true);
 
         fileHeaderOffset.lock()->value = rootNode->SavedOffset();
         dump.lock()->fileHeader.Write();
diff --git a/Indexes/IndexInnerNode.tpp b/Indexes/IndexInnerNode.tpp
index 8eb2126..3738937 100644
--- a/Indexes/IndexInnerNode.tpp
+++ b/Indexes/IndexInnerNode.tpp
@@ -53,6 +53,8 @@
 
         insert_at(childOffsets, updatedChildIndex + 1, splitted.RightNode->SavedOffset());
         insert_at(cachedChildren, updatedChildIndex + 1, std::move(splitted.RightNode));
+
+        this->modified = true;
     }
 }
 
@@ -82,7 +84,7 @@
 void IndexInnerNode<TKey, TValue>::Remove(const TKey key)
 {
     auto index = GetKeyIndex(key);
-    return GetChildByIndex(index)->Remove(key);
+    GetChildByIndex(index)->Remove(key);
 }
 
 template<typename TKey, typename TValue>
@@ -107,6 +109,8 @@
         throw DumpException();
     childOffsets.push_back(rightOffset);
     cachedChildren.push_back(std::move(splitResult.RightNode));
+
+    this->modified = true;
 }
 
 template<typename TKey, typename TValue>
@@ -152,8 +156,6 @@
 template<typename TKey, typename TValue>
 void IndexInnerNode<TKey, TValue>::Write()
 {
-    // TODO: don't do anything when there are no changes
-
     IndexNode<TKey, TValue>::Write();
 
     for (auto &cachedChild : cachedChildren)
@@ -257,4 +259,4 @@
 std::unique_ptr<IndexNodeIterator<TKey, TValue>> IndexInnerNode<TKey, TValue>::end()
 {
     return std::unique_ptr<IndexNodeIterator<TKey, TValue>>(new IndexInnerIterator<TKey, TValue>(this, false));
-}
\ No newline at end of file
+}
diff --git a/Indexes/IndexLeafNode.tpp b/Indexes/IndexLeafNode.tpp
index 099c0fe..c15fb27 100644
--- a/Indexes/IndexLeafNode.tpp
+++ b/Indexes/IndexLeafNode.tpp
@@ -26,27 +26,30 @@
         throw new DumpException();
     }
 
-    indexMap.insert(pair<TKey, TValue>(key, value));
+    indexMap.insert(std::pair<TKey, TValue>(key, value));
+
+    this->modified = true;
 }
 
 template<typename TKey, typename TValue>
 void IndexLeafNode<TKey, TValue>::AddOrUpdate(TKey key, TValue value)
 {
     auto pos = indexMap.find(key);
+
     if (pos == indexMap.end())
-    {
         Add(key, value);
-    }
     else
-    {
-        pos->second = value; // will this work?
-    }
+        pos->second = value;
+
+    this->modified = true;
 }
 
 template<typename TKey, typename TValue>
 void IndexLeafNode<TKey, TValue>::Remove(const TKey key)
 {
     indexMap.erase(key);
+
+    this->modified = true;
 }
 
 template<typename TKey, typename TValue>
diff --git a/Indexes/IndexNode.h b/Indexes/IndexNode.h
index 17cc833..a0bbef6 100644
--- a/Indexes/IndexNode.h
+++ b/Indexes/IndexNode.h
@@ -16,6 +16,8 @@
 protected:
     static const int Size = 4096;
 
+    bool modified;
+
     unsigned iterators;
 
     virtual void WriteInternal() = 0;
@@ -43,9 +45,8 @@
 
     IndexNode(std::weak_ptr<WritableDump> dump);
 
-    using DumpObject::Write;
-    // write to a pre-allocated offset
-    void Write(Offset offset);
+    virtual void Write() override;
+    void Write(bool force);
 
     virtual TValue Get(TKey key) = 0;
     virtual void Add(TKey key, TValue value) = 0;
diff --git a/Indexes/IndexNode.tpp b/Indexes/IndexNode.tpp
index 32c0aab..0485aec 100644
--- a/Indexes/IndexNode.tpp
+++ b/Indexes/IndexNode.tpp
@@ -7,7 +7,7 @@
 
 template<typename TKey, typename TValue>
 IndexNode<TKey, TValue>::IndexNode(std::weak_ptr<WritableDump> dump)
-    : DumpObject(dump), iterators(0)
+    : DumpObject(dump), modified(false), iterators(0)
 {}
 
 template<typename TKey, typename TValue>
@@ -44,19 +44,20 @@
 }
 
 template<typename TKey, typename TValue>
-void IndexNode<TKey, TValue>::Write(Offset offset)
+void IndexNode<TKey, TValue>::Write()
 {
-    auto dumpRef = dump.lock();
+    Write(false);
+}
 
-    if (savedOffset != 0)
-        throw DumpException();
+template<typename TKey, typename TValue>
+void IndexNode<TKey, TValue>::Write(bool force)
+{
+    if (!modified && !force)
+        return;
 
-    stream = dumpRef->stream.get();
-    stream->seekp(offset.value);
+    DumpObject::Write();
 
-    WriteInternal();
-
-    stream = nullptr;
+    modified = false;
 }
 
 template<typename TKey, typename TValue>
diff --git a/Indexes/Iterators/IndexInnerIterator.h b/Indexes/Iterators/IndexInnerIterator.h
index 71e1a4b..8ab6754 100644
--- a/Indexes/Iterators/IndexInnerIterator.h
+++ b/Indexes/Iterators/IndexInnerIterator.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include <utility>
+#include "IndexNodeIterator.h"
 
 template<typename TKey, typename TValue>
 class IndexInnerIterator : public IndexNodeIterator<TKey, TValue>
@@ -15,7 +16,7 @@
     IndexInnerIterator(IndexInnerNode<TKey, TValue> *node, bool isBegin);
     IndexInnerIterator(const IndexInnerIterator<TKey, TValue>& other);
 public:
-    virtual const pair<TKey, TValue> operator *() const override;
+    virtual const std::pair<TKey, TValue> operator *() const override;
     virtual IndexInnerIterator& operator ++() override;
     virtual bool Equals(const IndexNodeIterator<TKey, TValue> *other) const override;
     virtual std::unique_ptr<IndexNodeIterator<TKey, TValue>> Clone() const override;
diff --git a/Indexes/Iterators/IndexInnerIterator.tpp b/Indexes/Iterators/IndexInnerIterator.tpp
index 9be909b..132a792 100644
--- a/Indexes/Iterators/IndexInnerIterator.tpp
+++ b/Indexes/Iterators/IndexInnerIterator.tpp
@@ -18,7 +18,7 @@
 }
 
 template<typename TKey, typename TValue>
-const pair<TKey, TValue> IndexInnerIterator<TKey, TValue>::operator *() const
+const std::pair<TKey, TValue> IndexInnerIterator<TKey, TValue>::operator *() const
 {
     return **childIterator;
 }
diff --git a/TODO.txt b/TODO.txt
index 151e49f..6d6a169 100644
--- a/TODO.txt
+++ b/TODO.txt
@@ -1,5 +1,4 @@
 short term:
-- don't write empty unsaved indexes
 - compression of metadata and indexes
 - better error messages (including bad filename)
 - idumps r to stdout

-- 
To view, visit https://gerrit.wikimedia.org/r/84005
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I94a394fd940b34a6654cc0f8ef841c191457cbf6
Gerrit-PatchSet: 2
Gerrit-Project: operations/dumps/incremental
Gerrit-Branch: gsoc
Gerrit-Owner: Petr Onderka <[email protected]>
Gerrit-Reviewer: Petr Onderka <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to