Petr Onderka has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/75668


Change subject: made indexes into trees
......................................................................

made indexes into trees

Change-Id: Iadab9a140821b68ac502ce9d67133972aa86966a
---
M CMakeLists.txt
A CollectionHelpers.h
M DumpObjects/DumpTraits.h
M Incremental dumps.vcxproj
M Indexes/Index.h
M Indexes/Index.tpp
A Indexes/IndexInnerNode.h
A Indexes/IndexInnerNode.tpp
M Indexes/IndexLeafNode.h
M Indexes/IndexLeafNode.tpp
M Indexes/IndexNode.h
M Indexes/IndexNode.tpp
A Indexes/Iterators/IndexInnerIterator.h
A Indexes/Iterators/IndexInnerIterator.tpp
M Indexes/Iterators/IndexLeafIterator.tpp
M Indexes/Iterators/IndexNodeIterator.h
D Indexes/Iterators/IndexNodeIterator.tpp
17 files changed, 531 insertions(+), 51 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/dumps/incremental 
refs/changes/68/75668/1

diff --git a/CMakeLists.txt b/CMakeLists.txt
index d3de7a4..7bb3029 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -41,6 +41,7 @@
 )
 # File list: C/C++ Header
 set(SOURCES_files_ClInclude
+  CollectionHelpers.h
   DumpException.h
   DumpObjects/DumpIpV4User.h
   DumpObjects/DumpNamedUser.h
@@ -49,6 +50,7 @@
   DumpObjects/DumpRevision.h
   DumpObjects/DumpUser.h
   Indexes/Index.h
+  Indexes/IndexInnerNode.tpp
   Indexes/IndexLeafNode.tpp
   Indexes/IndexNode.tpp
   Indexes/Index.tpp
@@ -57,6 +59,7 @@
   DumpObjects/DumpTraits.h
   DumpWriters/DumpWriter.h
   DumpObjects/FileHeader.h
+  Indexes/IndexInnerNode.h
   Indexes/IndexLeafNode.h
   Indexes/IndexNode.h
   DumpObjects/Offset.h
@@ -65,10 +68,11 @@
   Objects/Revision.h
   Indexes/Iterators/IndexIterator.h
   Indexes/Iterators/IndexIterator.tpp
+  Indexes/Iterators/IndexInnerIterator.h
+  Indexes/Iterators/IndexInnerIterator.tpp
   Indexes/Iterators/IndexLeafIterator.h
   Indexes/Iterators/IndexLeafIterator.tpp
   Indexes/Iterators/IndexNodeIterator.h
-  Indexes/Iterators/IndexNodeIterator.tpp
   SevenZip.h
   SpaceManager.h
   DumpWriters/TestDumpWriter.h
diff --git a/CollectionHelpers.h b/CollectionHelpers.h
new file mode 100644
index 0000000..7102002
--- /dev/null
+++ b/CollectionHelpers.h
@@ -0,0 +1,16 @@
+//http://stackoverflow.com/a/2330559/41071
+template <typename T>
+T& insert_at(T& pContainer, size_t pIndex, const typename T::value_type& 
pValue)
+{
+    pContainer.insert(pContainer.begin() + pIndex, pValue);
+
+    return pContainer;
+}
+
+template <typename T>
+T& insert_at(T& pContainer, size_t pIndex, typename T::value_type&& pValue)
+{
+    pContainer.insert(pContainer.begin() + pIndex, std::move(pValue));
+
+    return pContainer;
+}
\ No newline at end of file
diff --git a/DumpObjects/DumpTraits.h b/DumpObjects/DumpTraits.h
index a4c74a7..ea169c4 100644
--- a/DumpObjects/DumpTraits.h
+++ b/DumpObjects/DumpTraits.h
@@ -219,11 +219,11 @@
 public:
     static vector<T> Read(istream &stream)
     {
-        uint16_t count = DumpTraits<uint16_t>::Read(stream);
+        uint32_t count = DumpTraits<uint32_t>::Read(stream);
 
         vector<T> result;
 
-        for (int i = 0; i < count; i++)
+        for (uint32_t i = 0; i < count; i++)
         {
             result.push_back(DumpTraits<T>::Read(stream));
         }
@@ -235,10 +235,10 @@
     {
         auto length = value.size();
 
-        if (length > numeric_limits<uint16_t>::max())
+        if (length >= numeric_limits<uint32_t>::max())
             throw DumpException();
 
-        DumpTraits<uint16_t>::Write(stream, length);
+        DumpTraits<uint32_t>::Write(stream, length);
 
         for (T item : value)
         {
@@ -248,7 +248,7 @@
 
     static uint32_t DumpSize(const vector<T> value)
     {
-        uint32_t size = DumpTraits<uint16_t>::DumpSize(value.size());
+        uint32_t size = DumpTraits<uint32_t>::DumpSize(value.size());
 
         for (T item : value)
         {
diff --git a/Incremental dumps.vcxproj b/Incremental dumps.vcxproj
index efcf938..942e13e 100644
--- a/Incremental dumps.vcxproj
+++ b/Incremental dumps.vcxproj
@@ -86,6 +86,7 @@
     <ClCompile Include="DumpObjects\DumpNamedUser.cpp" />
     <ClCompile Include="DumpObjects\DumpObject.cpp" />
     <ClCompile Include="DumpObjects\FileHeader.cpp" />
+    <ClInclude Include="CollectionHelpers.h" />
     <ClInclude Include="DumpException.h" />
     <ClInclude Include="DumpObjects\DumpIpV4User.h" />
     <ClInclude Include="DumpObjects\DumpNamedUser.h" />
@@ -96,6 +97,7 @@
     <ClInclude Include="DumpWriters\PagesHistoryWriter.h" />
     <ClInclude Include="DumpWriters\StubHistoryWriter.h" />
     <ClInclude Include="Indexes\Index.h" />
+    <ClInclude Include="Indexes\IndexInnerNode.h" />
     <ClInclude Include="Indexes\IndexLeafNode.tpp">
       <FileType>CppCode</FileType>
     </ClInclude>
@@ -140,6 +142,7 @@
     <ClInclude Include="Indexes\IndexLeafNode.h" />
     <ClInclude Include="Indexes\IndexNode.h" />
     <ClInclude Include="DumpObjects\Offset.h" />
+    <ClInclude Include="Indexes\Iterators\IndexInnerIterator.h" />
     <ClInclude Include="Objects\IpV4User.h" />
     <ClInclude Include="Objects\Page.h" />
     <ClInclude Include="Objects\Revision.h" />
@@ -148,7 +151,6 @@
     <ClInclude Include="Indexes\Iterators\IndexLeafIterator.h" />
     <ClInclude Include="Indexes\Iterators\IndexLeafIterator.tpp" />
     <ClInclude Include="Indexes\Iterators\IndexNodeIterator.h" />
-    <ClInclude Include="Indexes\Iterators\IndexNodeIterator.tpp" />
     <ClInclude Include="SevenZip.h" />
     <ClInclude Include="SpaceManager.h" />
     <ClInclude Include="DumpWriters\TestDumpWriter.h" />
@@ -172,7 +174,17 @@
       <Project>{5f0fc7a8-1c0c-403a-bcae-1cc0ed546a44}</Project>
     </ProjectReference>
   </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="Indexes\IndexInnerNode.tpp">
+      <FileType>Document</FileType>
+    </ClInclude>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="Indexes\Iterators\IndexInnerIterator.tpp">
+      <FileType>Document</FileType>
+    </ClInclude>
+  </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
-</Project>
+</Project>
\ No newline at end of file
diff --git a/Indexes/Index.h b/Indexes/Index.h
index 0ed08cf..2b462be 100644
--- a/Indexes/Index.h
+++ b/Indexes/Index.h
@@ -12,10 +12,12 @@
 class Index
 {
 private:
-    bool fileHeaderZero;
+    bool rootNodeUnsaved;
     unique_ptr<IndexNode<TKey, TValue>> rootNode;
     weak_ptr<WritableDump> dump;
     weak_ptr<Offset> fileHeaderOffset;
+
+    void AfterAdd();
 public:
     Index(weak_ptr<WritableDump> dump, weak_ptr<Offset> fileHeaderOffset, bool 
delaySave = false);
 
diff --git a/Indexes/Index.tpp b/Indexes/Index.tpp
index 88dd6d1..460e031 100644
--- a/Indexes/Index.tpp
+++ b/Indexes/Index.tpp
@@ -1,4 +1,5 @@
 #include "Index.h"
+#include "../SpaceManager.h"
 
 #include <memory>
 
@@ -10,7 +11,7 @@
 {
     auto offset = fileHeaderOffset.lock();
 
-    fileHeaderZero = false;
+    rootNodeUnsaved = false;
 
     if (offset->value == 0)
     {
@@ -18,7 +19,7 @@
 
         if (delaySave)
         {
-            fileHeaderZero = true;
+            rootNodeUnsaved = true;
         }
         else
         {
@@ -38,19 +39,38 @@
 }
 
 template<typename TKey, typename TValue>
-void Index<TKey, TValue>::Add(TKey key, TValue value)
+void Index<TKey, TValue>::AfterAdd()
 {
-    rootNode->Add(key, value);
+    if (rootNode->IsOversized())
+    {
+        dump.lock()->spaceManager->Delete(rootNode->SavedOffset(), 
rootNode->NewLength());
 
-    if (fileHeaderZero)
+        auto splitted = rootNode->Split();
+        splitted.LeftNode->Write();
+        splitted.RightNode->Write();
+        rootNode = std::unique_ptr<IndexNode<TKey, TValue>>(
+            new IndexInnerNode<TKey, TValue>(dump, std::move(splitted)));
+
+        rootNodeUnsaved = true;
+    }
+
+    if (rootNodeUnsaved)
     {
         rootNode->Write();
 
         fileHeaderOffset.lock()->value = rootNode->SavedOffset();
         dump.lock()->fileHeader.Write();
 
-        fileHeaderZero = false;
+        rootNodeUnsaved = false;
     }
+}
+
+template<typename TKey, typename TValue>
+void Index<TKey, TValue>::Add(TKey key, TValue value)
+{
+    rootNode->Add(key, value);
+
+    AfterAdd();
 }
 
 template<typename TKey, typename TValue>
@@ -58,15 +78,7 @@
 {
     rootNode->AddOrUpdate(key, value);
 
-    if (fileHeaderZero)
-    {
-        rootNode->Write();
-
-        fileHeaderOffset.lock()->value = rootNode->SavedOffset();
-        dump.lock()->fileHeader.Write();
-
-        fileHeaderZero = false;
-    }
+    AfterAdd();
 }
 
 template<typename TKey, typename TValue>
diff --git a/Indexes/IndexInnerNode.h b/Indexes/IndexInnerNode.h
new file mode 100644
index 0000000..fc191c5
--- /dev/null
+++ b/Indexes/IndexInnerNode.h
@@ -0,0 +1,45 @@
+#pragma once
+
+#include <map>
+#include "IndexNode.h"
+
+template<typename TKey, typename TValue>
+class IndexInnerNode : public IndexNode<TKey, TValue>
+{
+    template<typename TIndexKey, typename TIndexValue>
+    friend class IndexInnerIterator;
+private:
+    std::vector<TKey> keys;
+    std::vector<Offset> childOffsets;
+    std::vector<std::unique_ptr<IndexNode<TKey, TValue>>> cachedChildren;
+
+    IndexNode<TKey, TValue>* GetChildByIndex(std::uint16_t index);
+    std::uint16_t GetKeyIndex(TKey key);
+
+    void AfterAdd(std::uint16_t updatedChildIndex);
+protected:
+    virtual void WriteInternal() override;
+public:
+    static std::unique_ptr<IndexNode<TKey, TValue>> 
Read(weak_ptr<WritableDump> dump, istream &stream);
+
+    IndexInnerNode(weak_ptr<WritableDump> dump);
+    IndexInnerNode(weak_ptr<WritableDump> dump, SplitResult splitResult);
+
+    virtual void Write() override;
+    using DumpObjectBase::WriteValue;
+
+    virtual uint32_t NewLength() override;
+
+    virtual TValue Get(TKey key) override;
+    virtual void Add(TKey key, TValue value) override;
+    virtual void AddOrUpdate(TKey key, TValue value) override;
+    virtual void Remove(TKey key) override;
+
+    virtual std::uint32_t RealLength() override;
+    virtual SplitResult Split() override;
+
+    virtual shared_ptr<IndexNodeIterator<TKey, TValue>> begin() override;
+    virtual shared_ptr<IndexNodeIterator<TKey, TValue>> end() override;
+};
+
+#include "IndexInnerNode.tpp"
\ No newline at end of file
diff --git a/Indexes/IndexInnerNode.tpp b/Indexes/IndexInnerNode.tpp
new file mode 100644
index 0000000..94ca3be
--- /dev/null
+++ b/Indexes/IndexInnerNode.tpp
@@ -0,0 +1,229 @@
+#include <algorithm>
+#include <utility>
+#include <vector>
+#include "../DumpObjects/DumpTraits.h"
+#include "../DumpObjects/DumpObjectKind.h"
+#include "../CollectionHelpers.h"
+#include "Iterators/IndexInnerIterator.h"
+
+template<typename TKey, typename TValue>
+IndexNode<TKey, TValue>* IndexInnerNode<TKey, 
TValue>::GetChildByIndex(std::uint16_t index)
+{
+    if (cachedChildren[index] == nullptr)
+    {
+        cachedChildren[index] = IndexNode<TKey, TValue>::Read(dump, 
childOffsets[index].value);
+    }
+
+    return cachedChildren[index].get();
+}
+
+template<typename TKey, typename TValue>
+std::uint16_t IndexInnerNode<TKey, TValue>::GetKeyIndex(TKey key)
+{
+    auto foundRef = std::lower_bound(keys.begin(), keys.end(), key);
+    auto foundPos = foundRef - keys.begin();
+
+    return foundPos;
+}
+
+template<typename TKey, typename TValue>
+TValue IndexInnerNode<TKey, TValue>::Get(TKey key)
+{
+    auto index = GetKeyIndex(key);
+    return GetChildByIndex(index)->Get(key);
+}
+
+template<typename TKey, typename TValue>
+void IndexInnerNode<TKey, TValue>::AfterAdd(std::uint16_t updatedChildIndex)
+{
+    auto updatedChild = GetChildByIndex(updatedChildIndex);
+
+    if (updatedChild->IsOversized())
+    {
+        dump.lock()->spaceManager->Delete(updatedChild->SavedOffset(), 
updatedChild->NewLength());
+
+        auto splitted = updatedChild->Split();
+        splitted.LeftNode->Write();
+        splitted.RightNode->Write();
+
+        insert_at(keys, updatedChildIndex, splitted.SplitKey);
+
+        childOffsets[updatedChildIndex] = splitted.LeftNode->SavedOffset();
+        cachedChildren[updatedChildIndex] = std::move(splitted.LeftNode);
+
+        insert_at(childOffsets, updatedChildIndex + 1, 
splitted.RightNode->SavedOffset());
+        insert_at(cachedChildren, updatedChildIndex + 1, 
std::move(splitted.RightNode));
+    }
+}
+
+template<typename TKey, typename TValue>
+void IndexInnerNode<TKey, TValue>::Add(TKey key, TValue value)
+{
+    auto index = GetKeyIndex(key);
+    auto child = GetChildByIndex(index);
+
+    child->Add(key, value);
+
+    AfterAdd(index);
+}
+
+template<typename TKey, typename TValue>
+void IndexInnerNode<TKey, TValue>::AddOrUpdate(TKey key, TValue value)
+{
+    auto index = GetKeyIndex(key);
+    auto child = GetChildByIndex(index);
+
+    child->AddOrUpdate(key, value);
+
+    AfterAdd(index);
+}
+
+template<typename TKey, typename TValue>
+void IndexInnerNode<TKey, TValue>::Remove(const TKey key)
+{
+    auto index = GetKeyIndex(key);
+    return GetChildByIndex(index)->Remove(key);
+}
+
+template<typename TKey, typename TValue>
+IndexInnerNode<TKey, TValue>::IndexInnerNode(weak_ptr<WritableDump> dump)
+    : IndexNode<TKey, TValue>(dump)
+{}
+
+template<typename TKey, typename TValue>
+IndexInnerNode<TKey, TValue>::IndexInnerNode(weak_ptr<WritableDump> dump, 
SplitResult splitResult)
+    : IndexNode<TKey, TValue>(dump)
+{
+    keys.push_back(splitResult.SplitKey);
+
+    auto leftOffset = splitResult.LeftNode->SavedOffset();
+    if (leftOffset == 0)
+        throw DumpException();
+    childOffsets.push_back(leftOffset);
+    cachedChildren.push_back(std::move(splitResult.LeftNode));
+
+    auto rightOffset = splitResult.RightNode->SavedOffset();
+    if (rightOffset == 0)
+        throw DumpException();
+    childOffsets.push_back(rightOffset);
+    cachedChildren.push_back(std::move(splitResult.RightNode));
+}
+
+template<typename TKey, typename TValue>
+unique_ptr<IndexNode<TKey, TValue>> IndexInnerNode<TKey, 
TValue>::Read(weak_ptr<WritableDump> dump, istream &stream)
+{
+    auto node = new IndexInnerNode<TKey, TValue>(dump);
+
+    // DumpObjectKind is assumed to be already read by IndexNode::Read
+
+    uint16_t count = DumpTraits<uint16_t>::Read(stream);
+
+    for (int i = 0; i < count; i++)
+    {
+        node->keys.push_back(DumpTraits<TKey>::Read(stream));
+    }
+
+    for (int i = 0; i < count + 1; i++)
+    {
+        node->childOffsets.push_back(DumpTraits<Offset>::Read(stream));
+        node->cachedChildren.push_back(std::unique_ptr<IndexNode<TKey, 
TValue>>());
+    }
+
+    return unique_ptr<IndexNode<TKey, TValue>>(node);
+}
+
+template<typename TKey, typename TValue>
+void IndexInnerNode<TKey, TValue>::WriteInternal()
+{
+    WriteValue((uint8_t)DumpObjectKind::IndexInnerNode);
+    WriteValue((uint16_t)keys.size());
+
+       for (auto key : keys)
+    {
+               WriteValue(key);
+    }
+
+       for (auto offset : childOffsets)
+    {
+               WriteValue(offset);
+    }
+}
+
+template<typename TKey, typename TValue>
+void IndexInnerNode<TKey, TValue>::Write()
+{
+    IndexNode<TKey, TValue>::Write();
+
+    for (auto &cachedChild : cachedChildren)
+    {
+        if (cachedChild != nullptr)
+            cachedChild->Write();
+    }
+}
+
+template<typename TKey, typename TValue>
+uint32_t IndexInnerNode<TKey, TValue>::NewLength()
+{
+    return Size;
+}
+
+template<typename TKey, typename TValue>
+std::uint32_t IndexInnerNode<TKey, TValue>::RealLength()
+{
+    return 
DumpTraits<uint8_t>::DumpSize((uint8_t)DumpObjectKind::IndexLeafNode)
+        + DumpTraits<uint16_t>::DumpSize(keys.size())
+        + keys.size() * DumpTraits<TKey>::DumpSize()
+        + (keys.size() + 1) * DumpTraits<Offset>::DumpSize();
+}
+
+template<typename TKey, typename TValue>
+typename IndexNode<TKey, TValue>::SplitResult IndexInnerNode<TKey, 
TValue>::Split()
+{
+    auto left = new IndexInnerNode<TKey, TValue>(dump);
+    auto right = new IndexInnerNode<TKey, TValue>(dump);
+
+    auto middle = keys.size() / 2;
+
+    unsigned i = 0;
+
+    for (; i < middle; i++)
+    {
+        left->keys.push_back(keys[i]);
+        left->childOffsets.push_back(childOffsets[i]);
+        left->cachedChildren.push_back(std::move(cachedChildren[i]));
+    }
+
+    left->childOffsets.push_back(childOffsets[i]);
+    left->cachedChildren.push_back(std::move(cachedChildren[i]));
+
+    TKey middleKey = keys[i];
+
+    i++;
+
+    for (; i < keys.size(); i++)
+    {
+        right->keys.push_back(keys[i]);
+        right->childOffsets.push_back(childOffsets[i]);
+        right->cachedChildren.push_back(std::move(cachedChildren[i]));
+    }
+
+    right->childOffsets.push_back(childOffsets[i]);
+    right->cachedChildren.push_back(std::move(cachedChildren[i]));
+
+    return IndexNode<TKey, TValue>::SplitResult(
+        std::unique_ptr<IndexNode<TKey, TValue>>(left),
+        std::unique_ptr<IndexNode<TKey, TValue>>(right),
+        middleKey);
+}
+
+template<typename TKey, typename TValue>
+shared_ptr<IndexNodeIterator<TKey, TValue>> IndexInnerNode<TKey, 
TValue>::begin()
+{
+    return shared_ptr<IndexNodeIterator<TKey, TValue>>(new 
IndexInnerIterator<TKey, TValue>(this, true));
+}
+
+template<typename TKey, typename TValue>
+shared_ptr<IndexNodeIterator<TKey, TValue>> IndexInnerNode<TKey, TValue>::end()
+{
+    return shared_ptr<IndexNodeIterator<TKey, TValue>>(new 
IndexInnerIterator<TKey, TValue>(this, false));
+}
\ No newline at end of file
diff --git a/Indexes/IndexLeafNode.h b/Indexes/IndexLeafNode.h
index 71b4678..0b40518 100644
--- a/Indexes/IndexLeafNode.h
+++ b/Indexes/IndexLeafNode.h
@@ -9,8 +9,6 @@
 class IndexLeafNode : public IndexNode<TKey, TValue>
 {
 private:
-    static const int Size = 0xFFFF;
-
     map<TKey, TValue> indexMap;
 protected:
     virtual void WriteInternal() override;
@@ -29,8 +27,11 @@
     virtual void AddOrUpdate(TKey key, TValue value) override;
     virtual void Remove(TKey key) override;
 
-    virtual shared_ptr<IndexNodeIterator<TKey, TValue>> begin() const override;
-    virtual shared_ptr<IndexNodeIterator<TKey, TValue>> end() const override;
+    virtual std::uint32_t RealLength() override;
+    virtual SplitResult Split() override;
+
+    virtual shared_ptr<IndexNodeIterator<TKey, TValue>> begin() override;
+    virtual shared_ptr<IndexNodeIterator<TKey, TValue>> end() override;
 };
 
 #include "IndexLeafNode.tpp"
diff --git a/Indexes/IndexLeafNode.tpp b/Indexes/IndexLeafNode.tpp
index 3c0681b..87bf14d 100644
--- a/Indexes/IndexLeafNode.tpp
+++ b/Indexes/IndexLeafNode.tpp
@@ -21,9 +21,8 @@
 template<typename TKey, typename TValue>
 void IndexLeafNode<TKey, TValue>::Add(TKey key, TValue value)
 {
-    if (indexMap.size() >= Size)
+    if (indexMap.size() >= std::numeric_limits<uint16_t>::max())
     {
-        // TODO
         throw new DumpException();
     }
 
@@ -100,19 +99,53 @@
 template<typename TKey, typename TValue>
 uint32_t IndexLeafNode<TKey, TValue>::NewLength()
 {
-    return 
DumpTraits<uint8_t>::DumpSize((uint8_t)DumpObjectKind::IndexLeafNode)
-        + DumpTraits<uint16_t>::DumpSize(indexMap.size())
-        + Size * (DumpTraits<TKey>::DumpSize() + 
DumpTraits<TValue>::DumpSize());
+    return Size;
 }
 
 template<typename TKey, typename TValue>
-shared_ptr<IndexNodeIterator<TKey, TValue>> IndexLeafNode<TKey, 
TValue>::begin() const
+std::uint32_t IndexLeafNode<TKey, TValue>:: RealLength()
+{
+    return 
DumpTraits<uint8_t>::DumpSize((uint8_t)DumpObjectKind::IndexLeafNode)
+        + DumpTraits<uint16_t>::DumpSize(indexMap.size())
+        + indexMap.size() * (DumpTraits<TKey>::DumpSize() + 
DumpTraits<TValue>::DumpSize());
+}
+
+template<typename TKey, typename TValue>
+typename IndexNode<TKey, TValue>::SplitResult IndexLeafNode<TKey, 
TValue>::Split()
+{
+    auto left = new IndexLeafNode<TKey, TValue>(dump);
+    auto right = new IndexLeafNode<TKey, TValue>(dump);
+
+    auto it = indexMap.begin();
+    for (unsigned i = 0; i < (indexMap.size() - 1) / 2; i++)
+    {
+        left->indexMap.insert(*it);
+        it++;
+    }
+
+    auto middleKey = it->first;
+    left->indexMap.insert(*it);
+    it++;
+
+    for (; it != indexMap.end(); it++)
+    {
+        right->indexMap.insert(*it);
+    }
+
+    return IndexNode<TKey, TValue>::SplitResult(
+        std::unique_ptr<IndexNode<TKey, TValue>>(left),
+        std::unique_ptr<IndexNode<TKey, TValue>>(right),
+        middleKey);
+}
+
+template<typename TKey, typename TValue>
+shared_ptr<IndexNodeIterator<TKey, TValue>> IndexLeafNode<TKey, 
TValue>::begin()
 {
     return shared_ptr<IndexNodeIterator<TKey, TValue>>(new 
IndexLeafIterator<TKey, TValue>(indexMap.begin()));
 }
 
 template<typename TKey, typename TValue>
-shared_ptr<IndexNodeIterator<TKey, TValue>> IndexLeafNode<TKey, TValue>::end() 
const
+shared_ptr<IndexNodeIterator<TKey, TValue>> IndexLeafNode<TKey, TValue>::end()
 {
     return shared_ptr<IndexNodeIterator<TKey, TValue>>(new 
IndexLeafIterator<TKey, TValue>(indexMap.end()));
 }
diff --git a/Indexes/IndexNode.h b/Indexes/IndexNode.h
index 63d5c9d..f936e99 100644
--- a/Indexes/IndexNode.h
+++ b/Indexes/IndexNode.h
@@ -8,26 +8,54 @@
 class IndexNodeIterator;
 
 template<typename TKey, typename TValue>
+class IndexNode;
+
+template<typename TKey, typename TValue>
 class IndexNode : public DumpObject
 {
 protected:
-    virtual void WriteInternal() = 0;
+    static const int Size = 4096;
 
+    virtual void WriteInternal() = 0;
 public:
+    struct SplitResult
+    {
+        SplitResult(
+            std::unique_ptr<IndexNode<TKey, TValue>> leftNode,
+            std::unique_ptr<IndexNode<TKey, TValue>> rightNode,
+            TKey splitKey)
+            : LeftNode(std::move(leftNode)), RightNode(std::move(rightNode)), 
SplitKey(splitKey)
+        {}
+
+        SplitResult(SplitResult&& other)
+            : LeftNode(std::move(other.LeftNode)), 
RightNode(std::move(other.RightNode)), SplitKey(std::move(other.SplitKey))
+        {}
+
+        std::unique_ptr<IndexNode<TKey, TValue>> LeftNode;
+        std::unique_ptr<IndexNode<TKey, TValue>> RightNode;
+        TKey SplitKey;
+    };
+
     static unique_ptr<IndexNode> Read(weak_ptr<WritableDump> dump, uint64_t 
offset);
     static unique_ptr<IndexNode> CreateNew(weak_ptr<WritableDump> dump);
 
     IndexNode(weak_ptr<WritableDump> dump);
 
     using DumpObject::Write;
+    // write to a pre-allocated offset
+    void Write(Offset offset);
 
     virtual TValue Get(TKey key) = 0;
     virtual void Add(TKey key, TValue value) = 0;
     virtual void AddOrUpdate(TKey key, TValue value) = 0;
     virtual void Remove(TKey key) = 0;
 
-    virtual shared_ptr<IndexNodeIterator<TKey, TValue>> begin() const = 0;
-    virtual shared_ptr<IndexNodeIterator<TKey, TValue>> end() const = 0;
+    bool IsOversized();
+    virtual std::uint32_t RealLength() = 0;
+    virtual SplitResult Split() = 0;
+
+    virtual shared_ptr<IndexNodeIterator<TKey, TValue>> begin() = 0;
+    virtual shared_ptr<IndexNodeIterator<TKey, TValue>> end() = 0;
 };
 
 #include "IndexNode.tpp"
diff --git a/Indexes/IndexNode.tpp b/Indexes/IndexNode.tpp
index d827d56..e02fda4 100644
--- a/Indexes/IndexNode.tpp
+++ b/Indexes/IndexNode.tpp
@@ -1,5 +1,6 @@
 #include "IndexNode.h"
 #include "IndexLeafNode.h"
+#include "IndexInnerNode.h"
 #include "../DumpException.h"
 #include "../Dump.h"
 #include "../DumpObjects/DumpObjectKind.h"
@@ -16,17 +17,24 @@
     auto &stream = *(dumpRef->stream);
     stream.seekp(offset);
 
+    std::unique_ptr<IndexNode<TKey, TValue>> result;
+
     char byte;
     stream.read(&byte, 1);
     if (byte == (char)DumpObjectKind::IndexLeafNode)
     {
-        auto result = IndexLeafNode<TKey, TValue>::Read(dump, stream);
-        result->savedOffset = offset;
-        result->savedLength = result->NewLength();
-        return result;
+        result = IndexLeafNode<TKey, TValue>::Read(dump, stream);
+    }
+    else if (byte == (char)DumpObjectKind::IndexInnerNode)
+    {
+        result = IndexInnerNode<TKey, TValue>::Read(dump, stream);
     }
     else
-        throw new DumpException();
+        throw DumpException();
+
+    result->savedOffset = offset;
+    result->savedLength = result->NewLength();
+    return result;
 }
 
 template<typename TKey, typename TValue>
@@ -34,3 +42,23 @@
 {
     return unique_ptr<IndexNode<TKey, TValue>>(new IndexLeafNode<TKey, 
TValue>(dump));
 }
+
+template<typename TKey, typename TValue>
+void IndexNode<TKey, TValue>::Write(Offset offset)
+{
+    if (savedOffset != 0)
+        throw DumpException();
+
+    stream = dumpRef->stream.get();
+    stream->seekp(newOffset);
+
+    WriteInternal();
+
+    stream = nullptr;
+}
+
+template<typename TKey, typename TValue>
+bool IndexNode<TKey, TValue>::IsOversized()
+{
+    return RealLength() > NewLength();
+}
\ No newline at end of file
diff --git a/Indexes/Iterators/IndexInnerIterator.h 
b/Indexes/Iterators/IndexInnerIterator.h
new file mode 100644
index 0000000..71cca54
--- /dev/null
+++ b/Indexes/Iterators/IndexInnerIterator.h
@@ -0,0 +1,22 @@
+#pragma once
+
+#include <utility>
+
+template<typename TKey, typename TValue>
+class IndexInnerIterator : public IndexNodeIterator<TKey, TValue>
+{
+    template<typename TIndexKey, typename TIndexValue>
+    friend class IndexInnerNode;
+private:
+    IndexInnerNode<TKey, TValue> *node;
+    uint16_t i;
+    std::shared_ptr<IndexNodeIterator<TKey, TValue>> childIterator;
+
+    IndexInnerIterator(IndexInnerNode<TKey, TValue> *node, bool isBegin);
+public:
+    virtual const pair<TKey, TValue> operator *() const override;
+    virtual IndexInnerIterator& operator ++() override;
+    virtual bool equals(const IndexNodeIterator<TKey, TValue> *other) const 
override;
+};
+
+#include "IndexInnerIterator.tpp"
\ No newline at end of file
diff --git a/Indexes/Iterators/IndexInnerIterator.tpp 
b/Indexes/Iterators/IndexInnerIterator.tpp
new file mode 100644
index 0000000..0b395ef
--- /dev/null
+++ b/Indexes/Iterators/IndexInnerIterator.tpp
@@ -0,0 +1,54 @@
+#include <vector>
+#include "IndexInnerIterator.h"
+
+using std::vector;
+
+template<typename TKey, typename TValue>
+IndexInnerIterator<TKey, TValue>::IndexInnerIterator(IndexInnerNode<TKey, 
TValue> *node, bool isBegin)
+    : node(node),
+      i(isBegin ? 0 : node->childOffsets.size()),
+      childIterator(isBegin ? node->GetChildByIndex(0)->begin() : nullptr)
+{}
+
+template<typename TKey, typename TValue>
+const pair<TKey, TValue> IndexInnerIterator<TKey, TValue>::operator *() const
+{
+    return **childIterator;
+}
+
+template<typename TKey, typename TValue>
+IndexInnerIterator<TKey, TValue>& IndexInnerIterator<TKey, TValue>::operator 
++()
+{
+    ++(*childIterator);
+
+    while (childIterator == node->GetChildByIndex(i)->end())
+    {
+        i++;
+
+        if (i == node->childOffsets.size())
+            childIterator = nullptr;
+        else
+            childIterator = node->GetChildByIndex(i)->begin();
+    }
+
+    return *this;
+}
+
+template<typename TKey, typename TValue>
+bool IndexInnerIterator<TKey, TValue>::equals(const IndexNodeIterator<TKey, 
TValue> *other) const
+{
+    auto otherCasted = dynamic_cast<const IndexInnerIterator<TKey, 
TValue>*>(other);
+    if (otherCasted == nullptr)
+        return false;
+
+    if (i == otherCasted->i)
+    {
+        if (i == node->childOffsets.size())
+            return true;
+
+        if (childIterator->equals(otherCasted->childIterator.get()))
+            return true;
+    }
+
+    return false;
+}
\ No newline at end of file
diff --git a/Indexes/Iterators/IndexLeafIterator.tpp 
b/Indexes/Iterators/IndexLeafIterator.tpp
index 2ba8d87..92ad2f5 100644
--- a/Indexes/Iterators/IndexLeafIterator.tpp
+++ b/Indexes/Iterators/IndexLeafIterator.tpp
@@ -1,6 +1,5 @@
 #include <vector>
 #include "IndexLeafIterator.h"
-#include "../../DumpObjects/DumpTraits.h"
 
 using std::vector;
 
diff --git a/Indexes/Iterators/IndexNodeIterator.h 
b/Indexes/Iterators/IndexNodeIterator.h
index 8aec9c1..dab4ea1 100644
--- a/Indexes/Iterators/IndexNodeIterator.h
+++ b/Indexes/Iterators/IndexNodeIterator.h
@@ -3,15 +3,11 @@
 #include <utility>
 #include "../../DumpObjects/DumpObject.h"
 
-using std::pair;
-
 template<typename TKey, typename TValue>
 class IndexNodeIterator
 {
 public:
-    virtual const pair<TKey, TValue> operator *() const = 0;
+    virtual const std::pair<TKey, TValue> operator *() const = 0;
     virtual IndexNodeIterator& operator ++() = 0;
     virtual bool equals(const IndexNodeIterator *other) const = 0;
-};
-
-#include "IndexNodeIterator.tpp"
\ No newline at end of file
+};
\ No newline at end of file
diff --git a/Indexes/Iterators/IndexNodeIterator.tpp 
b/Indexes/Iterators/IndexNodeIterator.tpp
deleted file mode 100644
index 7a9a05d..0000000
--- a/Indexes/Iterators/IndexNodeIterator.tpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "IndexNodeIterator.h"

-- 
To view, visit https://gerrit.wikimedia.org/r/75668
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Iadab9a140821b68ac502ce9d67133972aa86966a
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps/incremental
Gerrit-Branch: gsoc
Gerrit-Owner: Petr Onderka <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to