Petr Onderka has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/72456


Change subject: handling freed space; implemented index iterator
......................................................................

handling freed space; implemented index iterator

Change-Id: I2f049bc64ccae46691d26982fc40c77f8ce3c00a
---
M Dump.cpp
M Dump.h
M DumpObject.cpp
M DumpObject.h
M DumpTraits.h
M FileHeader.cpp
M FileHeader.h
M Index.h
M Index.hpp
M IndexLeafNode.h
M IndexLeafNode.hpp
M IndexNode.h
M Offset.cpp
M Offset.h
M SpaceManager.cpp
M SpaceManager.h
M main.cpp
17 files changed, 299 insertions(+), 65 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/dumps/incremental 
refs/changes/56/72456/1

diff --git a/Dump.cpp b/Dump.cpp
index 5e469c5..a0ad987 100644
--- a/Dump.cpp
+++ b/Dump.cpp
@@ -57,7 +57,7 @@
         fileHeader = FileHeader::Read(*this);
     }
 
-    spaceManager = SpaceManager(self);
+    spaceManager = unique_ptr<SpaceManager>(new SpaceManager(self));
 
     pageIdIndex = unique_ptr<Index<int32_t, Offset>>(
         new Index<int32_t, Offset>(self, shared_ptr<Offset>(self.lock(), 
&fileHeader.PageIdIndexRoot)));
diff --git a/Dump.h b/Dump.h
index be7ab76..cc1aba8 100644
--- a/Dump.h
+++ b/Dump.h
@@ -48,5 +48,5 @@
     static shared_ptr<WritableDump> Create(string fileName);
 
     FileHeader fileHeader;
-    SpaceManager spaceManager;
+    unique_ptr<SpaceManager> spaceManager;
 };
\ No newline at end of file
diff --git a/DumpObject.cpp b/DumpObject.cpp
index 1b261c4..1435ca5 100644
--- a/DumpObject.cpp
+++ b/DumpObject.cpp
@@ -8,13 +8,21 @@
 void DumpObject::Write()
 {
     auto dumpRef = dump.lock();
-    auto &spaceManager = dumpRef->spaceManager;
 
-    if (savedOffset != 0)
-        spaceManager.Delete(savedOffset);
+    int32_t newLength = NewLength();
+    int64_t newOffset;
 
-    int64_t newLength = NewLength();
-    int64_t newOffset = spaceManager.GetSpace(newLength);
+    if (newLength == savedLength)
+        newOffset = savedOffset;
+    else
+    {
+        auto &spaceManager = dumpRef->spaceManager;
+
+        if (savedOffset != 0)
+            spaceManager->Delete(savedOffset, savedLength);
+
+        newOffset = spaceManager->GetSpace(newLength);
+    }
 
     ostream& stream = *(dumpRef->stream);
     stream.seekp(newOffset);
diff --git a/DumpObject.h b/DumpObject.h
index 42e6e83..d12faf1 100644
--- a/DumpObject.h
+++ b/DumpObject.h
@@ -16,13 +16,13 @@
 protected:
     weak_ptr<WritableDump> dump;
     int64_t savedOffset;
-    int64_t savedLength;
+    int32_t savedLength;
 
     DumpObject(weak_ptr<WritableDump> dump);
     virtual void Write(ostream &stream) = 0;
 
 public:
     virtual void Write();
-    virtual int64_t NewLength() = 0;
+    virtual int32_t NewLength() = 0;
     int64_t SavedOffset();
 };
\ No newline at end of file
diff --git a/DumpTraits.h b/DumpTraits.h
index 434ff93..0464268 100644
--- a/DumpTraits.h
+++ b/DumpTraits.h
@@ -21,7 +21,7 @@
         value.Write(stream);
     }
 
-    static int64_t DumpSize()
+    static int32_t DumpSize()
     {
         return T::DumpSize();
     }
@@ -38,10 +38,10 @@
         stream.read(bytes, 4);
 
         int32_t result = 0;
-        result |= (int64_t)bytes[0];
-        result |= (int64_t)bytes[1] << 8;
-        result |= (int64_t)bytes[2] << 16;
-        result |= (int64_t)bytes[3] << 24;
+        result |= (int32_t)(uint8_t)bytes[0];
+        result |= (int32_t)(uint8_t)bytes[1] << 8;
+        result |= (int32_t)(uint8_t)bytes[2] << 16;
+        result |= (int32_t)(uint8_t)bytes[3] << 24;
 
         return result;
     }
@@ -58,7 +58,7 @@
         stream.write(bytes, 4);
     }
 
-    static int64_t DumpSize()
+    static int32_t DumpSize()
     {
         return 4;
     }
@@ -80,7 +80,7 @@
         stream.write(&value, 1);
     }
 
-    static int64_t DumpSize()
+    static int32_t DumpSize()
     {
         return 1;
     }
diff --git a/FileHeader.cpp b/FileHeader.cpp
index 960e129..aafb014 100644
--- a/FileHeader.cpp
+++ b/FileHeader.cpp
@@ -41,7 +41,7 @@
     return FileHeader(fileEnd, pageIdIndexRoot, freeSpaceIndexRoot, 
dump.GetSelf());
 }
 
-int64_t FileHeader::NewLength()
+int32_t FileHeader::NewLength()
 {
     return 6 + 3 * 6;
 }
diff --git a/FileHeader.h b/FileHeader.h
index f0196b7..4ea97e8 100644
--- a/FileHeader.h
+++ b/FileHeader.h
@@ -21,7 +21,7 @@
     static FileHeader Read(ReadableDump const &dump);
 
     virtual void Write();
-    virtual int64_t NewLength();
+    virtual int32_t NewLength();
 
     Offset FileEnd;
     Offset PageIdIndexRoot;
diff --git a/Index.h b/Index.h
index 64cab2e..324d197 100644
--- a/Index.h
+++ b/Index.h
@@ -1,20 +1,49 @@
 #pragma once
 
+#include <utility>
+#include <iterator>
 #include "Offset.h"
 #include "IndexNode.h"
+
+using std::pair;
+using std::shared_ptr;
+using std::iterator;
+using std::input_iterator_tag;
+
+template<typename TKey, typename TValue>
+class IndexIterator : public iterator<input_iterator_tag, const pair<TKey, 
TValue>, int32_t>
+{
+    template<typename TKey, typename TValue>
+    friend class Index;
+private:
+    shared_ptr<IndexNodeIterator<TKey, TValue>> nodeIterator;
+    IndexIterator(shared_ptr<IndexNodeIterator<TKey, TValue>> nodeIterator);
+public:
+    const pair<TKey, TValue> operator *() const;
+    IndexIterator<TKey, TValue>& operator ++();
+    bool operator ==(const IndexIterator<TKey, TValue> other);
+    bool operator !=(const IndexIterator<TKey, TValue> other);
+};
 
 template<typename TKey, typename TValue>
 class Index
 {
 private:
+    bool fileHeaderZero;
     unique_ptr<IndexNode<TKey, TValue>> rootNode;
     weak_ptr<WritableDump> dump;
     weak_ptr<Offset> fileHeaderOffset;
+
+    void Save();
 public:
     Index(weak_ptr<WritableDump> dump, weak_ptr<Offset> fileHeaderOffset);
 
     TValue operator[](TKey key);
     void Add(TKey key, TValue value);
+    void Remove(TKey key);
+
+    IndexIterator<TKey, TValue> begin() const;
+    IndexIterator<TKey, TValue> end() const;
 };
 
 #include "Index.hpp"
\ No newline at end of file
diff --git a/Index.hpp b/Index.hpp
index 69b82ae..0662054 100644
--- a/Index.hpp
+++ b/Index.hpp
@@ -1,15 +1,52 @@
 #include "Index.h"
 
+#include <memory>
+
+using std::move;
+
+template<typename TKey, typename TValue>
+IndexIterator<TKey, TValue>::IndexIterator(shared_ptr<IndexNodeIterator<TKey, 
TValue>> nodeIterator)
+    : nodeIterator(nodeIterator)
+{}
+
+template<typename TKey, typename TValue>
+const pair<TKey, TValue> IndexIterator<TKey, TValue>::operator *() const
+{
+    return **nodeIterator;
+}
+
+template<typename TKey, typename TValue>
+IndexIterator<TKey, TValue>& IndexIterator<TKey, TValue>::operator ++()
+{
+    ++(*nodeIterator);
+    return *this;
+}
+
+template<typename TKey, typename TValue>
+bool IndexIterator<TKey, TValue>::operator ==(const IndexIterator<TKey, 
TValue> other)
+{
+    return nodeIterator->equals(other.nodeIterator.get());
+}
+
+template<typename TKey, typename TValue>
+bool IndexIterator<TKey, TValue>::operator !=(const IndexIterator<TKey, 
TValue> other)
+{
+    return !(*this == other);
+}
+
 template<typename TKey, typename TValue>
 Index<TKey, TValue>::Index(weak_ptr<WritableDump> dump, weak_ptr<Offset> 
fileHeaderOffset)
     : dump(dump), fileHeaderOffset(fileHeaderOffset)
 {
-    auto offset = fileHeaderOffset.lock()->value;
+    auto offset = fileHeaderOffset.lock();
 
-    if (offset == 0)
+    if (offset->value == 0)
+    {
         rootNode = IndexNode<TKey, TValue>::CreateNew(dump);
+        fileHeaderZero = true;
+    }
     else
-        rootNode = IndexNode<TKey, TValue>::Read(dump, offset);
+        rootNode = IndexNode<TKey, TValue>::Read(dump, offset->value);
 }
 
 template<typename TKey, typename TValue>
@@ -23,11 +60,29 @@
 {
     rootNode->Add(key, value);
 
-    auto offset = fileHeaderOffset.lock();
-
-    if (rootNode->SavedOffset() != offset->value)
+    if (fileHeaderZero)
     {
-        offset->value = rootNode->SavedOffset();
+        fileHeaderOffset.lock()->value = rootNode->SavedOffset();
         dump.lock()->fileHeader.Write();
+
+        fileHeaderZero = false;
     }
+}
+
+template<typename TKey, typename TValue>
+void Index<TKey, TValue>::Remove(TKey key)
+{
+    rootNode->Remove(key);
+}
+
+template<typename TKey, typename TValue>
+IndexIterator<TKey, TValue> Index<TKey, TValue>::begin() const
+{
+    return IndexIterator<TKey, TValue>(rootNode->begin());
+}
+
+template<typename TKey, typename TValue>
+IndexIterator<TKey, TValue> Index<TKey, TValue>::end() const
+{
+    return IndexIterator<TKey, TValue>(rootNode->end());
 }
\ No newline at end of file
diff --git a/IndexLeafNode.h b/IndexLeafNode.h
index 9ff696b..e846097 100644
--- a/IndexLeafNode.h
+++ b/IndexLeafNode.h
@@ -1,14 +1,35 @@
 #pragma once
 
+#include <utility>
 #include <map>
 #include "IndexNode.h"
 
+using std::pair;
 using std::map;
+
+template<typename TKey, typename TValue>
+class IndexLeafIterator : public IndexNodeIterator<TKey, TValue>
+{
+    typedef typename map<TKey, TValue>::const_iterator MapIterator;
+
+    template<typename TKey, typename TValue>
+    friend class IndexLeafNode;
+private:
+    MapIterator mapIterator;
+
+    IndexLeafIterator(MapIterator mapIterator);
+public:
+    virtual const pair<TKey, TValue> operator *() const;
+    virtual IndexLeafIterator& operator ++();
+    virtual bool equals(const IndexNodeIterator *other) const;
+};
 
 template<typename TKey, typename TValue>
 class IndexLeafNode : public IndexNode<TKey, TValue>
 {
 private:
+    static const int Size = 10; // has to be at most 128 for now
+
     map<TKey, TValue> map;
 protected:
     virtual void Write(ostream &stream);
@@ -18,10 +39,14 @@
     IndexLeafNode(weak_ptr<WritableDump> dump);
 
     using DumpObject::Write;
-    virtual int64_t NewLength();
+    virtual int32_t NewLength();
 
     virtual TValue operator[](TKey key);
     virtual void Add(TKey key, TValue value);
+    virtual void Remove(TKey key);
+
+    virtual shared_ptr<IndexNodeIterator<TKey, TValue>> begin() const;
+    virtual shared_ptr<IndexNodeIterator<TKey, TValue>> end() const;
 };
 
 #include "IndexLeafNode.hpp"
\ No newline at end of file
diff --git a/IndexLeafNode.hpp b/IndexLeafNode.hpp
index 5196744..be2b37e 100644
--- a/IndexLeafNode.hpp
+++ b/IndexLeafNode.hpp
@@ -1,7 +1,37 @@
 #include <utility>
+#include <vector>
 #include "DumpTraits.h"
 
 using std::pair;
+using std::vector;
+
+template<typename TKey, typename TValue>
+IndexLeafIterator<TKey, TValue>::IndexLeafIterator(MapIterator mapIterator)
+    : mapIterator(mapIterator)
+{}
+
+template<typename TKey, typename TValue>
+const pair<TKey, TValue> IndexLeafIterator<TKey, TValue>::operator *() const
+{
+    return *mapIterator;
+}
+
+template<typename TKey, typename TValue>
+IndexLeafIterator<TKey, TValue>& IndexLeafIterator<TKey, TValue>::operator ++()
+{
+    ++mapIterator;
+    return *this;
+}
+
+template<typename TKey, typename TValue>
+bool IndexLeafIterator<TKey, TValue>::equals(const IndexNodeIterator<TKey, 
TValue> *other) const
+{
+    auto otherCasted = dynamic_cast<const IndexLeafIterator<TKey, 
TValue>*>(other);
+    if (otherCasted == nullptr)
+        return false;
+
+    return mapIterator == otherCasted->mapIterator;
+}
 
 template<typename TKey, typename TValue>
 TValue IndexLeafNode<TKey, TValue>::operator[](TKey key)
@@ -12,9 +42,23 @@
 template<typename TKey, typename TValue>
 void IndexLeafNode<TKey, TValue>::Add(TKey key, TValue value)
 {
+    if (map.size() >= Size)
+    {
+        // TODO
+        throw new DumpException();
+    }
+
     map.insert(pair<TKey, TValue>(key, value));
     Write();
 }
+
+template<typename TKey, typename TValue>
+void IndexLeafNode<TKey, TValue>::Remove(const TKey key)
+{
+    map.erase(key);
+    Write();
+}
+
 
 template<typename TKey, typename TValue>
 IndexLeafNode<TKey, TValue>::IndexLeafNode(weak_ptr<WritableDump> dump)
@@ -29,11 +73,11 @@
 
     char count = DumpTraits<char>::Read(stream);
 
-    unique_ptr<TKey[]> keys(new TKey[count]);
+    vector<TKey> keys;
 
     for (int i = 0; i < count; i++)
     {
-        keys[i] = DumpTraits<TKey>::Read(stream);
+        keys.push_back(DumpTraits<TKey>::Read(stream));
     }
 
     for (int i = 0; i < count; i++)
@@ -62,8 +106,20 @@
 }
 
 template<typename TKey, typename TValue>
-int64_t IndexLeafNode<TKey, TValue>::NewLength()
+int32_t IndexLeafNode<TKey, TValue>::NewLength()
 {
-    return DumpTraits<char>::DumpSize()
-        + map.size() * (DumpTraits<TKey>::DumpSize() + 
DumpTraits<TValue>::DumpSize());
+    return 2 * DumpTraits<char>::DumpSize()
+        + Size * (DumpTraits<TKey>::DumpSize() + 
DumpTraits<TValue>::DumpSize());
+}
+
+template<typename TKey, typename TValue>
+shared_ptr<IndexNodeIterator<TKey, TValue>> IndexLeafNode<TKey, 
TValue>::begin() const
+{
+    return shared_ptr<IndexNodeIterator<TKey, TValue>>(new 
IndexLeafIterator<TKey, TValue>(map.begin()));
+}
+
+template<typename TKey, typename TValue>
+shared_ptr<IndexNodeIterator<TKey, TValue>> IndexLeafNode<TKey, TValue>::end() 
const
+{
+    return shared_ptr<IndexNodeIterator<TKey, TValue>>(new 
IndexLeafIterator<TKey, TValue>(map.end()));
 }
\ No newline at end of file
diff --git a/IndexNode.h b/IndexNode.h
index ff54e3a..024e8b1 100644
--- a/IndexNode.h
+++ b/IndexNode.h
@@ -1,6 +1,19 @@
 #pragma once
 
+#include <utility>
 #include "DumpObject.h"
+
+using std::pair;
+using std::shared_ptr;
+
+template<typename TKey, typename TValue>
+class IndexNodeIterator
+{
+public:
+    virtual const pair<TKey, TValue> operator *() const = 0;
+    virtual IndexNodeIterator& operator ++() = 0;
+    virtual bool equals(const IndexNodeIterator *other) const = 0;
+};
 
 template<typename TKey, typename TValue>
 class IndexNode : public DumpObject
@@ -23,9 +36,10 @@
 
     virtual TValue operator[](TKey key) = 0;
     virtual void Add(TKey key, TValue value) = 0;
-    // TODO:
-    //virtual void Remove(TKey key);
-    //virtual void Remove(TKey key, TValue value); // required for memory index
+    virtual void Remove(TKey key) = 0;
+
+    virtual shared_ptr<IndexNodeIterator<TKey, TValue>> begin() const = 0;
+    virtual shared_ptr<IndexNodeIterator<TKey, TValue>> end() const = 0;
 };
 
 #include "IndexNode.hpp"
\ No newline at end of file
diff --git a/Offset.cpp b/Offset.cpp
index 6eaaebf..a0c3e39 100644
--- a/Offset.cpp
+++ b/Offset.cpp
@@ -29,17 +29,22 @@
     stream.read(bytes, 6);
 
     int64_t offset = 0;
-    offset |= (int64_t)bytes[0];
-    offset |= (int64_t)bytes[1] << 8;
-    offset |= (int64_t)bytes[2] << 16;
-    offset |= (int64_t)bytes[3] << 24;
-    offset |= (int64_t)bytes[4] << 32;
-    offset |= (int64_t)bytes[5] << 40;
+    offset |= (int64_t)(uint8_t)bytes[0];
+    offset |= (int64_t)(uint8_t)bytes[1] << 8;
+    offset |= (int64_t)(uint8_t)bytes[2] << 16;
+    offset |= (int64_t)(uint8_t)bytes[3] << 24;
+    offset |= (int64_t)(uint8_t)bytes[4] << 32;
+    offset |= (int64_t)(uint8_t)bytes[5] << 40;
 
     return Offset(offset);
 }
 
-int64_t Offset::DumpSize()
+int32_t Offset::DumpSize()
 {
     return 6;
+}
+
+bool operator <(const Offset &first, const Offset &second)
+{
+    return first.value < second.value;
 }
\ No newline at end of file
diff --git a/Offset.h b/Offset.h
index 588c226..c7cce0e 100644
--- a/Offset.h
+++ b/Offset.h
@@ -1,11 +1,9 @@
 #pragma once
 
 #include <cstdint>
-#include <memory>
 #include <iostream>
 
 using std::int64_t;
-using std::unique_ptr;
 using std::istream;
 using std::ostream;
 
@@ -17,5 +15,7 @@
     Offset(int64_t value);
     void Write(ostream &stream) const;
     static Offset Read(istream &stream);
-    static int64_t DumpSize();
-};
\ No newline at end of file
+    static int32_t DumpSize();
+};
+
+bool operator <(const Offset &first, const Offset &second);
\ No newline at end of file
diff --git a/SpaceManager.cpp b/SpaceManager.cpp
index f8a0c38..334b4a9 100644
--- a/SpaceManager.cpp
+++ b/SpaceManager.cpp
@@ -1,25 +1,59 @@
 #include "SpaceManager.h"
 #include "Dump.h"
 
+using std::shared_ptr;
+
 SpaceManager::SpaceManager(weak_ptr<WritableDump> dump)
-    : dump(dump)
-{}
-
-int64_t SpaceManager::GetSpace(int64_t length)
+    : dump(dump),
+      spaceIndex(dump, shared_ptr<Offset>(dump.lock(), 
&dump.lock()->fileHeader.FreeSpaceIndexRoot)),
+      spaceByLength()
 {
-    // TODO: find free space
-
-    auto dumpRef = dump.lock();
-    auto &header = dumpRef->fileHeader;
-    int64_t offset = header.FileEnd.value;
-    header.FileEnd.value += length;
-    header.Write();
-
-    return offset;
+    for (auto value : spaceIndex)
+    {
+        spaceByLength.insert(pair<int32_t, Offset>(value.second, value.first));
+    }
 }
 
-void SpaceManager::Delete(int64_t offset)
+int64_t SpaceManager::GetSpace(int32_t length)
 {
-    // TODO
-    throw new DumpException();
+    auto foundSpace = spaceByLength.lower_bound(length);
+    if (foundSpace != spaceByLength.end())
+    {
+        int32_t foundLength = foundSpace->first;
+        Offset foundOffset = foundSpace->second;
+        
+        spaceByLength.erase(foundSpace);
+        spaceIndex.Remove(foundOffset);
+
+        int32_t remainingLength = foundLength - length;
+
+        if (remainingLength != 0)
+        {
+            Delete(foundOffset.value + length, remainingLength);
+        }
+
+        return foundOffset.value;
+    }
+    else
+    {
+        auto dumpRef = dump.lock();
+        auto &header = dumpRef->fileHeader;
+        int64_t offset = header.FileEnd.value;
+        header.FileEnd.value += length;
+        header.Write();
+
+        return offset;
+    }
+}
+
+void SpaceManager::Delete(int64_t offset, int32_t length)
+{
+    // TODO: free space at the end just decrements fileEnd
+    // TODO: join consecutive free blocks
+    // TODO: zero out?
+
+    // careful here, Add() can cause allocation of the index root node, which 
calls GetSpace()
+    // so spaceIndex.Add() has to go before spaceByLength.insert()
+    spaceIndex.Add(offset, length);
+    spaceByLength.insert(pair<int32_t, Offset>(length, offset));
 }
\ No newline at end of file
diff --git a/SpaceManager.h b/SpaceManager.h
index 4228f9f..ae5449a 100644
--- a/SpaceManager.h
+++ b/SpaceManager.h
@@ -2,10 +2,13 @@
 
 #include <cstdint>
 #include <memory>
-#include "SpaceManager.h"
+#include <map>
+#include "Index.h"
 
+using std::int32_t;
 using std::int64_t;
 using std::weak_ptr;
+using std::multimap;
 
 class WritableDump;
 
@@ -13,8 +16,10 @@
 {
 private:
     weak_ptr<WritableDump> dump;
+    Index<Offset, int32_t> spaceIndex;
+    multimap<int32_t, Offset> spaceByLength;
 public:
-    SpaceManager(weak_ptr<WritableDump> dump = weak_ptr<WritableDump>());
-    int64_t GetSpace(int64_t length);
-    void Delete(int64_t offset);
+    SpaceManager(weak_ptr<WritableDump> dump);
+    int64_t GetSpace(int32_t length);
+    void Delete(int64_t offset, int32_t length);
 };
\ No newline at end of file
diff --git a/main.cpp b/main.cpp
index 6bff684..5320821 100644
--- a/main.cpp
+++ b/main.cpp
@@ -46,5 +46,8 @@
 
     shared_ptr<WritableDump> dump = WritableDump::Create("tmp/test.id");
 
+    auto offset = dump->spaceManager->GetSpace(102);
+    dump->spaceManager->Delete(offset, 102);
+
     dump->pageIdIndex->Add(1, 2);
 }
\ No newline at end of file

-- 
To view, visit https://gerrit.wikimedia.org/r/72456
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I2f049bc64ccae46691d26982fc40c77f8ce3c00a
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps/incremental
Gerrit-Branch: gsoc
Gerrit-Owner: Petr Onderka <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to