Petr Onderka has uploaded a new change for review.
https://gerrit.wikimedia.org/r/72456
Change subject: handling freed space; implemented index iterator
......................................................................
handling freed space; implemented index iterator
Change-Id: I2f049bc64ccae46691d26982fc40c77f8ce3c00a
---
M Dump.cpp
M Dump.h
M DumpObject.cpp
M DumpObject.h
M DumpTraits.h
M FileHeader.cpp
M FileHeader.h
M Index.h
M Index.hpp
M IndexLeafNode.h
M IndexLeafNode.hpp
M IndexNode.h
M Offset.cpp
M Offset.h
M SpaceManager.cpp
M SpaceManager.h
M main.cpp
17 files changed, 299 insertions(+), 65 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/operations/dumps/incremental
refs/changes/56/72456/1
diff --git a/Dump.cpp b/Dump.cpp
index 5e469c5..a0ad987 100644
--- a/Dump.cpp
+++ b/Dump.cpp
@@ -57,7 +57,7 @@
fileHeader = FileHeader::Read(*this);
}
- spaceManager = SpaceManager(self);
+ spaceManager = unique_ptr<SpaceManager>(new SpaceManager(self));
pageIdIndex = unique_ptr<Index<int32_t, Offset>>(
new Index<int32_t, Offset>(self, shared_ptr<Offset>(self.lock(),
&fileHeader.PageIdIndexRoot)));
diff --git a/Dump.h b/Dump.h
index be7ab76..cc1aba8 100644
--- a/Dump.h
+++ b/Dump.h
@@ -48,5 +48,5 @@
static shared_ptr<WritableDump> Create(string fileName);
FileHeader fileHeader;
- SpaceManager spaceManager;
+ unique_ptr<SpaceManager> spaceManager;
};
\ No newline at end of file
diff --git a/DumpObject.cpp b/DumpObject.cpp
index 1b261c4..1435ca5 100644
--- a/DumpObject.cpp
+++ b/DumpObject.cpp
@@ -8,13 +8,21 @@
void DumpObject::Write()
{
auto dumpRef = dump.lock();
- auto &spaceManager = dumpRef->spaceManager;
- if (savedOffset != 0)
- spaceManager.Delete(savedOffset);
+ int32_t newLength = NewLength();
+ int64_t newOffset;
- int64_t newLength = NewLength();
- int64_t newOffset = spaceManager.GetSpace(newLength);
+ if (newLength == savedLength)
+ newOffset = savedOffset;
+ else
+ {
+ auto &spaceManager = dumpRef->spaceManager;
+
+ if (savedOffset != 0)
+ spaceManager->Delete(savedOffset, savedLength);
+
+ newOffset = spaceManager->GetSpace(newLength);
+ }
ostream& stream = *(dumpRef->stream);
stream.seekp(newOffset);
diff --git a/DumpObject.h b/DumpObject.h
index 42e6e83..d12faf1 100644
--- a/DumpObject.h
+++ b/DumpObject.h
@@ -16,13 +16,13 @@
protected:
weak_ptr<WritableDump> dump;
int64_t savedOffset;
- int64_t savedLength;
+ int32_t savedLength;
DumpObject(weak_ptr<WritableDump> dump);
virtual void Write(ostream &stream) = 0;
public:
virtual void Write();
- virtual int64_t NewLength() = 0;
+ virtual int32_t NewLength() = 0;
int64_t SavedOffset();
};
\ No newline at end of file
diff --git a/DumpTraits.h b/DumpTraits.h
index 434ff93..0464268 100644
--- a/DumpTraits.h
+++ b/DumpTraits.h
@@ -21,7 +21,7 @@
value.Write(stream);
}
- static int64_t DumpSize()
+ static int32_t DumpSize()
{
return T::DumpSize();
}
@@ -38,10 +38,10 @@
stream.read(bytes, 4);
int32_t result = 0;
- result |= (int64_t)bytes[0];
- result |= (int64_t)bytes[1] << 8;
- result |= (int64_t)bytes[2] << 16;
- result |= (int64_t)bytes[3] << 24;
+ result |= (int32_t)(uint8_t)bytes[0];
+ result |= (int32_t)(uint8_t)bytes[1] << 8;
+ result |= (int32_t)(uint8_t)bytes[2] << 16;
+ result |= (int32_t)(uint8_t)bytes[3] << 24;
return result;
}
@@ -58,7 +58,7 @@
stream.write(bytes, 4);
}
- static int64_t DumpSize()
+ static int32_t DumpSize()
{
return 4;
}
@@ -80,7 +80,7 @@
stream.write(&value, 1);
}
- static int64_t DumpSize()
+ static int32_t DumpSize()
{
return 1;
}
diff --git a/FileHeader.cpp b/FileHeader.cpp
index 960e129..aafb014 100644
--- a/FileHeader.cpp
+++ b/FileHeader.cpp
@@ -41,7 +41,7 @@
return FileHeader(fileEnd, pageIdIndexRoot, freeSpaceIndexRoot,
dump.GetSelf());
}
-int64_t FileHeader::NewLength()
+int32_t FileHeader::NewLength()
{
return 6 + 3 * 6;
}
diff --git a/FileHeader.h b/FileHeader.h
index f0196b7..4ea97e8 100644
--- a/FileHeader.h
+++ b/FileHeader.h
@@ -21,7 +21,7 @@
static FileHeader Read(ReadableDump const &dump);
virtual void Write();
- virtual int64_t NewLength();
+ virtual int32_t NewLength();
Offset FileEnd;
Offset PageIdIndexRoot;
diff --git a/Index.h b/Index.h
index 64cab2e..324d197 100644
--- a/Index.h
+++ b/Index.h
@@ -1,20 +1,49 @@
#pragma once
+#include <utility>
+#include <iterator>
#include "Offset.h"
#include "IndexNode.h"
+
+using std::pair;
+using std::shared_ptr;
+using std::iterator;
+using std::input_iterator_tag;
+
+template<typename TKey, typename TValue>
+class IndexIterator : public iterator<input_iterator_tag, const pair<TKey,
TValue>, int32_t>
+{
+ template<typename TKey, typename TValue>
+ friend class Index;
+private:
+ shared_ptr<IndexNodeIterator<TKey, TValue>> nodeIterator;
+ IndexIterator(shared_ptr<IndexNodeIterator<TKey, TValue>> nodeIterator);
+public:
+ const pair<TKey, TValue> operator *() const;
+ IndexIterator<TKey, TValue>& operator ++();
+ bool operator ==(const IndexIterator<TKey, TValue> other);
+ bool operator !=(const IndexIterator<TKey, TValue> other);
+};
template<typename TKey, typename TValue>
class Index
{
private:
+ bool fileHeaderZero;
unique_ptr<IndexNode<TKey, TValue>> rootNode;
weak_ptr<WritableDump> dump;
weak_ptr<Offset> fileHeaderOffset;
+
+ void Save();
public:
Index(weak_ptr<WritableDump> dump, weak_ptr<Offset> fileHeaderOffset);
TValue operator[](TKey key);
void Add(TKey key, TValue value);
+ void Remove(TKey key);
+
+ IndexIterator<TKey, TValue> begin() const;
+ IndexIterator<TKey, TValue> end() const;
};
#include "Index.hpp"
\ No newline at end of file
diff --git a/Index.hpp b/Index.hpp
index 69b82ae..0662054 100644
--- a/Index.hpp
+++ b/Index.hpp
@@ -1,15 +1,52 @@
#include "Index.h"
+#include <memory>
+
+using std::move;
+
+template<typename TKey, typename TValue>
+IndexIterator<TKey, TValue>::IndexIterator(shared_ptr<IndexNodeIterator<TKey,
TValue>> nodeIterator)
+ : nodeIterator(nodeIterator)
+{}
+
+template<typename TKey, typename TValue>
+const pair<TKey, TValue> IndexIterator<TKey, TValue>::operator *() const
+{
+ return **nodeIterator;
+}
+
+template<typename TKey, typename TValue>
+IndexIterator<TKey, TValue>& IndexIterator<TKey, TValue>::operator ++()
+{
+ ++(*nodeIterator);
+ return *this;
+}
+
+template<typename TKey, typename TValue>
+bool IndexIterator<TKey, TValue>::operator ==(const IndexIterator<TKey,
TValue> other)
+{
+ return nodeIterator->equals(other.nodeIterator.get());
+}
+
+template<typename TKey, typename TValue>
+bool IndexIterator<TKey, TValue>::operator !=(const IndexIterator<TKey,
TValue> other)
+{
+ return !(*this == other);
+}
+
template<typename TKey, typename TValue>
Index<TKey, TValue>::Index(weak_ptr<WritableDump> dump, weak_ptr<Offset>
fileHeaderOffset)
: dump(dump), fileHeaderOffset(fileHeaderOffset)
{
- auto offset = fileHeaderOffset.lock()->value;
+ auto offset = fileHeaderOffset.lock();
- if (offset == 0)
+ if (offset->value == 0)
+ {
rootNode = IndexNode<TKey, TValue>::CreateNew(dump);
+ fileHeaderZero = true;
+ }
else
- rootNode = IndexNode<TKey, TValue>::Read(dump, offset);
+ rootNode = IndexNode<TKey, TValue>::Read(dump, offset->value);
}
template<typename TKey, typename TValue>
@@ -23,11 +60,29 @@
{
rootNode->Add(key, value);
- auto offset = fileHeaderOffset.lock();
-
- if (rootNode->SavedOffset() != offset->value)
+ if (fileHeaderZero)
{
- offset->value = rootNode->SavedOffset();
+ fileHeaderOffset.lock()->value = rootNode->SavedOffset();
dump.lock()->fileHeader.Write();
+
+ fileHeaderZero = false;
}
+}
+
+template<typename TKey, typename TValue>
+void Index<TKey, TValue>::Remove(TKey key)
+{
+ rootNode->Remove(key);
+}
+
+template<typename TKey, typename TValue>
+IndexIterator<TKey, TValue> Index<TKey, TValue>::begin() const
+{
+ return IndexIterator<TKey, TValue>(rootNode->begin());
+}
+
+template<typename TKey, typename TValue>
+IndexIterator<TKey, TValue> Index<TKey, TValue>::end() const
+{
+ return IndexIterator<TKey, TValue>(rootNode->end());
}
\ No newline at end of file
diff --git a/IndexLeafNode.h b/IndexLeafNode.h
index 9ff696b..e846097 100644
--- a/IndexLeafNode.h
+++ b/IndexLeafNode.h
@@ -1,14 +1,35 @@
#pragma once
+#include <utility>
#include <map>
#include "IndexNode.h"
+using std::pair;
using std::map;
+
+template<typename TKey, typename TValue>
+class IndexLeafIterator : public IndexNodeIterator<TKey, TValue>
+{
+ typedef typename map<TKey, TValue>::const_iterator MapIterator;
+
+ template<typename TKey, typename TValue>
+ friend class IndexLeafNode;
+private:
+ MapIterator mapIterator;
+
+ IndexLeafIterator(MapIterator mapIterator);
+public:
+ virtual const pair<TKey, TValue> operator *() const;
+ virtual IndexLeafIterator& operator ++();
+ virtual bool equals(const IndexNodeIterator *other) const;
+};
template<typename TKey, typename TValue>
class IndexLeafNode : public IndexNode<TKey, TValue>
{
private:
+ static const int Size = 10; // has to be at most 128 for now
+
map<TKey, TValue> map;
protected:
virtual void Write(ostream &stream);
@@ -18,10 +39,14 @@
IndexLeafNode(weak_ptr<WritableDump> dump);
using DumpObject::Write;
- virtual int64_t NewLength();
+ virtual int32_t NewLength();
virtual TValue operator[](TKey key);
virtual void Add(TKey key, TValue value);
+ virtual void Remove(TKey key);
+
+ virtual shared_ptr<IndexNodeIterator<TKey, TValue>> begin() const;
+ virtual shared_ptr<IndexNodeIterator<TKey, TValue>> end() const;
};
#include "IndexLeafNode.hpp"
\ No newline at end of file
diff --git a/IndexLeafNode.hpp b/IndexLeafNode.hpp
index 5196744..be2b37e 100644
--- a/IndexLeafNode.hpp
+++ b/IndexLeafNode.hpp
@@ -1,7 +1,37 @@
#include <utility>
+#include <vector>
#include "DumpTraits.h"
using std::pair;
+using std::vector;
+
+template<typename TKey, typename TValue>
+IndexLeafIterator<TKey, TValue>::IndexLeafIterator(MapIterator mapIterator)
+ : mapIterator(mapIterator)
+{}
+
+template<typename TKey, typename TValue>
+const pair<TKey, TValue> IndexLeafIterator<TKey, TValue>::operator *() const
+{
+ return *mapIterator;
+}
+
+template<typename TKey, typename TValue>
+IndexLeafIterator<TKey, TValue>& IndexLeafIterator<TKey, TValue>::operator ++()
+{
+ ++mapIterator;
+ return *this;
+}
+
+template<typename TKey, typename TValue>
+bool IndexLeafIterator<TKey, TValue>::equals(const IndexNodeIterator<TKey,
TValue> *other) const
+{
+ auto otherCasted = dynamic_cast<const IndexLeafIterator<TKey,
TValue>*>(other);
+ if (otherCasted == nullptr)
+ return false;
+
+ return mapIterator == otherCasted->mapIterator;
+}
template<typename TKey, typename TValue>
TValue IndexLeafNode<TKey, TValue>::operator[](TKey key)
@@ -12,9 +42,23 @@
template<typename TKey, typename TValue>
void IndexLeafNode<TKey, TValue>::Add(TKey key, TValue value)
{
+ if (map.size() >= Size)
+ {
+ // TODO
+ throw new DumpException();
+ }
+
map.insert(pair<TKey, TValue>(key, value));
Write();
}
+
+template<typename TKey, typename TValue>
+void IndexLeafNode<TKey, TValue>::Remove(const TKey key)
+{
+ map.erase(key);
+ Write();
+}
+
template<typename TKey, typename TValue>
IndexLeafNode<TKey, TValue>::IndexLeafNode(weak_ptr<WritableDump> dump)
@@ -29,11 +73,11 @@
char count = DumpTraits<char>::Read(stream);
- unique_ptr<TKey[]> keys(new TKey[count]);
+ vector<TKey> keys;
for (int i = 0; i < count; i++)
{
- keys[i] = DumpTraits<TKey>::Read(stream);
+ keys.push_back(DumpTraits<TKey>::Read(stream));
}
for (int i = 0; i < count; i++)
@@ -62,8 +106,20 @@
}
template<typename TKey, typename TValue>
-int64_t IndexLeafNode<TKey, TValue>::NewLength()
+int32_t IndexLeafNode<TKey, TValue>::NewLength()
{
- return DumpTraits<char>::DumpSize()
- + map.size() * (DumpTraits<TKey>::DumpSize() +
DumpTraits<TValue>::DumpSize());
+ return 2 * DumpTraits<char>::DumpSize()
+ + Size * (DumpTraits<TKey>::DumpSize() +
DumpTraits<TValue>::DumpSize());
+}
+
+template<typename TKey, typename TValue>
+shared_ptr<IndexNodeIterator<TKey, TValue>> IndexLeafNode<TKey,
TValue>::begin() const
+{
+ return shared_ptr<IndexNodeIterator<TKey, TValue>>(new
IndexLeafIterator<TKey, TValue>(map.begin()));
+}
+
+template<typename TKey, typename TValue>
+shared_ptr<IndexNodeIterator<TKey, TValue>> IndexLeafNode<TKey, TValue>::end()
const
+{
+ return shared_ptr<IndexNodeIterator<TKey, TValue>>(new
IndexLeafIterator<TKey, TValue>(map.end()));
}
\ No newline at end of file
diff --git a/IndexNode.h b/IndexNode.h
index ff54e3a..024e8b1 100644
--- a/IndexNode.h
+++ b/IndexNode.h
@@ -1,6 +1,19 @@
#pragma once
+#include <utility>
#include "DumpObject.h"
+
+using std::pair;
+using std::shared_ptr;
+
+template<typename TKey, typename TValue>
+class IndexNodeIterator
+{
+public:
+ virtual const pair<TKey, TValue> operator *() const = 0;
+ virtual IndexNodeIterator& operator ++() = 0;
+ virtual bool equals(const IndexNodeIterator *other) const = 0;
+};
template<typename TKey, typename TValue>
class IndexNode : public DumpObject
@@ -23,9 +36,10 @@
virtual TValue operator[](TKey key) = 0;
virtual void Add(TKey key, TValue value) = 0;
- // TODO:
- //virtual void Remove(TKey key);
- //virtual void Remove(TKey key, TValue value); // required for memory index
+ virtual void Remove(TKey key) = 0;
+
+ virtual shared_ptr<IndexNodeIterator<TKey, TValue>> begin() const = 0;
+ virtual shared_ptr<IndexNodeIterator<TKey, TValue>> end() const = 0;
};
#include "IndexNode.hpp"
\ No newline at end of file
diff --git a/Offset.cpp b/Offset.cpp
index 6eaaebf..a0c3e39 100644
--- a/Offset.cpp
+++ b/Offset.cpp
@@ -29,17 +29,22 @@
stream.read(bytes, 6);
int64_t offset = 0;
- offset |= (int64_t)bytes[0];
- offset |= (int64_t)bytes[1] << 8;
- offset |= (int64_t)bytes[2] << 16;
- offset |= (int64_t)bytes[3] << 24;
- offset |= (int64_t)bytes[4] << 32;
- offset |= (int64_t)bytes[5] << 40;
+ offset |= (int64_t)(uint8_t)bytes[0];
+ offset |= (int64_t)(uint8_t)bytes[1] << 8;
+ offset |= (int64_t)(uint8_t)bytes[2] << 16;
+ offset |= (int64_t)(uint8_t)bytes[3] << 24;
+ offset |= (int64_t)(uint8_t)bytes[4] << 32;
+ offset |= (int64_t)(uint8_t)bytes[5] << 40;
return Offset(offset);
}
-int64_t Offset::DumpSize()
+int32_t Offset::DumpSize()
{
return 6;
+}
+
+bool operator <(const Offset &first, const Offset &second)
+{
+ return first.value < second.value;
}
\ No newline at end of file
diff --git a/Offset.h b/Offset.h
index 588c226..c7cce0e 100644
--- a/Offset.h
+++ b/Offset.h
@@ -1,11 +1,9 @@
#pragma once
#include <cstdint>
-#include <memory>
#include <iostream>
using std::int64_t;
-using std::unique_ptr;
using std::istream;
using std::ostream;
@@ -17,5 +15,7 @@
Offset(int64_t value);
void Write(ostream &stream) const;
static Offset Read(istream &stream);
- static int64_t DumpSize();
-};
\ No newline at end of file
+ static int32_t DumpSize();
+};
+
+bool operator <(const Offset &first, const Offset &second);
\ No newline at end of file
diff --git a/SpaceManager.cpp b/SpaceManager.cpp
index f8a0c38..334b4a9 100644
--- a/SpaceManager.cpp
+++ b/SpaceManager.cpp
@@ -1,25 +1,59 @@
#include "SpaceManager.h"
#include "Dump.h"
+using std::shared_ptr;
+
SpaceManager::SpaceManager(weak_ptr<WritableDump> dump)
- : dump(dump)
-{}
-
-int64_t SpaceManager::GetSpace(int64_t length)
+ : dump(dump),
+ spaceIndex(dump, shared_ptr<Offset>(dump.lock(),
&dump.lock()->fileHeader.FreeSpaceIndexRoot)),
+ spaceByLength()
{
- // TODO: find free space
-
- auto dumpRef = dump.lock();
- auto &header = dumpRef->fileHeader;
- int64_t offset = header.FileEnd.value;
- header.FileEnd.value += length;
- header.Write();
-
- return offset;
+ for (auto value : spaceIndex)
+ {
+ spaceByLength.insert(pair<int32_t, Offset>(value.second, value.first));
+ }
}
-void SpaceManager::Delete(int64_t offset)
+int64_t SpaceManager::GetSpace(int32_t length)
{
- // TODO
- throw new DumpException();
+ auto foundSpace = spaceByLength.lower_bound(length);
+ if (foundSpace != spaceByLength.end())
+ {
+ int32_t foundLength = foundSpace->first;
+ Offset foundOffset = foundSpace->second;
+
+ spaceByLength.erase(foundSpace);
+ spaceIndex.Remove(foundOffset);
+
+ int32_t remainingLength = foundLength - length;
+
+ if (remainingLength != 0)
+ {
+ Delete(foundOffset.value + length, remainingLength);
+ }
+
+ return foundOffset.value;
+ }
+ else
+ {
+ auto dumpRef = dump.lock();
+ auto &header = dumpRef->fileHeader;
+ int64_t offset = header.FileEnd.value;
+ header.FileEnd.value += length;
+ header.Write();
+
+ return offset;
+ }
+}
+
+void SpaceManager::Delete(int64_t offset, int32_t length)
+{
+ // TODO: free space at the end just decrements fileEnd
+ // TODO: join consecutive free blocks
+ // TODO: zero out?
+
+ // careful here, Add() can cause allocation of the index root node, which
calls GetSpace()
+ // so spaceIndex.Add() has to go before spaceByLength.insert()
+ spaceIndex.Add(offset, length);
+ spaceByLength.insert(pair<int32_t, Offset>(length, offset));
}
\ No newline at end of file
diff --git a/SpaceManager.h b/SpaceManager.h
index 4228f9f..ae5449a 100644
--- a/SpaceManager.h
+++ b/SpaceManager.h
@@ -2,10 +2,13 @@
#include <cstdint>
#include <memory>
-#include "SpaceManager.h"
+#include <map>
+#include "Index.h"
+using std::int32_t;
using std::int64_t;
using std::weak_ptr;
+using std::multimap;
class WritableDump;
@@ -13,8 +16,10 @@
{
private:
weak_ptr<WritableDump> dump;
+ Index<Offset, int32_t> spaceIndex;
+ multimap<int32_t, Offset> spaceByLength;
public:
- SpaceManager(weak_ptr<WritableDump> dump = weak_ptr<WritableDump>());
- int64_t GetSpace(int64_t length);
- void Delete(int64_t offset);
+ SpaceManager(weak_ptr<WritableDump> dump);
+ int64_t GetSpace(int32_t length);
+ void Delete(int64_t offset, int32_t length);
};
\ No newline at end of file
diff --git a/main.cpp b/main.cpp
index 6bff684..5320821 100644
--- a/main.cpp
+++ b/main.cpp
@@ -46,5 +46,8 @@
shared_ptr<WritableDump> dump = WritableDump::Create("tmp/test.id");
+ auto offset = dump->spaceManager->GetSpace(102);
+ dump->spaceManager->Delete(offset, 102);
+
dump->pageIdIndex->Add(1, 2);
}
\ No newline at end of file
--
To view, visit https://gerrit.wikimedia.org/r/72456
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I2f049bc64ccae46691d26982fc40c77f8ce3c00a
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps/incremental
Gerrit-Branch: gsoc
Gerrit-Owner: Petr Onderka <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits