Petr Onderka has submitted this change and it was merged. Change subject: made indexes into trees ......................................................................
made indexes into trees Change-Id: Iadab9a140821b68ac502ce9d67133972aa86966a --- M CMakeLists.txt A CollectionHelpers.h M Dump.cpp M DumpObjects/DumpObject.cpp M DumpObjects/DumpTraits.h M Incremental dumps.vcxproj M Indexes/Index.h M Indexes/Index.tpp A Indexes/IndexInnerNode.h A Indexes/IndexInnerNode.tpp M Indexes/IndexLeafNode.h M Indexes/IndexLeafNode.tpp M Indexes/IndexNode.h M Indexes/IndexNode.tpp A Indexes/Iterators/IndexInnerIterator.h A Indexes/Iterators/IndexInnerIterator.tpp M Indexes/Iterators/IndexLeafIterator.tpp M Indexes/Iterators/IndexNodeIterator.h D Indexes/Iterators/IndexNodeIterator.tpp M SpaceManager.cpp M SpaceManager.h 21 files changed, 544 insertions(+), 54 deletions(-) Approvals: Petr Onderka: Verified; Looks good to me, approved diff --git a/CMakeLists.txt b/CMakeLists.txt index d3de7a4..7bb3029 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,6 +41,7 @@ ) # File list: C/C++ Header set(SOURCES_files_ClInclude + CollectionHelpers.h DumpException.h DumpObjects/DumpIpV4User.h DumpObjects/DumpNamedUser.h @@ -49,6 +50,7 @@ DumpObjects/DumpRevision.h DumpObjects/DumpUser.h Indexes/Index.h + Indexes/IndexInnerNode.tpp Indexes/IndexLeafNode.tpp Indexes/IndexNode.tpp Indexes/Index.tpp @@ -57,6 +59,7 @@ DumpObjects/DumpTraits.h DumpWriters/DumpWriter.h DumpObjects/FileHeader.h + Indexes/IndexInnerNode.h Indexes/IndexLeafNode.h Indexes/IndexNode.h DumpObjects/Offset.h @@ -65,10 +68,11 @@ Objects/Revision.h Indexes/Iterators/IndexIterator.h Indexes/Iterators/IndexIterator.tpp + Indexes/Iterators/IndexInnerIterator.h + Indexes/Iterators/IndexInnerIterator.tpp Indexes/Iterators/IndexLeafIterator.h Indexes/Iterators/IndexLeafIterator.tpp Indexes/Iterators/IndexNodeIterator.h - Indexes/Iterators/IndexNodeIterator.tpp SevenZip.h SpaceManager.h DumpWriters/TestDumpWriter.h diff --git a/CollectionHelpers.h b/CollectionHelpers.h new file mode 100644 index 0000000..7102002 --- /dev/null +++ b/CollectionHelpers.h @@ -0,0 +1,16 @@ +//http://stackoverflow.com/a/2330559/41071 +template <typename T> +T& insert_at(T& pContainer, size_t pIndex, const typename T::value_type& pValue) +{ + pContainer.insert(pContainer.begin() + pIndex, pValue); + + return pContainer; +} + +template <typename T> +T& insert_at(T& pContainer, size_t pIndex, typename T::value_type&& pValue) +{ + pContainer.insert(pContainer.begin() + pIndex, std::move(pValue)); + + return pContainer; +} \ No newline at end of file diff --git a/Dump.cpp b/Dump.cpp index bb44611..292b514 100644 --- a/Dump.cpp +++ b/Dump.cpp @@ -2,6 +2,7 @@ #include <string> #include <fstream> #include "Dump.h" +#include "Indexes/Index.h" #include "SpaceManager.h" #include "DumpObjects/DumpRevision.h" @@ -90,4 +91,4 @@ revisionIdIndex->Remove(revisionId); spaceManager->Delete(offset.value, length); -} \ No newline at end of file +} diff --git a/DumpObjects/DumpObject.cpp b/DumpObjects/DumpObject.cpp index 491eb80..9871ba5 100644 --- a/DumpObjects/DumpObject.cpp +++ b/DumpObjects/DumpObject.cpp @@ -1,5 +1,6 @@ #include "DumpObject.h" #include "../Dump.h" +#include "../Indexes/Index.h" #include "../SpaceManager.h" DumpObject::DumpObject(weak_ptr<WritableDump> dump) diff --git a/DumpObjects/DumpTraits.h b/DumpObjects/DumpTraits.h index a4c74a7..ea169c4 100644 --- a/DumpObjects/DumpTraits.h +++ b/DumpObjects/DumpTraits.h @@ -219,11 +219,11 @@ public: static vector<T> Read(istream &stream) { - uint16_t count = DumpTraits<uint16_t>::Read(stream); + uint32_t count = DumpTraits<uint32_t>::Read(stream); vector<T> result; - for (int i = 0; i < count; i++) + for (uint32_t i = 0; i < count; i++) { result.push_back(DumpTraits<T>::Read(stream)); } @@ -235,10 +235,10 @@ { auto length = value.size(); - if (length > numeric_limits<uint16_t>::max()) + if (length >= numeric_limits<uint32_t>::max()) throw DumpException(); - DumpTraits<uint16_t>::Write(stream, length); + DumpTraits<uint32_t>::Write(stream, length); for (T item : value) { @@ -248,7 +248,7 @@ static uint32_t DumpSize(const vector<T> value) { - uint32_t size = DumpTraits<uint16_t>::DumpSize(value.size()); + uint32_t size = DumpTraits<uint32_t>::DumpSize(value.size()); for (T item : value) { diff --git a/Incremental dumps.vcxproj b/Incremental dumps.vcxproj index efcf938..942e13e 100644 --- a/Incremental dumps.vcxproj +++ b/Incremental dumps.vcxproj @@ -86,6 +86,7 @@ <ClCompile Include="DumpObjects\DumpNamedUser.cpp" /> <ClCompile Include="DumpObjects\DumpObject.cpp" /> <ClCompile Include="DumpObjects\FileHeader.cpp" /> + <ClInclude Include="CollectionHelpers.h" /> <ClInclude Include="DumpException.h" /> <ClInclude Include="DumpObjects\DumpIpV4User.h" /> <ClInclude Include="DumpObjects\DumpNamedUser.h" /> @@ -96,6 +97,7 @@ <ClInclude Include="DumpWriters\PagesHistoryWriter.h" /> <ClInclude Include="DumpWriters\StubHistoryWriter.h" /> <ClInclude Include="Indexes\Index.h" /> + <ClInclude Include="Indexes\IndexInnerNode.h" /> <ClInclude Include="Indexes\IndexLeafNode.tpp"> <FileType>CppCode</FileType> </ClInclude> @@ -140,6 +142,7 @@ <ClInclude Include="Indexes\IndexLeafNode.h" /> <ClInclude Include="Indexes\IndexNode.h" /> <ClInclude Include="DumpObjects\Offset.h" /> + <ClInclude Include="Indexes\Iterators\IndexInnerIterator.h" /> <ClInclude Include="Objects\IpV4User.h" /> <ClInclude Include="Objects\Page.h" /> <ClInclude Include="Objects\Revision.h" /> @@ -148,7 +151,6 @@ <ClInclude Include="Indexes\Iterators\IndexLeafIterator.h" /> <ClInclude Include="Indexes\Iterators\IndexLeafIterator.tpp" /> <ClInclude Include="Indexes\Iterators\IndexNodeIterator.h" /> - <ClInclude Include="Indexes\Iterators\IndexNodeIterator.tpp" /> <ClInclude Include="SevenZip.h" /> <ClInclude Include="SpaceManager.h" /> <ClInclude Include="DumpWriters\TestDumpWriter.h" /> @@ -172,7 +174,17 @@ <Project>{5f0fc7a8-1c0c-403a-bcae-1cc0ed546a44}</Project> </ProjectReference> </ItemGroup> + <ItemGroup> + <ClInclude Include="Indexes\IndexInnerNode.tpp"> + <FileType>Document</FileType> + </ClInclude> + </ItemGroup> + <ItemGroup> + <ClInclude Include="Indexes\Iterators\IndexInnerIterator.tpp"> + <FileType>Document</FileType> + </ClInclude> + </ItemGroup> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <ImportGroup Label="ExtensionTargets"> </ImportGroup> -</Project> +</Project> \ No newline at end of file diff --git a/Indexes/Index.h b/Indexes/Index.h index 0ed08cf..2b462be 100644 --- a/Indexes/Index.h +++ b/Indexes/Index.h @@ -12,10 +12,12 @@ class Index { private: - bool fileHeaderZero; + bool rootNodeUnsaved; unique_ptr<IndexNode<TKey, TValue>> rootNode; weak_ptr<WritableDump> dump; weak_ptr<Offset> fileHeaderOffset; + + void AfterAdd(); public: Index(weak_ptr<WritableDump> dump, weak_ptr<Offset> fileHeaderOffset, bool delaySave = false); diff --git a/Indexes/Index.tpp b/Indexes/Index.tpp index 88dd6d1..460e031 100644 --- a/Indexes/Index.tpp +++ b/Indexes/Index.tpp @@ -1,4 +1,5 @@ #include "Index.h" +#include "../SpaceManager.h" #include <memory> @@ -10,7 +11,7 @@ { auto offset = fileHeaderOffset.lock(); - fileHeaderZero = false; + rootNodeUnsaved = false; if (offset->value == 0) { @@ -18,7 +19,7 @@ if (delaySave) { - fileHeaderZero = true; + rootNodeUnsaved = true; } else { @@ -38,19 +39,38 @@ } template<typename TKey, typename TValue> -void Index<TKey, TValue>::Add(TKey key, TValue value) +void Index<TKey, TValue>::AfterAdd() { - rootNode->Add(key, value); + if (rootNode->IsOversized()) + { + dump.lock()->spaceManager->Delete(rootNode->SavedOffset(), rootNode->NewLength()); - if (fileHeaderZero) + auto splitted = rootNode->Split(); + splitted.LeftNode->Write(); + splitted.RightNode->Write(); + rootNode = std::unique_ptr<IndexNode<TKey, TValue>>( + new IndexInnerNode<TKey, TValue>(dump, std::move(splitted))); + + rootNodeUnsaved = true; + } + + if (rootNodeUnsaved) { rootNode->Write(); fileHeaderOffset.lock()->value = rootNode->SavedOffset(); dump.lock()->fileHeader.Write(); - fileHeaderZero = false; + rootNodeUnsaved = false; } +} + +template<typename TKey, typename TValue> +void Index<TKey, TValue>::Add(TKey key, TValue value) +{ + rootNode->Add(key, value); + + AfterAdd(); } template<typename TKey, typename TValue> @@ -58,15 +78,7 @@ { rootNode->AddOrUpdate(key, value); - if (fileHeaderZero) - { - rootNode->Write(); - - fileHeaderOffset.lock()->value = rootNode->SavedOffset(); - dump.lock()->fileHeader.Write(); - - fileHeaderZero = false; - } + AfterAdd(); } template<typename TKey, typename TValue> diff --git a/Indexes/IndexInnerNode.h b/Indexes/IndexInnerNode.h new file mode 100644 index 0000000..971ad0c --- /dev/null +++ b/Indexes/IndexInnerNode.h @@ -0,0 +1,48 @@ +#pragma once + +#include <map> +#include "IndexNode.h" + +template<typename TKey, typename TValue> +class IndexInnerNode : public IndexNode<TKey, TValue> +{ + template<typename TIndexKey, typename TIndexValue> + friend class IndexInnerIterator; + + using typename IndexNode<TKey, TValue>::SplitResult; + + using DumpObjectBase::WriteValue; +private: + std::vector<TKey> keys; + std::vector<Offset> childOffsets; + std::vector<std::unique_ptr<IndexNode<TKey, TValue>>> cachedChildren; + + IndexNode<TKey, TValue>* GetChildByIndex(std::uint16_t index); + std::uint16_t GetKeyIndex(TKey key); + + void AfterAdd(std::uint16_t updatedChildIndex); +protected: + virtual void WriteInternal() override; +public: + static std::unique_ptr<IndexNode<TKey, TValue>> Read(weak_ptr<WritableDump> dump, istream &stream); + + IndexInnerNode(weak_ptr<WritableDump> dump); + IndexInnerNode(weak_ptr<WritableDump> dump, SplitResult splitResult); + + virtual void Write() override; + + virtual uint32_t NewLength() override; + + virtual TValue Get(TKey key) override; + virtual void Add(TKey key, TValue value) override; + virtual void AddOrUpdate(TKey key, TValue value) override; + virtual void Remove(TKey key) override; + + virtual std::uint32_t RealLength() override; + virtual SplitResult Split() override; + + virtual shared_ptr<IndexNodeIterator<TKey, TValue>> begin() override; + virtual shared_ptr<IndexNodeIterator<TKey, TValue>> end() override; +}; + +#include "IndexInnerNode.tpp" diff --git a/Indexes/IndexInnerNode.tpp b/Indexes/IndexInnerNode.tpp new file mode 100644 index 0000000..fcd24c6 --- /dev/null +++ b/Indexes/IndexInnerNode.tpp @@ -0,0 +1,229 @@ +#include <algorithm> +#include <utility> +#include <vector> +#include "../DumpObjects/DumpTraits.h" +#include "../DumpObjects/DumpObjectKind.h" +#include "../CollectionHelpers.h" +#include "Iterators/IndexInnerIterator.h" + +template<typename TKey, typename TValue> +IndexNode<TKey, TValue>* IndexInnerNode<TKey, TValue>::GetChildByIndex(std::uint16_t index) +{ + if (cachedChildren[index] == nullptr) + { + cachedChildren[index] = IndexNode<TKey, TValue>::Read(this->dump, childOffsets[index].value); + } + + return cachedChildren[index].get(); +} + +template<typename TKey, typename TValue> +std::uint16_t IndexInnerNode<TKey, TValue>::GetKeyIndex(TKey key) +{ + auto foundRef = std::lower_bound(keys.begin(), keys.end(), key); + auto foundPos = foundRef - keys.begin(); + + return foundPos; +} + +template<typename TKey, typename TValue> +TValue IndexInnerNode<TKey, TValue>::Get(TKey key) +{ + auto index = GetKeyIndex(key); + return GetChildByIndex(index)->Get(key); +} + +template<typename TKey, typename TValue> +void IndexInnerNode<TKey, TValue>::AfterAdd(std::uint16_t updatedChildIndex) +{ + auto updatedChild = GetChildByIndex(updatedChildIndex); + + if (updatedChild->IsOversized()) + { + this->dump.lock()->spaceManager->Delete(updatedChild->SavedOffset(), updatedChild->NewLength()); + + auto splitted = updatedChild->Split(); + splitted.LeftNode->Write(); + splitted.RightNode->Write(); + + insert_at(keys, updatedChildIndex, splitted.SplitKey); + + childOffsets[updatedChildIndex] = splitted.LeftNode->SavedOffset(); + cachedChildren[updatedChildIndex] = std::move(splitted.LeftNode); + + insert_at(childOffsets, updatedChildIndex + 1, splitted.RightNode->SavedOffset()); + insert_at(cachedChildren, updatedChildIndex + 1, std::move(splitted.RightNode)); + } +} + +template<typename TKey, typename TValue> +void IndexInnerNode<TKey, TValue>::Add(TKey key, TValue value) +{ + auto index = GetKeyIndex(key); + auto child = GetChildByIndex(index); + + child->Add(key, value); + + AfterAdd(index); +} + +template<typename TKey, typename TValue> +void IndexInnerNode<TKey, TValue>::AddOrUpdate(TKey key, TValue value) +{ + auto index = GetKeyIndex(key); + auto child = GetChildByIndex(index); + + child->AddOrUpdate(key, value); + + AfterAdd(index); +} + +template<typename TKey, typename TValue> +void IndexInnerNode<TKey, TValue>::Remove(const TKey key) +{ + auto index = GetKeyIndex(key); + return GetChildByIndex(index)->Remove(key); +} + +template<typename TKey, typename TValue> +IndexInnerNode<TKey, TValue>::IndexInnerNode(weak_ptr<WritableDump> dump) + : IndexNode<TKey, TValue>(dump) +{} + +template<typename TKey, typename TValue> +IndexInnerNode<TKey, TValue>::IndexInnerNode(weak_ptr<WritableDump> dump, SplitResult splitResult) + : IndexNode<TKey, TValue>(dump) +{ + keys.push_back(splitResult.SplitKey); + + auto leftOffset = splitResult.LeftNode->SavedOffset(); + if (leftOffset == 0) + throw DumpException(); + childOffsets.push_back(leftOffset); + cachedChildren.push_back(std::move(splitResult.LeftNode)); + + auto rightOffset = splitResult.RightNode->SavedOffset(); + if (rightOffset == 0) + throw DumpException(); + childOffsets.push_back(rightOffset); + cachedChildren.push_back(std::move(splitResult.RightNode)); +} + +template<typename TKey, typename TValue> +unique_ptr<IndexNode<TKey, TValue>> IndexInnerNode<TKey, TValue>::Read(weak_ptr<WritableDump> dump, istream &stream) +{ + auto node = new IndexInnerNode<TKey, TValue>(dump); + + // DumpObjectKind is assumed to be already read by IndexNode::Read + + uint16_t count = DumpTraits<uint16_t>::Read(stream); + + for (int i = 0; i < count; i++) + { + node->keys.push_back(DumpTraits<TKey>::Read(stream)); + } + + for (int i = 0; i < count + 1; i++) + { + node->childOffsets.push_back(DumpTraits<Offset>::Read(stream)); + node->cachedChildren.push_back(std::unique_ptr<IndexNode<TKey, TValue>>()); + } + + return unique_ptr<IndexNode<TKey, TValue>>(node); +} + +template<typename TKey, typename TValue> +void IndexInnerNode<TKey, TValue>::WriteInternal() +{ + WriteValue((uint8_t)DumpObjectKind::IndexInnerNode); + WriteValue((uint16_t)keys.size()); + + for (auto key : keys) + { + WriteValue(key); + } + + for (auto offset : childOffsets) + { + WriteValue(offset); + } +} + +template<typename TKey, typename TValue> +void IndexInnerNode<TKey, TValue>::Write() +{ + IndexNode<TKey, TValue>::Write(); + + for (auto &cachedChild : cachedChildren) + { + if (cachedChild != nullptr) + cachedChild->Write(); + } +} + +template<typename TKey, typename TValue> +uint32_t IndexInnerNode<TKey, TValue>::NewLength() +{ + return this->Size; +} + +template<typename TKey, typename TValue> +std::uint32_t IndexInnerNode<TKey, TValue>::RealLength() +{ + return DumpTraits<uint8_t>::DumpSize((uint8_t)DumpObjectKind::IndexLeafNode) + + DumpTraits<uint16_t>::DumpSize(keys.size()) + + keys.size() * DumpTraits<TKey>::DumpSize() + + (keys.size() + 1) * DumpTraits<Offset>::DumpSize(); +} + +template<typename TKey, typename TValue> +typename IndexNode<TKey, TValue>::SplitResult IndexInnerNode<TKey, TValue>::Split() +{ + auto left = new IndexInnerNode<TKey, TValue>(this->dump); + auto right = new IndexInnerNode<TKey, TValue>(this->dump); + + auto middle = keys.size() / 2; + + unsigned i = 0; + + for (; i < middle; i++) + { + left->keys.push_back(keys[i]); + left->childOffsets.push_back(childOffsets[i]); + left->cachedChildren.push_back(std::move(cachedChildren[i])); + } + + left->childOffsets.push_back(childOffsets[i]); + left->cachedChildren.push_back(std::move(cachedChildren[i])); + + TKey middleKey = keys[i]; + + i++; + + for (; i < keys.size(); i++) + { + right->keys.push_back(keys[i]); + right->childOffsets.push_back(childOffsets[i]); + right->cachedChildren.push_back(std::move(cachedChildren[i])); + } + + right->childOffsets.push_back(childOffsets[i]); + right->cachedChildren.push_back(std::move(cachedChildren[i])); + + return SplitResult( + std::unique_ptr<IndexNode<TKey, TValue>>(left), + std::unique_ptr<IndexNode<TKey, TValue>>(right), + middleKey); +} + +template<typename TKey, typename TValue> +shared_ptr<IndexNodeIterator<TKey, TValue>> IndexInnerNode<TKey, TValue>::begin() +{ + return shared_ptr<IndexNodeIterator<TKey, TValue>>(new IndexInnerIterator<TKey, TValue>(this, true)); +} + +template<typename TKey, typename TValue> +shared_ptr<IndexNodeIterator<TKey, TValue>> IndexInnerNode<TKey, TValue>::end() +{ + return shared_ptr<IndexNodeIterator<TKey, TValue>>(new IndexInnerIterator<TKey, TValue>(this, false)); +} diff --git a/Indexes/IndexLeafNode.h b/Indexes/IndexLeafNode.h index 71b4678..5b53439 100644 --- a/Indexes/IndexLeafNode.h +++ b/Indexes/IndexLeafNode.h @@ -8,9 +8,8 @@ template<typename TKey, typename TValue> class IndexLeafNode : public IndexNode<TKey, TValue> { + using typename IndexNode<TKey, TValue>::SplitResult; private: - static const int Size = 0xFFFF; - map<TKey, TValue> indexMap; protected: virtual void WriteInternal() override; @@ -29,8 +28,11 @@ virtual void AddOrUpdate(TKey key, TValue value) override; virtual void Remove(TKey key) override; - virtual shared_ptr<IndexNodeIterator<TKey, TValue>> begin() const override; - virtual shared_ptr<IndexNodeIterator<TKey, TValue>> end() const override; + virtual std::uint32_t RealLength() override; + virtual SplitResult Split() override; + + virtual shared_ptr<IndexNodeIterator<TKey, TValue>> begin() override; + virtual shared_ptr<IndexNodeIterator<TKey, TValue>> end() override; }; #include "IndexLeafNode.tpp" diff --git a/Indexes/IndexLeafNode.tpp b/Indexes/IndexLeafNode.tpp index 3c0681b..b657beb 100644 --- a/Indexes/IndexLeafNode.tpp +++ b/Indexes/IndexLeafNode.tpp @@ -21,9 +21,8 @@ template<typename TKey, typename TValue> void IndexLeafNode<TKey, TValue>::Add(TKey key, TValue value) { - if (indexMap.size() >= Size) + if (indexMap.size() >= std::numeric_limits<uint16_t>::max()) { - // TODO throw new DumpException(); } @@ -100,19 +99,53 @@ template<typename TKey, typename TValue> uint32_t IndexLeafNode<TKey, TValue>::NewLength() { - return DumpTraits<uint8_t>::DumpSize((uint8_t)DumpObjectKind::IndexLeafNode) - + DumpTraits<uint16_t>::DumpSize(indexMap.size()) - + Size * (DumpTraits<TKey>::DumpSize() + DumpTraits<TValue>::DumpSize()); + return this->Size; } template<typename TKey, typename TValue> -shared_ptr<IndexNodeIterator<TKey, TValue>> IndexLeafNode<TKey, TValue>::begin() const +std::uint32_t IndexLeafNode<TKey, TValue>:: RealLength() +{ + return DumpTraits<uint8_t>::DumpSize((uint8_t)DumpObjectKind::IndexLeafNode) + + DumpTraits<uint16_t>::DumpSize(indexMap.size()) + + indexMap.size() * (DumpTraits<TKey>::DumpSize() + DumpTraits<TValue>::DumpSize()); +} + +template<typename TKey, typename TValue> +typename IndexNode<TKey, TValue>::SplitResult IndexLeafNode<TKey, TValue>::Split() +{ + auto left = new IndexLeafNode<TKey, TValue>(this->dump); + auto right = new IndexLeafNode<TKey, TValue>(this->dump); + + auto it = indexMap.begin(); + for (unsigned i = 0; i < (indexMap.size() - 1) / 2; i++) + { + left->indexMap.insert(*it); + it++; + } + + auto middleKey = it->first; + left->indexMap.insert(*it); + it++; + + for (; it != indexMap.end(); it++) + { + right->indexMap.insert(*it); + } + + return SplitResult( + std::unique_ptr<IndexNode<TKey, TValue>>(left), + std::unique_ptr<IndexNode<TKey, TValue>>(right), + middleKey); +} + +template<typename TKey, typename TValue> +shared_ptr<IndexNodeIterator<TKey, TValue>> IndexLeafNode<TKey, TValue>::begin() { return shared_ptr<IndexNodeIterator<TKey, TValue>>(new IndexLeafIterator<TKey, TValue>(indexMap.begin())); } template<typename TKey, typename TValue> -shared_ptr<IndexNodeIterator<TKey, TValue>> IndexLeafNode<TKey, TValue>::end() const +shared_ptr<IndexNodeIterator<TKey, TValue>> IndexLeafNode<TKey, TValue>::end() { return shared_ptr<IndexNodeIterator<TKey, TValue>>(new IndexLeafIterator<TKey, TValue>(indexMap.end())); } diff --git a/Indexes/IndexNode.h b/Indexes/IndexNode.h index 63d5c9d..f936e99 100644 --- a/Indexes/IndexNode.h +++ b/Indexes/IndexNode.h @@ -8,26 +8,54 @@ class IndexNodeIterator; template<typename TKey, typename TValue> +class IndexNode; + +template<typename TKey, typename TValue> class IndexNode : public DumpObject { protected: - virtual void WriteInternal() = 0; + static const int Size = 4096; + virtual void WriteInternal() = 0; public: + struct SplitResult + { + SplitResult( + std::unique_ptr<IndexNode<TKey, TValue>> leftNode, + std::unique_ptr<IndexNode<TKey, TValue>> rightNode, + TKey splitKey) + : LeftNode(std::move(leftNode)), RightNode(std::move(rightNode)), SplitKey(splitKey) + {} + + SplitResult(SplitResult&& other) + : LeftNode(std::move(other.LeftNode)), RightNode(std::move(other.RightNode)), SplitKey(std::move(other.SplitKey)) + {} + + std::unique_ptr<IndexNode<TKey, TValue>> LeftNode; + std::unique_ptr<IndexNode<TKey, TValue>> RightNode; + TKey SplitKey; + }; + static unique_ptr<IndexNode> Read(weak_ptr<WritableDump> dump, uint64_t offset); static unique_ptr<IndexNode> CreateNew(weak_ptr<WritableDump> dump); IndexNode(weak_ptr<WritableDump> dump); using DumpObject::Write; + // write to a pre-allocated offset + void Write(Offset offset); virtual TValue Get(TKey key) = 0; virtual void Add(TKey key, TValue value) = 0; virtual void AddOrUpdate(TKey key, TValue value) = 0; virtual void Remove(TKey key) = 0; - virtual shared_ptr<IndexNodeIterator<TKey, TValue>> begin() const = 0; - virtual shared_ptr<IndexNodeIterator<TKey, TValue>> end() const = 0; + bool IsOversized(); + virtual std::uint32_t RealLength() = 0; + virtual SplitResult Split() = 0; + + virtual shared_ptr<IndexNodeIterator<TKey, TValue>> begin() = 0; + virtual shared_ptr<IndexNodeIterator<TKey, TValue>> end() = 0; }; #include "IndexNode.tpp" diff --git a/Indexes/IndexNode.tpp b/Indexes/IndexNode.tpp index d827d56..20de510 100644 --- a/Indexes/IndexNode.tpp +++ b/Indexes/IndexNode.tpp @@ -1,5 +1,6 @@ #include "IndexNode.h" #include "IndexLeafNode.h" +#include "IndexInnerNode.h" #include "../DumpException.h" #include "../Dump.h" #include "../DumpObjects/DumpObjectKind.h" @@ -16,17 +17,24 @@ auto &stream = *(dumpRef->stream); stream.seekp(offset); + std::unique_ptr<IndexNode<TKey, TValue>> result; + char byte; stream.read(&byte, 1); if (byte == (char)DumpObjectKind::IndexLeafNode) { - auto result = IndexLeafNode<TKey, TValue>::Read(dump, stream); - result->savedOffset = offset; - result->savedLength = result->NewLength(); - return result; + result = IndexLeafNode<TKey, TValue>::Read(dump, stream); + } + else if (byte == (char)DumpObjectKind::IndexInnerNode) + { + result = IndexInnerNode<TKey, TValue>::Read(dump, stream); } else - throw new DumpException(); + throw DumpException(); + + result->savedOffset = offset; + result->savedLength = result->NewLength(); + return result; } template<typename TKey, typename TValue> @@ -34,3 +42,25 @@ { return unique_ptr<IndexNode<TKey, TValue>>(new IndexLeafNode<TKey, TValue>(dump)); } + +template<typename TKey, typename TValue> +void IndexNode<TKey, TValue>::Write(Offset offset) +{ + auto dumpRef = dump.lock(); + + if (savedOffset != 0) + throw DumpException(); + + stream = dumpRef->stream.get(); + stream->seekp(offset.value); + + WriteInternal(); + + stream = nullptr; +} + +template<typename TKey, typename TValue> +bool IndexNode<TKey, TValue>::IsOversized() +{ + return RealLength() > NewLength(); +} diff --git a/Indexes/Iterators/IndexInnerIterator.h b/Indexes/Iterators/IndexInnerIterator.h new file mode 100644 index 0000000..71cca54 --- /dev/null +++ b/Indexes/Iterators/IndexInnerIterator.h @@ -0,0 +1,22 @@ +#pragma once + +#include <utility> + +template<typename TKey, typename TValue> +class IndexInnerIterator : public IndexNodeIterator<TKey, TValue> +{ + template<typename TIndexKey, typename TIndexValue> + friend class IndexInnerNode; +private: + IndexInnerNode<TKey, TValue> *node; + uint16_t i; + std::shared_ptr<IndexNodeIterator<TKey, TValue>> childIterator; + + IndexInnerIterator(IndexInnerNode<TKey, TValue> *node, bool isBegin); +public: + virtual const pair<TKey, TValue> operator *() const override; + virtual IndexInnerIterator& operator ++() override; + virtual bool equals(const IndexNodeIterator<TKey, TValue> *other) const override; +}; + +#include "IndexInnerIterator.tpp" \ No newline at end of file diff --git a/Indexes/Iterators/IndexInnerIterator.tpp b/Indexes/Iterators/IndexInnerIterator.tpp new file mode 100644 index 0000000..0b395ef --- /dev/null +++ b/Indexes/Iterators/IndexInnerIterator.tpp @@ -0,0 +1,54 @@ +#include <vector> +#include "IndexInnerIterator.h" + +using std::vector; + +template<typename TKey, typename TValue> +IndexInnerIterator<TKey, TValue>::IndexInnerIterator(IndexInnerNode<TKey, TValue> *node, bool isBegin) + : node(node), + i(isBegin ? 0 : node->childOffsets.size()), + childIterator(isBegin ? node->GetChildByIndex(0)->begin() : nullptr) +{} + +template<typename TKey, typename TValue> +const pair<TKey, TValue> IndexInnerIterator<TKey, TValue>::operator *() const +{ + return **childIterator; +} + +template<typename TKey, typename TValue> +IndexInnerIterator<TKey, TValue>& IndexInnerIterator<TKey, TValue>::operator ++() +{ + ++(*childIterator); + + while (childIterator == node->GetChildByIndex(i)->end()) + { + i++; + + if (i == node->childOffsets.size()) + childIterator = nullptr; + else + childIterator = node->GetChildByIndex(i)->begin(); + } + + return *this; +} + +template<typename TKey, typename TValue> +bool IndexInnerIterator<TKey, TValue>::equals(const IndexNodeIterator<TKey, TValue> *other) const +{ + auto otherCasted = dynamic_cast<const IndexInnerIterator<TKey, TValue>*>(other); + if (otherCasted == nullptr) + return false; + + if (i == otherCasted->i) + { + if (i == node->childOffsets.size()) + return true; + + if (childIterator->equals(otherCasted->childIterator.get())) + return true; + } + + return false; +} \ No newline at end of file diff --git a/Indexes/Iterators/IndexLeafIterator.tpp b/Indexes/Iterators/IndexLeafIterator.tpp index 2ba8d87..92ad2f5 100644 --- a/Indexes/Iterators/IndexLeafIterator.tpp +++ b/Indexes/Iterators/IndexLeafIterator.tpp @@ -1,6 +1,5 @@ #include <vector> #include "IndexLeafIterator.h" -#include "../../DumpObjects/DumpTraits.h" using std::vector; diff --git a/Indexes/Iterators/IndexNodeIterator.h b/Indexes/Iterators/IndexNodeIterator.h index 8aec9c1..dab4ea1 100644 --- a/Indexes/Iterators/IndexNodeIterator.h +++ b/Indexes/Iterators/IndexNodeIterator.h @@ -3,15 +3,11 @@ #include <utility> #include "../../DumpObjects/DumpObject.h" -using std::pair; - template<typename TKey, typename TValue> class IndexNodeIterator { public: - virtual const pair<TKey, TValue> operator *() const = 0; + virtual const std::pair<TKey, TValue> operator *() const = 0; virtual IndexNodeIterator& operator ++() = 0; virtual bool equals(const IndexNodeIterator *other) const = 0; -}; - -#include "IndexNodeIterator.tpp" \ No newline at end of file +}; \ No newline at end of file diff --git a/Indexes/Iterators/IndexNodeIterator.tpp b/Indexes/Iterators/IndexNodeIterator.tpp deleted file mode 100644 index 7a9a05d..0000000 --- a/Indexes/Iterators/IndexNodeIterator.tpp +++ /dev/null @@ -1 +0,0 @@ -#include "IndexNodeIterator.h" diff --git a/SpaceManager.cpp b/SpaceManager.cpp index 8eca6bc..1f41771 100644 --- a/SpaceManager.cpp +++ b/SpaceManager.cpp @@ -1,3 +1,4 @@ +#include "Indexes/Index.h" #include "SpaceManager.h" #include "Dump.h" @@ -56,4 +57,4 @@ // so spaceIndex.Add() has to go before spaceByLength.insert() spaceIndex.Add(offset, length); spaceByLength.insert(pair<int32_t, Offset>(length, offset)); -} \ No newline at end of file +} diff --git a/SpaceManager.h b/SpaceManager.h index 9517c9e..372a3e7 100644 --- a/SpaceManager.h +++ b/SpaceManager.h @@ -1,3 +1,4 @@ +// warning, because of a cyclic dependency, "Indexes/Index.h" has to be always included before this header #pragma once #include <cstdint> @@ -24,4 +25,4 @@ uint64_t GetSpace(uint32_t length); void Delete(uint64_t offset, uint32_t length); -}; \ No newline at end of file +}; -- To view, visit https://gerrit.wikimedia.org/r/75668 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Iadab9a140821b68ac502ce9d67133972aa86966a Gerrit-PatchSet: 3 Gerrit-Project: operations/dumps/incremental Gerrit-Branch: gsoc Gerrit-Owner: Petr Onderka <[email protected]> Gerrit-Reviewer: Petr Onderka <[email protected]> _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
