Petr Onderka has submitted this change and it was merged.

Change subject: saving page metadata
......................................................................


saving page metadata

Change-Id: Iff5afe7457f6ad74b1faadeefe34fd10f0fbada7
---
M .gitignore
M Dump.cpp
M Dump.h
M DumpObjects/DumpObject.cpp
M DumpObjects/DumpObject.h
A DumpObjects/DumpObjectKind.h
A DumpObjects/DumpPage.cpp
A DumpObjects/DumpPage.h
M DumpObjects/DumpTraits.h
M DumpObjects/FileHeader.cpp
M DumpObjects/FileHeader.h
M DumpObjects/Offset.cpp
M DumpObjects/Offset.h
D DumpObjects/Page.h
D DumpWriter.h
A DumpWriters/DumpWriter.h
A DumpWriters/StubCurrentWriter.cpp
A DumpWriters/StubCurrentWriter.h
R DumpWriters/TestDumpWriter.cpp
A DumpWriters/TestDumpWriter.h
M Incremental dumps.vcxproj
M Indexes/Index.h
M Indexes/Index.tpp
M Indexes/IndexLeafNode.h
M Indexes/IndexLeafNode.tpp
M Indexes/IndexNode.h
M Indexes/IndexNode.tpp
A Objects/Page.h
R Objects/Revision.h
M SpaceManager.cpp
M SpaceManager.h
D TestDumpWriter.h
M XmlPageProcessor.cpp
M XmlPageProcessor.h
M XmlRevisionProcessor.h
M main.cpp
36 files changed, 507 insertions(+), 166 deletions(-)

Approvals:
  Petr Onderka: Verified; Looks good to me, approved



diff --git a/.gitignore b/.gitignore
index 9ea4b86..aa34183 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,4 @@
 *.vcxproj.filters
 *.vcxproj.user
 /Debug/
+/Release/
diff --git a/Dump.cpp b/Dump.cpp
index a0ad987..ea66e56 100644
--- a/Dump.cpp
+++ b/Dump.cpp
@@ -59,8 +59,8 @@
 
     spaceManager = unique_ptr<SpaceManager>(new SpaceManager(self));
 
-    pageIdIndex = unique_ptr<Index<int32_t, Offset>>(
-        new Index<int32_t, Offset>(self, shared_ptr<Offset>(self.lock(), &fileHeader.PageIdIndexRoot)));
+    pageIdIndex = unique_ptr<Index<uint32_t, Offset>>(
+        new Index<uint32_t, Offset>(self, shared_ptr<Offset>(self.lock(), &fileHeader.PageIdIndexRoot)));
 }
 
 shared_ptr<WritableDump> WritableDump::Create(string fileName)
diff --git a/Dump.h b/Dump.h
index ddf0660..a082132 100644
--- a/Dump.h
+++ b/Dump.h
@@ -26,7 +26,7 @@
 public:
     // TODO: others should not be able to steal this stream
     unique_ptr<iostream> stream;
-    unique_ptr<Index<int32_t, Offset>> pageIdIndex;
+    unique_ptr<Index<uint32_t, Offset>> pageIdIndex;
 
     ReadableDump(string fileName);
 
diff --git a/DumpObjects/DumpObject.cpp b/DumpObjects/DumpObject.cpp
index 5356958..32b88f2 100644
--- a/DumpObjects/DumpObject.cpp
+++ b/DumpObjects/DumpObject.cpp
@@ -9,8 +9,8 @@
 {
     auto dumpRef = dump.lock();
 
-    int32_t newLength = NewLength();
-    int64_t newOffset;
+    uint32_t newLength = NewLength();
+    uint64_t newOffset;
 
     if (newLength == savedLength)
         newOffset = savedOffset;
@@ -24,16 +24,23 @@
         newOffset = spaceManager->GetSpace(newLength);
     }
 
-    ostream& stream = *(dumpRef->stream);
-    stream.seekp(newOffset);
+    stream = dumpRef->stream.get();
+    stream->seekp(newOffset);
 
-    Write(stream);
+    WriteInternal();
+
+    stream = nullptr;
 
     savedOffset = newOffset;
     savedLength = newLength;
+
+    UpdateIndex(newOffset);
 }
 
-int64_t DumpObject::SavedOffset()
+void DumpObject::UpdateIndex(Offset offset)
+{}
+
+uint64_t DumpObject::SavedOffset() const
 {
     return savedOffset;
 }
\ No newline at end of file
diff --git a/DumpObjects/DumpObject.h b/DumpObjects/DumpObject.h
index d12faf1..7a3e166 100644
--- a/DumpObjects/DumpObject.h
+++ b/DumpObjects/DumpObject.h
@@ -3,26 +3,47 @@
 #include <cstdint>
 #include <memory>
 #include <iostream>
+#include "Offset.h"
 
 class WritableDump;
 
-using std::int64_t;
+using std::uint64_t;
 using std::unique_ptr;
 using std::weak_ptr;
 using std::ostream;
 
 class DumpObject
 {
+private:
+    ostream *stream;
 protected:
     weak_ptr<WritableDump> dump;
-    int64_t savedOffset;
-    int32_t savedLength;
+    uint64_t savedOffset;
+    uint32_t savedLength;
 
     DumpObject(weak_ptr<WritableDump> dump);
-    virtual void Write(ostream &stream) = 0;
+    virtual void WriteInternal() = 0;
+    virtual void UpdateIndex(Offset offset);
 
+    template<typename T>
+    void WriteValue(const T value);
+
+    template<typename T>
+    uint32_t ValueSize(const T value) const;
 public:
     virtual void Write();
-    virtual int32_t NewLength() = 0;
-    int64_t SavedOffset();
-};
\ No newline at end of file
+    virtual uint32_t NewLength() const = 0;
+    uint64_t SavedOffset() const;
+};
+
+template<typename T>
+void DumpObject::WriteValue(const T value)
+{
+    DumpTraits<T>::Write(*stream, value);
+}
+
+template<typename T>
+uint32_t DumpObject::ValueSize(const T value) const
+{
+    return DumpTraits<T>::DumpSize(value);
+}
\ No newline at end of file
diff --git a/DumpObjects/DumpObjectKind.h b/DumpObjects/DumpObjectKind.h
new file mode 100644
index 0000000..a0c2b15
--- /dev/null
+++ b/DumpObjects/DumpObjectKind.h
@@ -0,0 +1,14 @@
+#pragma once
+
+#include <cstdint>
+
+using std::uint8_t;
+
+enum class DumpObjectKind : uint8_t // one-byte tag identifying the type of a serialized object
+{
+    IndexLeafNode  = 0x01, // written first by IndexLeafNode::WriteInternal
+    IndexInnerNode = 0x02,
+
+    Page           = 0x11, // written first by DumpPage::WriteInternal
+    Revision       = 0x12
+};
\ No newline at end of file
diff --git a/DumpObjects/DumpPage.cpp b/DumpObjects/DumpPage.cpp
new file mode 100644
index 0000000..3736887
--- /dev/null
+++ b/DumpObjects/DumpPage.cpp
@@ -0,0 +1,81 @@
+#include "DumpPage.h"
+#include "DumpObjectKind.h"
+
+// Looks pageId up in the dump's page-id index and initializes this object.
+// An offset of 0 is treated as "not stored yet": the page starts out
+// value-initialized with no saved location; otherwise the stored record is
+// read and its location and length are remembered.
+void DumpPage::Load(uint32_t pageId)
+{
+    auto dumpRef = dump.lock();
+    auto pageOffset = dumpRef->pageIdIndex->Get(pageId);
+    if (pageOffset.value == 0)
+    {
+        page = Page();
+        savedOffset = 0;
+        savedLength = 0;
+    }
+    else
+    {
+        page = Read(dumpRef, pageOffset);
+        savedOffset = pageOffset.value;
+        // the saved length is the serialized size of what was just read
+        savedLength = NewLength();
+    }
+}
+
+// Reads one page record located at the given offset of the dump's stream.
+// The field order mirrors WriteInternal(). Throws DumpException when the
+// leading kind byte is not DumpObjectKind::Page.
+Page DumpPage::Read(shared_ptr<WritableDump> dump, Offset offset)
+{
+    Page page;
+
+    auto &stream = *(dump->stream);
+    // position the get area: this function only reads from the stream
+    stream.seekg(offset.value);
+
+    auto kind = DumpTraits<uint8_t>::Read(stream);
+    if (kind != (uint8_t)DumpObjectKind::Page)
+        throw DumpException(); // by value, like DumpTraits<string>::Write
+
+    page.PageId = DumpTraits<uint32_t>::Read(stream);
+    page.Namespace = DumpTraits<uint16_t>::Read(stream);
+    page.Title = DumpTraits<string>::Read(stream);
+    page.RedirectTarget = DumpTraits<string>::Read(stream);
+
+    return page;
+}
+
+// Serializes the kind byte followed by the page fields; Read() and
+// NewLength() must stay in step with this order.
+void DumpPage::WriteInternal()
+{
+    WriteValue((uint8_t)DumpObjectKind::Page);
+    WriteValue(page.PageId);
+    WriteValue(page.Namespace);
+    WriteValue(page.Title);
+    WriteValue(page.RedirectTarget);
+}
+
+// Records the offset this page was written at in the page-id index.
+void DumpPage::UpdateIndex(Offset offset)
+{
+    auto dumpRef = dump.lock();
+    dumpRef->pageIdIndex->AddOrUpdate(page.PageId, offset);
+}
+
+// Serialized size in bytes: the sum of the sizes of everything
+// WriteInternal() writes.
+uint32_t DumpPage::NewLength() const
+{
+    return ValueSize((uint8_t)DumpObjectKind::Page) + ValueSize(page.PageId)
+        + ValueSize(page.Namespace) + ValueSize(page.Title) + ValueSize(page.RedirectTarget);
+}
+
+// Creates the object and immediately loads the page with the given id.
+DumpPage::DumpPage(weak_ptr<WritableDump> dump, uint32_t pageId)
+    : DumpObject(dump), page()
+{
+    Load(pageId);
+}
\ No newline at end of file
diff --git a/DumpObjects/DumpPage.h b/DumpObjects/DumpPage.h
new file mode 100644
index 0000000..d2d0b58
--- /dev/null
+++ b/DumpObjects/DumpPage.h
@@ -0,0 +1,23 @@
+#pragma once
+
+#include "DumpObject.h"
+#include "../Dump.h"
+#include "../Objects/Page.h"
+
+using std::shared_ptr;
+
+class DumpPage : public DumpObject // DumpObject that stores one Page in the dump
+{
+private:
+    void Load(uint32_t pageId); // fills `page` using the dump's page-id index
+    static Page Read(shared_ptr<WritableDump> dump, Offset offset); // deserializes the record at offset
+protected:
+    virtual void WriteInternal(); // serializes the kind byte and the page fields
+    virtual void UpdateIndex(Offset offset); // stores the written offset in the page-id index
+public:
+    Page page; // metadata that gets serialized; StubCurrentWriter overwrites it before Write()
+
+    DumpPage(weak_ptr<WritableDump> dump, uint32_t pageId); // loads the page with pageId if the index has it
+
+    virtual uint32_t NewLength() const; // serialized size in bytes
+};
\ No newline at end of file
diff --git a/DumpObjects/DumpTraits.h b/DumpObjects/DumpTraits.h
index 0464268..8044f76 100644
--- a/DumpObjects/DumpTraits.h
+++ b/DumpObjects/DumpTraits.h
@@ -1,9 +1,14 @@
 #pragma once
 
 #include <cstdint>
+#include <memory>
+#include <string>
 #include <iostream>
+#include "../DumpException.h"
 
-using std::int32_t;
+using std::uint32_t;
+using std::unique_ptr;
+using std::string;
 using std::istream;
 using std::ostream;
 
@@ -21,32 +26,37 @@
         value.Write(stream);
     }
 
-    static int32_t DumpSize()
+    static uint32_t DumpSize()
     {
         return T::DumpSize();
+    }
+
+    static uint32_t DumpSize(const T value)
+    {
+        return DumpSize();
     }
 };
 
 template<>
-class DumpTraits<int32_t>
+class DumpTraits<uint32_t>
 {
 public:
-    static int32_t Read(istream &stream)
+    static uint32_t Read(istream &stream)
     {
         char bytes[4];
 
         stream.read(bytes, 4);
 
-        int32_t result = 0;
-        result |= (int32_t)(uint8_t)bytes[0];
-        result |= (int32_t)(uint8_t)bytes[1] << 8;
-        result |= (int32_t)(uint8_t)bytes[2] << 16;
-        result |= (int32_t)(uint8_t)bytes[3] << 24;
+        uint32_t result = 0;
+        result |= (uint32_t)(uint8_t)bytes[0];
+        result |= (uint32_t)(uint8_t)bytes[1] << 8;
+        result |= (uint32_t)(uint8_t)bytes[2] << 16;
+        result |= (uint32_t)(uint8_t)bytes[3] << 24;
 
         return result;
     }
 
-    static void Write(ostream &stream, const int32_t value)
+    static void Write(ostream &stream, const uint32_t value)
     {
         char bytes[4];
 
@@ -58,30 +68,96 @@
         stream.write(bytes, 4);
     }
 
-    static int32_t DumpSize()
+    static uint32_t DumpSize(const uint32_t value = 0)
     {
         return 4;
     }
 };
 
 template<>
-class DumpTraits<char>
+class DumpTraits<uint16_t>
 {
 public:
-    static char Read(istream &stream)
+    static uint16_t Read(istream &stream)
+    {
+        char bytes[2];
+
+        stream.read(bytes, 2);
+
+        uint16_t result = 0;
+        result |= (uint16_t)(uint8_t)bytes[0];
+        result |= (uint16_t)(uint8_t)bytes[1] << 8;
+
+        return result;
+    }
+
+    static void Write(ostream &stream, const uint16_t value)
+    {
+        char bytes[2];
+
+        bytes[0] = value & 0xFF;
+        bytes[1] = (value >> 8) & 0xFF;
+
+        stream.write(bytes, 2);
+    }
+
+    static uint32_t DumpSize(const uint16_t value = 0)
+    {
+        return 2;
+    }
+};
+
+template<>
+class DumpTraits<uint8_t>
+{
+public:
+    static uint8_t Read(istream &stream)
     {
         char byte;
         stream.read(&byte, 1);
         return byte;
     }
 
-    static void Write(ostream &stream, const char value)
+    static void Write(ostream &stream, const uint8_t value)
     {
-        stream.write(&value, 1);
+        stream.put(value);
     }
 
-    static int32_t DumpSize()
+    static uint32_t DumpSize(const uint8_t value = 0)
     {
         return 1;
     }
+};
+
+// for now, handle only strings of length up to 255
+template<>
+class DumpTraits<string>
+{
+public:
+    static string Read(istream &stream)
+    {
+        uint8_t count = DumpTraits<uint8_t>::Read(stream);
+
+        auto bytes = unique_ptr<char[]>(new char[count]);
+        stream.read(bytes.get(), count);
+
+        return string(bytes.get(), count);
+    }
+
+    static void Write(ostream &stream, const string value)
+    {
+        auto length = value.length();
+
+        if (length > 255)
+            throw DumpException();
+
+        DumpTraits<uint8_t>::Write(stream, length);
+
+        stream.write(value.data(), length);
+    }
+
+    static uint32_t DumpSize(const string value)
+    {
+        return DumpTraits<uint8_t>::DumpSize(value.length()) + value.length();
+    }
 };
\ No newline at end of file
diff --git a/DumpObjects/FileHeader.cpp b/DumpObjects/FileHeader.cpp
index 19533f5..0f7c98c 100644
--- a/DumpObjects/FileHeader.cpp
+++ b/DumpObjects/FileHeader.cpp
@@ -6,24 +6,27 @@
     : DumpObject(dump), FileEnd(fileEnd), PageIdIndexRoot(pageIdIndexRoot), FreeSpaceIndexRoot(freeSpaceIndexRoot)
 {}
 
-void FileHeader::Write(ostream &stream)
+void FileHeader::WriteInternal()
 {
-    stream.write("WMID", 4);
-    stream.write(&FileFormatVersion, 1);
-    stream.write(&FileDataVersion, 1);
+    stream->write("WMID", 4);
+    DumpTraits<uint8_t>::Write(*stream, FileFormatVersion);
+    DumpTraits<uint8_t>::Write(*stream, FileDataVersion);
 
-    FileEnd.Write(stream);
-    PageIdIndexRoot.Write(stream);
-    FreeSpaceIndexRoot.Write(stream);
+    FileEnd.Write(*stream);
+    PageIdIndexRoot.Write(*stream);
+    FreeSpaceIndexRoot.Write(*stream);
 }
 
 void FileHeader::Write()
 {
     auto dumpRef = dump.lock();
-    ostream &stream = *(dumpRef->stream);
+    stream = dumpRef->stream.get();
 
-    stream.seekp(0);
-    Write(stream);
+    stream->seekp(0);
+
+    WriteInternal();
+
+    stream = nullptr;
 }
 
 FileHeader FileHeader::Read(ReadableDump const &dump)
@@ -42,9 +45,9 @@
     return FileHeader(fileEnd, pageIdIndexRoot, freeSpaceIndexRoot, dump.GetSelf());
 }
 
-int32_t FileHeader::NewLength()
+uint32_t FileHeader::NewLength() const
 {
-    return 6 + 3 * 6;
+    return 4 + 2 * DumpTraits<uint8_t>::DumpSize() + 3 * DumpTraits<Offset>::DumpSize();
 }
 
 FileHeader::FileHeader(weak_ptr<WritableDump> dump)
diff --git a/DumpObjects/FileHeader.h b/DumpObjects/FileHeader.h
index 4ea97e8..7d6cf20 100644
--- a/DumpObjects/FileHeader.h
+++ b/DumpObjects/FileHeader.h
@@ -11,17 +11,19 @@
 class FileHeader : public DumpObject
 {
 private:
+    ostream* stream;
+
     FileHeader(Offset fileEnd, Offset pageIdIndexRoot, Offset freeSpaceIndexRoot, weak_ptr<WritableDump> dump = weak_ptr<WritableDump>());
 protected:
-    virtual void Write(ostream &stream);
+    void WriteInternal();
 public:
-    static const char FileFormatVersion = 1;
-    static const char FileDataVersion = 1;
+    static const uint8_t FileFormatVersion = 1;
+    static const uint8_t FileDataVersion = 1;
 
     static FileHeader Read(ReadableDump const &dump);
 
     virtual void Write();
-    virtual int32_t NewLength();
+    virtual uint32_t NewLength() const;
 
     Offset FileEnd;
     Offset PageIdIndexRoot;
diff --git a/DumpObjects/Offset.cpp b/DumpObjects/Offset.cpp
index 1a2e2de..7cdb6c7 100644
--- a/DumpObjects/Offset.cpp
+++ b/DumpObjects/Offset.cpp
@@ -1,7 +1,7 @@
 #include "Offset.h"
 #include "../DumpException.h"
 
-Offset::Offset(int64_t value)
+Offset::Offset(uint64_t value)
     : value(value)
 {
     if (value < 0 || value > 0xFFFFFFFFFFFF) // 6 bytes
@@ -28,18 +28,18 @@
 
     stream.read(bytes, 6);
 
-    int64_t offset = 0;
-    offset |= (int64_t)(uint8_t)bytes[0];
-    offset |= (int64_t)(uint8_t)bytes[1] << 8;
-    offset |= (int64_t)(uint8_t)bytes[2] << 16;
-    offset |= (int64_t)(uint8_t)bytes[3] << 24;
-    offset |= (int64_t)(uint8_t)bytes[4] << 32;
-    offset |= (int64_t)(uint8_t)bytes[5] << 40;
+    uint64_t offset = 0;
+    offset |= (uint64_t)(uint8_t)bytes[0];
+    offset |= (uint64_t)(uint8_t)bytes[1] << 8;
+    offset |= (uint64_t)(uint8_t)bytes[2] << 16;
+    offset |= (uint64_t)(uint8_t)bytes[3] << 24;
+    offset |= (uint64_t)(uint8_t)bytes[4] << 32;
+    offset |= (uint64_t)(uint8_t)bytes[5] << 40;
 
     return Offset(offset);
 }
 
-int32_t Offset::DumpSize()
+uint32_t Offset::DumpSize()
 {
     return 6;
 }
diff --git a/DumpObjects/Offset.h b/DumpObjects/Offset.h
index c7cce0e..87f8f3f 100644
--- a/DumpObjects/Offset.h
+++ b/DumpObjects/Offset.h
@@ -3,19 +3,19 @@
 #include <cstdint>
 #include <iostream>
 
-using std::int64_t;
+using std::uint64_t;
 using std::istream;
 using std::ostream;
 
 class Offset
 {
 public:
-    int64_t value;
+    uint64_t value;
 
-    Offset(int64_t value);
+    Offset(uint64_t value = 0);
     void Write(ostream &stream) const;
     static Offset Read(istream &stream);
-    static int32_t DumpSize();
+    static uint32_t DumpSize();
 };
 
 bool operator <(const Offset &first, const Offset &second);
\ No newline at end of file
diff --git a/DumpObjects/Page.h b/DumpObjects/Page.h
deleted file mode 100644
index e4d416c..0000000
--- a/DumpObjects/Page.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#pragma once
-
-#include <string>
-
-using std::string;
-
-class Page
-{
-public:
-    string Title;
-};
\ No newline at end of file
diff --git a/DumpWriter.h b/DumpWriter.h
deleted file mode 100644
index 5b6c10c..0000000
--- a/DumpWriter.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#pragma once
-
-#include <memory>
-
-#include "DumpObjects/Page.h"
-#include "DumpObjects/Revision.h"
-
-using std::shared_ptr;
-
-class DumpWriter
-{
-public:
-    virtual void WritePage(const shared_ptr<const Page> page) = 0;
-    virtual void WriteRevision(const shared_ptr<const Revision> revision) = 0;
-};
\ No newline at end of file
diff --git a/DumpWriters/DumpWriter.h b/DumpWriters/DumpWriter.h
new file mode 100644
index 0000000..56f6dac
--- /dev/null
+++ b/DumpWriters/DumpWriter.h
@@ -0,0 +1,23 @@
+#pragma once
+
+#include <memory>
+
+#include "../Objects/Page.h"
+#include "../Objects/Revision.h"
+
+using std::shared_ptr;
+
+// Interface for consumers of parsed dump content. Judging by the method
+// names, a page is announced with StartPage(), its revisions follow through
+// AddRevision(), and EndPage() closes it -- TODO confirm against the caller.
+class DumpWriter
+{
+public:
+    // Virtual so that implementations are destroyed correctly when deleted
+    // through a DumpWriter pointer.
+    virtual ~DumpWriter() {}
+
+    virtual void StartPage(const shared_ptr<const Page> page) = 0;
+    virtual void AddRevision(const shared_ptr<const Revision> revision) = 0;
+    virtual void EndPage() = 0;
+};
\ No newline at end of file
diff --git a/DumpWriters/StubCurrentWriter.cpp b/DumpWriters/StubCurrentWriter.cpp
new file mode 100644
index 0000000..6c3e3a5
--- /dev/null
+++ b/DumpWriters/StubCurrentWriter.cpp
@@ -0,0 +1,21 @@
+#include "StubCurrentWriter.h"
+
+StubCurrentWriter::StubCurrentWriter(shared_ptr<WritableDump> dump)
+    : dump(dump)
+{}
+
+void StubCurrentWriter::StartPage(const shared_ptr<const Page> page)
+{
+    this->page = unique_ptr<DumpPage>(new DumpPage(dump, page->PageId)); // loads any record already stored for this id
+    this->page->page = *page; // replace the loaded metadata with the incoming one
+}
+
+void StubCurrentWriter::AddRevision(const shared_ptr<const Revision> revision)
+{
+} // no-op: this writer does nothing with revisions
+
+void StubCurrentWriter::EndPage()
+{
+    page->Write(); // NOTE(review): dereferences null if StartPage() was not called first
+    page = nullptr; // destroys the DumpPage created in StartPage()
+}
\ No newline at end of file
diff --git a/DumpWriters/StubCurrentWriter.h b/DumpWriters/StubCurrentWriter.h
new file mode 100644
index 0000000..6ad1f9e
--- /dev/null
+++ b/DumpWriters/StubCurrentWriter.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include "DumpWriter.h"
+#include "../DumpObjects/DumpPage.h"
+
+class StubCurrentWriter : public DumpWriter // DumpWriter that saves page metadata into a WritableDump
+{
+private:
+    shared_ptr<WritableDump> dump; // dump handed to every DumpPage this writer creates
+    unique_ptr<DumpPage> page; // page in progress: set by StartPage(), reset by EndPage()
+public:
+    StubCurrentWriter(shared_ptr<WritableDump> dump);
+
+    virtual void StartPage(const shared_ptr<const Page> page); // creates the DumpPage and copies *page into it
+    virtual void AddRevision(const shared_ptr<const Revision> revision); // currently does nothing
+    virtual void EndPage(); // writes the page started by StartPage()
+};
\ No newline at end of file
diff --git a/TestDumpWriter.cpp b/DumpWriters/TestDumpWriter.cpp
similarity index 76%
rename from TestDumpWriter.cpp
rename to DumpWriters/TestDumpWriter.cpp
index 1b6e9b2..6b34eb2 100644
--- a/TestDumpWriter.cpp
+++ b/DumpWriters/TestDumpWriter.cpp
@@ -16,12 +16,16 @@
     return subject;
 }
 
-void TestDumpWriter::WritePage(const shared_ptr<const Page> page)
+void TestDumpWriter::StartPage(const shared_ptr<const Page> page)
 {
     cout << page->Title << "\n";
 }
 
-void TestDumpWriter::WriteRevision(const shared_ptr<const Revision> revision)
+void TestDumpWriter::AddRevision(const shared_ptr<const Revision> revision)
 {
     cout << " " << ReplaceString(revision->Text, "\n", "\\n").substr(0, 78) << "\n";
+}
+
+void TestDumpWriter::EndPage()
+{
 }
\ No newline at end of file
diff --git a/DumpWriters/TestDumpWriter.h b/DumpWriters/TestDumpWriter.h
new file mode 100644
index 0000000..638adb9
--- /dev/null
+++ b/DumpWriters/TestDumpWriter.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include <string>
+#include "DumpWriter.h"
+
+using std::string;
+
+class TestDumpWriter : public DumpWriter
+{
+private:
+    string ReplaceString(string subject, const string& search, const string& replace);
+public:
+    virtual void StartPage(const shared_ptr<const Page> page);
+    virtual void AddRevision(const shared_ptr<const Revision> revision);
+    virtual void EndPage();
+};
\ No newline at end of file
diff --git a/Incremental dumps.vcxproj b/Incremental dumps.vcxproj
index 4b6a00f..c3e58f6 100644
--- a/Incremental dumps.vcxproj
+++ b/Incremental dumps.vcxproj
@@ -67,7 +67,7 @@
       <Optimization>MaxSpeed</Optimization>
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <IntrinsicFunctions>true</IntrinsicFunctions>
-      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <SDLCheck>true</SDLCheck>
     </ClCompile>
     <Link>
@@ -84,6 +84,8 @@
     <ClCompile Include="DumpObjects\DumpTraits.cpp" />
     <ClCompile Include="DumpObjects\FileHeader.cpp" />
     <ClInclude Include="DumpException.h" />
+    <ClInclude Include="DumpObjects\DumpObjectKind.h" />
+    <ClInclude Include="DumpObjects\DumpPage.h" />
     <ClInclude Include="Indexes\Index.h" />
     <ClInclude Include="Indexes\IndexLeafNode.tpp">
       <FileType>CppCode</FileType>
@@ -93,10 +95,12 @@
       <FileType>CppCode</FileType>
     </ClInclude>
     <ClInclude Include="Indexes\Index.tpp" />
+    <ClCompile Include="DumpObjects\DumpPage.cpp" />
     <ClCompile Include="main.cpp" />
     <ClCompile Include="DumpObjects\Offset.cpp" />
     <ClCompile Include="SpaceManager.cpp" />
-    <ClCompile Include="TestDumpWriter.cpp" />
+    <ClCompile Include="DumpWriters\TestDumpWriter.cpp" />
+    <ClCompile Include="DumpWriters\StubCurrentWriter.cpp" />
     <ClCompile Include="XmlPageProcessor.cpp" />
     <ClCompile Include="XmlRevisionProcessor.cpp" />
     <ClCompile Include="XmlUtils.cpp" />
@@ -110,13 +114,13 @@
     <ClInclude Include="Dump.h" />
     <ClInclude Include="DumpObjects\DumpObject.h" />
     <ClInclude Include="DumpObjects\DumpTraits.h" />
-    <ClInclude Include="DumpWriter.h" />
+    <ClInclude Include="DumpWriters\DumpWriter.h" />
     <ClInclude Include="DumpObjects\FileHeader.h" />
     <ClInclude Include="Indexes\IndexLeafNode.h" />
     <ClInclude Include="Indexes\IndexNode.h" />
     <ClInclude Include="DumpObjects\Offset.h" />
-    <ClInclude Include="DumpObjects\Page.h" />
-    <ClInclude Include="DumpObjects\Revision.h" />
+    <ClInclude Include="Objects\Page.h" />
+    <ClInclude Include="Objects\Revision.h" />
     <ClInclude Include="Indexes\Iterators\IndexIterator.h" />
     <ClInclude Include="Indexes\Iterators\IndexIterator.tpp" />
     <ClInclude Include="Indexes\Iterators\IndexLeafIterator.h" />
@@ -124,7 +128,8 @@
     <ClInclude Include="Indexes\Iterators\IndexNodeIterator.h" />
     <ClInclude Include="Indexes\Iterators\IndexNodeIterator.tpp" />
     <ClInclude Include="SpaceManager.h" />
-    <ClInclude Include="TestDumpWriter.h" />
+    <ClInclude Include="DumpWriters\TestDumpWriter.h" />
+    <ClInclude Include="DumpWriters\StubCurrentWriter.h" />
     <ClInclude Include="XmlPageProcessor.h" />
     <ClInclude Include="XmlRevisionProcessor.h" />
     <ClInclude Include="XmlUtils.h" />
diff --git a/Indexes/Index.h b/Indexes/Index.h
index 37417fc..85a86ac 100644
--- a/Indexes/Index.h
+++ b/Indexes/Index.h
@@ -16,13 +16,12 @@
     unique_ptr<IndexNode<TKey, TValue>> rootNode;
     weak_ptr<WritableDump> dump;
     weak_ptr<Offset> fileHeaderOffset;
-
-    void Save();
 public:
-    Index(weak_ptr<WritableDump> dump, weak_ptr<Offset> fileHeaderOffset);
+    Index(weak_ptr<WritableDump> dump, weak_ptr<Offset> fileHeaderOffset, bool delaySave = false);
 
-    TValue operator[](TKey key);
+    TValue Get(TKey key);
     void Add(TKey key, TValue value);
+    void AddOrUpdate(TKey key, TValue value);
     void Remove(TKey key);
 
     IndexIterator<TKey, TValue> begin() const;
diff --git a/Indexes/Index.tpp b/Indexes/Index.tpp
index 54281be..f99c952 100644
--- a/Indexes/Index.tpp
+++ b/Indexes/Index.tpp
@@ -5,24 +5,36 @@
 using std::move;
 
 template<typename TKey, typename TValue>
-Index<TKey, TValue>::Index(weak_ptr<WritableDump> dump, weak_ptr<Offset> fileHeaderOffset)
+Index<TKey, TValue>::Index(weak_ptr<WritableDump> dump, weak_ptr<Offset> fileHeaderOffset, bool delaySave)
     : dump(dump), fileHeaderOffset(fileHeaderOffset)
 {
     auto offset = fileHeaderOffset.lock();
 
+    fileHeaderZero = false;
+
     if (offset->value == 0)
     {
         rootNode = IndexNode<TKey, TValue>::CreateNew(dump);
-        fileHeaderZero = true;
+
+        if (delaySave)
+        {
+            fileHeaderZero = true;
+        }
+        else
+        {
+            rootNode->Write();
+            fileHeaderOffset.lock()->value = rootNode->SavedOffset();
+            dump.lock()->fileHeader.Write();
+        }
     }
     else
         rootNode = IndexNode<TKey, TValue>::Read(dump, offset->value);
 }
 
 template<typename TKey, typename TValue>
-TValue Index<TKey, TValue>::operator[](TKey key)
+TValue Index<TKey, TValue>::Get(TKey key)
 {
-    return (*rootNode)[key];
+    return rootNode->Get(key);
 }
 
 template<typename TKey, typename TValue>
@@ -40,6 +52,20 @@
 }
 
 template<typename TKey, typename TValue>
+void Index<TKey, TValue>::AddOrUpdate(TKey key, TValue value)
+{
+    rootNode->AddOrUpdate(key, value);
+
+    if (fileHeaderZero)
+    {
+        fileHeaderOffset.lock()->value = rootNode->SavedOffset();
+        dump.lock()->fileHeader.Write();
+
+        fileHeaderZero = false;
+    }
+}
+
+template<typename TKey, typename TValue>
 void Index<TKey, TValue>::Remove(TKey key)
 {
     rootNode->Remove(key);
diff --git a/Indexes/IndexLeafNode.h b/Indexes/IndexLeafNode.h
index 0e94d78..f93960a 100644
--- a/Indexes/IndexLeafNode.h
+++ b/Indexes/IndexLeafNode.h
@@ -9,21 +9,22 @@
 class IndexLeafNode : public IndexNode<TKey, TValue>
 {
 private:
-    static const int Size = 10; // has to be at most 128 for now
+    static const int Size = 255;
 
     map<TKey, TValue> map;
 protected:
-    virtual void Write(ostream &stream);
+    virtual void WriteInternal();
 public:
     static unique_ptr<IndexNode> Read(weak_ptr<WritableDump> dump, istream &stream);
 
     IndexLeafNode(weak_ptr<WritableDump> dump);
 
     using DumpObject::Write;
-    virtual int32_t NewLength();
+    virtual uint32_t NewLength() const;
 
-    virtual TValue operator[](TKey key);
+    virtual TValue Get(TKey key);
     virtual void Add(TKey key, TValue value);
+    virtual void AddOrUpdate(TKey key, TValue value);
     virtual void Remove(TKey key);
 
     virtual shared_ptr<IndexNodeIterator<TKey, TValue>> begin() const;
diff --git a/Indexes/IndexLeafNode.tpp b/Indexes/IndexLeafNode.tpp
index 422a3b1..edd36f6 100644
--- a/Indexes/IndexLeafNode.tpp
+++ b/Indexes/IndexLeafNode.tpp
@@ -7,9 +7,12 @@
 using std::vector;
 
 template<typename TKey, typename TValue>
-TValue IndexLeafNode<TKey, TValue>::operator[](TKey key)
+TValue IndexLeafNode<TKey, TValue>::Get(TKey key)
 {
-    return map.find(key)->second;
+    auto found = map.find(key);
+    if (found == map.end())
+        return TValue();
+    return found->second;
 }
 
 template<typename TKey, typename TValue>
@@ -26,12 +29,25 @@
 }
 
 template<typename TKey, typename TValue>
+// Inserts key when absent; otherwise overwrites its value and saves the node.
+void IndexLeafNode<TKey, TValue>::AddOrUpdate(TKey key, TValue value)
+{
+    auto pos = map.find(key);
+    if (pos == map.end())
+    {
+        Add(key, value);
+        return;
+    }
+    pos->second = value;
+    Write(); // persist the in-place change, as Remove() does after erase()
+}
+
+template<typename TKey, typename TValue>
 void IndexLeafNode<TKey, TValue>::Remove(const TKey key)
 {
     map.erase(key);
     Write();
 }
-
 
 template<typename TKey, typename TValue>
 IndexLeafNode<TKey, TValue>::IndexLeafNode(weak_ptr<WritableDump> dump)
@@ -44,7 +60,7 @@
 {
     auto node = new IndexLeafNode<TKey, TValue>(dump);
 
-    char count = DumpTraits<char>::Read(stream);
+    uint8_t count = DumpTraits<uint8_t>::Read(stream);
 
     vector<TKey> keys;
 
@@ -62,26 +78,26 @@
 }
 
 template<typename TKey, typename TValue>
-void IndexLeafNode<TKey, TValue>::Write(ostream &stream)
+void IndexLeafNode<TKey, TValue>::WriteInternal()
 {
-    DumpTraits<char>::Write(stream, (char)NodeKind::LeafNode);
-       DumpTraits<char>::Write(stream, map.size());
+    WriteValue((uint8_t)DumpObjectKind::IndexLeafNode);
+    WriteValue((uint8_t)map.size());
 
        for (auto pair : map)
     {
-               DumpTraits<TKey>::Write(stream, pair.first);
+               WriteValue(pair.first);
     }
 
        for (auto pair : map)
     {
-               DumpTraits<TValue>::Write(stream, pair.second);
+               WriteValue(pair.second);
     }
 }
 
 template<typename TKey, typename TValue>
-int32_t IndexLeafNode<TKey, TValue>::NewLength()
+uint32_t IndexLeafNode<TKey, TValue>::NewLength() const
 {
-    return 2 * DumpTraits<char>::DumpSize()
+    return 2 * DumpTraits<uint8_t>::DumpSize()
         + Size * (DumpTraits<TKey>::DumpSize() + DumpTraits<TValue>::DumpSize());
 }
 
diff --git a/Indexes/IndexNode.h b/Indexes/IndexNode.h
index 1908934..6265558 100644
--- a/Indexes/IndexNode.h
+++ b/Indexes/IndexNode.h
@@ -11,23 +11,19 @@
 class IndexNode : public DumpObject
 {
 protected:
-    virtual void Write(ostream &stream) = 0;
-
-    enum class NodeKind : char
-    {
-        LeafNode = 1
-    };
+    virtual void WriteInternal() = 0;
 
 public:
-    static unique_ptr<IndexNode> Read(weak_ptr<WritableDump> dump, int64_t offset);
+    static unique_ptr<IndexNode> Read(weak_ptr<WritableDump> dump, uint64_t offset);
     static unique_ptr<IndexNode> CreateNew(weak_ptr<WritableDump> dump);
 
     IndexNode(weak_ptr<WritableDump> dump);
 
     using DumpObject::Write;
 
-    virtual TValue operator[](TKey key) = 0;
+    virtual TValue Get(TKey key) = 0;
     virtual void Add(TKey key, TValue value) = 0;
+    virtual void AddOrUpdate(TKey key, TValue value) = 0;
     virtual void Remove(TKey key) = 0;
 
     virtual shared_ptr<IndexNodeIterator<TKey, TValue>> begin() const = 0;
diff --git a/Indexes/IndexNode.tpp b/Indexes/IndexNode.tpp
index 9b562fa..0c13830 100644
--- a/Indexes/IndexNode.tpp
+++ b/Indexes/IndexNode.tpp
@@ -1,6 +1,7 @@
 #include "IndexNode.h"
 #include "IndexLeafNode.h"
 #include "../DumpException.h"
+#include "../DumpObjects/DumpObjectKind.h"
 
 template<typename TKey, typename TValue>
 IndexNode<TKey, TValue>::IndexNode(weak_ptr<WritableDump> dump)
@@ -8,7 +9,7 @@
 {}
 
 template<typename TKey, typename TValue>
-unique_ptr<IndexNode<TKey, TValue>> IndexNode<TKey, TValue>::Read(weak_ptr<WritableDump> dump, int64_t offset)
+unique_ptr<IndexNode<TKey, TValue>> IndexNode<TKey, TValue>::Read(weak_ptr<WritableDump> dump, uint64_t offset)
 {
     auto dumpRef = dump.lock();
     auto &stream = *(dumpRef->stream);
@@ -16,7 +17,7 @@
 
     char byte;
     stream.read(&byte, 1);
-    if (byte == (char)NodeKind::LeafNode)
+    if (byte == (char)DumpObjectKind::IndexLeafNode)
     {
         auto result = IndexLeafNode<TKey, TValue>::Read(dump, stream);
         result->savedOffset = offset;
diff --git a/Objects/Page.h b/Objects/Page.h
new file mode 100644
index 0000000..8c53e4d
--- /dev/null
+++ b/Objects/Page.h
@@ -0,0 +1,20 @@
+#pragma once
+
+#include <cstdint>
+#include <string>
+
+using std::uint16_t;
+using std::uint32_t;
+using std::string;
+
+// Plain record of page metadata. PageId and Namespace have no initializer,
+// so create instances with Page() (value-initialization) or assign both.
+class Page
+{
+public:
+    uint32_t PageId;
+    uint16_t Namespace;
+    string Title;
+    // if empty, the page is not a redirect
+    string RedirectTarget;
+};
\ No newline at end of file
diff --git a/DumpObjects/Revision.h b/Objects/Revision.h
similarity index 100%
rename from DumpObjects/Revision.h
rename to Objects/Revision.h
diff --git a/SpaceManager.cpp b/SpaceManager.cpp
index 334b4a9..8eca6bc 100644
--- a/SpaceManager.cpp
+++ b/SpaceManager.cpp
@@ -5,7 +5,7 @@
 
 SpaceManager::SpaceManager(weak_ptr<WritableDump> dump)
     : dump(dump),
-      spaceIndex(dump, shared_ptr<Offset>(dump.lock(), &dump.lock()->fileHeader.FreeSpaceIndexRoot)),
+      spaceIndex(dump, shared_ptr<Offset>(dump.lock(), &dump.lock()->fileHeader.FreeSpaceIndexRoot), true),
       spaceByLength()
 {
     for (auto value : spaceIndex)
@@ -14,7 +14,7 @@
     }
 }
 
-int64_t SpaceManager::GetSpace(int32_t length)
+uint64_t SpaceManager::GetSpace(uint32_t length)
 {
     auto foundSpace = spaceByLength.lower_bound(length);
     if (foundSpace != spaceByLength.end())
@@ -46,7 +46,7 @@
     }
 }
 
-void SpaceManager::Delete(int64_t offset, int32_t length)
+void SpaceManager::Delete(uint64_t offset, uint32_t length)
 {
     // TODO: free space at the end just decrements fileEnd
     // TODO: join consecutive free blocks
diff --git a/SpaceManager.h b/SpaceManager.h
index daf168f..40d396a 100644
--- a/SpaceManager.h
+++ b/SpaceManager.h
@@ -5,8 +5,8 @@
 #include <map>
 #include "Indexes/Index.h"
 
-using std::int32_t;
-using std::int64_t;
+using std::uint32_t;
+using std::uint64_t;
 using std::weak_ptr;
 using std::multimap;
 
@@ -16,10 +16,10 @@
 {
 private:
     weak_ptr<WritableDump> dump;
-    Index<Offset, int32_t> spaceIndex;
-    multimap<int32_t, Offset> spaceByLength;
+    Index<Offset, uint32_t> spaceIndex;
+    multimap<uint32_t, Offset> spaceByLength;
 public:
     SpaceManager(weak_ptr<WritableDump> dump);
-    int64_t GetSpace(int32_t length);
-    void Delete(int64_t offset, int32_t length);
+    uint64_t GetSpace(uint32_t length);
+    void Delete(uint64_t offset, uint32_t length);
 };
\ No newline at end of file
diff --git a/TestDumpWriter.h b/TestDumpWriter.h
deleted file mode 100644
index 717dab8..0000000
--- a/TestDumpWriter.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#pragma once
-
-#include <string>
-#include "DumpWriter.h"
-
-using std::string;
-
-class TestDumpWriter : public DumpWriter
-{
-private:
-    string ReplaceString(string subject, const string& search, const string& replace);
-public:
-    virtual void WritePage(const shared_ptr<Page const> page);
-    virtual void WriteRevision(const shared_ptr<const Revision> revision);
-};
\ No newline at end of file
diff --git a/XmlPageProcessor.cpp b/XmlPageProcessor.cpp
index d8ae903..3afc920 100644
--- a/XmlPageProcessor.cpp
+++ b/XmlPageProcessor.cpp
@@ -1,9 +1,10 @@
 #include "XmlPageProcessor.h"
 #include "XmlRevisionProcessor.h"
 #include "XmlUtils.h"
-#include "DumpWriter.h"
+#include "DumpWriters/DumpWriter.h"
 
 using std::make_shared;
+using std::stoi;
 
 void XmlPageProcessor::titleHandler(XML::Element &elem, void *userData)
 {
@@ -11,13 +12,38 @@
     processor->page->Title = readElementData(elem);
 }
 
+void XmlPageProcessor::nsHandler(XML::Element &elem, void *userData)
+{
+    XmlPageProcessor* processor = (XmlPageProcessor*)userData;
+    processor->page->Namespace = stoi(readElementData(elem));
+}
+
+void XmlPageProcessor::idHandler(XML::Element &elem, void *userData)
+{
+    XmlPageProcessor* processor = (XmlPageProcessor*)userData;
+    processor->page->PageId = stoi(readElementData(elem));
+}
+
+void XmlPageProcessor::redirectHandler(XML::Element &elem, void *userData)
+{
+    XmlPageProcessor* processor = (XmlPageProcessor*)userData;
+    
+    //processor->page->RedirectTarget = readElementData(elem);
+    processor->page->RedirectTarget = string(elem.GetAttribute("title"));
+}
+
 void XmlPageProcessor::writePage()
 {
     if (!pageWritten)
     {
-        dumpWriter->WritePage(page);
+        dumpWriter->StartPage(page);
         pageWritten = true;
     }
+}
+
+void XmlPageProcessor::completePage()
+{
+    dumpWriter->EndPage();
 }
 
 XmlPageProcessor::XmlPageProcessor(const shared_ptr<Page> page, DumpWriter* dumpWriter)
@@ -29,11 +55,14 @@
 {
     static int i = 0;
 
-    if (i++ > 5)
+    if (i++ >= 255)
         return;
 
     XML::Handler handlers[] = {
         XML::Handler("title", titleHandler),
+        XML::Handler("ns", nsHandler),
+        XML::Handler("id", idHandler),
+        XML::Handler("redirect", redirectHandler),
         XML::Handler("revision", XmlRevisionProcessor::Handler),
         XML::Handler::END
     };
@@ -45,10 +74,11 @@
     elem.Process(handlers, &pageProcessor);
 
     pageProcessor.writePage();
+    pageProcessor.completePage();
 }
 
 void XmlPageProcessor::ProcessRevision(const shared_ptr<const Revision> revision)
 {
     writePage();
-    dumpWriter->WriteRevision(revision);
+    dumpWriter->AddRevision(revision);
 }
\ No newline at end of file
diff --git a/XmlPageProcessor.h b/XmlPageProcessor.h
index 55deab3..e060267 100644
--- a/XmlPageProcessor.h
+++ b/XmlPageProcessor.h
@@ -2,8 +2,8 @@
 
 #include <memory>
 #include "XML/xmlinput.h"
-#include "DumpObjects/Page.h"
-#include "DumpWriter.h"
+#include "Objects/Page.h"
+#include "DumpWriters/DumpWriter.h"
 
 using std::shared_ptr;
 
@@ -17,7 +17,11 @@
     XmlPageProcessor(const shared_ptr<Page> page, DumpWriter* dumpWriter);
 
     static void titleHandler(XML::Element &elem, void *userData);
+    static void nsHandler(XML::Element &elem, void *userData);
+    static void idHandler(XML::Element &elem, void *userData);
+    static void redirectHandler(XML::Element &elem, void *userData);
     void writePage();
+    void completePage();
 public:
     static void Handler(XML::Element &elem, void *userData);
     void ProcessRevision(const shared_ptr<const Revision> revision);
diff --git a/XmlRevisionProcessor.h b/XmlRevisionProcessor.h
index a5238db..28a6a37 100644
--- a/XmlRevisionProcessor.h
+++ b/XmlRevisionProcessor.h
@@ -1,7 +1,7 @@
 #pragma once
 
 #include "XML/xmlinput.h"
-#include "DumpObjects/Revision.h"
+#include "Objects/Revision.h"
 
 class XmlRevisionProcessor
 {
diff --git a/main.cpp b/main.cpp
index ec78254..06e7e01 100644
--- a/main.cpp
+++ b/main.cpp
@@ -1,7 +1,7 @@
 #include <iostream>
 #include "XML/xmlinput.h"
 #include "XML/xmlfile.h"
-#include "TestDumpWriter.h"
+#include "DumpWriters/StubCurrentWriter.h"
 #include "XmlPageProcessor.h"
 #include "Dump.h"
 
@@ -30,7 +30,7 @@
 int main(int argc, const char* argv[])
 {
     //StandardInputStream stream;
-    /*XML::FileInputStream stream = XML::FileInputStream("C:\\Users\\Svick\\Downloads\\tenwiki-20130622-pages-meta-history.xml");
+    XML::FileInputStream stream = XML::FileInputStream("C:\\Users\\Svick\\Downloads\\tenwiki-20130622-pages-meta-history.xml");
 
     XML::Input input(stream);
 
@@ -39,14 +39,16 @@
         XML::Handler::END
     };
 
-    TestDumpWriter writer;
-
-    input.Process(handlers, &writer);*/
-
     shared_ptr<WritableDump> dump = WritableDump::Create("tmp/test.id");
+
+    StubCurrentWriter writer(dump);
+
+    input.Process(handlers, &writer);
+
+    /*shared_ptr<WritableDump> dump = WritableDump::Create("tmp/test.id");
 
     auto offset = dump->spaceManager->GetSpace(102);
     dump->spaceManager->Delete(offset, 102);
 
-    dump->pageIdIndex->Add(1, 2);
+    dump->pageIdIndex->Add(1, 2);*/
 }
\ No newline at end of file

-- 
To view, visit https://gerrit.wikimedia.org/r/72993
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Iff5afe7457f6ad74b1faadeefe34fd10f0fbada7
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps/incremental
Gerrit-Branch: gsoc
Gerrit-Owner: Petr Onderka <[email protected]>
Gerrit-Reviewer: Petr Onderka <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to