Petr Onderka has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/78393


Change subject: Deleting deleted pages
......................................................................

Deleting deleted pages

Change-Id: I5f0e8bb6d8517f197efee515a91d49a5d2e784d7
---
M CollectionHelpers.h
M Dump.cpp
M Dump.h
M DumpObjects/DumpPage.cpp
M DumpObjects/DumpPage.h
M DumpObjects/DumpRevision.cpp
M DumpObjects/DumpRevision.h
M DumpWriters/ArticlesWriterWrapper.cpp
M DumpWriters/ArticlesWriterWrapper.h
M DumpWriters/CompositeWriter.cpp
M DumpWriters/CompositeWriter.h
M DumpWriters/CurrentWriterWrapper.cpp
M DumpWriters/DumpWriter.cpp
M DumpWriters/DumpWriter.h
M DumpWriters/IDumpWriter.h
M DumpWriters/WriterWrapper.cpp
M DumpWriters/WriterWrapper.h
M Objects/Page.cpp
M Objects/Page.h
M XmlWriter.cpp
M libexecstream/win/exec-stream-helpers.cpp
M main.cpp
22 files changed, 199 insertions(+), 136 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/dumps/incremental refs/changes/93/78393/1

diff --git a/CollectionHelpers.h b/CollectionHelpers.h
index 7102002..a69f63f 100644
--- a/CollectionHelpers.h
+++ b/CollectionHelpers.h
@@ -1,3 +1,8 @@
+#pragma once
+
+#include <vector>
+#include <unordered_set>
+
 //http://stackoverflow.com/a/2330559/41071
 template <typename T>
 T& insert_at(T& pContainer, size_t pIndex, const typename T::value_type& 
pValue)
@@ -13,4 +18,15 @@
     pContainer.insert(pContainer.begin() + pIndex, std::move(pValue));
 
     return pContainer;
+}
+
+template <typename T>
+std::unordered_set<T> except(std::vector<T> container1, std::vector<T> 
container2)
+{
+    std::unordered_set<T> set(container1.begin(), container1.end());
+
+    for (auto item : container2)
+        set.erase(item);
+
+    return set;
 }
\ No newline at end of file
diff --git a/Dump.cpp b/Dump.cpp
index b6f2571..58a3c33 100644
--- a/Dump.cpp
+++ b/Dump.cpp
@@ -5,6 +5,7 @@
 #include "Indexes/Index.h"
 #include "SpaceManager.h"
 #include "DumpObjects/DumpRevision.h"
+#include "DumpObjects/DumpPage.h"
 
 using std::move;
 using std::unique_ptr;
@@ -88,11 +89,26 @@
     modelFormatIndex->Write();
 }
 
-void WritableDump::DeleteRevision(uint32_t revisionId)
+void WritableDump::DeletePage(std::uint32_t pageId)
+{
+    Offset offset = pageIdIndex->Get(pageId);
+    DumpPage page(self, pageId);
+    std::uint32_t length = page.NewLength();
+
+    for (auto revisionId : page.page.RevisionIds)
+    {
+        DeleteRevision(revisionId);
+    }
+
+    pageIdIndex->Remove(pageId);
+    spaceManager->Delete(offset.value, length);
+}
+
+void WritableDump::DeleteRevision(std::uint32_t revisionId)
 {
     Offset offset = revisionIdIndex->Get(revisionId);
-    DumpRevision revision(self, true);
-    uint32_t length = revision.NewLength();
+    DumpRevision revision(self, revisionId, false);
+    std::uint32_t length = revision.NewLength();
 
     revisionIdIndex->Remove(revisionId);
     spaceManager->Delete(offset.value, length);
diff --git a/Dump.h b/Dump.h
index e176f44..ca5d8fa 100644
--- a/Dump.h
+++ b/Dump.h
@@ -61,7 +61,9 @@
     // it's necessary to call this after writing is finished
     void WriteIndexes();
 
-    void DeleteRevision(uint32_t revisionId);
+    // also recursively deletes revisions of the given page
+    void DeletePage(std::uint32_t pageId);
+    void DeleteRevision(std::uint32_t revisionId);
 
     std::uint8_t GetIdForModelFormat(std::string model, std::string format);
     std::pair<std::string, std::string> GetModelFormat(std::uint8_t id);
diff --git a/DumpObjects/DumpPage.cpp b/DumpObjects/DumpPage.cpp
index 609fdef..6a3693d 100644
--- a/DumpObjects/DumpPage.cpp
+++ b/DumpObjects/DumpPage.cpp
@@ -9,12 +9,17 @@
     if (pageOffset.value == 0)
     {
         page = Page();
+        wasLoaded = false;
+
         savedOffset = 0;
         savedLength = 0;
     }
     else
     {
         page = Read(dumpRef, pageOffset);
+        originalPage = page;
+        wasLoaded = true;
+
         savedOffset = pageOffset.value;
         savedLength = NewLength();
     }
@@ -40,6 +45,14 @@
     return page;
 }
 
+void DumpPage::Write()
+{
+    if (wasLoaded && originalPage == page)
+        return;
+
+    DumpObject::Write();
+}
+
 void DumpPage::WriteInternal()
 {
     WriteValue((uint8_t)DumpObjectKind::Page);
diff --git a/DumpObjects/DumpPage.h b/DumpObjects/DumpPage.h
index 7be9c09..0efe483 100644
--- a/DumpObjects/DumpPage.h
+++ b/DumpObjects/DumpPage.h
@@ -9,6 +9,9 @@
 class DumpPage : public DumpObject
 {
 private:
+    Page originalPage;
+    bool wasLoaded;
+
     void Load(uint32_t pageId);
     static Page Read(shared_ptr<WritableDump> dump, Offset offset);
 protected:
@@ -20,5 +23,6 @@
     DumpPage(weak_ptr<WritableDump> dump, uint32_t pageId);
     DumpPage(weak_ptr<WritableDump> dump, Offset offset);
 
-    virtual uint32_t NewLength() override;
+    virtual void Write() override;
+    virtual std::uint32_t NewLength() override;
 };
\ No newline at end of file
diff --git a/DumpObjects/DumpRevision.cpp b/DumpObjects/DumpRevision.cpp
index 255da96..3a28e99 100644
--- a/DumpObjects/DumpRevision.cpp
+++ b/DumpObjects/DumpRevision.cpp
@@ -4,7 +4,7 @@
 #include "../SevenZip.h"
 #include "../Indexes/Index.h"
 
-void DumpRevision::Load(uint32_t revisionId)
+void DumpRevision::Load(std::uint32_t revisionId, bool loadText)
 {
     auto dumpRef = dump.lock();
     auto revisionOffset = dumpRef->revisionIdIndex->Get(revisionId);
@@ -13,16 +13,21 @@
         revision = Revision();
         savedOffset = 0;
         savedLength = 0;
+        wasLoaded = false;
     }
     else
     {
-        revision = Read(dumpRef, revisionOffset);
+        revision = Read(dumpRef, revisionOffset, loadText);
+
+        originalFlags = revision.Flags;
+        originalParentId = revision.ParentId;
+
         savedOffset = revisionOffset.value;
         savedLength = NewLength();
     }
 }
 
-Revision DumpRevision::Read(shared_ptr<WritableDump> dump, Offset offset)
+Revision DumpRevision::Read(std::shared_ptr<WritableDump> dump, Offset offset, 
bool loadText)
 {
     Revision revision;
 
@@ -57,10 +62,19 @@
 
     if (!HasFlag(revision.Flags, RevisionFlags::TextDeleted))
     {
-        if (IsPages(dump->fileHeader.Kind))
+        if (withText)
         {
-            std::string compressedText = DumpTraits<string>::ReadLong(stream);
-            revision.SetText(SevenZip::Decompress(compressedText));
+            if (loadText)
+            {
+                std::string compressedText = 
DumpTraits<string>::ReadLong(stream);
+                revision.SetText(SevenZip::Decompress(compressedText));
+            }
+            else
+            {
+                textOffset = stream.tellg();
+                ReadValue(stream, textLength);
+                textUnloaded = true;
+            }
         }
         else
         {
@@ -78,13 +92,29 @@
         compressedText = SevenZip::Compress(revision.GetText());
 }
 
+void DumpRevision::Write()
+{
+    modelFormatId = dump.lock()->GetIdForModelFormat(revision.Model, 
revision.Format);
+    if (modelFormatId == 0)
+        revision.Flags |= RevisionFlags::WikitextModelFormat;
+
+    if (wasLoaded && originalFlags == revision.Flags && originalParentId == 
revision.ParentId)
+        return;
+
+    if (textUnloaded)
+    {
+        auto dumpRef = dump.lock();
+        auto stream = dumpRef->stream.get();
+        stream->seekp(textOffset);
+        compressedText = DumpTraits<string>::ReadLong(*stream);
+    }
+
+    DumpObject::Write();
+}
+
 void DumpRevision::WriteInternal()
 {
     EnsureCompressed();
-
-    std::uint8_t modelFormatId = 
dump.lock()->GetIdForModelFormat(revision.Model, revision.Format);
-    if (modelFormatId == 0)
-        revision.Flags |= RevisionFlags::WikitextModelFormat;
 
     WriteValue((uint8_t)DumpObjectKind::Revision);
     WriteValue(revision.RevisionId);
@@ -122,8 +152,6 @@
 
 uint32_t DumpRevision::NewLength()
 {
-    EnsureCompressed();
-
     std::uint8_t modelFormatId = 
dump.lock()->GetIdForModelFormat(revision.Model, revision.Format);
     if (modelFormatId == 0)
         revision.Flags |= RevisionFlags::WikitextModelFormat;
@@ -145,7 +173,15 @@
     if (!HasFlag(revision.Flags, RevisionFlags::TextDeleted))
     {
         if (withText)
-            result += DumpTraits<string>::DumpSizeLong(compressedText);
+        {
+            if (textUnloaded)
+                result += DumpTraits<std::uint32_t>::DumpSize(textLength) + 
textLength;
+            else
+            {
+                EnsureCompressed();
+                result += 
DumpTraits<std::string>::DumpSizeLong(compressedText);
+            }
+        }
         else
             result += ValueSize(revision.TextLength);
     }
@@ -153,12 +189,13 @@
     return result;
 }
 
-DumpRevision::DumpRevision(weak_ptr<WritableDump> dump, uint32_t revisionId, 
bool withText)
-    : DumpObject(dump), revision(), withText(withText)
+DumpRevision::DumpRevision(std::weak_ptr<WritableDump> dump, std::uint32_t 
revisionId, bool loadText)
+    : DumpObject(dump), revision(), originalFlags(), originalParentId(), 
wasLoaded(true), textUnloaded(false)
 {
-    Load(revisionId);
+    withText = IsPages(dump.lock()->fileHeader.Kind);
+    Load(revisionId, loadText);
 }
 
-DumpRevision::DumpRevision(weak_ptr<WritableDump> dump, bool withText)
-    : DumpObject(dump), revision(), withText(withText)
-{}
+DumpRevision::DumpRevision(std::weak_ptr<WritableDump> dump)
+    : DumpObject(dump), revision(), originalFlags(), originalParentId(), 
wasLoaded(false), textUnloaded(false)
+{}
\ No newline at end of file
diff --git a/DumpObjects/DumpRevision.h b/DumpObjects/DumpRevision.h
index a6e80fc..0f34d4f 100644
--- a/DumpObjects/DumpRevision.h
+++ b/DumpObjects/DumpRevision.h
@@ -4,17 +4,24 @@
 #include "../Dump.h"
 #include "../Objects/Revision.h"
 
-using std::shared_ptr;
-
 // after DumpRevision is created, Text of its revision can't be changed
 class DumpRevision : public DumpObject
 {
 private:
     bool withText;
-    string compressedText;
+    std::string compressedText;
 
-    void Load(uint32_t revisionId);
-    static Revision Read(shared_ptr<WritableDump> dump, Offset offset);
+    RevisionFlags originalFlags;
+    std::uint32_t originalParentId;
+    bool wasLoaded;
+    std::uint8_t modelFormatId;
+
+    std::uint32_t textLength;
+    std::uint32_t textOffset;
+    bool textUnloaded;
+
+    void Load(std::uint32_t revisionId, bool loadText);
+    Revision Read(std::shared_ptr<WritableDump> dump, Offset offset, bool 
loadText);
     void EnsureCompressed();
 protected:
     virtual void WriteInternal() override;
@@ -22,8 +29,9 @@
 public:
     Revision revision;
 
-    DumpRevision(weak_ptr<WritableDump> dump, uint32_t revisionId, bool 
withText);
-    DumpRevision(weak_ptr<WritableDump> dump, bool withText);
+    DumpRevision(std::weak_ptr<WritableDump> dump, std::uint32_t revisionId, 
bool loadText);
+    DumpRevision(std::weak_ptr<WritableDump> dump);
 
-    virtual uint32_t NewLength() override;
+    virtual void Write() override;
+    virtual std::uint32_t NewLength() override;
 };
\ No newline at end of file
diff --git a/DumpWriters/ArticlesWriterWrapper.cpp b/DumpWriters/ArticlesWriterWrapper.cpp
index 229e910..a17c385 100644
--- a/DumpWriters/ArticlesWriterWrapper.cpp
+++ b/DumpWriters/ArticlesWriterWrapper.cpp
@@ -11,26 +11,10 @@
         wrapped->StartPage(page);
 }
 
-const std::vector<std::uint32_t> ArticlesWriterWrapper::GetRevisionIds() const
-{
-    if (!pageInlcuded)
-        throw DumpException();
-
-    return WriterWrapper::GetRevisionIds();
-}
-
 void ArticlesWriterWrapper::AddRevision(const std::shared_ptr<const Revision> 
revision)
 {
     if (pageInlcuded)
         wrapped->AddRevision(revision);
-}
-
-void ArticlesWriterWrapper::DeleteRevision(std::uint32_t revisionId)
-{
-    if (!pageInlcuded)
-        throw DumpException();
-
-    WriterWrapper::DeleteRevision(revisionId);
 }
 
 void ArticlesWriterWrapper::EndPage()
diff --git a/DumpWriters/ArticlesWriterWrapper.h b/DumpWriters/ArticlesWriterWrapper.h
index f2f08b5..366f11e 100644
--- a/DumpWriters/ArticlesWriterWrapper.h
+++ b/DumpWriters/ArticlesWriterWrapper.h
@@ -12,9 +12,7 @@
     {}
 
     virtual void StartPage(const std::shared_ptr<const Page> page) override;
-    virtual const std::vector<std::uint32_t> GetRevisionIds() const override;
     virtual void AddRevision(const std::shared_ptr<const Revision> revision) 
override;
-    virtual void DeleteRevision(std::uint32_t revisionId) override;
     virtual void EndPage() override;
     virtual void SetDumpKind(DumpKind dumpKind) override;
 };
\ No newline at end of file
diff --git a/DumpWriters/CompositeWriter.cpp b/DumpWriters/CompositeWriter.cpp
index 140a83c..c93dde4 100644
--- a/DumpWriters/CompositeWriter.cpp
+++ b/DumpWriters/CompositeWriter.cpp
@@ -7,11 +7,6 @@
         writer->StartPage(page);
 }
 
-const std::vector<std::uint32_t> CompositeWriter::GetRevisionIds() const
-{
-    throw DumpException();
-}
-
 void CompositeWriter::AddRevision(const std::shared_ptr<const Revision> 
revision)
 {
     if (getTextFunction != nullptr)
@@ -19,12 +14,6 @@
 
     for (auto &writer : writers)
         writer->AddRevision(revision);
-}
-
-void CompositeWriter::DeleteRevision(std::uint32_t revisionId)
-{
-    for (auto &writer : writers)
-        writer->DeleteRevision(revisionId);
 }
 
 void CompositeWriter::EndPage()
@@ -45,8 +34,8 @@
         writer->SetDumpKind(dumpKind);
 }
 
-void CompositeWriter::WriteIndexes()
+void CompositeWriter::EndDump()
 {
     for (auto &writer : writers)
-        writer->WriteIndexes();
+        writer->EndDump();
 }
\ No newline at end of file
diff --git a/DumpWriters/CompositeWriter.h b/DumpWriters/CompositeWriter.h
index b562781..5352e80 100644
--- a/DumpWriters/CompositeWriter.h
+++ b/DumpWriters/CompositeWriter.h
@@ -16,11 +16,9 @@
     {}
 
     virtual void StartPage(const std::shared_ptr<const Page> page) override;
-    virtual const std::vector<std::uint32_t> GetRevisionIds() const override;
     virtual void AddRevision(const std::shared_ptr<const Revision> revision) 
override;
-    virtual void DeleteRevision(std::uint32_t revisionId) override;
     virtual void EndPage() override;
     virtual void SetSiteInfo(const std::shared_ptr<const SiteInfo> siteInfo) 
override;
     virtual void SetDumpKind(DumpKind dumpKind) override;
-    virtual void WriteIndexes() override;
+    virtual void EndDump() override;
 };
\ No newline at end of file
diff --git a/DumpWriters/CurrentWriterWrapper.cpp b/DumpWriters/CurrentWriterWrapper.cpp
index 03be130..08416e6 100644
--- a/DumpWriters/CurrentWriterWrapper.cpp
+++ b/DumpWriters/CurrentWriterWrapper.cpp
@@ -7,11 +7,6 @@
 
 void CurrentWriterWrapper::EndPage()
 {
-    for (uint32_t revisionId : wrapped->GetRevisionIds())
-    {
-        wrapped->DeleteRevision(revisionId);
-    }
-
     wrapped->AddRevision(this->revision);
     wrapped->EndPage();
 
diff --git a/DumpWriters/DumpWriter.cpp b/DumpWriters/DumpWriter.cpp
index 3554f5f..c21fadd 100644
--- a/DumpWriters/DumpWriter.cpp
+++ b/DumpWriters/DumpWriter.cpp
@@ -1,6 +1,20 @@
 #include "DumpWriter.h"
 #include "../DumpObjects/DumpRevision.h"
+#include "../CollectionHelpers.h"
+#include "../Indexes/Index.h"
 #include <algorithm>
+
+DumpWriter::DumpWriter(std::shared_ptr<WritableDump> dump, bool withText)
+    : dump(dump), withText(withText)
+{
+    for (auto pair : *dump->pageIdIndex)
+    {
+        std::uint32_t pageId = pair.first;
+        if (unvisitedPageIds.size() <= pageId)
+            unvisitedPageIds.resize(pageId + 1);
+        unvisitedPageIds[pageId] = true;
+    }
+}
 
 void DumpWriter::SetSiteInfo(const std::shared_ptr<const SiteInfo> siteInfo)
 {
@@ -8,42 +22,32 @@
     dump->siteInfo->Write();
 }
 
-void DumpWriter::StartPage(const shared_ptr<const Page> page)
+void DumpWriter::StartPage(const std::shared_ptr<const Page> page)
 {
-    this->page = unique_ptr<DumpPage>(new DumpPage(dump, page->PageId));
+    this->page = std::unique_ptr<DumpPage>(new DumpPage(dump, page->PageId));
+    this->oldRevisionIds = this->page->page.RevisionIds;
     this->page->page = *page;
+    this->unvisitedPageIds[page->PageId] = false;
 }
 
-const std::vector<std::uint32_t> DumpWriter::GetRevisionIds() const
-{
-    return this->page->page.RevisionIds;
-}
-
-void DumpWriter::AddRevision(const shared_ptr<const Revision> revision)
+void DumpWriter::AddRevision(const std::shared_ptr<const Revision> revision)
 {
     page->page.RevisionIds.push_back(revision->RevisionId);
     revisions.push_back(revision);
-}
-
-void DumpWriter::DeleteRevision(std::uint32_t revisionId)
-{
-    dump->DeleteRevision(revisionId);
-
-    auto &revisionIds = page->page.RevisionIds;
-    auto toDeleteIt = std::find(revisionIds.begin(), revisionIds.end(), 
revisionId);
-    if (toDeleteIt == revisionIds.end())
-        throw DumpException();
-    
-    revisionIds.erase(toDeleteIt);
 }
 
 void DumpWriter::EndPage()
 {
     page->Write();
 
+    auto deletedRevisionIds = except(oldRevisionIds, page->page.RevisionIds);
+
+    for (auto revisionId : deletedRevisionIds)
+        dump->DeleteRevision(revisionId);
+
     for (auto revision : revisions)
     {
-        DumpRevision dumpRevision(dump, withText);
+        DumpRevision dumpRevision(dump, revision->RevisionId, false);
         dumpRevision.revision = *revision;
         dumpRevision.Write();
     }
@@ -61,7 +65,13 @@
     dump->fileHeader.Write();
 }
 
-void DumpWriter::WriteIndexes()
+void DumpWriter::EndDump()
 {
+    for (std::uint32_t i = 0; i < unvisitedPageIds.size(); i++)
+    {
+        if (unvisitedPageIds[i])
+            dump->DeletePage(i);
+    }
+
     dump->WriteIndexes();
-}
+}
\ No newline at end of file
diff --git a/DumpWriters/DumpWriter.h b/DumpWriters/DumpWriter.h
index 3efc08e..ca78e9c 100644
--- a/DumpWriters/DumpWriter.h
+++ b/DumpWriters/DumpWriter.h
@@ -1,30 +1,30 @@
 #pragma once
 
 #include "IDumpWriter.h"
-#include "../DumpObjects/DumpPage.h"
 #include "../Dump.h"
 #include "../DumpObjects/DumpPage.h"
-
 
 class DumpWriter : public IDumpWriter
 {
 private:
     std::shared_ptr<WritableDump> dump;
-    unique_ptr<DumpPage> page;
-    vector<shared_ptr<const Revision>> revisions;
+
+    std::unique_ptr<DumpPage> page;
+    // this is necessary, so that page object can be on disk before its 
revision objects
+    // it shouldn't waste too much memory, because these revisions don't 
contain any text
+    std::vector<std::shared_ptr<const Revision>> revisions;
+    std::vector<std::uint32_t> oldRevisionIds;
+
+    std::vector<bool> unvisitedPageIds;
     bool withText;
 
 public:
-    DumpWriter(std::shared_ptr<WritableDump> dump, bool withText)
-        : dump(dump), withText(withText)
-    {}
+    DumpWriter(std::shared_ptr<WritableDump> dump, bool withText);
 
     virtual void StartPage(const std::shared_ptr<const Page> page) override;
-    virtual const std::vector<std::uint32_t> GetRevisionIds() const override;
     virtual void AddRevision(const std::shared_ptr<const Revision> revision) 
override;
-    virtual void DeleteRevision(std::uint32_t revisionId) override;
     virtual void EndPage() override;
     virtual void SetSiteInfo(const std::shared_ptr<const SiteInfo> siteInfo) 
override;
     virtual void SetDumpKind(DumpKind dumpKind) override;
-    virtual void WriteIndexes() override;
+    virtual void EndDump() override;
 };
\ No newline at end of file
diff --git a/DumpWriters/IDumpWriter.h b/DumpWriters/IDumpWriter.h
index fa75de0..22c717f 100644
--- a/DumpWriters/IDumpWriter.h
+++ b/DumpWriters/IDumpWriter.h
@@ -10,13 +10,11 @@
 {
 public:
     virtual void StartPage(const std::shared_ptr<const Page> page) = 0;
-    virtual const std::vector<std::uint32_t> GetRevisionIds() const = 0;
     virtual void AddRevision(const std::shared_ptr<const Revision> revision) = 
0;
-    virtual void DeleteRevision(std::uint32_t revisionId) = 0;
     virtual void EndPage() = 0;
     virtual void SetSiteInfo(const std::shared_ptr<const SiteInfo> siteInfo) = 
0;
     virtual void SetDumpKind(DumpKind dumpKind) = 0;
-    virtual void WriteIndexes() = 0;
+    virtual void EndDump() = 0;
 
     virtual ~IDumpWriter() {}
 };
diff --git a/DumpWriters/WriterWrapper.cpp b/DumpWriters/WriterWrapper.cpp
index 7f30504..1cc81b4 100644
--- a/DumpWriters/WriterWrapper.cpp
+++ b/DumpWriters/WriterWrapper.cpp
@@ -5,19 +5,9 @@
     wrapped->StartPage(page);
 }
 
-const std::vector<std::uint32_t> WriterWrapper::GetRevisionIds() const
-{
-    return wrapped->GetRevisionIds();
-}
-
 void WriterWrapper::AddRevision(const std::shared_ptr<const Revision> revision)
 {
     wrapped->AddRevision(revision);
-}
-
-void WriterWrapper::DeleteRevision(std::uint32_t revisionId)
-{
-    wrapped->DeleteRevision(revisionId);
 }
 
 void WriterWrapper::EndPage()
@@ -30,7 +20,7 @@
     wrapped->SetSiteInfo(siteInfo);
 }
 
-void WriterWrapper::WriteIndexes()
+void WriterWrapper::EndDump()
 {
-    wrapped->WriteIndexes();
+    wrapped->EndDump();
 }
\ No newline at end of file
diff --git a/DumpWriters/WriterWrapper.h b/DumpWriters/WriterWrapper.h
index 6a88e42..af648e2 100644
--- a/DumpWriters/WriterWrapper.h
+++ b/DumpWriters/WriterWrapper.h
@@ -12,11 +12,9 @@
     {}
 
     virtual void StartPage(const std::shared_ptr<const Page> page) override;
-    virtual const std::vector<std::uint32_t> GetRevisionIds() const override;
     virtual void AddRevision(const std::shared_ptr<const Revision> revision) 
override;
-    virtual void DeleteRevision(std::uint32_t revisionId) override;
     virtual void EndPage() override;
     virtual void SetSiteInfo(const std::shared_ptr<const SiteInfo> siteInfo) 
override;
     virtual void SetDumpKind(DumpKind dumpKind) override = 0;
-    virtual void WriteIndexes() override;
+    virtual void EndDump() override;
 };
\ No newline at end of file
diff --git a/Objects/Page.cpp b/Objects/Page.cpp
index 34632c8..d02ecd7 100644
--- a/Objects/Page.cpp
+++ b/Objects/Page.cpp
@@ -2,4 +2,13 @@
 
 Page::Page()
     : PageId(), Namespace()
-{}
\ No newline at end of file
+{}
+
+bool operator ==(const Page &first, const Page &second)
+{
+    return first.PageId == second.PageId
+        && first.Namespace == second.Namespace
+        && first.Title == second.Title
+        && first.RedirectTarget == second.RedirectTarget
+        && first.RevisionIds == second.RevisionIds;
+}
\ No newline at end of file
diff --git a/Objects/Page.h b/Objects/Page.h
index 6302c3d..bc63c17 100644
--- a/Objects/Page.h
+++ b/Objects/Page.h
@@ -4,20 +4,18 @@
 #include <string>
 #include <vector>
 
-using std::uint32_t;
-using std::string;
-using std::vector;
-
 class Page
 {
 public:
-    uint32_t PageId;
+    std::uint32_t PageId;
     std::int16_t Namespace;
-    string Title;
+    std::string Title;
     // if empty, the page is not a redirect
-    string RedirectTarget;
+    std::string RedirectTarget;
 
-    vector<uint32_t> RevisionIds;
+    std::vector<std::uint32_t> RevisionIds;
 
     Page();
-};
\ No newline at end of file
+};
+
+bool operator ==(const Page &first, const Page &second);
\ No newline at end of file
diff --git a/XmlWriter.cpp b/XmlWriter.cpp
index 5ab81fc..3db3ee5 100644
--- a/XmlWriter.cpp
+++ b/XmlWriter.cpp
@@ -84,7 +84,7 @@
         int j = 0;
         for (auto revisionId : page.RevisionIds)
         {
-            auto revision = DumpRevision(dump, revisionId, false).revision;
+            auto revision = DumpRevision(dump, revisionId, true).revision;
 
             output.BeginElement("revision");
 
diff --git a/libexecstream/win/exec-stream-helpers.cpp b/libexecstream/win/exec-stream-helpers.cpp
index 456ab6c..ac624fe 100644
--- a/libexecstream/win/exec-stream-helpers.cpp
+++ b/libexecstream/win/exec-stream-helpers.cpp
@@ -290,7 +290,7 @@
     m_error_code=ERROR_SUCCESS;
     m_error_message="";
 
-    m_wait_timeout=2000;
+    m_wait_timeout=10000;
     m_buffer_limit=0;
     m_read_buffer_size=4096;
 
diff --git a/main.cpp b/main.cpp
index 4af9bae..5927eff 100644
--- a/main.cpp
+++ b/main.cpp
@@ -110,7 +110,7 @@
 
     XmlMediawikiProcessor::Process(&writer, inputFileName);
 
-    writer.WriteIndexes();
+    writer.EndDump();
 
     return true;
 }
@@ -142,7 +142,7 @@
 
     XmlMediawikiProcessor::Process(&writer, dumpBackupStream);
 
-    writer.WriteIndexes();
+    writer.EndDump();
 
     return true;
 }

-- 
To view, visit https://gerrit.wikimedia.org/r/78393
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I5f0e8bb6d8517f197efee515a91d49a5d2e784d7
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps/incremental
Gerrit-Branch: gsoc
Gerrit-Owner: Petr Onderka <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to