Petr Onderka has uploaded a new change for review.
https://gerrit.wikimedia.org/r/83785
Change subject: Don't save namespace as part of title
......................................................................
Don't save namespace as part of title
Change-Id: I794c19db9e36d11d6d05e3bbd7f6f3fe26fb8b0a
---
M DumpWriters/ArticlesWriterWrapper.cpp
M DumpWriters/ArticlesWriterWrapper.h
M DumpWriters/CompositeWriter.cpp
M DumpWriters/CompositeWriter.h
M DumpWriters/DumpWriter.cpp
M DumpWriters/DumpWriter.h
M DumpWriters/IDumpWriter.h
M DumpWriters/WriterWrapper.cpp
M DumpWriters/WriterWrapper.h
M XmlInput/XmlPageProcessor.cpp
M XmlWriter.cpp
11 files changed, 43 insertions(+), 14 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/operations/dumps/incremental refs/changes/85/83785/1
diff --git a/DumpWriters/ArticlesWriterWrapper.cpp b/DumpWriters/ArticlesWriterWrapper.cpp
index a17c385..4554784 100644
--- a/DumpWriters/ArticlesWriterWrapper.cpp
+++ b/DumpWriters/ArticlesWriterWrapper.cpp
@@ -3,12 +3,12 @@
const std::int16_t UserNamespace = 2;
-void ArticlesWriterWrapper::StartPage(const std::shared_ptr<const Page> page)
+void ArticlesWriterWrapper::StartPage(const std::shared_ptr<const Page> page, bool titleWithNamespace)
{
pageInlcuded = page->Namespace % 2 == 0 && page->Namespace !=
UserNamespace;
if (pageInlcuded)
- wrapped->StartPage(page);
+ wrapped->StartPage(page, titleWithNamespace);
}
void ArticlesWriterWrapper::AddRevision(const std::shared_ptr<const Revision>
revision)
diff --git a/DumpWriters/ArticlesWriterWrapper.h b/DumpWriters/ArticlesWriterWrapper.h
index 366f11e..fd7c99b 100644
--- a/DumpWriters/ArticlesWriterWrapper.h
+++ b/DumpWriters/ArticlesWriterWrapper.h
@@ -11,7 +11,7 @@
: WriterWrapper(std::move(wrapped)), pageInlcuded(false)
{}
- virtual void StartPage(const std::shared_ptr<const Page> page) override;
+ virtual void StartPage(const std::shared_ptr<const Page> page, bool titleWithNamespace) override;
virtual void AddRevision(const std::shared_ptr<const Revision> revision)
override;
virtual void EndPage() override;
virtual void SetDumpKind(DumpKind dumpKind) override;
diff --git a/DumpWriters/CompositeWriter.cpp b/DumpWriters/CompositeWriter.cpp
index c93dde4..ad75049 100644
--- a/DumpWriters/CompositeWriter.cpp
+++ b/DumpWriters/CompositeWriter.cpp
@@ -1,10 +1,10 @@
#include "CompositeWriter.h"
#include "../DumpException.h"
-void CompositeWriter::StartPage(const std::shared_ptr<const Page> page)
+void CompositeWriter::StartPage(const std::shared_ptr<const Page> page, bool titleWithNamespace)
{
for (auto &writer : writers)
- writer->StartPage(page);
+ writer->StartPage(page, titleWithNamespace);
}
void CompositeWriter::AddRevision(const std::shared_ptr<const Revision>
revision)
diff --git a/DumpWriters/CompositeWriter.h b/DumpWriters/CompositeWriter.h
index 5352e80..8966f52 100644
--- a/DumpWriters/CompositeWriter.h
+++ b/DumpWriters/CompositeWriter.h
@@ -15,7 +15,7 @@
: writers(std::move(writers)), getTextFunction(getTextFunction)
{}
- virtual void StartPage(const std::shared_ptr<const Page> page) override;
+ virtual void StartPage(const std::shared_ptr<const Page> page, bool titleWithNamespace) override;
virtual void AddRevision(const std::shared_ptr<const Revision> revision)
override;
virtual void EndPage() override;
virtual void SetSiteInfo(const std::shared_ptr<const SiteInfo> siteInfo)
override;
diff --git a/DumpWriters/DumpWriter.cpp b/DumpWriters/DumpWriter.cpp
index d1423fe..6183c4c 100644
--- a/DumpWriters/DumpWriter.cpp
+++ b/DumpWriters/DumpWriter.cpp
@@ -5,6 +5,21 @@
#include "../Indexes/Index.h"
#include "../format.h"
+void DumpWriter::RemoveNamespace(Page& page)
+{
+ std::string namespapceName = dump->siteInfo->siteInfo.Namespaces.at(page.Namespace).second;
+
+ if (namespapceName.empty())
+ return;
+
+ namespapceName.append(":");
+
+ if (page.Title.substr(0, namespapceName.length()) != namespapceName)
+ throw DumpException();
+
+ page.Title.erase(0, namespapceName.length());
+}
+
DumpWriter::DumpWriter(std::shared_ptr<WritableDump> dump, bool withText,
std::unique_ptr<DiffWriter> diffWriter)
: dump(dump), withText(withText), diffWriter(std::move(diffWriter))
{
@@ -24,12 +39,17 @@
diffWriter->SetSiteInfo(*siteInfo, dump->fileHeader.Kind);
}
-void DumpWriter::StartPage(const std::shared_ptr<const Page> page)
+void DumpWriter::StartPage(const std::shared_ptr<const Page> page, bool titleWithNamespace)
{
std::uint32_t pageId = page->PageId;
+
this->page = std::unique_ptr<DumpPage>(new DumpPage(dump, pageId));
oldPage = this->page->page;
this->page->page = *page;
+
+ if (titleWithNamespace)
+ RemoveNamespace(this->page->page);
+
unset(unvisitedPageIds, pageId);
if (diffWriter != nullptr)
diff --git a/DumpWriters/DumpWriter.h b/DumpWriters/DumpWriter.h
index 233f9ea..a1fee36 100644
--- a/DumpWriters/DumpWriter.h
+++ b/DumpWriters/DumpWriter.h
@@ -19,10 +19,11 @@
std::unordered_set<std::uint32_t> newRevisionIds;
bool withText;
+ void RemoveNamespace(Page& page);
public:
DumpWriter(std::shared_ptr<WritableDump> dump, bool withText,
std::unique_ptr<DiffWriter> diffWriter = nullptr);
- virtual void StartPage(const std::shared_ptr<const Page> page) override;
+ virtual void StartPage(const std::shared_ptr<const Page> page, bool titleWithNamespace) override;
virtual void AddRevision(const std::shared_ptr<const Revision> revision)
override;
virtual void EndPage() override;
virtual void SetSiteInfo(const std::shared_ptr<const SiteInfo> siteInfo)
override;
diff --git a/DumpWriters/IDumpWriter.h b/DumpWriters/IDumpWriter.h
index 22c717f..42cde90 100644
--- a/DumpWriters/IDumpWriter.h
+++ b/DumpWriters/IDumpWriter.h
@@ -9,7 +9,7 @@
class IDumpWriter
{
public:
- virtual void StartPage(const std::shared_ptr<const Page> page) = 0;
+ virtual void StartPage(const std::shared_ptr<const Page> page, bool titleWithNamespace) = 0;
virtual void AddRevision(const std::shared_ptr<const Revision> revision) =
0;
virtual void EndPage() = 0;
virtual void SetSiteInfo(const std::shared_ptr<const SiteInfo> siteInfo) =
0;
diff --git a/DumpWriters/WriterWrapper.cpp b/DumpWriters/WriterWrapper.cpp
index 1cc81b4..697f89f 100644
--- a/DumpWriters/WriterWrapper.cpp
+++ b/DumpWriters/WriterWrapper.cpp
@@ -1,8 +1,8 @@
#include "WriterWrapper.h"
-void WriterWrapper::StartPage(const std::shared_ptr<const Page> page)
+void WriterWrapper::StartPage(const std::shared_ptr<const Page> page, bool titleWithNamespace)
{
- wrapped->StartPage(page);
+ wrapped->StartPage(page, titleWithNamespace);
}
void WriterWrapper::AddRevision(const std::shared_ptr<const Revision> revision)
diff --git a/DumpWriters/WriterWrapper.h b/DumpWriters/WriterWrapper.h
index af648e2..7718133 100644
--- a/DumpWriters/WriterWrapper.h
+++ b/DumpWriters/WriterWrapper.h
@@ -11,7 +11,7 @@
: wrapped(std::move(wrapped))
{}
- virtual void StartPage(const std::shared_ptr<const Page> page) override;
+ virtual void StartPage(const std::shared_ptr<const Page> page, bool titleWithNamespace) override;
virtual void AddRevision(const std::shared_ptr<const Revision> revision)
override;
virtual void EndPage() override;
virtual void SetSiteInfo(const std::shared_ptr<const SiteInfo> siteInfo)
override;
diff --git a/XmlInput/XmlPageProcessor.cpp b/XmlInput/XmlPageProcessor.cpp
index 75c1907..36c6855 100644
--- a/XmlInput/XmlPageProcessor.cpp
+++ b/XmlInput/XmlPageProcessor.cpp
@@ -9,7 +9,7 @@
{
if (!pageWritten)
{
- dumpWriter->StartPage(page);
+ dumpWriter->StartPage(page, true);
pageWritten = true;
}
}
diff --git a/XmlWriter.cpp b/XmlWriter.cpp
index a7e5cf1..f89027a 100644
--- a/XmlWriter.cpp
+++ b/XmlWriter.cpp
@@ -69,7 +69,12 @@
output.BeginElement("page");
- output.WriteElement("title", escapeElementText(page.Title));
+ std::string title = page.Title;
+ std::string ns = dump->siteInfo->siteInfo.Namespaces.at(page.Namespace).second;
+ if (!ns.empty())
+ title = ns + ':' + title;
+
+ output.WriteElement("title", escapeElementText(title));
output.WriteElement("ns", page.Namespace);
output.WriteElement("id", page.PageId);
@@ -84,6 +89,9 @@
{
auto revision = DumpRevision(dump, revisionId, true).revision;
+ if (revision.RevisionId != revisionId)
+ throw DumpException();
+
output.BeginElement("revision");
output.WriteElement("id", revision.RevisionId);
--
To view, visit https://gerrit.wikimedia.org/r/83785
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I794c19db9e36d11d6d05e3bbd7f6f3fe26fb8b0a
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps/incremental
Gerrit-Branch: gsoc
Gerrit-Owner: Petr Onderka <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits