Petr Onderka has uploaded a new change for review.
https://gerrit.wikimedia.org/r/74361
Change subject: reading dump; command-line parameters
......................................................................
reading dump; command-line parameters
Change-Id: Icb1dc34aea3b290c3250b0f2db23cfe00a185a06
---
M DumpObjects/DumpIpV4User.cpp
M DumpObjects/DumpIpV4User.h
M DumpObjects/DumpNamedUser.cpp
M DumpObjects/DumpNamedUser.h
M DumpObjects/DumpPage.cpp
M DumpObjects/DumpPage.h
M DumpObjects/DumpUser.h
M DumpObjects/FileHeader.cpp
M DumpObjects/FileHeader.h
M Incremental dumps.sln
M Incremental dumps.vcxproj
M Objects/IpV4User.cpp
M Objects/IpV4User.h
M Objects/Revision.h
M Objects/User.cpp
M Objects/User.h
M XmlContributorProcessor.cpp
M main.cpp
18 files changed, 136 insertions(+), 29 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/operations/dumps/incremental refs/changes/61/74361/1
diff --git a/DumpObjects/DumpIpV4User.cpp b/DumpObjects/DumpIpV4User.cpp
index 89a9321..b805426 100644
--- a/DumpObjects/DumpIpV4User.cpp
+++ b/DumpObjects/DumpIpV4User.cpp
@@ -24,9 +24,4 @@
uint32_t DumpIpV4User::NewLength() const
{
return ValueSize(user->Address);
-}
-
-RevisionFlags DumpIpV4User::UserKind() const
-{
- return RevisionFlags::IpV4User;
}
\ No newline at end of file
diff --git a/DumpObjects/DumpIpV4User.h b/DumpObjects/DumpIpV4User.h
index 8ac527b..7bcf5b1 100644
--- a/DumpObjects/DumpIpV4User.h
+++ b/DumpObjects/DumpIpV4User.h
@@ -16,5 +16,4 @@
virtual shared_ptr<User> GetUser() const override;
virtual uint32_t NewLength() const override;
- virtual RevisionFlags UserKind() const override;
};
\ No newline at end of file
diff --git a/DumpObjects/DumpNamedUser.cpp b/DumpObjects/DumpNamedUser.cpp
index 457e86d..e18ca95 100644
--- a/DumpObjects/DumpNamedUser.cpp
+++ b/DumpObjects/DumpNamedUser.cpp
@@ -26,9 +26,4 @@
uint32_t DumpNamedUser::NewLength() const
{
return ValueSize(user->UserId) + ValueSize(user->UserName);
-}
-
-RevisionFlags DumpNamedUser::UserKind() const
-{
- return RevisionFlags::NamedUser;
}
\ No newline at end of file
diff --git a/DumpObjects/DumpNamedUser.h b/DumpObjects/DumpNamedUser.h
index e33f509..0f3c1b2 100644
--- a/DumpObjects/DumpNamedUser.h
+++ b/DumpObjects/DumpNamedUser.h
@@ -15,5 +15,4 @@
virtual shared_ptr<User> GetUser() const override;
virtual uint32_t NewLength() const override;
- virtual RevisionFlags UserKind() const override;
};
\ No newline at end of file
diff --git a/DumpObjects/DumpPage.cpp b/DumpObjects/DumpPage.cpp
index 8c76e23..d151466 100644
--- a/DumpObjects/DumpPage.cpp
+++ b/DumpObjects/DumpPage.cpp
@@ -70,4 +70,14 @@
: DumpObject(dump), page()
{
Load(pageId);
+}
+
+DumpPage::DumpPage(weak_ptr<WritableDump> dump, Offset offset)
+ : DumpObject(dump), page()
+{
+ auto dumpRef = dump.lock();
+
+ page = Read(dumpRef, offset);
+ savedOffset = offset.value;
+ savedLength = NewLength();
}
\ No newline at end of file
diff --git a/DumpObjects/DumpPage.h b/DumpObjects/DumpPage.h
index e8ccf33..9fbd53d 100644
--- a/DumpObjects/DumpPage.h
+++ b/DumpObjects/DumpPage.h
@@ -18,6 +18,7 @@
Page page;
DumpPage(weak_ptr<WritableDump> dump, uint32_t pageId);
+ DumpPage(weak_ptr<WritableDump> dump, Offset offset);
virtual uint32_t NewLength() const override;
};
\ No newline at end of file
diff --git a/DumpObjects/DumpUser.h b/DumpObjects/DumpUser.h
index 1bcb20d..1742702 100644
--- a/DumpObjects/DumpUser.h
+++ b/DumpObjects/DumpUser.h
@@ -21,5 +21,4 @@
void Write(ostream *stream);
virtual shared_ptr<User> GetUser() const = 0;
virtual uint32_t NewLength() const override = 0;
- virtual RevisionFlags UserKind() const = 0;
};
\ No newline at end of file
diff --git a/DumpObjects/FileHeader.cpp b/DumpObjects/FileHeader.cpp
index 0f7c98c..0f683da 100644
--- a/DumpObjects/FileHeader.cpp
+++ b/DumpObjects/FileHeader.cpp
@@ -2,8 +2,8 @@
#include "../Dump.h"
#include "../DumpException.h"
-FileHeader::FileHeader(Offset fileEnd, Offset pageIdIndexRoot, Offset freeSpaceIndexRoot, weak_ptr<WritableDump> dump)
- : DumpObject(dump), FileEnd(fileEnd), PageIdIndexRoot(pageIdIndexRoot), FreeSpaceIndexRoot(freeSpaceIndexRoot)
+FileHeader::FileHeader(Offset fileEnd, Offset pageIdIndexRoot, Offset revisionIdIndexRoot, Offset freeSpaceIndexRoot, weak_ptr<WritableDump> dump)
+ : DumpObject(dump), FileEnd(fileEnd), PageIdIndexRoot(pageIdIndexRoot), RevisionIdIndexRoot(revisionIdIndexRoot), FreeSpaceIndexRoot(freeSpaceIndexRoot)
{}
void FileHeader::WriteInternal()
@@ -14,6 +14,7 @@
FileEnd.Write(*stream);
PageIdIndexRoot.Write(*stream);
+ RevisionIdIndexRoot.Write(*stream);
FreeSpaceIndexRoot.Write(*stream);
}
@@ -40,16 +41,22 @@
Offset fileEnd = Offset::Read(stream);
Offset pageIdIndexRoot = Offset::Read(stream);
+ Offset revisionIdIndexRoot = Offset::Read(stream);
Offset freeSpaceIndexRoot = Offset::Read(stream);
- return FileHeader(fileEnd, pageIdIndexRoot, freeSpaceIndexRoot, dump.GetSelf());
+ return FileHeader(fileEnd, pageIdIndexRoot, revisionIdIndexRoot, freeSpaceIndexRoot, dump.GetSelf());
+}
+
+uint32_t FileHeader::Length()
+{
+ return 4 + 2 * DumpTraits<uint8_t>::DumpSize() + 4 * DumpTraits<Offset>::DumpSize();
}
uint32_t FileHeader::NewLength() const
{
- return 4 + 2 * DumpTraits<uint8_t>::DumpSize() + 3 * DumpTraits<Offset>::DumpSize();
+ return Length();
}
FileHeader::FileHeader(weak_ptr<WritableDump> dump)
- : DumpObject(dump), FileEnd(6 + 3 * 6), PageIdIndexRoot(0), FreeSpaceIndexRoot(0)
+ : DumpObject(dump), FileEnd(Length()), PageIdIndexRoot(0), FreeSpaceIndexRoot(0)
{}
\ No newline at end of file
diff --git a/DumpObjects/FileHeader.h b/DumpObjects/FileHeader.h
index 7de0031..7a50c29 100644
--- a/DumpObjects/FileHeader.h
+++ b/DumpObjects/FileHeader.h
@@ -11,9 +11,11 @@
class FileHeader : public DumpObject
{
private:
+ static uint32_t Length();
+
ostream* stream;
- FileHeader(Offset fileEnd, Offset pageIdIndexRoot, Offset freeSpaceIndexRoot, weak_ptr<WritableDump> dump = weak_ptr<WritableDump>());
+ FileHeader(Offset fileEnd, Offset pageIdIndexRoot, Offset revisionIdIndexRoot, Offset freeSpaceIndexRoot, weak_ptr<WritableDump> dump = weak_ptr<WritableDump>());
protected:
void WriteInternal();
public:
diff --git a/Incremental dumps.sln b/Incremental dumps.sln
index fac3077..73116d2 100644
--- a/Incremental dumps.sln
+++ b/Incremental dumps.sln
@@ -17,7 +17,4 @@
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
- GlobalSection(Performance) = preSolution
- HasPerformanceSessions = true
- EndGlobalSection
EndGlobal
diff --git a/Incremental dumps.vcxproj b/Incremental dumps.vcxproj
index e99cec4..f32f7e0 100644
--- a/Incremental dumps.vcxproj
+++ b/Incremental dumps.vcxproj
@@ -41,6 +41,7 @@
<PropertyGroup Label="UserMacros" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<LinkIncremental>true</LinkIncremental>
+ <TargetName>idumps</TargetName>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<LinkIncremental>false</LinkIncremental>
diff --git a/Objects/IpV4User.cpp b/Objects/IpV4User.cpp
index 4d13f4d..b6af351 100644
--- a/Objects/IpV4User.cpp
+++ b/Objects/IpV4User.cpp
@@ -2,6 +2,7 @@
#include <sstream>
#include "../StringHelpers.h"
#include "../DumpException.h"
+#include "Revision.h"
using std::stoi;
using std::ostringstream;
@@ -58,4 +59,9 @@
IpV4User::IpV4User(uint32_t parsedAddress)
: User(0, AddressToString(parsedAddress)), Address(parsedAddress)
-{}
\ No newline at end of file
+{}
+
+RevisionFlags IpV4User::UserKind() const
+{
+ return RevisionFlags::IpV4User;
+}
\ No newline at end of file
diff --git a/Objects/IpV4User.h b/Objects/IpV4User.h
index 2822b4a..b7a234d 100644
--- a/Objects/IpV4User.h
+++ b/Objects/IpV4User.h
@@ -13,4 +13,6 @@
IpV4User(uint32_t parsedAddress);
uint32_t Address;
+
+ virtual RevisionFlags UserKind() const override;
};
\ No newline at end of file
diff --git a/Objects/Revision.h b/Objects/Revision.h
index 50359d0..ab7954b 100644
--- a/Objects/Revision.h
+++ b/Objects/Revision.h
@@ -14,8 +14,8 @@
Minor = 0x01,
- NamedUser = 0x11,
- IpV4User = 0x12
+ NamedUser = 0x10,
+ IpV4User = 0x20
};
RevisionFlags operator |(RevisionFlags first, RevisionFlags second);
diff --git a/Objects/User.cpp b/Objects/User.cpp
index 8399f8e..83ef349 100644
--- a/Objects/User.cpp
+++ b/Objects/User.cpp
@@ -1,5 +1,6 @@
#include "User.h"
#include "IpV4User.h"
+#include "Revision.h"
unique_ptr<User> User::Create(uint32_t userId, string userName)
{
@@ -17,5 +18,10 @@
: UserId(userId), UserName(userName)
{}
+RevisionFlags User::UserKind() const
+{
+ return RevisionFlags::NamedUser;
+}
+
User::~User()
{}
\ No newline at end of file
diff --git a/Objects/User.h b/Objects/User.h
index 4c57773..b91ce91 100644
--- a/Objects/User.h
+++ b/Objects/User.h
@@ -8,6 +8,8 @@
using std::unique_ptr;
using std::string;
+enum class RevisionFlags : uint8_t;
+
// TODO: create class NamedUser?
class User
{
@@ -19,5 +21,6 @@
uint32_t UserId;
string UserName;
+ virtual RevisionFlags UserKind() const;
virtual ~User();
};
\ No newline at end of file
diff --git a/XmlContributorProcessor.cpp b/XmlContributorProcessor.cpp
index 5d0f305..8651b23 100644
--- a/XmlContributorProcessor.cpp
+++ b/XmlContributorProcessor.cpp
@@ -28,4 +28,5 @@
user = new User(processor.id, processor.userName);
revision->Contributor = shared_ptr<User>(user);
+ revision->Flags |= user->UserKind();
}
\ No newline at end of file
diff --git a/main.cpp b/main.cpp
index 2608b8b..e0113f4 100644
--- a/main.cpp
+++ b/main.cpp
@@ -4,8 +4,10 @@
#include "DumpWriters/StubCurrentWriter.h"
#include "XmlPageProcessor.h"
#include "Dump.h"
+#include "DumpObjects/DumpRevision.h"
using std::cin;
+using std::cout;
class StandardInputStream : public XML::InputStream
{
@@ -27,10 +29,16 @@
elem.Process(handlers, userData);
}
-int main(int argc, const char* argv[])
+void printUsage()
{
- //StandardInputStream stream;
- XML::FileInputStream stream = XML::FileInputStream("C:\\Users\\Svick\\Downloads\\tenwiki-20130622-pages-meta-history.xml");
+ cout << "Usage:\n";
+ cout << "creating dump: idumps c[reate] source.xml dump.id\n";
+ cout << "reading dump: idumps r[ead] dump.id\n";
+}
+
+void createDump(string inputFileName, string outputFileName)
+{
+ XML::FileInputStream stream = XML::FileInputStream(inputFileName.c_str());
XML::Input input(stream);
@@ -39,11 +47,87 @@
XML::Handler::END
};
- shared_ptr<WritableDump> dump = WritableDump::Create("tmp/test.id");
+ shared_ptr<WritableDump> dump = WritableDump::Create(outputFileName);
StubCurrentWriter writer(dump);
input.Process(handlers, &writer);
dump->WriteIndexes();
+}
+
+void readDump(string dumpFileName)
+{
+ auto dump = WritableDump::Create(dumpFileName);
+
+ int i = 0;
+ for (auto pageInfo : *dump->pageIdIndex)
+ {
+ auto page = DumpPage(dump, pageInfo.second).page;
+
+ cout << page.PageId << ": " << page.Title;
+
+ if (page.RedirectTarget != string())
+ {
+ cout << " -> " << page.RedirectTarget;
+ }
+
+ cout << ", " << page.RevisionIds.size() << " revs\n";
+
+ int j = 0;
+ for (auto revisionId : page.RevisionIds)
+ {
+ auto revision = DumpRevision(dump, revisionId, false).revision;
+
+ cout << " " << revision.RevisionId << " (<- " << revision.ParentId << ") " << revision.Timestamp.ToString() << " " << revision.Contributor->UserName << "\n";
+ cout << " " << revision.Comment << "\n";
+
+ if (++j >= 5)
+ break;
+ }
+
+ if (++i >= 5)
+ break;
+ }
+}
+
+int main(int argc, const char* argv[])
+{
+ if (argc == 1)
+ {
+ printUsage();
+ return 0;
+ }
+
+ string action = argv[1];
+
+ if (action == "c" || action == "create")
+ {
+ if (argc != 4)
+ {
+ cout << "Invalid number of parameters\n";
+ printUsage();
+ }
+ else
+ {
+ createDump(argv[2], argv[3]);
+ }
+ }
+ else if (action == "r" || action == "read")
+ {
+ if (argc != 3)
+ {
+ cout << "Invalid number of parameters\n";
+ printUsage();
+ }
+ else
+ {
+ readDump(argv[2]);
+ }
+ }
+ else
+ {
+ cout << "Unknown action '" << action << "'\n";
+ printUsage();
+ }
}
\ No newline at end of file
--
To view, visit https://gerrit.wikimedia.org/r/74361
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Icb1dc34aea3b290c3250b0f2db23cfe00a185a06
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps/incremental
Gerrit-Branch: gsoc
Gerrit-Owner: Petr Onderka <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits