Petr Onderka has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/74361


Change subject: reading dump; command-line parameters
......................................................................

reading dump; command-line parameters

Change-Id: Icb1dc34aea3b290c3250b0f2db23cfe00a185a06
---
M DumpObjects/DumpIpV4User.cpp
M DumpObjects/DumpIpV4User.h
M DumpObjects/DumpNamedUser.cpp
M DumpObjects/DumpNamedUser.h
M DumpObjects/DumpPage.cpp
M DumpObjects/DumpPage.h
M DumpObjects/DumpUser.h
M DumpObjects/FileHeader.cpp
M DumpObjects/FileHeader.h
M Incremental dumps.sln
M Incremental dumps.vcxproj
M Objects/IpV4User.cpp
M Objects/IpV4User.h
M Objects/Revision.h
M Objects/User.cpp
M Objects/User.h
M XmlContributorProcessor.cpp
M main.cpp
18 files changed, 136 insertions(+), 29 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/dumps/incremental refs/changes/61/74361/1

diff --git a/DumpObjects/DumpIpV4User.cpp b/DumpObjects/DumpIpV4User.cpp
index 89a9321..b805426 100644
--- a/DumpObjects/DumpIpV4User.cpp
+++ b/DumpObjects/DumpIpV4User.cpp
@@ -24,9 +24,4 @@
 uint32_t DumpIpV4User::NewLength() const
 {
     return ValueSize(user->Address);
-}
-
-RevisionFlags DumpIpV4User::UserKind() const
-{
-    return RevisionFlags::IpV4User;
 }
\ No newline at end of file
diff --git a/DumpObjects/DumpIpV4User.h b/DumpObjects/DumpIpV4User.h
index 8ac527b..7bcf5b1 100644
--- a/DumpObjects/DumpIpV4User.h
+++ b/DumpObjects/DumpIpV4User.h
@@ -16,5 +16,4 @@
 
     virtual shared_ptr<User> GetUser() const override;
     virtual uint32_t NewLength() const override;
-    virtual RevisionFlags UserKind() const override;
 };
\ No newline at end of file
diff --git a/DumpObjects/DumpNamedUser.cpp b/DumpObjects/DumpNamedUser.cpp
index 457e86d..e18ca95 100644
--- a/DumpObjects/DumpNamedUser.cpp
+++ b/DumpObjects/DumpNamedUser.cpp
@@ -26,9 +26,4 @@
 uint32_t DumpNamedUser::NewLength() const
 {
     return ValueSize(user->UserId) + ValueSize(user->UserName);
-}
-
-RevisionFlags DumpNamedUser::UserKind() const
-{
-    return RevisionFlags::NamedUser;
 }
\ No newline at end of file
diff --git a/DumpObjects/DumpNamedUser.h b/DumpObjects/DumpNamedUser.h
index e33f509..0f3c1b2 100644
--- a/DumpObjects/DumpNamedUser.h
+++ b/DumpObjects/DumpNamedUser.h
@@ -15,5 +15,4 @@
 
     virtual shared_ptr<User> GetUser() const override;
     virtual uint32_t NewLength() const override;
-    virtual RevisionFlags UserKind() const override;
 };
\ No newline at end of file
diff --git a/DumpObjects/DumpPage.cpp b/DumpObjects/DumpPage.cpp
index 8c76e23..d151466 100644
--- a/DumpObjects/DumpPage.cpp
+++ b/DumpObjects/DumpPage.cpp
@@ -70,4 +70,14 @@
     : DumpObject(dump), page()
 {
     Load(pageId);
+}
+
+DumpPage::DumpPage(weak_ptr<WritableDump> dump, Offset offset)
+    : DumpObject(dump), page()
+{
+    auto dumpRef = dump.lock();
+
+    page = Read(dumpRef, offset);
+    savedOffset = offset.value;
+    savedLength = NewLength();
 }
\ No newline at end of file
diff --git a/DumpObjects/DumpPage.h b/DumpObjects/DumpPage.h
index e8ccf33..9fbd53d 100644
--- a/DumpObjects/DumpPage.h
+++ b/DumpObjects/DumpPage.h
@@ -18,6 +18,7 @@
     Page page;
 
     DumpPage(weak_ptr<WritableDump> dump, uint32_t pageId);
+    DumpPage(weak_ptr<WritableDump> dump, Offset offset);
 
     virtual uint32_t NewLength() const override;
 };
\ No newline at end of file
diff --git a/DumpObjects/DumpUser.h b/DumpObjects/DumpUser.h
index 1bcb20d..1742702 100644
--- a/DumpObjects/DumpUser.h
+++ b/DumpObjects/DumpUser.h
@@ -21,5 +21,4 @@
     void Write(ostream *stream);
     virtual shared_ptr<User> GetUser() const = 0;
     virtual uint32_t NewLength() const override = 0;
-    virtual RevisionFlags UserKind() const = 0;
 };
\ No newline at end of file
diff --git a/DumpObjects/FileHeader.cpp b/DumpObjects/FileHeader.cpp
index 0f7c98c..0f683da 100644
--- a/DumpObjects/FileHeader.cpp
+++ b/DumpObjects/FileHeader.cpp
@@ -2,8 +2,8 @@
 #include "../Dump.h"
 #include "../DumpException.h"
 
-FileHeader::FileHeader(Offset fileEnd, Offset pageIdIndexRoot, Offset freeSpaceIndexRoot, weak_ptr<WritableDump> dump)
-    : DumpObject(dump), FileEnd(fileEnd), PageIdIndexRoot(pageIdIndexRoot), FreeSpaceIndexRoot(freeSpaceIndexRoot)
+FileHeader::FileHeader(Offset fileEnd, Offset pageIdIndexRoot, Offset revisionIdIndexRoot, Offset freeSpaceIndexRoot, weak_ptr<WritableDump> dump)
+    : DumpObject(dump), FileEnd(fileEnd), PageIdIndexRoot(pageIdIndexRoot), RevisionIdIndexRoot(revisionIdIndexRoot), FreeSpaceIndexRoot(freeSpaceIndexRoot)
 {}
 
 void FileHeader::WriteInternal()
@@ -14,6 +14,7 @@
 
     FileEnd.Write(*stream);
     PageIdIndexRoot.Write(*stream);
+    RevisionIdIndexRoot.Write(*stream);
     FreeSpaceIndexRoot.Write(*stream);
 }
 
@@ -40,16 +41,22 @@
 
     Offset fileEnd = Offset::Read(stream);
     Offset pageIdIndexRoot = Offset::Read(stream);
+    Offset revisionIdIndexRoot = Offset::Read(stream);
     Offset freeSpaceIndexRoot = Offset::Read(stream);
 
-    return FileHeader(fileEnd, pageIdIndexRoot, freeSpaceIndexRoot, dump.GetSelf());
+    return FileHeader(fileEnd, pageIdIndexRoot, revisionIdIndexRoot, freeSpaceIndexRoot, dump.GetSelf());
+}
+
+uint32_t FileHeader::Length()
+{
+    return 4 + 2 * DumpTraits<uint8_t>::DumpSize() + 4 * DumpTraits<Offset>::DumpSize();
 }
 
 uint32_t FileHeader::NewLength() const
 {
-    return 4 + 2 * DumpTraits<uint8_t>::DumpSize() + 3 * DumpTraits<Offset>::DumpSize();
+    return Length();
 }
 
 FileHeader::FileHeader(weak_ptr<WritableDump> dump)
-    : DumpObject(dump), FileEnd(6 + 3 * 6), PageIdIndexRoot(0), FreeSpaceIndexRoot(0)
+    : DumpObject(dump), FileEnd(Length()), PageIdIndexRoot(0), FreeSpaceIndexRoot(0)
 {}
\ No newline at end of file
diff --git a/DumpObjects/FileHeader.h b/DumpObjects/FileHeader.h
index 7de0031..7a50c29 100644
--- a/DumpObjects/FileHeader.h
+++ b/DumpObjects/FileHeader.h
@@ -11,9 +11,11 @@
 class FileHeader : public DumpObject
 {
 private:
+    static uint32_t Length();
+
     ostream* stream;
 
-    FileHeader(Offset fileEnd, Offset pageIdIndexRoot, Offset freeSpaceIndexRoot, weak_ptr<WritableDump> dump = weak_ptr<WritableDump>());
+    FileHeader(Offset fileEnd, Offset pageIdIndexRoot, Offset revisionIdIndexRoot, Offset freeSpaceIndexRoot, weak_ptr<WritableDump> dump = weak_ptr<WritableDump>());
 protected:
     void WriteInternal();
 public:
diff --git a/Incremental dumps.sln b/Incremental dumps.sln
index fac3077..73116d2 100644
--- a/Incremental dumps.sln
+++ b/Incremental dumps.sln
@@ -17,7 +17,4 @@
        GlobalSection(SolutionProperties) = preSolution
                HideSolutionNode = FALSE
        EndGlobalSection
-       GlobalSection(Performance) = preSolution
-               HasPerformanceSessions = true
-       EndGlobalSection
 EndGlobal
diff --git a/Incremental dumps.vcxproj b/Incremental dumps.vcxproj
index e99cec4..f32f7e0 100644
--- a/Incremental dumps.vcxproj
+++ b/Incremental dumps.vcxproj
@@ -41,6 +41,7 @@
   <PropertyGroup Label="UserMacros" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <LinkIncremental>true</LinkIncremental>
+    <TargetName>idumps</TargetName>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <LinkIncremental>false</LinkIncremental>
diff --git a/Objects/IpV4User.cpp b/Objects/IpV4User.cpp
index 4d13f4d..b6af351 100644
--- a/Objects/IpV4User.cpp
+++ b/Objects/IpV4User.cpp
@@ -2,6 +2,7 @@
 #include <sstream>
 #include "../StringHelpers.h"
 #include "../DumpException.h"
+#include "Revision.h"
 
 using std::stoi;
 using std::ostringstream;
@@ -58,4 +59,9 @@
 
 IpV4User::IpV4User(uint32_t parsedAddress)
     : User(0, AddressToString(parsedAddress)), Address(parsedAddress)
-{}
\ No newline at end of file
+{}
+
+RevisionFlags IpV4User::UserKind() const
+{
+    return RevisionFlags::IpV4User;
+}
\ No newline at end of file
diff --git a/Objects/IpV4User.h b/Objects/IpV4User.h
index 2822b4a..b7a234d 100644
--- a/Objects/IpV4User.h
+++ b/Objects/IpV4User.h
@@ -13,4 +13,6 @@
     IpV4User(uint32_t parsedAddress);
 
     uint32_t Address;
+
+    virtual RevisionFlags UserKind() const override;
 };
\ No newline at end of file
diff --git a/Objects/Revision.h b/Objects/Revision.h
index 50359d0..ab7954b 100644
--- a/Objects/Revision.h
+++ b/Objects/Revision.h
@@ -14,8 +14,8 @@
 
     Minor     = 0x01,
 
-    NamedUser = 0x11,
-    IpV4User  = 0x12
+    NamedUser = 0x10,
+    IpV4User  = 0x20
 };
 
 RevisionFlags operator |(RevisionFlags first, RevisionFlags second);
diff --git a/Objects/User.cpp b/Objects/User.cpp
index 8399f8e..83ef349 100644
--- a/Objects/User.cpp
+++ b/Objects/User.cpp
@@ -1,5 +1,6 @@
 #include "User.h"
 #include "IpV4User.h"
+#include "Revision.h"
 
 unique_ptr<User> User::Create(uint32_t userId, string userName)
 {
@@ -17,5 +18,10 @@
     : UserId(userId), UserName(userName)
 {}
 
+RevisionFlags User::UserKind() const
+{
+    return RevisionFlags::NamedUser;
+}
+
 User::~User()
 {}
\ No newline at end of file
diff --git a/Objects/User.h b/Objects/User.h
index 4c57773..b91ce91 100644
--- a/Objects/User.h
+++ b/Objects/User.h
@@ -8,6 +8,8 @@
 using std::unique_ptr;
 using std::string;
 
+enum class RevisionFlags : uint8_t;
+
 // TODO: create class NamedUser?
 class User
 {
@@ -19,5 +21,6 @@
     uint32_t UserId;
     string UserName;
 
+    virtual RevisionFlags UserKind() const;
     virtual ~User();
 };
\ No newline at end of file
diff --git a/XmlContributorProcessor.cpp b/XmlContributorProcessor.cpp
index 5d0f305..8651b23 100644
--- a/XmlContributorProcessor.cpp
+++ b/XmlContributorProcessor.cpp
@@ -28,4 +28,5 @@
         user = new User(processor.id, processor.userName);
 
     revision->Contributor = shared_ptr<User>(user);
+    revision->Flags |= user->UserKind();
 }
\ No newline at end of file
diff --git a/main.cpp b/main.cpp
index 2608b8b..e0113f4 100644
--- a/main.cpp
+++ b/main.cpp
@@ -4,8 +4,10 @@
 #include "DumpWriters/StubCurrentWriter.h"
 #include "XmlPageProcessor.h"
 #include "Dump.h"
+#include "DumpObjects/DumpRevision.h"
 
 using std::cin;
+using std::cout;
 
 class StandardInputStream : public XML::InputStream
 {
@@ -27,10 +29,16 @@
     elem.Process(handlers, userData);
 }
 
-int main(int argc, const char* argv[])
+void printUsage()
 {
-    //StandardInputStream stream;
-    XML::FileInputStream stream = XML::FileInputStream("C:\\Users\\Svick\\Downloads\\tenwiki-20130622-pages-meta-history.xml");
+    cout << "Usage:\n";
+    cout << "creating dump: idumps c[reate] source.xml dump.id\n";
+    cout << "reading dump: idumps r[ead] dump.id\n";
+}
+
+void createDump(string inputFileName, string outputFileName)
+{
+    XML::FileInputStream stream = XML::FileInputStream(inputFileName.c_str());
 
     XML::Input input(stream);
 
@@ -39,11 +47,87 @@
         XML::Handler::END
     };
 
-    shared_ptr<WritableDump> dump = WritableDump::Create("tmp/test.id");
+    shared_ptr<WritableDump> dump = WritableDump::Create(outputFileName);
 
     StubCurrentWriter writer(dump);
 
     input.Process(handlers, &writer);
 
     dump->WriteIndexes();
+}
+
+void readDump(string dumpFileName)
+{
+    auto dump = WritableDump::Create(dumpFileName);
+
+    int i = 0;
+    for (auto pageInfo : *dump->pageIdIndex)
+    {
+        auto page = DumpPage(dump, pageInfo.second).page;
+
+        cout << page.PageId << ": " << page.Title;
+
+        if (page.RedirectTarget != string())
+        {
+            cout << " -> " << page.RedirectTarget;
+        }
+
+        cout << ", " << page.RevisionIds.size() << " revs\n";
+
+        int j = 0;
+        for (auto revisionId : page.RevisionIds)
+        {
+            auto revision = DumpRevision(dump, revisionId, false).revision;
+
+            cout << " " << revision.RevisionId << " (<- " << revision.ParentId << ") " << revision.Timestamp.ToString() << " " << revision.Contributor->UserName << "\n";
+            cout << "  " << revision.Comment << "\n";
+
+            if (++j >= 5)
+                break;
+        }
+
+        if (++i >= 5)
+            break;
+    }
+}
+
+int main(int argc, const char* argv[])
+{
+    if (argc == 1)
+    {
+        printUsage();
+        return 0;
+    }
+
+    string action = argv[1];
+
+    if (action == "c" || action == "create")
+    {
+        if (argc != 4)
+        {
+            cout << "Invalid number of parameters\n";
+            printUsage();
+        }
+        else
+        {
+            createDump(argv[2], argv[3]);
+        }
+    }
+    else if (action == "r" || action == "read")
+    {
+        if (argc != 3)
+        {
+            cout << "Invalid number of parameters\n";
+            printUsage();
+        }
+        else
+        {
+            readDump(argv[2]);
+        }
+    }
+    else
+    {
+        cout << "Unknown action '" << action << "'\n";
+        printUsage();
+    }
 }
\ No newline at end of file

-- 
To view, visit https://gerrit.wikimedia.org/r/74361
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Icb1dc34aea3b290c3250b0f2db23cfe00a185a06
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps/incremental
Gerrit-Branch: gsoc
Gerrit-Owner: Petr Onderka <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to