Petr Onderka has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/80579


Change subject: Documentation of command line parameters + other small changes
......................................................................

Documentation of command line parameters + other small changes

Change-Id: I355014afbe2c2a954e559125bd1ec4da56e7f16c
---
M Diff/Changes/RevisionChange.cpp
M DumpException.cpp
M DumpException.h
M DumpKind.cpp
M DumpKind.h
M DumpWriters/DumpWriter.cpp
M README.md
M main.cpp
8 files changed, 148 insertions(+), 33 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/dumps/incremental 
refs/changes/79/80579/1

diff --git a/Diff/Changes/RevisionChange.cpp b/Diff/Changes/RevisionChange.cpp
index 1f469a7..e480b5f 100644
--- a/Diff/Changes/RevisionChange.cpp
+++ b/Diff/Changes/RevisionChange.cpp
@@ -80,8 +80,8 @@
 {
     RevisionChange result(withText);
 
-    ReadValue(stream, result.flags);
     ReadValue(stream, result.revisionChanges.RevisionId);
+    ReadValue(stream, result.flags);
 
     if (HasFlag(result.flags, RevisionChangeFlags::Flags))
         ReadValue(stream, result.revisionChanges.Flags);
@@ -121,8 +121,8 @@
 void RevisionChange::WriteInternal()
 {
     WriteValue(ChangeKind::ChangeRevision);
-    WriteValue(flags);
     WriteValue(revisionChanges.RevisionId);
+    WriteValue(flags);
 
     if (HasFlag(flags, RevisionChangeFlags::Flags))
         WriteValue(revisionChanges.Flags);
@@ -158,8 +158,8 @@
     std::uint32_t result = 0;
 
     result += ValueSize(ChangeKind::ChangeRevision);
-    result += ValueSize(flags);
     result += ValueSize(revisionChanges.RevisionId);
+    result += ValueSize(flags);
 
     if (HasFlag(flags, RevisionChangeFlags::Flags))
         result += ValueSize(revisionChanges.Flags);
diff --git a/DumpException.cpp b/DumpException.cpp
index 239fbb5..761cc94 100644
--- a/DumpException.cpp
+++ b/DumpException.cpp
@@ -4,7 +4,7 @@
     : message(message)
 {}
 
-const char* UserException::what() const throw()
+const char* UserException::what() const NOEXCEPT
 {
     return message.c_str();
 }
diff --git a/DumpException.h b/DumpException.h
index 9aa01db..4b89a9f 100644
--- a/DumpException.h
+++ b/DumpException.h
@@ -3,6 +3,13 @@
 #include <exception>
 #include <string>
 
+// http://stackoverflow.com/a/18387764/41071
+#ifndef _MSC_VER
+#define NOEXCEPT noexcept
+#else
+#define NOEXCEPT
+#endif
+
 class DumpException : public std::exception
 {};
 
@@ -15,7 +22,7 @@
 public:
     UserException(const std::string &message);
 
-    virtual const char* what() const throw() override;
+    virtual const char* what() const NOEXCEPT override;
 };
 
 // user exception that was caused by invalid parameters,
diff --git a/DumpKind.cpp b/DumpKind.cpp
index 0c82f26..a1900ff 100644
--- a/DumpKind.cpp
+++ b/DumpKind.cpp
@@ -34,4 +34,22 @@
 {
     first = first | second;
     return first;
+}
+
+std::ostream& operator<<(std::ostream& stream, DumpKind dumpKind)
+{
+    if (IsStub(dumpKind))
+        stream << "stub";
+    else
+        stream << "pages";
+
+    if (IsCurrent(dumpKind))
+        stream << "-current";
+    else
+        stream << "-history";
+
+    if (IsArticles(dumpKind))
+        stream << "-articles";
+  
+    return stream;
 }
\ No newline at end of file
diff --git a/DumpKind.h b/DumpKind.h
index 3b25f0d..08ff048 100644
--- a/DumpKind.h
+++ b/DumpKind.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include <cstdint>
+#include <iostream>
 
 enum class DumpKind : std::uint8_t
 {
@@ -19,4 +20,6 @@
 bool IsArticles(DumpKind dumpKind);
 
 DumpKind operator |(DumpKind first, DumpKind second);
-DumpKind operator |=(DumpKind &first, DumpKind second);
\ No newline at end of file
+DumpKind operator |=(DumpKind &first, DumpKind second);
+
+std::ostream& operator<<(std::ostream& os, DumpKind obj);
\ No newline at end of file
diff --git a/DumpWriters/DumpWriter.cpp b/DumpWriters/DumpWriter.cpp
index 9ff4c5b..2ac4611 100644
--- a/DumpWriters/DumpWriter.cpp
+++ b/DumpWriters/DumpWriter.cpp
@@ -1,8 +1,9 @@
+#include <algorithm>
 #include "DumpWriter.h"
 #include "../DumpObjects/DumpRevision.h"
 #include "../CollectionHelpers.h"
 #include "../Indexes/Index.h"
-#include <algorithm>
+#include "../format.h"
 
 DumpWriter::DumpWriter(std::shared_ptr<WritableDump> dump, bool withText, 
std::unique_ptr<DiffWriter> diffWriter)
     : dump(dump), withText(withText), diffWriter(std::move(diffWriter))
@@ -85,8 +86,18 @@
     if (withText)
         dumpKind |= DumpKind::Pages;
 
-    dump->fileHeader.Kind = dumpKind;
-    dump->fileHeader.Write();
+    // empty name means it's a new dump
+    if (dump->siteInfo->name.empty())
+    {
+        dump->fileHeader.Kind = dumpKind;
+        dump->fileHeader.Write();
+    }
+    else
+    {
+        if (dump->fileHeader.Kind != dumpKind)
+            throw UserException(str(fmt::Format(
+                "The specified dump kind ({0}) is not the same as the kind of 
the dump ({1}).") << dumpKind << dump->fileHeader.Kind));
+    }
 }
 
 void DumpWriter::EndDump()
diff --git a/README.md b/README.md
index 81c89dd..0cf68c3 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,8 @@
 [Incremental dumps][1] are an improved format for dumps of content from 
WikiMedia wikis.
 
-== Compiling ==
+## Compiling
 
-=== Linux ===
+### Linux
 
 You will need cmake 2.8 and gcc 4.8 (gcc 4.7 could work too, I haven't tested 
that).
 To compile this project, run:
@@ -10,30 +10,106 @@
     cmake .
     make
 
-=== Windows ===
+### Windows
 
 Open the solution in Visual Studio 2012 (Visual Studio Express 2012 could work 
too, I haven't tested that) and build it.
 
-== Running the application ==
+## Running the application
 
-Compiling produces a command-line application `idumps`.
+Compiling produces a command-line application `idumps` (or `idumps.exe` on 
Windows).
 
-It can be used to convert pages-history XML dump to one or more types of 
incremental dumps
-and also to convert an incremental dump back to XML.
-Running it wihtout parameters produces a short usage message, explaining the 
meaning of parameters.
+Running it without parameters produces a short usage message, explaining 
possible actions and the meaning of their parameters.
 
-When creating a dump with a file name that already exists, the program tried 
to use the existing file.
-This can cause problems, so if an error happens, delete the dump file and try 
running the command again
+### For dump readers
 
-=== Examples ===
+The following actions are useful for normal users of dumps, i.e. those who 
want to download and process them, not to create their own dumps.
+In the future, there might be a special version of `idumps` that contains only 
these actions.
 
-    idumps c tenwiki-20130622-pages-meta-history.xml sh sh.id pca pca.id
+#### Reading a dump
 
-Creates stub-history dump `sh.id` and pages-articles dump `pca.id` from the 
XML dump of tenwiki.
+The `r` (or `read`) action is for reading a dump and converting it to XML.
+It takes two parameters: path to the dump file and path to the generated XML 
file.
+If the XML file already exists, it will be overwritten.
 
-    idumps r sh.id sh.xml
+Example:
 
-Creates `sh.xml`, XML version of the dump `sh.id`.
+    idumps r dump.id dump.xml
 
+#### Applying a diff dump
+
+The `a` (or `apply`) action is for applying a diff dump to an existing normal dump 
to update it.
+It takes two parameters: path to the dump file and path to the diff dump file.
+If the diff cannot be applied to the dump (because it's for a different wiki, or 
for a dump with a different timestamp), an error is printed.
+Applying the diff also updates the timestamp of the dump.
+
+Example:
+
+    idumps a dump.id diff.dd
+
+### For dump creators
+
+The following actions are for dump creators, usually those who want to create 
a dump of their wiki.
+
+#### Updating a dump from wiki
+
+The `u` (or `update`) action is for updating or creating a dump based on 
communication with a wiki.
+The first five parameters are:
+
+* Name of the wiki (e.g. `enwiki`). This has to match with the name that's 
already in the dump, if it exists.
+
+* New timestamp for the updated dump. This has to be different than the 
current timestamp in the dump, if it exists.
+
+ Timestamps are used in diff dumps, to make sure only the right diff can be 
applied to the dump.
+
+* Path to the PHP interpreter. This can be simply `php`, if `php` is in `PATH`.
+
+* Path to the dumpBackup maintenance script, possibly with optional parameters 
(e.g. `--report`).
+
+ The script will be called with additional parameters `--full --stub`, which 
are added automatically.
+
+* Path to the fetchText maintenance script.
+
+The remaining parameters specify what dumps to update/create; each group of 
two or three parameters represents a separate dump.
+
+The parameters in each group are:
+
+* Dump specification.
+
+* Path to the dump file.
+
+* Path to the created diff dump (if diff was included in the specification).
+
+ If the file already exists, it will be overwritten.
+
+The specification is a 2- to 4-letter string that describes what kind of dump 
to create:
+
+* 1. letter: `p` for pages dump or `s` for stub dump
+* 2. letter: `h` for history dump or `c` for current dump
+* 3. optional letter: `a` for articles dump (without talk and User namespaces)
+* 4. optional letter: `d` to also create diff dump
+
+Example:
+
+    idumps u enwiki 20130823 php "/var/www/maintenance/dumpBackup.php 
--report=10000" /var/www/maintenance/fetchText.php pca pages-articles.id shd 
stub-history.id stub-history.dd
+
+This sets the name of the wiki to `enwiki`, timestamp to `20130823`. The PHP 
interpreter is in `PATH`, MediaWiki is installed in `/var/www` and dumpBackup 
will report every 10000 revisions.
+
+The updated or created dumps are a pages-current-articles dump 
pages-articles.id and a stub-history dump stub-history.id that also has a diff 
dump stub-history.dd.
+
+#### Creating a dump from XML
+
+The `c` (or `create`) action is for creating an incremental dump based on a 
pages-history XML dump.
+
+The parameters are similar to those of the `update` action:
+
+* Name of the wiki.
+* Timestamp of the created dump.
+* Path to the source XML dump.
+
+The remaining parameters specify what dumps to create, just as in `update`.
+
+Example:
+
+    idumps c enwiki 20130823 enwiki-20130823-pages-meta-history.xml sc sc.id
 
 [1]: http://www.mediawiki.org/wiki/User:Svick/Incremental_dumps
diff --git a/main.cpp b/main.cpp
index c575cd0..5a20936 100644
--- a/main.cpp
+++ b/main.cpp
@@ -15,19 +15,19 @@
 void printUsage()
 {
     std::cout << "Usage:\n";
-    std::cout << "creating dump: idumps c[reate] name timestamp source.xml 
spec dump.id ...\n";
+    std::cout << "reading dump: idumps r[ead] dump.id output.xml\n";
+    std::cout << "applying diff: idumps a[pply] dump.id diff.dd\n";
+    std::cout << "updating dump: idumps u[pdate] name new-timestamp phpPath 
dumpBackup fetchText spec dump.id ...\n";
     std::cout << " name is the name of the wiki (e.g. enwiki)\n";
     std::cout << " timestamp identifies this specific dump (it doesn't 
actually have to be a timestamp)\n";
     std::cout << " spec is a 2 to 4-letter string that describes what kind of 
dump to create:\n";
-    std::cout << " 1. letter: p for pages dump or s for stub dump:\n";
-    std::cout << " 2. letter: h for history dump or c for current dump:\n";
-    std::cout << " 3. optional letter: a for articles dump:\n";
-    std::cout << " 4. optional letter: d to also create diff dump:\n";
+    std::cout << " 1. letter: p for pages dump or s for stub dump\n";
+    std::cout << " 2. letter: h for history dump or c for current dump\n";
+    std::cout << " 3. optional letter: a for articles dump\n";
+    std::cout << " 4. optional letter: d to also create diff dump\n";
     std::cout << "  add the path to the diff dump after the path to dump\n";
     std::cout << " example: sh for stub-meta-history, pcad for pages-articles 
with diff dump\n";
-    std::cout << "updating dump: idumps u[pdate] name new-timestamp phpPath 
dumpBackup fetchText spec dump.id ...\n";
-    std::cout << "reading dump: idumps r[ead] dump.id output.xml\n";
-    std::cout << "applying diff: idumps a[pply] dump.id diff.dd\n";
+    std::cout << "creating dump: idumps c[reate] name timestamp source.xml 
spec dump.id ...\n";
 }
 
 std::unique_ptr<IDumpWriter> createWriter(std::queue<std::string> &parameters, 
const std::string &name, const std::string &timestamp)
@@ -193,7 +193,7 @@
 
     exec_stream_t dumpBackupProcess;
     dumpBackupProcess.set_buffer_limit(exec_stream_t::s_out, 8192);
-    dumpBackupProcess.start(phpPath, dumpBackupParameters);
+    dumpBackupProcess.start(phpPath, dumpBackupParameters + " --full --stub");
 
     WrapperInputStream dumpBackupStream(dumpBackupProcess.out(),
         [&]()

-- 
To view, visit https://gerrit.wikimedia.org/r/80579
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I355014afbe2c2a954e559125bd1ec4da56e7f16c
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps/incremental
Gerrit-Branch: gsoc
Gerrit-Owner: Petr Onderka <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to