Petr Onderka has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/84363


Change subject: Simple progress reporting for idumps create
......................................................................

Simple progress reporting for idumps create

Change-Id: Iae928f67eb1b958aea64e984b326801aeab05461
---
M CMakeLists.txt
M Incremental dumps.vcxproj
M main.cpp
3 files changed, 31 insertions(+), 7 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/dumps/incremental refs/changes/63/84363/1

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6f353d6..c2c8af4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -54,6 +54,7 @@
   DumpWriters/CompositeWriter.cpp
   DumpWriters/CurrentWriterWrapper.cpp
   DumpWriters/DumpWriter.cpp
+  DumpWriters/ProgressWriterWrapper.cpp
   DumpWriters/WriterWrapper.cpp
   FetchText.cpp
   format.cc
@@ -138,6 +139,7 @@
   DumpWriters/CompositeWriter.h
   DumpWriters/CurrentWriterWrapper.h
   DumpWriters/DumpWriter.h
+  DumpWriters/ProgressWriterWrapper.h
   DumpWriters/IDumpWriter.h
   DumpWriters/WriterWrapper.h
   FetchText.h
diff --git a/Incremental dumps.vcxproj b/Incremental dumps.vcxproj
index 0c176c6..df6e30d 100644
--- a/Incremental dumps.vcxproj
+++ b/Incremental dumps.vcxproj
@@ -134,6 +134,7 @@
     <ClInclude Include="DumpWriters\ArticlesWriterWrapper.h" />
     <ClInclude Include="DumpWriters\CompositeWriter.h" />
     <ClInclude Include="DumpWriters\DumpWriter.h" />
+    <ClInclude Include="DumpWriters\ProgressWriterWrapper.h" />
     <ClInclude Include="DumpWriters\WriterWrapper.h" />
     <ClInclude Include="FetchText.h" />
     <ClInclude Include="format.h" />
@@ -154,6 +155,7 @@
     <ClCompile Include="DumpWriters\ArticlesWriterWrapper.cpp" />
     <ClCompile Include="DumpWriters\CompositeWriter.cpp" />
     <ClCompile Include="DumpWriters\DumpWriter.cpp" />
+    <ClCompile Include="DumpWriters\ProgressWriterWrapper.cpp" />
     <ClCompile Include="DumpWriters\WriterWrapper.cpp" />
     <ClCompile Include="FetchText.cpp" />
     <ClCompile Include="format.cc" />
diff --git a/main.cpp b/main.cpp
index ab3962b..affbd95 100644
--- a/main.cpp
+++ b/main.cpp
@@ -1,9 +1,11 @@
 #include <iostream>
+#include <fstream>
 #include <queue>
 #include "DumpWriters/CompositeWriter.h"
 #include "DumpWriters/DumpWriter.h"
 #include "DumpWriters/CurrentWriterWrapper.h"
 #include "DumpWriters/ArticlesWriterWrapper.h"
+#include "DumpWriters/ProgressWriterWrapper.h"
 #include "Diff/DiffReader.h"
 #include "XmlInput/XmlMediawikiProcessor.h"
 #include "XmlInput/WrapperInputStream.h"
@@ -144,6 +146,26 @@
         throw ParametersException("The timestamp can't be empty.");
 }
 
+void createDumpCore(ProgressWriterWrapper& writer, std::istream& inputStream) // Runs inputStream through XmlMediawikiProcessor into writer, reporting the input offset along the way.
+{
+    std::uint64_t i = 0; // number of times the callback below has been invoked so far
+    std::function<void ()> offsetReportingFunction = [&]() // [&]: i, writer and inputStream are captured by reference
+    {
+        if (i % 100 == 0) // report on every 100th invocation, starting with the very first one (i == 0)
+            writer.ReportOffset(inputStream.tellg()); // NOTE(review): tellg() returns -1 when the stream cannot report a position (possibly std::cin fed from a pipe) - confirm ReportOffset copes with that
+
+        i++;
+    };
+
+    writer.SetDumpKind(DumpKind::None); // the dump kind is set before Process() is called
+
+    WrapperInputStream wrapperStream(inputStream, offsetReportingFunction); // presumably invokes the callback as it reads from inputStream - verify in XmlInput/WrapperInputStream.h
+
+    XmlMediawikiProcessor::Process(&writer, wrapperStream);
+
+    writer.Complete(); // called only after Process() has returned
+}
+
 void createDump(std::queue<std::string> &parameters)
 {
     if (parameters.size() < 3 + 2)
@@ -157,21 +179,19 @@
 
     auto writers = createWriters(parameters, name, timestamp);
 
-    CompositeWriter writer(writers);
+    std::unique_ptr<CompositeWriter> writer(new CompositeWriter(writers));
 
-    writer.SetDumpKind(DumpKind::None);
+    ProgressWriterWrapper progressWriter(std::move(writer), 10000);
 
     if (inputFileName == "-")
     {
-        auto stream = WrapperInputStream(std::cin);
-        XmlMediawikiProcessor::Process(&writer, stream);
+        createDumpCore(progressWriter, std::cin);
     }
     else
     {
-        XmlMediawikiProcessor::Process(&writer, inputFileName);
+        std::ifstream stream(inputFileName, std::ios::binary);
+        createDumpCore(progressWriter, stream);
     }
-
-    writer.Complete();
 }
 
 void updateDump(std::queue<std::string> &parameters)

-- 
To view, visit https://gerrit.wikimedia.org/r/84363
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Iae928f67eb1b958aea64e984b326801aeab05461
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps/incremental
Gerrit-Branch: gsoc
Gerrit-Owner: Petr Onderka <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to