[MediaWiki-commits] [Gerrit] operations/dumps[master]: make reporting of file sizes for dump steps in progress work...

ArielGlenn (Code Review) Tue, 26 Dec 2017 03:04:31 -0800

ArielGlenn has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/400226 )


Change subject: make reporting of file sizes for dump steps in progress work again
......................................................................


make reporting of file sizes for dump steps in progress work again

This was probably broken when we introduced writing files with
a special extension and then moving them into place once the
run completes successfully.

Bug: T183694
Change-Id: Ib4bc9f6a41f31431e5642a3f7f7415bd2de38ea8
---
M xmldumps-backup/dumps/apijobs.py
M xmldumps-backup/dumps/fileutils.py
M xmldumps-backup/dumps/flowjob.py
M xmldumps-backup/dumps/jobs.py
M xmldumps-backup/dumps/recombinejobs.py
M xmldumps-backup/dumps/recompressjobs.py
M xmldumps-backup/dumps/runnerutils.py
M xmldumps-backup/dumps/tablesjobs.py
M xmldumps-backup/dumps/xmlcontentjobs.py
M xmldumps-backup/dumps/xmljobs.py
10 files changed, 51 insertions(+), 33 deletions(-)

Approvals:
  ArielGlenn: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/xmldumps-backup/dumps/apijobs.py b/xmldumps-backup/dumps/apijobs.py
index 3991add..534ca99 100644
--- a/xmldumps-backup/dumps/apijobs.py
+++ b/xmldumps-backup/dumps/apijobs.py
@@ -1,5 +1,6 @@
 import time
 from dumps.exceptions import BackupError
+from dumps.fileutils import DumpFilename
 from dumps.jobs import Dump
 
 
@@ -30,11 +31,11 @@
         commands = self.build_command(runner)
         if runner.wiki.is_private():
             command_series = runner.get_save_command_series(
-                commands, self.get_inprogress_name(
+                commands, DumpFilename.get_inprogress_name(
                     runner.dump_dir.filename_private_path(output_dfname)))
         else:
             command_series = runner.get_save_command_series(
-                commands, self.get_inprogress_name(
+                commands, DumpFilename.get_inprogress_name(
                     runner.dump_dir.filename_public_path(output_dfname)))
         self.setup_command_info(runner, command_series, [output_dfname])
 
diff --git a/xmldumps-backup/dumps/fileutils.py b/xmldumps-backup/dumps/fileutils.py
index a264ece..e6b4b67 100644
--- a/xmldumps-backup/dumps/fileutils.py
+++ b/xmldumps-backup/dumps/fileutils.py
@@ -165,6 +165,8 @@
     partnum_int       part number as int
     """
 
+    INPROG = ".inprog"  # extension for dump output files that are in progress (not fully written)
+
     @staticmethod
     def make_checkpoint_string(first_page_id, last_page_id):
         if first_page_id is not None and last_page_id is not None:
@@ -172,6 +174,10 @@
         else:
             return None
 
+    @staticmethod
+    def get_inprogress_name(filename):
+        return filename + DumpFilename.INPROG
+
     def __init__(self, wiki, date=None, dump_name=None, filetype=None,
                  ext=None, partnum=None, checkpoint=None, temp=False):
         """Constructor.  Arguments: the dump name as it should appear in the 
filename,
diff --git a/xmldumps-backup/dumps/flowjob.py b/xmldumps-backup/dumps/flowjob.py
index b875101..f1de495 100644
--- a/xmldumps-backup/dumps/flowjob.py
+++ b/xmldumps-backup/dumps/flowjob.py
@@ -5,6 +5,7 @@
 import os
 from dumps.exceptions import BackupError
 from dumps.utils import MultiVersion
+from dumps.fileutils import DumpFilename
 from dumps.jobs import Dump
 
 
@@ -45,7 +46,7 @@
         command.extend(script_command)
         command.extend(["--wiki=%s" % runner.db_name,
                         "--current", "--report=1000",
-                        "--output=bzip2:%s" % 
self.get_inprogress_name(flow_output_fpath)])
+                        "--output=bzip2:%s" % 
DumpFilename.get_inprogress_name(flow_output_fpath)])
         if self.history:
             command.append("--full")
         pipeline = [command]
diff --git a/xmldumps-backup/dumps/jobs.py b/xmldumps-backup/dumps/jobs.py
index 8ebbbf1..256056b 100644
--- a/xmldumps-backup/dumps/jobs.py
+++ b/xmldumps-backup/dumps/jobs.py
@@ -54,8 +54,6 @@
 
 
 class Dump(object):
-    INPROG = ".inprog"  # extension for dump output files that are in progress (not fully written)
-
     def __init__(self, name, desc, verbose=False):
         self._desc = desc
         self.verbose = verbose
@@ -84,14 +82,12 @@
         if not hasattr(self, '_parts'):
             self._parts = False
 
-    def get_inprogress_name(self, filename):
-        return filename + self.INPROG
-
     def setup_command_info(self, runner, command_series, output_dfnames, 
output_dir=None):
         command_info = {}
         command_info['runner'] = runner
         command_info['series'] = command_series
-        command_info['output_files'] = [dfname.filename + self.INPROG for 
dfname in output_dfnames]
+        command_info['output_files'] = [dfname.filename + DumpFilename.INPROG
+                                        for dfname in output_dfnames]
         if output_dir is not None:
             command_info['output_dir'] = output_dir
         else:
@@ -239,12 +235,12 @@
         file_truncated = True
         if runner.wiki.is_private():
             dcontents = DumpContents(runner.wiki,
-                                     self.get_inprogress_name(
+                                     DumpFilename.get_inprogress_name(
                                          
runner.dump_dir.filename_private_path(dfname)),
                                      dfname)
         else:
             dcontents = DumpContents(runner.wiki,
-                                     self.get_inprogress_name(
+                                     DumpFilename.get_inprogress_name(
                                          
runner.dump_dir.filename_public_path(dfname)),
                                      dfname)
         if exists(dcontents.filename):
@@ -319,10 +315,11 @@
                 if not commands['output_files']:
                     return
                 for inprogress_filename in commands['output_files']:
-                    if not inprogress_filename.endswith(self.INPROG):
+                    if not inprogress_filename.endswith(DumpFilename.INPROG):
                         continue
                     final_dfname = DumpFilename(commands['runner'].wiki)
-                    final_dfname.new_from_filename(inprogress_filename[:-1 * 
len(self.INPROG)])
+                    final_dfname.new_from_filename(
+                        inprogress_filename[:-1 * len(DumpFilename.INPROG)])
 
                     in_progress_path = os.path.join(commands['output_dir'], 
inprogress_filename)
                     final_path = os.path.join(commands['output_dir'], 
final_dfname.filename)
@@ -347,10 +344,10 @@
             os.remove(dump_dir.filename_public_path(dfname))
         elif exists(dump_dir.filename_private_path(dfname)):
             os.remove(dump_dir.filename_private_path(dfname))
-        if exists(dump_dir.filename_public_path(dfname) + self.INPROG):
-            os.remove(dump_dir.filename_public_path(dfname) + self.INPROG)
-        elif exists(dump_dir.filename_private_path(dfname) + self.INPROG):
-            os.remove(dump_dir.filename_private_path(dfname) + self.INPROG)
+        if exists(dump_dir.filename_public_path(dfname) + DumpFilename.INPROG):
+            os.remove(dump_dir.filename_public_path(dfname) + 
DumpFilename.INPROG)
+        elif exists(dump_dir.filename_private_path(dfname) + 
DumpFilename.INPROG):
+            os.remove(dump_dir.filename_private_path(dfname) + 
DumpFilename.INPROG)
 
     def cleanup_old_files(self, dump_dir, runner):
         if "cleanup_old_files" in runner.enabled:
diff --git a/xmldumps-backup/dumps/recombinejobs.py 
b/xmldumps-backup/dumps/recombinejobs.py
index 12c49e5..6d1f0a1 100644
--- a/xmldumps-backup/dumps/recombinejobs.py
+++ b/xmldumps-backup/dumps/recombinejobs.py
@@ -7,6 +7,7 @@
 import signal
 from dumps.exceptions import BackupError
 from dumps.jobs import Dump
+from dumps.fileutils import DumpFilename
 from dumps.CommandManagement import CommandPipeline
 
 
@@ -74,7 +75,7 @@
             recombines.append(recombine)
         recombine_command_string = ("(" + ";".join(recombines) + ")" + "|" +
                                     "%s %s" % (compression_command,
-                                               
self.get_inprogress_name(output_filename)))
+                                               
DumpFilename.get_inprogress_name(output_filename)))
         return recombine_command_string
 
 
diff --git a/xmldumps-backup/dumps/recompressjobs.py 
b/xmldumps-backup/dumps/recompressjobs.py
index 90c22c8..49ca8aa 100644
--- a/xmldumps-backup/dumps/recompressjobs.py
+++ b/xmldumps-backup/dumps/recompressjobs.py
@@ -110,8 +110,8 @@
             infilepath = runner.dump_dir.filename_public_path(input_dfname)
         command_pipe = [["%s -dc %s | %s --pagesperstream 100 --buildindex %s 
> %s" %
                          (self.wiki.config.bzip2, infilepath, 
self.wiki.config.recompressxml,
-                          self.get_inprogress_name(outfilepath_index),
-                          self.get_inprogress_name(outfilepath))]]
+                          DumpFilename.get_inprogress_name(outfilepath_index),
+                          DumpFilename.get_inprogress_name(outfilepath))]]
         return [command_pipe]
 
     def run(self, runner):
@@ -350,7 +350,7 @@
             command_pipe = [["%s -dc %s | %s a -mx=4 -si %s" %
                              (self.wiki.config.bzip2, infilepath,
                               self.wiki.config.sevenzip,
-                              self.get_inprogress_name(outfilepath))]]
+                              DumpFilename.get_inprogress_name(outfilepath))]]
             command_series.append(command_pipe)
         return command_series
 
diff --git a/xmldumps-backup/dumps/runnerutils.py 
b/xmldumps-backup/dumps/runnerutils.py
index 4688900..10b0295 100644
--- a/xmldumps-backup/dumps/runnerutils.py
+++ b/xmldumps-backup/dumps/runnerutils.py
@@ -272,9 +272,18 @@
             status ("in-progress", "missing", ...)
         """
         filename = dump_dir.filename_public_path(dfname)
+        size = None
         if exists(filename):
             size = os.path.getsize(filename)
-        else:
+        elif item_status == "in-progress":
+            # note that because multiple files may be produced for a single 
dump
+            # job, some may be complete while others are still in progress.
+            # therefore we check the normal name first, falling back to the
+            # inprogress name.
+            filename = filename + DumpFilename.INPROG
+            if exists(filename):
+                size = os.path.getsize(filename)
+        if size is None:
             item_status = "missing"
             size = 0
         pretty_size = FileUtils.pretty_size(size)
diff --git a/xmldumps-backup/dumps/tablesjobs.py 
b/xmldumps-backup/dumps/tablesjobs.py
index a82e01b..e2ca0ce 100644
--- a/xmldumps-backup/dumps/tablesjobs.py
+++ b/xmldumps-backup/dumps/tablesjobs.py
@@ -9,6 +9,7 @@
 
 from dumps.exceptions import BackupError
 from dumps.jobs import Dump
+from dumps.fileutils import DumpFilename
 
 
 class PublicTable(Dump):
@@ -33,11 +34,11 @@
         commands = runner.db_server_info.build_sqldump_command(self._table, 
runner.wiki.config.gzip)
         if self.private or runner.wiki.is_private():
             command_series = runner.get_save_command_series(
-                commands, self.get_inprogress_name(
+                commands, DumpFilename.get_inprogress_name(
                     runner.dump_dir.filename_private_path(output_dfname)))
         else:
             command_series = runner.get_save_command_series(
-                commands, self.get_inprogress_name(
+                commands, DumpFilename.get_inprogress_name(
                     runner.dump_dir.filename_public_path(output_dfname)))
         return command_series
 
@@ -136,10 +137,12 @@
         series = runner.db_server_info.build_sql_command(query, 
runner.wiki.config.gzip)
         if runner.wiki.is_private():
             return runner.get_save_command_series(
-                series, 
self.get_inprogress_name(runner.dump_dir.filename_private_path(out_dfname)))
+                series, DumpFilename.get_inprogress_name(
+                    runner.dump_dir.filename_private_path(out_dfname)))
         else:
             return runner.get_save_command_series(
-                series, 
self.get_inprogress_name(runner.dump_dir.filename_public_path(out_dfname)))
+                series, DumpFilename.get_inprogress_name(
+                    runner.dump_dir.filename_public_path(out_dfname)))
 
     def save_sql(self, runner, command_series):
         """Pass some SQL commands to the server for this DB and save output to 
a gzipped file."""
diff --git a/xmldumps-backup/dumps/xmlcontentjobs.py 
b/xmldumps-backup/dumps/xmlcontentjobs.py
index b0db10d..2e21232 100644
--- a/xmldumps-backup/dumps/xmlcontentjobs.py
+++ b/xmldumps-backup/dumps/xmlcontentjobs.py
@@ -765,7 +765,7 @@
             bz2mode = "dbzip2"
         else:
             bz2mode = "bzip2"
-        return "--output=%s:%s" % (bz2mode, 
self.get_inprogress_name(xmlbz2_path))
+        return "--output=%s:%s" % (bz2mode, 
DumpFilename.get_inprogress_name(xmlbz2_path))
 
     def build_command(self, runner, stub_dfname, prefetch, output_dfname):
         """
diff --git a/xmldumps-backup/dumps/xmljobs.py b/xmldumps-backup/dumps/xmljobs.py
index 3b6ea86..4419d42 100644
--- a/xmldumps-backup/dumps/xmljobs.py
+++ b/xmldumps-backup/dumps/xmljobs.py
@@ -134,9 +134,9 @@
             config_file_arg = config_file_arg + ":" + 
runner.wiki.config.override_section
         command = ["/usr/bin/python", "xmlstubs.py", "--config", 
config_file_arg,
                    "--wiki", runner.db_name,
-                   "--articles", self.get_inprogress_name(articles_filepath),
-                   "--history", self.get_inprogress_name(history_filepath),
-                   "--current", self.get_inprogress_name(current_filepath)]
+                   "--articles", 
DumpFilename.get_inprogress_name(articles_filepath),
+                   "--history", 
DumpFilename.get_inprogress_name(history_filepath),
+                   "--current", 
DumpFilename.get_inprogress_name(current_filepath)]
 
         if output_dfname.partnum:
             # set up start end end pageids for this piece
@@ -227,7 +227,7 @@
             config_file_arg = config_file_arg + ":" + 
runner.wiki.config.override_section
         command = ["/usr/bin/python", "xmllogs.py", "--config",
                    config_file_arg, "--wiki", runner.db_name,
-                   "--outfile", self.get_inprogress_name(logging_path)]
+                   "--outfile", DumpFilename.get_inprogress_name(logging_path)]
 
         pipeline = [command]
         series = [pipeline]
@@ -297,10 +297,10 @@
             variant = self.get_variant_from_dumpname(dfname.dumpname)
             variant_option = self._variant_option(variant)
             if runner.wiki.is_private():
-                output_paths.append(self.get_inprogress_name(
+                output_paths.append(DumpFilename.get_inprogress_name(
                     runner.dump_dir.filename_private_path(dfname)))
             else:
-                output_paths.append(self.get_inprogress_name(
+                output_paths.append(DumpFilename.get_inprogress_name(
                     runner.dump_dir.filename_public_path(dfname)))
             variants.append(variant_option)
 

-- 
To view, visit https://gerrit.wikimedia.org/r/400226
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ib4bc9f6a41f31431e5642a3f7f7415bd2de38ea8
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps
Gerrit-Branch: master
Gerrit-Owner: ArielGlenn <ar...@wikimedia.org>
Gerrit-Reviewer: ArielGlenn <ar...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

[MediaWiki-commits] [Gerrit] operations/dumps[master]: make reporting of file sizes for dump steps in progress work...

Reply via email to