[MediaWiki-commits] [Gerrit] operations/dumps[master]: make reporting of file sizes for dump steps in progress work again
ArielGlenn has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/400226 ) Change subject: make reporting of file sizes for dump steps in progress work again .. make reporting of file sizes for dump steps in progress work again This was probably broken when we introduced writing files with a special extension and then moving them into place once the run completes successfully. Bug: T183694 Change-Id: Ib4bc9f6a41f31431e5642a3f7f7415bd2de38ea8 --- M xmldumps-backup/dumps/apijobs.py M xmldumps-backup/dumps/fileutils.py M xmldumps-backup/dumps/flowjob.py M xmldumps-backup/dumps/jobs.py M xmldumps-backup/dumps/recombinejobs.py M xmldumps-backup/dumps/recompressjobs.py M xmldumps-backup/dumps/runnerutils.py M xmldumps-backup/dumps/tablesjobs.py M xmldumps-backup/dumps/xmlcontentjobs.py M xmldumps-backup/dumps/xmljobs.py 10 files changed, 51 insertions(+), 33 deletions(-) Approvals: ArielGlenn: Looks good to me, approved jenkins-bot: Verified diff --git a/xmldumps-backup/dumps/apijobs.py b/xmldumps-backup/dumps/apijobs.py index 3991add..534ca99 100644 --- a/xmldumps-backup/dumps/apijobs.py +++ b/xmldumps-backup/dumps/apijobs.py @@ -1,5 +1,6 @@ import time from dumps.exceptions import BackupError +from dumps.fileutils import DumpFilename from dumps.jobs import Dump @@ -30,11 +31,11 @@ commands = self.build_command(runner) if runner.wiki.is_private(): command_series = runner.get_save_command_series( -commands, self.get_inprogress_name( +commands, DumpFilename.get_inprogress_name( runner.dump_dir.filename_private_path(output_dfname))) else: command_series = runner.get_save_command_series( -commands, self.get_inprogress_name( +commands, DumpFilename.get_inprogress_name( runner.dump_dir.filename_public_path(output_dfname))) self.setup_command_info(runner, command_series, [output_dfname]) diff --git a/xmldumps-backup/dumps/fileutils.py b/xmldumps-backup/dumps/fileutils.py index a264ece..e6b4b67 100644 --- a/xmldumps-backup/dumps/fileutils.py +++ 
b/xmldumps-backup/dumps/fileutils.py @@ -165,6 +165,8 @@ partnum_int part number as int """ +INPROG = ".inprog" # extension for dump output files that are in progress (not fully written) + @staticmethod def make_checkpoint_string(first_page_id, last_page_id): if first_page_id is not None and last_page_id is not None: @@ -172,6 +174,10 @@ else: return None +@staticmethod +def get_inprogress_name(filename): +return filename + DumpFilename.INPROG + def __init__(self, wiki, date=None, dump_name=None, filetype=None, ext=None, partnum=None, checkpoint=None, temp=False): """Constructor. Arguments: the dump name as it should appear in the filename, diff --git a/xmldumps-backup/dumps/flowjob.py b/xmldumps-backup/dumps/flowjob.py index b875101..f1de495 100644 --- a/xmldumps-backup/dumps/flowjob.py +++ b/xmldumps-backup/dumps/flowjob.py @@ -5,6 +5,7 @@ import os from dumps.exceptions import BackupError from dumps.utils import MultiVersion +from dumps.fileutils import DumpFilename from dumps.jobs import Dump @@ -45,7 +46,7 @@ command.extend(script_command) command.extend(["--wiki=%s" % runner.db_name, "--current", "--report=1000", -"--output=bzip2:%s" % self.get_inprogress_name(flow_output_fpath)]) +"--output=bzip2:%s" % DumpFilename.get_inprogress_name(flow_output_fpath)]) if self.history: command.append("--full") pipeline = [command] diff --git a/xmldumps-backup/dumps/jobs.py b/xmldumps-backup/dumps/jobs.py index 8ebbbf1..256056b 100644 --- a/xmldumps-backup/dumps/jobs.py +++ b/xmldumps-backup/dumps/jobs.py @@ -54,8 +54,6 @@ class Dump(object): -INPROG = ".inprog" # extension for dump output files that are in progress (not fully written) - def __init__(self, name, desc, verbose=False): self._desc = desc self.verbose = verbose @@ -84,14 +82,12 @@ if not hasattr(self, '_parts'): self._parts = False -def get_inprogress_name(self, filename): -return filename + self.INPROG - def setup_command_info(self, runner, command_series, output_dfnames, output_dir=None): command_info = {} 
command_info['runner'] = runner command_info['series'] = command_series -command_info['output_files'] = [dfname.filename + self.INPROG for dfname in output_dfnames] +command_info['output_files'] = [dfname.filename + DumpFilename.INPROG +for dfname in output_dfnames] if output_dir is not None:
[MediaWiki-commits] [Gerrit] operations/dumps[master]: make reporting of file sizes for dump steps in progress work again
ArielGlenn has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/400226 ) Change subject: make reporting of file sizes for dump steps in progress work again .. make reporting of file sizes for dump steps in progress work again This was probably broken when we introduced writing files with a special extension and then moving them into place once the run completes successfully. Bug: T183694 Change-Id: Ib4bc9f6a41f31431e5642a3f7f7415bd2de38ea8 --- M xmldumps-backup/dumps/apijobs.py M xmldumps-backup/dumps/fileutils.py M xmldumps-backup/dumps/flowjob.py M xmldumps-backup/dumps/jobs.py M xmldumps-backup/dumps/recombinejobs.py M xmldumps-backup/dumps/recompressjobs.py M xmldumps-backup/dumps/runnerutils.py M xmldumps-backup/dumps/tablesjobs.py M xmldumps-backup/dumps/xmlcontentjobs.py M xmldumps-backup/dumps/xmljobs.py 10 files changed, 51 insertions(+), 33 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/operations/dumps refs/changes/26/400226/1 diff --git a/xmldumps-backup/dumps/apijobs.py b/xmldumps-backup/dumps/apijobs.py index 3991add..534ca99 100644 --- a/xmldumps-backup/dumps/apijobs.py +++ b/xmldumps-backup/dumps/apijobs.py @@ -1,5 +1,6 @@ import time from dumps.exceptions import BackupError +from dumps.fileutils import DumpFilename from dumps.jobs import Dump @@ -30,11 +31,11 @@ commands = self.build_command(runner) if runner.wiki.is_private(): command_series = runner.get_save_command_series( -commands, self.get_inprogress_name( +commands, DumpFilename.get_inprogress_name( runner.dump_dir.filename_private_path(output_dfname))) else: command_series = runner.get_save_command_series( -commands, self.get_inprogress_name( +commands, DumpFilename.get_inprogress_name( runner.dump_dir.filename_public_path(output_dfname))) self.setup_command_info(runner, command_series, [output_dfname]) diff --git a/xmldumps-backup/dumps/fileutils.py b/xmldumps-backup/dumps/fileutils.py index a264ece..e6b4b67 100644 --- a/xmldumps-backup/dumps/fileutils.py +++ 
b/xmldumps-backup/dumps/fileutils.py @@ -165,6 +165,8 @@ partnum_int part number as int """ +INPROG = ".inprog" # extension for dump output files that are in progress (not fully written) + @staticmethod def make_checkpoint_string(first_page_id, last_page_id): if first_page_id is not None and last_page_id is not None: @@ -172,6 +174,10 @@ else: return None +@staticmethod +def get_inprogress_name(filename): +return filename + DumpFilename.INPROG + def __init__(self, wiki, date=None, dump_name=None, filetype=None, ext=None, partnum=None, checkpoint=None, temp=False): """Constructor. Arguments: the dump name as it should appear in the filename, diff --git a/xmldumps-backup/dumps/flowjob.py b/xmldumps-backup/dumps/flowjob.py index b875101..f1de495 100644 --- a/xmldumps-backup/dumps/flowjob.py +++ b/xmldumps-backup/dumps/flowjob.py @@ -5,6 +5,7 @@ import os from dumps.exceptions import BackupError from dumps.utils import MultiVersion +from dumps.fileutils import DumpFilename from dumps.jobs import Dump @@ -45,7 +46,7 @@ command.extend(script_command) command.extend(["--wiki=%s" % runner.db_name, "--current", "--report=1000", -"--output=bzip2:%s" % self.get_inprogress_name(flow_output_fpath)]) +"--output=bzip2:%s" % DumpFilename.get_inprogress_name(flow_output_fpath)]) if self.history: command.append("--full") pipeline = [command] diff --git a/xmldumps-backup/dumps/jobs.py b/xmldumps-backup/dumps/jobs.py index 8ebbbf1..256056b 100644 --- a/xmldumps-backup/dumps/jobs.py +++ b/xmldumps-backup/dumps/jobs.py @@ -54,8 +54,6 @@ class Dump(object): -INPROG = ".inprog" # extension for dump output files that are in progress (not fully written) - def __init__(self, name, desc, verbose=False): self._desc = desc self.verbose = verbose @@ -84,14 +82,12 @@ if not hasattr(self, '_parts'): self._parts = False -def get_inprogress_name(self, filename): -return filename + self.INPROG - def setup_command_info(self, runner, command_series, output_dfnames, output_dir=None): command_info = {} 
command_info['runner'] = runner command_info['series'] = command_series -command_info['output_files'] = [dfname.filename + self.INPROG for dfname in output_dfnames] +command_info['output_files'] = [dfname.filename + DumpFilename.INPROG +for dfname in output_dfnames] if output_dir is not None: