ArielGlenn has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/327764 )
Change subject: when rerunning a checkpoint file, use only the relevant prefetch file(s) ...................................................................... when rerunning a checkpoint file, use only the relevant prefetch file(s) Change-Id: Ibe21a45919c81f05a372d9bd0c266cec2bc80e12 --- M xmldumps-backup/dumps/xmljobs.py 1 file changed, 44 insertions(+), 12 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/operations/dumps refs/changes/64/327764/1 diff --git a/xmldumps-backup/dumps/xmljobs.py b/xmldumps-backup/dumps/xmljobs.py index 88c1d3d..7532097 100644 --- a/xmldumps-backup/dumps/xmljobs.py +++ b/xmldumps-backup/dumps/xmljobs.py @@ -2,8 +2,8 @@ All xml dump jobs are defined here ''' -import os import re +import os from os.path import exists import signal @@ -361,6 +361,31 @@ temp=False) return output_file + def chkptfile_in_pagerange(self, fobj, chkpt_fobj): + """return False if both files are checkpoint files (with page ranges) + and the second file page range does not overlap with the first one""" + # not both checkpoint files: + if not fobj.is_checkpoint_file: + return True + if not chkpt_fobj.is_checkpoint_file: + return True + # one or both end values are missing: + if not fobj.last_page_id and not chkpt_fobj.last_page_id: + return True + elif not fobj.last_page_id and chkpt_fobj.last_page_id < fobj.first_page_id: + return True + elif not chkpt_fobj.last_page_id and fobj.last_page_id < chkpt_fobj.first_page_id: + return True + # have end values for both files: + elif (fobj.first_page_id <= chkpt_fobj.first_page_id and + chkpt_fobj.first_page_id <= fobj.last_page_id): + return True + elif (chkpt_fobj.first_page_id <= fobj.first_page_id and + fobj.first_page_id <= chkpt_fobj.last_page_id): + return True + else: + return False + def run(self, runner): # here we will either clean up or not depending on how we were called FIXME self.cleanup_old_files(runner.dump_dir, runner) @@ -550,10 +575,10 @@ proc = CommandPipeline(pipeline, quiet=True) 
proc.run_pipeline_get_output() if (proc.exited_successfully() or - (proc.get_failed_cmds_with_retcode() == - [[-signal.SIGPIPE, pipeline[0]]]) or - (proc.get_failed_cmds_with_retcode() == - [[signal.SIGPIPE + 128, pipeline[0]]])): + (proc.get_failed_cmds_with_retcode() == + [[-signal.SIGPIPE, pipeline[0]]]) or + (proc.get_failed_cmds_with_retcode() == + [[signal.SIGPIPE + 128, pipeline[0]]])): last_lines = proc.output() return last_lines @@ -576,10 +601,10 @@ proc = CommandPipeline(pipeline, quiet=True) proc.run_pipeline_get_output() if (proc.exited_successfully() or - (proc.get_failed_cmds_with_retcode() == - [[-signal.SIGPIPE, pipeline[0]]]) or - (proc.get_failed_cmds_with_retcode() == - [[signal.SIGPIPE + 128, pipeline[0]]])): + (proc.get_failed_cmds_with_retcode() == + [[-signal.SIGPIPE, pipeline[0]]]) or + (proc.get_failed_cmds_with_retcode() == + [[signal.SIGPIPE + 128, pipeline[0]]])): output = proc.output() # 339915646: <page> if ':' in output: @@ -609,6 +634,8 @@ """Build the command line for the dump, minus output and filter options""" # we write a temp file, it will be checkpointed every so often. 
+        prefetch_start = None +        prefetch_end = None          temp = bool(self._checkpoints_enabled)          output_file = DumpFilename(self.wiki, stub_file.date, self.dumpname, @@ -635,6 +662,11 @@      # we need to check existence for each and put them together in a string if possible          if possible_sources:              for sourcefile in possible_sources: +                # if we are doing partial stub run, include only the analogous +                # checkpointed prefetch files, if there are checkpointed files +                # otherwise we'll use all the sourcefiles reported +                if not self.chkptfile_in_pagerange(stub_file, sourcefile): +                    continue                  sname = runner.dump_dir.filename_public_path(sourcefile, sourcefile.date)                  if exists(sname):                      sources.append(sname) @@ -755,9 +787,9 @@                  if ((first_page_id_in_file <= int(start_page_id) and                       (last_page_id_in_file is None or                        last_page_id_in_file >= int(start_page_id))) or -                        (first_page_id_in_file >= int(start_page_id) and -                         (end_page_id is None or -                          first_page_id_in_file <= int(end_page_id)))): +                       (first_page_id_in_file >= int(start_page_id) and +                        (end_page_id is None or +                         first_page_id_in_file <= int(end_page_id)))):                      possibles.append(file_obj)          except Exception as ex:              runner.debug( -- To view, visit https://gerrit.wikimedia.org/r/327764 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Ibe21a45919c81f05a372d9bd0c266cec2bc80e12 Gerrit-PatchSet: 1 Gerrit-Project: operations/dumps Gerrit-Branch: master Gerrit-Owner: ArielGlenn <ar...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits