ArielGlenn has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/327764 )

Change subject: when rerunning a checkpoint file, use only the relevant prefetch file(s)
......................................................................

when rerunning a checkpoint file, use only the relevant prefetch file(s)

Change-Id: Ibe21a45919c81f05a372d9bd0c266cec2bc80e12
---
M xmldumps-backup/dumps/xmljobs.py
1 file changed, 44 insertions(+), 12 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/dumps refs/changes/64/327764/1

diff --git a/xmldumps-backup/dumps/xmljobs.py b/xmldumps-backup/dumps/xmljobs.py
index 88c1d3d..7532097 100644
--- a/xmldumps-backup/dumps/xmljobs.py
+++ b/xmldumps-backup/dumps/xmljobs.py
@@ -2,8 +2,8 @@
 All xml dump jobs are defined here
 '''
 
-import os
 import re
+import os
 from os.path import exists
 import signal
 
@@ -361,6 +361,31 @@
                                    temp=False)
         return output_file
 
+    def chkptfile_in_pagerange(self, fobj, chkpt_fobj):
+        """return False if both files are checkpoint files (with page ranges)
+        and the second file page range does not overlap with the first one"""
+        # not both checkpoint files:
+        if not fobj.is_checkpoint_file:
+            return True
+        if not chkpt_fobj.is_checkpoint_file:
+            return True
+        # one or both end values are missing:
+        if not fobj.last_page_id and not chkpt_fobj.last_page_id:
+            return True
+        elif not fobj.last_page_id and chkpt_fobj.last_page_id >= fobj.first_page_id:
+            return True
+        elif not chkpt_fobj.last_page_id and fobj.last_page_id >= chkpt_fobj.first_page_id:
+            return True
+        # have end values for both files:
+        elif (fobj.first_page_id <= chkpt_fobj.first_page_id and
+              chkpt_fobj.first_page_id <= fobj.last_page_id):
+            return True
+        elif (chkpt_fobj.first_page_id <= fobj.first_page_id and
+              fobj.first_page_id <= chkpt_fobj.last_page_id):
+            return True
+        else:
+            return False
+
     def run(self, runner):
         # here we will either clean up or not depending on how we were called FIXME
         self.cleanup_old_files(runner.dump_dir, runner)
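
The new chkptfile_in_pagerange() is a closed-interval overlap test in
which a missing last_page_id means the range is open-ended. A minimal
standalone sketch of the same logic (PageRange is a hypothetical
stand-in for DumpFilename, with plain ints for the page id attributes):

    # Minimal sketch of the overlap test; PageRange is a hypothetical
    # stand-in for DumpFilename, not part of the dumps codebase.
    class PageRange(object):
        def __init__(self, first, last=None):
            self.first = first  # first page id covered
            self.last = last    # last page id covered; None = open-ended

    def ranges_overlap(left, right):
        """True if the two page ranges overlap, treating a missing
        'last' as extending to infinity."""
        if left.last is not None and left.last < right.first:
            return False
        if right.last is not None and right.last < left.first:
            return False
        return True

    assert ranges_overlap(PageRange(1, 1000), PageRange(500, 1500))
    assert not ranges_overlap(PageRange(1, 1000), PageRange(1001, None))
    assert ranges_overlap(PageRange(2000, None), PageRange(1, 2500))
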
@@ -550,10 +575,10 @@
         proc = CommandPipeline(pipeline, quiet=True)
         proc.run_pipeline_get_output()
         if (proc.exited_successfully() or
-            (proc.get_failed_cmds_with_retcode() ==
-             [[-signal.SIGPIPE, pipeline[0]]]) or
-            (proc.get_failed_cmds_with_retcode() ==
-             [[signal.SIGPIPE + 128, pipeline[0]]])):
+                (proc.get_failed_cmds_with_retcode() ==
+                 [[-signal.SIGPIPE, pipeline[0]]]) or
+                (proc.get_failed_cmds_with_retcode() ==
+                 [[signal.SIGPIPE + 128, pipeline[0]]])):
             last_lines = proc.output()
         return last_lines
 
@@ -576,10 +601,10 @@
         proc = CommandPipeline(pipeline, quiet=True)
         proc.run_pipeline_get_output()
         if (proc.exited_successfully() or
-            (proc.get_failed_cmds_with_retcode() ==
-             [[-signal.SIGPIPE, pipeline[0]]]) or
-            (proc.get_failed_cmds_with_retcode() ==
-             [[signal.SIGPIPE + 128, pipeline[0]]])):
+                (proc.get_failed_cmds_with_retcode() ==
+                 [[-signal.SIGPIPE, pipeline[0]]]) or
+                (proc.get_failed_cmds_with_retcode() ==
+                 [[signal.SIGPIPE + 128, pipeline[0]]])):
             output = proc.output()
             # 339915646:  <page>
             if ':' in output:
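
Both retcode checks above (this hunk and the previous one) treat "first
command in the pipeline killed by SIGPIPE" as success, in either of its
two encodings: Python's subprocess module reports death by signal as a
negative return code, while a shell reports it as 128 plus the signal
number. A POSIX-only Python 3 sketch of the two conventions (the
commands are illustrative, not taken from the dumps code):

    import signal
    import subprocess

    # subprocess encodes death-by-signal as a negative return code.
    yes = subprocess.Popen(['yes'], stdout=subprocess.PIPE)
    head = subprocess.Popen(['head', '-n', '1'], stdin=yes.stdout,
                            stdout=subprocess.DEVNULL)
    yes.stdout.close()  # so 'yes' gets EPIPE once 'head' exits
    head.wait()
    yes.wait()
    assert yes.returncode == -signal.SIGPIPE      # -13 on Linux

    # A shell encodes the same death as 128 + the signal number.
    rc = subprocess.call(
        ['bash', '-c', 'set -o pipefail; yes | head -n 1 > /dev/null'])
    assert rc == signal.SIGPIPE + 128             # 141
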
@@ -609,6 +634,8 @@
         """Build the command line for the dump, minus output and filter 
options"""
 
         # we write a temp file, it will be checkpointed every so often.
+        prefetch_start = None
+        prefetch_end = None
         temp = bool(self._checkpoints_enabled)
 
         output_file = DumpFilename(self.wiki, stub_file.date, self.dumpname,
@@ -635,6 +662,11 @@
             # we need to check existence for each and put them together in a string
             if possible_sources:
                 for sourcefile in possible_sources:
+                    # if we are doing a partial stub run, include only the analogous
+                    # checkpointed prefetch files, if there are checkpointed files;
+                    # otherwise we'll use all the sourcefiles reported
+                    if not self.chkptfile_in_pagerange(stub_file, sourcefile):
+                        continue
                     sname = runner.dump_dir.filename_public_path(sourcefile, sourcefile.date)
                     if exists(sname):
                         sources.append(sname)
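
The new continue is the behavioral change: when rerunning a single
checkpoint file, candidate prefetch files whose page ranges cannot
contain the stub's pages are skipped before the existence check. A
hypothetical illustration of that pruning, with page ranges as
(first, last) tuples and None meaning open-ended, as in the patch:

    # Of four candidate prefetch ranges, only the one overlapping the
    # stub's range survives; the numbers here are made up.
    def in_pagerange(stub, candidate):
        (s_first, s_last), (c_first, c_last) = stub, candidate
        if s_last is not None and s_last < c_first:
            return False
        if c_last is not None and c_last < s_first:
            return False
        return True

    stub = (4000, 4999)  # rerunning one checkpoint file
    candidates = [(1, 1999), (2000, 3999), (4000, 5999), (6000, None)]
    assert [c for c in candidates if in_pagerange(stub, c)] == [(4000, 5999)]
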
@@ -755,9 +787,9 @@
                     if ((first_page_id_in_file <= int(start_page_id) and
                          (last_page_id_in_file is None or
                           last_page_id_in_file >= int(start_page_id))) or
-                        (first_page_id_in_file >= int(start_page_id) and
-                         (end_page_id is None or
-                          first_page_id_in_file <= int(end_page_id)))):
+                            (first_page_id_in_file >= int(start_page_id) and
+                             (end_page_id is None or
+                              first_page_id_in_file <= int(end_page_id)))):
                         possibles.append(file_obj)
                 except Exception as ex:
                     runner.debug(
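
Worked example for the reindented condition: with start_page_id=4000
and end_page_id=5999, a file covering pages 3500-4500 passes the first
arm (it straddles the start), one covering 4200-4800 passes the second
arm (it begins inside the range), and one covering 1-1999 fails both
arms and is not added to possibles.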

-- 
To view, visit https://gerrit.wikimedia.org/r/327764
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ibe21a45919c81f05a372d9bd0c266cec2bc80e12
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps
Gerrit-Branch: master
Gerrit-Owner: ArielGlenn <ar...@wikimedia.org>
