ArielGlenn has submitted this change and it was merged.

Change subject: dumps: redo handling of jobs with unrun prereqs
......................................................................


dumps: redo handling of jobs with unrun prereqs

For a wiki not currently locked by another worker, if a prereq for a
job is waiting or in progress, don't call this run a failure;
just mark it as waiting. Prereqs that failed still result in this
run being marked as failed.

If we have requested specific job(s) and skipdone:
for a wiki locked by another worker, if a prereq job is waiting
or in progress, we won't return "no wikis available
to run", as that won't strictly be true. We're just too early.

Fix a couple of typos in comments.

For the createdirs job, generate the dumprun info file as for most jobs.

Change-Id: I8cf8d27dda0d14262a39c56ac73f7bc00005719a
---
M xmldumps-backup/worker
M xmldumps-backup/worker.py
2 files changed, 80 insertions(+), 26 deletions(-)

Approvals:
  ArielGlenn: Verified; Looks good to me, approved
  jenkins-bot: Verified



diff --git a/xmldumps-backup/worker b/xmldumps-backup/worker
index 7c81a12..b00a8e0 100755
--- a/xmldumps-backup/worker
+++ b/xmldumps-backup/worker
@@ -157,9 +157,8 @@
     result=$?
     if [ $result -eq 255 ]; then
        if [ ! -z "$ONEPASS" -o  ! -z "$CUTOFF" ]; then
-            exit 0
             # this isn't a failure but rather 'no wikis available to run'
-            result=0
+            exit 0
        fi
     fi
     if [ $result -ne 0 ]; then
diff --git a/xmldumps-backup/worker.py b/xmldumps-backup/worker.py
index 8e692e6..3286ff7 100644
--- a/xmldumps-backup/worker.py
+++ b/xmldumps-backup/worker.py
@@ -364,6 +364,9 @@
 class BackupError(Exception):
        pass
 
+class BackupPrereqError(Exception):
+       pass
+
 class RunInfoFile(object):
        def __init__(self, wiki, enabled, verbose = False):
                self.wiki = wiki
@@ -1687,10 +1690,10 @@
 
                if self.jobRequested == "latestlinks":
                        self._statusEnabled = False
+                       self._runInfoFileEnabled = False
 
                if self.jobRequested == "latestlinks" or self.jobRequested == 
"createdirs":
                        self._checksummerEnabled = False
-                       self._runInfoFileEnabled = False
                        self._noticeFileEnabled = False
                        self._makeDirEnabled = False
                        self._cleanOldDumpsEnabled = False
@@ -1879,9 +1882,13 @@
                                        exc_type, exc_value, exc_traceback = 
sys.exc_info()
                                        if (self.verbose):
                                                
sys.stderr.write(repr(traceback.format_exception(exc_type, exc_value, 
exc_traceback)))
-                                       else:
-                                               self.debug("*** exception! " + 
str(ex))
-                                       item.setStatus("failed")
+                                       else:   
+                                                if exc_type.__name__ == 
'BackupPrereqError':
+                                                       self.debug(str(ex))
+                                                else:
+                                                       self.debug("*** 
exception! " + str(ex))
+                                        if exc_type.__name__ != 
'BackupPrereqError':
+                                               item.setStatus("failed")
 
                        if item.status() == "done":
                                self.checksums.cpMd5TmpFileToPermFile()
@@ -1903,7 +1910,7 @@
                                 
os.makedirs(os.path.join(self.wiki.privateDir(), self.wiki.date))
 
                if (self.dumpItemList.allPossibleJobsDone(self.skipJobs)):
-                       # All jobs are either in status "done", "wating", 
"failed",."skipped"
+                       # All jobs are either in status "done", "waiting", 
"failed", "skipped"
                        self.status.updateStatusFiles("done")
                else:
                        # This may happen if we start a dump now and abort 
before all items are
@@ -2258,17 +2265,23 @@
                """Attempt to run the operation, updating progress/status 
info."""
                try:
                        for prerequisiteItem in self._prerequisiteItems:
-                               if prerequisiteItem.status() != "done":
-                                       raise BackupError("Required job %s not 
marked as done, not starting job %s" % ( prerequisiteItem.name(),self.name() ) )
+                               if prerequisiteItem.status() == "failed":
+                                       raise BackupError("Required job %s 
failed, not starting job %s" % ( prerequisiteItem.name(),self.name() ) )
+                               elif prerequisiteItem.status() != "done":
+                                       raise BackupPrereqError("Required job 
%s not marked as done, not starting job %s" % ( 
prerequisiteItem.name(),self.name() ) )
 
                        self.run(runner)
                        self.postRun(runner)
                except Exception:
+                       exc_type, exc_value, exc_traceback = sys.exc_info()
                        if (self.verbose):
-                               exc_type, exc_value, exc_traceback = 
sys.exc_info()
                                
sys.stderr.write(repr(traceback.format_exception(exc_type, exc_value, 
exc_traceback)))
-                       self.setStatus("failed")
+                        if exc_type.__name__ == 'BackupPrereqError':
+                               self.setStatus("waiting")
+                        else:
+                               self.setStatus("failed")
                        raise
+
                self.setStatus("done")
 
        def run(self, runner):
@@ -4043,10 +4056,16 @@
                        raise BackupError("error dumping all titles list")
 
 
-def checkJobDone(wiki, date, job, skipjobs, pageIDRange, chunkToDo, 
checkpointFile):
+def checkJobs(wiki, date, job, skipjobs, pageIDRange, chunkToDo, 
checkpointFile, prereqs=False):
         '''
+        if prereqs is False:
         see if dump run on specific date completed specific job(s)
         or if no job was specified, ran to completion
+
+        if prereqs is True:
+        see if dump run on specific date completed prereqs for specific job(s)
+        or if no job was specified, return True
+
         '''
         if not date:
                 return False
@@ -4058,6 +4077,9 @@
                 else:
                         # never dumped so that's the same as 'job didn't run'
                         return False
+
+        if not job and prereqs:
+                return True
 
         wiki.setDate(date)
 
@@ -4082,16 +4104,31 @@
         else:
                 dumpItemList.markAllJobsToRun(True)
 
-        # see if there are any to run. no? then return True (all job(s) done)
-        # otherwise return False (still some to do)
-        for item in dumpItemList.dumpItems:
-                if item.toBeRun():
-                        return False
-        return True
+        if not prereqs:
+                # see if there are any to run. no? then return True (all 
job(s) done)
+                # otherwise return False (still some to do)
+                for item in dumpItemList.dumpItems:
+                        if item.toBeRun():
+                                return False
+                return True
+        else:
+                # get the list of prereqs, see if they are all status done, if 
so
+                # return True, otherwise False (still some to do)
+                prereqItems = []
+                for item in dumpItemList.dumpItems:
+                        if (item.name() == job):
+                                prereqItems = item._prerequisiteItems
+                        break
+
+                for item in prereqItems:
+                        if item.status() != "done":
+                                return False
+                return True
 
 
 def findAndLockNextWiki(config, locksEnabled, cutoff, bystatustime=False, 
check_job_status=False,
-                        date=None, job=None, skipjobs=None, pageIDRange=None, 
chunkToDo=None, checkpointFile=None):
+                        check_prereq_status=False, date=None, job=None, 
skipjobs=None, pageIDRange=None,
+                        chunkToDo=None, checkpointFile=None):
        if config.halt:
                sys.stderr.write("Dump process halted by config.\n")
                return None
@@ -4102,26 +4139,35 @@
        if verbose and not cutoff:
                sys.stderr.write("Finding oldest unlocked wiki...\n")
 
+        # if we skip locked wikis which are missing the prereqs for this job,
+        # there are still wikis where this job needs to run
+        missingPrereqs = False
        for db in next:
                wiki = WikiDump.Wiki(config, db)
                if (cutoff):
-#                      lastRan = wiki.latestDump()
-#                      if lastRan >= cutoff:
                         lastUpdated = wiki.dateTouchedLatestDump()
                         if lastUpdated >= cutoff:
-#                              return None
                                continue
                 if check_job_status:
-                        if checkJobDone(wiki, date, job, skipjobs, 
pageIDRange, chunkToDo, checkpointFile):
+                        if checkJobs(wiki, date, job, skipjobs, pageIDRange, 
chunkToDo, checkpointFile):
                                 continue
                try:
                        if (locksEnabled):
                                wiki.lock()
                        return wiki
                except:
+                        if check_prereq_status:
+                                # if we skip locked wikis which are missing 
the prereqs for this job,
+                                # there are still wikis where this job needs 
to run
+                                if not checkJobs(wiki, date, job, skipjobs, 
pageIDRange, chunkToDo,
+                                                 checkpointFile, prereqs=True):
+                                        missingPrereqs = True
                        sys.stderr.write("Couldn't lock %s, someone else must 
have got it...\n" % db)
                        continue
-       return None
+        if missingPrereqs:
+                return False
+        else:
+               return None
 
 def xmlEscape(text):
        return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", 
"&gt;")
@@ -4346,9 +4392,15 @@
                                 check_job_status = True
                         else:
                                 check_job_status = False
-                       wiki = findAndLockNextWiki(config, locksEnabled, 
cutoff, check_status_time, check_job_status, date, jobRequested, skipJobs, 
pageIDRange, chunkToDo, checkpointFile)
+                        if jobRequested and skipdone:
+                                check_prereq_status = True
+                        else:
+                                check_prereq_status = False
+                       wiki = findAndLockNextWiki(config, locksEnabled, 
cutoff, check_status_time,
+                                                   check_job_status, 
check_prereq_status,
+                                                   date, jobRequested, 
skipJobs, pageIDRange, chunkToDo, checkpointFile)
 
-               if wiki:
+               if wiki is not None and wiki:
                        # process any per-project configuration options
                        config.parseConfFilePerProject(wiki.dbName)
 
@@ -4390,6 +4442,9 @@
                        # if we are doing one piece only of the dump, we don't 
unlock either
                        if locksEnabled:
                                wiki.unlock()
+                elif wiki is not None:
+                       sys.stderr.write("Wikis available to run but prereqs 
not complete.\n")
+                        exitcode = 0
                else:
                        sys.stderr.write("No wikis available to run.\n")
                         exitcode = 255

-- 
To view, visit https://gerrit.wikimedia.org/r/233417
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I8cf8d27dda0d14262a39c56ac73f7bc00005719a
Gerrit-PatchSet: 3
Gerrit-Project: operations/dumps
Gerrit-Branch: ariel
Gerrit-Owner: ArielGlenn <[email protected]>
Gerrit-Reviewer: ArielGlenn <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to