ArielGlenn has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/215662

Change subject: dumps: add onepass to worker script and wrapper, fix cutoff 
option
......................................................................

dumps: add onepass to worker script and wrapper, fix cutoff option

The onepass option lets us run one or more jobs across all wikis,
stopping after one complete pass.
Fix the cutoff option so that it runs one or more jobs across all
wikis, stopping when all job runs are at least as recent as the
cutoff date. It used to pass back a wiki if there were some to run;
now it behaves in a similar fashion to onepass: worker.py exits with
a special exit code (255) if there are no wikis left to run, and the
worker wrapper script then exits cleanly.

Change-Id: I391f71e9b12b4c490a4f4e19ba704fc685b9c69b
---
M xmldumps-backup/WikiDump.py
M xmldumps-backup/worker
M xmldumps-backup/worker.py
3 files changed, 45 insertions(+), 37 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/dumps 
refs/changes/62/215662/1

diff --git a/xmldumps-backup/WikiDump.py b/xmldumps-backup/WikiDump.py
index 1cc4b9c..84c2572 100644
--- a/xmldumps-backup/WikiDump.py
+++ b/xmldumps-backup/WikiDump.py
@@ -622,7 +622,18 @@
                        else:
                                return dirs[index]
                else:
-                       return None
+                        return None
+
+        def dateTouchedLatestDump(self):
+                mtime = 0
+                last = self.latestDump()
+                if last:
+                        dumpStatus = os.path.join(self.publicDir(), last, 
"status.html")
+                        try:
+                                mtime = os.stat(dumpStatus).st_mtime
+                        except:
+                                pass
+                return time.strftime("%Y%m%d", time.gmtime(mtime))
 
        def dumpDirs(self):
                """List all dump directories for the given database."""
diff --git a/xmldumps-backup/worker b/xmldumps-backup/worker
index 882e0e1..3731b54 100755
--- a/xmldumps-backup/worker
+++ b/xmldumps-backup/worker
@@ -8,7 +8,7 @@
     echo "--log          write log of (almost) everything written to stderr 
(default: no logging)"
     echo "--maxfails     if more than this many dumps fail in a row, exit 
(default: 3)"
     echo "--basedir      scripts and files are relative to this dir (default: 
location of this script)"
-    echo "--cutoff       dump wikis until all have a dump produced more recent 
than the specified cutoff,"
+    echo "--cutoff       dump wikis until all have a dump produced  as recent 
as the specified cutoff,"
     echo "               then exit.  format: yyyymmdd  OR  'today'"
     echo "--date         (re)do dump runs of specified date (yyyymmdd) OR 
'last'"
     echo "--skipdone     skip any dump jobs that ran successfully (this makes 
sense only for reruns)"
@@ -17,6 +17,7 @@
     echo "               default is true for all runs except those where --job 
is specified"
     echo "--wiki         name of specific wiki db to dump; otherwise all wikis 
in list referenced by"
     echo "               config file will be dumped"
+    echo "--onepass      if there are no wikis to dump (--skipdone option) 
then exit"
     echo
     echo "If the file maintenance.txt is present, no more jobs will be run, 
and"
     echo "this script will check the status again in 5 minutes."
@@ -43,6 +44,8 @@
     JOB=""
     # default for one job: no locking
     EXCLUSIVE=""
+    # if no wikis are left to run, exit
+    ONEPASS=""
 }
 
 process_opts() {
@@ -71,6 +74,9 @@
        elif [ $1 == "--job" ]; then
            JOB="$2"
            shift; shift
+       elif [ $1 == "--onepass" ]; then
+           ONEPASS=true
+           shift
        elif [ $1 == "--skipdone" ]; then
            SKIPDONE=true
            shift
@@ -120,7 +126,7 @@
     if [ ! -z "$CUTOFF" ]; then
        if [ "$CUTOFF" == "today" ]; then
            # convert this to yyyymmdd, UTC always
-           CUTOFF=`date -u +"%Y%m%d`
+           CUTOFF=`date -u +"%Y%m%d"`
        else
            # sanity check of arg
            result=`date -d "$CUTOFF"`
@@ -129,18 +135,25 @@
                exit 1
            fi
        fi
-       cutoffargs=( "${pythonargs[@]}" "--cutoff" "$CUTOFF" )
+       pythonargs=( "${pythonargs[@]}" "--cutoff" "$CUTOFF" )
     fi
     if [ ! -z "$WIKI" ]; then
        pythonargs=( "${pythonargs[@]}" "$WIKI" )
-       cutoffargs=( "${cutoffargs[@]}" "$WIKI" )
     fi
 }
 
 dump_wiki() {
     echo python ${pythonargs[@]}
     python ${pythonargs[@]}
-    if [ $? -ne 0 ]; then
+    result=$?
+    if [ $result -eq 255 ]; then
+       if [ ! -z "$ONEPASS" -o  ! -z "$CUTOFF" ]; then
+            exit 0
+            # this isn't a failure but rather 'no wikis available to run'
+            result=0
+       fi
+    fi
+    if [ $result -ne 0 ]; then
        failures=$(($failures+1))
        if [ $failures -gt $MAXFAILS ]; then
            echo "more than $MAXFAILS failures in a row, halting."
@@ -170,20 +183,6 @@
     elif [ -e "$WIKIDUMP_BASE/exit.txt" ]; then
        echo "exit requested, remove 'exit.txt' to continue normal operations."
        exit 0
-    elif [ ! -z "$CUTOFF" ]; then
-       # see if there are any wikis left with dumps that are not more recent 
than CUTOFF
-       echo ${cutoffargs[@]}
-       result=`python ${cutoffargs[@]}`
-       if [ -z "$result" ]; then
-           # nope, so we are done
-           echo "All wikis completed after cutoff $CUTOFF"
-           exit 0
-       else
-           # there's (at least) one wiki left to do... if some other worker
-           # gets it and we do an 'extra' one it's not a disaster, so don't 
worry about
-           # potential race
-           dump_wiki
-       fi
     else
        dump_wiki
     fi
diff --git a/xmldumps-backup/worker.py b/xmldumps-backup/worker.py
index 7c9b3a3..724910b 100644
--- a/xmldumps-backup/worker.py
+++ b/xmldumps-backup/worker.py
@@ -4128,8 +4128,10 @@
        for db in next:
                wiki = WikiDump.Wiki(config, db)
                if (cutoff):
-                       lastRan = wiki.latestDump()
-                       if lastRan > cutoff:
+#                      lastRan = wiki.latestDump()
+#                      if lastRan >= cutoff:
+                        lastUpdated = wiki.dateTouchedLatestDump()
+                        if lastUpdated >= cutoff:
                                return None
                 if check_job_status:
                         if checkJobDone(wiki, date, job, pageIDRange, 
chunkToDo, checkpointFile):
@@ -4191,7 +4193,7 @@
        sys.stderr.write( "--log:         Log progress messages and other 
output to logfile in addition to\n" )
        sys.stderr.write( "               the usual console output\n" )
        sys.stderr.write( "--cutoff:      Given a cutoff date in yyyymmdd 
format, display the next wiki for which\n" )
-       sys.stderr.write( "               dumps should be run, if its last dump 
was not after the cutoff date,\n" )
+       sys.stderr.write( "               dumps should be run, if its last dump 
was older than the cutoff date,\n" )
        sys.stderr.write( "               and exit, or if there are no such 
wikis, just exit\n" )
        sys.stderr.write( "--verbose:     Print lots of stuff (includes 
printing full backtraces for any exception)\n" )
        sys.stderr.write( "               This is used primarily for 
debugging\n" )
@@ -4216,7 +4218,7 @@
                checkpointFile = None
                pageIDRange = None
                cutoff = None
-               result = False
+               exitcode = 1
                 skipdone = False
                 doLocking = False
                verbose = False
@@ -4295,7 +4297,7 @@
                else:
                        config = WikiDump.Config()
 
-               if dryrun or chunkToDo or (jobRequested and not restart  and 
not doLocking) or cutoff:
+               if dryrun or chunkToDo or (jobRequested and not restart  and 
not doLocking):
                        locksEnabled = False
                else:
                        locksEnabled = True
@@ -4308,8 +4310,10 @@
                if len(remainder) > 0:
                        wiki = WikiDump.Wiki(config, remainder[0])
                        if cutoff:
+                                # fixme if we asked for a specific job then 
check that job only
+                                # not the dir
                                lastRan = wiki.latestDump()
-                               if lastRan > cutoff:
+                               if lastRan >= cutoff:
                                        wiki = None
                        if wiki is not None and locksEnabled:
                                if forceLock and wiki.isLocked():
@@ -4330,11 +4334,6 @@
                         else:
                                 check_job_status = False
                        wiki = findAndLockNextWiki(config, locksEnabled, 
cutoff, check_status_time, check_job_status, date, jobRequested, pageIDRange, 
chunkToDo, checkpointFile)
-               if cutoff:
-                       if wiki:
-                               print wiki.dbName
-                       WikiDump.cleanup()
-                       sys.exit(0)
 
                if wiki:
                        # process any per-project configuration options
@@ -4372,16 +4371,15 @@
                                sys.stderr.write("Running %s, job %s...\n" % 
(wiki.dbName, jobRequested))
                        else:
                                sys.stderr.write("Running %s...\n" % 
wiki.dbName)
-                       result = runner.run()
+                        result = runner.run()
+                        if result is not None and result:
+                                exitcode = 0
                        # if we are doing one piece only of the dump, we don't 
unlock either
                        if locksEnabled:
                                wiki.unlock()
                else:
                        sys.stderr.write("No wikis available to run.\n")
-                       result = True
+                        exitcode = 255
        finally:
                WikiDump.cleanup()
-       if result == False:
-               sys.exit(1)
-       else:
-               sys.exit(0)
+        sys.exit(exitcode)

-- 
To view, visit https://gerrit.wikimedia.org/r/215662
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I391f71e9b12b4c490a4f4e19ba704fc685b9c69b
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps
Gerrit-Branch: ariel
Gerrit-Owner: ArielGlenn <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to