ArielGlenn has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/62800


Change subject: option to run until all wikis have dumps more recent than cutoff date
......................................................................

option to run until all wikis have dumps more recent than cutoff date

--cutoff option in wrapper shell script and in main python script
wrapper shell script cleanup (break stuff into functions for
readability)

Change-Id: I405aae9f4c3894eb337d59d487b87e111f37306c
---
M xmldumps-backup/worker
M xmldumps-backup/worker.py
2 files changed, 162 insertions(+), 81 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/dumps refs/changes/00/62800/1

diff --git a/xmldumps-backup/worker b/xmldumps-backup/worker
index afd9944..ced94f8 100755
--- a/xmldumps-backup/worker
+++ b/xmldumps-backup/worker
@@ -1,92 +1,150 @@
 #!/bin/bash
 
-# default number of failures of worker.py in a row before we decide
-# something serious is broken and we refuse to run
-MAXFAILS=3
-# default: don't pass special config file
-CONFIGFILE=""
-# default: no logging to file
-LOG=""
+usage() {
+    echo "$0: Unknown option $1"
+    echo "Usage: $0 [--configfile filename] [--log] [--maxfails num] 
[--basedir dir] [--wiki wikiname]"
+    echo "          [--cutoff date]"
+    echo "--configfile   use specified file for config file (default: 
wikidump.conf)"
+    echo "--log          write log of (almost) everything written to stderr 
(default: no logging)"
+    echo "--maxfails     if more than this many dumps fail in a row, exit 
(default: 3)"
+    echo "--basedir      scripts and files are relative to this dir (default: 
location of this script)"
+    echo "--cutoff       dump wikis until all have a dump produced more recent 
than the specified cutoff,"
+    echo "               then exit.  format: yyyymmdd  OR  'today'"
+    echo "--wiki         name of specific wiki db to dump; otherwise all wikis 
in list referenced by"
+    echo "               config file will be dumped"
+    echo
+    echo "If the file maintenance.txt is present, no more jobs will be run, 
and"
+    echo "this script will check the status again in 5 minutes."
+    echo "If the file exit.txt is present, no more new jobs will be started 
and"
+    echo "this script will terminate."
+    exit 1
+}
 
-failures=0
-WIKIDUMP_BASE=`dirname "$0"`
+set_defaults() {
+    # default number of failures of worker.py in a row before we decide
+    # something serious is broken and we refuse to run
+    MAXFAILS=3
+    # default: don't pass special config file
+    CONFIGFILE=""
+    # default: no logging to file
+    LOG=""
+    # default: no cutoff date
+    CUTOFF=""
+}
 
-while [ $# -gt 0 ]; do
-    if [ $1 == "--configfile" ]; then
-       CONFIGFILE="$2"
-       shift; shift
-    elif [ $1 == "--maxfails" ]; then
-       MAXFAILS="$2"
-       shift; shift
-    elif [ $1 == "--basedir" ]; then
-       WIKIDUMP_BASE="$2"
-       shift; shift
-    elif [ $1 == "--log" ]; then
-       LOG=true
-       shift;
-    elif [ $1 == "--wiki" ]; then
-       WIKI="$2"
-       shift; shift
-    else
-       echo "$0: Unknown option $1"
-       echo "Usage: $0 [--configfile filename] [--log] [--maxfails num] 
[--basedir dir] [--wiki wikiname]"
-       echo "--configfile   use specified file for config file (default: 
wikidump.conf)"
-       echo "--log          write log of (almost) everything written to stderr 
(default: no logging)"
-       echo "--maxfails     if more than this many dumps fail in a row, exit 
(default: 3)"
-       echo "--basedir      scripts and files are relative to this dir 
(default: location of this script)"
-       echo "--wiki         name of specific wiki db to dump; otherwise all 
wikis in list referenced by"
-       echo "               config file will be dumped"
-       echo 
-       echo "If the file maintenance.txt is present, no more jobs will be run, 
and"
-       echo "this script will check the status again in 5 minutes."
-       echo "If the file exit.txt is present, no more new jobs will be started 
and"
-       echo "this script will terminate."
-       exit 1
+process_opts() {  # parse the command-line flags in "$@" into globals; an unrecognized flag prints usage and exits
+    while [ $# -gt 0 ]; do
+       if [ "$1" == "--configfile" ]; then  # "$1" is quoted so empty or space-containing args do not break the test
+           CONFIGFILE="$2"
+           shift; shift
+       elif [ "$1" == "--maxfails" ]; then
+           MAXFAILS="$2"
+           shift; shift
+       elif [ "$1" == "--basedir" ]; then
+           WIKIDUMP_BASE="$2"
+           shift; shift
+       elif [ "$1" == "--log" ]; then
+           LOG=true
+           shift;
+       elif [ "$1" == "--cutoff" ]; then
+           CUTOFF="$2"
+           shift; shift
+       elif [ "$1" == "--wiki" ]; then
+           WIKI="$2"
+           shift; shift
+       else
+           usage "$1"  # pass the offending option along so the error message can name it
+       fi
+    done
+}
+
+setup_python_args() {  # build the pythonargs (dump run) and cutoffargs (cutoff check) arrays from the parsed options
+    # set up the command
+    pythonargs=( "$WIKIDUMP_BASE/worker.py" )
+    if [ ! -z "$CONFIGFILE" ]; then
+       pythonargs=( "${pythonargs[@]}" "--configfile" "$CONFIGFILE" )
     fi
-done
+    if [ ! -z "$LOG" ]; then
+       pythonargs=( "${pythonargs[@]}" "--log" )
+    fi
+    if [ ! -z "$CUTOFF" ]; then
+       if [ "$CUTOFF" == "today" ]; then
+           # convert this to yyyymmdd, UTC always
+           CUTOFF=`date -u +"%Y%m%d"`  # closing quote of the format string was missing
+       else
+           # sanity check of arg
+           result=`date -d "$CUTOFF"`
+           if [ -z "$result" ]; then
+               echo "bad date given for cutoff arg"
+               exit 1
+           fi
+       fi
+       cutoffargs=( "${pythonargs[@]}" "--cutoff" "$CUTOFF" )  # separate array: pythonargs itself never gets --cutoff
+    fi
+    if [ ! -z "$WIKI" ]; then
+       pythonargs=( "${pythonargs[@]}" "$WIKI" )
+       cutoffargs=( "${cutoffargs[@]}" "$WIKI" )
+    fi
+}
 
-# set up the command
-pythonargs=( "$WIKIDUMP_BASE/worker.py" )
-if [ ! -z "$CONFIGFILE" ]; then
-    pythonargs=( "${pythonargs[@]}" "--configfile" "$CONFIGFILE" )
-fi
-if [ ! -z "$LOG" ]; then
-    pythonargs=( "${pythonargs[@]}" "--log" )
-fi
-if [ ! -z "$WIKI" ]; then
-    pythonargs=( "${pythonargs[@]}" "$WIKI" )
-fi
+dump_wiki() {  # run worker.py once with pythonargs and track consecutive failures; exits on too many failures or after a single-wiki run
+    echo python "${pythonargs[@]}"
+    python "${pythonargs[@]}"  # quoted so each array element stays one argument (e.g. a config path with spaces)
+    if [ $? -ne 0 ]; then
+       failures=$(($failures+1))
+       if [ $failures -gt $MAXFAILS ]; then
+           echo "more than $MAXFAILS failures in a row, halting."
+           exit 1
+       fi
+    else
+       failures=0
+    fi
+    if [ ! -z "$WIKI" ]; then
+       # we ran for a specific wiki, we don't loop even if it failed.
+       if [ $failures -gt 0 ]; then
+           echo "Dump of wiki $WIKI failed."
+           exit 1
+       else
+           echo "Dump of wiki $WIKI succeeded."
+           exit 0
+       fi
+    fi
+    echo "sleeping"
+    sleep 30
+}
 
-while true; do
+main_loop() {  # one pass: honor maintenance.txt / exit.txt, then run the cutoff check (if any) and dump one wiki
     if [ -e "$WIKIDUMP_BASE/maintenance.txt" ]; then
        echo "in maintenance mode (see 'maintenance.txt'), sleeping 5 minutes"
        sleep 300
     elif [ -e "$WIKIDUMP_BASE/exit.txt" ]; then
        echo "exit requested, remove 'exit.txt' to continue normal operations."
        exit 0
-    else
-       echo python ${pythonargs[@]}
-       python ${pythonargs[@]}
-       if [ $? -ne 0 ]; then
-           failures=$(($failures+1))
-           if [ $failures -gt $MAXFAILS ]; then
-               echo "more than $MAXFAILS failures in a row, halting."
-               exit 1
-           fi
+    elif [ ! -z "$CUTOFF" ]; then
+       # see if there are any wikis left with dumps that are not more recent than CUTOFF
+       echo "${cutoffargs[@]}"
+       result=`python "${cutoffargs[@]}"`  # quoted so each array element stays one argument
+       if [ -z "$result" ]; then  # NOTE(review): empty output also occurs if the check itself fails - consider testing $? too
+           # nope, so we are done
+           echo "All wikis completed after cutoff $CUTOFF"
+           exit 0
        else
-           failures=0
+           # there's (at least) one wiki left to do... if some other worker
+           # gets it and we do an 'extra' one it's not a disaster, so don't worry about
+           # potential race
+           dump_wiki
        fi
-       if [ ! -z "$WIKI" ]; then
-           # we ran for a specific wiki, we don't loop even if it failed.
-           if [ $failures -gt 0 ]; then
-               echo "Dump of wiki $WIKI failed."
-               exit 1
-           else
-               echo "Dump of wiki $WIKI succeeded."
-               exit 0
-           fi
-       fi
-       echo "sleeping"
-       sleep 30
+    else
+       dump_wiki
     fi
+}
+
+failures=0
+WIKIDUMP_BASE=`dirname "$0"`
+set_defaults
+process_opts "$@"
+setup_python_args
+
+while true; do
+    main_loop
 done
diff --git a/xmldumps-backup/worker.py b/xmldumps-backup/worker.py
index e998f9c..2245b55 100644
--- a/xmldumps-backup/worker.py
+++ b/xmldumps-backup/worker.py
@@ -4032,7 +4032,7 @@
                command = runner.dbServerInfo.buildSqlCommand(query, 
runner.wiki.config.gzip)
                return runner.saveCommand(command, outfile)
 
-def findAndLockNextWiki(config, locksEnabled):
+def findAndLockNextWiki(config, locksEnabled, cutoff):
        if config.halt:
                print "Dump process halted by config."
                return None
@@ -4040,10 +4040,15 @@
        next = config.dbListByAge()
        next.reverse()
 
-       print "Finding oldest unlocked wiki..."
+       if verbose and not cutoff:
+               print "Finding oldest unlocked wiki..."
 
        for db in next:
                wiki = WikiDump.Wiki(config, db)
+               if (cutoff):
+                       lastRan = wiki.latestDump()
+                       if lastRan > cutoff:
+                               return None
                try:
                        if (locksEnabled):
                                wiki.lock()
@@ -4060,7 +4065,7 @@
        if message:
                print message
        print "Usage: python worker.py [options] [wikidbname]"
-       print "Options: --aftercheckpoint, --checkpoint, --chunk, --configfile, 
--date, --job, --addnotice, --delnotice, --force, --noprefetch, --nospawn, 
--restartfrom, --log"
+       print "Options: --aftercheckpoint, --checkpoint, --chunk, --configfile, 
--date, --job, --addnotice, --delnotice, --force, --noprefetch, --nospawn, 
--restartfrom, --log, --cutoff"
        print "--aftercheckpoint: Restart thie job from the after specified 
checkpoint file, doing the"
        print "               rest of the job for the appropriate chunk if 
chunks are configured"
        print "               or for the all the rest of the revisions if no 
chunks are configured;"
@@ -4095,6 +4100,9 @@
        print "--restartfrom: Do all jobs after the one specified via --job, 
including that one"
        print "--log:         Log progress messages and other output to logfile 
in addition to"
        print "               the usual console output"
+       print "--cutoff:      Given a cutoff date in yyyymmdd format, display 
the next wiki for which"
+       print "               dumps should be run, if its last dump was not 
after the cutoff date,"
+       print "               and exit, or if there are no such wikis, just 
exit"
        print "--verbose:     Print lots of stuff (includes printing full 
backtraces for any exception)"
        print "               This is used primarily for debugging"
 
@@ -4117,12 +4125,13 @@
                afterCheckpoint = False
                checkpointFile = None
                pageIDRange = None
+               cutoff = None
                result = False
                verbose = False
 
                try:
                        (options, remainder) = getopt.gnu_getopt(sys.argv[1:], 
"",
-                                                                ['date=', 
'job=', 'configfile=', 'addnotice=', 'delnotice', 'force', 'dryrun', 
'noprefetch', 'nospawn', 'restartfrom', 'aftercheckpoint=', 'log', 'chunk=', 
'checkpoint=', 'pageidrange=', 'verbose' ])
+                                                                ['date=', 
'job=', 'configfile=', 'addnotice=', 'delnotice', 'force', 'dryrun', 
'noprefetch', 'nospawn', 'restartfrom', 'aftercheckpoint=', 'log', 'chunk=', 
'checkpoint=', 'pageidrange=', 'cutoff=', 'verbose' ])
                except:
                        usage("Unknown option specified")
 
@@ -4158,6 +4167,10 @@
                                htmlNotice = False
                        elif opt == "--pageidrange":
                                pageIDRange = val
+                       elif opt == "--cutoff":
+                               cutoff = val
+                               if not cutoff.isdigit() or not len(cutoff) == 8:
+                                       usage("--cutoff value must be in 
yyyymmdd format")
                        elif opt == "--verbose":
                                verbose = True
 
@@ -4188,7 +4201,7 @@
                else:
                        config = WikiDump.Config()
 
-               if dryrun or chunkToDo or (jobRequested and not restart):
+               if dryrun or chunkToDo or (jobRequested and not restart) or 
cutoff:
                        locksEnabled = False
                else:
                        locksEnabled = True
@@ -4200,6 +4213,10 @@
 
                if len(remainder) > 0:
                        wiki = WikiDump.Wiki(config, remainder[0])
+                       if cutoff:
+                               lastRan = wiki.latestDump()
+                               if lastRan > cutoff:
+                                       wiki = None
                        if locksEnabled:
                                if forceLock and wiki.isLocked():
                                        wiki.unlock()
@@ -4207,7 +4224,13 @@
                                        wiki.lock()
 
                else:
-                       wiki = findAndLockNextWiki(config, locksEnabled)
+                       wiki = findAndLockNextWiki(config, locksEnabled, cutoff)
+
+               if cutoff:
+                       if wiki:
+                               print wiki.dbName
+                       WikiDump.cleanup()
+                       sys.exit(0)
 
                if wiki:
                        # process any per-project configuration options

-- 
To view, visit https://gerrit.wikimedia.org/r/62800
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I405aae9f4c3894eb337d59d487b87e111f37306c
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps
Gerrit-Branch: ariel
Gerrit-Owner: ArielGlenn <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to