ArielGlenn has uploaded a new change for review.
https://gerrit.wikimedia.org/r/215662
Change subject: dumps: add onepass to worker script and wrapper, fix cutoff
option
......................................................................
dumps: add onepass to worker script and wrapper, fix cutoff option
onepass lets us run one or more jobs across all wikis, stopping
after one complete run.
Fix the cutoff option so that it runs one or more jobs across all wikis,
stopping when all job runs are at least as recent as the cutoff date. It
used to pass back a wiki if there were some to run; now it behaves in a
similar fashion to onepass, exiting the worker script with a special exit
code (255) if there are no wikis left to run.
Change-Id: I391f71e9b12b4c490a4f4e19ba704fc685b9c69b
---
M xmldumps-backup/WikiDump.py
M xmldumps-backup/worker
M xmldumps-backup/worker.py
3 files changed, 45 insertions(+), 37 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/operations/dumps
refs/changes/62/215662/1
diff --git a/xmldumps-backup/WikiDump.py b/xmldumps-backup/WikiDump.py
index 1cc4b9c..84c2572 100644
--- a/xmldumps-backup/WikiDump.py
+++ b/xmldumps-backup/WikiDump.py
@@ -622,7 +622,18 @@
else:
return dirs[index]
else:
- return None
+ return None
+
+ def dateTouchedLatestDump(self):
+ mtime = 0
+ last = self.latestDump()
+ if last:
+ dumpStatus = os.path.join(self.publicDir(), last,
"status.html")
+ try:
+ mtime = os.stat(dumpStatus).st_mtime
+ except:
+ pass
+ return time.strftime("%Y%m%d", time.gmtime(mtime))
def dumpDirs(self):
"""List all dump directories for the given database."""
diff --git a/xmldumps-backup/worker b/xmldumps-backup/worker
index 882e0e1..3731b54 100755
--- a/xmldumps-backup/worker
+++ b/xmldumps-backup/worker
@@ -8,7 +8,7 @@
echo "--log write log of (almost) everything written to stderr
(default: no logging)"
echo "--maxfails if more than this many dumps fail in a row, exit
(default: 3)"
echo "--basedir scripts and files are relative to this dir (default:
location of this script)"
- echo "--cutoff dump wikis until all have a dump produced more recent
than the specified cutoff,"
+ echo "--cutoff dump wikis until all have a dump produced as recent
as the specified cutoff,"
echo " then exit. format: yyyymmdd OR 'today'"
echo "--date (re)do dump runs of specified date (yyyymmdd) OR
'last'"
echo "--skipdone skip any dump jobs that ran successfully (this makes
sense only for reruns)"
@@ -17,6 +17,7 @@
echo " default is true for all runs except those where --job
is specified"
echo "--wiki name of specific wiki db to dump; otherwise all wikis
in list referenced by"
echo " config file will be dumped"
+ echo "--onepass if there are no wikis to dump (--skipdone option)
then exit"
echo
echo "If the file maintenance.txt is present, no more jobs will be run,
and"
echo "this script will check the status again in 5 minutes."
@@ -43,6 +44,8 @@
JOB=""
# default for one job: no locking
EXCLUSIVE=""
+ # if no wikis are left to run, exit
+ ONEPASS=""
}
process_opts() {
@@ -71,6 +74,9 @@
elif [ $1 == "--job" ]; then
JOB="$2"
shift; shift
+ elif [ $1 == "--onepass" ]; then
+ ONEPASS=true
+ shift
elif [ $1 == "--skipdone" ]; then
SKIPDONE=true
shift
@@ -120,7 +126,7 @@
if [ ! -z "$CUTOFF" ]; then
if [ "$CUTOFF" == "today" ]; then
# convert this to yyyymmdd, UTC always
- CUTOFF=`date -u +"%Y%m%d`
+ CUTOFF=`date -u +"%Y%m%d"`
else
# sanity check of arg
result=`date -d "$CUTOFF"`
@@ -129,18 +135,25 @@
exit 1
fi
fi
- cutoffargs=( "${pythonargs[@]}" "--cutoff" "$CUTOFF" )
+ pythonargs=( "${pythonargs[@]}" "--cutoff" "$CUTOFF" )
fi
if [ ! -z "$WIKI" ]; then
pythonargs=( "${pythonargs[@]}" "$WIKI" )
- cutoffargs=( "${cutoffargs[@]}" "$WIKI" )
fi
}
dump_wiki() {
echo python ${pythonargs[@]}
python ${pythonargs[@]}
- if [ $? -ne 0 ]; then
+ result=$?
+ if [ $result -eq 255 ]; then
+ if [ ! -z "$ONEPASS" -o ! -z "$CUTOFF" ]; then
+ exit 0
+ # this isn't a failure but rather 'no wikis available to run'
+ result=0
+ fi
+ fi
+ if [ $result -ne 0 ]; then
failures=$(($failures+1))
if [ $failures -gt $MAXFAILS ]; then
echo "more than $MAXFAILS failures in a row, halting."
@@ -170,20 +183,6 @@
elif [ -e "$WIKIDUMP_BASE/exit.txt" ]; then
echo "exit requested, remove 'exit.txt' to continue normal operations."
exit 0
- elif [ ! -z "$CUTOFF" ]; then
- # see if there are any wikis left with dumps that are not more recent
than CUTOFF
- echo ${cutoffargs[@]}
- result=`python ${cutoffargs[@]}`
- if [ -z "$result" ]; then
- # nope, so we are done
- echo "All wikis completed after cutoff $CUTOFF"
- exit 0
- else
- # there's (at least) one wiki left to do... if some other worker
- # gets it and we do an 'extra' one it's not a disaster, so don't
worry about
- # potential race
- dump_wiki
- fi
else
dump_wiki
fi
diff --git a/xmldumps-backup/worker.py b/xmldumps-backup/worker.py
index 7c9b3a3..724910b 100644
--- a/xmldumps-backup/worker.py
+++ b/xmldumps-backup/worker.py
@@ -4128,8 +4128,10 @@
for db in next:
wiki = WikiDump.Wiki(config, db)
if (cutoff):
- lastRan = wiki.latestDump()
- if lastRan > cutoff:
+# lastRan = wiki.latestDump()
+# if lastRan >= cutoff:
+ lastUpdated = wiki.dateTouchedLatestDump()
+ if lastUpdated >= cutoff:
return None
if check_job_status:
if checkJobDone(wiki, date, job, pageIDRange,
chunkToDo, checkpointFile):
@@ -4191,7 +4193,7 @@
sys.stderr.write( "--log: Log progress messages and other
output to logfile in addition to\n" )
sys.stderr.write( " the usual console output\n" )
sys.stderr.write( "--cutoff: Given a cutoff date in yyyymmdd
format, display the next wiki for which\n" )
- sys.stderr.write( " dumps should be run, if its last dump
was not after the cutoff date,\n" )
+ sys.stderr.write( " dumps should be run, if its last dump
was older than the cutoff date,\n" )
sys.stderr.write( " and exit, or if there are no such
wikis, just exit\n" )
sys.stderr.write( "--verbose: Print lots of stuff (includes
printing full backtraces for any exception)\n" )
sys.stderr.write( " This is used primarily for
debugging\n" )
@@ -4216,7 +4218,7 @@
checkpointFile = None
pageIDRange = None
cutoff = None
- result = False
+ exitcode = 1
skipdone = False
doLocking = False
verbose = False
@@ -4295,7 +4297,7 @@
else:
config = WikiDump.Config()
- if dryrun or chunkToDo or (jobRequested and not restart and
not doLocking) or cutoff:
+ if dryrun or chunkToDo or (jobRequested and not restart and
not doLocking):
locksEnabled = False
else:
locksEnabled = True
@@ -4308,8 +4310,10 @@
if len(remainder) > 0:
wiki = WikiDump.Wiki(config, remainder[0])
if cutoff:
+ # fixme if we asked for a specific job then
check that job only
+ # not the dir
lastRan = wiki.latestDump()
- if lastRan > cutoff:
+ if lastRan >= cutoff:
wiki = None
if wiki is not None and locksEnabled:
if forceLock and wiki.isLocked():
@@ -4330,11 +4334,6 @@
else:
check_job_status = False
wiki = findAndLockNextWiki(config, locksEnabled,
cutoff, check_status_time, check_job_status, date, jobRequested, pageIDRange,
chunkToDo, checkpointFile)
- if cutoff:
- if wiki:
- print wiki.dbName
- WikiDump.cleanup()
- sys.exit(0)
if wiki:
# process any per-project configuration options
@@ -4372,16 +4371,15 @@
sys.stderr.write("Running %s, job %s...\n" %
(wiki.dbName, jobRequested))
else:
sys.stderr.write("Running %s...\n" %
wiki.dbName)
- result = runner.run()
+ result = runner.run()
+ if result is not None and result:
+ exitcode = 0
# if we are doing one piece only of the dump, we don't
unlock either
if locksEnabled:
wiki.unlock()
else:
sys.stderr.write("No wikis available to run.\n")
- result = True
+ exitcode = 255
finally:
WikiDump.cleanup()
- if result == False:
- sys.exit(1)
- else:
- sys.exit(0)
+ sys.exit(exitcode)
--
To view, visit https://gerrit.wikimedia.org/r/215662
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I391f71e9b12b4c490a4f4e19ba704fc685b9c69b
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps
Gerrit-Branch: ariel
Gerrit-Owner: ArielGlenn <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits