ArielGlenn has submitted this change and it was merged.

Change subject: make the 'force' (lock stealing) option for dumps useful
......................................................................
make the 'force' (lock stealing) option for dumps useful * for regular runs, only remove the lockfile if we created it * allow lock stealing for single dump jobs * add the force option to the bash wrapper script Since we don't lock per job, at least allow someone to manually kick off a job for a wiki if the other workers are grabbing the lock and not doing anything with it (e.g. waiting for some earlier job to be run). Change-Id: I79c457a13832a8eb6ddb744dd84c08a8239bd233 --- M xmldumps-backup/dumps/WikiDump.py M xmldumps-backup/worker M xmldumps-backup/worker.py 3 files changed, 56 insertions(+), 13 deletions(-) Approvals: ArielGlenn: Looks good to me, approved jenkins-bot: Verified diff --git a/xmldumps-backup/dumps/WikiDump.py b/xmldumps-backup/dumps/WikiDump.py index 293c497..1fff656 100644 --- a/xmldumps-backup/dumps/WikiDump.py +++ b/xmldumps-backup/dumps/WikiDump.py @@ -606,18 +606,49 @@ self.watchdog.start() return True - def unlock(self, lockfiles): + def check_owner(self, lockfile, pid): ''' - Note: + check if the specified pid created the lockfile + (it would be recorded in the lockfile) + ''' + if pid is None: + return True + + try: + with open(lockfile, "r") as fdesc: + lines = fdesc.read().splitlines() + # if there's more than one line it's garbage or wrong file, + # don't touch + if len(lines) == 1: + lockpid = lines[0].split(" ", 1)[1] + if pid == lockpid: + return True + except: + # don't care what the error is, file is off limits for us + pass + return False + + def unlock(self, lockfiles, owner=False): + ''' + remove all specified lockfiles. 
+ if 'owner' is True, check contents of each lockfile + and only remove it if this process is the owner + (its pid is recorded in lockfile) + if more than one lockfile is to be removed, they had better be 'stale' (no longer being updated by a watchdog) or this will fail ''' if self.watchdog is not None: self.watchdog.stop_watching() self.watchdog = None + if owner: + pid = str(os.getpid()) + else: + pid = None for lockfile in lockfiles: try: - os.remove(lockfile) + if self.check_owner(lockfile, pid): + os.remove(lockfile) except: # someone else removed it? pass diff --git a/xmldumps-backup/worker b/xmldumps-backup/worker index 12c8c92..7017b5d 100755 --- a/xmldumps-backup/worker +++ b/xmldumps-backup/worker @@ -22,6 +22,8 @@ echo "--onepass if there are no wikis to dump (--skipdone option) then exit" echo "--sleep time to sleep between jobs, default 60 seconds" echo "--prereqs do prereqs missing for a job rather than failing out" + echo "--force steal the lock for the specified wiki if necessary -- dangerous!" + echo " requires the --wiki argument" echo echo "If the file maintenance.txt is present, no more jobs will be run, and" echo "this script will check the status again in 5 minutes." @@ -56,6 +58,8 @@ SLEEP=60 # default: don't do missing prereqs for a job PREREQS="" + # default: don't steal locks + FORCE="" } process_opts() { @@ -101,6 +105,8 @@ shift elif [ $1 == "--prereqs" ]; then PREREQS=true + elif [ $1 == "--force" ]; then + FORCE=true shift else usage @@ -132,6 +138,13 @@ if [ ! -z "$PREREQS" ]; then pythonargs=( "${pythonargs[@]}" "--prereqs" ) fi + if [ ! -z "$FORCE" ]; then + if [ -z "$WIKI" ]; then + echo "--force requires --wiki" + exit 1 + fi + pythonargs=( "${pythonargs[@]}" "--force" ) + fi if [ ! 
-z "$DATE" ]; then if [ "$DATE" == "today" ]; then # convert this to yyyymmdd, UTC always diff --git a/xmldumps-backup/worker.py b/xmldumps-backup/worker.py index 176e9f7..54929e7 100644 --- a/xmldumps-backup/worker.py +++ b/xmldumps-backup/worker.py @@ -178,10 +178,8 @@ give the option --job help --dryrun: Don't really run the job, just print what would be done (must be used with a specified wikidbname on which to run ---force: remove a lock file for the specified wiki (dangerous, if there is - another process running, useful if you want to start a second later - run while the first dump from a previous date is still going) - This option cannot be specified with --job. +--force: steal the lock for the specified wiki; dangerous, if there is + another process doing a dump run for that wiki and that date. --exclusive Even if rerunning just one job of a wiki, get a lock to make sure no other runners try to work on that wiki. Default: for single jobs, don't lock --noprefetch: Do not use a previous file's contents for speeding up the dumps @@ -309,8 +307,6 @@ if dryrun and (len(remainder) == 0): usage("--dryrun requires the name of a wikidb to be specified") - if jobs_requested and force_lock: - usage("--force cannot be used with --job option") if restart and not jobs_requested: usage("--restartfrom requires --job and the job from which to restart") if restart and len(jobs_todo) > 1: @@ -367,7 +363,10 @@ sys.exit(1) if (dryrun or partnum_todo is not None or - (jobs_requested is not None and not restart and not do_locking)): + (jobs_requested is not None and + not restart and + not do_locking and + not force_lock)): locks_enabled = False else: locks_enabled = True @@ -387,9 +386,9 @@ wiki = None if wiki is not None and locks_enabled: locker = Locker(wiki, date) - if force_lock: + if force_lock and locks_enabled: lockfiles = locker.is_locked() - locker.unlock(lockfiles) + locker.unlock(lockfiles, owner=False) if locks_enabled: locker.lock() @@ -497,7 +496,7 @@ if 
locks_enabled: locker = Locker(wiki, date) lockfiles = locker.is_locked() - locker.unlock(lockfiles) + locker.unlock(lockfiles, owner=True) elif wiki is not None: sys.stderr.write("Wikis available to run but prereqs not complete.\n") exitcode = 0 -- To view, visit https://gerrit.wikimedia.org/r/304013 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I79c457a13832a8eb6ddb744dd84c08a8239bd233 Gerrit-PatchSet: 5 Gerrit-Project: operations/dumps Gerrit-Branch: master Gerrit-Owner: ArielGlenn <ar...@wikimedia.org> Gerrit-Reviewer: ArielGlenn <ar...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits