ArielGlenn has submitted this change and it was merged.

Change subject: make the 'force' (lock stealing) option for dumps useful
......................................................................


make the 'force' (lock stealing) option for dumps useful

* for regular runs, only remove the lockfile if we created it
* allow lock stealing for single dump jobs
* add the force option to the bash wrapper script

Since we don't lock per job, at least allow someone to manually kick
off a job for a wiki if the other workers are grabbing the lock and
not doing anything with it (e.g. waiting for some earlier job to
be run).

Change-Id: I79c457a13832a8eb6ddb744dd84c08a8239bd233
---
M xmldumps-backup/dumps/WikiDump.py
M xmldumps-backup/worker
M xmldumps-backup/worker.py
3 files changed, 56 insertions(+), 13 deletions(-)

Approvals:
  ArielGlenn: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/xmldumps-backup/dumps/WikiDump.py b/xmldumps-backup/dumps/WikiDump.py
index 293c497..1fff656 100644
--- a/xmldumps-backup/dumps/WikiDump.py
+++ b/xmldumps-backup/dumps/WikiDump.py
@@ -606,18 +606,49 @@
         self.watchdog.start()
         return True
 
-    def unlock(self, lockfiles):
+    def check_owner(self, lockfile, pid):
         '''
-        Note:
+        check if the specified pid created the lockfile
+        (it would be recorded in the lockfile)
+        '''
+        if pid is None:
+            return True
+
+        try:
+            with open(lockfile, "r") as fdesc:
+                lines = fdesc.read().splitlines()
+                # if there's more than one line it's garbage or wrong file,
+                # don't touch
+                if len(lines) == 1:
+                    lockpid = lines[0].split(" ", 1)[1]
+                    if pid == lockpid:
+                        return True
+        except:
+            # don't care what the error is, file is off limits for us
+            pass
+        return False
+
+    def unlock(self, lockfiles, owner=False):
+        '''
+        remove all specified lockfiles.
+        if 'owner' is True, check contents of each lockfile
+        and only remove it if this process is the owner
+        (its pid is recorded in lockfile)
+
         if more than one lockfile is to be removed, they had better be
         'stale' (no longer being updated by a watchdog) or this will fail
         '''
         if self.watchdog is not None:
             self.watchdog.stop_watching()
             self.watchdog = None
+        if owner:
+            pid = str(os.getpid())
+        else:
+            pid = None
         for lockfile in lockfiles:
             try:
-                os.remove(lockfile)
+                if self.check_owner(lockfile, pid):
+                    os.remove(lockfile)
             except:
                 # someone else removed it?
                 pass
diff --git a/xmldumps-backup/worker b/xmldumps-backup/worker
index 12c8c92..7017b5d 100755
--- a/xmldumps-backup/worker
+++ b/xmldumps-backup/worker
@@ -22,6 +22,8 @@
     echo "--onepass      if there are no wikis to dump (--skipdone option) then exit"
     echo "--sleep        time to sleep between jobs, default 60 seconds"
     echo "--prereqs      do prereqs missing for a job rather than failing out"
+    echo "--force        steal the lock for the specified wiki if necessary -- dangerous!"
+    echo "               requires the --wiki argument"
     echo
     echo "If the file maintenance.txt is present, no more jobs will be run, and"
     echo "this script will check the status again in 5 minutes."
@@ -56,6 +58,8 @@
     SLEEP=60
     # default: don't do missing prereqs for a job
     PREREQS=""
+    # default: don't steal locks
+    FORCE=""
 }
 
 process_opts() {
@@ -101,6 +105,8 @@
            shift
        elif [ $1 == "--prereqs" ]; then
            PREREQS=true
+       elif [ $1 == "--force" ]; then
+           FORCE=true
            shift
        else
            usage
@@ -132,6 +138,13 @@
     if [ ! -z "$PREREQS" ]; then
        pythonargs=( "${pythonargs[@]}" "--prereqs" )
     fi
+    if [ ! -z "$FORCE" ]; then
+        if [ -z "$WIKI" ]; then
+           echo "--force requires --wiki"
+           exit 1
+       fi
+       pythonargs=( "${pythonargs[@]}" "--force" )
+    fi
     if [ ! -z "$DATE" ]; then
        if [ "$DATE" == "today" ]; then
            # convert this to yyyymmdd, UTC always
diff --git a/xmldumps-backup/worker.py b/xmldumps-backup/worker.py
index 176e9f7..54929e7 100644
--- a/xmldumps-backup/worker.py
+++ b/xmldumps-backup/worker.py
@@ -178,10 +178,8 @@
                give the option --job help
 --dryrun:      Don't really run the job, just print what would be done (must be used
                with a specified wikidbname on which to run
---force:       remove a lock file for the specified wiki (dangerous, if there is
-               another process running, useful if you want to start a second later
-               run while the first dump from a previous date is still going)
-               This option cannot be specified with --job.
+--force:       steal the lock for the specified wiki; dangerous, if there is
+               another process doing a dump run for that wiki and that date.
 --exclusive    Even if rerunning just one job of a wiki, get a lock to make sure no other
                runners try to work on that wiki. Default: for single jobs, don't lock
 --noprefetch:  Do not use a previous file's contents for speeding up the dumps
@@ -309,8 +307,6 @@
 
         if dryrun and (len(remainder) == 0):
             usage("--dryrun requires the name of a wikidb to be specified")
-        if jobs_requested and force_lock:
-            usage("--force cannot be used with --job option")
         if restart and not jobs_requested:
             usage("--restartfrom requires --job and the job from which to restart")
         if restart and len(jobs_todo) > 1:
@@ -367,7 +363,10 @@
             sys.exit(1)
 
         if (dryrun or partnum_todo is not None or
-                (jobs_requested is not None and not restart and not do_locking)):
+                (jobs_requested is not None and
+                 not restart and
+                 not do_locking and
+                 not force_lock)):
             locks_enabled = False
         else:
             locks_enabled = True
@@ -387,9 +386,9 @@
                     wiki = None
             if wiki is not None and locks_enabled:
                 locker = Locker(wiki, date)
-                if force_lock:
+                if force_lock and locks_enabled:
                     lockfiles = locker.is_locked()
-                    locker.unlock(lockfiles)
+                    locker.unlock(lockfiles, owner=False)
                 if locks_enabled:
                     locker.lock()
 
@@ -497,7 +496,7 @@
             if locks_enabled:
                 locker = Locker(wiki, date)
                 lockfiles = locker.is_locked()
-                locker.unlock(lockfiles)
+                locker.unlock(lockfiles, owner=True)
         elif wiki is not None:
             sys.stderr.write("Wikis available to run but prereqs not complete.\n")
             exitcode = 0

-- 
To view, visit https://gerrit.wikimedia.org/r/304013
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I79c457a13832a8eb6ddb744dd84c08a8239bd233
Gerrit-PatchSet: 5
Gerrit-Project: operations/dumps
Gerrit-Branch: master
Gerrit-Owner: ArielGlenn <ar...@wikimedia.org>
Gerrit-Reviewer: ArielGlenn <ar...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to