Changeset: fc516173a646 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=fc516173a646
Modified Files:
        tools/merovingian/ChangeLog.Jul2012
        tools/merovingian/daemon/forkmserver.c
Branch: Jul2012
Log Message:

forkmserver: fix possible race condition when starting mservers

As reported in bug #3107, when multiple clients would trigger monetdbd
to auto-start an mserver at the same time, monetdbd could wrongly start
forking mserver5's for the same database, obviously with failing
behaviour as a result.

This is now resolved by using a global lock for forkmserver when it
figures out it really needs to fork an mserver5 process.  Multiple
concurrent clients would in this case block, and have to re-evaluate
whether they still need to start an mserver5 or not in serial order.

Upside of this fix, is that successive connects to a database should be
slightly faster now, as a shortcut is taken when the database already
appears to be running, avoiding expensive calls which results aren't
used.


diffs (160 lines):

diff --git a/tools/merovingian/ChangeLog.Jul2012 
b/tools/merovingian/ChangeLog.Jul2012
--- a/tools/merovingian/ChangeLog.Jul2012
+++ b/tools/merovingian/ChangeLog.Jul2012
@@ -1,3 +1,7 @@
 # ChangeLog file for sql/src/backends/monet5/merovingian
 # This file is updated with mchangelog
 
+* Wed Jul 18 2012 Fabian Groffen <[email protected]>
+- Resolved a problem where automatic starting of a database initiated by
+  multiple clients at the same time could cause failed starts.  Bug #3107
+
diff --git a/tools/merovingian/daemon/forkmserver.c 
b/tools/merovingian/daemon/forkmserver.c
--- a/tools/merovingian/daemon/forkmserver.c
+++ b/tools/merovingian/daemon/forkmserver.c
@@ -40,6 +40,8 @@
 #include "forkmserver.h"
 
 
+static pthread_mutex_t fork_lock = PTHREAD_MUTEX_INITIALIZER;
+
 /**
  * Fork an mserver and detach.  Before forking off, Sabaoth is consulted
  * to see if forking makes sense, or whether it is necessary at all, or
@@ -86,6 +88,29 @@ forkMserver(char *database, sabdb** stat
         * more than one entry in the list, so we assume we have the right
         * one here. */
 
+       if ((*stats)->state == SABdbRunning)
+               /* return before doing expensive stuff, when this db just seems
+                * to be running */
+               return(NO_ERR);
+
+       /* Make sure we only start one mserver5 at the same time, this is a
+        * horsedrug for preventing race-conditions where two or more
+        * clients start the same database at the same time, because they
+        * were all identified as being SABdbInactive.  If this "global"
+        * lock ever becomes a problem, we can reduce it to a per-database
+        * lock instead. */
+       pthread_mutex_lock(&fork_lock);
+
+       /* refetch the status, as it may have changed */
+       msab_freeStatus(stats);
+       er = msab_getStatus(stats, database);
+       if (er != NULL) {
+               err e = newErr("%s", er);
+               free(er);
+               pthread_mutex_unlock(&fork_lock);
+               return(e);
+       }
+
        ckv = getDefaultProps();
        readProps(ckv, (*stats)->path);
        kv = findConfKey(ckv, "type");
@@ -98,6 +123,7 @@ forkMserver(char *database, sabdb** stat
                                        kv->val, database);
                        freeConfFile(ckv);
                        free(ckv);
+                       pthread_mutex_unlock(&fork_lock);
                        return(NO_ERR);
                } else {
                        Mfprintf(stdout, "startup of %s under maintenance "
@@ -113,6 +139,7 @@ forkMserver(char *database, sabdb** stat
                msab_freeStatus(stats);
                freeConfFile(ckv);
                free(ckv);
+               pthread_mutex_unlock(&fork_lock);
                return(e);
        }
 
@@ -120,6 +147,7 @@ forkMserver(char *database, sabdb** stat
                case SABdbRunning:
                        freeConfFile(ckv);
                        free(ckv);
+                       pthread_mutex_unlock(&fork_lock);
                        return(NO_ERR);
                case SABdbCrashed:
                        t = localtime(&info.lastcrash);
@@ -152,6 +180,7 @@ forkMserver(char *database, sabdb** stat
                        msab_freeStatus(stats);
                        freeConfFile(ckv);
                        free(ckv);
+                       pthread_mutex_unlock(&fork_lock);
                        return(newErr("unknown state: %d", 
(int)(*stats)->state));
        }
 
@@ -161,6 +190,7 @@ forkMserver(char *database, sabdb** stat
                msab_freeStatus(stats);
                freeConfFile(ckv);
                free(ckv);
+               pthread_mutex_unlock(&fork_lock);
                return(newErr("unable to create pipe: %s", strerror(errno)));
        }
        if (pipe(pfde) == -1) {
@@ -169,6 +199,7 @@ forkMserver(char *database, sabdb** stat
                msab_freeStatus(stats);
                freeConfFile(ckv);
                free(ckv);
+               pthread_mutex_unlock(&fork_lock);
                return(newErr("unable to create pipe: %s", strerror(errno)));
        }
 
@@ -198,6 +229,7 @@ forkMserver(char *database, sabdb** stat
                                        getErrMsg(er));
                        freeConfFile(ckv);
                        free(ckv);
+                       pthread_mutex_unlock(&fork_lock);
                        return(er);
                }
                freeConfFile(ckv);
@@ -211,8 +243,10 @@ forkMserver(char *database, sabdb** stat
                         * it's not really a problem we exit here */
                        err e = newErr("%s", er);
                        free(er);
+                       pthread_mutex_unlock(&fork_lock);
                        return(e);
                }
+               pthread_mutex_unlock(&fork_lock);
                return(NO_ERR);
        }
 
@@ -223,6 +257,7 @@ forkMserver(char *database, sabdb** stat
                msab_freeStatus(stats);
                freeConfFile(ckv);
                free(ckv);
+               pthread_mutex_unlock(&fork_lock);
                return(newErr("cannot start database '%s': no .vaultkey found "
                                        "(did you create the database with 
`monetdb create %s`?)",
                                        database, database));
@@ -411,6 +446,7 @@ forkMserver(char *database, sabdb** stat
                                 * it's not really a problem we exit here */
                                err e = newErr("%s", er);
                                free(er);
+                               pthread_mutex_unlock(&fork_lock);
                                return(e);
                        }
                        if ((*stats)->state == SABdbRunning &&
@@ -458,6 +494,7 @@ forkMserver(char *database, sabdb** stat
                         * starting */
                        if (dp == NULL) {
                                pthread_mutex_unlock(&_mero_topdp_lock);
+                               pthread_mutex_unlock(&fork_lock);
                                switch (state) {
                                        case SABdbRunning:
                                                /* right, it's not there, but 
it's running */
@@ -488,6 +525,7 @@ forkMserver(char *database, sabdb** stat
                         * we don't want */
                        terminateProcess(dp);
                        pthread_mutex_unlock(&_mero_topdp_lock);
+                       pthread_mutex_unlock(&fork_lock);
 
                        switch (state) {
                                case SABdbRunning:
@@ -530,6 +568,7 @@ forkMserver(char *database, sabdb** stat
                                        "mode during startup\n", database);
                }
 
+               pthread_mutex_unlock(&fork_lock);
                return(NO_ERR);
        }
        /* forking failed somehow, cleanup the pipes */
_______________________________________________
Checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to