Changeset: fc516173a646 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=fc516173a646 Modified Files: tools/merovingian/ChangeLog.Jul2012 tools/merovingian/daemon/forkmserver.c Branch: Jul2012 Log Message:
forkmserver: fix possible race condition when starting mservers As reported in bug #3107, when multiple clients would trigger monetdbd to auto-start an mserver at the same time, monetdbd could wrongly start forking mserver5's for the same database, obviously with failing behaviour as a result. This is now resolved by using a global lock for forkmserver when it figures out it really needs to fork an mserver5 process. Multiple concurrent clients would in this case block, and have to re-evaluate whether they still need to start an mserver5 or not in serial order. Upside of this fix, is that successive connects to a database should be slightly faster now, as a shortcut is taken when the database already appears to be running, avoiding expensive calls which results aren't used. diffs (160 lines): diff --git a/tools/merovingian/ChangeLog.Jul2012 b/tools/merovingian/ChangeLog.Jul2012 --- a/tools/merovingian/ChangeLog.Jul2012 +++ b/tools/merovingian/ChangeLog.Jul2012 @@ -1,3 +1,7 @@ # ChangeLog file for sql/src/backends/monet5/merovingian # This file is updated with mchangelog +* Wed Jul 18 2012 Fabian Groffen <[email protected]> +- Resolved a problem where automatic starting of a database initiated by + multiple clients at the same time could cause failed starts. Bug #3107 + diff --git a/tools/merovingian/daemon/forkmserver.c b/tools/merovingian/daemon/forkmserver.c --- a/tools/merovingian/daemon/forkmserver.c +++ b/tools/merovingian/daemon/forkmserver.c @@ -40,6 +40,8 @@ #include "forkmserver.h" +static pthread_mutex_t fork_lock = PTHREAD_MUTEX_INITIALIZER; + /** * Fork an mserver and detach. Before forking off, Sabaoth is consulted * to see if forking makes sense, or whether it is necessary at all, or @@ -86,6 +88,29 @@ forkMserver(char *database, sabdb** stat * more than one entry in the list, so we assume we have the right * one here. */ + if ((*stats)->state == SABdbRunning) + /* return before doing expensive stuff, when this db just seems + * to be running */ + return(NO_ERR); + + /* Make sure we only start one mserver5 at the same time, this is a + * horsedrug for preventing race-conditions where two or more + * clients start the same database at the same time, because they + * were all identified as being SABdbInactive. If this "global" + * lock ever becomes a problem, we can reduce it to a per-database + * lock instead. */ + pthread_mutex_lock(&fork_lock); + + /* refetch the status, as it may have changed */ + msab_freeStatus(stats); + er = msab_getStatus(stats, database); + if (er != NULL) { + err e = newErr("%s", er); + free(er); + pthread_mutex_unlock(&fork_lock); + return(e); + } + ckv = getDefaultProps(); readProps(ckv, (*stats)->path); kv = findConfKey(ckv, "type"); @@ -98,6 +123,7 @@ forkMserver(char *database, sabdb** stat kv->val, database); freeConfFile(ckv); free(ckv); + pthread_mutex_unlock(&fork_lock); return(NO_ERR); } else { Mfprintf(stdout, "startup of %s under maintenance " @@ -113,6 +139,7 @@ forkMserver(char *database, sabdb** stat msab_freeStatus(stats); freeConfFile(ckv); free(ckv); + pthread_mutex_unlock(&fork_lock); return(e); } @@ -120,6 +147,7 @@ forkMserver(char *database, sabdb** stat case SABdbRunning: freeConfFile(ckv); free(ckv); + pthread_mutex_unlock(&fork_lock); return(NO_ERR); case SABdbCrashed: t = localtime(&info.lastcrash); @@ -152,6 +180,7 @@ forkMserver(char *database, sabdb** stat msab_freeStatus(stats); freeConfFile(ckv); free(ckv); + pthread_mutex_unlock(&fork_lock); return(newErr("unknown state: %d", (int)(*stats)->state)); } @@ -161,6 +190,7 @@ forkMserver(char *database, sabdb** stat msab_freeStatus(stats); freeConfFile(ckv); free(ckv); + pthread_mutex_unlock(&fork_lock); return(newErr("unable to create pipe: %s", strerror(errno))); } if (pipe(pfde) == -1) { @@ -169,6 +199,7 @@ forkMserver(char *database, sabdb** stat msab_freeStatus(stats); freeConfFile(ckv); free(ckv); + pthread_mutex_unlock(&fork_lock); return(newErr("unable to create pipe: %s", strerror(errno))); } @@ -198,6 +229,7 @@ forkMserver(char *database, sabdb** stat getErrMsg(er)); freeConfFile(ckv); free(ckv); + pthread_mutex_unlock(&fork_lock); return(er); } freeConfFile(ckv); @@ -211,8 +243,10 @@ forkMserver(char *database, sabdb** stat * it's not really a problem we exit here */ err e = newErr("%s", er); free(er); + pthread_mutex_unlock(&fork_lock); return(e); } + pthread_mutex_unlock(&fork_lock); return(NO_ERR); } @@ -223,6 +257,7 @@ forkMserver(char *database, sabdb** stat msab_freeStatus(stats); freeConfFile(ckv); free(ckv); + pthread_mutex_unlock(&fork_lock); return(newErr("cannot start database '%s': no .vaultkey found " "(did you create the database with `monetdb create %s`?)", database, database)); @@ -411,6 +446,7 @@ forkMserver(char *database, sabdb** stat * it's not really a problem we exit here */ err e = newErr("%s", er); free(er); + pthread_mutex_unlock(&fork_lock); return(e); } if ((*stats)->state == SABdbRunning && @@ -458,6 +494,7 @@ forkMserver(char *database, sabdb** stat * starting */ if (dp == NULL) { pthread_mutex_unlock(&_mero_topdp_lock); + pthread_mutex_unlock(&fork_lock); switch (state) { case SABdbRunning: /* right, it's not there, but it's running */ @@ -488,6 +525,7 @@ forkMserver(char *database, sabdb** stat * we don't want */ terminateProcess(dp); pthread_mutex_unlock(&_mero_topdp_lock); + pthread_mutex_unlock(&fork_lock); switch (state) { case SABdbRunning: @@ -530,6 +568,7 @@ forkMserver(char *database, sabdb** stat "mode during startup\n", database); } + pthread_mutex_unlock(&fork_lock); return(NO_ERR); } /* forking failed somehow, cleanup the pipes */ _______________________________________________ Checkin-list mailing list [email protected] http://mail.monetdb.org/mailman/listinfo/checkin-list
