Update of /cvsroot/monetdb/sql/src/backends/monet5
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv13315
Modified Files:
Tag: SQL_2-22
merovingian.mx
Log Message:
Fix a bunch of problems that Arjen identified today while trying to
figure out why mserver seems to immediately stop when started from
merovingian (that actual problem is not yet solved).
- fix off-by-one error: we never ran the code meant for when starting
the server failed
- don't leak memory for the error message when forking an mserver failed
from the control channel
- don't crash due to an "application double free"; the sighandler
happens to remove dpairs when their associated process dies; however,
if you then want to terminate that process afterwards, you crash on
garbage memory
U merovingian.mx
Index: merovingian.mx
===================================================================
RCS file: /cvsroot/monetdb/sql/src/backends/monet5/merovingian.mx,v
retrieving revision 1.22.2.13
retrieving revision 1.22.2.14
diff -u -d -r1.22.2.13 -r1.22.2.14
--- merovingian.mx 22 Feb 2008 17:17:00 -0000 1.22.2.13
+++ merovingian.mx 16 Apr 2008 19:07:33 -0000 1.22.2.14
@@ -117,6 +117,7 @@
pid_t pid; /* this process' id */
char* dbname; /* the database that this server serves */
pthread_t tid; /* thread id used when terminating this server */
+ char started; /* is it started? (and can be terminated) */
struct _dpair* next;
}* dpair;
static dpair topdp = NULL;
@@ -514,9 +515,12 @@
dp->next = NULL;
dp->pid = pid;
dp->dbname = GDKstrdup(database);
+ dp->started = 0;
/* wait for the child to open up a communication channel */
for (i = 0; i < 20; i++) { /* wait up to 10 seconds */
+ /* give the database a break */
+ MT_sleep_ms(500);
/* stats cannot be NULL, as we don't allow starting not
* existing databases */
SABAOTHfreeStatus(stats);
@@ -535,19 +539,28 @@
{
break;
}
- MT_sleep_ms(500);
}
- if (i > 20) {
- /* we've never found a connection */
+ /* if we've never found a connection, try to figure out why */
+ if (i >= 20) {
switch ((*stats)->state) {
case SABdbRunning:
/* not sure what's going on with it,
but its not
* like we want to have it, so kill it
*/
- terminateProcess(&dp);
+ terminateProcess(dp);
+ /* allow the logger to catch up */
+ MT_sleep_ms(100);
+ /* remove from the list again */
+ GDKfree(dp->dbname);
+ GDKfree(dp);
+ dp->next = NULL;
return(newErr(
"timeout when
waiting for database '%s' to "
"open up a
communication channel", database));
case SABdbCrashed:
+ /* remove from the list again */
+ GDKfree(dp->dbname);
+ GDKfree(dp);
+ dp->next = NULL;
return(newErr(
"database '%s'
has crashed after starting, "
"manual
intervention needed", database));
@@ -558,12 +571,22 @@
* in this case GDK may still be trying
to start up,
* or that it indeed cleanly shut
itself down after
* starting... kill it in any case. */
- terminateProcess(&dp);
+ terminateProcess(dp);
+ /* allow the logger to catch up */
+ MT_sleep_ms(100);
+ /* remove from the list again */
+ GDKfree(dp->dbname);
+ GDKfree(dp);
+ dp->next = NULL;
return(newErr(
"database '%s'
either needs a longer timeout "
"to start up,
or appears to cleanly shut "
"itself down
after starting", database));
default:
+ /* remove from the list again */
+ GDKfree(dp->dbname);
+ GDKfree(dp);
+ dp->next = NULL;
return(newErr("unknown state: %d",
(int)(*stats)->state));
}
}
@@ -572,6 +595,9 @@
"mode during startup", database);
}
+ /* from now on the SIGCHLD handler can find this database */
+ dp->started = 1;
+
return(NO_ERR);
}
/* forking failed somehow, cleanup the pipes */
@@ -925,8 +951,13 @@
} else {
*p++ = '\0';
if (strcmp(p, "start") == 0) {
+ err e;
merlog("starting database %s due to control
signal", q);
- forkMserver(q, &stats, 1);
+ if ((e = forkMserver(q, &stats, 1)) != NO_ERR) {
+ fprintf(stderr, "failed to fork
mserver: %s\n",
+ getErrMsg(e));
+ freeErr(e);
+ }
if (stats != NULL)
SABAOTHfreeStatus(&stats);
} else if (strcmp(p, "stop") == 0 ||
@@ -1021,7 +1052,7 @@
if (p->pid == si->si_pid) {
/* wait a little (we're not in a hurry anyway, are we?)
such
* that the logger catches the messages (if any). */
- MT_sleep_ms(40);
+ MT_sleep_ms(100);
/* remove it, disconnect first, then free (because the
* logger might access it otherwise after the free) */
q->next = p->next;
@@ -1035,6 +1066,9 @@
merlog("database '%s' (%d) has crashed (dumped
core)",
p->dbname, p->pid);
}
+ /* if we're starting up, don't clean up */
+ if (p->started == 0)
+ return;
if (p->dbname)
GDKfree(p->dbname);
GDKfree(p);
-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference
Don't miss this year's exciting event. There's still time to save $100.
Use priority code J8TL2D2.
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
_______________________________________________
Monetdb-sql-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-sql-checkins