Update of /cvsroot/monetdb/sql/src/backends/monet5
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv32490/src/backends/monet5
Modified Files:
merovingian.mx
Log Message:
propagated changes of Wednesday Apr 16 2008 - Thursday Apr 17 2008
from the SQL_2-22 branch to the development trunk
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2008/04/16 - mr-meltdown: src/backends/monet5/merovingian.mx,1.22.2.14
Fix a bunch of problems that Arjen identified today while trying to
figure out why mserver seems to immediately stop when started from
merovingian (that actual problem is not yet solved).
- fix off-by-one error, we never ran the code meant for when starting
the server failed
- don't leak memory for the error message when forking an mserver failed
from the control channel
- don't crash due to an "application double free"; the sighandler
removes dpairs when their associated process dies, so if you
then try to terminate that process afterwards, you crash on
garbage memory
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2008/04/17 - mr-meltdown: src/backends/monet5/merovingian.mx,1.22.2.15
Niels changed/fixed the name of the daemon argument (it is now monet_daemon),
hence merovingian was no longer starting its mservers in daemon mode. This
fixes bug #1944717, and also explains it a bit, since merovingian closes stdin
of its child (and hence frees up file descriptor 0).
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Index: merovingian.mx
===================================================================
RCS file: /cvsroot/monetdb/sql/src/backends/monet5/merovingian.mx,v
retrieving revision 1.32
retrieving revision 1.33
diff -u -d -r1.32 -r1.33
--- merovingian.mx 27 Mar 2008 16:12:37 -0000 1.32
+++ merovingian.mx 17 Apr 2008 09:26:08 -0000 1.33
@@ -118,6 +118,7 @@
pid_t pid; /* this process' id */
char* dbname; /* the database that this server serves */
pthread_t tid; /* thread id used when terminating this server */
+ char started; /* is it started? (and can be terminated) */
struct _dpair* next;
}* dpair;
static dpair topdp = NULL;
@@ -490,7 +491,7 @@
argv[c++] = conffile;
argv[c++] = dbname;
argv[c++] = "--dbinit=include sql;"; /* yep, no quotes needed!
*/
- argv[c++] = "--set"; argv[c++] = "daemon=yes";
+ argv[c++] = "--set"; argv[c++] = "monet_daemon=yes";
argv[c++] = "--set"; argv[c++] = "mapi_open=true";
argv[c++] = "--set"; argv[c++] = "mapi_port=0"; /* force
autosensing! */
argv[c++] = "--set"; argv[c++] = vaultkey;
@@ -518,9 +519,12 @@
dp->next = NULL;
dp->pid = pid;
dp->dbname = GDKstrdup(database);
+ dp->started = 0;
/* wait for the child to open up a communication channel */
for (i = 0; i < 20; i++) { /* wait up to 10 seconds */
+ /* give the database a break */
+ MT_sleep_ms(500);
/* stats cannot be NULL, as we don't allow starting not
* existing databases */
SABAOTHfreeStatus(stats);
@@ -539,19 +543,28 @@
{
break;
}
- MT_sleep_ms(500);
}
- if (i > 20) {
- /* we've never found a connection */
+ /* if we've never found a connection, try to figure out why */
+ if (i >= 20) {
switch ((*stats)->state) {
case SABdbRunning:
/* not sure what's going on with it,
but its not
* like we want to have it, so kill it
*/
- terminateProcess(&dp);
+ terminateProcess(dp);
+ /* allow the logger to catch up */
+ MT_sleep_ms(100);
+ /* remove from the list again */
+ GDKfree(dp->dbname);
+ GDKfree(dp);
+ dp->next = NULL;
return(newErr(
"timeout when
waiting for database '%s' to "
"open up a
communication channel", database));
case SABdbCrashed:
+ /* remove from the list again */
+ GDKfree(dp->dbname);
+ GDKfree(dp);
+ dp->next = NULL;
return(newErr(
"database '%s'
has crashed after starting, "
"manual
intervention needed", database));
@@ -562,12 +575,22 @@
* in this case GDK may still be trying
to start up,
* or that it indeed cleanly shut
itself down after
* starting... kill it in any case. */
- terminateProcess(&dp);
+ terminateProcess(dp);
+ /* allow the logger to catch up */
+ MT_sleep_ms(100);
+ /* remove from the list again */
+ GDKfree(dp->dbname);
+ GDKfree(dp);
+ dp->next = NULL;
return(newErr(
"database '%s'
either needs a longer timeout "
"to start up,
or appears to cleanly shut "
"itself down
after starting", database));
default:
+ /* remove from the list again */
+ GDKfree(dp->dbname);
+ GDKfree(dp);
+ dp->next = NULL;
return(newErr("unknown state: %d",
(int)(*stats)->state));
}
}
@@ -576,6 +599,9 @@
"mode during startup", database);
}
+ /* from now on the SIGCHLD handler can find this database */
+ dp->started = 1;
+
return(NO_ERR);
}
/* forking failed somehow, cleanup the pipes */
@@ -1098,8 +1124,13 @@
} else {
*p++ = '\0';
if (strcmp(p, "start") == 0) {
+ err e;
merlog("starting database %s due to control
signal", q);
- forkMserver(q, &stats, 1);
+ if ((e = forkMserver(q, &stats, 1)) != NO_ERR) {
+ fprintf(stderr, "failed to fork
mserver: %s\n",
+ getErrMsg(e));
+ freeErr(e);
+ }
if (stats != NULL)
SABAOTHfreeStatus(&stats);
} else if (strcmp(p, "stop") == 0 ||
@@ -1194,7 +1225,7 @@
if (p->pid == si->si_pid) {
/* wait a little (we're not in a hurry anyway, are we?)
such
* that the logger catches the messages (if any). */
- MT_sleep_ms(40);
+ MT_sleep_ms(100);
/* remove it, disconnect first, then free (because the
* logger might access it otherwise after the free) */
q->next = p->next;
@@ -1208,6 +1239,9 @@
merlog("database '%s' (%d) has crashed (dumped
core)",
p->dbname, p->pid);
}
+ /* if we're starting up, don't clean up */
+ if (p->started == 0)
+ return;
if (p->dbname)
GDKfree(p->dbname);
GDKfree(p);
-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference
Don't miss this year's exciting event. There's still time to save $100.
Use priority code J8TL2D2.
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
_______________________________________________
Monetdb-sql-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-sql-checkins