Update of /cvsroot/monetdb/sql/src/backends/monet5
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv32490/src/backends/monet5

Modified Files:
        merovingian.mx 
Log Message:
propagated changes of Wednesday Apr 16 2008 - Thursday Apr 17 2008
from the SQL_2-22 branch to the development trunk

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2008/04/16 - mr-meltdown: src/backends/monet5/merovingian.mx,1.22.2.14
Fix a bunch of problems that Arjen identified today while trying to
figure out why mserver seems to immediately stop when started from
merovingian (that actual problem is not yet solved).
- fix off-by-one error: we never ran the code meant for the case where
  starting the server failed
- don't leak memory for the error message when forking an mserver failed
  from the control channel
- don't crash due to an "application double free"; the sighandler
  removes dpairs when their associated process dies, so if you then
  try to terminate that process afterwards, you crash on garbage
  memory
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2008/04/17 - mr-meltdown: src/backends/monet5/merovingian.mx,1.22.2.15
Niels changed/fixed the daemon argument, hence merovingian wasn't starting its
mservers in daemon mode.  This fixes bug #1944717, and also explains it a bit,
since merovingian closes stdin of its child (and hence frees up file
descriptor 0).
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


Index: merovingian.mx
===================================================================
RCS file: /cvsroot/monetdb/sql/src/backends/monet5/merovingian.mx,v
retrieving revision 1.32
retrieving revision 1.33
diff -u -d -r1.32 -r1.33
--- merovingian.mx      27 Mar 2008 16:12:37 -0000      1.32
+++ merovingian.mx      17 Apr 2008 09:26:08 -0000      1.33
@@ -118,6 +118,7 @@
        pid_t pid;        /* this process' id */
        char* dbname;     /* the database that this server serves */
        pthread_t tid;    /* thread id used when terminating this server */
+       char started;     /* is it started? (and can be terminated) */
        struct _dpair* next;
 }* dpair;
 static dpair topdp = NULL;
@@ -490,7 +491,7 @@
                argv[c++] = conffile;
                argv[c++] = dbname;
                argv[c++] = "--dbinit=include sql;"; /* yep, no quotes needed! 
*/
-               argv[c++] = "--set"; argv[c++] = "daemon=yes";
+               argv[c++] = "--set"; argv[c++] = "monet_daemon=yes";
                argv[c++] = "--set"; argv[c++] = "mapi_open=true";
                argv[c++] = "--set"; argv[c++] = "mapi_port=0"; /* force 
autosensing! */
                argv[c++] = "--set"; argv[c++] = vaultkey;
@@ -518,9 +519,12 @@
                dp->next = NULL;
                dp->pid = pid;
                dp->dbname = GDKstrdup(database);
+               dp->started = 0;
 
                /* wait for the child to open up a communication channel */
                for (i = 0; i < 20; i++) {      /* wait up to 10 seconds */
+                       /* give the database a break */
+                       MT_sleep_ms(500);
                        /* stats cannot be NULL, as we don't allow starting not
                         * existing databases */
                        SABAOTHfreeStatus(stats);
@@ -539,19 +543,28 @@
                        {
                                break;
                        }
-                       MT_sleep_ms(500);
                }
-               if (i > 20) {
-                       /* we've never found a connection */
+               /* if we've never found a connection, try to figure out why */
+               if (i >= 20) {
                        switch ((*stats)->state) {
                                case SABdbRunning:
                                        /* not sure what's going on with it, 
but its not
                                         * like we want to have it, so kill it 
*/
-                                       terminateProcess(&dp);
+                                       terminateProcess(dp);
+                                       /* allow the logger to catch up */
+                                       MT_sleep_ms(100);
+                                       /* remove from the list again */
+                                       GDKfree(dp->dbname);
+                                       GDKfree(dp);
+                                       dp->next = NULL;
                                        return(newErr(
                                                                "timeout when 
waiting for database '%s' to "
                                                                "open up a 
communication channel", database));
                                case SABdbCrashed:
+                                       /* remove from the list again */
+                                       GDKfree(dp->dbname);
+                                       GDKfree(dp);
+                                       dp->next = NULL;
                                        return(newErr(
                                                                "database '%s' 
has crashed after starting, "
                                                                "manual 
intervention needed", database));
@@ -562,12 +575,22 @@
                                         * in this case GDK may still be trying 
to start up,
                                         * or that it indeed cleanly shut 
itself down after
                                         * starting... kill it in any case. */
-                                       terminateProcess(&dp);
+                                       terminateProcess(dp);
+                                       /* allow the logger to catch up */
+                                       MT_sleep_ms(100);
+                                       /* remove from the list again */
+                                       GDKfree(dp->dbname);
+                                       GDKfree(dp);
+                                       dp->next = NULL;
                                        return(newErr(
                                                                "database '%s' 
either needs a longer timeout "
                                                                "to start up, 
or appears to cleanly shut "
                                                                "itself down 
after starting", database));
                                default:
+                                       /* remove from the list again */
+                                       GDKfree(dp->dbname);
+                                       GDKfree(dp);
+                                       dp->next = NULL;
                                        return(newErr("unknown state: %d", 
(int)(*stats)->state));
                        }
                }
@@ -576,6 +599,9 @@
                                        "mode during startup", database);
                }
 
+               /* from now on the SIGCHLD handler can find this database */
+               dp->started = 1;
+
                return(NO_ERR);
        }
        /* forking failed somehow, cleanup the pipes */
@@ -1098,8 +1124,13 @@
                } else {
                        *p++ = '\0';
                        if (strcmp(p, "start") == 0) {
+                               err e;
                                merlog("starting database %s due to control 
signal", q);
-                               forkMserver(q, &stats, 1);
+                               if ((e = forkMserver(q, &stats, 1)) != NO_ERR) {
+                                       fprintf(stderr, "failed to fork 
mserver: %s\n",
+                                                       getErrMsg(e));
+                                       freeErr(e);
+                               }
                                if (stats != NULL)
                                        SABAOTHfreeStatus(&stats);
                        } else if (strcmp(p, "stop") == 0 ||
@@ -1194,7 +1225,7 @@
                if (p->pid == si->si_pid) {
                        /* wait a little (we're not in a hurry anyway, are we?) 
such
                         * that the logger catches the messages (if any). */
-                       MT_sleep_ms(40);
+                       MT_sleep_ms(100);
                        /* remove it, disconnect first, then free (because the
                         * logger might access it otherwise after the free) */
                        q->next = p->next;
@@ -1208,6 +1239,9 @@
                                merlog("database '%s' (%d) has crashed (dumped 
core)",
                                                p->dbname, p->pid);
                        }
+                       /* if we're starting up, don't clean up */
+                       if (p->started == 0)
+                               return;
                        if (p->dbname)
                                GDKfree(p->dbname);
                        GDKfree(p);


-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
_______________________________________________
Monetdb-sql-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-sql-checkins

Reply via email to