Update of /cvsroot/monetdb/sql/src/backends/monet5
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv13315

Modified Files:
      Tag: SQL_2-22
        merovingian.mx 
Log Message:
Fix a bunch of problems that Arjen identified today while trying to
figure out why mserver seems to stop immediately when started from
merovingian (the actual problem itself is not yet solved).
- fix off-by-one error: we never ran the code meant for the case where
  starting the server failed
- don't leak memory for the error message when forking an mserver failed
  from the control channel
- don't crash due to an "application double free": the sighandler
  removes dpairs when their associated process dies, so if you then
  try to terminate that process afterwards, you crash on garbage
  memory


U merovingian.mx
Index: merovingian.mx
===================================================================
RCS file: /cvsroot/monetdb/sql/src/backends/monet5/merovingian.mx,v
retrieving revision 1.22.2.13
retrieving revision 1.22.2.14
diff -u -d -r1.22.2.13 -r1.22.2.14
--- merovingian.mx      22 Feb 2008 17:17:00 -0000      1.22.2.13
+++ merovingian.mx      16 Apr 2008 19:07:33 -0000      1.22.2.14
@@ -117,6 +117,7 @@
        pid_t pid;        /* this process' id */
        char* dbname;     /* the database that this server serves */
        pthread_t tid;    /* thread id used when terminating this server */
+       char started;     /* is it started? (and can be terminated) */
        struct _dpair* next;
 }* dpair;
 static dpair topdp = NULL;
@@ -514,9 +515,12 @@
                dp->next = NULL;
                dp->pid = pid;
                dp->dbname = GDKstrdup(database);
+               dp->started = 0;
 
                /* wait for the child to open up a communication channel */
                for (i = 0; i < 20; i++) {      /* wait up to 10 seconds */
+                       /* give the database a break */
+                       MT_sleep_ms(500);
                        /* stats cannot be NULL, as we don't allow starting not
                         * existing databases */
                        SABAOTHfreeStatus(stats);
@@ -535,19 +539,28 @@
                        {
                                break;
                        }
-                       MT_sleep_ms(500);
                }
-               if (i > 20) {
-                       /* we've never found a connection */
+               /* if we've never found a connection, try to figure out why */
+               if (i >= 20) {
                        switch ((*stats)->state) {
                                case SABdbRunning:
                                        /* not sure what's going on with it, 
but its not
                                         * like we want to have it, so kill it 
*/
-                                       terminateProcess(&dp);
+                                       terminateProcess(dp);
+                                       /* allow the logger to catch up */
+                                       MT_sleep_ms(100);
+                                       /* remove from the list again */
+                                       GDKfree(dp->dbname);
+                                       GDKfree(dp);
+                                       dp->next = NULL;
                                        return(newErr(
                                                                "timeout when 
waiting for database '%s' to "
                                                                "open up a 
communication channel", database));
                                case SABdbCrashed:
+                                       /* remove from the list again */
+                                       GDKfree(dp->dbname);
+                                       GDKfree(dp);
+                                       dp->next = NULL;
                                        return(newErr(
                                                                "database '%s' 
has crashed after starting, "
                                                                "manual 
intervention needed", database));
@@ -558,12 +571,22 @@
                                         * in this case GDK may still be trying 
to start up,
                                         * or that it indeed cleanly shut 
itself down after
                                         * starting... kill it in any case. */
-                                       terminateProcess(&dp);
+                                       terminateProcess(dp);
+                                       /* allow the logger to catch up */
+                                       MT_sleep_ms(100);
+                                       /* remove from the list again */
+                                       GDKfree(dp->dbname);
+                                       GDKfree(dp);
+                                       dp->next = NULL;
                                        return(newErr(
                                                                "database '%s' 
either needs a longer timeout "
                                                                "to start up, 
or appears to cleanly shut "
                                                                "itself down 
after starting", database));
                                default:
+                                       /* remove from the list again */
+                                       GDKfree(dp->dbname);
+                                       GDKfree(dp);
+                                       dp->next = NULL;
                                        return(newErr("unknown state: %d", 
(int)(*stats)->state));
                        }
                }
@@ -572,6 +595,9 @@
                                        "mode during startup", database);
                }
 
+               /* from now on the SIGCHLD handler can find this database */
+               dp->started = 1;
+
                return(NO_ERR);
        }
        /* forking failed somehow, cleanup the pipes */
@@ -925,8 +951,13 @@
                } else {
                        *p++ = '\0';
                        if (strcmp(p, "start") == 0) {
+                               err e;
                                merlog("starting database %s due to control 
signal", q);
-                               forkMserver(q, &stats, 1);
+                               if ((e = forkMserver(q, &stats, 1)) != NO_ERR) {
+                                       fprintf(stderr, "failed to fork 
mserver: %s\n",
+                                                       getErrMsg(e));
+                                       freeErr(e);
+                               }
                                if (stats != NULL)
                                        SABAOTHfreeStatus(&stats);
                        } else if (strcmp(p, "stop") == 0 ||
@@ -1021,7 +1052,7 @@
                if (p->pid == si->si_pid) {
                        /* wait a little (we're not in a hurry anyway, are we?) 
such
                         * that the logger catches the messages (if any). */
-                       MT_sleep_ms(40);
+                       MT_sleep_ms(100);
                        /* remove it, disconnect first, then free (because the
                         * logger might access it otherwise after the free) */
                        q->next = p->next;
@@ -1035,6 +1066,9 @@
                                merlog("database '%s' (%d) has crashed (dumped 
core)",
                                                p->dbname, p->pid);
                        }
+                       /* if we're starting up, don't clean up */
+                       if (p->started == 0)
+                               return;
                        if (p->dbname)
                                GDKfree(p->dbname);
                        GDKfree(p);


-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
_______________________________________________
Monetdb-sql-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-sql-checkins

Reply via email to