CVSROOT:        /cvs/cluster
Module name:    cluster
Changes by:     [EMAIL PROTECTED]       2008-01-30 15:46:41

Modified files:
        cman/cman_tool : cman_tool.h join.c 
        cman/daemon    : ais.c 
        cman/man       : cman_tool.8 

Log message:
        Improve startup error checking and logging.

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/cman_tool/cman_tool.h.diff?cvsroot=cluster&r1=1.14&r2=1.15
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/cman_tool/join.c.diff?cvsroot=cluster&r1=1.53&r2=1.54
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/ais.c.diff?cvsroot=cluster&r1=1.59&r2=1.60
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/man/cman_tool.8.diff?cvsroot=cluster&r1=1.15&r2=1.16

--- cluster/cman/cman_tool/cman_tool.h  2007/11/29 11:19:12     1.14
+++ cluster/cman/cman_tool/cman_tool.h  2008/01/30 15:46:41     1.15
@@ -56,6 +56,8 @@
 #define MAX_MCAST_NAME_LEN 256
 #define MAX_PATH_LEN 256
 
+#define DEBUG_STARTUP_ONLY 32
+
 enum format_opt
 {
        FMT_NONE,
--- cluster/cman/cman_tool/join.c       2008/01/10 10:39:16     1.53
+++ cluster/cman/cman_tool/join.c       2008/01/30 15:46:41     1.54
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -51,22 +51,21 @@
        setsid();
 }
 
-
 int join(commandline_t *comline)
 {
        int i;
        int envptr = 0;
+       int argvptr = 0;
        char scratch[1024];
        cman_handle_t h;
+       int status;
        pid_t aisexec_pid;
        int ctree;
        int p[2];
 
-       if (!comline->noccs_opt)
-       {
+       if (!comline->noccs_opt) {
                ctree = ccs_force_connect(NULL, 1);
-               if (ctree < 0)
-               {
+               if (ctree < 0) {
                        die("ccsd is not running\n");
                }
                ccs_disconnect(ctree);
@@ -79,7 +78,6 @@
        if (h)
                die("Node is already active");
 
-
        /* Set up environment variables for override */
        if (comline->multicast_addr) {
                snprintf(scratch, sizeof(scratch), "CMAN_MCAST_ADDR=%s", comline->multicast_addr);
@@ -117,27 +115,29 @@
                snprintf(scratch, sizeof(scratch), "CMAN_2NODE=true");
                envp[envptr++] = strdup(scratch);
        }
-       if (comline->verbose) {
+       if (comline->verbose ^ DEBUG_STARTUP_ONLY) {
                snprintf(scratch, sizeof(scratch), "CMAN_DEBUGLOG=%d", comline->verbose);
                envp[envptr++] = strdup(scratch);
        }
        if (comline->noccs_opt) {
-               snprintf(scratch, sizeof(scratch), "CMAN_NOCCS=TRUE");
-               envp[envptr++] = strdup(scratch);
+               envp[envptr++] = strdup("CMAN_NOCCS=true");
+               envp[envptr++] = strdup("OPENAIS_DEFAULT_CONFIG_IFACE=cmanpreconfig");
+       }
+       else {
+               envp[envptr++] = strdup("OPENAIS_DEFAULT_CONFIG_IFACE=cmanconfig");
        }
-
-       /* Use cman to configure services */
-       envp[envptr++] = strdup("OPENAIS_DEFAULT_CONFIG_IFACE=cmanconfig");
 
        /* Create a pipe to monitor cman startup progress */
        pipe(p);
        fcntl(p[1], F_SETFD, 0); /* Don't close on exec */
        snprintf(scratch, sizeof(scratch), "CMAN_PIPE=%d", p[1]);
        envp[envptr++] = strdup(scratch);
-
        envp[envptr++] = NULL;
 
        argv[0] = "aisexec";
+       if (comline->verbose & ~DEBUG_STARTUP_ONLY)
+               argv[++argvptr] = "-f";
+       argv[++argvptr] = NULL;
 
        /* Fork/exec cman */
        switch ( (aisexec_pid = fork()) )
@@ -145,18 +145,28 @@
        case -1:
                die("fork of aisexec daemon failed: %s", strerror(errno));
 
-       case 0: // child
+       case 0: /* child */
                close(p[0]);
-               be_daemon(!comline->verbose);
+               if (comline->verbose & DEBUG_STARTUP_ONLY) {
+                       fprintf(stderr, "Starting %s", AISEXECBIN);
+                       for (i=0; i< argvptr; i++) {
+                               fprintf(stderr, " %s", argv[i]);
+                       }
+                       fprintf(stderr, "\n");
+                       for (i=0; i<envptr-1; i++) {
+                               fprintf(stderr, "%s\n", envp[i]);
+                       }
+               }
+               be_daemon(!(comline->verbose & ~DEBUG_STARTUP_ONLY));
                execve(AISEXECBIN, argv, envp);
 
-               // exec failed - tell the parent process */
+               /* exec failed - tell the parent process */
                sprintf(scratch, "execve of " AISEXECBIN " failed: %s", strerror(errno));
                write(p[1], scratch, strlen(scratch));
                exit(1);
                break;
 
-       default: //parent
+       default: /* parent */
                break;
 
        }
@@ -164,10 +174,12 @@
        /* Give the daemon a chance to start up, and monitor the pipe FD for messages */
        i = 0;
        close(p[1]);
+
+       /* Wait for the process to start or die */
+       sleep(1);
        do {
                fd_set fds;
                struct timeval tv={1, 0};
-               int status;
                char message[1024];
 
                FD_ZERO(&fds);
@@ -177,31 +189,69 @@
 
                /* Did we get an error? */
                if (status == 1) {
-                       if (read(p[0], message, sizeof(message)) != 0) {
-                               fprintf(stderr, "cman not started: %s\n", message);
+                       int len;
+                       if ((len = read(p[0], message, sizeof(message)) > 0)) {
+
+                               /* Success! get the new PID of double-forked aisexec */
+                               if (sscanf(message, "SUCCESS: %d", &aisexec_pid) == 1) {
+                                       if (comline->verbose & DEBUG_STARTUP_ONLY)
+                                               fprintf(stderr, "aisexec running, process ID is %d\n", aisexec_pid);
+                                       status = 0;
+                               }
+                               else {
+                                       fprintf(stderr, "cman not started: %s\n", message);
+                               }
                                break;
                        }
-                       else {
+                       else if (len < 0 && errno == EINTR) {
+                               continue;
+                       }
+                       else { /* Error or EOF - check the child status */
                                int pidstatus;
-                               if (waitpid(aisexec_pid, &pidstatus, WNOHANG) == 0 && pidstatus != 0)
-                                       fprintf(stderr, "cman died with status: %d\n", WEXITSTATUS(pidstatus));
-                               else
+                               status = waitpid(aisexec_pid, &pidstatus, WNOHANG);
+                               if (status == -1 && errno == ECHILD) {
+                                       fprintf(stderr, "cman not started\n");
+                                       break;
+                               }
+                               if (status == 0 && pidstatus != 0) {
+                                       if (WIFEXITED(pidstatus))
+                                               fprintf(stderr, "aisexec died with status: %d\n", WEXITSTATUS(pidstatus));
+                                       if (WIFSIGNALED(pidstatus))
+                                               fprintf(stderr, "aisexec died with signal: %d\n", WTERMSIG(pidstatus));
+                                       status = -1;
+                                       break;
+                               }
+                               else {
                                        status = 0; /* Try to connect */
+                               }
                        }
                }
-               if (status == 0) {
-                       h = cman_admin_init(NULL);
-                       if (!h && comline->verbose)
-                       {
-                               fprintf(stderr, "waiting for aisexec to start\n");
+
+       } while (status != 0);
+       close(p[0]);
+
+       /* If aisexec has started, try to connect to cman ... if it's still there */
+       if (status == 0) {
+               do {
+                       if (status == 0) {
+                               if (kill(aisexec_pid, 0) < 0) {
+                                       die("aisexec died during startup\n");
+                               }
+
+                               h = cman_admin_init(NULL);
+                               if (!h && comline->verbose & DEBUG_STARTUP_ONLY)
+                               {
+                                       fprintf(stderr, "waiting for aisexec to start\n");
+                               }
                        }
-               }
-       } while (!h && ++i < 100);
+                       sleep (1);
+               } while (!h && ++i < 100);
+       }
 
        if (!h)
                die("aisexec daemon didn't start");
 
-       if (comline->verbose && !cman_is_active(h))
+       if ((comline->verbose & DEBUG_STARTUP_ONLY) && !cman_is_active(h))
                fprintf(stderr, "aisexec started, but not joined the cluster yet.\n");
 
        cman_finish(h);
--- cluster/cman/daemon/ais.c   2008/01/02 16:35:44     1.59
+++ cluster/cman/daemon/ais.c   2008/01/30 15:46:41     1.60
@@ -249,6 +249,7 @@
 static int cman_exec_init_fn(struct objdb_iface_ver0 *objdb)
 {
        unsigned int object_handle;
+       char pipe_msg[256];
 
        /* We can only work if our config interface was run first */
        if (!config_run)
@@ -273,7 +274,9 @@
        /* Open local sockets and initialise I/O queues */
        cman_init();
 
-       /* Let cman_tool know we are running */
+       /* Let cman_tool know we are running and our PID */
+       sprintf(pipe_msg,"SUCCESS: %d", getpid());
+       write_cman_pipe(pipe_msg);
        close(startup_pipe);
        startup_pipe = 0;
 
--- cluster/cman/man/cman_tool.8        2007/11/29 11:19:12     1.15
+++ cluster/cman/man/cman_tool.8        2008/01/30 15:46:41     1.16
@@ -290,6 +290,8 @@
 .br
 16 Interaction with OpenAIS
 .br
+32 Startup debugging (cman_tool join operations only)
+.br
 .SH NOTES
 .br
 the 

Reply via email to