The master process is now able to manage old processes that did not
quit on a reload. The master won't exit if every current process has
left but there are still old processes.

There is now a notification message when a child exits, either an old or
a current process.

When all workers have left, the master exits with an appropriate
return code (the same one the systemd wrapper uses).
---
 src/haproxy.c | 124 +++++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 96 insertions(+), 28 deletions(-)

diff --git a/src/haproxy.c b/src/haproxy.c
index d2b5d4c..5e3028d 100644
--- a/src/haproxy.c
+++ b/src/haproxy.c
@@ -193,7 +193,7 @@ char localpeer[MAX_HOSTNAME_LEN];
  */
 int shut_your_big_mouth_gcc_int = 0;
 
-int *children; /* store PIDs of children in master workers mode */
+int *children = NULL; /* store PIDs of children in master workers mode */
 
 static volatile sig_atomic_t caught_signal = 0;
 static char **next_argv = NULL;
@@ -407,6 +407,26 @@ int current_child(int pid)
 
 
 /*
+ * remove a pid from the oldpids array and decrease nb_oldpids
+ * return 1 if the pid was found, otherwise return 0
+ */
+
+int delete_oldpid(int pid)
+{
+       int i;
+
+       for (i = 0; i < nb_oldpids; i++) {
+               if (oldpids[i] == pid) {
+                       oldpids[i] = oldpids[nb_oldpids - 1];
+                       oldpids[nb_oldpids - 1] = 0;
+                       nb_oldpids--;
+                       return 1;
+               }
+       }
+       return 0;
+}
+
+/*
  * When called, this function add -sf followed by current children PIDs and
  * possibily old children PIDs if they didn't leave yet.
  */
@@ -416,6 +436,7 @@ static void master_reload()
 {
        int next_argc = 0;
        int j;
+       char *msg = NULL;
 
        signal(SIGUSR1, SIG_IGN);
        signal(SIGUSR2, SIG_IGN);
@@ -428,24 +449,31 @@ static void master_reload()
                next_argc++;
 
        /* 1 for haproxy -sf */
-       next_argv = realloc(next_argv, (next_argc + 1 + global.nbproc + 1) * 
sizeof(char *));
+       next_argv = realloc(next_argv, (next_argc + 1 + global.nbproc + 
nb_oldpids + 1) * sizeof(char *));
        if (next_argv == NULL)
                goto alloc_error;
 
-       if (children) {
-               char *msg = NULL;
 
-               /* add -sf <PID>*  to argv */
+       /* add -sf <PID>*  to argv */
+       if (children || nb_oldpids > 0)
                next_argv[next_argc++] = "-sf";
+       if (children) {
                for (j = 0; j < global.nbproc; next_argc++,j++) {
                        next_argv[next_argc] = memprintf(&msg, "%d", 
children[j]);
                        if (next_argv[next_argc] == NULL)
                                goto alloc_error;
                        msg = NULL;
                }
-               next_argv[next_argc] = NULL;
        }
-       Warning("Reexecuting Master process [%d]\n", pid);
+       /* copy old process PIDs */
+       for (j = 0; j < nb_oldpids; next_argc++,j++) {
+               next_argv[next_argc] = memprintf(&msg, "%d", oldpids[j]);
+               if (next_argv[next_argc] == NULL)
+                       goto alloc_error;
+               msg = NULL;
+       }
+       next_argv[next_argc] = NULL;
+       Warning("Reexecuting Master process\n");
        execv(next_argv[0], next_argv);
 
 alloc_error:
@@ -459,7 +487,6 @@ alloc_error:
  * that the binary is up to date in memory, launch new processes and kill the
  * old ones.
  */
-
 static void sig_master_usr2(int signum)
 {
        Warning("Received signal USR2, reloading...\n");
@@ -467,6 +494,63 @@ static void sig_master_usr2(int signum)
 }
 
 /*
+ * Wait for every child to exit
+ */
+
+static void mworkers_wait()
+{
+       int exitpid = -1;
+       int status = 0;
+       struct sigaction sa;
+
+       /* Here we are not using the haproxy async way
+       for signals because it does not exist in
+       the master */
+       memset(&sa, 0, sizeof(struct sigaction));
+       sa.sa_handler = &sig_master_usr2;
+       sigaction(SIGUSR2, &sa, NULL);
+
+       while (1) {
+
+               while (((exitpid = wait(&status)) == -1) && errno == EINTR) {
+                       int sig = caught_signal;
+                       if (sig) {
+                               caught_signal = 0;
+                               master_reload();
+                       }
+               }
+
+               if (exitpid == -1 && errno == ECHILD) {
+                       Warning("All workers are left. Leaving... (%d)\n", 
status);
+                       exit(status); /* parent must leave using the latest 
status code known */
+               }
+
+               if (WIFEXITED(status))
+                       status = WEXITSTATUS(status);
+               else if (WIFSIGNALED(status))
+                       status = 128 + WTERMSIG(status);
+               else if (WIFSTOPPED(status))
+                       status = 128 + WSTOPSIG(status);
+               else
+                       status = 255;
+
+               if (!children) {
+                       Warning("Worker %d left with exit code %d\n", exitpid, 
status);
+               } else {
+                       /* check if exited child was in the current children 
list */
+                       if (current_child(exitpid)) {
+                               Alert("Current worker %d left with exit code 
%d\n", exitpid, status);
+                       } else {
+                               Warning("Former worker %d left with exit code 
%d\n", exitpid, status);
+                               delete_oldpid(exitpid);
+                       }
+               }
+       }
+}
+
+
+
+/*
  * upon SIGUSR1, let's have a soft stop. Note that soft_stop() broadcasts
  * a signal zero to all subscribers. This means that it's as easy as
  * subscribing to signal 0 to get informed about an imminent shutdown.
@@ -865,7 +949,7 @@ static char **copy_argv(int argc, char **argv)
        char **newargv;
        int i, j;
 
-       newargv = calloc(argc + 1, sizeof(char *));
+       newargv = calloc(argc + 2, sizeof(char *));
        if (newargv == NULL) {
                Warning("Cannot allocate memory\n");
                return NULL;
@@ -2221,6 +2305,8 @@ int main(int argc, char **argv)
                 * if daemon + mworker: must fork here to let a master
                 * process live in background before forking children
                 */
+
+               // TODO: don't refork if it's a reexec
                if ((global.mode & MODE_MWORKER) && (global.mode & 
MODE_DAEMON)) {
                        ret = fork();
                        if (ret < 0) {
@@ -2278,31 +2364,13 @@ int main(int argc, char **argv)
                }
 
                /* We won't ever use this anymore */
-               free(oldpids);        oldpids = NULL;
                free(global.pidfile); global.pidfile = NULL;
 
                if (proc == global.nbproc) {
                        if (global.mode & MODE_MWORKER) {
-                               struct sigaction sa;
-
-                                /* Here we are not using the haproxy async way
-                                 for signals because it does not exists in
-                                 the master */
-                               memset(&sa, 0, sizeof(struct sigaction));
-                               sa.sa_handler = &sig_master_usr2;
-                               sigaction(SIGUSR2, &sa, NULL);
-
                                protocol_unbind_all();
-                               for (proc = 0; proc < global.nbproc; proc++)
-                                       while (waitpid(-1, NULL, 0) == -1 && 
errno == EINTR) {
-                                               int sig = caught_signal;
-                                               if (sig) {
-                                                       caught_signal = 0;
-                                                       master_reload();
-                                               }
-                                       }
+                               mworkers_wait();
                        }
-                       exit(0); /* parent must leave */
                }
 
                /* Must chroot and setgid/setuid in the children */
-- 
2.10.2


Reply via email to