---
src/haproxy.c | 117 ++++++++++++++++++++++++++++++++++++++++++++------------
1 files changed, 92 insertions(+), 25 deletions(-)
diff --git a/src/haproxy.c b/src/haproxy.c
index 0187f47..94adcf9 100644
--- a/src/haproxy.c
+++ b/src/haproxy.c
@@ -138,6 +138,7 @@ struct global global_default = {
int stopping; /* non zero means stopping in progress */
int restarting; /* non zero means restart in progress */
+int replacing_workers = 0; /* non zero means replacing workers in progress */
int is_master = 0; /* non zero means that master/worker mode
* has been activated and the current process
* is the master */
@@ -322,6 +323,16 @@ void sig_listen(struct sig_handler *sh)
/*
* upon SIGCHLD reap child
*/
+
+static struct task *replace_workers_task = NULL;
+
+struct task *task_replace_workers(struct task *t) /* process callback of replace_workers_task: requests a worker replacement */
+{
+ replacing_workers = 1; /* run_poll_loop() breaks out of its loop when it sees this flag */
+ t->expire = TICK_ETERNITY; /* back to the expiry set at task creation; sig_reaper() sets a real deadline again */
+ return t; /* hands the same task back to the caller */
+}
+
void sig_reaper(struct sig_handler *sh)
{
int status, p;
@@ -331,13 +342,24 @@ void sig_reaper(struct sig_handler *sh)
pid = waitpid(-1, &status, WNOHANG);
if (pid <= 0)
break;
+ status = 0;
for (p = 0; p < nb_allocated_oldpids; p++)
if (oldpids[p] == pid) {
oldpids[p] = 0;
nb_oldpids--;
+ status++;
break;
}
}
+
+ /* Delay replacing workers for 3s to mitigate the possibility
+ * of a restart storm. */
+ if (status) {
+ tv_update_date(0,1); /* else, the old time before select
+ * will be used */
+ replace_workers_task->expire = tick_add_ifset(now_ms, 3000);
+ task_queue(replace_workers_task);
+ }
}
/*
@@ -765,6 +787,13 @@ void deinit(void)
int i;
deinit_signals();
+
+ if (replace_workers_task) {
+ task_delete(replace_workers_task);
+ task_free(replace_workers_task);
+ replace_workers_task = NULL;
+ }
+
while (p) {
free(p->id);
free(p->check_req);
@@ -989,6 +1018,11 @@ static int tell_old_pids(int sig)
return ret;
}
+int worker_missing() /* returns non-zero when the tracked worker count differs from the allocated slot count */
+{
+ return nb_allocated_oldpids != nb_oldpids; /* nb_oldpids is decremented in sig_reaper() and incremented after each fork */
+}
+
/*
* Runs the polling loop
*
@@ -1028,9 +1062,11 @@ void run_poll_loop()
break;
}
- if (is_master) {
- sleep(1);
- continue;
+ if (replacing_workers) {
+ send_log(NULL, LOG_INFO,
+ "Replacing %d workers.\n",
+ nb_allocated_oldpids - nb_oldpids);
+ break;
}
/* The poller will ensure it returns around <next> */
@@ -1145,7 +1181,6 @@ static FILE *prepare(int argc, char **argv)
signal_register_fct(SIGUSR2, sig_restart, SIGUSR2);
signal_register_fct(SIGHUP, sig_dump_state, SIGHUP);
signal_register_fct(SIGTERM, sig_term, SIGTERM);
- signal_register_fct(SIGCHLD, sig_reaper, SIGCHLD);
/* Always catch SIGPIPE even on platforms which define MSG_NOSIGNAL.
* Some recent FreeBSD setups report broken pipes, and MSG_NOSIGNAL
@@ -1372,20 +1407,26 @@ static void create_processes(int argc, char **argv, FILE *pidfile)
send_log(NULL, LOG_INFO, "Master started\n");
}
- /* Store PIDs of worker processes in oldpid so
- * they can be signaled later */
- nb_oldpids = global.nbproc;
- free(oldpids);
- oldpids = malloc(nb_oldpids * sizeof(*oldpids));
- if (!oldpids) {
- send_log(NULL, LOG_ERR, "Cannot allocate memory "
- "for oldpids.\n");
- protocol_unbind_all();
- exit(1); /* there has been an error */
+ if (!replacing_workers) {
+ /* Store PIDs of worker processes in oldpid so
+ * they can be signaled later */
+ nb_oldpids = 0;
+ nb_allocated_oldpids = global.nbproc;
+ free(oldpids);
+ oldpids = calloc(nb_allocated_oldpids, sizeof(*oldpids));
+ if (!oldpids) {
+ send_log(NULL, LOG_ERR, "Cannot allocate memory "
+ "for oldpids.\n");
+ protocol_unbind_all();
+ exit(1); /* there has been an error */
+ }
}
/* the father launches the required number of processes */
for (proc = 0; proc < global.nbproc; proc++) {
+ /* In the case of replacing_workers a worker may still exist */
+ if (oldpids[proc])
+ continue;
ret = fork();
if (ret < 0) {
send_log(NULL, LOG_ERR, "Cannot fork.\n");
@@ -1404,6 +1445,7 @@ static void create_processes(int argc, char **argv, FILE *pidfile)
break;
}
oldpids[proc] = ret;
+ nb_oldpids++;
relative_pid++; /* each child will get a different one */
}
@@ -1411,8 +1453,22 @@ static void create_processes(int argc, char **argv, FILE *pidfile)
if (!(global.mode & MODE_MASTER_WORKER))
/* The parent process is no longer needed */
exit(0);
- else
- is_master = 1;
+
+ is_master = 1;
+ if (!replace_workers_task) {
+ replace_workers_task = task_new();
+ if (unlikely(replace_workers_task == NULL)) {
+ send_log(NULL, LOG_ERR,
+ "Cannot create reaper task.\n");
+ protocol_unbind_all();
+ exit(1); /* there has been an error */
+ }
+
+ replace_workers_task->process = task_replace_workers;
+ replace_workers_task->expire = TICK_ETERNITY;
+
+ signal_register_fct(SIGCHLD, sig_reaper, SIGCHLD);
+ }
}
/* we might have to unbind some proxies from some processes */
@@ -1453,14 +1509,23 @@ static void create_processes(int argc, char **argv, FILE *pidfile)
int main(int argc, char **argv)
{
FILE *pidfile = NULL;
+ int mode = 0;
while (1) {
- FILE *newpidfile = prepare(argc, argv);
- if (!is_master)
- pidfile = newpidfile;
-
+ if (!replacing_workers) {
+ FILE *newpidfile = prepare(argc, argv);
+ if (!is_master)
+ pidfile = newpidfile;
+ mode = global.mode & (MODE_QUIET|MODE_VERBOSE);
+ } else
+ /* Restore value of mode before it was
+ * mangled by create_processes() */
+ global.mode = (global.mode &
+ ~(MODE_QUIET|MODE_VERBOSE)) | mode;
create_processes(argc, argv, pidfile);
+ replacing_workers = 0;
+
if (!is_master)
drop_capabilities();
@@ -1471,12 +1536,14 @@ int main(int argc, char **argv)
*/
run_poll_loop();
- /* Free all Hash Keys and all Hash elements */
- appsession_cleanup();
- /* Do some cleanup */
- deinit();
+ if (!replacing_workers) {
+ /* Free all Hash Keys and all Hash elements */
+ appsession_cleanup();
+ /* Do some cleanup */
+ deinit();
+ }
- if (!restarting)
+ if (!restarting && !replacing_workers)
break;
}
--
1.7.2.3