Hi,
Sometimes I find I am left with zombie haproxy instances that are *still
listening* and do *not* have any established connections (netstat told me
so), even though those particular PIDs have been told to die.
For example:
141 ? Ss 0:00 /usr/sbin/haproxy -D -f /proxy.conf -p /proxy.pid
-sf 139
143 ? Ss 0:00 /usr/sbin/haproxy -D -f /proxy.conf -p /proxy.pid
-sf 141
It is worth mentioning that this is running in a Docker container with a Go
application (interlock) as the parent process.
I've tried ensuring that new haproxy instances are only started at most
once every 5 seconds (or even 10 seconds), in an attempt to rule out a race
condition, but no dice.
Below are pastes of two strace outputs. The first traces the parent (PID 1)
and all of its children, capturing only signal-related calls. Note that
process 284 receives nothing, even though PID 285 sends it SIGUSR1. In the
second trace I'm capturing everything. Note that the process receives the
SIGUSR1 signal but carries on anyway. Hopefully these shed some light on
what is occurring.
*First trace:*
Process 1 attached with 9 threads
Process 283 attached
[pid 283] rt_sigaction(SIGRTMIN, {0x7fcacff949f0, [],
SA_RESTORER|SA_SIGINFO, 0x7fcacff9d8d0}, NULL, 8) = 0
[pid 283] rt_sigaction(SIGRT_1, {0x7fcacff94a80, [],
SA_RESTORER|SA_RESTART|SA_SIGINFO, 0x7fcacff9d8d0}, NULL, 8) = 0
[pid 283] rt_sigprocmask(SIG_UNBLOCK, [RTMIN RT_1], NULL, 8) = 0
[pid 283] rt_sigaction(SIGQUIT, {0x46bb90, [QUIT],
SA_RESTORER|SA_RESTART, 0x7fcad03e4180}, {SIG_DFL, [], 0}, 8) = 0
[pid 283] rt_sigaction(SIGUSR1, {0x46bb90, [USR1],
SA_RESTORER|SA_RESTART, 0x7fcad03e4180}, {SIG_DFL, [], 0}, 8) = 0
[pid 283] rt_sigaction(SIGHUP, {0x46bb90, [HUP], SA_RESTORER|SA_RESTART,
0x7fcad03e4180}, {SIG_DFL, [], 0}, 8) = 0
[pid 283] rt_sigaction(SIGPIPE, {SIG_IGN, [PIPE], SA_RESTORER|SA_RESTART,
0x7fcad03e4180}, {SIG_DFL, [], 0}, 8) = 0
[pid 283] rt_sigaction(SIGTTOU, {0x46bb90, [TTOU],
SA_RESTORER|SA_RESTART, 0x7fcad03e4180}, {SIG_DFL, [], 0}, 8) = 0
[pid 283] rt_sigaction(SIGTTIN, {0x46bb90, [TTIN],
SA_RESTORER|SA_RESTART, 0x7fcad03e4180}, {SIG_DFL, [], 0}, 8) = 0
[pid 283] kill(176, SIGUSR1) = 0
[pid 283] +++ exited with 0 +++
[pid 66] --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=283,
si_uid=0, si_status=0, si_utime=0, si_stime=0} ---
[pid 66] rt_sigreturn() = 283
Process 284 attached
Process 285 attached
[pid 285] rt_sigaction(SIGRTMIN, {0x7f2be16d39f0, [],
SA_RESTORER|SA_SIGINFO, 0x7f2be16dc8d0}, NULL, 8) = 0
[pid 285] rt_sigaction(SIGRT_1, {0x7f2be16d3a80, [],
SA_RESTORER|SA_RESTART|SA_SIGINFO, 0x7f2be16dc8d0}, NULL, 8) = 0
[pid 285] rt_sigprocmask(SIG_UNBLOCK, [RTMIN RT_1], NULL, 8) = 0
[pid 285] rt_sigaction(SIGQUIT, {0x46bb90, [QUIT],
SA_RESTORER|SA_RESTART, 0x7f2be1b23180}, {SIG_DFL, [], 0}, 8) = 0
[pid 285] rt_sigaction(SIGUSR1, {0x46bb90, [USR1],
SA_RESTORER|SA_RESTART, 0x7f2be1b23180}, {SIG_DFL, [], 0}, 8) = 0
[pid 285] rt_sigaction(SIGHUP, {0x46bb90, [HUP], SA_RESTORER|SA_RESTART,
0x7f2be1b23180}, {SIG_DFL, [], 0}, 8) = 0
[pid 285] rt_sigaction(SIGPIPE, {SIG_IGN, [PIPE], SA_RESTORER|SA_RESTART,
0x7f2be1b23180}, {SIG_DFL, [], 0}, 8) = 0
[pid 285] rt_sigaction(SIGTTOU, {0x46bb90, [TTOU],
SA_RESTORER|SA_RESTART, 0x7f2be1b23180}, {SIG_DFL, [], 0}, 8) = 0
[pid 285] rt_sigaction(SIGTTIN, {0x46bb90, [TTIN],
SA_RESTORER|SA_RESTART, 0x7f2be1b23180}, {SIG_DFL, [], 0}, 8) = 0
[pid 285] kill(284, SIGUSR1) = 0
Process 286 attached
[pid 285] +++ exited with 0 +++
[pid 66] --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=285,
si_uid=0, si_status=0, si_utime=0, si_stime=0} ---
[pid 66] rt_sigreturn() = 285
^CProcess 1 detached
*<- nothing of interest after this point (waited a while)*
*Second trace:*
epoll_wait(0, 2191e30, 200, 1000) = -1 EINTR (Interrupted system call)
--- SIGUSR1 {si_signo=SIGUSR1, si_code=SI_USER, si_pid=1120, si_uid=0} ---
rt_sigaction(SIGUSR1, {0x46bb90, [USR1], SA_RESTORER|SA_RESTART,
0x7f437988c180}, {0x46bb90, [USR1], SA_RESTORER|SA_RESTART,
0x7f437988c180}, 8) = 0
rt_sigreturn() = -1 EINTR (Interrupted system call)
rt_sigprocmask(SIG_SETMASK, ~[PROF RTMIN RT_1], [], 8) = 0
brk(0x21d9000) = 0x21d9000
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
epoll_ctl(0, EPOLL_CTL_DEL, 4, 6be6e0) = 0
close(4) = 0
epoll_ctl(0, EPOLL_CTL_DEL, 5, 6be6e0) = 0
close(5) = 0
epoll_ctl(0, EPOLL_CTL_DEL, 6, 6be6e0) = 0
close(6) = 0
epoll_wait(0, {}, 200, 1000) = 0
epoll_wait(0, {}, 200, 1000) = 0
epoll_wait(0, {{EPOLLIN, {u32=2, u64=2}}}, 200, 386) = 1
recvfrom(2,
"\301*\252V*\311/\310LV\262R\312HR\322QJI,IT\2622415\2664507"..., 16384, 0,
NULL, NULL) = 44
sendto(1, "\301*\252V*\311/\310LV\262R\312HR\322QJI,IT\2622415\2664507"...,
44, MSG_DONTWAIT|MSG_NOSIGNAL, NULL, 0) = 44
epoll_wait(0, {{EPOLLIN, {u32=9, u64=9}}}, 200, 40) = 1
recvfrom(9,
"\301*\252V*\311/\310LV\262R\312HR\322QJI,IT\2622415\2664507"..., 16384, 0,
NULL, NULL) = 44
sendto(8, "\301*\252V*\311/\310LV\262R\312HR\322QJI,IT\2622415\2664507"...,
44, MSG_DONTWAIT|MSG_NOSIGNAL, NULL, 0) = 44
epoll_wait(0, {{EPOLLIN, {u32=7, u64=7}}}, 200, 32) = 1
recvfrom(7,
"\301*\252V*\311/\310LV\262R\312HR\322QJI,IT\2622415\2664507"..., 16384, 0,
NULL, NULL) = 44
sendto(3, "\301*\252V*\311/\310LV\262R\312HR\322QJI,IT\2622415\2664507"...,
44, MSG_DONTWAIT|MSG_NOSIGNAL, NULL, 0) = 44
epoll_wait(0, {}, 200, 32) = 0
epoll_wait(0, {}, 200, 1000) = 0
epoll_wait(0, {}, 200, 1000) = 0
epoll_wait(0, {}, 200, 503) = 0
epoll_wait(0, {}, 200, 1000) = 0
epoll_wait(0, {}, 200, 1000) = 0
epoll_wait(0, {}, 200, 493) = 0
epoll_wait(0, {}, 200, 1000) = 0
epoll_wait(0, {}, 200, 1000) = 0
epoll_wait(0, {}, 200, 503) = 0
epoll_wait(0, {}, 200, 1000) = 0
epoll_wait(0, {}, 200, 1000) = 0
epoll_wait(0, {}, 200, 493) = 0
epoll_wait(0, {}, 200, 1000) = 0
epoll_wait(0, {}, 200, 1000) = 0
epoll_wait(0, {}, 200, 503) = 0
epoll_wait(0, {}, 200, 1000) = 0
epoll_wait(0, {}, 200, 1000) = 0
epoll_wait(0, {{EPOLLIN, {u32=2, u64=2}}}, 200, 493) = 1
recvfrom(2,
"\301*\252V*\311/\310LV\262R\312HR\322QJI,IT\2622415\266450\267"..., 16384,
0, NULL, NULL) = 44
sendto(1,
"\301*\252V*\311/\310LV\262R\312HR\322QJI,IT\2622415\266450\267"..., 44,
MSG_DONTWAIT|MSG_NOSIGNAL, NULL, 0) = 44
epoll_wait(0, {{EPOLLIN, {u32=7, u64=7}}}, 200, 38) = 1
recvfrom(7,
"\301*\252V*\311/\310LV\262R\312HR\322QJI,IT\2622415\266450\267"..., 16384,
0, NULL, NULL) = 44
sendto(3,
"\301*\252V*\311/\310LV\262R\312HR\322QJI,IT\2622415\266450\267"..., 44,
MSG_DONTWAIT|MSG_NOSIGNAL, NULL, 0) = 44
epoll_wait(0, {{EPOLLIN, {u32=9, u64=9}}}, 200, 38) = 1
recvfrom(9,
"\301*\252V*\311/\310LV\262R\312HR\322QJI,IT\2622415\266450\267"..., 16384,
0, NULL, NULL) = 44
sendto(8,
"\301*\252V*\311/\310LV\262R\312HR\322QJI,IT\2622415\266450\267"..., 44,
MSG_DONTWAIT|MSG_NOSIGNAL, NULL, 0) = 44
epoll_wait(0, {}, 200, 37) = 0
epoll_wait(0, {}, 200, 1000) = 0
epoll_wait(0, {}, 200, 1000) = 0
epoll_wait(0, {}, 200, 503) = 0
epoll_wait(0, {}, 200, 1000) = 0
epoll_wait(0, {}, 200, 1000) = 0
epoll_wait(0, {}, 200, 492) = 0
epoll_wait(0, {}, 200, 1000) = 0
epoll_wait(0, {}, 200, 1000) = 0
epoll_wait(0, {}, 200, 502) = 0
epoll_wait(0, {}, 200, 1000) = 0
epoll_wait(0, {}, 200, 1000) = 0
epoll_wait(0, {}, 200, 493) = 0
epoll_wait(0, {}, 200, 1000) = 0
epoll_wait(0, {}, 200, 1000) = 0
epoll_wait(0, {}, 200, 502) = 0
epoll_wait(0, {}, 200, 1000) = 0
epoll_wait(0, {}, 200, 1000) = 0
epoll_wait(0, {{EPOLLIN, {u32=9, u64=9}}}, 200, 490) = 1
recvfrom(9,
"\301*\252V*\311/\310LV\262R\312HR\322QJI,IT\2622415\26645\2600"..., 16384,
0, NULL, NULL) = 44
sendto(8,
"\301*\252V*\311/\310LV\262R\312HR\322QJI,IT\2622415\26645\2600"..., 44,
MSG_DONTWAIT|MSG_NOSIGNAL, NULL, 0) = 44
epoll_wait(0, {{EPOLLIN, {u32=7, u64=7}}}, 200, 33) = 1
Hopefully these traces tell someone something; I'm at a complete loss as to
how this is occurring and what else to try in order to debug it.
Thanks