Hi,

Sometimes I find I am left with zombie haproxy instances that are *still
listening* and do *not* have any established connections (netstat told me
so), even though those particular PIDs have been told to die.

For example:

 141 ?        Ss     0:00 /usr/sbin/haproxy -D -f /proxy.conf -p /proxy.pid
-sf 139
 143 ?        Ss     0:00 /usr/sbin/haproxy -D -f /proxy.conf -p /proxy.pid
-sf 141

It is worth mentioning that this is running in a Docker container with a Go
application as the parent (interlock).

I've tried ensuring that new haproxy instances are only started at most
once every 5 seconds (or even 10 seconds), in an attempt to rule out a race
condition, but no dice.

Below are pastes of two strace outputs. The first traces the parent (pid 1)
and all of its children, capturing only signal-related calls; note that
process 284 gets nothing even when PID 285 kills it. In the second trace,
I'm capturing everything; note that the process gets the SIGUSR1 signal but
continues on anyway. Hopefully these shed some light on what is occurring?

*First trace:*

Process 1 attached with 9 threads
Process 283 attached
[pid   283] rt_sigaction(SIGRTMIN, {0x7fcacff949f0, [],
SA_RESTORER|SA_SIGINFO, 0x7fcacff9d8d0}, NULL, 8) = 0
[pid   283] rt_sigaction(SIGRT_1, {0x7fcacff94a80, [],
SA_RESTORER|SA_RESTART|SA_SIGINFO, 0x7fcacff9d8d0}, NULL, 8) = 0
[pid   283] rt_sigprocmask(SIG_UNBLOCK, [RTMIN RT_1], NULL, 8) = 0
[pid   283] rt_sigaction(SIGQUIT, {0x46bb90, [QUIT],
SA_RESTORER|SA_RESTART, 0x7fcad03e4180}, {SIG_DFL, [], 0}, 8) = 0
[pid   283] rt_sigaction(SIGUSR1, {0x46bb90, [USR1],
SA_RESTORER|SA_RESTART, 0x7fcad03e4180}, {SIG_DFL, [], 0}, 8) = 0
[pid   283] rt_sigaction(SIGHUP, {0x46bb90, [HUP], SA_RESTORER|SA_RESTART,
0x7fcad03e4180}, {SIG_DFL, [], 0}, 8) = 0
[pid   283] rt_sigaction(SIGPIPE, {SIG_IGN, [PIPE], SA_RESTORER|SA_RESTART,
0x7fcad03e4180}, {SIG_DFL, [], 0}, 8) = 0
[pid   283] rt_sigaction(SIGTTOU, {0x46bb90, [TTOU],
SA_RESTORER|SA_RESTART, 0x7fcad03e4180}, {SIG_DFL, [], 0}, 8) = 0
[pid   283] rt_sigaction(SIGTTIN, {0x46bb90, [TTIN],
SA_RESTORER|SA_RESTART, 0x7fcad03e4180}, {SIG_DFL, [], 0}, 8) = 0
[pid   283] kill(176, SIGUSR1)          = 0
[pid   283] +++ exited with 0 +++
[pid    66] --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=283,
si_uid=0, si_status=0, si_utime=0, si_stime=0} ---
[pid    66] rt_sigreturn()              = 283
Process 284 attached
Process 285 attached
[pid   285] rt_sigaction(SIGRTMIN, {0x7f2be16d39f0, [],
SA_RESTORER|SA_SIGINFO, 0x7f2be16dc8d0}, NULL, 8) = 0
[pid   285] rt_sigaction(SIGRT_1, {0x7f2be16d3a80, [],
SA_RESTORER|SA_RESTART|SA_SIGINFO, 0x7f2be16dc8d0}, NULL, 8) = 0
[pid   285] rt_sigprocmask(SIG_UNBLOCK, [RTMIN RT_1], NULL, 8) = 0
[pid   285] rt_sigaction(SIGQUIT, {0x46bb90, [QUIT],
SA_RESTORER|SA_RESTART, 0x7f2be1b23180}, {SIG_DFL, [], 0}, 8) = 0
[pid   285] rt_sigaction(SIGUSR1, {0x46bb90, [USR1],
SA_RESTORER|SA_RESTART, 0x7f2be1b23180}, {SIG_DFL, [], 0}, 8) = 0
[pid   285] rt_sigaction(SIGHUP, {0x46bb90, [HUP], SA_RESTORER|SA_RESTART,
0x7f2be1b23180}, {SIG_DFL, [], 0}, 8) = 0
[pid   285] rt_sigaction(SIGPIPE, {SIG_IGN, [PIPE], SA_RESTORER|SA_RESTART,
0x7f2be1b23180}, {SIG_DFL, [], 0}, 8) = 0
[pid   285] rt_sigaction(SIGTTOU, {0x46bb90, [TTOU],
SA_RESTORER|SA_RESTART, 0x7f2be1b23180}, {SIG_DFL, [], 0}, 8) = 0
[pid   285] rt_sigaction(SIGTTIN, {0x46bb90, [TTIN],
SA_RESTORER|SA_RESTART, 0x7f2be1b23180}, {SIG_DFL, [], 0}, 8) = 0
[pid   285] kill(284, SIGUSR1)          = 0
Process 286 attached
[pid   285] +++ exited with 0 +++
[pid    66] --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=285,
si_uid=0, si_status=0, si_utime=0, si_stime=0} ---
[pid    66] rt_sigreturn()              = 285
^CProcess 1 detached
*<- nothing of interest after this point (waited a while)*
*Second trace:*

epoll_wait(0, 2191e30, 200, 1000)       = -1 EINTR (Interrupted system call)
--- SIGUSR1 {si_signo=SIGUSR1, si_code=SI_USER, si_pid=1120, si_uid=0} ---
rt_sigaction(SIGUSR1, {0x46bb90, [USR1], SA_RESTORER|SA_RESTART,
0x7f437988c180}, {0x46bb90, [USR1], SA_RESTORER|SA_RESTART,
0x7f437988c180}, 8) = 0
rt_sigreturn()                          = -1 EINTR (Interrupted system call)
rt_sigprocmask(SIG_SETMASK, ~[PROF RTMIN RT_1], [], 8) = 0
brk(0x21d9000)                          = 0x21d9000
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
epoll_ctl(0, EPOLL_CTL_DEL, 4, 6be6e0)  = 0
close(4)                                = 0
epoll_ctl(0, EPOLL_CTL_DEL, 5, 6be6e0)  = 0
close(5)                                = 0
epoll_ctl(0, EPOLL_CTL_DEL, 6, 6be6e0)  = 0
close(6)                                = 0
epoll_wait(0, {}, 200, 1000)            = 0
epoll_wait(0, {}, 200, 1000)            = 0
epoll_wait(0, {{EPOLLIN, {u32=2, u64=2}}}, 200, 386) = 1
recvfrom(2,
"\301*\252V*\311/\310LV\262R\312HR\322QJI,IT\2622415\2664507"..., 16384, 0,
NULL, NULL) = 44
sendto(1, "\301*\252V*\311/\310LV\262R\312HR\322QJI,IT\2622415\2664507"...,
44, MSG_DONTWAIT|MSG_NOSIGNAL, NULL, 0) = 44
epoll_wait(0, {{EPOLLIN, {u32=9, u64=9}}}, 200, 40) = 1
recvfrom(9,
"\301*\252V*\311/\310LV\262R\312HR\322QJI,IT\2622415\2664507"..., 16384, 0,
NULL, NULL) = 44
sendto(8, "\301*\252V*\311/\310LV\262R\312HR\322QJI,IT\2622415\2664507"...,
44, MSG_DONTWAIT|MSG_NOSIGNAL, NULL, 0) = 44
epoll_wait(0, {{EPOLLIN, {u32=7, u64=7}}}, 200, 32) = 1
recvfrom(7,
"\301*\252V*\311/\310LV\262R\312HR\322QJI,IT\2622415\2664507"..., 16384, 0,
NULL, NULL) = 44
sendto(3, "\301*\252V*\311/\310LV\262R\312HR\322QJI,IT\2622415\2664507"...,
44, MSG_DONTWAIT|MSG_NOSIGNAL, NULL, 0) = 44
epoll_wait(0, {}, 200, 32)              = 0
epoll_wait(0, {}, 200, 1000)            = 0
epoll_wait(0, {}, 200, 1000)            = 0
epoll_wait(0, {}, 200, 503)             = 0
epoll_wait(0, {}, 200, 1000)            = 0
epoll_wait(0, {}, 200, 1000)            = 0
epoll_wait(0, {}, 200, 493)             = 0
epoll_wait(0, {}, 200, 1000)            = 0
epoll_wait(0, {}, 200, 1000)            = 0
epoll_wait(0, {}, 200, 503)             = 0
epoll_wait(0, {}, 200, 1000)            = 0
epoll_wait(0, {}, 200, 1000)            = 0
epoll_wait(0, {}, 200, 493)             = 0
epoll_wait(0, {}, 200, 1000)            = 0
epoll_wait(0, {}, 200, 1000)            = 0
epoll_wait(0, {}, 200, 503)             = 0
epoll_wait(0, {}, 200, 1000)            = 0
epoll_wait(0, {}, 200, 1000)            = 0
epoll_wait(0, {{EPOLLIN, {u32=2, u64=2}}}, 200, 493) = 1
recvfrom(2,
"\301*\252V*\311/\310LV\262R\312HR\322QJI,IT\2622415\266450\267"..., 16384,
0, NULL, NULL) = 44
sendto(1,
"\301*\252V*\311/\310LV\262R\312HR\322QJI,IT\2622415\266450\267"..., 44,
MSG_DONTWAIT|MSG_NOSIGNAL, NULL, 0) = 44
epoll_wait(0, {{EPOLLIN, {u32=7, u64=7}}}, 200, 38) = 1
recvfrom(7,
"\301*\252V*\311/\310LV\262R\312HR\322QJI,IT\2622415\266450\267"..., 16384,
0, NULL, NULL) = 44
sendto(3,
"\301*\252V*\311/\310LV\262R\312HR\322QJI,IT\2622415\266450\267"..., 44,
MSG_DONTWAIT|MSG_NOSIGNAL, NULL, 0) = 44
epoll_wait(0, {{EPOLLIN, {u32=9, u64=9}}}, 200, 38) = 1
recvfrom(9,
"\301*\252V*\311/\310LV\262R\312HR\322QJI,IT\2622415\266450\267"..., 16384,
0, NULL, NULL) = 44
sendto(8,
"\301*\252V*\311/\310LV\262R\312HR\322QJI,IT\2622415\266450\267"..., 44,
MSG_DONTWAIT|MSG_NOSIGNAL, NULL, 0) = 44
epoll_wait(0, {}, 200, 37)              = 0
epoll_wait(0, {}, 200, 1000)            = 0
epoll_wait(0, {}, 200, 1000)            = 0
epoll_wait(0, {}, 200, 503)             = 0
epoll_wait(0, {}, 200, 1000)            = 0
epoll_wait(0, {}, 200, 1000)            = 0
epoll_wait(0, {}, 200, 492)             = 0
epoll_wait(0, {}, 200, 1000)            = 0
epoll_wait(0, {}, 200, 1000)            = 0
epoll_wait(0, {}, 200, 502)             = 0
epoll_wait(0, {}, 200, 1000)            = 0
epoll_wait(0, {}, 200, 1000)            = 0
epoll_wait(0, {}, 200, 493)             = 0
epoll_wait(0, {}, 200, 1000)            = 0
epoll_wait(0, {}, 200, 1000)            = 0
epoll_wait(0, {}, 200, 502)             = 0
epoll_wait(0, {}, 200, 1000)            = 0
epoll_wait(0, {}, 200, 1000)            = 0
epoll_wait(0, {{EPOLLIN, {u32=9, u64=9}}}, 200, 490) = 1
recvfrom(9,
"\301*\252V*\311/\310LV\262R\312HR\322QJI,IT\2622415\26645\2600"..., 16384,
0, NULL, NULL) = 44
sendto(8,
"\301*\252V*\311/\310LV\262R\312HR\322QJI,IT\2622415\26645\2600"..., 44,
MSG_DONTWAIT|MSG_NOSIGNAL, NULL, 0) = 44
epoll_wait(0, {{EPOLLIN, {u32=7, u64=7}}}, 200, 33) = 1

Hopefully these traces tell someone something; I'm at a complete loss as to
how this is occurring and what else to try to debug this.

Thanks

Reply via email to