Sam Varshavchik wrote:
> Michael Carmack writes:
>
>> Oddly enough, the act of running an strace on the root perlfilter
>> process seems to fix the problem. If I run strace on the root perlfilter
>> _before_ trying to stop courierfilter, I never see any looping when I
>> shut it down. And if I run strace on the root perlfilter process _after_
>> trying to stop courierfilter, while all of the rest of the perlfilter
>> processes are respawning, then the respawning immediately stops and the
>> courierfitler stops.
>
> That sounds almost like when perlfilter's root parent process gets a
> signal, it doesn't really "get it", but as soon as it forks off a child
> process, the child process realizes that it just got signal and dies,
> then the parent just respawns it. And when you try to strace the root
> parent process, it realizes that it got a signal, and dies.
>
> You said you've put together a custom kernel. Try a different major
> kernel version first, to see if you can reproduce the behavior.
Ok, I just tried it on 2.6.20.7 and 2.6.21.1, with same result in both
cases. I attempted to try it out on 2.6.16.49 also, but I wasn't able to
get a stable boot -- some kind of hardware incompatibility.
> The stop command first sends a SIGTERM, and if the lock file isn't
> released, that's followed by a SIGKILL after ten seconds. Presuming
> that your stop command never terminates, it's going to be sending
> SIGKILLs every ten seconds.
I'm not sure what you meant by "stop command never terminates", but when
I run "courierfilter stop" it returns me to the command prompt
immediately -- it doesn't hang or report any errors. I just happened to
run a 'ps' after stopping courier yesterday and noticed all of those
defunct perlfilters. If you didn't look at the process list after
stopping courier, you'd never know anything was wrong. (Well, the
respawning eventually drives the load up, but if you were just shutting
down courier as part of the normal system shutdown procedure, you
wouldn't notice anything out of the ordinary.)
> strace the stop process, and see for
> yourself that it's sending SIGKILLs every ten seconds, to the same pid.
Here's the results of some more stracing:
==========================
Start up courierfilter
==========================
[EMAIL PROTECTED] ./sbin/courierfilter start
[EMAIL PROTECTED] ps -efww | grep courier
courier 1565 1 0 04:47 ? 00:00:00 ./sbin/courierfilter start
courier 1567 1 0 04:47 ? 00:00:00
/pkg/courier-authlib/0.59.3/.001/sbin/courierlogger courierfilter
courier 1568 1565 0 04:47 ? 00:00:00 perlfilter
courier 1569 1568 0 04:47 ? 00:00:00 perlfilter
courier 1570 1568 0 04:47 ? 00:00:00 perlfilter
courier 1571 1568 0 04:47 ? 00:00:00 perlfilter
courier 1572 1568 0 04:47 ? 00:00:00 perlfilter
courier 1573 1568 0 04:47 ? 00:00:00 perlfilter
root 1575 743 0 04:47 pts/0 00:00:00 grep courier
==================================================================
Now strace the "courierfilter start" process in another xterm
==================================================================
[EMAIL PROTECTED] strace -vfp 1565
Process 1565 attached - interrupt to quit
rt_sigsuspend([]
=========================================================================
Now strace "courierfilter stop". (Note that it exits w/o any problem)
=========================================================================
[EMAIL PROTECTED] strace ./sbin/courierfilter stop
execve("./sbin/courierfilter", ["./sbin/courierfilter", "stop"], [/* 23
vars */]) = 0
brk(0) = 0x504000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0)
= 0x2b73191d9000
uname({sys="Linux", node="b1", ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0)
= 0x2b73191da000
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or
directory)
open("/pkg/glibc/2.5/lib/tls/x86_64/libstdc++.so.6", O_RDONLY) = -1
ENOENT (No such file or directory)
stat("/pkg/glibc/2.5/lib/tls/x86_64", 0x7fff918e9990) = -1 ENOENT (No
such file or directory)
open("/pkg/glibc/2.5/lib/tls/libstdc++.so.6", O_RDONLY) = -1 ENOENT (No
such file or directory)
stat("/pkg/glibc/2.5/lib/tls", 0x7fff918e9990) = -1 ENOENT (No such file
or directory)
open("/pkg/glibc/2.5/lib/x86_64/libstdc++.so.6", O_RDONLY) = -1 ENOENT
(No such file or directory)
stat("/pkg/glibc/2.5/lib/x86_64", 0x7fff918e9990) = -1 ENOENT (No such
file or directory)
open("/pkg/glibc/2.5/lib/libstdc++.so.6", O_RDONLY) = -1 ENOENT (No such
file or directory)
stat("/pkg/glibc/2.5/lib", {st_mode=S_IFDIR|0755, st_size=2784, ...}) = 0
open("/pkg/gcc/4.1.2/lib/tls/x86_64/libstdc++.so.6", O_RDONLY) = -1
ENOENT (No such file or directory)
stat("/pkg/gcc/4.1.2/lib/tls/x86_64", 0x7fff918e9990) = -1 ENOENT (No
such file or directory)
open("/pkg/gcc/4.1.2/lib/tls/libstdc++.so.6", O_RDONLY) = -1 ENOENT (No
such file or directory)
stat("/pkg/gcc/4.1.2/lib/tls", 0x7fff918e9990) = -1 ENOENT (No such file
or directory)
open("/pkg/gcc/4.1.2/lib/x86_64/libstdc++.so.6", O_RDONLY) = -1 ENOENT
(No such file or directory)
stat("/pkg/gcc/4.1.2/lib/x86_64", 0x7fff918e9990) = -1 ENOENT (No such
file or directory)
open("/pkg/gcc/4.1.2/lib/libstdc++.so.6", O_RDONLY) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>[EMAIL PROTECTED]"...,
832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=5073930, ...}) = 0
mmap(NULL, 2088736, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3,
0) = 0x2b73192da000
mprotect(0x2b73193bd000, 1048576, PROT_NONE) = 0
mmap(0x2b73194bd000, 36864, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0xe3000) = 0x2b73194bd000
mmap(0x2b73194c6000, 73504, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x2b73194c6000
close(3) = 0
open("/pkg/glibc/2.5/lib/libm.so.6", O_RDONLY) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\20=\0\0"...,
832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=1366690, ...}) = 0
mmap(NULL, 1569000, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3,
0) = 0x2b73194d8000
mprotect(0x2b7319557000, 1044480, PROT_NONE) = 0
mmap(0x2b7319656000, 8192, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x7e000) = 0x2b7319656000
close(3) = 0
open("/pkg/glibc/2.5/lib/libc.so.6", O_RDONLY) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\340\323"...,
832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=7559198, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0)
= 0x2b7319658000
mmap(NULL, 2322568, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3,
0) = 0x2b7319659000
mprotect(0x2b7319787000, 1048576, PROT_NONE) = 0
mmap(0x2b7319887000, 20480, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x12e000) = 0x2b7319887000
mmap(0x2b731988c000, 16520, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x2b731988c000
close(3) = 0
open("/pkg/glibc/2.5/lib/libgcc_s.so.1", O_RDONLY) = -1 ENOENT (No such
file or directory)
open("/pkg/gcc/4.1.2/lib/libgcc_s.so.1", O_RDONLY) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\20\36\0"...,
832) = 832
fstat(3, {st_mode=S_IFREG|0644, st_size=292889, ...}) = 0
mmap(NULL, 1099208, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3,
0) = 0x2b7319891000
mprotect(0x2b731989e000, 1044480, PROT_NONE) = 0
mmap(0x2b731999d000, 4096, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0xc000) = 0x2b731999d000
close(3) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0)
= 0x2b731999e000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0)
= 0x2b731999f000
arch_prctl(ARCH_SET_FS, 0x2b731999eb20) = 0
mprotect(0x2b7319887000, 16384, PROT_READ) = 0
mprotect(0x2b7319656000, 4096, PROT_READ) = 0
mprotect(0x2b73194bd000, 28672, PROT_READ) = 0
mprotect(0x2b73192d8000, 4096, PROT_READ) = 0
getuid() = 0
setgid(1002) = 0
getuid() = 0
setgroups(1, [1002]) = 0
setuid(1002) = 0
chdir("/pkg/courier/0.55.1/.000") = 0
brk(0) = 0x504000
brk(0x525000) = 0x525000
open("/pkg/courier/0.55.1/.000/var/tmp/courierfilter.pid", O_RDONLY) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=5, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0)
= 0x2b73199a0000
read(3, "1565\n", 4096) = 5
close(3) = 0
munmap(0x2b73199a0000, 4096) = 0
kill(1565, SIGTERM) = 0
alarm(15) = 0
open("/pkg/courier/0.55.1/.000/var/filters/.lock", O_RDWR) = -1 ENOENT
(No such file or directory)
exit_group(0) = ?
Process 1579 detached
======================================================================
Meanwhile, there is more output from the"courierfilter start" trace
======================================================================
[EMAIL PROTECTED] strace -vfp 1565
Process 1565 attached - interrupt to quit
rt_sigsuspend([]) = ? ERESTARTNOHAND (To be restarted)
--- SIGTERM (Terminated) @ 0 (0) ---
rt_sigreturn(0xf) = -1 EINTR (Interrupted system call)
writev(2, [{"INFO: Stopping perlfilter", 25}, {"\n", 1}], 2) = 26
close(8) = 0
wait4(-1,
====================================================
But the perlfilters are still respawning
====================================================
[EMAIL PROTECTED] ps -efww | grep courier
courier 1565 1 0 04:47 ? 00:00:00 ./sbin/courierfilter start
courier 1567 1 0 04:47 ? 00:00:00
/pkg/courier-authlib/0.59.3/.001/sbin/courierlogger courierfilter
courier 1568 1565 0 04:47 ? 00:00:00 perlfilter
courier 22048 1568 0 04:50 ? 00:00:00 [perlfilter] <defunct>
courier 22049 1568 0 04:50 ? 00:00:00 [perlfilter] <defunct>
courier 22050 1568 0 04:50 ? 00:00:00 [perlfilter] <defunct>
root 22052 743 0 04:50 pts/0 00:00:00 grep courier
courier 22053 1568 0 04:50 ? 00:00:00 perlfilter
courier 22054 1568 0 04:50 ? 00:00:00 perlfilter
================================================================
Now I kill the root perlfilter and they all die
================================================================
[EMAIL PROTECTED] kill 1568
[EMAIL PROTECTED] ps -efww | grep courier
root 22906 743 0 04:50 pts/0 00:00:00 grep courier
========================================================
And now the "courierfilter start" strace finishes up
========================================================
[EMAIL PROTECTED] strace -vfp 1565
Process 1565 attached - interrupt to quit
rt_sigsuspend([]) = ? ERESTARTNOHAND (To be restarted)
--- SIGTERM (Terminated) @ 0 (0) ---
rt_sigreturn(0xf) = -1 EINTR (Interrupted system call)
writev(2, [{"INFO: Stopping perlfilter", 25}, {"\n", 1}], 2) = 26
close(8) = 0
wait4(-1, [{WIFSIGNALED(s) && WTERMSIG(s) == SIGTERM}], 0, NULL) = 1568
--- SIGCHLD (Child exited) @ 0 (0) ---
unlink("/pkg/courier/0.55.1/.000/var/tmp/courierfilter.pid") = 0
exit_group(0) = ?
Process 1565 detached
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
_______________________________________________
courier-users mailing list
[email protected]
Unsubscribe: https://lists.sourceforge.net/lists/listinfo/courier-users