Sam Varshavchik wrote:
> Michael Carmack writes:
> 
>> Oddly enough, the act of running an strace on the root perlfilter
>> process seems to fix the problem. If I run strace on the root perlfilter
>> _before_ trying to stop courierfilter, I never see any looping when I
>> shut it down. And if I run strace on the root perlfilter process _after_
>> trying to stop courierfilter, while all of the rest of the perlfilter
>> processes are respawning, then the respawning immediately stops and the
>> courierfitler stops.
> 
> That sounds almost like when perlfilter's root parent process gets a 
> signal, it doesn't really "get it", but as soon as it forks off a child 
> process, the child process realizes that it just got signal and dies, 
> then the parent just respawns it.  And when you try to strace the root 
> parent process, it realizes that it got a signal, and dies.
> 
> You said you've put together a custom kernel.  Try a different major 
> kernel version first, to see if you can reproduce the behavior.

Ok, I just tried it on 2.6.20.7 and 2.6.21.1, with same result in both 
cases. I attempted to try it out on 2.6.16.49 also, but I wasn't able to 
get a stable boot -- some kind of hardware incompatibility.


> The stop command first sends a SIGTERM, and if the lock file isn't 
> released, that's followed by a SIGKILL after ten seconds.  Presuming 
> that your stop command never terminates, it's going to be sending 
> SIGKILLs every ten seconds. 

I'm not sure what you meant by "stop command never terminates", but when 
I run "courierfilter stop" it returns me to the command prompt 
immediately -- it doesn't hang or report any errors. I just happened to 
run a 'ps' after stopping courier yesterday and noticed all of those 
defunct perlfilters. If you didn't look at the process list after 
stopping courier, you'd never know anything was wrong. (Well, the 
respawning eventually drives the load up, but if you were just shutting 
down courier as part of the normal system shutdown procedure, you 
wouldn't notice anything out of the ordinary.)


> strace the stop process, and see for 
> yourself that it's sending SIGKILLs every ten seconds, to the same pid.

Here's the results of some more stracing:

==========================
   Start up courierfilter
==========================


[EMAIL PROTECTED] ./sbin/courierfilter start

[EMAIL PROTECTED] ps -efww | grep courier
courier   1565     1  0 04:47 ?        00:00:00 ./sbin/courierfilter start
courier   1567     1  0 04:47 ?        00:00:00 
/pkg/courier-authlib/0.59.3/.001/sbin/courierlogger courierfilter
courier   1568  1565  0 04:47 ?        00:00:00 perlfilter
courier   1569  1568  0 04:47 ?        00:00:00 perlfilter
courier   1570  1568  0 04:47 ?        00:00:00 perlfilter
courier   1571  1568  0 04:47 ?        00:00:00 perlfilter
courier   1572  1568  0 04:47 ?        00:00:00 perlfilter
courier   1573  1568  0 04:47 ?        00:00:00 perlfilter
root      1575   743  0 04:47 pts/0    00:00:00 grep courier



==================================================================
   Now strace the "courierfilter start" process in another xterm
==================================================================


[EMAIL PROTECTED] strace -vfp 1565
Process 1565 attached - interrupt to quit
rt_sigsuspend([]


=========================================================================
    Now strace "courierfilter stop". (Note that it exits w/o any problem)
=========================================================================


[EMAIL PROTECTED] strace ./sbin/courierfilter stop
execve("./sbin/courierfilter", ["./sbin/courierfilter", "stop"], [/* 23 
vars */]) = 0
brk(0)                                  = 0x504000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) 
= 0x2b73191d9000
uname({sys="Linux", node="b1", ...})    = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) 
= 0x2b73191da000
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or 
directory)
open("/pkg/glibc/2.5/lib/tls/x86_64/libstdc++.so.6", O_RDONLY) = -1 
ENOENT (No such file or directory)
stat("/pkg/glibc/2.5/lib/tls/x86_64", 0x7fff918e9990) = -1 ENOENT (No 
such file or directory)
open("/pkg/glibc/2.5/lib/tls/libstdc++.so.6", O_RDONLY) = -1 ENOENT (No 
such file or directory)
stat("/pkg/glibc/2.5/lib/tls", 0x7fff918e9990) = -1 ENOENT (No such file 
or directory)
open("/pkg/glibc/2.5/lib/x86_64/libstdc++.so.6", O_RDONLY) = -1 ENOENT 
(No such file or directory)
stat("/pkg/glibc/2.5/lib/x86_64", 0x7fff918e9990) = -1 ENOENT (No such 
file or directory)
open("/pkg/glibc/2.5/lib/libstdc++.so.6", O_RDONLY) = -1 ENOENT (No such 
file or directory)
stat("/pkg/glibc/2.5/lib", {st_mode=S_IFDIR|0755, st_size=2784, ...}) = 0
open("/pkg/gcc/4.1.2/lib/tls/x86_64/libstdc++.so.6", O_RDONLY) = -1 
ENOENT (No such file or directory)
stat("/pkg/gcc/4.1.2/lib/tls/x86_64", 0x7fff918e9990) = -1 ENOENT (No 
such file or directory)
open("/pkg/gcc/4.1.2/lib/tls/libstdc++.so.6", O_RDONLY) = -1 ENOENT (No 
such file or directory)
stat("/pkg/gcc/4.1.2/lib/tls", 0x7fff918e9990) = -1 ENOENT (No such file 
or directory)
open("/pkg/gcc/4.1.2/lib/x86_64/libstdc++.so.6", O_RDONLY) = -1 ENOENT 
(No such file or directory)
stat("/pkg/gcc/4.1.2/lib/x86_64", 0x7fff918e9990) = -1 ENOENT (No such 
file or directory)
open("/pkg/gcc/4.1.2/lib/libstdc++.so.6", O_RDONLY) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>[EMAIL PROTECTED]"..., 
832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=5073930, ...}) = 0
mmap(NULL, 2088736, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 
0) = 0x2b73192da000
mprotect(0x2b73193bd000, 1048576, PROT_NONE) = 0
mmap(0x2b73194bd000, 36864, PROT_READ|PROT_WRITE, 
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0xe3000) = 0x2b73194bd000
mmap(0x2b73194c6000, 73504, PROT_READ|PROT_WRITE, 
MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x2b73194c6000
close(3)                                = 0
open("/pkg/glibc/2.5/lib/libm.so.6", O_RDONLY) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\20=\0\0"..., 
832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=1366690, ...}) = 0
mmap(NULL, 1569000, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 
0) = 0x2b73194d8000
mprotect(0x2b7319557000, 1044480, PROT_NONE) = 0
mmap(0x2b7319656000, 8192, PROT_READ|PROT_WRITE, 
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x7e000) = 0x2b7319656000
close(3)                                = 0
open("/pkg/glibc/2.5/lib/libc.so.6", O_RDONLY) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\340\323"..., 
832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=7559198, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) 
= 0x2b7319658000
mmap(NULL, 2322568, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 
0) = 0x2b7319659000
mprotect(0x2b7319787000, 1048576, PROT_NONE) = 0
mmap(0x2b7319887000, 20480, PROT_READ|PROT_WRITE, 
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x12e000) = 0x2b7319887000
mmap(0x2b731988c000, 16520, PROT_READ|PROT_WRITE, 
MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x2b731988c000
close(3)                                = 0
open("/pkg/glibc/2.5/lib/libgcc_s.so.1", O_RDONLY) = -1 ENOENT (No such 
file or directory)
open("/pkg/gcc/4.1.2/lib/libgcc_s.so.1", O_RDONLY) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\20\36\0"..., 
832) = 832
fstat(3, {st_mode=S_IFREG|0644, st_size=292889, ...}) = 0
mmap(NULL, 1099208, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 
0) = 0x2b7319891000
mprotect(0x2b731989e000, 1044480, PROT_NONE) = 0
mmap(0x2b731999d000, 4096, PROT_READ|PROT_WRITE, 
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0xc000) = 0x2b731999d000
close(3)                                = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) 
= 0x2b731999e000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) 
= 0x2b731999f000
arch_prctl(ARCH_SET_FS, 0x2b731999eb20) = 0
mprotect(0x2b7319887000, 16384, PROT_READ) = 0
mprotect(0x2b7319656000, 4096, PROT_READ) = 0
mprotect(0x2b73194bd000, 28672, PROT_READ) = 0
mprotect(0x2b73192d8000, 4096, PROT_READ) = 0
getuid()                                = 0
setgid(1002)                            = 0
getuid()                                = 0
setgroups(1, [1002])                    = 0
setuid(1002)                            = 0
chdir("/pkg/courier/0.55.1/.000")       = 0
brk(0)                                  = 0x504000
brk(0x525000)                           = 0x525000
open("/pkg/courier/0.55.1/.000/var/tmp/courierfilter.pid", O_RDONLY) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=5, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) 
= 0x2b73199a0000
read(3, "1565\n", 4096)                 = 5
close(3)                                = 0
munmap(0x2b73199a0000, 4096)            = 0
kill(1565, SIGTERM)                     = 0
alarm(15)                               = 0
open("/pkg/courier/0.55.1/.000/var/filters/.lock", O_RDWR) = -1 ENOENT 
(No such file or directory)
exit_group(0)                           = ?
Process 1579 detached


======================================================================
   Meanwhile, there is more output from the"courierfilter start" trace
======================================================================


[EMAIL PROTECTED] strace -vfp 1565

Process 1565 attached - interrupt to quit
rt_sigsuspend([])                       = ? ERESTARTNOHAND (To be restarted)
--- SIGTERM (Terminated) @ 0 (0) ---
rt_sigreturn(0xf)                       = -1 EINTR (Interrupted system call)
writev(2, [{"INFO: Stopping perlfilter", 25}, {"\n", 1}], 2) = 26
close(8)                                = 0
wait4(-1,


====================================================
   But the perlfilters are still respawning
====================================================


[EMAIL PROTECTED] ps -efww | grep courier 

courier   1565     1  0 04:47 ?        00:00:00 ./sbin/courierfilter start
courier   1567     1  0 04:47 ?        00:00:00 
/pkg/courier-authlib/0.59.3/.001/sbin/courierlogger courierfilter
courier   1568  1565  0 04:47 ?        00:00:00 perlfilter
courier  22048  1568  0 04:50 ?        00:00:00 [perlfilter] <defunct>
courier  22049  1568  0 04:50 ?        00:00:00 [perlfilter] <defunct>
courier  22050  1568  0 04:50 ?        00:00:00 [perlfilter] <defunct>
root     22052   743  0 04:50 pts/0    00:00:00 grep courier
courier  22053  1568  0 04:50 ?        00:00:00 perlfilter
courier  22054  1568  0 04:50 ?        00:00:00 perlfilter


================================================================
   Now I kill the root perlfilter and they all die
================================================================


[EMAIL PROTECTED] kill 1568
[EMAIL PROTECTED] ps -efww | grep courier
root     22906   743  0 04:50 pts/0    00:00:00 grep courier


========================================================
   And now the "courierfilter start" strace finishes up
========================================================


[EMAIL PROTECTED] strace -vfp 1565
Process 1565 attached - interrupt to quit
rt_sigsuspend([])                       = ? ERESTARTNOHAND (To be restarted)
--- SIGTERM (Terminated) @ 0 (0) ---
rt_sigreturn(0xf)                       = -1 EINTR (Interrupted system call)
writev(2, [{"INFO: Stopping perlfilter", 25}, {"\n", 1}], 2) = 26
close(8)                                = 0
wait4(-1, [{WIFSIGNALED(s) && WTERMSIG(s) == SIGTERM}], 0, NULL) = 1568
--- SIGCHLD (Child exited) @ 0 (0) ---
unlink("/pkg/courier/0.55.1/.000/var/tmp/courierfilter.pid") = 0
exit_group(0)                           = ?
Process 1565 detached



-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
_______________________________________________
courier-users mailing list
[email protected]
Unsubscribe: https://lists.sourceforge.net/lists/listinfo/courier-users

Reply via email to