On 09-07-2010 12:09, Sam Varshavchik wrote:
> Kristian Duus Østergaard writes:
>
>> On 07/08/10 23:09, Sam Varshavchik wrote:
>>>
>>> If you can find out more information as to what specific IMAP commands
>>> result in the server hanging, in this situation, I'm sure it can be
>>> easily fixed.
>> I think that either the Mail.App from OSX or the iPhone Mail client
>> ignores the subscription status and just issues a read of the
>> .AppleDouble folder, so how can I provide you with a more adequate
>> description of what goes on ?
>
> I don't know. You'll need to figure it out. There is no cookie-cutter
> answer, one size fits all. You'll have to have a more detailed look at
> what's going on, when it does. Things that you will want to look at:
>
> What is the parent process ID of all these processes.
Two have lost their parent process and are now owned by PID 1.
The remaining imap processes are owned by 6708 (/usr/sbin/couriertcpd)
which in turn is owned by 6707 (/usr/sbin/courierlogger)
>
> What are they doing (the strace command).
>
I think the problem is indicated by the processes that have lost their
parent - they are both trying to open fam:
#strace -p 17120
Process 17120 attached - interrupt to quit
connect(7, {sa_family=AF_FILE, path=@"/tmp/fam-duus-"}, 110^C
<unfinished ...>
Process 17120 detached
6707 and 6708 :
#strace -p 6707
Process 6707 attached - interrupt to quit
read(0, ^C <unfinished ...>
Process 6707 detached
# strace -p 6708
Process 6708 attached - interrupt to quit
select(4, [3], NULL, NULL, NULL^C <unfinished ...>
Process 6708 detached
> What files they have open, /proc/<pid>/fd on Linux.
>
==== 6707 ====
total 0
lr-x------ 1 root root 64 23 jul 11:30 0 -> pipe:[12240]
lrwx------ 1 root root 64 23 jul 11:30 1 -> /dev/null
lrwx------ 1 root root 64 23 jul 11:30 1023 ->
/var/run/courier/imapd.pid.lock
lrwx------ 1 root root 64 23 jul 11:30 2 -> /dev/null
lrwx------ 1 root root 64 23 jul 11:30 3 -> socket:[12243]
==== 6708 ====
total 0
lrwx------ 1 root root 64 23 jul 11:49 0 -> /dev/null
lrwx------ 1 root root 64 23 jul 11:49 1 -> /dev/null
l-wx------ 1 root root 64 23 jul 11:49 2 -> pipe:[12240]
lrwx------ 1 root root 64 23 jul 11:49 3 -> socket:[12248]
l-wx------ 1 root root 64 23 jul 11:49 4 -> pipe:[12238]
==== 17114 ====
total 0
lrwx------ 1 duus users 64 23 jul 11:37 0 -> socket:[16367047]
lrwx------ 1 duus users 64 23 jul 11:37 1 -> socket:[16367047]
l-wx------ 1 duus users 64 23 jul 11:37 2 -> pipe:[12275]
lrwx------ 1 duus users 64 23 jul 11:37 3 ->
/var/lib/courier/couriersslcache
l-wx------ 1 duus users 64 23 jul 11:37 4 -> pipe:[12273]
lrwx------ 1 duus users 64 23 jul 11:37 5 -> socket:[16367057]
lrwx------ 1 duus users 64 23 jul 11:37 6 -> socket:[16371822]
lr-x------ 1 duus users 64 23 jul 11:37 7 ->
/home/duus/.maildir/cur/1279839784.M918162P28378V000000000000FD01I0000000000158D4C_0.hydra,S=2008280:2,S
lrwx------ 1 duus users 64 23 jul 11:37 8 -> socket:[16383848]
==== 17120 ====
total 0
lrwx------ 1 duus users 64 23 jul 11:43 0 -> socket:[16367109]
lrwx------ 1 duus users 64 23 jul 11:43 1 -> socket:[16367109]
l-wx------ 1 duus users 64 23 jul 11:43 2 -> pipe:[12275]
lrwx------ 1 duus users 64 23 jul 11:43 3 ->
/var/lib/courier/couriersslcache
l-wx------ 1 duus users 64 23 jul 11:43 4 -> pipe:[12273]
lrwx------ 1 duus users 64 23 jul 11:43 5 -> socket:[16367117]
lrwx------ 1 duus users 64 23 jul 11:43 6 -> socket:[16372070]
lrwx------ 1 duus users 64 23 jul 11:43 7 -> socket:[16383847]
> What options you've used to build Courier.
>
This is the gentoo build :
mail-mta/courier-0.65.0
net-libs/courier-authlib-0.63.0
> Running an strace on the couriertcpd process that's listening on the
> IMAP port, while performing a test connect and disconnect.
Part of the strace of the couriertcpd process - but I'm not sure if this is
caused by a connect/disconnect cycle.
# strace -p 6708
Process 6708 attached - interrupt to quit
select(4, [3], NULL, NULL, NULL) = 1 (in [3])
rt_sigprocmask(SIG_BLOCK, [CHLD], NULL, 8) = 0
rt_sigprocmask(SIG_UNBLOCK, [CHLD], NULL, 8) = 0
accept(3, {sa_family=AF_INET6, sin6_port=htons(38336),
inet_pton(AF_INET6, "::ffff:192.168.1.2", &sin6_addr), sin6_flowinfo=0,
sin6_scope_id=0}, [28]) = 5
fcntl(5, F_SETFD, 0) = 0
fcntl(5, F_SETFL, O_RDONLY) = 0
setsockopt(5, SOL_SOCKET, SO_KEEPALIVE, [1], 4) = 0
setsockopt(5, SOL_SOCKET, SO_LINGER, {onoff=0, linger=0}, 8) = 0
rt_sigprocmask(SIG_BLOCK, [CHLD], NULL, 8) = 0
clone(child_stack=0,
flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD,
child_tidptr=0x7f6fe7b369d0) = 30252
close(5) = 0
rt_sigprocmask(SIG_UNBLOCK, [CHLD], NULL, 8) = 0
select(4, [3], NULL, NULL, NULL) = ? ERESTARTNOHAND (To be restarted)
--- SIGCHLD (Child exited) @ 0 (0) ---
rt_sigprocmask(SIG_BLOCK, [CHLD], NULL, 8) = 0
wait4(-1, [{WIFEXITED(s) && WEXITSTATUS(s) == 0}], WNOHANG, NULL) = 30252
wait4(-1, 0x7fff636e508c, WNOHANG, NULL) = 0
rt_sigaction(SIGCHLD, {0x402f30, [CHLD], SA_RESTORER|SA_RESTART,
0x7f6fe741ddf0}, {0x402f30, [CHLD], SA_RESTORER|SA_RESTART,
0x7f6fe741ddf0}, 8) = 0
rt_sigprocmask(SIG_UNBLOCK, [CHLD], NULL, 8) = 0
rt_sigreturn(0x1) = -1 EINTR (Interrupted system call)
select(4, [3], NULL, NULL, NULL) = 1 (in [3])
rt_sigprocmask(SIG_BLOCK, [CHLD], NULL, 8) = 0
rt_sigprocmask(SIG_UNBLOCK, [CHLD], NULL, 8) = 0
accept(3, {sa_family=AF_INET6, sin6_port=htons(38337),
inet_pton(AF_INET6, "::ffff:192.168.1.2", &sin6_addr), sin6_flowinfo=0,
sin6_scope_id=0}, [28]) = 5
fcntl(5, F_SETFD, 0) = 0
fcntl(5, F_SETFL, O_RDONLY) = 0
setsockopt(5, SOL_SOCKET, SO_KEEPALIVE, [1], 4) = 0
setsockopt(5, SOL_SOCKET, SO_LINGER, {onoff=0, linger=0}, 8) = 0
rt_sigprocmask(SIG_BLOCK, [CHLD], NULL, 8) = 0
clone(child_stack=0,
flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD,
child_tidptr=0x7f6fe7b369d0) = 30254
close(5) = 0
rt_sigprocmask(SIG_UNBLOCK, [CHLD], NULL, 8) = 0
select(4, [3], NULL, NULL, NULL) = ? ERESTARTNOHAND (To be restarted)
--- SIGCHLD (Child exited) @ 0 (0) ---
rt_sigprocmask(SIG_BLOCK, [CHLD], NULL, 8) = 0
wait4(-1, [{WIFEXITED(s) && WEXITSTATUS(s) == 0}], WNOHANG, NULL) = 30254
wait4(-1, 0x7fff636e508c, WNOHANG, NULL) = 0
rt_sigaction(SIGCHLD, {0x402f30, [CHLD], SA_RESTORER|SA_RESTART,
0x7f6fe741ddf0}, {0x402f30, [CHLD], SA_RESTORER|SA_RESTART,
0x7f6fe741ddf0}, 8) = 0
rt_sigprocmask(SIG_UNBLOCK, [CHLD], NULL, 8) = 0
rt_sigreturn(0x1) = -1 EINTR (Interrupted system call)
select(4, [3], NULL, NULL, NULL) = 1 (in [3])
rt_sigprocmask(SIG_BLOCK, [CHLD], NULL, 8) = 0
rt_sigprocmask(SIG_UNBLOCK, [CHLD], NULL, 8) = 0
accept(3, {sa_family=AF_INET6, sin6_port=htons(38338),
inet_pton(AF_INET6, "::ffff:192.168.1.2", &sin6_addr), sin6_flowinfo=0,
sin6_scope_id=0}, [28]) = 5
fcntl(5, F_SETFD, 0) = 0
fcntl(5, F_SETFL, O_RDONLY) = 0
setsockopt(5, SOL_SOCKET, SO_KEEPALIVE, [1], 4) = 0
setsockopt(5, SOL_SOCKET, SO_LINGER, {onoff=0, linger=0}, 8) = 0
rt_sigprocmask(SIG_BLOCK, [CHLD], NULL, 8) = 0
clone(child_stack=0,
flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD,
child_tidptr=0x7f6fe7b369d0) = 30256
close(5) = 0
rt_sigprocmask(SIG_UNBLOCK, [CHLD], NULL, 8) = 0
select(4, [3], NULL, NULL, NULL) = ? ERESTARTNOHAND (To be restarted)
--- SIGCHLD (Child exited) @ 0 (0) ---
rt_sigprocmask(SIG_BLOCK, [CHLD], NULL, 8) = 0
wait4(-1, [{WIFEXITED(s) && WEXITSTATUS(s) == 0}], WNOHANG, NULL) = 30256
wait4(-1, 0x7fff636e508c, WNOHANG, NULL) = 0
rt_sigaction(SIGCHLD, {0x402f30, [CHLD], SA_RESTORER|SA_RESTART,
0x7f6fe741ddf0}, {0x402f30, [CHLD], SA_RESTORER|SA_RESTART,
0x7f6fe741ddf0}, 8) = 0
rt_sigprocmask(SIG_UNBLOCK, [CHLD], NULL, 8) = 0
rt_sigreturn(0x1) = -1 EINTR (Interrupted system call)
select(4, [3], NULL, NULL, NULL) = 1 (in [3])
rt_sigprocmask(SIG_BLOCK, [CHLD], NULL, 8) = 0
rt_sigprocmask(SIG_UNBLOCK, [CHLD], NULL, 8) = 0
accept(3, {sa_family=AF_INET6, sin6_port=htons(38339),
inet_pton(AF_INET6, "::ffff:192.168.1.2", &sin6_addr), sin6_flowinfo=0,
sin6_scope_id=0}, [28]) = 5
fcntl(5, F_SETFD, 0) = 0
fcntl(5, F_SETFL, O_RDONLY) = 0
setsockopt(5, SOL_SOCKET, SO_KEEPALIVE, [1], 4) = 0
setsockopt(5, SOL_SOCKET, SO_LINGER, {onoff=0, linger=0}, 8) = 0
rt_sigprocmask(SIG_BLOCK, [CHLD], NULL, 8) = 0
clone(child_stack=0,
flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD,
child_tidptr=0x7f6fe7b369d0) = 30258
close(5) = 0
rt_sigprocmask(SIG_UNBLOCK, [CHLD], NULL, 8) = 0
select(4, [3], NULL, NULL, NULL) = ? ERESTARTNOHAND (To be restarted)
--- SIGCHLD (Child exited) @ 0 (0) ---
As a final step I have unfortunately had to remove the
.AppleDouble folder and kill 17114 and 17120, so I hope this will give you
something to work with.
Regards
Kristian
------------------------------------------------------------------------------
This SF.net email is sponsored by Sprint
What will you do first with EVO, the first 4G phone?
Visit sprint.com/first -- http://p.sf.net/sfu/sprint-com-first
_______________________________________________
courier-users mailing list
[email protected]
Unsubscribe: https://lists.sourceforge.net/lists/listinfo/courier-users