Package: am-utils
Version: 6.2+rc20110530-3.2
Severity: critical
Justification: makes unrelated software on the system break (unrelated 
processes hanging)

Hello,

I installed am-utils package today,
and then uninstalled it.
Was pretty astonished to find that
after package uninstall
unrelated processes started hanging (df etc.).
[the usual very annoying symptoms of stale mounts]

Thus figured out rather quickly that there was a stale mount left, despite
am-utils stop having been initiated by package uninstall already:

andi:(pid4532,port1022) on /net type nfs
(rw,relatime,sync,vers=2,rsize=1024,wsize=1024,namlen=255,acregmin=0,acregmax=0,acdirmin=0,acdirmax=0,hard,noac,nolock,proto=udp,port=1022,timeo=7,retrans=3,sec=sys,local_lock=all,addr=127.0.0.1)


# sh -x /etc/init.d/am-utils stop
+ PATH=/usr/sbin:/sbin:/usr/bin:/bin
+ export PATH
+ CONF=/etc/default/am-utils
+ test -x /usr/sbin/amd
+ stop_amd
+ amq -p
+ pid=
+ [ -z  ]
+ echo Stopping automounter: amd not running
Stopping automounter: amd not running
+ [ 0 -eq 0 ]
+ exit 0


[subsequent shutdown handling is thus completely and fully out-maneuvered]


I realized that in fact amd was not running any more at this point.


So, I re-start:ed the service.


At this point, manually doing
   # strace -f amq -p
will show:

stat64("/usr/lib/cmov", 0xbfb262d0)     = -1 ENOENT (No such file or
directory)
open("/usr/lib/libnss_db.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such
file or directory)
stat64("/usr/lib", {st_mode=S_IFDIR|0755, st_size=86016, ...}) = 0
munmap(0xb771b000, 163182)              = 0
open("/etc/protocols", O_RDONLY|O_CLOEXEC) = 3
fstat64(3, {st_mode=S_IFREG|0644, st_size=2932, ...}) = 0
read(3, "# Internet (IP) protocols\n#\n# Up"..., 1024) = 1024
close(3)                                = 0
socket(AF_INET, SOCK_STREAM, IPPROTO_TCP) = 3
bind(3, {sa_family=AF_INET, sin_port=htons(0),
sin_addr=inet_addr("0.0.0.0")}, 16) = 0
connect(3, {sa_family=AF_INET, sin_port=htons(111),
sin_addr=inet_addr("127.0.0.1")}, 16) = 0
gettimeofday({1475612569, 410100}, NULL) = 0
futex(0xb7678180, FUTEX_WAKE_PRIVATE, 2147483647) = 0
futex(0xb76782e0, FUTEX_WAKE_PRIVATE, 2147483647) = 0
write(3,
"\200\0\0008Y\304\273\254\0\0\0\0\0\0\0\2\0\1\206\240\0\0\0\2\0\0\0\3\0\0\0\0"...,
60) = 60
poll([{fd=3, events=POLLIN}], 1, 60000) = 1 ([{fd=3, revents=POLLIN}])
read(3,
"\200\0\0\34Y\304\273\254\0\0\0\1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\3\371",
400) = 32
close(3)                                = 0
socket(AF_INET, SOCK_STREAM, IPPROTO_TCP) = 3
open("/etc/bindresvport.blacklist", O_RDONLY) = 4
fstat64(4, {st_mode=S_IFREG|0644, st_size=367, ...}) = 0
read(4, "#\n# This file contains a list of"..., 1024) = 367
read(4, "", 1024)                       = 0
close(4)                                = 0
bind(3, {sa_family=AF_INET, sin_port=htons(604),
sin_addr=inet_addr("0.0.0.0")}, 16) = 0
connect(3, {sa_family=AF_INET, sin_port=htons(1017),
sin_addr=inet_addr("127.0.0.1")}, 16) = 0
write(3,
"\200\0\0(m.M]\0\0\0\0\0\0\0\2\0\4\223\363\0\0\0\1\0\0\0\t\0\0\0\0"...,
44) = 44
poll([{fd=3, events=POLLIN}], 1, 40000) = 1 ([{fd=3, revents=POLLIN}])
read(3, "", 4000)                       = 0
write(2, "amq: failed to get PID of amd\n", 30amq: failed to get PID of
amd
) = 30
exit_group(1)                           = ?
+++ exited with 1 +++



Having attached to a running amd process right before that amq invocation
this will then have resulted in the following report there:
# strace -f -p 4657
strace: Process 4657 attached
_newselect(1024, [3 4 5 6], NULL, NULL, {282, 847391}) = 1 (in [5], left
{281, 93032})
rt_sigprocmask(SIG_BLOCK, [HUP INT TERM CHLD], NULL, 8) = 0
gettimeofday({1475612569, 412383}, NULL) = 0
accept(5, {sa_family=AF_INET, sin_port=htons(604),
sin_addr=inet_addr("127.0.0.1")}, [16]) = 7
dup(0)                                  = 8
close(8)                                = 0
gettimeofday({1475612569, 412959}, NULL) = 0
gettimeofday({1475612569, 413030}, NULL) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
_newselect(1024, [3 4 5 6 7], NULL, NULL, {281, 0}) = 1 (in [7], left
{280, 999649})
rt_sigprocmask(SIG_BLOCK, [HUP INT TERM CHLD], NULL, 8) = 0
gettimeofday({1475612569, 413806}, NULL) = 0
poll([{fd=7, events=POLLIN}], 1, 35000) = 1 ([{fd=7, revents=POLLIN}])
read(7,
"\200\0\0(m.M]\0\0\0\0\0\0\0\2\0\4\223\363\0\0\0\1\0\0\0\t\0\0\0\0"...,
16384) = 44
open("/etc/hosts.allow", O_RDONLY)      = 8
fstat64(8, {st_mode=S_IFREG|0644, st_size=707, ...}) = 0
read(8, "# /etc/hosts.allow: list of host"..., 1024) = 707
read(8, "", 1024)                       = 0
close(8)                                = 0
open("/etc/hosts.deny", O_RDONLY)       = 8
fstat64(8, {st_mode=S_IFREG|0644, st_size=858, ...}) = 0
read(8, "# /etc/hosts.deny: list of hosts"..., 1024) = 858
close(8)                                = 0
time([1475612569])                      = 1475612569
socket(AF_LOCAL, SOCK_DGRAM|SOCK_CLOEXEC, 0) = 8
connect(8, {sa_family=AF_LOCAL, sun_path="/dev/log"}, 110) = -1 ENOENT
(No such file or directory)
close(8)                                = 0
close(0)                                = 0
dup(-1)                                 = -1 EBADF (Bad file descriptor)
time([1475612569])                      = 1475612569
socket(AF_LOCAL, SOCK_DGRAM|SOCK_CLOEXEC, 0) = 0
connect(0, {sa_family=AF_LOCAL, sun_path="/dev/log"}, 110) = -1 ENOENT
(No such file or directory)
close(0)                                = 0
nanosleep({5, 0}, 0xbfa1d2d8)           = 0
exit_group(0)                           = ?
+++ exited with 0 +++



At a quick glance it looks like
unavailability of /dev/log is the terminating problem here
(trying to open /dev/log, getting fd -1, then even trying to dup() -1).
This to me quite strongly looks like this might be the case due to
a non-syslog install here:

# dpkg -l|grep syslog
#


# ps aux|grep log
root      4680  0.0  0.4   6304  2060 pts/7    S+   22:24   0:00 grep
log
root      5144  0.0  0.0   6816     8 tty1     Ss   Sep08   0:00
/bin/login --     
root      5182  0.0  0.1   7188   720 ?        S    Sep08   0:00
/lib/systemd/systemd-logind
andi      8229  0.7  0.9 211108  4672 ?        S<l  Okt03  11:14
/usr/bin/pulseaudio --start --log-target=syslog



I'd hazard a guess that this is due to
this system now being a wee bit too far into
systemd updating (despite being almost 20 years old ;),
with am-utils not being able to cope with
such a newer environment.

Since the am-utils package version I am using
is the newest offer, Houston.

So,
perhaps am-utils has a systemd compat issue, or
perhaps am-utils is missing a crucial Depends: on a syslog package.



BTW, since it's easily conceivable that
amd shutdown attempts might happen to fail for *various* other reasons,
I believe we have got a much bigger problem scope here,
since failed shutdown would always result in a leftover /net mount
(IOW, lifetime guarantees of the
rather crucial/consequences-laden /net mount
are *NOT* properly managed, since
scoped lifetime operation of /net mount
is not guaranteed *beyond* handling of amd shutdown).
OTOH since probably amd binary itself is
the party responsible for
managing (establishing/tearing down) /net mount, *it* is
the party which already ought to provide
reliably scoped handling of the /net mount resource
*irrespective of however severe its operating conditions might be*,
yet it completely fails to do so
(hard process abort in case of /dev/log unavailability - UGGGGHH!!).

BTW function raze_amd() in /etc/init.d/am-utils already seems to be implemented
to try to post-cope with such pathological cases,
however it will only be operative *with* a running amd.
So, possibly the shutdown parts additionally should also be
printing some prominently visible diagnostics
in case it was determined that amd somehow failed to carry out
its mount removal duties.



#730027 seems somewhat related (whereabouts and effects),
however the -TERM issue (doing kill -TERM right after an amq request)
is *not* the case in my version [any more?].

Thanks,

Andreas Mohr

Reply via email to