-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
Hi!
I recently upgraded one machine from Amanda v2.5.2p1 to
v2.6.1p2 and noticed that with the new version amandad
does not collect exit status from forked child processes
anymore, so all terminated child processes are left as zombies
until the main amandad process terminates.
This creates a situation with many defunct processes during
a backup run, like this:
r...@file:~ {530} $ ps awwx -o user,pid,ppid,start,stat,wchan,command | grep
amanda
amanda 28390 28387 20:00:01 Ss wait4 /bin/sh /usr/sbin/amdump workdays
amanda 28423 28390 20:00:02 S wait4 /usr/bin/perl -
/usr/libexec/amanda/planner /usr/libexec/amanda/driver workdays 20100617200002
amanda 28425 28423 20:00:02 S poll /usr/libexec/amanda/driver workdays
amanda 28426 28425 20:00:03 S ? taper workdays
amanda 28427 28425 20:00:03 S poll dumper0 workdays
amanda 28428 28425 20:00:03 R - dumper1 workdays
amanda 28429 28425 20:00:03 S ? dumper2 workdays
amanda 28430 28425 20:00:03 S ? dumper3 workdays
amanda 28431 28425 20:00:03 S ? dumper4 workdays
amanda 28432 28426 20:00:03 S poll taper workdays
amanda 28434 1120 20:00:03 S poll amandad -auth=bsd amdump amindexd
amidxtaped
amanda 28436 28434 20:00:03 Z exit [amandad] <defunct>
amanda 28438 28434 20:00:04 Z exit [amandad] <defunct>
amanda 29575 28425 20:27:33 S tcp_da chunker0 workdays
amanda 29576 28425 20:27:33 S tcp_da chunker1 workdays
amanda 29578 28428 20:27:34 S pipe_w /bin/gzip --best
amanda 29579 28434 20:27:34 S pipe_w /usr/libexec/amanda/sendbackup
amandad bsd
amanda 29580 28434 20:27:34 Z exit [amandad] <defunct>
amanda 29581 29579 20:27:34 R - /bin/gzip --fast
amanda 29582 29579 20:27:34 S pipe_w /usr/libexec/amanda/sendbackup
amandad bsd
root 29583 29579 20:27:34 S pipe_w /bin/tar --create --file -
--directory /home/gruppen/alle/Projekte --one-file-system --listed-incremental
/var/db/amanda/gnutar-lists/file.local_home_gruppen_alle_Projekte_0.new
--sparse --ignore-failed-read --totals --exclude-from
/tmp/amanda/sendbackup._home_gruppen_alle_Projekte.20100617202734.exclude .
amanda 29584 29582 20:27:34 S wait4 sh -c /bin/tar -tf - 2>/dev/null |
sed -e 's/^\.//'
amanda 29585 29584 20:27:34 S - /bin/tar -tf -
amanda 29586 29584 20:27:34 S pipe_w sed -e s/^\.//
amanda 29587 28427 20:27:34 S pipe_w /bin/gzip --best
amanda 29588 29583 20:27:34 Z exit [sh] <defunct>
amanda 29602 28434 20:27:49 Z exit [amandad] <defunct>
amanda 29620 28434 20:28:03 Z exit [amandad] <defunct>
amanda 29636 28434 20:28:18 Z exit [amandad] <defunct>
amanda 29695 28434 20:30:18 Z exit [amandad] <defunct>
amanda 29845 28434 20:41:03 Z exit [amandad] <defunct>
amanda 29857 28434 20:41:19 Z exit [amandad] <defunct>
amanda 29871 28434 20:41:33 Z exit [amandad] <defunct>
amanda 29886 28434 20:41:48 Z exit [amandad] <defunct>
amanda 29902 28434 20:42:04 Z exit [amandad] <defunct>
amanda 29920 28434 20:42:19 Z exit [amandad] <defunct>
amanda 29936 28434 20:42:33 Z exit [amandad] <defunct>
amanda 29951 28434 20:42:48 Z exit [amandad] <defunct>
amanda 30071 28434 20:47:29 Z exit [amandad] <defunct>
amanda 30091 28434 20:47:52 Z exit [amandad] <defunct>
amanda 30108 28434 20:48:08 Z exit [amandad] <defunct>
amanda 30121 28434 20:48:22 Z exit [amandad] <defunct>
amanda 30136 28434 20:48:38 Z exit [amandad] <defunct>
amanda 30150 28434 20:48:53 Z exit [amandad] <defunct>
amanda 30164 28434 20:49:07 Z exit [amandad] <defunct>
amanda 30180 28434 20:49:31 Z exit [amandad] <defunct>
amanda 30221 28434 20:50:23 Z exit [amandad] <defunct>
amanda 30267 28434 20:51:10 Z exit [amandad] <defunct>
amanda 30284 28434 20:51:36 Z exit [amandad] <defunct>
amanda 30301 28434 20:52:02 Z exit [amandad] <defunct>
amanda 30319 28434 20:52:38 Z exit [amandad] <defunct>
amanda 30348 28434 20:53:18 Z exit [amandad] <defunct>
amanda 30374 28434 20:53:59 Z exit [amandad] <defunct>
amanda 30390 28434 20:54:14 Z exit [amandad] <defunct>
amanda 30406 28434 20:54:29 Z exit [amandad] <defunct>
amanda 30422 28434 20:54:43 Z exit [amandad] <defunct>
amanda 30442 28434 20:55:00 Z exit [amandad] <defunct>
amanda 30470 28434 20:55:15 Z exit [amandad] <defunct>
amanda 30486 28434 20:55:31 Z exit [amandad] <defunct>
amanda 30502 28434 20:55:45 Z exit [amandad] <defunct>
amanda 30533 28434 20:56:01 Z exit [amandad] <defunct>
amanda 30549 28434 20:56:16 Z exit [amandad] <defunct>
amanda 30562 28434 20:56:31 Z exit [amandad] <defunct>
amanda 30591 28434 20:56:46 Z exit [amandad] <defunct>
amanda 30607 28434 20:57:00 Z exit [amandad] <defunct>
amanda 30621 28434 20:57:15 Z exit [amandad] <defunct>
amanda 30651 28434 20:57:30 Z exit [amandad] <defunct>
amanda 30679 28434 20:57:45 Z exit [amandad] <defunct>
amanda 30694 28434 20:58:01 Z exit [amandad] <defunct>
amanda 30722 28434 20:58:15 Z exit [amandad] <defunct>
amanda 30737 28434 20:58:31 Z exit [amandad] <defunct>
amanda 30754 28434 20:58:45 Z exit [amandad] <defunct>
amanda 30769 28434 20:59:01 Z exit [amandad] <defunct>
amanda 30785 28434 20:59:15 Z exit [amandad] <defunct>
amanda 30800 28434 20:59:30 Z exit [amandad] <defunct>
amanda 30817 28434 20:59:46 Z exit [amandad] <defunct>
amanda 30837 28434 21:00:01 Z exit [amandad] <defunct>
amanda 30868 28434 21:00:15 Z exit [amandad] <defunct>
amanda 30896 28434 21:00:30 Z exit [amandad] <defunct>
amanda 30914 28434 21:00:46 Z exit [amandad] <defunct>
amanda 30929 28434 21:01:00 Z exit [amandad] <defunct>
amanda 30948 28434 21:01:15 Z exit [amandad] <defunct>
amanda 30964 28434 21:01:30 Z exit [amandad] <defunct>
amanda 30981 28434 21:01:46 Z exit [amandad] <defunct>
amanda 30996 28434 21:02:00 Z exit [amandad] <defunct>
amanda 31035 28434 21:02:15 Z exit [amandad] <defunct>
amanda 31051 28434 21:02:31 Z exit [amandad] <defunct>
amanda 31068 28434 21:02:46 Z exit [amandad] <defunct>
amanda 31083 28434 21:03:00 Z exit [amandad] <defunct>
amanda 31098 28434 21:03:15 Z exit [amandad] <defunct>
amanda 31114 28434 21:03:31 Z exit [amandad] <defunct>
amanda 31130 28434 21:03:45 Z exit [amandad] <defunct>
amanda 31145 28434 21:04:00 Z exit [amandad] <defunct>
amanda 31160 28434 21:04:16 Z exit [amandad] <defunct>
amanda 31176 28434 21:04:31 Z exit [amandad] <defunct>
amanda 31193 28434 21:04:45 Z exit [amandad] <defunct>
This is from a backup server with about 75 DLEs distributed
across 3 servers, with a total of about 2 TB of data. All servers
run Linux.
Please note that the backup itself finishes successfully
and restoring files from the backup works fine, so this is not
a showstopper. However, during the backup run Nagios reports those
zombie processes and the alerts fill our mailbox, so I would
really like to solve it.
I noticed the following changes between 2.5.2p1 and 2.6.1p2 which
might be related to the problem:
- --- amandad.c 2 May 2007 11:54:59 -0000 1.1.1.2
+++ amandad.c 24 Jul 2009 18:42:48 -0000 1.1.1.4
@@ -162,40 +175,21 @@
static action_t s_ackwait(struct active_service *, action_t, pkt_t *);
static void repfd_recv(void *);
+static void errfd_recv(void *);
static void timeout_repfd(void *);
static void protocol_recv(void *, pkt_t *, security_status_t);
static void process_readnetfd(void *);
static void process_writenetfd(void *, void *, ssize_t);
static struct active_service *service_new(security_handle_t *,
- - const char *, const char *);
+ const char *, service_t, const char *);
static void service_delete(struct active_service *);
static int writebuf(struct active_service *, const void *, size_t);
static ssize_t do_sendpkt(security_handle_t *handle, pkt_t *pkt);
- -
- -static void child_signal(int signal);
+static char *amandad_get_security_conf (char *, void *);
static const char *state2str(state_t);
static const char *action2str(action_t);
- -/*
- - * Harvests defunct processes...
- - */
- -
- -static void
- -child_signal(
- - int signal)
- -{
- - pid_t rp;
- -
- - (void)signal; /* Quite compiler warning */
- - /*
- - * Reap and child status and promptly ignore since we don't care...
- - */
- - do {
- - rp = waitpid(-1, NULL, WNOHANG);
- - } while (rp > 0);
- -}
- -
int
main(
int argc,
and
@@ -232,49 +233,28 @@
dbopen(DBG_SUBDIR_AMANDAD);
if(argv == NULL) {
- - error("argv == NULL\n");
+ error(_("argv == NULL\n"));
/*NOTREACHED*/
}
/* Don't die when child closes pipe */
signal(SIGPIPE, SIG_IGN);
- - /* Tell me when a child exits or dies... */
- - act.sa_handler = child_signal;
- - sigemptyset(&act.sa_mask);
- - act.sa_flags = 0;
- - if(sigaction(SIGCHLD, &act, &oact) != 0) {
- - error("error setting SIGCHLD handler: %s", strerror(errno));
- - /*NOTREACHED*/
- - }
+ /* Parse the configuration; we'll handle errors later */
+ config_init(CONFIG_INIT_CLIENT, NULL);
- - conffile = vstralloc(CONFIG_DIR, "/", "amanda-client.conf", NULL);
- - if (read_clientconf(conffile) > 0) {
- - error("error reading conffile: %s", conffile);
- - /*NOTREACHED*/
+ if (geteuid() == 0) {
+ check_running_as(RUNNING_AS_ROOT);
+ initgroups(CLIENT_LOGIN, get_client_gid());
+ setgid(get_client_gid());
+ setegid(get_client_gid());
+ seteuid(get_client_uid());
+ } else {
+ check_running_as(RUNNING_AS_CLIENT_LOGIN);
}
- - amfree(conffile);
- -
- -#ifdef USE_DBMALLOC
- - dbmalloc_info.start.size = malloc_inuse(&dbmalloc_info.start.hist);
- -#endif
erroutput_type = (ERR_INTERACTIVE|ERR_SYSLOG);
- -#ifdef FORCE_USERID
- - /* we'd rather not run as root */
- - if (geteuid() == 0) {
- - if(client_uid == (uid_t) -1) {
- - error("error [cannot find user %s in passwd file]\n",
CLIENT_LOGIN);
- - /*NOTREACHED*/
- - }
- - initgroups(CLIENT_LOGIN, client_gid);
- - setgid(client_gid);
- - setegid(client_gid);
- - seteuid(client_uid);
- - }
- -#endif /* FORCE_USERID */
- -
/*
* ad-hoc argument parsing
*
Is my diagnosis correct?
Why was the SIGCHLD handler removed in Amanda 2.6?
Is this a known problem?
Am I the only one seeing it (I found some older similar reports,
but no recent ones)?
- - andreas
- --
Andreas Haumer | mailto:[email protected]
*x Software + Systeme | http://www.xss.co.at/
Karmarschgasse 51/2/20 | Tel: +43-1-6060114-0
A-1100 Vienna, Austria | Fax: +43-1-6060114-71
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.10 (GNU/Linux)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org/
iD8DBQFMG1vsxJmyeGcXPhERAsP+AJ4qeUQ1TDMvZ4QgqY6J9uw82+3lTQCePGVB
Om7VkKTg+JOW3Dy0TwvnlNM=
=gZ8P
-----END PGP SIGNATURE-----