This diff fixes several problems in sasyncd's handling of the carp demote counter (carpdemote), along with related signal handling, cleanup, and logging issues.
Here's the problem I am seeing. Some background:

# ifconfig -g carp
carp: carp demote count 0

This is the normal case. If I poke the demote counter (essentially, forcing the machine to be a backup), I see this:

# ifconfig -g carp carpdemote 1
carp: carp demote count 1

This is normal. But if I start sasyncd, then kill it, the demote counter is raised, but never lowered afterwards:

# sasyncd -vv
# pkill sasyncd
# ifconfig -g carp
carp: carp demote count 2

The reason is that if sasyncd sees a nonzero demote count, it raises the demote counter by 1, and schedules an undemotion a minute later. This is presumably to allow time for whatever caused the demotion to finish before sasyncd lets the machine take over as master. BUT, if this scheduled undemote doesn't happen, the machine is left hanging with the counter raised. This problem also occasionally happens if carp is misconfigured (e.g. if carp_init fails). To fix, I tried to ensure that whatever sasyncd does to demote, it undoes on exit (both in the parent and the child). Anyway, on this box, this fixes all cases of demote getting set, but not unset.
This incorporates feedback and suggestions by hshoexer@, Please, anyone who actually uses sasyncd, give this a whirl and report whether everything works (or ideally, gets better): diff is against -current, obviously Index: carp.c =================================================================== RCS file: /cvs/src/usr.sbin/sasyncd/carp.c,v retrieving revision 1.12 diff -u -r1.12 carp.c --- carp.c 29 Jun 2010 21:25:37 -0000 1.12 +++ carp.c 1 Jul 2010 19:07:36 -0000 @@ -147,6 +147,13 @@ close(s); } +void +carp_undemote(void) +{ + if (carp_demoted) + carp_demote(-carp_demoted, 1); +} + const char* carp_state_name(enum RUNSTATE state) { Index: log.c =================================================================== RCS file: /cvs/src/usr.sbin/sasyncd/log.c,v retrieving revision 1.6 diff -u -r1.6 log.c --- log.c 11 Nov 2009 23:05:28 -0000 1.6 +++ log.c 1 Jul 2010 19:07:36 -0000 @@ -37,6 +37,7 @@ #include <syslog.h> #include <errno.h> #include <time.h> +#include <unistd.h> #include "sasyncd.h" @@ -52,10 +53,17 @@ static void log_output(char *msg) { + char *prefix; + + if (getuid()) + prefix = ""; + else + prefix = "[priv]"; + if (cfgstate.debug) - fprintf(stderr, "%s\n", msg); + fprintf(stderr, "%s %s\n", prefix, msg); else - syslog(LOG_CRIT, "%s", msg); + syslog(LOG_CRIT, "%s %s", prefix, msg); } void Index: monitor.c =================================================================== RCS file: /cvs/src/usr.sbin/sasyncd/monitor.c,v retrieving revision 1.14 diff -u -r1.14 monitor.c --- monitor.c 29 Jun 2010 18:10:04 -0000 1.14 +++ monitor.c 1 Jul 2010 19:07:36 -0000 @@ -56,6 +56,7 @@ volatile sig_atomic_t sigchld = 0; +static void set_monitor_signals(void); static void got_sigchld(int); static void sig_to_child(int); static void m_priv_pfkey_snap(int); @@ -86,10 +87,7 @@ strlcpy(root, pw->pw_dir, sizeof root); endpwent(); - signal(SIGCHLD, got_sigchld); - signal(SIGTERM, sig_to_child); - signal(SIGHUP, sig_to_child); - signal(SIGINT, sig_to_child); + 
set_monitor_signals(); m_state.pid = fork(); @@ -98,6 +96,8 @@ exit(1); } else if (m_state.pid == 0) { /* Child */ + set_slave_signals(); + m_state.s = p[0]; close(p[1]); @@ -122,6 +122,22 @@ } static void +set_monitor_signals(void) +{ + int n; + + for (n = 1; n < _NSIG; n++) + signal(n, SIG_DFL); + + signal(SIGCHLD, got_sigchld); + + /* Forward some signals to the child. */ + signal(SIGTERM, sig_to_child); + signal(SIGINT, sig_to_child); + signal(SIGHUP, sig_to_child); +} + +static void got_sigchld(int s) { sigchld = 1; @@ -130,8 +146,11 @@ static void sig_to_child(int s) { - if (m_state.pid != -1) + int oerrno = errno; + + if (m_state.pid > 0) kill(m_state.pid, s); + errno = oerrno; } static void @@ -228,7 +247,7 @@ } } - monitor_carpundemote(NULL); + carp_undemote(); if (!sigchld) log_msg(0, "monitor_loop: priv process exiting abnormally"); Index: sasyncd.c =================================================================== RCS file: /cvs/src/usr.sbin/sasyncd/sasyncd.c,v retrieving revision 1.20 diff -u -r1.20 sasyncd.c --- sasyncd.c 16 Jun 2010 17:39:05 -0000 1.20 +++ sasyncd.c 1 Jul 2010 19:07:36 -0000 @@ -52,6 +52,18 @@ daemon_shutdown++; } +void +set_slave_signals(void) +{ + int n; + + for (n = 1; n < _NSIG; n++) + signal(n, SIG_DFL); + + signal(SIGINT, sasyncd_stop); + signal(SIGTERM, sasyncd_stop); +} + static int sasyncd_run(pid_t ppid) { @@ -78,9 +90,6 @@ control_setrun(); - signal(SIGINT, sasyncd_stop); - signal(SIGTERM, sasyncd_stop); - timer_add("carp_undemote", CARP_DEMOTE_MAXTIME, monitor_carpundemote, NULL); @@ -200,19 +209,28 @@ exit(1); } + set_slave_signals(); + + /* + * if carpdemote > 0, either the system is starting up, or + * something funky is happening, so increment the demote counter. + * This is offset by a scheduled 'monitor_uncarpdemote" is sasyncd_run + * and buys us another CARP_DEMOTE_MAXTIME sec to sort out sync + * before we (potentially) take over as master. 
+ */ carp_demote(CARP_INC, 0); if (carp_init()) - return 1; + goto errout; if (pfkey_init(0)) - return 1; + goto errout; if (net_init()) - return 1; + goto errout; if (!cfgstate.debug) if (daemon(1, 0)) { perror("daemon()"); - exit(1); + goto errout; } if (monitor_init()) { @@ -229,7 +247,13 @@ net_shutdown(); pfkey_shutdown(); - return 0; + + _exit(0); + +errout: + /* Only for exiting _before_ privilege separation. */ + carp_undemote(); + exit(1); } /* Special for compiling with Boehms GC. See Makefile and sasyncd.h */ Index: sasyncd.h =================================================================== RCS file: /cvs/src/usr.sbin/sasyncd/sasyncd.h,v retrieving revision 1.15 diff -u -r1.15 sasyncd.h --- sasyncd.h 16 Jun 2010 17:39:05 -0000 1.15 +++ sasyncd.h 1 Jul 2010 19:07:36 -0000 @@ -116,6 +116,7 @@ int carp_init(void); void carp_check_state(void); void carp_demote(int, int); +void carp_undemote(void); void carp_update_state(enum RUNSTATE); void carp_set_rfd(fd_set *); void carp_read_message(fd_set *); @@ -165,6 +166,9 @@ int pfkey_set_promisc(void); void pfkey_shutdown(void); void pfkey_snapshot(void *); + +/* sasyncd.c */ +void set_slave_signals(void); /* timer.c */ void timer_init(void);