commit:     3673040722b75c0a4d06fbeb272f917c7d1ea7c4
Author:     William Hubbs <w.d.hubbs <AT> gmail <DOT> com>
AuthorDate: Tue May  9 23:20:52 2017 +0000
Commit:     William Hubbs <williamh <AT> gentoo <DOT> org>
CommitDate: Tue May  9 23:30:08 2017 +0000
URL:        https://gitweb.gentoo.org/proj/openrc.git/commit/?id=36730407

supervise-daemon: add a --respawn-limit option

Allow limiting the number of times supervise-daemon will attempt to respawn a
daemon once it has died to prevent infinite respawning. Also, set a
reasonable default limit (10 times in a 5 second period).

This is for issue #126.

 man/openrc-run.8          |  6 ++++++
 man/supervise-daemon.8    | 20 ++++++++++++++++++++
 sh/supervise-daemon.sh    |  1 +
 src/rc/supervise-daemon.c | 37 ++++++++++++++++++++++++++++++++++++-
 4 files changed, 63 insertions(+), 1 deletion(-)

diff --git a/man/openrc-run.8 b/man/openrc-run.8
index 25ec5b91..c7ac2ac1 100644
--- a/man/openrc-run.8
+++ b/man/openrc-run.8
@@ -167,6 +167,12 @@ Display name used for the above defined command.
 Process name to match when signaling the daemon.
 .It Ar stopsig
 Signal to send when stopping the daemon.
+.It Ar respawn_limit
+Respawn limit
+.Xr supervise-daemon 8
+will use for this daemon.  See
+.Xr supervise-daemon 8
+for more information about this setting.
 .It Ar retry
 Retry schedule to use when stopping the daemon. It can either be a
 timeout in seconds or multiple signal/timeout pairs (like SIGTERM/5).

diff --git a/man/supervise-daemon.8 b/man/supervise-daemon.8
index 06087675..43e74ef7 100644
--- a/man/supervise-daemon.8
+++ b/man/supervise-daemon.8
@@ -34,6 +34,8 @@
 .Ar user
 .Fl r , -chroot
 .Ar chrootpath
+.Fl R , -respawn-limit
+.Ar limit
 .Fl 1 , -stdout
 .Ar logfile
 .Fl 2 , -stderr
@@ -99,6 +101,24 @@ Modifies the scheduling priority of the daemon.
 .It Fl r , -chroot Ar path
 chroot to this directory before starting the daemon. All other paths, such
 as the path to the daemon, chdir and pidfile, should be relative to the chroot.
+.It Fl R , -respawn-limit Ar limit
+Control how aggressively
+.Nm
+will try to respawn a daemon when it fails to start. The limit argument
+can be a pair of integers separated by a colon or the string unlimited.
+.Pp
+If a pair of integers is given, the first is a maximum number of respawn
+attempts and the second is a time period. It should be interpreted as:
+If the daemon dies and has to be respawned more than <first number>
+times in any time period of <second number> seconds, exit and give up.
+.Pp
+For example, the default is 10:5.
+This means if the supervisor respawns a daemon more than ten times
+in any 5 second period, it gives up and exits.
+.Pp
+If unlimited is given as the limit, it means that the supervisor will
+not exit or give up, no matter how many times the daemon it is
+supervising needs to be respawned.
 .It Fl u , -user Ar user
 Start the daemon as the specified user.
 .It Fl 1 , -stdout Ar logfile

diff --git a/sh/supervise-daemon.sh b/sh/supervise-daemon.sh
index bff68a4c..c6130edb 100644
--- a/sh/supervise-daemon.sh
+++ b/sh/supervise-daemon.sh
@@ -25,6 +25,7 @@ supervise_start()
        eval supervise-daemon --start \
                ${chroot:+--chroot} $chroot \
                ${pidfile:+--pidfile} $pidfile \
+               ${respawn_limit:+--respawn-limit} $respawn_limit \
                ${command_user+--user} $command_user \
                $supervise_daemon_args \
                $command \

diff --git a/src/rc/supervise-daemon.c b/src/rc/supervise-daemon.c
index 2080e4a6..bd24d782 100644
--- a/src/rc/supervise-daemon.c
+++ b/src/rc/supervise-daemon.c
@@ -66,7 +66,7 @@ static struct pam_conv conv = { NULL, NULL};
 
 const char *applet = NULL;
 const char *extraopts = NULL;
-const char *getoptstring = "d:e:g:I:Kk:N:p:r:Su:1:2:" \
+const char *getoptstring = "d:e:g:I:Kk:N:p:r:R:Su:1:2:" \
        getoptstring_COMMON;
 const struct option longopts[] = {
        { "chdir",        1, NULL, 'd'},
@@ -79,6 +79,7 @@ const struct option longopts[] = {
        { "pidfile",      1, NULL, 'p'},
        { "user",         1, NULL, 'u'},
        { "chroot",       1, NULL, 'r'},
+       { "respawn-limit",        1, NULL, 'R'},
        { "start",        0, NULL, 'S'},
        { "stdout",       1, NULL, '1'},
        { "stderr",       1, NULL, '2'},
@@ -95,6 +96,7 @@ const char * const longopts_help[] = {
        "Match pid found in this file",
        "Change the process user",
        "Chroot to this directory",
+       "set a respawn limit",
        "Start daemon",
        "Redirect stdout to file",
        "Redirect stderr to file",
@@ -424,7 +426,13 @@ int main(int argc, char **argv)
        char *p;
        char *token;
        int i;
+       int n;
        char exec_file[PATH_MAX];
+       int respawn_count = 0;
+       int respawn_max = 10;
+       int respawn_period = 5;
+       time_t respawn_now= 0;
+       time_t first_spawn= 0;
        struct passwd *pw;
        struct group *gr;
        FILE *fp;
@@ -519,6 +527,17 @@ int main(int argc, char **argv)
                        ch_root = optarg;
                        break;
 
+               case 'R':  /* --respawn-limit unlimited|count:period */
+                       if (strcasecmp(optarg, "unlimited") == 0) {
+                               respawn_max = 0;
+                               respawn_period = 0;
+                       } else {
+                               n = sscanf(optarg, "%d:%d", &respawn_max, 
&respawn_period);
+                               if (n   != 2 || respawn_max < 1 || 
respawn_period < 1)
+                                       eerrorx("Invalid respawn-limit setting 
'%s'", optarg);
+                       }
+                       break;
+
                case 'u':  /* --user <username>|<uid> */
                {
                        p = optarg;
@@ -713,6 +732,22 @@ int main(int argc, char **argv)
                                syslog(LOG_INFO, "stopping %s, pid %d", exec, 
child_pid);
                                kill(child_pid, SIGTERM);
                        } else {
+                               if (respawn_max > 0 && respawn_period > 0) {
+                                       respawn_now = time(NULL);
+                                       if (first_spawn == 0)
+                                               first_spawn = respawn_now;
+                                       if (respawn_now - first_spawn > 
respawn_period) {
+                                               respawn_count = 0;
+                                               first_spawn = 0;
+                                       } else
+                                               respawn_count++;
+                                       if (respawn_count >= respawn_max) {
+                                               syslog(LOG_INFO, "respawned 
\"%s\" too many times, "
+                                                               "exiting", 
exec);
+                                               exiting = true;
+                                               continue;
+                                       }
+                               }
                                if (WIFEXITED(i))
                                        syslog(LOG_INFO, "%s, pid %d, exited 
with return code %d",
                                                        exec, child_pid, 
WEXITSTATUS(i));

Reply via email to