czan pushed a commit to branch develop in repository shepherd. commit 0c4d8ec40bc97481eec6dab2b53bb4c33e897862 Author: Carlo Zancanaro <ca...@zancanaro.id.au> Date: Sat Aug 25 20:32:11 2018 +1000
service: Restart dependent services on service restart * modules/shepherd/service.scm (required-by?): New procedure. (stop): Return a list of canonical-names for stopped dependent services, including transitive dependencies. (action)[restart]: Start services based on the return value of stop. (fold-services): New procedure. * doc/shepherd.texi (Jump Start): Clarify details about new behaviour of restart. * tests/restart.sh: New file. * Makefile.am (TESTS): Add tests/restart.sh. --- Makefile.am | 1 + doc/shepherd.texi | 12 +++--- modules/shepherd/service.scm | 90 +++++++++++++++++++++++++++----------------- tests/restart.sh | 77 +++++++++++++++++++++++++++++++++++++ 4 files changed, 139 insertions(+), 41 deletions(-) diff --git a/Makefile.am b/Makefile.am index 4322d7f..d9e21e9 100644 --- a/Makefile.am +++ b/Makefile.am @@ -187,6 +187,7 @@ TESTS = \ tests/replacement.sh \ tests/respawn.sh \ tests/respawn-throttling.sh \ + tests/restart.sh \ tests/misbehaved-client.sh \ tests/no-home.sh \ tests/pid-file.sh \ diff --git a/doc/shepherd.texi b/doc/shepherd.texi index 1de6d80..f0498d6 100644 --- a/doc/shepherd.texi +++ b/doc/shepherd.texi @@ -267,12 +267,12 @@ this inconsistency is just to make it more intuitive to get information about the status of a service, see below). These actions are @code{restart} and @code{status}. The default -implementation of @code{restart} calls @code{stop} and @code{start} on -the affected service in order, the @code{status} action displays some -general information about the service, like what it provides, what it -depends on and with which other services it conflicts (because they -provide a virtual service that is also provided by that particular -service). +implementation of @code{restart} calls @code{stop} and @code{start} on the +affected service, taking care to also restart any dependent services. The +default implementation of @code{status} displays some general information +about the service, like what it provides, what it depends on and with which +other services it conflicts (because they provide a virtual service that is +also provided by that particular service). Another special action is @code{list-actions}, which displays a list of the additional actions a service provides; obviously, it can also diff --git a/modules/shepherd/service.scm b/modules/shepherd/service.scm index 24c9224..be86621 100644 --- a/modules/shepherd/service.scm +++ b/modules/shepherd/service.scm @@ -358,61 +358,72 @@ NEW-SERVICE." (for-each remove-service (provided-by old-service)) (register-services new-service))) +(define (required-by? service dependent) + "Returns #t if DEPENDENT directly requires SERVICE in order to run. Returns +#f otherwise." + (and (find (lambda (dependency) + (memq dependency (provided-by service))) + (required-by dependent)) + #t)) + ;; Stop the service, including services that depend on it. If the ;; latter fails, continue anyway. Return `#f' if it could be stopped. -(define-method (stop (obj <service>) . args) +(define-method (stop (service <service>) . args) + "Stop SERVICE, and any services which depend on it. Returns a list of +canonical names for all of the services which have been stopped (including +transitive dependent services). This method will print a warning if SERVICE +is not already running, and will return SERVICE's canonical name in a list." ;; Block asyncs so the SIGCHLD handler doesn't execute concurrently. - ;; Notably, that makes sure the handler processes the SIGCHLD for OBJ's - ;; process once we're done; otherwise, it could end up respawning OBJ. + ;; Notably, that makes sure the handler processes the SIGCHLD for SERVICE's + ;; process once we're done; otherwise, it could end up respawning SERVICE. (call-with-blocked-asyncs (lambda () - (if (not (running? obj)) - (local-output "Service ~a is not running." (canonical-name obj)) - (if (slot-ref obj 'stop-delay?) + (if (not (running? service)) + (begin + (local-output "Service ~a is not running." (canonical-name service)) + (list (canonical-name service))) + (if (slot-ref service 'stop-delay?) (begin - (slot-set! obj 'waiting-for-termination? #t) + (slot-set! service 'waiting-for-termination? #t) (local-output "Service ~a pending to be stopped." - (canonical-name obj))) - (begin - ;; Stop services that depend on it. - (for-each-service - (lambda (serv) - (and (running? serv) - (for-each (lambda (sym) - (and (memq sym (provided-by obj)) - (stop serv))) - (required-by serv))))) - + (canonical-name service)) + (list (canonical-name service))) + (let ((name (canonical-name service)) + (stopped-dependents (fold-services (lambda (other acc) + (if (and (running? other) + (required-by? service other)) + (append (stop other) acc) + acc)) + '()))) ;; Stop the service itself. (catch #t (lambda () - (apply (slot-ref obj 'stop) - (slot-ref obj 'running) + (apply (slot-ref service 'stop) + (slot-ref service 'running) args)) (lambda (key . args) ;; Special case: 'root' may quit. - (and (eq? root-service obj) + (and (eq? root-service service) (eq? key 'quit) (apply quit args)) (caught-error key args))) - ;; OBJ is no longer running. - (slot-set! obj 'running #f) + ;; SERVICE is no longer running. + (slot-set! service 'running #f) ;; Reset the list of respawns. - (slot-set! obj 'last-respawns '()) + (slot-set! service 'last-respawns '()) ;; Replace the service with its replacement, if it has one - (let ((replacement (slot-ref obj 'replacement))) + (let ((replacement (slot-ref service 'replacement))) (when replacement - (replace-service obj replacement))) + (replace-service service replacement))) ;; Status message. - (let ((name (canonical-name obj))) - (if (running? obj) - (local-output "Service ~a could not be stopped." name) - (local-output "Service ~a has been stopped." name)))))) - (slot-ref obj 'running)))) + (if (running? service) + (local-output "Service ~a could not be stopped." name) + (local-output "Service ~a has been stopped." name)) + (cons name stopped-dependents))))))) ;; Call action THE-ACTION with ARGS. (define-method (action (obj <service>) the-action . args) @@ -423,10 +434,9 @@ NEW-SERVICE." ;; Restarting is done in the obvious way. ((restart) (lambda (running . args) - (if running - (stop obj) - (local-output "~a was not running." (canonical-name obj))) - (apply start obj args))) + (let ((stopped-services (stop obj))) + (for-each start stopped-services) + #t))) ((status) ;; Return the service itself. It is automatically converted to an sexp ;; via 'result->sexp' and sent to the client. @@ -960,6 +970,16 @@ Return #f if service is not found." (eq? name (canonical-name service))) services)) +(define (fold-services proc init) + "Apply PROC to the registered services to build a result, and return that +result. Works in a manner akin to `fold' from SRFI-1." + (hash-fold (lambda (name services acc) + (let ((service (lookup-canonical-service name services))) + (if service + (proc service acc) + acc))) + init %services)) + (define (for-each-service proc) "Call PROC for each registered service." (hash-for-each (lambda (name services) diff --git a/tests/restart.sh b/tests/restart.sh new file mode 100644 index 0000000..92a1f79 --- /dev/null +++ b/tests/restart.sh @@ -0,0 +1,77 @@ +# GNU Shepherd --- Test restarting services. +# Copyright © 2013, 2014, 2016 Ludovic Courtès <l...@gnu.org> +# Copyright © 2018 Carlo Zancanaro <ca...@zancanaro.id.au> +# +# This file is part of the GNU Shepherd. +# +# The GNU Shepherd is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or (at +# your option) any later version. +# +# The GNU Shepherd is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with the GNU Shepherd. If not, see <http://www.gnu.org/licenses/>. + +shepherd --version +herd --version + +socket="t-socket-$$" +conf="t-conf-$$" +log="t-log-$$" +pid="t-pid-$$" + +herd="herd -s $socket" + +trap "cat $log || true ; + rm -f $socket $conf $log; + test -f $pid && kill \`cat $pid\` || true ; rm -f $pid" EXIT + +cat > "$conf"<<EOF +(register-services + (make <service> + #:provides '(test1) + #:start (const #t) + #:stop (const #t)) + (make <service> + #:provides '(test2) + #:requires '(test1) + #:start (const #t) + #:stop (const #t)) + (make <service> + #:provides '(test3) + #:requires '(test2) + #:start (const #t) + #:stop (const #t))) +EOF + +rm -f "$pid" +shepherd -I -s "$socket" -c "$conf" -l "$log" --pid="$pid" & + +while ! test -f "$pid" ; do sleep 0.3 ; done + +# Start some test services, and make sure they behave how we expect +$herd start test1 +$herd start test2 +$herd status test1 | grep started +$herd status test2 | grep started + +# Restart test1 and make sure that both services are still running (ie. that +# test2 hasn't been stopped) +$herd restart test1 +$herd status test1 | grep started +$herd status test2 | grep started + +# Now let's test with a transitive dependency +$herd start test3 +$herd status test3 | grep started + +# After restarting test1 we want test3 to still be running +$herd restart test1 +$herd status test1 | grep started +$herd status test2 | grep started +$herd status test3 | grep started