civodul pushed a commit to branch devel in repository shepherd. commit b4ca2abbfa072a8aeba3ef5483a6fc1a09bfc207 Author: Ludovic Courtès <l...@gnu.org> AuthorDate: Fri May 10 21:29:40 2024 +0200
timer: Log the start time and end time of each action. So far we’d only record the end time of processes by reusing the ‘record-process-exit-status’ mechanism. With this change, timers preserve that information internally and with additional details: the start time, end time, and status, including for actions implemented as thunks rather than processes. * modules/shepherd/service/timer.scm (serializer-timer): Rename to… (serialize-timer): … this. Add ‘past-runs’ entry. (timer-request): New procedure. (timer-processes): Rewrite in terms of ‘timer-request’. (timer-past-runs): New procedure. (%past-run-log-size): New variable. (make-timer-constructor): Use ‘let-loop’. Change ‘processes’ to a list of PID/start time pairs. In 'process-terminated handler, replace ‘record-process-exit-status’ by an addition to ‘past-runs’. Add 'past-runs handler. In 'timeout handler, capture start and end time, as well as any exception thrown. * modules/shepherd/scripts/herd.scm (seconds->string): New procedure. (display-process-exit-status): Add ‘duration’ parameter and honor it. (display-service-status): Adjust timer handling for child processes and for past runs. * modules/shepherd/service.scm (record-process-exit-status): Do not export. * tests/services/timer.sh: Test logging of completion for thunks. --- modules/shepherd/scripts/herd.scm | 110 ++++++++++++++++++++++++++++--------- modules/shepherd/service.scm | 1 - modules/shepherd/service/timer.scm | 103 +++++++++++++++++++++++----------- tests/services/timer.sh | 13 +++++ 4 files changed, 168 insertions(+), 59 deletions(-) diff --git a/modules/shepherd/scripts/herd.scm b/modules/shepherd/scripts/herd.scm index d8db6f7..503fb4a 100644 --- a/modules/shepherd/scripts/herd.scm +++ b/modules/shepherd/scripts/herd.scm @@ -265,6 +265,24 @@ transient status for too long." ;; relative date string like "2 hours ago". (format #f (l10n "~a (~a)") absolute relative)) +(define (seconds->string seconds) + "Return a string representing @var{seconds} as a duration in a +human-friendly way." + (cond ((< seconds 180) + ;; TRANSLATORS: This string and the following ones denote a duration. + ;; It ends up being inserted in a sentence like "Process terminated + ;; after 10 seconds". (Arguably not ideal.) + (format #f (l10n "~h second" "~h seconds" seconds) + seconds)) + ((< seconds (* 180 60)) + (let ((minutes (quotient seconds 60))) + (format #f (l10n "~h minute" "~h minutes" minutes) + minutes))) + (else + (let ((hours (quotient seconds 3600))) + (format #f (l10n "~h hour" "~h hours" hours) + hours))))) + (define %default-log-history-size ;; Number of log lines displayed by default. 10) @@ -290,29 +308,48 @@ relevant bits quoted according to POSIX shell rules." str)) command))) -(define (display-process-exit-status status) +(define* (display-process-exit-status status #:optional duration) "Display @var{status}, a process status as returned by @code{waitpid}, in a -human-friendly way." +human-friendly way. When @var{duration} is provided, it is the number of +seconds during which the process ran." (cond ((zero? status) - (format #t (l10n "Process exited successfully.~%"))) + (if duration + (format #t (l10n "Process exited successfully after ~a.~%") + (seconds->string duration)) + (format #t (l10n "Process exited successfully.~%")))) ((status:exit-val status) => (lambda (code) - (format #t (highlight/error - (l10n "Process exited with code ~a.~%")) - code))) + (if duration + (format #t + (highlight/error + (l10n "Process exited with code ~a after ~a.~%")) + code (seconds->string duration)) + (format #t (highlight/error + (l10n "Process exited with code ~a.~%")) + code)))) ((status:term-sig status) => (lambda (signal) - (format #t (highlight/error - (l10n "Process terminated with signal ~a.~%")) - signal))) + (if duration + (format #t + (highlight/error + (l10n "Process terminated with signal ~a after ~a.~%")) + signal (seconds->string duration)) + (format #t (highlight/error + (l10n "Process terminated with signal ~a.~%")) + signal)))) ((status:stop-sig status) => (lambda (signal) - (format #t (highlight/error - (l10n "Process stopped with signal ~a.~%")) - signal))))) + (if duration + (format #t + (highlight/error + (l10n "Process stopped with signal ~a after ~a.~%")) + signal (seconds->string duration)) + (format #t (highlight/error + (l10n "Process stopped with signal ~a.~%")) + signal)))))) (define* (display-timer-events event #:optional (count 5)) "Display the @var{count} upcoming timer alarms that match @var{event}, a @@ -409,7 +446,9 @@ calendar event." (format #t (l10n " Child process:~{ ~a~}~%" " Child processes:~{ ~a~}~%" (length processes)) - processes))) + (match processes + (((pids . start-times) ...) + pids))))) ('procedure (format #t (l10n " Periodically running Scheme code.~%"))) (_ #f))) @@ -479,20 +518,37 @@ to upgrade).~%")))) (when show-recent-messages? (match (live-service-running-value service) - (('timer . _) - (match (live-service-process-exit-statuses service) - (() - #t) - (statuses - (newline) - (format #t (highlight (l10n "Recent runs:~%"))) - (for-each (match-lambda - ((status . time) - (format #t " ~a" - (strftime default-logfile-date-format - (localtime time))) - (display-process-exit-status status))) - (reverse (at-most timer-history-size statuses)))))) + (('timer ('version 0) properties ...) + (alist-let* properties (past-runs) + (match past-runs + ((or () #f) + #t) + (statuses + (newline) + (format #t (highlight (l10n "Recent runs:~%"))) + (for-each (match-lambda + ((status end start) + (format #t " ~a" + (strftime default-logfile-date-format + (localtime end))) + (match status + ((? integer?) + (display-process-exit-status status + (- end start))) + ('success + (format #t (l10n "Completed in ~a.~%") + (seconds->string (- end start)))) + (('exception key args ...) + (format #t (highlight/error + (l10n "Exception thrown after ~a: ~a~%")) + (seconds->string (- end start)) + (string-trim-right + (call-with-output-string + (lambda (port) + (print-exception port #f + key args)))))) + (_ #f)))) + (reverse (at-most timer-history-size statuses))))))) (_ #f)) (match (live-service-recent-messages service) diff --git a/modules/shepherd/service.scm b/modules/shepherd/service.scm index e3b7486..ce9cbb9 100644 --- a/modules/shepherd/service.scm +++ b/modules/shepherd/service.scm @@ -105,7 +105,6 @@ spawn-command spawn-shell-command terminate-process - record-process-exit-status %precious-signals register-services unregister-services diff --git a/modules/shepherd/service/timer.scm b/modules/shepherd/service/timer.scm index e6837fe..343dd77 100644 --- a/modules/shepherd/service/timer.scm +++ b/modules/shepherd/service/timer.scm @@ -535,7 +535,7 @@ list, to be executed as @var{user} and @var{group}, with the given #:resource-limits resource-limits)) (_ #f))) -(define-record-type-serializer (serializer-timer (timer <timer>)) +(define-record-type-serializer (serialize-timer (timer <timer>)) ;; Serialize TIMER to clients can inspect it. `(timer (version 0) (event ,(match (timer-event timer) @@ -545,17 +545,35 @@ list, to be executed as @var{user} and @var{group}, with the given (action ,(match (timer-action timer) ((? command? command) (command->sexp command)) (_ 'procedure))) - (processes ,(timer-processes timer)))) + (processes ,(timer-processes timer)) + (past-runs ,(ring-buffer->list (timer-past-runs timer))))) -(define (timer-processes timer) - "Return the list of PIDs of the currently running processes started by -@var{timer}." - (let ((reply (make-channel))) - (put-message (timer-channel timer) `(processes ,reply)) - (get-message reply))) +(define (timer-request message) + (lambda (timer) + "Send @var{message} to @var{timer} and return its reply." + (let ((reply (make-channel))) + (put-message (timer-channel timer) `(,message ,reply)) + (get-message reply)))) + +(define timer-processes + ;; Return the list of PID/start time pairs of the currently running + ;; processes started by the given timer. + (timer-request 'processes)) + +(define timer-past-runs + ;; Return the list of past runs as a ring buffer. Each run has the form + ;; (STATUS END START). When the timer's action is a command, STATUS is an + ;; integer, its exit status; otherwise, STATUS is either 'success or + ;; '(exception ...). END and START are the completion and start times, + ;; respectively, as integers (seconds since the Epoch). + (timer-request 'past-runs)) (define sleep (@ (fibers) sleep)) +(define %past-run-log-size + ;; Maximum number of entries the log of timer runs. + 50) + (define* (make-timer-constructor event action #:key log-file wait-for-termination?) @@ -577,8 +595,9 @@ instances running concurrently." (name (service-canonical-name (current-service)))) (spawn-fiber (lambda () - (let loop ((processes '()) - (termination #f)) + (let-loop loop ((processes '()) ;PID/start time + (past-runs (ring-buffer %past-run-log-size)) + (termination #f)) (match (if (or termination (and (pair? processes) wait-for-termination?)) (get-message channel) @@ -592,26 +611,37 @@ instances running concurrently." "Terminating timer '~a' with ~a processes running." (length processes)) name (length processes)) - (for-each (lambda (pid) - (terminate-process pid SIGHUP)) + (for-each (match-lambda + ((pid . _) + (terminate-process pid SIGHUP))) processes) ;; If there are processes left, keep going until they're gone. (if (pair? processes) - (loop processes reply) + (loop (termination reply)) (put-message reply #t))) (('process-terminated pid status) ;; Process PID completed. - (local-output - (l10n "Process ~a of timer '~a' terminated with status ~a.") - pid name status) - (record-process-exit-status pid status) - (let ((processes (delv pid processes))) - (if (and termination (null? processes)) + (let ((start-time (assoc-ref processes pid)) + (end-time ((@ (guile) current-time))) + (remaining (alist-delete pid processes))) + (local-output + (l10n "Process ~a of timer '~a' terminated with status ~a \ +after ~a seconds.") + pid name status + (- end-time start-time)) + (if (and termination (null? remaining)) (put-message termination #t) ;done - (loop processes termination)))) + (loop (processes remaining) + (past-runs + (ring-buffer-insert + (list status end-time start-time) + past-runs)))))) (('processes reply) (put-message reply processes) - (loop processes termination)) + (loop)) + (('past-runs reply) + (put-message reply past-runs) + (loop)) ('timeout ;; Time to perform ACTION. (if (command? action) @@ -633,16 +663,27 @@ instances running concurrently." (local-output (l10n "Timer '~a' spawned process ~a.") name pid) - (loop (cons pid processes) termination)) - (begin - (catch #t - action - (lambda (key . args) - (local-output - (l10n "Exception caught while calling action of \ + (loop (processes + (alist-cons pid ((@ (guile) current-time)) + processes)))) + (let ((start-time ((@ (guile) current-time)))) + (define result + (catch #t + (lambda () + (action) + 'success) + (lambda (key . args) + (local-output + (l10n "Exception caught while calling action of \ timer '~a': ~s") - name (cons key args)))) - (loop processes termination)))) + name (cons key args)) + `(exception ,key ,@args)))) + + (loop (past-runs + (ring-buffer-insert + (list result ((@ (guile) current-time)) start-time) + past-runs)))))) + ('overslept ;; Reached when resuming from sleep state: we slept ;; significantly more than the requested number of seconds. To @@ -651,7 +692,7 @@ timer '~a': ~s") (local-output (l10n "Waiting anew for timer '~a' (resuming \ from sleep state?).") name) - (loop processes termination)))))) + (loop)))))) (timer channel event action)))) diff --git a/tests/services/timer.sh b/tests/services/timer.sh index a4bb165..2158a43 100644 --- a/tests/services/timer.sh +++ b/tests/services/timer.sh @@ -49,6 +49,12 @@ cat > "$conf" <<EOF (calendar-event #:seconds (iota 60)) (lambda () (display "Hello from procedure.\n"))) #:stop (make-timer-destructor)) + (service '(timer-that-throws) + #:start (make-timer-constructor + (calendar-event #:seconds (iota 60)) + (lambda () (display "Throwing!\n") (mkdir "/"))) + #:stop (make-timer-destructor) + #:actions (list timer-trigger-action)) (service '(endless-timer) #:start (make-timer-constructor (calendar-event #:seconds (iota 60)) @@ -61,6 +67,7 @@ cat > "$conf" <<EOF #:months (if (<= (date-month (current-date)) 6) '(12) + '(1))) (command (quote ("sh" "-c" "echo Triggered from \$PWD.")) #:directory "$PWD")) @@ -88,8 +95,14 @@ $herd stop timer-with-command $herd start timer-with-procedure sleep 2 +$herd status timer-with-procedure | grep "Completed in" # recent runs grep "Hello from procedure" "$log" +$herd start timer-that-throws +$herd trigger timer-that-throws +grep "Throwing" "$log" +$herd status timer-that-throws | grep "Exception thrown.*mkdir" # recent runs + rm -f "$service_pid" $herd start endless-timer sleep 2