civodul pushed a commit to branch devel
in repository shepherd.

commit b4ca2abbfa072a8aeba3ef5483a6fc1a09bfc207
Author: Ludovic Courtès <l...@gnu.org>
AuthorDate: Fri May 10 21:29:40 2024 +0200

    timer: Log the start time and end time of each action.
    
    So far we’d only record the end time of processes by reusing the
    ‘record-process-exit-status’ mechanism.  With this change, timers
    preserve that information internally and with additional details: the
    start time, end time, and status, including for actions implemented as
    thunks rather than processes.
    
    * modules/shepherd/service/timer.scm (serializer-timer): Rename to…
    (serialize-timer): … this.  Add ‘past-runs’ entry.
    (timer-request): New procedure.
    (timer-processes): Rewrite in terms of ‘timer-request’.
    (timer-past-runs): New procedure.
    (%past-run-log-size): New variable.
    (make-timer-constructor): Use ‘let-loop’.  Change ‘processes’ to a list
    of PID/start time pairs.  In 'process-terminated handler, replace
    ‘record-process-exit-status’ by an addition to ‘past-runs’.  Add
    'past-runs handler.  In 'timeout handler, capture start and end time, as
    well as any exception thrown.
    * modules/shepherd/scripts/herd.scm (seconds->string): New procedure.
    (display-process-exit-status): Add ‘duration’ parameter and honor it.
    (display-service-status): Adjust timer handling for child processes and
    for past runs.
    * modules/shepherd/service.scm (record-process-exit-status): Do not
    export.
    * tests/services/timer.sh: Test logging of completion for thunks.
---
 modules/shepherd/scripts/herd.scm  | 110 ++++++++++++++++++++++++++++---------
 modules/shepherd/service.scm       |   1 -
 modules/shepherd/service/timer.scm | 103 +++++++++++++++++++++++-----------
 tests/services/timer.sh            |  13 +++++
 4 files changed, 168 insertions(+), 59 deletions(-)

diff --git a/modules/shepherd/scripts/herd.scm 
b/modules/shepherd/scripts/herd.scm
index d8db6f7..503fb4a 100644
--- a/modules/shepherd/scripts/herd.scm
+++ b/modules/shepherd/scripts/herd.scm
@@ -265,6 +265,24 @@ transient status for too long."
   ;; relative date string like "2 hours ago".
   (format #f (l10n "~a (~a)") absolute relative))
 
+(define (seconds->string seconds)
+  "Return a string representing @var{seconds} as a duration in a
+human-friendly way."
+  (cond ((< seconds 180)
+         ;; TRANSLATORS: This string and the following ones denote a duration.
+         ;; It ends up being inserted in a sentence like "Process terminated
+         ;; after 10 seconds".  (Arguably not ideal.)
+         (format #f (l10n "~h second" "~h seconds" seconds)
+                 seconds))
+        ((< seconds (* 180 60))
+         (let ((minutes (quotient seconds 60)))
+           (format #f (l10n "~h minute" "~h minutes" minutes)
+                   minutes)))
+        (else
+         (let ((hours (quotient seconds 3600)))
+           (format #f (l10n "~h hour" "~h hours" hours)
+                   hours)))))
+
 (define %default-log-history-size
   ;; Number of log lines displayed by default.
   10)
@@ -290,29 +308,48 @@ relevant bits quoted according to POSIX shell rules."
               str))
         command)))
 
-(define (display-process-exit-status status)
+(define* (display-process-exit-status status #:optional duration)
   "Display @var{status}, a process status as returned by @code{waitpid}, in a
-human-friendly way."
+human-friendly way.  When @var{duration} is provided, it is the number of
+seconds during which the process ran."
   (cond ((zero? status)
-         (format #t (l10n "Process exited successfully.~%")))
+         (if duration
+             (format #t (l10n "Process exited successfully after ~a.~%")
+                     (seconds->string duration))
+             (format #t (l10n "Process exited successfully.~%"))))
         ((status:exit-val status)
          =>
          (lambda (code)
-           (format #t (highlight/error
-                       (l10n "Process exited with code ~a.~%"))
-                   code)))
+           (if duration
+               (format #t
+                       (highlight/error
+                        (l10n "Process exited with code ~a after ~a.~%"))
+                       code (seconds->string duration))
+               (format #t (highlight/error
+                           (l10n "Process exited with code ~a.~%"))
+                       code))))
         ((status:term-sig status)
          =>
          (lambda (signal)
-           (format #t (highlight/error
-                       (l10n "Process terminated with signal ~a.~%"))
-                   signal)))
+           (if duration
+               (format #t
+                       (highlight/error
+                        (l10n "Process terminated with signal ~a after ~a.~%"))
+                       signal (seconds->string duration))
+               (format #t (highlight/error
+                           (l10n "Process terminated with signal ~a.~%"))
+                       signal))))
         ((status:stop-sig status)
          =>
          (lambda (signal)
-           (format #t (highlight/error
-                       (l10n "Process stopped with signal ~a.~%"))
-                   signal)))))
+           (if duration
+               (format #t
+                       (highlight/error
+                        (l10n "Process stopped with signal ~a after ~a.~%"))
+                       signal (seconds->string duration))
+               (format #t (highlight/error
+                           (l10n "Process stopped with signal ~a.~%"))
+                       signal))))))
 
 (define* (display-timer-events event #:optional (count 5))
   "Display the @var{count} upcoming timer alarms that match @var{event}, a
@@ -409,7 +446,9 @@ calendar event."
              (format #t (l10n "  Child process:~{ ~a~}~%"
                               "  Child processes:~{ ~a~}~%"
                               (length processes))
-                     processes)))
+                     (match processes
+                       (((pids . start-times) ...)
+                        pids)))))
           ('procedure
            (format #t (l10n "  Periodically running Scheme code.~%")))
           (_ #f)))
@@ -479,20 +518,37 @@ to upgrade).~%"))))
 
   (when show-recent-messages?
     (match (live-service-running-value service)
-      (('timer . _)
-       (match (live-service-process-exit-statuses service)
-         (()
-          #t)
-         (statuses
-          (newline)
-          (format #t (highlight (l10n "Recent runs:~%")))
-          (for-each (match-lambda
-                      ((status . time)
-                       (format #t "  ~a"
-                               (strftime default-logfile-date-format
-                                         (localtime time)))
-                       (display-process-exit-status status)))
-                    (reverse (at-most timer-history-size statuses))))))
+      (('timer ('version 0) properties ...)
+       (alist-let* properties (past-runs)
+         (match past-runs
+           ((or () #f)
+            #t)
+           (statuses
+            (newline)
+            (format #t (highlight (l10n "Recent runs:~%")))
+            (for-each (match-lambda
+                        ((status end start)
+                         (format #t "  ~a"
+                                 (strftime default-logfile-date-format
+                                           (localtime end)))
+                         (match status
+                           ((? integer?)
+                            (display-process-exit-status status
+                                                         (- end start)))
+                           ('success
+                            (format #t (l10n "Completed in ~a.~%")
+                                    (seconds->string (- end start))))
+                           (('exception key args ...)
+                            (format #t (highlight/error
+                                        (l10n "Exception thrown after ~a: 
~a~%"))
+                                    (seconds->string (- end start))
+                                    (string-trim-right
+                                     (call-with-output-string
+                                       (lambda (port)
+                                         (print-exception port #f
+                                                          key args))))))
+                           (_ #f))))
+                      (reverse (at-most timer-history-size statuses)))))))
       (_ #f))
 
     (match (live-service-recent-messages service)
diff --git a/modules/shepherd/service.scm b/modules/shepherd/service.scm
index e3b7486..ce9cbb9 100644
--- a/modules/shepherd/service.scm
+++ b/modules/shepherd/service.scm
@@ -105,7 +105,6 @@
             spawn-command
             spawn-shell-command
             terminate-process
-            record-process-exit-status
             %precious-signals
             register-services
             unregister-services
diff --git a/modules/shepherd/service/timer.scm 
b/modules/shepherd/service/timer.scm
index e6837fe..343dd77 100644
--- a/modules/shepherd/service/timer.scm
+++ b/modules/shepherd/service/timer.scm
@@ -535,7 +535,7 @@ list, to be executed as @var{user} and @var{group}, with 
the given
               #:resource-limits resource-limits))
     (_ #f)))
 
-(define-record-type-serializer (serializer-timer (timer <timer>))
+(define-record-type-serializer (serialize-timer (timer <timer>))
   ;; Serialize TIMER to clients can inspect it.
   `(timer (version 0)
           (event ,(match (timer-event timer)
@@ -545,17 +545,35 @@ list, to be executed as @var{user} and @var{group}, with 
the given
           (action ,(match (timer-action timer)
                      ((? command? command) (command->sexp command))
                      (_ 'procedure)))
-          (processes ,(timer-processes timer))))
+          (processes ,(timer-processes timer))
+          (past-runs ,(ring-buffer->list (timer-past-runs timer)))))
 
-(define (timer-processes timer)
-  "Return the list of PIDs of the currently running processes started by
-@var{timer}."
-  (let ((reply (make-channel)))
-    (put-message (timer-channel timer) `(processes ,reply))
-    (get-message reply)))
+(define (timer-request message)
+  (lambda (timer)
+    "Send @var{message} to @var{timer} and return its reply."
+    (let ((reply (make-channel)))
+      (put-message (timer-channel timer) `(,message ,reply))
+      (get-message reply))))
+
+(define timer-processes
+  ;; Return the list of PID/start time pairs of the currently running
+  ;; processes started by the given timer.
+  (timer-request 'processes))
+
+(define timer-past-runs
+  ;; Return the list of past runs as a ring buffer.  Each run has the form
+  ;; (STATUS END START).  When the timer's action is a command, STATUS is an
+  ;; integer, its exit status; otherwise, STATUS is either 'success or
+  ;; '(exception ...).  END and START are the completion and start times,
+  ;; respectively, as integers (seconds since the Epoch).
+  (timer-request 'past-runs))
 
 (define sleep (@ (fibers) sleep))
 
+(define %past-run-log-size
+  ;; Maximum number of entries the log of timer runs.
+  50)
+
 (define* (make-timer-constructor event action
                                  #:key log-file
                                  wait-for-termination?)
@@ -577,8 +595,9 @@ instances running concurrently."
           (name (service-canonical-name (current-service))))
       (spawn-fiber
        (lambda ()
-         (let loop ((processes '())
-                    (termination #f))
+         (let-loop loop ((processes '())          ;PID/start time
+                         (past-runs (ring-buffer %past-run-log-size))
+                         (termination #f))
            (match (if (or termination
                           (and (pair? processes) wait-for-termination?))
                       (get-message channel)
@@ -592,26 +611,37 @@ instances running concurrently."
                      "Terminating timer '~a' with ~a processes running."
                      (length processes))
                name (length processes))
-              (for-each (lambda (pid)
-                          (terminate-process pid SIGHUP))
+              (for-each (match-lambda
+                          ((pid . _)
+                           (terminate-process pid SIGHUP)))
                         processes)
               ;; If there are processes left, keep going until they're gone.
               (if (pair? processes)
-                  (loop processes reply)
+                  (loop (termination reply))
                   (put-message reply #t)))
              (('process-terminated pid status)
               ;; Process PID completed.
-              (local-output
-               (l10n "Process ~a of timer '~a' terminated with status ~a.")
-               pid name status)
-              (record-process-exit-status pid status)
-              (let ((processes (delv pid processes)))
-                (if (and termination (null? processes))
+              (let ((start-time (assoc-ref processes pid))
+                    (end-time ((@ (guile) current-time)))
+                    (remaining (alist-delete pid processes)))
+                (local-output
+                 (l10n "Process ~a of timer '~a' terminated with status ~a \
+after ~a seconds.")
+                 pid name status
+                 (- end-time start-time))
+                (if (and termination (null? remaining))
                     (put-message termination #t)  ;done
-                    (loop processes termination))))
+                    (loop (processes remaining)
+                          (past-runs
+                           (ring-buffer-insert
+                            (list status end-time start-time)
+                            past-runs))))))
              (('processes reply)
               (put-message reply processes)
-              (loop processes termination))
+              (loop))
+             (('past-runs reply)
+              (put-message reply past-runs)
+              (loop))
              ('timeout
               ;; Time to perform ACTION.
               (if (command? action)
@@ -633,16 +663,27 @@ instances running concurrently."
 
                     (local-output (l10n "Timer '~a' spawned process ~a.")
                                   name pid)
-                    (loop (cons pid processes) termination))
-                  (begin
-                    (catch #t
-                      action
-                      (lambda (key . args)
-                        (local-output
-                         (l10n "Exception caught while calling action of \
+                    (loop (processes
+                           (alist-cons pid ((@ (guile) current-time))
+                                       processes))))
+                  (let ((start-time ((@ (guile) current-time))))
+                    (define result
+                      (catch #t
+                        (lambda ()
+                          (action)
+                          'success)
+                        (lambda (key . args)
+                          (local-output
+                           (l10n "Exception caught while calling action of \
 timer '~a': ~s")
-                         name (cons key args))))
-                    (loop processes termination))))
+                           name (cons key args))
+                          `(exception ,key ,@args))))
+
+                    (loop (past-runs
+                           (ring-buffer-insert
+                            (list result ((@ (guile) current-time)) start-time)
+                            past-runs))))))
+
              ('overslept
               ;; Reached when resuming from sleep state: we slept
               ;; significantly more than the requested number of seconds.  To
@@ -651,7 +692,7 @@ timer '~a': ~s")
               (local-output (l10n "Waiting anew for timer '~a' (resuming \
 from sleep state?).")
                             name)
-              (loop processes termination))))))
+              (loop))))))
 
       (timer channel event action))))
 
diff --git a/tests/services/timer.sh b/tests/services/timer.sh
index a4bb165..2158a43 100644
--- a/tests/services/timer.sh
+++ b/tests/services/timer.sh
@@ -49,6 +49,12 @@ cat > "$conf" <<EOF
                          (calendar-event #:seconds (iota 60))
                           (lambda () (display "Hello from procedure.\n")))
                  #:stop (make-timer-destructor))
+        (service '(timer-that-throws)
+                 #:start (make-timer-constructor
+                         (calendar-event #:seconds (iota 60))
+                          (lambda () (display "Throwing!\n") (mkdir "/")))
+                 #:stop (make-timer-destructor)
+                #:actions (list timer-trigger-action))
         (service '(endless-timer)
                  #:start (make-timer-constructor
                           (calendar-event #:seconds (iota 60))
@@ -61,6 +67,7 @@ cat > "$conf" <<EOF
                             #:months
                             (if (<= (date-month (current-date)) 6)
                                 '(12)
+
                                 '(1)))
                           (command (quote ("sh" "-c" "echo Triggered from 
\$PWD."))
                                    #:directory "$PWD"))
@@ -88,8 +95,14 @@ $herd stop timer-with-command
 
 $herd start timer-with-procedure
 sleep 2
+$herd status timer-with-procedure | grep "Completed in" # recent runs
 grep "Hello from procedure" "$log"
 
+$herd start timer-that-throws
+$herd trigger timer-that-throws
+grep "Throwing" "$log"
+$herd status timer-that-throws | grep "Exception thrown.*mkdir" # recent runs
+
 rm -f "$service_pid"
 $herd start endless-timer
 sleep 2

Reply via email to