This patch adds WatchdogTimestamp[Monotonic] to the systemd service D-Bus API. The timestamp is updated to the current time when the service calls 'sd_notify("WATCHDOG=1\n")'. Using a timestamp instead of an 'alive' flag has two advantages: 1. No timeout is needed to define when a service is no longer alive. This simplifies both configuration (no timeout value) and implementation (no timeout event). 2. It is more robust. A 'dead' service might not be detected should systemd 'forget' to reset an 'alive' flag. A service that has died is much less likely to end up with a fresh, valid timestamp. --- man/sd_notify.xml | 12 ++++++++++++ src/dbus-service.c | 5 +++++ src/sd-daemon.h | 5 +++++ src/service.c | 20 ++++++++++++++++++++ src/service.h | 2 ++ 5 files changed, 44 insertions(+), 0 deletions(-)
diff --git a/man/sd_notify.xml b/man/sd_notify.xml index c3791ce..1f7160f 100644 --- a/man/sd_notify.xml +++ b/man/sd_notify.xml @@ -151,6 +151,18 @@ itself. Example: "MAINPID=4711"</para></listitem> </varlistentry> + + <varlistentry> + <term>WATCHDOG=1</term> + + <listitem><para>Tells systemd to + update the watchdog timestamp. + Services using this feature should do + this in regular intervals. A watchdog + framework can use the timestamps to + detect failed + services.</para></listitem> + </varlistentry> </variablelist> <para>It is recommended to prefix variable names that diff --git a/src/dbus-service.c b/src/dbus-service.c index 3486623..2f53484 100644 --- a/src/dbus-service.c +++ b/src/dbus-service.c @@ -43,6 +43,8 @@ " <property name=\"NotifyAccess\" type=\"s\" access=\"read\"/>\n" \ " <property name=\"RestartUSec\" type=\"t\" access=\"read\"/>\n" \ " <property name=\"TimeoutUSec\" type=\"t\" access=\"read\"/>\n" \ + " <property name=\"WatchdogTimestamp\" type=\"t\" access=\"read\"/>\n" \ + " <property name=\"WatchdogTimestampMonotonic\" type=\"t\" access=\"read\"/>\n" \ BUS_EXEC_COMMAND_INTERFACE("ExecStartPre") \ BUS_EXEC_COMMAND_INTERFACE("ExecStart") \ BUS_EXEC_COMMAND_INTERFACE("ExecStartPost") \ @@ -87,6 +89,7 @@ const char bus_service_invalidating_properties[] = "ExecStop\0" "ExecStopPost\0" "ExecMain\0" + "WatchdogTimestamp\0" "MainPID\0" "ControlPID\0" "StatusText\0"; @@ -104,6 +107,8 @@ DBusHandlerResult bus_service_message_handler(Unit *u, DBusConnection *connectio { "org.freedesktop.systemd1.Service", "NotifyAccess", bus_service_append_notify_access, "s", &u->service.notify_access }, { "org.freedesktop.systemd1.Service", "RestartUSec", bus_property_append_usec, "t", &u->service.restart_usec }, { "org.freedesktop.systemd1.Service", "TimeoutUSec", bus_property_append_usec, "t", &u->service.timeout_usec }, + { "org.freedesktop.systemd1.Service", "WatchdogTimestamp", bus_property_append_usec, "t", &u->service.watchdog_timestamp.realtime }, + { 
"org.freedesktop.systemd1.Service", "WatchdogTimestampMonotonic",bus_property_append_usec,"t", &u->service.watchdog_timestamp.monotonic }, BUS_EXEC_COMMAND_PROPERTY("org.freedesktop.systemd1.Service", u->service.exec_command[SERVICE_EXEC_START_PRE], "ExecStartPre"), BUS_EXEC_COMMAND_PROPERTY("org.freedesktop.systemd1.Service", u->service.exec_command[SERVICE_EXEC_START], "ExecStart"), BUS_EXEC_COMMAND_PROPERTY("org.freedesktop.systemd1.Service", u->service.exec_command[SERVICE_EXEC_START_POST], "ExecStartPost"), diff --git a/src/sd-daemon.h b/src/sd-daemon.h index 46dc7fd..17536f7 100644 --- a/src/sd-daemon.h +++ b/src/sd-daemon.h @@ -217,6 +217,11 @@ int sd_is_mq(int fd, const char *path); MAINPID=... The main pid of a daemon, in case systemd did not fork off the process itself. Example: "MAINPID=4711" + WATCHDOG=1 Tells systemd to update the watchdog timestamp. + Services using this feature should do this in + regular intervals. A watchdog framework can use the + timestamps to detect failed services. + Daemons can choose to send additional variables. However, it is recommended to prefix variable names not listed above with X_. diff --git a/src/service.c b/src/service.c index feecbbe..4093cc7 100644 --- a/src/service.c +++ b/src/service.c @@ -194,6 +194,19 @@ static void service_connection_unref(Service *s) { s->accept_socket = NULL; } +static void service_stop_watchdog(Service *s) { + assert(s); + + s->watchdog_timestamp.realtime = 0; + s->watchdog_timestamp.monotonic = 0; +} + +static void service_reset_watchdog(Service *s) { + assert(s); + + dual_timestamp_get(&s->watchdog_timestamp); +} + static void service_done(Unit *u) { Service *s = SERVICE(u); @@ -1539,6 +1552,9 @@ static void service_set_state(Service *s, ServiceState state) { service_connection_unref(s); } + if (state == SERVICE_STOP) + service_stop_watchdog(s); + /* For the inactive states unit_notify() will trim the cgroup, * but for exit we have to do that ourselves... 
*/ if (state == SERVICE_EXITED && s->meta.manager->n_reloading <= 0) @@ -3129,6 +3145,10 @@ static void service_notify_message(Unit *u, pid_t pid, char **tags) { } } + if (strv_find(tags, "WATCHDOG=1")) { + log_debug("%s: got WATCHDOG=1", u->meta.id); + service_reset_watchdog(s); + } /* Notify clients about changed status or main pid */ unit_add_to_dbus_queue(u); diff --git a/src/service.h b/src/service.h index 2102826..bee0e9e 100644 --- a/src/service.h +++ b/src/service.h @@ -99,6 +99,8 @@ struct Service { usec_t restart_usec; usec_t timeout_usec; + dual_timestamp watchdog_timestamp; + ExecCommand* exec_command[_SERVICE_EXEC_COMMAND_MAX]; ExecContext exec_context; -- 1.7.7.3 _______________________________________________ systemd-devel mailing list systemd-devel@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/systemd-devel