From: Daniel Bailey <[email protected]> Date: Fri, 29 May 2020 17:37:25 -0700 Subject: [PATCH] procd: add service instance watchdog
Added an instance watchdog which will eventually either terminate or respawn an instance depending on the instance's respawn setting. Added the service ubus method 'watchdog', which services the watchdog timer and allows updating the watchdog mode and timeout of an instance. Three modes: disabled, passive, active. Presently, only the disabled and passive modes are implemented. Disabled: cancels the watchdog timer set for a given instance. Passive: sets an instance timer which must be serviced, or the instance will be stopped/restarted, depending upon the instance respawn value, when the timer expires. Active (to be implemented): requires an additional service 'endpoint' parameter. Upon watchdog timer expiry, procd will query the endpoint to determine whether the instance is alive. If the instance does not answer, procd will terminate or respawn the instance depending on the instance respawn setting. Signed-off-by: Daniel Bailey <[email protected]> --- service/instance.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++ service/instance.h | 15 ++++++++++ service/service.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 151 insertions(+) diff --git a/service/instance.c b/service/instance.c index 142208a..8560a95 100644 --- a/service/instance.c +++ b/service/instance.c @@ -65,6 +65,7 @@ enum { INSTANCE_ATTR_EXTROOT, INSTANCE_ATTR_OVERLAYDIR, INSTANCE_ATTR_TMPOVERLAYSIZE, + INSTANCE_ATTR_WATCHDOG, __INSTANCE_ATTR_MAX }; @@ -95,6 +96,7 @@ static const struct blobmsg_policy instance_attr[__INSTANCE_ATTR_MAX] = { [INSTANCE_ATTR_EXTROOT] = { "extroot", BLOBMSG_TYPE_STRING }, [INSTANCE_ATTR_OVERLAYDIR] = { "overlaydir", BLOBMSG_TYPE_STRING }, [INSTANCE_ATTR_TMPOVERLAYSIZE] = { "tmpoverlaysize", BLOBMSG_TYPE_STRING }, + [INSTANCE_ATTR_WATCHDOG] = { "watchdog", BLOBMSG_TYPE_ARRAY }, }; enum { @@ -546,6 +548,11 @@ instance_start(struct service_instance *in) fcntl(epipe[0], F_SETFD, FD_CLOEXEC); } + if (in->watchdog.mode != INSTANCE_WATCHDOG_MODE_DISABLED) { + 
uloop_timeout_set(&in->watchdog.timeout, in->watchdog.freq * 1000); + DEBUG(2, "Started instance %s::%s watchdog timer : timeout = %d\n", in->srv->name, in->name, in->watchdog.freq); + } + service_event("instance.start", in->srv->name, in->name); } @@ -693,6 +700,7 @@ instance_exit(struct uloop_process *p, int ret) in->exit_code = instance_exit_code(ret); uloop_timeout_cancel(&in->timeout); + uloop_timeout_cancel(&in->watchdog.timeout); service_event("instance.stop", in->srv->name, in->name); if (in->halt) { @@ -752,6 +760,19 @@ instance_restart(struct service_instance *in) uloop_timeout_set(&in->timeout, in->term_timeout * 1000); } +static void +instance_watchdog(struct uloop_timeout *t) +{ + struct service_instance *in = container_of(t, struct service_instance, watchdog.timeout); + + DEBUG(3, "instance %s::%s watchdog timer expired\n", in->srv->name, in->name); + + if (in->respawn) + instance_restart(in); + else + instance_stop(in, true); +} + static bool string_changed(const char *a, const char *b) { return !((!a && !b) || (a && b && !strcmp(a, b))); @@ -817,6 +838,12 @@ instance_config_changed(struct service_instance *in, struct service_instance *in if (!blobmsg_list_equal(&in->errors, &in_new->errors)) return true; + if (in->watchdog.mode != in_new->watchdog.mode) + return true; + + if (in->watchdog.freq != in_new->watchdog.freq) + return true; + return false; } @@ -1170,6 +1197,36 @@ instance_config_parse(struct service_instance *in) DEBUG(3, "unknown syslog facility '%s' given, using default (LOG_DAEMON)\n", blobmsg_get_string(tb[INSTANCE_ATTR_FACILITY])); } + if (tb[INSTANCE_ATTR_WATCHDOG]) { + int i = 0; + uint32_t vals[2] = { 0, 30 }; + + blobmsg_for_each_attr(cur2, tb[INSTANCE_ATTR_WATCHDOG], rem) { + if (i >= 2) + break; + + vals[i] = atoi(blobmsg_get_string(cur2)); + i++; + } + + // TODO(danielb): change mode integers to strings (0 = disabled, 1 = passive, 2 = active) + if (vals[0] >= 0 && vals[0] < __INSTANCE_WATCHDOG_MODE_MAX) { + in->watchdog.mode = 
vals[0]; + DEBUG(3, "setting watchdog mode (%d)\n", vals[0]); + } else { + in->watchdog.mode = 0; + DEBUG(3, "unknown watchdog mode (%d) given, using default (0)\n", vals[0]); + } + + if (vals[1] > 0) { + in->watchdog.freq = vals[1]; + DEBUG(3, "setting watchdog timeout (%d)\n", vals[1]); + } else { + in->watchdog.freq = 30; + DEBUG(3, "invalid watchdog timeout (%d) given, using default (30)\n", vals[1]); + } + } + return true; } @@ -1255,6 +1312,7 @@ instance_free(struct service_instance *in) instance_free_stdio(in); uloop_process_delete(&in->proc); uloop_timeout_cancel(&in->timeout); + uloop_timeout_cancel(&in->watchdog.timeout); trigger_del(in); watch_del(in); instance_config_cleanup(in); @@ -1308,6 +1366,9 @@ instance_init(struct service_instance *in, struct service *s, struct blob_attr * blobmsg_list_simple_init(&in->limits); blobmsg_list_simple_init(&in->errors); blobmsg_list_simple_init(&in->jail.mount); + + in->watchdog.timeout.cb = instance_watchdog; + in->valid = instance_config_parse(in); } @@ -1425,5 +1486,12 @@ void instance_dump(struct blob_buf *b, struct service_instance *in, int verbose) if (verbose && in->trigger) blobmsg_add_blob(b, in->trigger); + if (in->watchdog.mode != INSTANCE_WATCHDOG_MODE_DISABLED) { + void *r = blobmsg_open_table(b, "watchdog"); + blobmsg_add_u32(b, "mode", in->watchdog.mode); + blobmsg_add_u32(b, "timeout", in->watchdog.freq); + blobmsg_close_table(b, r); + } + blobmsg_close_table(b, i); } diff --git a/service/instance.h b/service/instance.h index 4400cd4..590f931 100644 --- a/service/instance.h +++ b/service/instance.h @@ -23,6 +23,19 @@ #define RESPAWN_ERROR (5 * 60) #define SIGNALLED_OFFSET 128 +typedef enum instance_watchdog { + INSTANCE_WATCHDOG_MODE_DISABLED, + INSTANCE_WATCHDOG_MODE_PASSIVE, + INSTANCE_WATCHDOG_MODE_ACTIVE, + __INSTANCE_WATCHDOG_MODE_MAX, +} instance_watchdog_mode_t; + +struct watchdog { + instance_watchdog_mode_t mode; + uint32_t freq; + struct uloop_timeout timeout; +}; + struct jail { bool 
procfs; bool sysfs; @@ -94,6 +107,8 @@ struct service_instance { struct blobmsg_list file; struct blobmsg_list limits; struct blobmsg_list errors; + + struct watchdog watchdog; }; void instance_start(struct service_instance *in); diff --git a/service/service.c b/service/service.c index fcf0215..d9249a3 100644 --- a/service/service.c +++ b/service/service.c @@ -727,6 +727,73 @@ service_get_data(struct ubus_context *ctx, struct ubus_object *obj, return 0; } +enum { + SERVICE_WATCHDOG_MODE, + SERVICE_WATCHDOG_TIMEOUT, + SERVICE_WATCHDOG_NAME, + SERVICE_WATCHDOG_INSTANCE, + __SERVICE_WATCHDOG_MAX, +}; + +static const struct blobmsg_policy service_watchdog_policy[__SERVICE_WATCHDOG_MAX] = { + [SERVICE_WATCHDOG_MODE] = { "mode", BLOBMSG_TYPE_INT32 }, + [SERVICE_WATCHDOG_NAME] = { "name", BLOBMSG_TYPE_STRING }, + [SERVICE_WATCHDOG_TIMEOUT] = { "timeout", BLOBMSG_TYPE_INT32 }, + [SERVICE_WATCHDOG_INSTANCE] = { "instance", BLOBMSG_TYPE_STRING }, +}; + +static int +service_handle_watchdog(struct ubus_context *ctx, struct ubus_object *obj, + struct ubus_request_data *req, const char *method, + struct blob_attr *msg) +{ + struct blob_attr *tb[__SERVICE_WATCHDOG_MAX] = {0}; + struct service *s; + struct blob_attr *cur; + struct service_instance *in; + + blobmsg_parse(service_watchdog_policy, __SERVICE_WATCHDOG_MAX, tb, blobmsg_data(msg), blobmsg_data_len(msg)); + cur = tb[SERVICE_WATCHDOG_NAME]; + if (!cur) + return UBUS_STATUS_NOT_FOUND; + + s = avl_find_element(&services, blobmsg_data(cur), s, avl); + if (!s) + return UBUS_STATUS_NOT_FOUND; + + cur = tb[SERVICE_WATCHDOG_INSTANCE]; + if (!cur) + return UBUS_STATUS_NOT_FOUND; + + in = vlist_find(&s->instances, blobmsg_data(cur), in, node); + if (!in) { + ERROR("instance %s not found\n", blobmsg_get_string(cur)); + return UBUS_STATUS_NOT_FOUND; + } + + // TODO(danielb): change mode from u32 to string (0 = disabled, 1 = passive, 2 = active) + if (tb[SERVICE_WATCHDOG_MODE]) + in->watchdog.mode = 
blobmsg_get_u32(tb[SERVICE_WATCHDOG_MODE]); + + if (tb[SERVICE_WATCHDOG_TIMEOUT]) + in->watchdog.freq = blobmsg_get_u32(tb[SERVICE_WATCHDOG_TIMEOUT]); + + if (in->watchdog.mode == INSTANCE_WATCHDOG_MODE_DISABLED) + uloop_timeout_cancel(&in->watchdog.timeout); + else + uloop_timeout_set(&in->watchdog.timeout, in->watchdog.freq * 1000); + + blob_buf_init(&b, 0); + blobmsg_add_string(&b, "name", blobmsg_get_string(tb[SERVICE_WATCHDOG_NAME])); + blobmsg_add_string(&b, "instance", blobmsg_get_string(tb[SERVICE_WATCHDOG_INSTANCE])); + blobmsg_add_u32(&b, "mode", in->watchdog.mode); + blobmsg_add_u32(&b, "timeout", in->watchdog.freq); + + ubus_send_reply(ctx, req, b.head); + + return UBUS_STATUS_OK; +} + static int container_handle_console(struct ubus_context *ctx, struct ubus_object *obj, struct ubus_request_data *req, const char *method, @@ -797,6 +864,7 @@ static struct ubus_method main_object_methods[] = { UBUS_METHOD("validate", service_handle_validate, validate_policy), UBUS_METHOD("get_data", service_get_data, get_data_policy), UBUS_METHOD("state", service_handle_state, service_state_attrs), + UBUS_METHOD("watchdog", service_handle_watchdog, service_watchdog_policy), }; static struct ubus_object_type main_object_type = -- 2.25.1
_______________________________________________ openwrt-devel mailing list [email protected] https://lists.openwrt.org/mailman/listinfo/openwrt-devel
