This introduces a self-fence mechanism to Qemu, causing it to die if a heartbeat condition is not met. Currently, a file-based heartbeat is available and can be configured as follows:

  -object file-fence,id=ff0,file=/foo,qtimeout=20,ktimeout=25,signal=kill

Qemu will watch 'file' for attribute changes. Touching the file works as a
heartbeat. This parameter is mandatory.

Fencing happens after 'qtimeout' or 'ktimeout' seconds elapse without a
heartbeat. At least one of these must be specified. Both may be used, in
which case 'qtimeout' must be smaller than 'ktimeout'.

When using 'qtimeout', an internal Qemu timer is used. Fencing with this
method gives Qemu a chance to write a log message indicating which file
caused the event. If Qemu's main loop is hung for whatever reason, this
method won't successfully kill Qemu.

When using 'ktimeout', a kernel timer is used. In this case, 'signal' can be
'kill' (for SIGKILL, default) or 'quit' (for SIGQUIT). Using SIGQUIT may be
preferred for obtaining core dumps. If Qemu is hung (e.g. in uninterruptible
sleep), this method won't successfully kill Qemu.

It is worth noting that even successfully killing Qemu may not be sufficient
to completely fence a VM, as certain operations like network packets or block
commands may still be pending in the kernel. If that is a concern, systems
should consider using further fencing mechanisms, such as hardware watchdogs,
in conjunction with this one for additional protection.

Signed-off-by: Felipe Franciosi <fel...@nutanix.com>
---
Based-on: <20191125153619.39893-2-fel...@nutanix.com>

 Makefile.objs       |   1 +
 fence/Makefile.objs |   1 +
 fence/file_fence.c  | 419 ++++++++++++++++++++++++++++++++++++++++++++
 qemu-options.hx     |  27 +++-
 4 files changed, 447 insertions(+), 1 deletion(-)
 create mode 100644 fence/Makefile.objs
 create mode 100644 fence/file_fence.c

diff --git a/Makefile.objs b/Makefile.objs
index 11ba1a36bd..998eed4796 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -75,6 +75,7 @@ common-obj-$(CONFIG_TPM) += tpm.o
 
 common-obj-y += backends/
 common-obj-y += chardev/
+common-obj-y += fence/
 
 common-obj-$(CONFIG_SECCOMP) += qemu-seccomp.o
 qemu-seccomp.o-cflags := $(SECCOMP_CFLAGS)
diff --git a/fence/Makefile.objs b/fence/Makefile.objs
new file mode 100644
index 0000000000..2ed2092568
--- /dev/null
+++ b/fence/Makefile.objs
@@ -0,0 +1 @@
+common-obj-y += file_fence.o
diff --git a/fence/file_fence.c b/fence/file_fence.c
new file mode 100644
index 0000000000..5b743e69d2
--- /dev/null
+++ b/fence/file_fence.c
@@ -0,0 +1,419 @@
+/*
+ * QEMU file-based self-fence mechanism
+ *
+ * Copyright (c) 2019 Nutanix Inc. All rights reserved.
+ *
+ * Authors:
+ *    Felipe Franciosi <fel...@nutanix.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qom/object_interfaces.h"
+#include "qemu/filemonitor.h"
+#include "qemu/timer.h"
+
+#include <time.h>
+
+#define TYPE_FILE_FENCE "file-fence"
+
+/*
+ * State of one file-fence object.
+ *
+ * 'dir' and 'file' hold the directory and base name of the heartbeat file.
+ * 'qtimeout' and 'ktimeout' are in seconds; zero means "not configured".
+ * 'signal' stays zero until the 'signal' property is set.
+ */
+typedef struct FileFence {
+    Object parent_obj;
+
+    gchar *dir;
+    gchar *file;
+    uint32_t qtimeout;
+    uint32_t ktimeout;
+    int signal;
+
+    timer_t ktimer;
+    bool ktimer_valid; /* timer_t has no portable "invalid" value */
+    QEMUTimer *qtimer;
+
+    QFileMonitor *fm;
+    int64_t id;
+} FileFence;
+
+#define FILE_FENCE(obj) \
+    OBJECT_CHECK(FileFence, (obj), TYPE_FILE_FENCE)
+
+/*
+ * (Re)arm every configured timer so that it expires a full timeout from now.
+ * Called from file_fence_complete() and again on every heartbeat.
+ */
+static void timer_update(FileFence *ff)
+{
+    if (ff->qtimeout) {
+        /* Widen before scaling: qtimeout * 1000 can overflow 32 bits. */
+        timer_mod(ff->qtimer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) +
+                              (int64_t)ff->qtimeout * 1000);
+    }
+
+    if (ff->ktimeout) {
+        struct itimerspec its = {
+            .it_value.tv_sec = ff->ktimeout,
+        };
+        int err = timer_settime(ff->ktimer, 0, &its, NULL);
+
+        g_assert(err == 0);
+    }
+}
+
+/*
+ * Expiry handler of the Qemu timer: report which file missed its heartbeat,
+ * then abort().
+ */
+static void file_fence_abort_cb(void *opaque)
+{
+    FileFence *ff = opaque;
+    gchar *path = g_build_filename(ff->dir, ff->file, NULL);
+
+    /*
+     * error_report() is used instead of buffered stdout, which abort() is
+     * not guaranteed to flush.
+     */
+    error_report("Fencing after %u seconds on '%s'", ff->qtimeout, path);
+    g_free(path);
+    abort();
+}
+
+/*
+ * File monitor callback: an attribute change on the watched file is the
+ * heartbeat and rearms the timers; every other event is ignored.
+ */
+static void file_fence_watch_cb(int64_t id, QFileMonitorEvent ev,
+                                const char *file, void *opaque)
+{
+    FileFence *ff = opaque;
+
+    if (ev == QFILE_MONITOR_EVENT_ATTRIBUTES &&
+        g_strcmp0(file, ff->file) == 0) {
+        timer_update(ff);
+    }
+}
+
+/* Delete the kernel timer if ktimer_setup() created one. */
+static void ktimer_tear(FileFence *ff)
+{
+    if (ff->ktimer_valid) {
+        int err = timer_delete(ff->ktimer);
+
+        g_assert(err == 0);
+        ff->ktimer_valid = false;
+    }
+}
+
+/*
+ * Create (but do not arm) the kernel timer when 'ktimeout' is configured.
+ * On expiry it delivers 'signal', or SIGKILL if none was chosen.
+ *
+ * Returns 0 on success, -1 with an error stored in @errp on failure.
+ */
+static int ktimer_setup(FileFence *ff, Error **errp)
+{
+    struct sigevent sev = {
+        .sigev_notify = SIGEV_SIGNAL,
+        .sigev_signo = ff->signal ? ff->signal : SIGKILL,
+    };
+
+    if (ff->ktimeout == 0) {
+        return 0;
+    }
+
+    if (timer_create(CLOCK_MONOTONIC, &sev, &ff->ktimer) == -1) {
+        error_setg_errno(errp, errno, "Error creating kernel timer");
+        return -1;
+    }
+
+    ff->ktimer_valid = true;
+    return 0;
+}
+
+/* Stop and free the Qemu timer, if there is one. */
+static void qtimer_tear(FileFence *ff)
+{
+    if (ff->qtimer) {
+        timer_del(ff->qtimer);
+        timer_free(ff->qtimer);
+        ff->qtimer = NULL;
+    }
+}
+
+/*
+ * Create (but do not arm) the Qemu timer when 'qtimeout' is configured.
+ *
+ * Returns 0 on success, -1 with an error stored in @errp on failure.
+ */
+static int qtimer_setup(FileFence *ff, Error **errp)
+{
+    if (ff->qtimeout == 0) {
+        return 0;
+    }
+
+    ff->qtimer = timer_new_ms(QEMU_CLOCK_REALTIME, file_fence_abort_cb, ff);
+    if (ff->qtimer == NULL) {
+        error_setg(errp, "Error creating Qemu timer");
+        return -1;
+    }
+
+    return 0;
+}
+
+/* Remove the watch and release the file monitor, if there is one. */
+static void watch_tear(FileFence *ff)
+{
+    if (ff->fm) {
+        qemu_file_monitor_remove_watch(ff->fm, ff->dir, ff->id);
+        qemu_file_monitor_free(ff->fm);
+        ff->fm = NULL;
+        ff->id = 0;
+    }
+}
+
+/*
+ * Create a file monitor and add a watch for 'file' inside 'dir'.
+ *
+ * Returns 0 on success, -1 on failure; the file monitor calls are handed
+ * @errp to describe the error.
+ */
+static int watch_setup(FileFence *ff, Error **errp)
+{
+    QFileMonitor *fm;
+    int64_t id;
+
+    fm = qemu_file_monitor_new(errp);
+    if (!fm) {
+        return -1;
+    }
+
+    id = qemu_file_monitor_add_watch(fm, ff->dir, ff->file,
+                                     file_fence_watch_cb, ff, errp);
+    if (id < 0) {
+        qemu_file_monitor_free(fm);
+        return -1;
+    }
+
+    ff->fm = fm;
+    ff->id = id;
+    return 0;
+}
+
+/*
+ * UserCreatable 'complete' hook: validate the properties, then set up the
+ * watch and the timers and arm them.  Anything already set up is torn down
+ * again if a later step fails.
+ */
+static void file_fence_complete(UserCreatable *obj, Error **errp)
+{
+    FileFence *ff = FILE_FENCE(obj);
+
+    if (ff->dir == NULL) {
+        error_setg(errp, "A 'file' must be set");
+        return;
+    }
+
+    if (ff->signal != 0 && ff->ktimeout == 0) {
+        error_setg(errp, "Using 'signal' requires 'ktimeout' to be set");
+        return;
+    }
+
+    if (ff->ktimeout == 0 && ff->qtimeout == 0) {
+        error_setg(errp, "One or both of 'ktimeout' or 'qtimeout' must be set");
+        return;
+    }
+
+    /* Only compare the timeouts when both are in use. */
+    if (ff->ktimeout != 0 && ff->qtimeout >= ff->ktimeout) {
+        error_setg(errp, "Using 'qtimeout' >= 'ktimeout' doesn't make sense");
+        return;
+    }
+
+    /* Failures are detected by return value; errp is never dereferenced. */
+    if (watch_setup(ff, errp) < 0) {
+        return;
+    }
+
+    if (qtimer_setup(ff, errp) < 0) {
+        goto err_watch;
+    }
+
+    if (ktimer_setup(ff, errp) < 0) {
+        goto err_qtimer;
+    }
+
+    timer_update(ff);
+
+    return;
+
+err_qtimer:
+    qtimer_tear(ff);
+err_watch:
+    watch_tear(ff);
+}
+
+/*
+ * Setter for 'signal': accepts "kill" or "quit" in any letter case.  The
+ * property can only be set once.
+ */
+static void file_fence_set_signal(Object *obj, const char *value, Error **errp)
+{
+    FileFence *ff = FILE_FENCE(obj);
+
+    if (ff->signal) {
+        error_setg(errp, "Signal property already set");
+        return;
+    }
+
+    if (g_ascii_strcasecmp(value, "quit") == 0) {
+        ff->signal = SIGQUIT;
+    } else if (g_ascii_strcasecmp(value, "kill") == 0) {
+        ff->signal = SIGKILL;
+    } else {
+        error_setg(errp, "Invalid signal. Must be 'quit' or 'kill'");
+    }
+}
+
+/*
+ * Getter for 'signal'.  An unset property reads back as "kill", the signal
+ * ktimer_setup() falls back to.
+ */
+static char *file_fence_get_signal(Object *obj, Error **errp)
+{
+    FileFence *ff = FILE_FENCE(obj);
+
+    switch (ff->signal) {
+    case SIGQUIT:
+        return g_strdup("quit");
+    case SIGKILL:
+    case 0:
+        return g_strdup("kill");
+    default:
+        /* The setter only ever stores SIGQUIT or SIGKILL. */
+        g_assert_not_reached();
+    }
+}
+
+/*
+ * Setter for 'file': split an absolute path into 'dir' and 'file'.  The
+ * property can only be set once.
+ */
+static void file_fence_set_file(Object *obj, const char *value, Error **errp)
+{
+    FileFence *ff = FILE_FENCE(obj);
+    gchar *file;
+
+    if (ff->dir) {
+        error_setg(errp, "File property already set");
+        return;
+    }
+
+    if (!g_path_is_absolute(value)) {
+        error_setg(errp, "Path for file-fence must be absolute");
+        return;
+    }
+
+    file = g_path_get_basename(value);
+    if (g_str_equal(file, ".") || g_str_equal(file, G_DIR_SEPARATOR_S)) {
+        error_setg(errp, "Path for file-fence must be a file");
+        g_free(file);
+        return;
+    }
+
+    ff->dir = g_path_get_dirname(value);
+    ff->file = file;
+}
+
+/* Getter for 'file': the full path, or an error if it was never set. */
+static char *file_fence_get_file(Object *obj, Error **errp)
+{
+    FileFence *ff = FILE_FENCE(obj);
+
+    if (!ff->file) {
+        error_setg(errp, "File property not set");
+        return NULL;
+    }
+
+    return g_build_filename(ff->dir, ff->file, NULL);
+}
+
+/* Release the timers, the watch and the path strings. */
+static void file_fence_instance_finalize(Object *obj)
+{
+    FileFence *ff = FILE_FENCE(obj);
+
+    ktimer_tear(ff);
+    qtimer_tear(ff);
+    watch_tear(ff);
+
+    g_free(ff->file);
+    g_free(ff->dir);
+}
+
+/* Register the user-settable properties. */
+static void file_fence_instance_init(Object *obj)
+{
+    FileFence *ff = FILE_FENCE(obj);
+
+    object_property_add_str(obj, "file",
+                            file_fence_get_file,
+                            file_fence_set_file,
+                            &error_abort);
+    object_property_add_str(obj, "signal",
+                            file_fence_get_signal,
+                            file_fence_set_signal,
+                            &error_abort);
+    object_property_add_uint32_ptr(obj, "qtimeout", &ff->qtimeout,
+                                   false, &error_abort);
+    object_property_add_uint32_ptr(obj, "ktimeout", &ff->ktimeout,
+                                   false, &error_abort);
+}
+
+/* Install file_fence_complete() as the UserCreatable 'complete' hook. */
+static void file_fence_class_init(ObjectClass *klass, void *class_data)
+{
+    UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass);
+
+    ucc->complete = file_fence_complete;
+}
+
+static const TypeInfo file_fence_info = {
+    .name = TYPE_FILE_FENCE,
+    .parent = TYPE_OBJECT,
+    .class_init = file_fence_class_init,
+    .instance_size = sizeof(FileFence),
+    .instance_init = file_fence_instance_init,
+    .instance_finalize = file_fence_instance_finalize,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_USER_CREATABLE },
+        { }
+    }
+};
+
+static void register_types(void)
+{
+    type_register_static(&file_fence_info);
+}
+
+type_init(register_types);
diff --git a/qemu-options.hx b/qemu-options.hx
index 65c9473b73..995d3d6abf 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -4929,8 +4929,33 @@ CN=laptop.example.com,O=Example Home,L=London,ST=London,C=GB
 
 @end table
 
-ETEXI
+@item -object file-fence,id=@var{id},file=@var{file},qtimeout=@var{qtimeout},ktimeout=@var{ktimeout},signal=@var{signal}
+
+Self-fence Qemu if @var{file} is not modified within a given timeout.
+
+Qemu will watch @var{file} for attribute changes. Touching the file works as a
+heartbeat. This parameter is mandatory.
+
+Fencing happens after @var{qtimeout} or @var{ktimeout} seconds elapse
+without a heartbeat. At least one of these must be specified. Both may be used.
+When using @var{qtimeout}, an internal Qemu timer is used. Fencing with
+this method gives Qemu a chance to write a log message indicating which file
+caused the event. If Qemu's main loop is hung for whatever reason, this method
+won't successfully kill Qemu.
+
+When using @var{ktimeout}, a kernel timer is used. In this case, @var{signal}
+can be 'kill' (for SIGKILL, default) or 'quit' (for SIGQUIT). Using SIGQUIT may
+be preferred for obtaining core dumps. If Qemu is hung (e.g. uninterruptible
+sleep), this method won't successfully kill Qemu.
+
+It is worth noting that even successfully killing Qemu may not be sufficient to
+completely fence a VM as certain operations like network packets or block
+commands may be pending in the kernel. If that is a concern, systems should
+consider using further fencing mechanisms like hardware watchdogs either in
+addition or in conjunction with this feature for additional protection.
+
+ETEXI
 
 
 HXCOMM This is the last statement. Insert new options before this line!
 STEXI
-- 
2.20.1