This introduces a self-fence mechanism to Qemu, causing it to die if a
heartbeat condition is not met. Currently, a file-based heartbeat is
available and can be configured as follows:

-object file-fence,id=ff0,file=/foo,qtimeout=20,ktimeout=25,signal=kill

Qemu will watch 'file' for attribute changes. Touching the file works as
a heartbeat. This parameter is mandatory.

Fencing happens after 'qtimeout' or 'ktimeout' seconds elapse without a
heartbeat. At least one of these must be specified. Both may be used.

When using 'qtimeout', an internal Qemu timer is used. Fencing with this
method gives Qemu a chance to write a log message indicating which file
caused the event. If Qemu's main loop is hung for whatever reason, this
method won't successfully kill Qemu.

When using 'ktimeout', a kernel timer is used. In this case, 'signal'
can be 'kill' (for SIGKILL, default) or 'quit' (for SIGQUIT). Using
SIGQUIT may be preferred for obtaining core dumps. If Qemu is hung
(eg. uninterruptable sleep), this method won't successfully kill Qemu.

It is worth noting that even successfully killing Qemu may not be
sufficient to completely fence a VM as certain operations like network
packets or block commands may be pending in the kernel. If that is a
concern, systems should consider using further fencing mechanisms like
hardware watchdogs either in addition or in conjunction with this for
additional protection.

Signed-off-by: Felipe Franciosi <fel...@nutanix.com>
---
Based-on: <20191125153619.39893-2-fel...@nutanix.com>

 Makefile.objs       |   1 +
 fence/Makefile.objs |   1 +
 fence/file_fence.c  | 381 ++++++++++++++++++++++++++++++++++++++++++++
 qemu-options.hx     |  27 +++-
 4 files changed, 409 insertions(+), 1 deletion(-)
 create mode 100644 fence/Makefile.objs
 create mode 100644 fence/file_fence.c

diff --git a/Makefile.objs b/Makefile.objs
index 11ba1a36bd..998eed4796 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -75,6 +75,7 @@ common-obj-$(CONFIG_TPM) += tpm.o
 
 common-obj-y += backends/
 common-obj-y += chardev/
+common-obj-y += fence/
 
 common-obj-$(CONFIG_SECCOMP) += qemu-seccomp.o
 qemu-seccomp.o-cflags := $(SECCOMP_CFLAGS)
diff --git a/fence/Makefile.objs b/fence/Makefile.objs
new file mode 100644
index 0000000000..2ed2092568
--- /dev/null
+++ b/fence/Makefile.objs
@@ -0,0 +1 @@
+common-obj-y += file_fence.o
diff --git a/fence/file_fence.c b/fence/file_fence.c
new file mode 100644
index 0000000000..5b743e69d2
--- /dev/null
+++ b/fence/file_fence.c
@@ -0,0 +1,381 @@
+/*
+ * QEMU file-based self-fence mechanism
+ *
+ * Copyright (c) 2019 Nutanix Inc. All rights reserved.
+ *
+ * Authors:
+ *    Felipe Franciosi <fel...@nutanix.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qom/object_interfaces.h"
+#include "qemu/filemonitor.h"
+#include "qemu/timer.h"
+
+#include <time.h>
+
+#define TYPE_FILE_FENCE "file-fence"
+
+typedef struct FileFence {
+    Object parent_obj;
+
+    gchar *dir;
+    gchar *file;
+    uint32_t qtimeout;
+    uint32_t ktimeout;
+    int signal;
+
+    timer_t ktimer;
+    QEMUTimer *qtimer;
+
+    QFileMonitor *fm;
+    uint64_t id;
+} FileFence;
+
+#define FILE_FENCE(obj) \
+    OBJECT_CHECK(FileFence, (obj), TYPE_FILE_FENCE)
+
+static void
+timer_update(FileFence *ff)
+{
+    struct itimerspec its = {
+        .it_value.tv_sec = ff->ktimeout,
+    };
+    int err;
+
+    if (ff->qtimeout) {
+        timer_mod(ff->qtimer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) +
+                              ff->qtimeout * 1000);
+    }
+
+    if (ff->ktimeout) {
+        err = timer_settime(ff->ktimer, 0, &its, NULL);
+        g_assert(err == 0);
+    }
+}
+
+static void
+file_fence_abort_cb(void *opaque)
+{
+    FileFence *ff = opaque;
+    printf("Fencing after %u seconds on '%s'\n",
+           ff->qtimeout, g_strconcat(ff->dir, "/", ff->file, NULL));
+    abort();
+}
+
+static void
+file_fence_watch_cb(int64_t id, QFileMonitorEvent ev, const char *file,
+                    void *opaque)
+{
+    FileFence *ff = opaque;
+
+    if (ev != QFILE_MONITOR_EVENT_ATTRIBUTES) {
+        return;
+    }
+
+    if (g_strcmp0(file, ff->file) != 0) {
+        return;
+    }
+
+    timer_update(ff);
+}
+
+static void
+ktimer_tear(FileFence *ff)
+{
+    int err;
+
+    if (ff->ktimer) {
+        err = timer_delete(ff->ktimer);
+        g_assert(err == 0);
+        ff->ktimer = NULL;
+    }
+}
+
+static void
+ktimer_setup(FileFence *ff, Error **errp)
+{
+    int err;
+
+    struct sigevent sev = {
+        .sigev_notify = SIGEV_SIGNAL,
+        .sigev_signo = ff->signal ? ff->signal : SIGKILL,
+    };
+
+    if (ff->ktimeout == 0) {
+        return;
+    }
+
+    err = timer_create(CLOCK_MONOTONIC, &sev, &ff->ktimer);
+    if (err == -1) {
+        error_setg(errp, "Error creating kernel timer: %m");
+        return;
+    }
+}
+
+static void
+qtimer_tear(FileFence *ff)
+{
+    if (ff->qtimer) {
+        timer_del(ff->qtimer);
+        timer_free(ff->qtimer);
+    }
+    ff->qtimer = NULL;
+}
+
+static void
+qtimer_setup(FileFence *ff, Error **errp)
+{
+    QEMUTimer *qtimer;
+
+    if (ff->qtimeout == 0) {
+        return;
+    }
+
+    qtimer = timer_new_ms(QEMU_CLOCK_REALTIME, file_fence_abort_cb, ff);
+    if (qtimer == NULL) {
+        error_setg(errp, "Error creating Qemu timer");
+        return;
+    }
+
+    ff->qtimer = qtimer;
+}
+
+static void
+watch_tear(FileFence *ff)
+{
+    if (ff->fm) {
+        qemu_file_monitor_remove_watch(ff->fm, ff->dir, ff->id);
+        qemu_file_monitor_free(ff->fm);
+        ff->fm = NULL;
+        ff->id = 0;
+    }
+}
+
+static void
+watch_setup(FileFence *ff, Error **errp)
+{
+    QFileMonitor *fm;
+    int64_t id;
+
+    fm = qemu_file_monitor_new(errp);
+    if (!fm) {
+        return;
+    }
+
+    id = qemu_file_monitor_add_watch(fm, ff->dir, ff->file,
+                                     file_fence_watch_cb, ff, errp);
+    if (id < 0) {
+        qemu_file_monitor_free(fm);
+        return;
+    }
+
+    ff->fm = fm;
+    ff->id = id;
+}
+
+static void
+file_fence_complete(UserCreatable *obj, Error **errp)
+{
+    FileFence *ff = FILE_FENCE(obj);
+
+    if (ff->dir == NULL) {
+        error_setg(errp, "A 'file' must be set");
+        return;
+    }
+
+    if (ff->signal != 0 && ff->ktimeout == 0) {
+        error_setg(errp, "Using 'signal' requires 'ktimeout' to be set");
+        return;
+    }
+
+    if (ff->ktimeout == 0 && ff->qtimeout == 0) {
+        error_setg(errp, "One or both of 'ktimeout' or 'qtimeout' must be 
set");
+        return;
+    }
+
+    if (ff->qtimeout >= ff->ktimeout) {
+        error_setg(errp, "Using 'qtimeout' >= 'ktimeout' doesn't make sense");
+        return;
+    }
+
+    watch_setup(ff, errp);
+    if (*errp != NULL) {
+        return;
+    }
+
+    qtimer_setup(ff, errp);
+    if (*errp != NULL) {
+        goto err_watch;
+    }
+
+    ktimer_setup(ff, errp);
+    if (*errp != NULL) {
+        goto err_qtimer;
+    }
+
+    timer_update(ff);
+
+    return;
+
+err_qtimer:
+    qtimer_tear(ff);
+err_watch:
+    watch_tear(ff);
+}
+
+static void
+file_fence_set_signal(Object *obj, const char *value, Error **errp)
+{
+    FileFence *ff = FILE_FENCE(obj);
+    gchar *gsig;
+
+    if (ff->signal) {
+        error_setg(errp, "Signal property already set");
+        return;
+    }
+
+    gsig = g_ascii_strup(value, -1);
+
+    if (g_strcmp0(gsig, "QUIT") == 0) {
+        ff->signal = SIGQUIT;
+    } else
+    if (g_strcmp0(gsig, "KILL") == 0) {
+        ff->signal = SIGKILL;
+    } else {
+        error_setg(errp, "Invalid signal. Must be 'quit' or 'kill'");
+    }
+
+    g_free(gsig);
+}
+
+static char *
+file_fence_get_signal(Object *obj, Error **errp)
+{
+    FileFence *ff = FILE_FENCE(obj);
+
+    switch (ff->signal) {
+    case SIGKILL:
+        return g_strdup("kill");
+    case SIGQUIT:
+        return g_strdup("quit");
+    }
+
+    /* Unreachable */
+    abort();
+}
+
+static void
+file_fence_set_file(Object *obj, const char *value, Error **errp)
+{
+    FileFence *ff = FILE_FENCE(obj);
+    gchar *dir, *file;
+
+    if (ff->dir) {
+        error_setg(errp, "File property already set");
+        return;
+    }
+
+    dir = g_path_get_dirname(value);
+    if (g_str_equal(dir, ".")) {
+        error_setg(errp, "Path for file-fence must be absolute");
+        return;
+    }
+
+    file = g_path_get_basename(value);
+    if (g_str_equal(file, ".")) {
+        error_setg(errp, "Path for file-fence must be a file");
+        g_free(dir);
+        return;
+    }
+
+    ff->dir = dir;
+    ff->file = file;
+}
+
+static char *
+file_fence_get_file(Object *obj, Error **errp)
+{
+    FileFence *ff = FILE_FENCE(obj);
+
+    if (ff->file) {
+        return g_strconcat(ff->dir, "/", ff->file, NULL);
+    }
+
+    return NULL;
+}
+
+static void
+file_fence_instance_finalize(Object *obj)
+{
+    FileFence *ff = FILE_FENCE(obj);
+
+    ktimer_tear(ff);
+    qtimer_tear(ff);
+    watch_tear(ff);
+
+    g_free(ff->file);
+    g_free(ff->dir);
+}
+
+static void
+file_fence_instance_init(Object *obj)
+{
+    FileFence *ff = FILE_FENCE(obj);
+
+    object_property_add_str(obj, "file",
+                            file_fence_get_file,
+                            file_fence_set_file,
+                            &error_abort);
+    object_property_add_str(obj, "signal",
+                            file_fence_get_signal,
+                            file_fence_set_signal,
+                            &error_abort);
+    object_property_add_uint32_ptr(obj, "qtimeout", &ff->qtimeout,
+                                   false, &error_abort);
+    object_property_add_uint32_ptr(obj, "ktimeout", &ff->ktimeout,
+                                   false, &error_abort);
+}
+
+static void
+file_fence_class_init(ObjectClass *klass, void *class_data)
+{
+    UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass);
+    ucc->complete = file_fence_complete;
+}
+
+static const TypeInfo file_fence_info = {
+    .name = TYPE_FILE_FENCE,
+    .parent = TYPE_OBJECT,
+    .class_init = file_fence_class_init,
+    .instance_size = sizeof(FileFence),
+    .instance_init = file_fence_instance_init,
+    .instance_finalize = file_fence_instance_finalize,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_USER_CREATABLE },
+        { }
+    }
+};
+
+static void
+register_types(void)
+{
+    type_register_static(&file_fence_info);
+}
+
+type_init(register_types);
diff --git a/qemu-options.hx b/qemu-options.hx
index 65c9473b73..995d3d6abf 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -4929,8 +4929,33 @@ CN=laptop.example.com,O=Example 
Home,L=London,ST=London,C=GB
 
 @end table
 
-ETEXI
+@item -object 
file-fence,id=@var{id},file=@var{file},qtimeout=@var{qtimeout},ktimeout=@var{ktimeout},signal=@{signal}
+
+Self-fence Qemu if @var{file} is not modified within a given timeout.
+
+Qemu will watch @var{file} for attribute changes. Touching the file works as a
+heartbeat. This parameter is mandatory.
+
+Fencing happens after @var{qtimeout} or @var{ktimeout} seconds elapse
+without a heartbeat. At least one of these must be specified. Both may be used.
 
+When using @var{qtimeout}, an internal Qemu timer is used. Fencing with
+this method gives Qemu a chance to write a log message indicating which file
+caused the event. If Qemu's main loop is hung for whatever reason, this method
+won't successfully kill Qemu.
+
+When using @var{ktimeout}, a kernel timer is used. In this case, @var{signal}
+can be 'kill' (for SIGKILL, default) or 'quit' (for SIGQUIT). Using SIGQUIT may
+be preferred for obtaining core dumps. If Qemu is hung (eg. uninterruptable
+sleep), this method won't successfully kill Qemu.
+
+It is worth noting that even successfully killing Qemu may not be sufficient to
+completely fence a VM as certain operations like network packets or block
+commands may be pending in the kernel. If that is a concern, systems should
+consider using further fencing mechanisms like hardware watchdogs either in
+addition or in conjunction with this feature for additional protection.
+
+ETEXI
 
 HXCOMM This is the last statement. Insert new options before this line!
 STEXI
-- 
2.20.1

Reply via email to