Re: [libvirt PATCH v7 24/35] qemu: Monitor nbdkit process for exit

2023-08-29 Thread Peter Krempa
On Mon, Aug 28, 2023 at 16:44:59 -0500, Jonathon Jongsma wrote:
> Adds the ability to monitor the nbdkit process so that we can take
> action in case the child exits unexpectedly.
> 
> When the nbdkit process exits, we pause the vm, restart nbdkit, and then
> resume the vm. This allows the vm to continue working in the event of a
> nbdkit failure.
> 
> Eventually we may want to generalize this functionality since we may
> need something similar for e.g. qemu-storage-daemon, etc.
> 
> The process is monitored with the pidfd_open() syscall if it exists
> (since linux 5.3). Otherwise it resorts to checking whether the process
> is alive once a second. The one-second time period was chosen somewhat
> arbitrarily.
> 
> Signed-off-by: Jonathon Jongsma 
> ---
>  meson.build |   7 ++
>  src/qemu/qemu_domain.c  |   1 +
>  src/qemu/qemu_domain.h  |   1 +
>  src/qemu/qemu_driver.c  |  18 ++
>  src/qemu/qemu_nbdkit.c  | 137 ++--
>  src/qemu/qemu_nbdkit.h  |   8 ++-
>  src/qemu/qemu_process.c |  13 +++-
>  src/qemu/qemu_process.h |   3 +
>  8 files changed, 180 insertions(+), 8 deletions(-)

[...]

> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index ad8428948b..46e089fe0f 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -4040,6 +4040,21 @@ processResetEvent(virQEMUDriver *driver,
>  }
>  
>  
> +
> +

Too much whitespace before function.

> +static void
> +processNbdkitExitedEvent(virDomainObj *vm,
> + qemuNbdkitProcess *nbdkit)
> +{
> +if (virDomainObjBeginJob(vm, VIR_JOB_MODIFY) < 0)
> +return;
> +
> +qemuNbdkitProcessRestart(nbdkit, vm);
> +
> +virDomainObjEndJob(vm);
> +}
> +
> +
>  static void qemuProcessEventHandler(void *data, void *opaque)
>  {
>  struct qemuProcessEvent *processEvent = data;

[...]

> diff --git a/src/qemu/qemu_nbdkit.c b/src/qemu/qemu_nbdkit.c
> index df638e99c0..c3fa349922 100644
> --- a/src/qemu/qemu_nbdkit.c
> +++ b/src/qemu/qemu_nbdkit.c

[...]

> @@ -611,6 +613,106 @@ qemuNbdkitCapsCacheNew(const char *cachedir)
>  }
>  
>  
> +void
> +qemuNbdkitProcessRestart(qemuNbdkitProcess *proc,
> + virDomainObj *vm)
> +{
> +qemuDomainObjPrivate *vmpriv = vm->privateData;
> +virQEMUDriver *driver = vmpriv->driver;
> +
> +/* clean up resources associated with process */
> +qemuNbdkitProcessStop(proc);
> +
> +if (qemuNbdkitProcessStart(proc, vm, driver) < 0)
> +VIR_WARN("Unable to restart nbkdit process");

As noted before VIR_WARN is not acceptable. Move patch 25 before this
one and do the taint right away. And ditch the VIR_WARN.

> +}
> +
> +
> +typedef struct {
> +qemuNbdkitProcess *proc;
> +virDomainObj *vm;
> +} qemuNbdkitProcessEventData;
> +
> +
> +static qemuNbdkitProcessEventData*
> +qemuNbdkitProcessEventDataNew(qemuNbdkitProcess *proc,
> +  virDomainObj *vm)
> +{
> +qemuNbdkitProcessEventData *d = g_new(qemuNbdkitProcessEventData, 1);
> +d->proc = proc;
> +d->vm = virObjectRef(vm);
> +return d;
> +}
> +
> +
> +static void
> +qemuNbdkitProcessEventDataFree(qemuNbdkitProcessEventData *d)
> +{
> +virObjectUnref(d->vm);
> +g_free(d);
> +}
> +
> +
> +#if WITH_DECL_SYS_PIDFD_OPEN
> +static void
> +qemuNbdkitProcessPidfdCb(int watch G_GNUC_UNUSED,
> + int fd,
> + int events G_GNUC_UNUSED,
> + void *opaque)
> +{
> +qemuNbdkitProcessEventData *d = opaque;
> +
> +VIR_FORCE_CLOSE(fd);
> +VIR_DEBUG("nbdkit process %i died", d->proc->pid);

I presume that the 'qemuNbdkitProcess' struct is protected by belonging
into the virDomainObj and thus using it's lock. In such chase this
violates locking as 'vm' is not locked at this point, but 'proc' is
accessed.

> +/* submit an event so that it is handled in the per-vm event thread */
> +qemuProcessHandleNbdkitExit(d->proc, d->vm);
> +}
> +#endif /* WITH_DECL_SYS_PIDFD_OPEN */
> +
> +
> +static int
> +qemuNbdkitProcessStartMonitor(qemuNbdkitProcess *proc,
> +  virDomainObj *vm)
> +{
> +#if WITH_DECL_SYS_PIDFD_OPEN
> +int pidfd;
> +
> +pidfd = syscall(SYS_pidfd_open, proc->pid, 0);
> +if (pidfd < 0) {
> +virReportSystemError(errno, _("pidfd_open failed for %1$i"), 
> proc->pid);
> +return -1;
> +}
> +
> +proc->eventwatch = virEventAddHandle(pidfd,
> + VIR_EVENT_HANDLE_READABLE,
> + qemuNbdkitProcessPidfdCb,
> + qemuNbdkitProcessEventDataNew(proc, 
> vm),
> + 
> (virFreeCallback)qemuNbdkitProcessEventDataFree);

'virEventAddHandle' can fail and return -1, if the corresponding
callback is not registered. All other callers check the return value and
also prevent the memory leak from the opaque data 

[libvirt PATCH v7 24/35] qemu: Monitor nbdkit process for exit

2023-08-28 Thread Jonathon Jongsma
Adds the ability to monitor the nbdkit process so that we can take
action in case the child exits unexpectedly.

When the nbdkit process exits, we pause the vm, restart nbdkit, and then
resume the vm. This allows the vm to continue working in the event of a
nbdkit failure.

Eventually we may want to generalize this functionality since we may
need something similar for e.g. qemu-storage-daemon, etc.

The process is monitored with the pidfd_open() syscall if it exists
(since linux 5.3). Otherwise it resorts to checking whether the process
is alive once a second. The one-second time period was chosen somewhat
arbitrarily.

Signed-off-by: Jonathon Jongsma 
---
 meson.build |   7 ++
 src/qemu/qemu_domain.c  |   1 +
 src/qemu/qemu_domain.h  |   1 +
 src/qemu/qemu_driver.c  |  18 ++
 src/qemu/qemu_nbdkit.c  | 137 ++--
 src/qemu/qemu_nbdkit.h  |   8 ++-
 src/qemu/qemu_process.c |  13 +++-
 src/qemu/qemu_process.h |   3 +
 8 files changed, 180 insertions(+), 8 deletions(-)

diff --git a/meson.build b/meson.build
index 965ada483b..8da3b59070 100644
--- a/meson.build
+++ b/meson.build
@@ -682,6 +682,13 @@ symbols = [
   [ 'sched.h', 'cpu_set_t' ],
 ]
 
+if host_machine.system() == 'linux'
+  symbols += [
+# process management
+[ 'sys/syscall.h', 'SYS_pidfd_open' ],
+  ]
+endif
+
 foreach symbol : symbols
   if cc.has_header_symbol(symbol[0], symbol[1], args: '-D_GNU_SOURCE', prefix: 
symbol.get(2, ''))
 conf.set('WITH_DECL_@0@'.format(symbol[1].to_upper()), 1)
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 46fe5a1cf4..2b8ece8f19 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -11511,6 +11511,7 @@ qemuProcessEventFree(struct qemuProcessEvent *event)
 case QEMU_PROCESS_EVENT_PR_DISCONNECT:
 case QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION:
 case QEMU_PROCESS_EVENT_RESET:
+case QEMU_PROCESS_EVENT_NBDKIT_EXITED:
 case QEMU_PROCESS_EVENT_LAST:
 break;
 }
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index ddd20e67b4..f018b45eb6 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -465,6 +465,7 @@ typedef enum {
 QEMU_PROCESS_EVENT_MEMORY_DEVICE_SIZE_CHANGE,
 QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION,
 QEMU_PROCESS_EVENT_RESET,
+QEMU_PROCESS_EVENT_NBDKIT_EXITED,
 
 QEMU_PROCESS_EVENT_LAST
 } qemuProcessEventType;
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index ad8428948b..46e089fe0f 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -4040,6 +4040,21 @@ processResetEvent(virQEMUDriver *driver,
 }
 
 
+
+
+static void
+processNbdkitExitedEvent(virDomainObj *vm,
+ qemuNbdkitProcess *nbdkit)
+{
+if (virDomainObjBeginJob(vm, VIR_JOB_MODIFY) < 0)
+return;
+
+qemuNbdkitProcessRestart(nbdkit, vm);
+
+virDomainObjEndJob(vm);
+}
+
+
 static void qemuProcessEventHandler(void *data, void *opaque)
 {
 struct qemuProcessEvent *processEvent = data;
@@ -4097,6 +4112,9 @@ static void qemuProcessEventHandler(void *data, void 
*opaque)
 case QEMU_PROCESS_EVENT_RESET:
 processResetEvent(driver, vm);
 break;
+case QEMU_PROCESS_EVENT_NBDKIT_EXITED:
+processNbdkitExitedEvent(vm, processEvent->data);
+break;
 case QEMU_PROCESS_EVENT_LAST:
 break;
 }
diff --git a/src/qemu/qemu_nbdkit.c b/src/qemu/qemu_nbdkit.c
index df638e99c0..c3fa349922 100644
--- a/src/qemu/qemu_nbdkit.c
+++ b/src/qemu/qemu_nbdkit.c
@@ -19,6 +19,7 @@
 
 #include 
 #include 
+#include 
 
 #include "vircommand.h"
 #include "virerror.h"
@@ -33,6 +34,7 @@
 #include "qemu_nbdkit.h"
 #define LIBVIRT_QEMU_NBDKITPRIV_H_ALLOW
 #include "qemu_nbdkitpriv.h"
+#include "qemu_process.h"
 #include "qemu_security.h"
 
 #include 
@@ -611,6 +613,106 @@ qemuNbdkitCapsCacheNew(const char *cachedir)
 }
 
 
+void
+qemuNbdkitProcessRestart(qemuNbdkitProcess *proc,
+ virDomainObj *vm)
+{
+qemuDomainObjPrivate *vmpriv = vm->privateData;
+virQEMUDriver *driver = vmpriv->driver;
+
+/* clean up resources associated with process */
+qemuNbdkitProcessStop(proc);
+
+if (qemuNbdkitProcessStart(proc, vm, driver) < 0)
+VIR_WARN("Unable to restart nbkdit process");
+}
+
+
+typedef struct {
+qemuNbdkitProcess *proc;
+virDomainObj *vm;
+} qemuNbdkitProcessEventData;
+
+
+static qemuNbdkitProcessEventData*
+qemuNbdkitProcessEventDataNew(qemuNbdkitProcess *proc,
+  virDomainObj *vm)
+{
+qemuNbdkitProcessEventData *d = g_new(qemuNbdkitProcessEventData, 1);
+d->proc = proc;
+d->vm = virObjectRef(vm);
+return d;
+}
+
+
+static void
+qemuNbdkitProcessEventDataFree(qemuNbdkitProcessEventData *d)
+{
+virObjectUnref(d->vm);
+g_free(d);
+}
+
+
+#if WITH_DECL_SYS_PIDFD_OPEN
+static void
+qemuNbdkitProcessPidfdCb(int watch G_GNUC_UNUSED,
+ int fd,
+ int