On 3/25/25 15:11, Kirill Shchetiniuk via Devel wrote:
> When the new CH monitor was started, it ran as a non-daemonized
> process and was a child of the CH driver process. This led to a
> situation where if the CH driver died, the monitor process were
> killed too, terminating the running VM under the monitor. This
> led to termination of all VM started under the libvirt.
> 
> Make new monitor running daemonized to avoid VMs shutdown when
> driver dies. Also added a pidfile its preparetion to be able
> to aquire daemon's PID.
> 
> Signed-off-by: Kirill Shchetiniuk <kshch...@redhat.com>
> ---
>  src/ch/ch_domain.c  |  1 +
>  src/ch/ch_domain.h  |  1 +
>  src/ch/ch_monitor.c | 24 ++++++++++++++++++++++--
>  src/ch/ch_process.c | 16 ++++++++++++++++
>  4 files changed, 40 insertions(+), 2 deletions(-)
> 
> diff --git a/src/ch/ch_domain.c b/src/ch/ch_domain.c
> index a08b18c5b9..c0c9acd85b 100644
> --- a/src/ch/ch_domain.c
> +++ b/src/ch/ch_domain.c
> @@ -68,6 +68,7 @@ virCHDomainObjPrivateFree(void *data)
>      virBitmapFree(priv->autoCpuset);
>      virBitmapFree(priv->autoNodeset);
>      virCgroupFree(priv->cgroup);
> +    g_free(priv->pidfile);
>      g_free(priv);
>  }
>  
> diff --git a/src/ch/ch_domain.h b/src/ch/ch_domain.h
> index 8dea2b2123..69a657f6af 100644
> --- a/src/ch/ch_domain.h
> +++ b/src/ch/ch_domain.h
> @@ -36,6 +36,7 @@ struct _virCHDomainObjPrivate {
>      virBitmap *autoCpuset;
>      virBitmap *autoNodeset;
>      virCgroup *cgroup;
> +    char *pidfile;
>  };
>  
>  #define CH_DOMAIN_PRIVATE(vm) \
> diff --git a/src/ch/ch_monitor.c b/src/ch/ch_monitor.c
> index 0ba927a194..1dc085a755 100644
> --- a/src/ch/ch_monitor.c
> +++ b/src/ch/ch_monitor.c
> @@ -27,6 +27,7 @@
>  
>  #include "datatypes.h"
>  #include "ch_conf.h"
> +#include "ch_domain.h"
>  #include "ch_events.h"
>  #include "ch_interface.h"
>  #include "ch_monitor.h"
> @@ -37,6 +38,7 @@
>  #include "virfile.h"
>  #include "virjson.h"
>  #include "virlog.h"
> +#include "virpidfile.h"
>  #include "virstring.h"
>  
>  #define VIR_FROM_THIS VIR_FROM_CH
> @@ -584,6 +586,8 @@ virCHMonitorNew(virDomainObj *vm, virCHDriverConfig *cfg, 
> int logfile)
>  {
>      g_autoptr(virCHMonitor) mon = NULL;
>      g_autoptr(virCommand) cmd = NULL;
> +    virCHDomainObjPrivate *priv = vm->privateData;
> +    int rv;
>      int socket_fd = 0;
>      int event_monitor_fd;
>  
> @@ -644,6 +648,7 @@ virCHMonitorNew(virDomainObj *vm, virCHDriverConfig *cfg, 
> int logfile)
>      virCommandSetErrorFD(cmd, &logfile);
>      virCommandNonblockingFDs(cmd);
>      virCommandSetUmask(cmd, 0x002);
> +
>      socket_fd = chMonitorCreateSocket(mon->socketpath);
>      if (socket_fd < 0) {
>          virReportSystemError(errno,
> @@ -655,13 +660,28 @@ virCHMonitorNew(virDomainObj *vm, virCHDriverConfig 
> *cfg, int logfile)
>      virCommandAddArg(cmd, "--api-socket");
>      virCommandAddArgFormat(cmd, "fd=%d", socket_fd);
>      virCommandPassFD(cmd, socket_fd, VIR_COMMAND_PASS_FD_CLOSE_PARENT);
> -
>      virCommandAddArg(cmd, "--event-monitor");
>      virCommandAddArgFormat(cmd, "path=%s", mon->eventmonitorpath);
> +    virCommandSetPidFile(cmd, priv->pidfile);
> +    virCommandDaemonize(cmd);
>  
>      /* launch Cloud-Hypervisor socket */
> -    if (virCommandRunAsync(cmd, &mon->pid) < 0)
> +    rv = virCommandRun(cmd, NULL);
> +
> +    if (rv == 0) {
> +        if ((rv = virPidFileReadPath(priv->pidfile, &mon->pid)) < 0) {
> +            virReportSystemError(-rv,
> +                                 _("Domain  %1$s didn't show up"),
> +                                 vm->def->name);
> +            return NULL;
> +        }
> +        VIR_DEBUG("CH vm=%p name=%s running with pid=%lld",
> +                  vm, vm->def->name, (long long)vm->pid);

This needs to be mon->pid because vm->pid is set elsewhere (in the
caller virCHProcessStartRestore() after virCHProcessConnectMonitor()
returns).

> +    } else {
> +        VIR_DEBUG("CH vm=%p name=%s failed to spawn",
> +                  vm, vm->def->name);
>          return NULL;
> +    }


Alternatively, this can be simplified to:

  if (virCommandRun() < 0) {
    VIR_DEBUG(...)
    return NULL;
  }

  if ((rv = virPidFileReadPath()) < 0) {
    virReportSystemError(-rv, ...);
    return NULL;
  }

>  
>      /* open the reader end of fifo before start Event Handler */
>      while ((event_monitor_fd = open(mon->eventmonitorpath, O_RDONLY)) < 0) {
> diff --git a/src/ch/ch_process.c b/src/ch/ch_process.c
> index b3eddd61e8..0954de6180 100644
> --- a/src/ch/ch_process.c
> +++ b/src/ch/ch_process.c
> @@ -36,6 +36,7 @@
>  #include "virjson.h"
>  #include "virlog.h"
>  #include "virnuma.h"
> +#include "virpidfile.h"
>  #include "virstring.h"
>  #include "ch_interface.h"
>  #include "ch_hostdev.h"
> @@ -850,6 +851,21 @@ virCHProcessPrepareHost(virCHDriver *driver, 
> virDomainObj *vm)
>      if (virCHHostdevPrepareDomainDevices(driver, vm->def, hostdev_flags) < 0)
>          return -1;
>  
> +    VIR_FREE(priv->pidfile);
> +    if (!(priv->pidfile = virPidFileBuildPath(cfg->stateDir, 
> vm->def->name))) {
> +        virReportSystemError(errno,
> +                             "%s", _("Failed to build pidfile path."));
> +        return -1;
> +    }
> +
> +    if (unlink(priv->pidfile) < 0 &&
> +        errno != ENOENT) {
> +        virReportSystemError(errno,
> +                             _("Cannot remove stale PID file %1$s"),
> +                             priv->pidfile);
> +        return -1;
> +    }
> +

This works, but something similar should be in virCHProcessStop().

>      /* Ensure no historical cgroup for this VM is lying around */
>      VIR_DEBUG("Ensuring no historical cgroup is lying around");
>      virDomainCgroupRemoveCgroup(vm, priv->cgroup, priv->machineName);


Michal

Reply via email to