Dave Voutila writes:

> Dave Voutila writes:
>
>> vmd(8) users of tech@,
>>
>> NOTE: I have no intention to try to commit this prior to 6.9's release
>> due to its complexity, but I didn't want to "wait" to solicit testers or
>> potential feedback.
>
> Freeze is over, so bumping this thread with an updated diff below.
>

Now that there's been some testing and snaps are building once again,
anyone willing to review & OK?

>>
>> I noticed recently that I could not have two vmctl(8) clients "wait" for
>> the same vm to shutdown as one would cancel the other. Worse yet, if you
>> cancel a wait (^C) you can effectively corrupt the state being used for
>> tracking the waiting client preventing future clients from waiting on
>> the vm.
>>
>> It turns out the socket fd of the vmctl(8) client is being sent by the
>> control process as the peerid in the imsg. This fd is being stored on
>> the vmd_vm structure in the vm_peerid member, but this fd only has
>> meaning in the scope of the control process. Consequently:
>>
>> - only 1 value can be stored at a time, meaning only 1 waiting client
>>   can exist at a time
>> - since vm_peerid is used for storing if another vmd(8) process is
>>   waiting on the vm, vm_peerid can be corrupted by vmctl(8)
>> - the control process cannot update waiting state on vmctl disconnects
>>   and since fd's are reused it's possible the message could be sent to a
>>   vmctl(8) client performing an operation other than a "wait"
>>
>> The below diff:
>>
>> 1. enables support for multiple vmctl(8) clients to wait on the same vm
>>    to terminate
>> 2. keeps the wait state in the control process and out of the parent's
>>    global vm state, tracking waiting parties in a TAILQ
>> 3. removes the waiting client state on client disconnect/cancellation
>> 4. simplifies vmd(8) by removing IMSG_VMDOP_WAIT_VM_REQUEST handling
>>    from the vmm process, which isn't needed (and was partially
>>    responsible for the corruption)
>>
>
> Above design still stands, but I've fixed some messaging issues related
> to the fact the parent process was forwarding
> IMSG_VMDOP_TERMINATE_VM_RESPONSE messages directly to the control
> process resulting in duplicate messages. This broke doing a `vmctl stop`
> for all vms (-a) and waiting (-w). It now only forwards errors.
>
>> There are some subsequent tweaks that may follow this diff, specifically
>> one related to the fact I've switched the logic to send
>> IMSG_VMDOP_TERMINATE_VM_EVENT messages to the control process (which
>> makes sense to me) but control relays a IMSG_VMDOP_TERMINATE_VM_RESPONSE
>> message to the waiting vmctl(8) client. I'd need to update vmctl(8) to
>> look for the other event and don't want to complicate the diff further.
>>
>> If any testers out there can try to break this for me it would be much
>> appreciated. :-)
>>
>
> Testers? I'd like to give people a few days to kick the tires before
> asking for OK to commit.
>
> -dv
>
>
> Index: control.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/vmd/control.c,v
> retrieving revision 1.34
> diff -u -p -r1.34 control.c
> --- control.c 20 Apr 2021 21:11:56 -0000      1.34
> +++ control.c 21 Apr 2021 17:17:04 -0000
> @@ -41,6 +41,13 @@
>
>  struct ctl_connlist ctl_conns = TAILQ_HEAD_INITIALIZER(ctl_conns);
>
> +struct ctl_notify {
> +     int                     ctl_fd;
> +     uint32_t                ctl_vmid;
> +     TAILQ_ENTRY(ctl_notify) entry;
> +};
> +TAILQ_HEAD(ctl_notify_q, ctl_notify) ctl_notify_q =
> +     TAILQ_HEAD_INITIALIZER(ctl_notify_q);
>  void
>        control_accept(int, short, void *);
>  struct ctl_conn
> @@ -78,7 +85,10 @@ int
>  control_dispatch_vmd(int fd, struct privsep_proc *p, struct imsg *imsg)
>  {
>       struct ctl_conn         *c;
> +     struct ctl_notify       *notify = NULL, *notify_next;
>       struct privsep          *ps = p->p_ps;
> +     struct vmop_result       vmr;
> +     int                      waiting = 0;
>
>       switch (imsg->hdr.type) {
>       case IMSG_VMDOP_START_VM_RESPONSE:
> @@ -86,11 +96,11 @@ control_dispatch_vmd(int fd, struct priv
>       case IMSG_VMDOP_SEND_VM_RESPONSE:
>       case IMSG_VMDOP_RECEIVE_VM_RESPONSE:
>       case IMSG_VMDOP_UNPAUSE_VM_RESPONSE:
> -     case IMSG_VMDOP_TERMINATE_VM_RESPONSE:
>       case IMSG_VMDOP_GET_INFO_VM_DATA:
>       case IMSG_VMDOP_GET_INFO_VM_END_DATA:
>       case IMSG_CTL_FAIL:
>       case IMSG_CTL_OK:
> +             /* Provide basic response back to a specific control client */
>               if ((c = control_connbyfd(imsg->hdr.peerid)) == NULL) {
>                       log_warnx("%s: lost control connection: fd %d",
>                           __func__, imsg->hdr.peerid);
> @@ -99,6 +109,61 @@ control_dispatch_vmd(int fd, struct priv
>               imsg_compose_event(&c->iev, imsg->hdr.type,
>                   0, 0, imsg->fd, imsg->data, IMSG_DATA_SIZE(imsg));
>               break;
> +     case IMSG_VMDOP_TERMINATE_VM_RESPONSE:
> +             IMSG_SIZE_CHECK(imsg, &vmr);
> +             memcpy(&vmr, imsg->data, sizeof(vmr));
> +
> +             if ((c = control_connbyfd(imsg->hdr.peerid)) == NULL) {
> +                     log_warnx("%s: lost control connection: fd %d",
> +                         __func__, imsg->hdr.peerid);
> +                     return (0);
> +             }
> +
> +             TAILQ_FOREACH(notify, &ctl_notify_q, entry) {
> +                     if (notify->ctl_fd == (int) imsg->hdr.peerid) {
> +                             /*
> +                              * Update if waiting by vm name. This is only
> +                              * supported when stopping a single vm. If
> +                              * stopping all vms, vmctl(8) sends the request
> +                              * using the vmid.
> +                              */
> +                             if (notify->ctl_vmid < 1)
> +                                     notify->ctl_vmid = vmr.vmr_id;
> +                             waiting = 1;
> +                             break;
> +                     }
> +             }
> +
> +             /* An error needs to be relayed to the client immediately */
> +             if (!waiting || vmr.vmr_result) {
> +                     imsg_compose_event(&c->iev, imsg->hdr.type,
> +                         0, 0, imsg->fd, imsg->data, IMSG_DATA_SIZE(imsg));
> +
> +                     if (notify) {
> +                             TAILQ_REMOVE(&ctl_notify_q, notify, entry);
> +                             free(notify);
> +                     }
> +             }
> +             break;
> +     case IMSG_VMDOP_TERMINATE_VM_EVENT:
> +             /* Notify any waiting clients that a VM terminated */
> +             IMSG_SIZE_CHECK(imsg, &vmr);
> +             memcpy(&vmr, imsg->data, sizeof(vmr));
> +
> +             TAILQ_FOREACH_SAFE(notify, &ctl_notify_q, entry, notify_next) {
> +                     if (notify->ctl_vmid != vmr.vmr_id)
> +                             continue;
> +                     if ((c = control_connbyfd(notify->ctl_fd)) != NULL) {
> +                             /* XXX vmctl expects *_RESPONSE, not *_EVENT */
> +                             imsg_compose_event(&c->iev,
> +                                 IMSG_VMDOP_TERMINATE_VM_RESPONSE,
> +                                 0, 0, imsg->fd, imsg->data,
> +                                 IMSG_DATA_SIZE(imsg));
> +                             TAILQ_REMOVE(&ctl_notify_q, notify, entry);
> +                             free(notify);
> +                     }
> +             }
> +             break;
>       case IMSG_VMDOP_CONFIG:
>               config_getconfig(ps->ps_env, imsg);
>               proc_compose(ps, PROC_PARENT, IMSG_VMDOP_DONE, NULL, 0);
> @@ -276,7 +341,8 @@ control_connbyfd(int fd)
>  void
>  control_close(int fd, struct control_sock *cs)
>  {
> -     struct ctl_conn *c;
> +     struct ctl_conn         *c;
> +     struct ctl_notify       *notify, *notify_next;
>
>       if ((c = control_connbyfd(fd)) == NULL) {
>               log_warn("%s: fd %d: not found", __func__, fd);
> @@ -286,6 +352,14 @@ control_close(int fd, struct control_soc
>       msgbuf_clear(&c->iev.ibuf.w);
>       TAILQ_REMOVE(&ctl_conns, c, entry);
>
> +     TAILQ_FOREACH_SAFE(notify, &ctl_notify_q, entry, notify_next) {
> +             if (notify->ctl_fd == fd) {
> +                     TAILQ_REMOVE(&ctl_notify_q, notify, entry);
> +                     free(notify);
> +                     break;
> +             }
> +     }
> +
>       event_del(&c->iev.ev);
>       close(c->iev.ibuf.fd);
>
> @@ -308,7 +382,8 @@ control_dispatch_imsg(int fd, short even
>       struct imsg                      imsg;
>       struct vmop_create_params        vmc;
>       struct vmop_id                   vid;
> -     int                              n, v, ret = 0;
> +     struct ctl_notify               *notify;
> +     int                              n, v, wait = 0, ret = 0;
>
>       if ((c = control_connbyfd(fd)) == NULL) {
>               log_warn("%s: fd %d: not found", __func__, fd);
> @@ -388,11 +463,25 @@ control_dispatch_imsg(int fd, short even
>                       }
>                       break;
>               case IMSG_VMDOP_WAIT_VM_REQUEST:
> +                     wait = 1;
> +                     /* FALLTHROUGH */
>               case IMSG_VMDOP_TERMINATE_VM_REQUEST:
>                       if (IMSG_DATA_SIZE(&imsg) < sizeof(vid))
>                               goto fail;
>                       memcpy(&vid, imsg.data, sizeof(vid));
>                       vid.vid_uid = c->peercred.uid;
> +
> +                     if (wait || vid.vid_flags & VMOP_WAIT) {
> +                             vid.vid_flags |= VMOP_WAIT;
> +                             notify = calloc(1, sizeof(struct ctl_notify));
> +                             if (notify == NULL)
> +                                     fatal("%s: calloc", __func__);
> +                             notify->ctl_vmid = vid.vid_id;
> +                             notify->ctl_fd = fd;
> +                             TAILQ_INSERT_TAIL(&ctl_notify_q, notify, entry);
> +                             log_debug("%s: registered wait for peer %d",
> +                                 __func__, fd);
> +                     }
>
>                       if (proc_compose_imsg(ps, PROC_PARENT, -1,
>                           imsg.hdr.type, fd, -1, &vid, sizeof(vid)) == -1) {
> Index: vmd.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/vmd/vmd.c,v
> retrieving revision 1.122
> diff -u -p -r1.122 vmd.c
> --- vmd.c     5 Apr 2021 11:35:26 -0000       1.122
> +++ vmd.c     21 Apr 2021 17:17:04 -0000
> @@ -128,42 +128,41 @@ vmd_dispatch_control(int fd, struct priv
>               IMSG_SIZE_CHECK(imsg, &vid);
>               memcpy(&vid, imsg->data, sizeof(vid));
>               flags = vid.vid_flags;
> +             cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE;
>
>               if ((id = vid.vid_id) == 0) {
>                       /* Lookup vm (id) by name */
>                       if ((vm = vm_getbyname(vid.vid_name)) == NULL) {
>                               res = ENOENT;
> -                             cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE;
>                               break;
>                       } else if ((vm->vm_state & VM_STATE_SHUTDOWN) &&
>                           (flags & VMOP_FORCE) == 0) {
>                               res = EALREADY;
> -                             cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE;
>                               break;
>                       } else if (!(vm->vm_state & VM_STATE_RUNNING)) {
>                               res = EINVAL;
> -                             cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE;
>                               break;
>                       }
>                       id = vm->vm_vmid;
>               } else if ((vm = vm_getbyvmid(id)) == NULL) {
>                       res = ENOENT;
> -                     cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE;
>                       break;
>               }
> -             if (vm_checkperm(vm, &vm->vm_params.vmc_owner,
> -                 vid.vid_uid) != 0) {
> +             if (vm_checkperm(vm, &vm->vm_params.vmc_owner, vid.vid_uid)) {
>                       res = EPERM;
> -                     cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE;
>                       break;
>               }
>
> -             memset(&vid, 0, sizeof(vid));
> -             vid.vid_id = id;
> -             vid.vid_flags = flags;
> -             if (proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type,
> -                 imsg->hdr.peerid, -1, &vid, sizeof(vid)) == -1)
> -                     return (-1);
> +             /* Only relay TERMINATION requests, not WAIT requests */
> +             if (imsg->hdr.type == IMSG_VMDOP_TERMINATE_VM_REQUEST) {
> +                     memset(&vid, 0, sizeof(vid));
> +                     vid.vid_id = id;
> +                     vid.vid_flags = flags;
> +
> +                     if (proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type,
> +                             imsg->hdr.peerid, -1, &vid, sizeof(vid)) == -1)
> +                             return (-1);
> +             }
>               break;
>       case IMSG_VMDOP_GET_INFO_VM_REQUEST:
>               proc_forward_imsg(ps, imsg, PROC_VMM, -1);
> @@ -420,12 +419,14 @@ vmd_dispatch_vmm(int fd, struct privsep_
>       case IMSG_VMDOP_TERMINATE_VM_RESPONSE:
>               IMSG_SIZE_CHECK(imsg, &vmr);
>               memcpy(&vmr, imsg->data, sizeof(vmr));
> -             DPRINTF("%s: forwarding TERMINATE VM for vm id %d",
> -                 __func__, vmr.vmr_id);
> -             proc_forward_imsg(ps, imsg, PROC_CONTROL, -1);
> -             if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL)
> -                     break;
> -             if (vmr.vmr_result == 0) {
> +
> +             if (vmr.vmr_result) {
> +                     DPRINTF("%s: forwarding TERMINATE VM for vm id %d",
> +                         __func__, vmr.vmr_id);
> +                     proc_forward_imsg(ps, imsg, PROC_CONTROL, -1);
> +             } else {
> +                     if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL)
> +                             break;
>                       /* Mark VM as shutting down */
>                       vm->vm_state |= VM_STATE_SHUTDOWN;
>               }
> @@ -478,16 +479,13 @@ vmd_dispatch_vmm(int fd, struct privsep_
>                       config_setvm(ps, vm, (uint32_t)-1, vm->vm_uid);
>               }
>
> -             /* Send a response if a control client is waiting for it */
> -             if (imsg->hdr.peerid != (uint32_t)-1) {
> -                     /* the error is meaningless for deferred responses */
> -                     vmr.vmr_result = 0;
> +             /* The error is meaningless for deferred responses */
> +             vmr.vmr_result = 0;
>
> -                     if (proc_compose_imsg(ps, PROC_CONTROL, -1,
> -                         IMSG_VMDOP_TERMINATE_VM_RESPONSE,
> -                         imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1)
> -                             return (-1);
> -             }
> +             if (proc_compose_imsg(ps, PROC_CONTROL, -1,
> +                     IMSG_VMDOP_TERMINATE_VM_EVENT,
> +                     imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1)
> +                     return (-1);
>               break;
>       case IMSG_VMDOP_GET_INFO_VM_DATA:
>               IMSG_SIZE_CHECK(imsg, &vir);
> Index: vmm.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/vmd/vmm.c,v
> retrieving revision 1.100
> diff -u -p -r1.100 vmm.c
> --- vmm.c     11 Apr 2021 21:02:40 -0000      1.100
> +++ vmm.c     21 Apr 2021 17:17:04 -0000
> @@ -150,30 +150,6 @@ vmm_dispatch_parent(int fd, struct privs
>                       res = ENOENT;
>               cmd = IMSG_VMDOP_START_VM_RESPONSE;
>               break;
> -     case IMSG_VMDOP_WAIT_VM_REQUEST:
> -             IMSG_SIZE_CHECK(imsg, &vid);
> -             memcpy(&vid, imsg->data, sizeof(vid));
> -             id = vid.vid_id;
> -
> -             DPRINTF("%s: recv'ed WAIT_VM for %d", __func__, id);
> -
> -             cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE;
> -             if (id == 0) {
> -                     res = ENOENT;
> -             } else if ((vm = vm_getbyvmid(id)) != NULL) {
> -                     if (vm->vm_peerid != (uint32_t)-1) {
> -                             peerid = vm->vm_peerid;
> -                             res = EINTR;
> -                     } else
> -                             cmd = 0;
> -                     vm->vm_peerid = imsg->hdr.peerid;
> -             } else {
> -                     /* vm doesn't exist, cannot stop vm */
> -                     log_debug("%s: cannot stop vm that is not running",
> -                         __func__);
> -                     res = VMD_VM_STOP_INVALID;
> -             }
> -             break;
>       case IMSG_VMDOP_TERMINATE_VM_REQUEST:
>               IMSG_SIZE_CHECK(imsg, &vid);
>               memcpy(&vid, imsg->data, sizeof(vid));
> @@ -221,15 +197,6 @@ vmm_dispatch_parent(int fd, struct privs
>                                           __func__);
>                                       res = VMD_VM_STOP_INVALID;
>                               }
> -                     }
> -                     if ((flags & VMOP_WAIT) &&
> -                         res == 0 && (vm->vm_state & VM_STATE_SHUTDOWN)) {
> -                             if (vm->vm_peerid != (uint32_t)-1) {
> -                                     peerid = vm->vm_peerid;
> -                                     res = EINTR;
> -                             } else
> -                                     cmd = 0;
> -                             vm->vm_peerid = imsg->hdr.peerid;
>                       }
>               } else {
>                       /* VM doesn't exist, cannot stop vm */

Reply via email to