On Sun, Jul 11, 2021 at 08:10:42AM -0400, Dave Voutila wrote:
>
> Ping...looking for OK. Would like to get this committed this week.
>

Sorry this took so long. ok mlarkin.

Thanks to the numerous testers who ran with this for the past few
weeks.

> Dave Voutila writes:
>
> > Looking for an OK for this one now. Anyone?
> >
> > Dave Voutila <[email protected]> writes:
> >
> >> Dave Voutila writes:
> >>
> >>> Looking for some broader testing of the following diff. It cleans up
> >>> some complicated logic predominantly left over from the early days of
> >>> vmd prior to its having a dedicated device thread.
> >>
> >> Still looking for tester feedback. I've been running this diff while
> >> hosting multiple guests continuously (OpenBSD-current, Alpine 3.14,
> >> Debian 10.10, Ubuntu 20.04) with no issues.
> >>
> >> I know a few folks have told me they've applied the diff and have not
> >> seen issues.
> >
> > I've had positive reports from 4 people. Thanks everyone that tested and
> > provided feedback!
> >
> >>
> >> I'll prod for OK next week, so if you've tested the diff please let me
> >> know!
> >
> > OK to commit?
> >
> >>
> >>>
> >>> In summary, this diff:
> >>>
> >>> - Removes vionet "rx pending" state handling and removes the code path
> >>>   for the vcpu thread to possibly take control of the virtio net device
> >>>   and attempt a read of the underlying tap(4). (virtio.{c,h}, vm.c)
> >>>
> >>> - Removes ns8250 "rcv pending" state handling and removes the code path
> >>>   for the vcpu thread to read the pty via com_rcv(). (ns8250.{c,h})
> >>>
> >>> In both of the above cases, the event handling thread will be notified
> >>> of readable data and deal with it.
> >>>
> >>> Why remove them? The logic is overly complicated and hard to reason
> >>> about for zero gain. (This diff results in no intended functional
> >>> change.) Plus, some of the above logic I helped add to deal with the
> >>> race conditions and state corruption over a year ago. The logic was
> >>> needed once upon a time, but shouldn't be needed at present.
> >>>
> >>> I've had positive testing feedback from abieber@ so far with at least
> >>> the ns8250/uart diff, but want to cast a broader net here with both
> >>> before either part is committed. I debated splitting these up, but
> >>> they're thematically related.
> >>>
> >>> -dv
> >>>
> >>> Index: virtio.c
> >>> ===================================================================
> >>> RCS file: /cvs/src/usr.sbin/vmd/virtio.c,v
> >>> retrieving revision 1.91
> >>> diff -u -p -r1.91 virtio.c
> >>> --- virtio.c      21 Jun 2021 02:38:18 -0000      1.91
> >>> +++ virtio.c      23 Jun 2021 11:28:03 -0000
> >>> @@ -1254,12 +1254,12 @@ static int
> >>>  vionet_rx(struct vionet_dev *dev)
> >>>  {
> >>>   char buf[PAGE_SIZE];
> >>> - int hasdata, num_enq = 0, spc = 0;
> >>> + int num_enq = 0, spc = 0;
> >>>   struct ether_header *eh;
> >>>   ssize_t sz;
> >>>
> >>>   do {
> >>> -         sz = read(dev->fd, buf, sizeof buf);
> >>> +         sz = read(dev->fd, buf, sizeof(buf));
> >>>           if (sz == -1) {
> >>>                   /*
> >>>                    * If we get EAGAIN, No data is currently available.
> >>> @@ -1270,21 +1270,17 @@ vionet_rx(struct vionet_dev *dev)
> >>>                               "device");
> >>>           } else if (sz > 0) {
> >>>                   eh = (struct ether_header *)buf;
> >>> -                 if (!dev->lockedmac || sz < ETHER_HDR_LEN ||
> >>> +                 if (!dev->lockedmac ||
> >>>                       ETHER_IS_MULTICAST(eh->ether_dhost) ||
> >>>                       memcmp(eh->ether_dhost, dev->mac,
> >>>                       sizeof(eh->ether_dhost)) == 0)
> >>>                           num_enq += vionet_enq_rx(dev, buf, sz, &spc);
> >>>           } else if (sz == 0) {
> >>>                   log_debug("process_rx: no data");
> >>> -                 hasdata = 0;
> >>>                   break;
> >>>           }
> >>> + } while (spc > 0 && sz > 0);
> >>>
> >>> -         hasdata = fd_hasdata(dev->fd);
> >>> - } while (spc && hasdata);
> >>> -
> >>> - dev->rx_pending = hasdata;
> >>>   return (num_enq);
> >>>  }
> >>>
> >>> @@ -1301,16 +1297,6 @@ vionet_rx_event(int fd, short kind, void
> >>>
> >>>   mutex_lock(&dev->mutex);
> >>>
> >>> - /*
> >>> -  * We already have other data pending to be received. The data that
> >>> -  * has become available now will be enqueued to the vionet_dev
> >>> -  * later.
> >>> -  */
> >>> - if (dev->rx_pending) {
> >>> -         mutex_unlock(&dev->mutex);
> >>> -         return;
> >>> - }
> >>> -
> >>>   if (vionet_rx(dev) > 0) {
> >>>           /* XXX: vcpu_id */
> >>>           vcpu_assert_pic_irq(dev->vm_id, 0, dev->irq);
> >>> @@ -1320,40 +1306,6 @@ vionet_rx_event(int fd, short kind, void
> >>>  }
> >>>
> >>>  /*
> >>> - * vionet_process_rx
> >>> - *
> >>> - * Processes any remaining pending receivable data for a vionet device.
> >>> - * Called on VCPU exit. Although we poll on the tap file descriptor of
> >>> - * a vionet_dev in a separate thread, this function still needs to be
> >>> - * called on VCPU exit: it can happen that not all data fits into the
> >>> - * receive queue of the vionet_dev immediately. So any outstanding data
> >>> - * is handled here.
> >>> - *
> >>> - * Parameters:
> >>> - *  vm_id: VM ID of the VM for which to process vionet events
> >>> - */
> >>> -void
> >>> -vionet_process_rx(uint32_t vm_id)
> >>> -{
> >>> - int i;
> >>> -
> >>> - for (i = 0 ; i < nr_vionet; i++) {
> >>> -         mutex_lock(&vionet[i].mutex);
> >>> -         if (!vionet[i].rx_added) {
> >>> -                 mutex_unlock(&vionet[i].mutex);
> >>> -                 continue;
> >>> -         }
> >>> -
> >>> -         if (vionet[i].rx_pending) {
> >>> -                 if (vionet_rx(&vionet[i])) {
> >>> -                         vcpu_assert_pic_irq(vm_id, 0, vionet[i].irq);
> >>> -                 }
> >>> -         }
> >>> -         mutex_unlock(&vionet[i].mutex);
> >>> - }
> >>> -}
> >>> -
> >>> -/*
> >>>   * Must be called with dev->mutex acquired.
> >>>   */
> >>>  void
> >>> @@ -1383,7 +1335,6 @@ vionet_notify_rx(struct vionet_dev *dev)
> >>>   /* Compute offset into avail ring */
> >>>   avail = (struct vring_avail *)(vr + dev->vq[RXQ].vq_availoffset);
> >>>
> >>> - dev->rx_added = 1;
> >>>   dev->vq[RXQ].notified_avail = avail->idx - 1;
> >>>
> >>>   free(vr);
> >>> @@ -1937,7 +1888,6 @@ virtio_init(struct vmd_vm *vm, int child
> >>>                   vionet[i].vq[TXQ].last_avail = 0;
> >>>                   vionet[i].vq[TXQ].notified_avail = 0;
> >>>                   vionet[i].fd = child_taps[i];
> >>> -                 vionet[i].rx_pending = 0;
> >>>                   vionet[i].vm_id = vcp->vcp_id;
> >>>                   vionet[i].vm_vmid = vm->vm_vmid;
> >>>                   vionet[i].irq = pci_get_dev_irq(id);
> >>> @@ -2222,7 +2172,6 @@ vionet_restore(int fd, struct vmd_vm *vm
> >>>                           return (-1);
> >>>                   }
> >>>                   vionet[i].fd = child_taps[i];
> >>> -                 vionet[i].rx_pending = 0;
> >>>                   vionet[i].vm_id = vcp->vcp_id;
> >>>                   vionet[i].vm_vmid = vm->vm_vmid;
> >>>                   vionet[i].irq = pci_get_dev_irq(vionet[i].pci_id);
> >>> Index: virtio.h
> >>> ===================================================================
> >>> RCS file: /cvs/src/usr.sbin/vmd/virtio.h,v
> >>> retrieving revision 1.40
> >>> diff -u -p -r1.40 virtio.h
> >>> --- virtio.h      21 Jun 2021 02:38:18 -0000      1.40
> >>> +++ virtio.h      23 Jun 2021 11:28:03 -0000
> >>> @@ -217,8 +217,7 @@ struct vionet_dev {
> >>>
> >>>   struct virtio_vq_info vq[VIRTIO_MAX_QUEUES];
> >>>
> >>> - int fd, rx_added;
> >>> - int rx_pending;
> >>> + int fd;
> >>>   uint32_t vm_id;
> >>>   uint32_t vm_vmid;
> >>>   int irq;
> >>> Index: vm.c
> >>> ===================================================================
> >>> RCS file: /cvs/src/usr.sbin/vmd/vm.c,v
> >>> retrieving revision 1.63
> >>> diff -u -p -r1.63 vm.c
> >>> --- vm.c  16 Jun 2021 16:55:02 -0000      1.63
> >>> +++ vm.c  23 Jun 2021 11:28:03 -0000
> >>> @@ -1711,9 +1711,6 @@ vcpu_exit(struct vm_run_params *vrp)
> >>>               __progname, vrp->vrp_exit_reason);
> >>>   }
> >>>
> >>> - /* Process any pending traffic */
> >>> - vionet_process_rx(vrp->vrp_vm_id);
> >>> -
> >>>   vrp->vrp_continue = 1;
> >>>
> >>>   return (0);
> >>>
> >>>
> >>> Index: ns8250.c
> >>> ===================================================================
> >>> RCS file: /cvs/src/usr.sbin/vmd/ns8250.c,v
> >>> retrieving revision 1.31
> >>> diff -u -p -r1.31 ns8250.c
> >>> --- ns8250.c      16 Jun 2021 16:55:02 -0000      1.31
> >>> +++ ns8250.c      24 Jun 2021 02:20:10 -0000
> >>> @@ -36,14 +36,10 @@
> >>>  extern char *__progname;
> >>>  struct ns8250_dev com1_dev;
> >>>
> >>> -/* Flags to distinguish calling threads to com_rcv */
> >>> -#define NS8250_DEV_THREAD        0
> >>> -#define NS8250_CPU_THREAD        1
> >>> -
> >>>  static struct vm_dev_pipe dev_pipe;
> >>>
> >>>  static void com_rcv_event(int, short, void *);
> >>> -static void com_rcv(struct ns8250_dev *, uint32_t, uint32_t, int);
> >>> +static void com_rcv(struct ns8250_dev *, uint32_t, uint32_t);
> >>>
> >>>  /*
> >>>   * ns8250_pipe_dispatch
> >>> @@ -58,11 +54,6 @@ ns8250_pipe_dispatch(int fd, short event
> >>>
> >>>   msg = vm_pipe_recv(&dev_pipe);
> >>>   switch(msg) {
> >>> - case NS8250_ZERO_READ:
> >>> -         log_debug("%s: resetting events after zero byte read", 
> >>> __func__);
> >>> -         event_del(&com1_dev.event);
> >>> -         event_add(&com1_dev.wake, NULL);
> >>> -         break;
> >>>   case NS8250_RATELIMIT:
> >>>           evtimer_add(&com1_dev.rate, &com1_dev.rate_tv);
> >>>           break;
> >>> @@ -110,7 +101,6 @@ ns8250_init(int fd, uint32_t vmid)
> >>>   com1_dev.fd = fd;
> >>>   com1_dev.irq = 4;
> >>>   com1_dev.portid = NS8250_COM1;
> >>> - com1_dev.rcv_pending = 0;
> >>>   com1_dev.vmid = vmid;
> >>>   com1_dev.byte_out = 0;
> >>>   com1_dev.regs.divlo = 1;
> >>> @@ -163,17 +153,8 @@ com_rcv_event(int fd, short kind, void *
> >>>           return;
> >>>   }
> >>>
> >>> - /*
> >>> -  * We already have other data pending to be received. The data that
> >>> -  * has become available now will be moved to the com port later by
> >>> -  * the vcpu.
> >>> -  */
> >>> - if (!com1_dev.rcv_pending) {
> >>> -         if (com1_dev.regs.lsr & LSR_RXRDY)
> >>> -                 com1_dev.rcv_pending = 1;
> >>> -         else
> >>> -                 com_rcv(&com1_dev, (uintptr_t)arg, 0, 
> >>> NS8250_DEV_THREAD);
> >>> - }
> >>> + if ((com1_dev.regs.lsr & LSR_RXRDY) == 0)
> >>> +         com_rcv(&com1_dev, (uintptr_t)arg, 0);
> >>>
> >>>   /* If pending interrupt, inject */
> >>>   if ((com1_dev.regs.iir & IIR_NOPEND) == 0) {
> >>> @@ -216,7 +197,7 @@ com_rcv_handle_break(struct ns8250_dev *
> >>>   * Must be called with the mutex of the com device acquired
> >>>   */
> >>>  static void
> >>> -com_rcv(struct ns8250_dev *com, uint32_t vm_id, uint32_t vcpu_id, int 
> >>> thread)
> >>> +com_rcv(struct ns8250_dev *com, uint32_t vm_id, uint32_t vcpu_id)
> >>>  {
> >>>   char buf[2];
> >>>   ssize_t sz;
> >>> @@ -236,13 +217,9 @@ com_rcv(struct ns8250_dev *com, uint32_t
> >>>           if (errno != EAGAIN)
> >>>                   log_warn("unexpected read error on com device");
> >>>   } else if (sz == 0) {
> >>> -         if (thread == NS8250_DEV_THREAD) {
> >>> -                 event_del(&com->event);
> >>> -                 event_add(&com->wake, NULL);
> >>> -         } else {
> >>> -                 /* Called by vcpu thread, use pipe for event changes */
> >>> -                 vm_pipe_send(&dev_pipe, NS8250_ZERO_READ);
> >>> -         }
> >>> +         /* Zero read typically occurs on a disconnect */
> >>> +         event_del(&com->event);
> >>> +         event_add(&com->wake, NULL);
> >>>           return;
> >>>   } else if (sz != 1 && sz != 2)
> >>>           log_warnx("unexpected read return value %zd on com device", sz);
> >>> @@ -258,8 +235,6 @@ com_rcv(struct ns8250_dev *com, uint32_t
> >>>                   com->regs.iir &= ~IIR_NOPEND;
> >>>           }
> >>>   }
> >>> -
> >>> - com->rcv_pending = fd_hasdata(com->fd);
> >>>  }
> >>>
> >>>  /*
> >>> @@ -337,9 +312,6 @@ vcpu_process_com_data(struct vm_exit *ve
> >>>            */
> >>>           if (com1_dev.regs.iir == 0x0)
> >>>                   com1_dev.regs.iir = 0x1;
> >>> -
> >>> -         if (com1_dev.rcv_pending)
> >>> -                 com_rcv(&com1_dev, vm_id, vcpu_id, NS8250_CPU_THREAD);
> >>>   }
> >>>
> >>>   /* If pending interrupt, make sure it gets injected */
> >>> @@ -688,7 +660,6 @@ ns8250_restore(int fd, int con_fd, uint3
> >>>   com1_dev.fd = con_fd;
> >>>   com1_dev.irq = 4;
> >>>   com1_dev.portid = NS8250_COM1;
> >>> - com1_dev.rcv_pending = 0;
> >>>   com1_dev.vmid = vmid;
> >>>   com1_dev.byte_out = 0;
> >>>   com1_dev.regs.divlo = 1;
> >>> Index: ns8250.h
> >>> ===================================================================
> >>> RCS file: /cvs/src/usr.sbin/vmd/ns8250.h,v
> >>> retrieving revision 1.9
> >>> diff -u -p -r1.9 ns8250.h
> >>> --- ns8250.h      11 Dec 2019 06:45:16 -0000      1.9
> >>> +++ ns8250.h      24 Jun 2021 02:20:10 -0000
> >>> @@ -69,7 +69,6 @@ struct ns8250_dev {
> >>>   enum ns8250_portid portid;
> >>>   int fd;
> >>>   int irq;
> >>> - int rcv_pending;
> >>>   uint32_t vmid;
> >>>   uint64_t byte_out;
> >>>   uint32_t baudrate;
>

Reply via email to