On Sun, Jul 11, 2021 at 08:10:42AM -0400, Dave Voutila wrote: > > Ping...looking for OK. Would like to get this committed this week. >
Sorry this took so long. ok mlarkin. Thanks to the numerous testers who ran with this for the past few weeks. > Dave Voutila writes: > > > Looking for an OK for this one now. Anyone? > > > > Dave Voutila <[email protected]> writes: > > > >> Dave Voutila writes: > >> > >>> Looking for some broader testing of the following diff. It cleans up > >>> some complicated logic predominantly left over from the early days of > >>> vmd prior to its having a dedicated device thread. > >> > >> Still looking for tester feedback. I've been running this diff while > >> hosting multiple guests continously (OpenBSD-current, Alpine 3.14, > >> Debian 10.10, Ubuntu 20.04) with no issues. > >> > >> I know a few folks have told me they've applied the diff and have not > >> seen issues. > > > > I've had positive reports from 4 people. Thanks everyone that tested and > > provided feedback! > > > >> > >> I'll prod for OK next week, so if you've tested the diff please let me > >> know! > > > > OK to commit? > > > >> > >>> > >>> In summary, this diff: > >>> > >>> - Removes vionet "rx pending" state handling and removes the code path > >>> for the vcpu thread to possibly take control of the virtio net device > >>> and attempt a read of the underlying tap(4). (virtio.{c,h}, vm.c) > >>> > >>> - Removes ns8250 "rcv pending" state handling and removes the code path > >>> for the vcpu thread to read the pty via com_rcv(). (ns8250.{c,h}) > >>> > >>> In both of the above cases, the event handling thread will be notified > >>> of readable data and deal with it. > >>> > >>> Why remove them? The logic is overly complicated and hard to reason > >>> about for zero gain. (This diff results in no intended functional > >>> change.) Plus, some of the above logic I helped add to deal with the > >>> race conditions and state corruption over a year ago. The logic was > >>> needed once upon a time, but shouldn't be needed at present. > >>> > >>> I've had positive testing feedback from abieber@ so far with at least > >>> the ns8250/uart diff, but want to cast a broader net here with both > >>> before either part is committed. I debated splitting these up, but > >>> they're thematically related. > >>> > >>> -dv > >>> > >>> Index: virtio.c > >>> =================================================================== > >>> RCS file: /cvs/src/usr.sbin/vmd/virtio.c,v > >>> retrieving revision 1.91 > >>> diff -u -p -r1.91 virtio.c > >>> --- virtio.c 21 Jun 2021 02:38:18 -0000 1.91 > >>> +++ virtio.c 23 Jun 2021 11:28:03 -0000 > >>> @@ -1254,12 +1254,12 @@ static int > >>> vionet_rx(struct vionet_dev *dev) > >>> { > >>> char buf[PAGE_SIZE]; > >>> - int hasdata, num_enq = 0, spc = 0; > >>> + int num_enq = 0, spc = 0; > >>> struct ether_header *eh; > >>> ssize_t sz; > >>> > >>> do { > >>> - sz = read(dev->fd, buf, sizeof buf); > >>> + sz = read(dev->fd, buf, sizeof(buf)); > >>> if (sz == -1) { > >>> /* > >>> * If we get EAGAIN, No data is currently available. > >>> @@ -1270,21 +1270,17 @@ vionet_rx(struct vionet_dev *dev) > >>> "device"); > >>> } else if (sz > 0) { > >>> eh = (struct ether_header *)buf; > >>> - if (!dev->lockedmac || sz < ETHER_HDR_LEN || > >>> + if (!dev->lockedmac || > >>> ETHER_IS_MULTICAST(eh->ether_dhost) || > >>> memcmp(eh->ether_dhost, dev->mac, > >>> sizeof(eh->ether_dhost)) == 0) > >>> num_enq += vionet_enq_rx(dev, buf, sz, &spc); > >>> } else if (sz == 0) { > >>> log_debug("process_rx: no data"); > >>> - hasdata = 0; > >>> break; > >>> } > >>> + } while (spc > 0 && sz > 0); > >>> > >>> - hasdata = fd_hasdata(dev->fd); > >>> - } while (spc && hasdata); > >>> - > >>> - dev->rx_pending = hasdata; > >>> return (num_enq); > >>> } > >>> > >>> @@ -1301,16 +1297,6 @@ vionet_rx_event(int fd, short kind, void > >>> > >>> mutex_lock(&dev->mutex); > >>> > >>> - /* > >>> - * We already have other data pending to be received. The data that > >>> - * has become available now will be enqueued to the vionet_dev > >>> - * later. > >>> - */ > >>> - if (dev->rx_pending) { > >>> - mutex_unlock(&dev->mutex); > >>> - return; > >>> - } > >>> - > >>> if (vionet_rx(dev) > 0) { > >>> /* XXX: vcpu_id */ > >>> vcpu_assert_pic_irq(dev->vm_id, 0, dev->irq); > >>> @@ -1320,40 +1306,6 @@ vionet_rx_event(int fd, short kind, void > >>> } > >>> > >>> /* > >>> - * vionet_process_rx > >>> - * > >>> - * Processes any remaining pending receivable data for a vionet device. > >>> - * Called on VCPU exit. Although we poll on the tap file descriptor of > >>> - * a vionet_dev in a separate thread, this function still needs to be > >>> - * called on VCPU exit: it can happen that not all data fits into the > >>> - * receive queue of the vionet_dev immediately. So any outstanding data > >>> - * is handled here. > >>> - * > >>> - * Parameters: > >>> - * vm_id: VM ID of the VM for which to process vionet events > >>> - */ > >>> -void > >>> -vionet_process_rx(uint32_t vm_id) > >>> -{ > >>> - int i; > >>> - > >>> - for (i = 0 ; i < nr_vionet; i++) { > >>> - mutex_lock(&vionet[i].mutex); > >>> - if (!vionet[i].rx_added) { > >>> - mutex_unlock(&vionet[i].mutex); > >>> - continue; > >>> - } > >>> - > >>> - if (vionet[i].rx_pending) { > >>> - if (vionet_rx(&vionet[i])) { > >>> - vcpu_assert_pic_irq(vm_id, 0, vionet[i].irq); > >>> - } > >>> - } > >>> - mutex_unlock(&vionet[i].mutex); > >>> - } > >>> -} > >>> - > >>> -/* > >>> * Must be called with dev->mutex acquired. > >>> */ > >>> void > >>> @@ -1383,7 +1335,6 @@ vionet_notify_rx(struct vionet_dev *dev) > >>> /* Compute offset into avail ring */ > >>> avail = (struct vring_avail *)(vr + dev->vq[RXQ].vq_availoffset); > >>> > >>> - dev->rx_added = 1; > >>> dev->vq[RXQ].notified_avail = avail->idx - 1; > >>> > >>> free(vr); > >>> @@ -1937,7 +1888,6 @@ virtio_init(struct vmd_vm *vm, int child > >>> vionet[i].vq[TXQ].last_avail = 0; > >>> vionet[i].vq[TXQ].notified_avail = 0; > >>> vionet[i].fd = child_taps[i]; > >>> - vionet[i].rx_pending = 0; > >>> vionet[i].vm_id = vcp->vcp_id; > >>> vionet[i].vm_vmid = vm->vm_vmid; > >>> vionet[i].irq = pci_get_dev_irq(id); > >>> @@ -2222,7 +2172,6 @@ vionet_restore(int fd, struct vmd_vm *vm > >>> return (-1); > >>> } > >>> vionet[i].fd = child_taps[i]; > >>> - vionet[i].rx_pending = 0; > >>> vionet[i].vm_id = vcp->vcp_id; > >>> vionet[i].vm_vmid = vm->vm_vmid; > >>> vionet[i].irq = pci_get_dev_irq(vionet[i].pci_id); > >>> Index: virtio.h > >>> =================================================================== > >>> RCS file: /cvs/src/usr.sbin/vmd/virtio.h,v > >>> retrieving revision 1.40 > >>> diff -u -p -r1.40 virtio.h > >>> --- virtio.h 21 Jun 2021 02:38:18 -0000 1.40 > >>> +++ virtio.h 23 Jun 2021 11:28:03 -0000 > >>> @@ -217,8 +217,7 @@ struct vionet_dev { > >>> > >>> struct virtio_vq_info vq[VIRTIO_MAX_QUEUES]; > >>> > >>> - int fd, rx_added; > >>> - int rx_pending; > >>> + int fd; > >>> uint32_t vm_id; > >>> uint32_t vm_vmid; > >>> int irq; > >>> Index: vm.c > >>> =================================================================== > >>> RCS file: /cvs/src/usr.sbin/vmd/vm.c,v > >>> retrieving revision 1.63 > >>> diff -u -p -r1.63 vm.c > >>> --- vm.c 16 Jun 2021 16:55:02 -0000 1.63 > >>> +++ vm.c 23 Jun 2021 11:28:03 -0000 > >>> @@ -1711,9 +1711,6 @@ vcpu_exit(struct vm_run_params *vrp) > >>> __progname, vrp->vrp_exit_reason); > >>> } > >>> > >>> - /* Process any pending traffic */ > >>> - vionet_process_rx(vrp->vrp_vm_id); > >>> - > >>> vrp->vrp_continue = 1; > >>> > >>> return (0); > >>> > >>> > >>> Index: ns8250.c > >>> =================================================================== > >>> RCS file: /cvs/src/usr.sbin/vmd/ns8250.c,v > >>> retrieving revision 1.31 > >>> diff -u -p -r1.31 ns8250.c > >>> --- ns8250.c 16 Jun 2021 16:55:02 -0000 1.31 > >>> +++ ns8250.c 24 Jun 2021 02:20:10 -0000 > >>> @@ -36,14 +36,10 @@ > >>> extern char *__progname; > >>> struct ns8250_dev com1_dev; > >>> > >>> -/* Flags to distinguish calling threads to com_rcv */ > >>> -#define NS8250_DEV_THREAD 0 > >>> -#define NS8250_CPU_THREAD 1 > >>> - > >>> static struct vm_dev_pipe dev_pipe; > >>> > >>> static void com_rcv_event(int, short, void *); > >>> -static void com_rcv(struct ns8250_dev *, uint32_t, uint32_t, int); > >>> +static void com_rcv(struct ns8250_dev *, uint32_t, uint32_t); > >>> > >>> /* > >>> * ns8250_pipe_dispatch > >>> @@ -58,11 +54,6 @@ ns8250_pipe_dispatch(int fd, short event > >>> > >>> msg = vm_pipe_recv(&dev_pipe); > >>> switch(msg) { > >>> - case NS8250_ZERO_READ: > >>> - log_debug("%s: resetting events after zero byte read", > >>> __func__); > >>> - event_del(&com1_dev.event); > >>> - event_add(&com1_dev.wake, NULL); > >>> - break; > >>> case NS8250_RATELIMIT: > >>> evtimer_add(&com1_dev.rate, &com1_dev.rate_tv); > >>> break; > >>> @@ -110,7 +101,6 @@ ns8250_init(int fd, uint32_t vmid) > >>> com1_dev.fd = fd; > >>> com1_dev.irq = 4; > >>> com1_dev.portid = NS8250_COM1; > >>> - com1_dev.rcv_pending = 0; > >>> com1_dev.vmid = vmid; > >>> com1_dev.byte_out = 0; > >>> com1_dev.regs.divlo = 1; > >>> @@ -163,17 +153,8 @@ com_rcv_event(int fd, short kind, void * > >>> return; > >>> } > >>> > >>> - /* > >>> - * We already have other data pending to be received. The data that > >>> - * has become available now will be moved to the com port later by > >>> - * the vcpu. > >>> - */ > >>> - if (!com1_dev.rcv_pending) { > >>> - if (com1_dev.regs.lsr & LSR_RXRDY) > >>> - com1_dev.rcv_pending = 1; > >>> - else > >>> - com_rcv(&com1_dev, (uintptr_t)arg, 0, > >>> NS8250_DEV_THREAD); > >>> - } > >>> + if ((com1_dev.regs.lsr & LSR_RXRDY) == 0) > >>> + com_rcv(&com1_dev, (uintptr_t)arg, 0); > >>> > >>> /* If pending interrupt, inject */ > >>> if ((com1_dev.regs.iir & IIR_NOPEND) == 0) { > >>> @@ -216,7 +197,7 @@ com_rcv_handle_break(struct ns8250_dev * > >>> * Must be called with the mutex of the com device acquired > >>> */ > >>> static void > >>> -com_rcv(struct ns8250_dev *com, uint32_t vm_id, uint32_t vcpu_id, int > >>> thread) > >>> +com_rcv(struct ns8250_dev *com, uint32_t vm_id, uint32_t vcpu_id) > >>> { > >>> char buf[2]; > >>> ssize_t sz; > >>> @@ -236,13 +217,9 @@ com_rcv(struct ns8250_dev *com, uint32_t > >>> if (errno != EAGAIN) > >>> log_warn("unexpected read error on com device"); > >>> } else if (sz == 0) { > >>> - if (thread == NS8250_DEV_THREAD) { > >>> - event_del(&com->event); > >>> - event_add(&com->wake, NULL); > >>> - } else { > >>> - /* Called by vcpu thread, use pipe for event changes */ > >>> - vm_pipe_send(&dev_pipe, NS8250_ZERO_READ); > >>> - } > >>> + /* Zero read typically occurs on a disconnect */ > >>> + event_del(&com->event); > >>> + event_add(&com->wake, NULL); > >>> return; > >>> } else if (sz != 1 && sz != 2) > >>> log_warnx("unexpected read return value %zd on com device", sz); > >>> @@ -258,8 +235,6 @@ com_rcv(struct ns8250_dev *com, uint32_t > >>> com->regs.iir &= ~IIR_NOPEND; > >>> } > >>> } > >>> - > >>> - com->rcv_pending = fd_hasdata(com->fd); > >>> } > >>> > >>> /* > >>> @@ -337,9 +312,6 @@ vcpu_process_com_data(struct vm_exit *ve > >>> */ > >>> if (com1_dev.regs.iir == 0x0) > >>> com1_dev.regs.iir = 0x1; > >>> - > >>> - if (com1_dev.rcv_pending) > >>> - com_rcv(&com1_dev, vm_id, vcpu_id, NS8250_CPU_THREAD); > >>> } > >>> > >>> /* If pending interrupt, make sure it gets injected */ > >>> @@ -688,7 +660,6 @@ ns8250_restore(int fd, int con_fd, uint3 > >>> com1_dev.fd = con_fd; > >>> com1_dev.irq = 4; > >>> com1_dev.portid = NS8250_COM1; > >>> - com1_dev.rcv_pending = 0; > >>> com1_dev.vmid = vmid; > >>> com1_dev.byte_out = 0; > >>> com1_dev.regs.divlo = 1; > >>> Index: ns8250.h > >>> =================================================================== > >>> RCS file: /cvs/src/usr.sbin/vmd/ns8250.h,v > >>> retrieving revision 1.9 > >>> diff -u -p -r1.9 ns8250.h > >>> --- ns8250.h 11 Dec 2019 06:45:16 -0000 1.9 > >>> +++ ns8250.h 24 Jun 2021 02:20:10 -0000 > >>> @@ -69,7 +69,6 @@ struct ns8250_dev { > >>> enum ns8250_portid portid; > >>> int fd; > >>> int irq; > >>> - int rcv_pending; > >>> uint32_t vmid; > >>> uint64_t byte_out; > >>> uint32_t baudrate; >
