Dave Voutila <d...@sisu.io> writes:
> mbuhl@ found an issue where the emulated virtio block device can > hang. The tl;dr: the emulated pic never injects an interrupt and the > vioblk(4) driver in the guest starves, waiting to be told to check the > virtqueue. > > Flipping the bit in the i8259 using gdb causes the spice to flow once > again. > > This diff fixes two related things (so could be committed in 2 parts): > > 1. the irq deassert on a virtio pci interrupt status register read > should occur synchronously by the vcpu thread in the vm and not > async. > > 2. always raise the irr bit in the pic, regardless of mask. The mask is > used when finding an irq to ack and shouldn't be used to determine if > the irr bit is raised. AFAICT from the old i8259 data sheets, masking > should have no effect on if the irr bit is set. > > This is holding up another diff I want to share that reduces latency in > interrupts, but it's shown to make this i8259 race condition worse, so > I'd like to give folks a few days to check if the below diff causes > regressions. Once this is committed, I'll feel safe sharing the latency > diff with tech@. > > Any ok's pending a few days for folks to test? Still looking for ok's or test reports (other than from mbuhl@, of course.) > > -dv > > > diff refs/heads/master cdba90a89ee6e77b52c2b6955d36a260a23a3b85 > commit - ad1cd1152fddbf55189657a2df9f2468409698ab > commit + cdba90a89ee6e77b52c2b6955d36a260a23a3b85 > blob - f7862f5e9d17f96f5260358271fab8f253b26505 > blob + b6891c52c824d7b8c69e67e5323772152b1ed844 > --- usr.sbin/vmd/i8259.c > +++ usr.sbin/vmd/i8259.c > @@ -222,20 +222,16 @@ i8259_assert_irq(uint8_t irq) > { > mutex_lock(&pic_mtx); > if (irq <= 7) { > - if (!ISSET(pics[MASTER].imr, 1 << irq)) { > - SET(pics[MASTER].irr, 1 << irq); > - pics[MASTER].asserted = 1; > - } > + SET(pics[MASTER].irr, 1 << irq); > + pics[MASTER].asserted = 1; > } else { > irq -= 8; > - if (!ISSET(pics[SLAVE].imr, 1 << irq)) { > - SET(pics[SLAVE].irr, 1 << irq); > - pics[SLAVE].asserted = 1; > + SET(pics[SLAVE].irr, 1 << irq); > + pics[SLAVE].asserted = 1; > > - /* Assert cascade IRQ on master PIC */ > - SET(pics[MASTER].irr, 1 << 2); > - pics[MASTER].asserted = 1; > - } > + /* Assert cascade IRQ on master PIC */ > + SET(pics[MASTER].irr, 1 << 2); > + pics[MASTER].asserted = 1; > } > mutex_unlock(&pic_mtx); > } > blob - 7bc76c4daed427dae82688452ec21985be679bc4 > blob + 4d321fd4ff23514ffa7a4bee0eeb7a9324020d80 > --- usr.sbin/vmd/vioblk.c > +++ usr.sbin/vmd/vioblk.c > @@ -39,7 +39,8 @@ static uint32_t handle_io_read(struct viodev_msg *, st > extern struct vmd_vm *current_vm; > > static const char *disk_type(int); > -static uint32_t handle_io_read(struct viodev_msg *, struct virtio_dev *); > +static uint32_t handle_io_read(struct viodev_msg *, struct virtio_dev *, > + int8_t *); > static int handle_io_write(struct viodev_msg *, struct virtio_dev *); > void vioblk_notify_rx(struct vioblk_dev *); > int vioblk_notifyq(struct vioblk_dev *); > @@ -723,6 +724,7 @@ handle_sync_io(int fd, short event, void *arg) > struct viodev_msg msg; > struct imsg imsg; > ssize_t n; > + int8_t intr = INTR_STATE_NOOP; > > if (event & EV_READ) { > if ((n = imsg_read(ibuf)) == -1 && errno != EAGAIN) > @@ -770,8 +772,9 @@ handle_sync_io(int fd, short event, void *arg) > } > case VIODEV_MSG_IO_READ: > /* Read IO: make sure to send a reply */ > - msg.data = handle_io_read(&msg, dev); > + msg.data = handle_io_read(&msg, dev, &intr); > msg.data_valid = 1; > + msg.state = intr; > imsg_compose_event(iev, IMSG_DEVOP_MSG, 0, 0, -1, &msg, > sizeof(msg)); > break; > @@ -844,7 +847,7 @@ handle_io_read(struct viodev_msg *msg, struct virtio_d > } > > static uint32_t > -handle_io_read(struct viodev_msg *msg, struct virtio_dev *dev) > +handle_io_read(struct viodev_msg *msg, struct virtio_dev *dev, int8_t *intr) > { > struct vioblk_dev *vioblk = &dev->vioblk; > uint8_t sz = msg->io_sz; > @@ -1001,7 +1004,8 @@ handle_io_read(struct viodev_msg *msg, struct virtio_d > case VIRTIO_CONFIG_ISR_STATUS: > data = vioblk->cfg.isr_status; > vioblk->cfg.isr_status = 0; > - virtio_deassert_pic_irq(dev, 0); > + if (intr != NULL) > + *intr = INTR_STATE_DEASSERT; > break; > default: > return (0xFFFFFFFF); > blob - c16ad2635ea622fd7fce48b5145e2199dd451346 > blob + 5c2a943ac7ad02dfbc4793f5caab3200a3ced803 > --- usr.sbin/vmd/vionet.c > +++ usr.sbin/vmd/vionet.c > @@ -52,7 +52,8 @@ static uint32_t handle_io_read(struct viodev_msg *, st > > static int vionet_rx(struct vionet_dev *); > static void vionet_rx_event(int, short, void *); > -static uint32_t handle_io_read(struct viodev_msg *, struct virtio_dev *); > +static uint32_t handle_io_read(struct viodev_msg *, struct virtio_dev *, > + int8_t *); > static int handle_io_write(struct viodev_msg *, struct virtio_dev *); > void vionet_notify_rx(struct virtio_dev *); > int vionet_notifyq(struct virtio_dev *); > @@ -757,6 +758,7 @@ handle_sync_io(int fd, short event, void *arg) > struct viodev_msg msg; > struct imsg imsg; > ssize_t n; > + int8_t intr = INTR_STATE_NOOP; > > if (event & EV_READ) { > if ((n = imsg_read(ibuf)) == -1 && errno != EAGAIN) > @@ -804,8 +806,9 @@ handle_sync_io(int fd, short event, void *arg) > } > case VIODEV_MSG_IO_READ: > /* Read IO: make sure to send a reply */ > - msg.data = handle_io_read(&msg, dev); > + msg.data = handle_io_read(&msg, dev, &intr); > msg.data_valid = 1; > + msg.state = intr; > imsg_compose_event(iev, IMSG_DEVOP_MSG, 0, 0, -1, &msg, > sizeof(msg)); > break; > @@ -891,7 +894,7 @@ handle_io_read(struct viodev_msg *msg, struct virtio_d > } > > static uint32_t > -handle_io_read(struct viodev_msg *msg, struct virtio_dev *dev) > +handle_io_read(struct viodev_msg *msg, struct virtio_dev *dev, int8_t *intr) > { > struct vionet_dev *vionet = &dev->vionet; > uint32_t data; > @@ -930,7 +933,8 @@ handle_io_read(struct viodev_msg *msg, struct virtio_d > case VIRTIO_CONFIG_ISR_STATUS: > data = vionet->cfg.isr_status; > vionet->cfg.isr_status = 0; > - virtio_deassert_pic_irq(dev, 0); > + if (intr != NULL) > + *intr = INTR_STATE_DEASSERT; > break; > default: > return (0xFFFFFFFF);