Dave Voutila <d...@sisu.io> writes:

> mbuhl@ found an issue where the emulated virtio block device can
> hang. The tl;dr: the emulated pic never injects an interrupt and the
> vioblk(4) driver in the guest starves, waiting to be told to check the
> virtqueue.
>
> Flipping the bit in the i8259 using gdb causes the spice to flow once
> again.
>
> This diff fixes two related things (so could be committed in 2 parts):
>
> 1. the irq deassert on a virtio pci interrupt status register read
>    should occur synchronously by the vcpu thread in the vm and not
>    async.
>
> 2. always raise the irr bit in the pic, regardless of mask. The mask is
>    used when finding an irq to ack and shouldn't be used to determine if
>    the irr bit is raised. AFAICT from the old i8259 data sheets, masking
>    should have no effect on if the irr bit is set.
>
> This is holding up another diff I want to share that reduces latency in
> interrupts, but it's shown to make this i8259 race condition worse, so
> I'd like to give folks a few days to check if the below diff causes
> regressions. Once this is committed, I'll feel safe sharing the latency
> diff with tech@.
>
> Any ok's pending a few days for folks to test?

Still looking for ok's or test reports (other than from mbuhl@, of
course.)

>
> -dv
>
>
> diff refs/heads/master cdba90a89ee6e77b52c2b6955d36a260a23a3b85
> commit - ad1cd1152fddbf55189657a2df9f2468409698ab
> commit + cdba90a89ee6e77b52c2b6955d36a260a23a3b85
> blob - f7862f5e9d17f96f5260358271fab8f253b26505
> blob + b6891c52c824d7b8c69e67e5323772152b1ed844
> --- usr.sbin/vmd/i8259.c
> +++ usr.sbin/vmd/i8259.c
> @@ -222,20 +222,16 @@ i8259_assert_irq(uint8_t irq)
>  {
>       mutex_lock(&pic_mtx);
>       if (irq <= 7) {
> -             if (!ISSET(pics[MASTER].imr, 1 << irq)) {
> -                     SET(pics[MASTER].irr, 1 << irq);
> -                     pics[MASTER].asserted = 1;
> -             }
> +             SET(pics[MASTER].irr, 1 << irq);
> +             pics[MASTER].asserted = 1;
>       } else {
>               irq -= 8;
> -             if (!ISSET(pics[SLAVE].imr, 1 << irq)) {
> -                     SET(pics[SLAVE].irr, 1 << irq);
> -                     pics[SLAVE].asserted = 1;
> +             SET(pics[SLAVE].irr, 1 << irq);
> +             pics[SLAVE].asserted = 1;
>
> -                     /* Assert cascade IRQ on master PIC */
> -                     SET(pics[MASTER].irr, 1 << 2);
> -                     pics[MASTER].asserted = 1;
> -             }
> +             /* Assert cascade IRQ on master PIC */
> +             SET(pics[MASTER].irr, 1 << 2);
> +             pics[MASTER].asserted = 1;
>       }
>       mutex_unlock(&pic_mtx);
>  }
> blob - 7bc76c4daed427dae82688452ec21985be679bc4
> blob + 4d321fd4ff23514ffa7a4bee0eeb7a9324020d80
> --- usr.sbin/vmd/vioblk.c
> +++ usr.sbin/vmd/vioblk.c
> @@ -39,7 +39,8 @@ static uint32_t handle_io_read(struct viodev_msg *, st
>  extern struct vmd_vm *current_vm;
>
>  static const char *disk_type(int);
> -static uint32_t handle_io_read(struct viodev_msg *, struct virtio_dev *);
> +static uint32_t handle_io_read(struct viodev_msg *, struct virtio_dev *,
> +    int8_t *);
>  static int handle_io_write(struct viodev_msg *, struct virtio_dev *);
>  void vioblk_notify_rx(struct vioblk_dev *);
>  int vioblk_notifyq(struct vioblk_dev *);
> @@ -723,6 +724,7 @@ handle_sync_io(int fd, short event, void *arg)
>       struct viodev_msg msg;
>       struct imsg imsg;
>       ssize_t n;
> +     int8_t intr = INTR_STATE_NOOP;
>
>       if (event & EV_READ) {
>               if ((n = imsg_read(ibuf)) == -1 && errno != EAGAIN)
> @@ -770,8 +772,9 @@ handle_sync_io(int fd, short event, void *arg)
>                       }
>               case VIODEV_MSG_IO_READ:
>                       /* Read IO: make sure to send a reply */
> -                     msg.data = handle_io_read(&msg, dev);
> +                     msg.data = handle_io_read(&msg, dev, &intr);
>                       msg.data_valid = 1;
> +                     msg.state = intr;
>                       imsg_compose_event(iev, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
>                           sizeof(msg));
>                       break;
> @@ -844,7 +847,7 @@ handle_io_read(struct viodev_msg *msg, struct virtio_d
>  }
>
>  static uint32_t
> -handle_io_read(struct viodev_msg *msg, struct virtio_dev *dev)
> +handle_io_read(struct viodev_msg *msg, struct virtio_dev *dev, int8_t *intr)
>  {
>       struct vioblk_dev *vioblk = &dev->vioblk;
>       uint8_t sz = msg->io_sz;
> @@ -1001,7 +1004,8 @@ handle_io_read(struct viodev_msg *msg, struct virtio_d
>       case VIRTIO_CONFIG_ISR_STATUS:
>               data = vioblk->cfg.isr_status;
>               vioblk->cfg.isr_status = 0;
> -             virtio_deassert_pic_irq(dev, 0);
> +             if (intr != NULL)
> +                     *intr = INTR_STATE_DEASSERT;
>               break;
>       default:
>               return (0xFFFFFFFF);
> blob - c16ad2635ea622fd7fce48b5145e2199dd451346
> blob + 5c2a943ac7ad02dfbc4793f5caab3200a3ced803
> --- usr.sbin/vmd/vionet.c
> +++ usr.sbin/vmd/vionet.c
> @@ -52,7 +52,8 @@ static uint32_t handle_io_read(struct viodev_msg *, st
>
>  static int vionet_rx(struct vionet_dev *);
>  static void vionet_rx_event(int, short, void *);
> -static uint32_t handle_io_read(struct viodev_msg *, struct virtio_dev *);
> +static uint32_t handle_io_read(struct viodev_msg *, struct virtio_dev *,
> +    int8_t *);
>  static int handle_io_write(struct viodev_msg *, struct virtio_dev *);
>  void vionet_notify_rx(struct virtio_dev *);
>  int vionet_notifyq(struct virtio_dev *);
> @@ -757,6 +758,7 @@ handle_sync_io(int fd, short event, void *arg)
>       struct viodev_msg msg;
>       struct imsg imsg;
>       ssize_t n;
> +     int8_t intr = INTR_STATE_NOOP;
>
>       if (event & EV_READ) {
>               if ((n = imsg_read(ibuf)) == -1 && errno != EAGAIN)
> @@ -804,8 +806,9 @@ handle_sync_io(int fd, short event, void *arg)
>                       }
>               case VIODEV_MSG_IO_READ:
>                       /* Read IO: make sure to send a reply */
> -                     msg.data = handle_io_read(&msg, dev);
> +                     msg.data = handle_io_read(&msg, dev, &intr);
>                       msg.data_valid = 1;
> +                     msg.state = intr;
>                       imsg_compose_event(iev, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
>                           sizeof(msg));
>                       break;
> @@ -891,7 +894,7 @@ handle_io_read(struct viodev_msg *msg, struct virtio_d
>  }
>
>  static uint32_t
> -handle_io_read(struct viodev_msg *msg, struct virtio_dev *dev)
> +handle_io_read(struct viodev_msg *msg, struct virtio_dev *dev, int8_t *intr)
>  {
>       struct vionet_dev *vionet = &dev->vionet;
>       uint32_t data;
> @@ -930,7 +933,8 @@ handle_io_read(struct viodev_msg *msg, struct virtio_d
>       case VIRTIO_CONFIG_ISR_STATUS:
>               data = vionet->cfg.isr_status;
>               vionet->cfg.isr_status = 0;
> -             virtio_deassert_pic_irq(dev, 0);
> +             if (intr != NULL)
> +                     *intr = INTR_STATE_DEASSERT;
>               break;
>       default:
>               return (0xFFFFFFFF);

Reply via email to