I forgot to mention that no stress test is necessary. If it boots and the virtio devices work at all, that should be enough.

Cheers,
Stefan

Am 20.05.23 um 15:00 schrieb Stefan Fritsch:
Hi,

with help from Aaron Mason, I have found the problem responsible for the
vioscsi panic on oracle cloud and windows qemu. Basically before using any
virt-queues, the guest must set the DRIVER_OK status bit, or the host may
not process the queues. This affects vioscsi(4) and viogpu(4) in openbsd,
but I have changed the code in all virtio drivers to work the same.

This seems to be a common bug in guests and qemu has workarounds in some
code paths, but not in others. I think that is the reason why I could not
reproduce the issue on my own qemu/linux system.

I have only tested this on amd64 / qemu. I would be interested in test
results on other systems, especially on top of vmd and on arm64 with
viogpu.

Aaron, can you please also test with the updated diff?

Thanks in advance.

Cheers,
Stefan

commit d3209e34d77ab7353973efb2a57303f636fff4f4
Author: Stefan Fritsch <s...@sfritsch.de>
Date:   Sat May 20 14:18:57 2023 +0200

     virtio: Set DRIVER_OK earlier
The DRIVER_OK must be set before using any virt-queues. To allow virtio
     device drivers to use the virt-queues in their attach functions, set the
     bit there and not in the virtio transport attach function.  Only vioscsi
     and viogpu really need this, but let's only have one standard way to do
     this .
Noticed because of hangs with vioscsi on qemu/windows and in the Oracle
     cloud.  With much debugging help by Aaron Mason .
Also revert vioscsi.c 1.31 "Temporarily workaround double calls into
     vioscsi_req_done()"

diff --git a/sys/dev/fdt/virtio_mmio.c b/sys/dev/fdt/virtio_mmio.c
index 12f5a6cdeb6..682cbd304a1 100644
--- a/sys/dev/fdt/virtio_mmio.c
+++ b/sys/dev/fdt/virtio_mmio.c
@@ -293,7 +293,6 @@ virtio_mmio_attach(struct device *parent, struct device 
*self, void *aux)
                goto fail_2;
        }
- virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
        return;
fail_2:
diff --git a/sys/dev/pci/virtio_pci.c b/sys/dev/pci/virtio_pci.c
index 236120cf5a6..747101b8382 100644
--- a/sys/dev/pci/virtio_pci.c
+++ b/sys/dev/pci/virtio_pci.c
@@ -651,7 +651,6 @@ virtio_pci_attach(struct device *parent, struct device 
*self, void *aux)
        }
        printf("%s: %s\n", vsc->sc_dev.dv_xname, intrstr);
- virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
        return;
fail_2:
diff --git a/sys/dev/pv/if_vio.c b/sys/dev/pv/if_vio.c
index be53ccaf7c3..5d1ca8d0030 100644
--- a/sys/dev/pv/if_vio.c
+++ b/sys/dev/pv/if_vio.c
@@ -600,6 +600,7 @@ vio_attach(struct device *parent, struct device *self, void 
*aux)
        timeout_set(&sc->sc_txtick, vio_txtick, &sc->sc_vq[VQTX]);
        timeout_set(&sc->sc_rxtick, vio_rxtick, &sc->sc_vq[VQRX]);
+ virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
        if_attach(ifp);
        ether_ifattach(ifp);
diff --git a/sys/dev/pv/vioblk.c b/sys/dev/pv/vioblk.c
index 093ce9b5a58..9f8a10e0fa2 100644
--- a/sys/dev/pv/vioblk.c
+++ b/sys/dev/pv/vioblk.c
@@ -251,6 +251,7 @@ vioblk_attach(struct device *parent, struct device *self, 
void *aux)
        saa.saa_quirks = 0;
        saa.saa_wwpn = saa.saa_wwnn = 0;
+ virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
        config_found(self, &saa, scsiprint);
return;
diff --git a/sys/dev/pv/viocon.c b/sys/dev/pv/viocon.c
index bd96467c810..882f0961ada 100644
--- a/sys/dev/pv/viocon.c
+++ b/sys/dev/pv/viocon.c
@@ -203,6 +203,7 @@ viocon_attach(struct device *parent, struct device *self, 
void *aux)
                goto err;
        }
        viocon_rx_fill(sc->sc_ports[0]);
+       virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
return;
  err:
diff --git a/sys/dev/pv/viogpu.c b/sys/dev/pv/viogpu.c
index e8a91c34310..06f4388a7b0 100644
--- a/sys/dev/pv/viogpu.c
+++ b/sys/dev/pv/viogpu.c
@@ -235,6 +235,8 @@ viogpu_attach(struct device *parent, struct device *self, 
void *aux)
            sc->sc_fb_dma_kva, sc->sc_fb_dma_size, NULL, BUS_DMA_NOWAIT) != 0)
                goto fb_unmap;
+ virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
+
        if (viogpu_create_2d(sc, 1, sc->sc_fb_width, sc->sc_fb_height) != 0)
                goto fb_unmap;
diff --git a/sys/dev/pv/viomb.c b/sys/dev/pv/viomb.c
index 8169770553e..a8d408e7392 100644
--- a/sys/dev/pv/viomb.c
+++ b/sys/dev/pv/viomb.c
@@ -220,6 +220,7 @@ viomb_attach(struct device *parent, struct device *self, 
void *aux)
        sensordev_install(&sc->sc_sensdev);
printf("\n");
+       virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
        return;
  err_dmamap:
        bus_dmamap_destroy(vsc->sc_dmat, sc->sc_req.bl_dmamap);
diff --git a/sys/dev/pv/viornd.c b/sys/dev/pv/viornd.c
index 39442bc8391..490e823173b 100644
--- a/sys/dev/pv/viornd.c
+++ b/sys/dev/pv/viornd.c
@@ -138,6 +138,7 @@ viornd_attach(struct device *parent, struct device *self, 
void *aux)
        timeout_add(&sc->sc_tick, 1);
printf("\n");
+       virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
        return;
  err2:
        bus_dmamap_destroy(vsc->sc_dmat, sc->sc_dmamap);
diff --git a/sys/dev/pv/vioscsi.c b/sys/dev/pv/vioscsi.c
index 20802c8bc7a..5afa5ebed9e 100644
--- a/sys/dev/pv/vioscsi.c
+++ b/sys/dev/pv/vioscsi.c
@@ -166,6 +166,7 @@ vioscsi_attach(struct device *parent, struct device *self, 
void *aux)
        saa.saa_quirks = saa.saa_flags = 0;
        saa.saa_wwpn = saa.saa_wwnn = 0;
+ virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
        config_found(self, &saa, scsiprint);
        return;
@@ -296,6 +297,7 @@ vioscsi_req_done(struct vioscsi_softc *sc, struct virtio_softc *vsc,
        struct scsi_xfer *xs = vr->vr_xs;
        DPRINTF("vioscsi_req_done: enter vr: %p xs: %p\n", vr, xs);
+ int isread = !!(xs->flags & SCSI_DATA_IN);
        bus_dmamap_sync(vsc->sc_dmat, vr->vr_control,
            offsetof(struct vioscsi_req, vr_req),
            sizeof(struct virtio_scsi_req_hdr),
@@ -304,18 +306,6 @@ vioscsi_req_done(struct vioscsi_softc *sc, struct 
virtio_softc *vsc,
            offsetof(struct vioscsi_req, vr_res),
            sizeof(struct virtio_scsi_res_hdr),
            BUS_DMASYNC_POSTREAD);
-
-       /*
-        * XXX Should be impossible but somehow happens on Oracle Cloud and
-        *     particular QEMU configurations.
-        *
-        *     Stop the crashing while investigation into
-        *     the apparent queuing/dequeuing issue proceeds.
-        */
-       if (xs == NULL)
-               return;
-
-       int isread = !!(xs->flags & SCSI_DATA_IN);
        if (xs->flags & (SCSI_DATA_IN | SCSI_DATA_OUT)) {
                bus_dmamap_sync(vsc->sc_dmat, vr->vr_data, 0, xs->datalen,
                    isread ? BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE);
diff --git a/sys/dev/pv/vmmci.c b/sys/dev/pv/vmmci.c
index 461df5801a1..a95a2e90a3c 100644
--- a/sys/dev/pv/vmmci.c
+++ b/sys/dev/pv/vmmci.c
@@ -120,6 +120,7 @@ vmmci_attach(struct device *parent, struct device *self, 
void *aux)
        }
printf("\n");
+       virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
  }
int


Reply via email to