virtio_gpu_queue_ctrl_sgs() and virtio_gpu_queue_cursor() use
wait_event() without any abort condition when waiting for virtqueue
space. If the host device stops processing commands, these waits block
indefinitely inside a drm_dev_enter/exit() critical section. Since
drm_dev_unplug(), which is called on both the device removal and system
shutdown paths, blocks in synchronize_srcu() until all such critical
sections complete, device removal and system shutdown hang as well.

Add a vqs_released flag to virtio_gpu_device and include it in the
wait_event() condition. Set the flag and wake up both queues in a new
virtio_gpu_release_vqs() helper, called before drm_dev_unplug() in both
virtio_gpu_remove() and virtio_gpu_shutdown(). When the flag is set, the
wait returns immediately and the command is aborted, following the same
cleanup path as drm_dev_enter() failure.

Reported-by: syzbot+d6dd6f86d3aaf7eebe7406e45c1c6e549453f...@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?id=d6dd6f86d3aaf7eebe7406e45c1c6e549453f224
Reported-by: syzbot+908bd910da5dd79b88de4cf7baf376cc873a9...@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?id=908bd910da5dd79b88de4cf7baf376cc873a922e
Signed-off-by: Ryosuke Yasuoka <[email protected]>
---
Changes in v2:
- Update the commit message.
- Replace wait_event_timeout() with wait_event() using a compound
  condition that includes a new vqs_released flag.
- Add virtio_gpu_release_vqs() helper to set the flag and wake up
  both queues, called before drm_dev_unplug() in the remove and
  shutdown paths.
- Remove the hardcoded 5-second timeout. Recovery is now driven by
  the driver flag instead of an arbitrary timeout value.
---
 drivers/gpu/drm/virtio/virtgpu_drv.c | 15 +++++++++++++++
 drivers/gpu/drm/virtio/virtgpu_drv.h |  1 +
 drivers/gpu/drm/virtio/virtgpu_vq.c  | 23 +++++++++++++++++++++--
 3 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c
index a5ce96fb8a1d..e4fe5e0780f9 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.c
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.c
@@ -119,10 +119,24 @@ static int virtio_gpu_probe(struct virtio_device *vdev)
        return ret;
 }
 
+/*
+ * Release pending virtqueue waits so the drm_dev_enter/exit() critical
+ * sections complete before drm_dev_unplug() blocks on synchronize_srcu().
+ */
+static void virtio_gpu_release_vqs(struct drm_device *dev)
+{
+       struct virtio_gpu_device *vgdev = dev->dev_private;
+
+       vgdev->vqs_released = true;
+       wake_up_all(&vgdev->ctrlq.ack_queue);
+       wake_up_all(&vgdev->cursorq.ack_queue);
+}
+
 static void virtio_gpu_remove(struct virtio_device *vdev)
 {
        struct drm_device *dev = vdev->priv;
 
+       virtio_gpu_release_vqs(dev);
        drm_dev_unplug(dev);
        drm_atomic_helper_shutdown(dev);
        virtio_gpu_deinit(dev);
@@ -133,6 +147,7 @@ static void virtio_gpu_shutdown(struct virtio_device *vdev)
 {
        struct drm_device *dev = vdev->priv;
 
+       virtio_gpu_release_vqs(dev);
        /* stop talking to the device */
        drm_dev_unplug(dev);
 }
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h
index f17660a71a3e..0bd69a40857e 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.h
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
@@ -235,6 +235,7 @@ struct virtio_gpu_device {
 
        struct virtio_gpu_queue ctrlq;
        struct virtio_gpu_queue cursorq;
+       bool vqs_released;
        struct kmem_cache *vbufs;
 
        atomic_t pending_commands;
diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
index 67865810a2e7..8057a9b7356d 100644
--- a/drivers/gpu/drm/virtio/virtgpu_vq.c
+++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
@@ -396,7 +396,19 @@ static int virtio_gpu_queue_ctrl_sgs(struct virtio_gpu_device *vgdev,
        if (vq->num_free < elemcnt) {
                spin_unlock(&vgdev->ctrlq.qlock);
                virtio_gpu_notify(vgdev);
-               wait_event(vgdev->ctrlq.ack_queue, vq->num_free >= elemcnt);
+               wait_event(vgdev->ctrlq.ack_queue,
+                          vq->num_free >= elemcnt || vgdev->vqs_released);
+               /*
+                * Set by virtio_gpu_release_vqs() to unblock
+                * synchronize_srcu() wait in drm_dev_unplug().
+                */
+               if (vgdev->vqs_released) {
+                       if (fence && vbuf->objs)
+                               virtio_gpu_array_unlock_resv(vbuf->objs);
+                       free_vbuf(vgdev, vbuf);
+                       drm_dev_exit(idx);
+                       return -ENODEV;
+               }
                goto again;
        }
 
@@ -566,7 +578,14 @@ static void virtio_gpu_queue_cursor(struct virtio_gpu_device *vgdev,
        ret = virtqueue_add_sgs(vq, sgs, outcnt, 0, vbuf, GFP_ATOMIC);
        if (ret == -ENOSPC) {
                spin_unlock(&vgdev->cursorq.qlock);
-               wait_event(vgdev->cursorq.ack_queue, vq->num_free >= outcnt);
+               wait_event(vgdev->cursorq.ack_queue,
+                          vq->num_free >= outcnt || vgdev->vqs_released);
+               /* See comment in virtio_gpu_queue_ctrl_sgs(). */
+               if (vgdev->vqs_released) {
+                       free_vbuf(vgdev, vbuf);
+                       drm_dev_exit(idx);
+                       return;
+               }
                spin_lock(&vgdev->cursorq.qlock);
                goto retry;
        } else {

---
base-commit: 5200f5f493f79f14bbdc349e402a40dfb32f23c8
change-id: 20260518-virtio-gpu_wait_event-5aa060754f12

Best regards,
-- 
Ryosuke Yasuoka <[email protected]>


Reply via email to