From: Sungho Bae <[email protected]>

With no_console_suspend enabled, hvc console output can continue while
virtio_console is freezing. In that window, put_chars can still enqueue
buffers to the output virtqueue while virtcons_freeze is tearing queues
down, triggering a BUG_ON in virtqueue_detach_unused_buf_split:

  BUG_ON(vq->vq.num_free != vq->split.vring.num)

Add a pm_freezing flag to ports_device. Set it via smp_store_release()
at the start of virtcons_freeze(); put_chars() and __send_to_port() drop
output while the flag is set, checked via smp_load_acquire().

The check in __send_to_port() is placed under outvq_lock, making it
atomic with remove_port_data() which also acquires outvq_lock. Once
remove_port_data() returns for a given port, no concurrent
__send_to_port() can add buffers before remove_vqs() tears down the vq.

After setting pm_freezing, acquire and release outvq_lock for each port
(protected by ports_lock to prevent list manipulation races) before
calling virtio_reset_device(). A TX thread that already passed the
pm_freezing check may still hold outvq_lock while spinning for host
acknowledgment; the drain loop ensures all such threads have completed
before the device is reset.

Clear pm_freezing in virtcons_restore() only after all port->out_vq
pointers have been reassigned to the newly allocated virtqueues,
preventing TX paths from dereferencing freed vqs during restore.

Link: https://sashiko.dev/#/patchset/20260519162242.7324-1-baver.bae%40gmail.com
Signed-off-by: Sungho Bae <[email protected]>
---
 drivers/char/virtio_console.c | 70 ++++++++++++++++++++++++++++++++---
 1 file changed, 64 insertions(+), 6 deletions(-)

diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index dd31f7069e19..482d57a311c6 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -157,6 +157,12 @@ struct ports_device {
 
        /* Major number for this device.  Ports will be created as minors. */
        int chr_major;
+
+       /*
+        * Set to true during PM freeze to block TX paths that may race
+        * with virtqueue teardown (e.g. hvc put_chars with no_console_suspend).
+        */
+       bool pm_freezing;
 };
 
 struct port_stats {
@@ -611,6 +617,17 @@ static ssize_t __send_to_port(struct port *port, struct 
scatterlist *sg,
                goto free_and_done;
        }
 
+       /*
+        * Check freeze flag under the lock so that the flag check and
+        * virtqueue_add_outbuf() are atomic with respect to
+        * remove_port_data() which also takes outvq_lock.  This
+        * guarantees that once remove_port_data() returns, no new
+        * buffers can be added before remove_vqs() tears down the vq.
+        * Pairs with smp_store_release() in virtcons_freeze/restore.
+        */
+       if (smp_load_acquire(&portdev->pm_freezing)) /* pairs with 
freeze/restore */
+               goto free_and_done;
+
        out_vq = port->out_vq;
 
        reclaim_consumed_buffers(port);
@@ -1125,14 +1142,27 @@ static ssize_t put_chars(u32 vtermno, const u8 *buf, 
size_t count)
                return -EPIPE;
 
        /*
-        * Silently drop output if device hot-unplug is in progress.
-        * portdev was NULLed by unplug_port() after hvc_remove() was
-        * already called, so the hvc layer will stop invoking put_chars()
-        * very soon. Returning count avoids a pointless retry loop in the
-        * interim.
+        * Silently drop output in two cases, both by returning count so
+        * that the hvc layer does not spin-retry:
+        *
+        *  1. Device hot-unplug (!portdev): portdev was NULLed by
+        *     unplug_port() after hvc_remove() was already called, so
+        *     the hvc layer will stop invoking put_chars() very soon.
+        *     Returning count avoids a pointless retry loop in the
+        *     interim.
+        *
+        *  2. PM freeze (pm_freezing): the hvc console stays active
+        *     under no_console_suspend but virtqueues are being torn
+        *     down.  Drop the output silently so the hvc layer does not
+        *     stall suspend.
+        *
+        * This early check avoids a pointless GFP_ATOMIC allocation;
+        * __send_to_port() rechecks under outvq_lock for correctness.
+        * Pairs with smp_store_release() in virtcons_freeze/restore.
         */
        portdev = READ_ONCE(port->portdev);
-       if (!portdev)
+       if (!portdev ||
+           smp_load_acquire(&portdev->pm_freezing)) /* pairs with 
freeze/restore */
                return count;
 
        pbuf = alloc_buf(portdev->vdev, count, 0, GFP_ATOMIC);
@@ -2002,6 +2032,7 @@ static int virtcons_probe(struct virtio_device *vdev)
        /* Attach this portdev to this virtio_device, and vice-versa. */
        portdev->vdev = vdev;
        vdev->priv = portdev;
+       portdev->pm_freezing = false;
 
        portdev->chr_major = register_chrdev(0, "virtio-portsdev",
                                             &portdev_fops);
@@ -2119,9 +2150,30 @@ static int virtcons_freeze(struct virtio_device *vdev)
 {
        struct ports_device *portdev;
        struct port *port;
+       unsigned long flags;
 
        portdev = vdev->priv;
 
+       /*
+        * Block TX paths (put_chars, __send_to_port) before resetting the
+        * device and tearing down virtqueues.  This prevents races with
+        * hvc console writes that remain active under no_console_suspend.
+        */
+       smp_store_release(&portdev->pm_freezing, true);
+
+       /*
+        * Synchronize with any concurrent __send_to_port() that may have
+        * passed the pm_freezing check. By acquiring and releasing the
+        * outvq_lock for each port, we ensure all active TX paths have
+        * completed before we reset the device.
+        */
+       spin_lock_irqsave(&portdev->ports_lock, flags);
+       list_for_each_entry(port, &portdev->ports, list) {
+               spin_lock(&port->outvq_lock);
+               spin_unlock(&port->outvq_lock);
+       }
+       spin_unlock_irqrestore(&portdev->ports_lock, flags);
+
        virtio_reset_device(vdev);
 
        if (use_multiport(portdev))
@@ -2192,6 +2244,12 @@ static int virtcons_restore(struct virtio_device *vdev)
        if (use_multiport(portdev))
                fill_queue(portdev->c_ivq, &portdev->c_ivq_lock);
 
+       /*
+        * Allow TX paths only after all port->out_vq pointers have
+        * been reassigned to the newly allocated virtqueues.
+        */
+       smp_store_release(&portdev->pm_freezing, false);
+
        return 0;
 }
 #endif
-- 
2.34.1


Reply via email to