__send_to_port() acquires outvq_lock with IRQs disabled via
spin_lock_irqsave(), then spins in virtqueue_get_buf() waiting for the
host to consume the TX descriptor.  When the host is slow to respond
(e.g. under heavy load from concurrent virtio_mem plug operations
during secondary VM boot), the spin never exits.

The watchdog bark ISR fires on another CPU and tries to printk(), which
calls hvc_console_print() -> put_chars() -> __send_to_port(), attempting
to acquire outvq_lock.  With outvq_lock already held and IRQs disabled,
all CPUs stall and the watchdog cannot be pet, triggering a bite.

Add a 200ms deadline using ktime_get_mono_fast_ns() to break out of the
spin loop as a fallback.  Since __send_to_port() may now return before
the host has consumed the TX descriptor, put_chars() is changed to
allocate a struct port_buffer (GFP_ATOMIC) as the virtqueue token
instead of a raw kmemdup() pointer, so that reclaim_consumed_buffers()
can safely call free_buf() on it regardless of whether a timeout
occurred.  put_chars() frees the buffer only when
virtqueue_add_outbuf() fails and the token was never submitted.

Signed-off-by: Peng Yang <[email protected]>
---
Changes in v3:
- Fix token leak in __send_to_port(): capture virtqueue_get_buf() return
  value and call free_buf() after the spin loop exits normally.
- Link to v2: 
https://patch.msgid.link/20260429-add_timeout_to___send_to_port-v2-1-3d23efd6e...@oss.qualcomm.com

Changes in v2:
- Rework put_chars() to allocate full struct port_buffer (GFP_ATOMIC)
  so free_buf() can safely reclaim the token on timeout.
- Transfer buffer ownership to virtqueue on success; free immediately
  on virtqueue_add_outbuf() failure.
- Link to v1: 
https://patch.msgid.link/20260420-add_timeout_to___send_to_port-v1-1-6c32d33bf...@oss.qualcomm.com

To: Amit Shah <[email protected]>
To: Arnd Bergmann <[email protected]>
To: Greg Kroah-Hartman <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
---
 drivers/char/virtio_console.c | 81 +++++++++++++++++++++++++++++++++++--------
 1 file changed, 67 insertions(+), 14 deletions(-)

diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 9a33217c68d9..4d6b3e144e7f 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -27,6 +27,7 @@
 #include <linux/module.h>
 #include <linux/dma-mapping.h>
 #include <linux/string_choices.h>
+#include <linux/timekeeping.h>
 #include "../tty/hvc/hvc_console.h"
 
 #define is_rproc_enabled IS_ENABLED(CONFIG_REMOTEPROC)
@@ -601,6 +602,8 @@ static ssize_t __send_to_port(struct port *port, struct 
scatterlist *sg,
        int err;
        unsigned long flags;
        unsigned int len;
+       void *token;
+       u64 deadline;
 
        out_vq = port->out_vq;
 
@@ -632,10 +635,20 @@ static ssize_t __send_to_port(struct port *port, struct 
scatterlist *sg,
         * buffer and relax the spinning requirement.  The downside is
         * we need to kmalloc a GFP_ATOMIC buffer each time the
         * console driver writes something out.
+        *
+        * To avoid spinning forever if the host stops processing the
+        * TX virtqueue (e.g. during VM shutdown), a 200ms deadline is
+        * used to break out of the loop as a fallback.
         */
-       while (!virtqueue_get_buf(out_vq, &len)
-               && !virtqueue_is_broken(out_vq))
+       deadline = ktime_get_mono_fast_ns() + 200ULL * NSEC_PER_MSEC;
+       while (!(token = virtqueue_get_buf(out_vq, &len)) &&
+              !virtqueue_is_broken(out_vq)) {
+               if (ktime_get_mono_fast_ns() >= deadline)
+                       break;
                cpu_relax();
+       }
+       if (token)
+               free_buf(token, false);
 done:
        spin_unlock_irqrestore(&port->outvq_lock, flags);
 
@@ -1097,31 +1110,71 @@ static const struct file_operations port_fops = {
 };
 
 /*
- * The put_chars() callback is pretty straightforward.
+ * The put_chars() callback writes characters to the virtio console port.
  *
- * We turn the characters into a scatter-gather list, add it to the
- * output queue and then kick the Host.  Then we sit here waiting for
- * it to finish: inefficient in theory, but in practice
- * implementations will do it immediately.
+ * We allocate a struct port_buffer (with GFP_ATOMIC) to wrap the data so
+ * that reclaim_consumed_buffers() can safely call free_buf() on the token
+ * returned by virtqueue_get_buf(), even if __send_to_port() timed out
+ * before observing the used-ring update.
+ *
+ * On success, ownership of the buffer is transferred to the virtqueue as
+ * the descriptor token; it will be reclaimed by reclaim_consumed_buffers().
+ * On failure (virtqueue_add_outbuf() error), the buffer was never submitted
+ * and must be freed explicitly here.
  */
 static ssize_t put_chars(u32 vtermno, const u8 *buf, size_t count)
 {
        struct port *port;
        struct scatterlist sg[1];
-       void *data;
-       int ret;
+       struct port_buffer *pbuf;
+       ssize_t ret;
+
+       if (!count)
+               return 0;
 
        port = find_port_by_vtermno(vtermno);
        if (!port)
                return -EPIPE;
 
-       data = kmemdup(buf, count, GFP_ATOMIC);
-       if (!data)
+       /*
+        * Allocate a struct port_buffer with GFP_ATOMIC so that
+        * reclaim_consumed_buffers() can safely call free_buf() on the token
+        * returned by virtqueue_get_buf(), whether or not __send_to_port()
+        * timed out.  alloc_buf() uses GFP_KERNEL internally, so we open-code
+        * the allocation here.
+        */
+       pbuf = kmalloc(struct_size(pbuf, sg, 0), GFP_ATOMIC);
+       if (!pbuf)
                return -ENOMEM;
 
-       sg_init_one(sg, data, count);
-       ret = __send_to_port(port, sg, 1, count, data, false);
-       kfree(data);
+       pbuf->buf = kmalloc(count, GFP_ATOMIC);
+       if (!pbuf->buf) {
+               kfree(pbuf);
+               return -ENOMEM;
+       }
+       pbuf->dev = NULL;
+       pbuf->sgpages = 0;
+       pbuf->len = count;
+       pbuf->offset = 0;
+       pbuf->size = count;
+       memcpy(pbuf->buf, buf, count);
+
+       sg_init_one(sg, pbuf->buf, count);
+       ret = __send_to_port(port, sg, 1, count, pbuf, false);
+
+       /*
+        * If virtqueue_add_outbuf() failed inside __send_to_port() (ret <= 0),
+        * the token was never submitted to the virtqueue, so reclaim_consumed_
+        * buffers() will never see it.  Free pbuf explicitly in that case.
+        *
+        * On success (ret > 0), ownership of pbuf has been transferred to the
+        * virtqueue as the descriptor token.  It will be reclaimed and freed
+        * by reclaim_consumed_buffers() -> free_buf() when the host marks the
+        * descriptor as used, even if __send_to_port() timed out before
+        * observing the used-ring update.  Do NOT free pbuf here in that case.
+        */
+       if (ret <= 0)
+               free_buf(pbuf, false);
        return ret;
 }
 

---
base-commit: 97e797263a5e963da3d1e66e743fd518567dfe37
change-id: 20260420-add_timeout_to___send_to_port-104ce7bcf241

Best regards,
--  
Peng Yang <[email protected]>


Reply via email to