[PATCH] virtio-net: fix the race between refill work and close

Jason Wang Tue, 28 Jun 2022 02:03:45 -0700

We try using cancel_delayed_work_sync() to prevent the work from
enabling NAPI. This is insufficient since we don't disable the the
source the scheduling of the refill work. This means an NAPI after
cancel_delayed_work_sync() can schedule the refill work then can
re-enable the NAPI that leads to use-after-free [1].


Since the work can enable NAPI, we can't simply disable NAPI before
calling cancel_delayed_work_sync(). So fix this by introducing a
dedicated boolean to control whether or not the work could be
scheduled from NAPI.

[1]
==================================================================
BUG: KASAN: use-after-free in refill_work+0x43/0xd4
Read of size 2 at addr ffff88810562c92e by task kworker/2:1/42

CPU: 2 PID: 42 Comm: kworker/2:1 Not tainted 5.19.0-rc1+ #480
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 
rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
Workqueue: events refill_work
Call Trace:
 <TASK>
 dump_stack_lvl+0x34/0x44
 print_report.cold+0xbb/0x6ac
 ? _printk+0xad/0xde
 ? refill_work+0x43/0xd4
 kasan_report+0xa8/0x130
 ? refill_work+0x43/0xd4
 refill_work+0x43/0xd4
 process_one_work+0x43d/0x780
 worker_thread+0x2a0/0x6f0
 ? process_one_work+0x780/0x780
 kthread+0x167/0x1a0
 ? kthread_exit+0x50/0x50
 ret_from_fork+0x22/0x30
 </TASK>
...

Fixes: b2baed69e605c ("virtio_net: set/cancel work on ndo_open/ndo_stop")
Signed-off-by: Jason Wang <[email protected]>
---
 drivers/net/virtio_net.c | 38 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 36 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index db05b5e930be..21bf1e5c81ef 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -251,6 +251,12 @@ struct virtnet_info {
        /* Does the affinity hint is set for virtqueues? */
        bool affinity_hint_set;
 
+       /* Is refill work enabled? */
+       bool refill_work_enabled;
+
+       /* The lock to synchronize the access to refill_work_enabled */
+       spinlock_t refill_lock;
+
        /* CPU hotplug instances for online & dead */
        struct hlist_node node;
        struct hlist_node node_dead;
@@ -348,6 +354,20 @@ static struct page *get_a_page(struct receive_queue *rq, 
gfp_t gfp_mask)
        return p;
 }
 
+static void enable_refill_work(struct virtnet_info *vi)
+{
+       spin_lock(&vi->refill_lock);
+       vi->refill_work_enabled = true;
+       spin_unlock(&vi->refill_lock);
+}
+
+static void disable_refill_work(struct virtnet_info *vi)
+{
+       spin_lock(&vi->refill_lock);
+       vi->refill_work_enabled = false;
+       spin_unlock(&vi->refill_lock);
+}
+
 static void virtqueue_napi_schedule(struct napi_struct *napi,
                                    struct virtqueue *vq)
 {
@@ -1527,8 +1547,12 @@ static int virtnet_receive(struct receive_queue *rq, int 
budget,
        }
 
        if (rq->vq->num_free > min((unsigned int)budget, 
virtqueue_get_vring_size(rq->vq)) / 2) {
-               if (!try_fill_recv(vi, rq, GFP_ATOMIC))
-                       schedule_delayed_work(&vi->refill, 0);
+               if (!try_fill_recv(vi, rq, GFP_ATOMIC)) {
+                       spin_lock(&vi->refill_lock);
+                       if (vi->refill_work_enabled)
+                               schedule_delayed_work(&vi->refill, 0);
+                       spin_unlock(&vi->refill_lock);
+               }
        }
 
        u64_stats_update_begin(&rq->stats.syncp);
@@ -1651,6 +1675,8 @@ static int virtnet_open(struct net_device *dev)
        struct virtnet_info *vi = netdev_priv(dev);
        int i, err;
 
+       enable_refill_work(vi);
+
        for (i = 0; i < vi->max_queue_pairs; i++) {
                if (i < vi->curr_queue_pairs)
                        /* Make sure we have some buffers: if oom use wq. */
@@ -2033,6 +2059,8 @@ static int virtnet_close(struct net_device *dev)
        struct virtnet_info *vi = netdev_priv(dev);
        int i;
 
+       /* Make sure NAPI doesn't schedule refill work */
+       disable_refill_work(vi);
        /* Make sure refill_work doesn't re-enable napi! */
        cancel_delayed_work_sync(&vi->refill);
 
@@ -2776,6 +2804,9 @@ static void virtnet_freeze_down(struct virtio_device 
*vdev)
        netif_tx_lock_bh(vi->dev);
        netif_device_detach(vi->dev);
        netif_tx_unlock_bh(vi->dev);
+       /* Make sure NAPI doesn't schedule refill work */
+       disable_refill_work(vi);
+       /* Make sure refill_work doesn't re-enable napi! */
        cancel_delayed_work_sync(&vi->refill);
 
        if (netif_running(vi->dev)) {
@@ -2799,6 +2830,8 @@ static int virtnet_restore_up(struct virtio_device *vdev)
 
        virtio_device_ready(vdev);
 
+       enable_refill_work(vi);
+
        if (netif_running(vi->dev)) {
                for (i = 0; i < vi->curr_queue_pairs; i++)
                        if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
@@ -3548,6 +3581,7 @@ static int virtnet_probe(struct virtio_device *vdev)
        vdev->priv = vi;
 
        INIT_WORK(&vi->config_work, virtnet_config_changed_work);
+       spin_lock_init(&vi->refill_lock);
 
        /* If we can receive ANY GSO packets, we must allocate large ones. */
        if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
-- 
2.25.1

_______________________________________________
Virtualization mailing list
[email protected]
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH] virtio-net: fix the race between refill work and close

Reply via email to