From: Pavel Tikhomirov <[email protected]>

This ioctl is needed for QEMU's CPR (CheckPoint and Restart) migration of
the guest with vhost-vsock device.  For this to work, we need to reset
the device ownership on the source side by calling RESET_OWNER, and then
claim it on the dest side by calling SET_OWNER.  We expect not to lose any
AF_VSOCK connection while this happens.

RESET_OWNER keeps the guest CID hashed, so that connections survive. That
leaves the device reachable by a lockless send/cancel path while the worker
is being torn down: a concurrent vhost_transport_send_pkt() or
vhost_transport_cancel_pkt() can call vhost_vq_work_queue() as
vhost_workers_free() frees the worker.  That might cause a use-after-free
of vq->worker.  In addition, any work queued onto the dying worker leaves
VHOST_WORK_QUEUED stuck, stalling send_pkt_queue after resume.

Fence the send/cancel paths around the teardown: send_pkt()/cancel_pkt()
only kick the worker while the backend is alive.  And reset_owner() calls
synchronize_rcu() after drop_backends() so in-flight send/cancel finish
before the worker is freed.

Signed-off-by: Pavel Tikhomirov <[email protected]>
Signed-off-by: Andrey Drobyshev <[email protected]>
---
 drivers/vhost/vsock.c | 51 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 49 insertions(+), 2 deletions(-)

diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 81d4f7209719..f0a0aa7d3200 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -318,7 +318,14 @@ vhost_transport_send_pkt(struct sk_buff *skb, struct net *net)
                atomic_inc(&vsock->queued_replies);
 
        virtio_vsock_skb_queue_tail(&vsock->send_pkt_queue, skb);
-       vhost_vq_work_queue(&vsock->vqs[VSOCK_VQ_RX], &vsock->send_pkt_work);
+
+       /* Skip the kick once the backend is gone (stop/RESET_OWNER); the skb
+        * stays queued and vhost_vsock_start() drains it. Pairs with the
+        * synchronize_rcu() in vhost_vsock_reset_owner().
+        */
+       if (data_race(vhost_vq_get_backend(&vsock->vqs[VSOCK_VQ_RX])))
+               vhost_vq_work_queue(&vsock->vqs[VSOCK_VQ_RX],
+                                   &vsock->send_pkt_work);
 
        rcu_read_unlock();
        return len;
@@ -346,7 +353,15 @@ vhost_transport_cancel_pkt(struct vsock_sock *vsk)
                int new_cnt;
 
                new_cnt = atomic_sub_return(cnt, &vsock->queued_replies);
-               if (new_cnt + cnt >= tx_vq->num && new_cnt < tx_vq->num)
+
+               /* Skip the kick once the backend is gone (stop/RESET_OWNER):
+                * vhost_poll_queue() would touch the worker which is being freed
+                * by teardown, e.g. on RESET_OWNER.  Pairs with the
+                * synchronize_rcu() in vhost_vsock_reset_owner().  The TX VQ is
+                * re-kicked by vhost_vsock_start().
+                */
+               if (data_race(vhost_vq_get_backend(tx_vq)) &&
+                   new_cnt + cnt >= tx_vq->num && new_cnt < tx_vq->num)
                        vhost_poll_queue(&tx_vq->poll);
        }
 
@@ -903,6 +918,36 @@ static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features)
        return -EFAULT;
 }
 
+static int vhost_vsock_reset_owner(struct vhost_vsock *vsock)
+{
+       struct vhost_iotlb *umem;
+       long err;
+
+       mutex_lock(&vsock->dev.mutex);
+       err = vhost_dev_check_owner(&vsock->dev);
+       if (err)
+               goto done;
+       umem = vhost_dev_reset_owner_prepare();
+       if (!umem) {
+               err = -ENOMEM;
+               goto done;
+       }
+       vhost_vsock_drop_backends(vsock);
+
+       /* Wait for in-flight send_pkt()/cancel_pkt() callers to stop touching
+        * the worker before the flush + free below. Pairs with the backend
+        * checks in vhost_transport_{send,cancel}_pkt().
+        */
+       synchronize_rcu();
+
+       vhost_vsock_flush(vsock);
+       vhost_dev_stop(&vsock->dev);
+       vhost_dev_reset_owner(&vsock->dev, umem);
+done:
+       mutex_unlock(&vsock->dev.mutex);
+       return err;
+}
+
 static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
                                  unsigned long arg)
 {
@@ -946,6 +991,8 @@ static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
                        return -EOPNOTSUPP;
                vhost_set_backend_features(&vsock->dev, features);
                return 0;
+       case VHOST_RESET_OWNER:
+               return vhost_vsock_reset_owner(vsock);
        default:
                mutex_lock(&vsock->dev.mutex);
                r = vhost_dev_ioctl(&vsock->dev, ioctl, argp);
-- 
2.47.1


Reply via email to