From: Pavel Tikhomirov <[email protected]> The VHOST_RESET_OWNER ioctl is needed for QEMU's CPR (CheckPoint and Restart) migration of a guest with a vhost-vsock device. For this to work, we need to release the device ownership on the source side by calling RESET_OWNER, and then claim it on the destination side by calling SET_OWNER. No AF_VSOCK connection is expected to be lost during this handover.
RESET_OWNER keeps the guest CID hashed, so that connections survive. That leaves the device reachable by the lockless send path while the worker is being torn down: a concurrent vhost_transport_send_pkt() can call vhost_vq_work_queue() while vhost_workers_free() is freeing the worker, which can cause a use-after-free of vq->worker. In addition, any work queued onto the dying worker leaves VHOST_WORK_QUEUED stuck, stalling send_pkt_queue after resume. Fence the send path around the teardown in two ways. First, send_pkt() only kicks the worker while the backend is still set (otherwise the skb stays queued and vhost_vsock_start() drains it on resume). Second, reset_owner() calls synchronize_rcu() after drop_backends() so that in-flight senders finish before the worker is freed. Signed-off-by: Pavel Tikhomirov <[email protected]> Signed-off-by: Andrey Drobyshev <[email protected]> --- drivers/vhost/vsock.c | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 81d4f7209719..7d0146cd38d2 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -318,7 +318,14 @@ vhost_transport_send_pkt(struct sk_buff *skb, struct net *net) atomic_inc(&vsock->queued_replies); virtio_vsock_skb_queue_tail(&vsock->send_pkt_queue, skb); - vhost_vq_work_queue(&vsock->vqs[VSOCK_VQ_RX], &vsock->send_pkt_work); + + /* Skip the kick once the backend is gone (stop/RESET_OWNER); the skb + * stays queued and vhost_vsock_start() drains it. Pairs with the + * synchronize_rcu() in vhost_vsock_reset_owner(). 
+ */ + if (data_race(vhost_vq_get_backend(&vsock->vqs[VSOCK_VQ_RX]))) + vhost_vq_work_queue(&vsock->vqs[VSOCK_VQ_RX], + &vsock->send_pkt_work); rcu_read_unlock(); return len; @@ -903,6 +910,36 @@ static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features) return -EFAULT; } +static int vhost_vsock_reset_owner(struct vhost_vsock *vsock) +{ + struct vhost_iotlb *umem; + long err; + + mutex_lock(&vsock->dev.mutex); + err = vhost_dev_check_owner(&vsock->dev); + if (err) + goto done; + umem = vhost_dev_reset_owner_prepare(); + if (!umem) { + err = -ENOMEM; + goto done; + } + vhost_vsock_drop_backends(vsock); + + /* Let in-flight send_pkt() callers stop touching the worker before the + * flush + free below. Pairs with the backend check in + * vhost_transport_send_pkt(). + */ + synchronize_rcu(); + + vhost_vsock_flush(vsock); + vhost_dev_stop(&vsock->dev); + vhost_dev_reset_owner(&vsock->dev, umem); +done: + mutex_unlock(&vsock->dev.mutex); + return err; +} + static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl, unsigned long arg) { @@ -946,6 +983,8 @@ static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl, return -EOPNOTSUPP; vhost_set_backend_features(&vsock->dev, features); return 0; + case VHOST_RESET_OWNER: + return vhost_vsock_reset_owner(vsock); default: mutex_lock(&vsock->dev.mutex); r = vhost_dev_ioctl(&vsock->dev, ioctl, argp); -- 2.47.1

