Reviewed-by: Pavel Tikhomirov <[email protected]>

On 6/25/26 17:54, Andrey Drobyshev wrote:
> From: Pavel Tikhomirov <[email protected]>
> 
> This ioctl is needed for QEMU's CPR (checkpoint-restore) migration of
> the guest with vhost-vsock device.  For this to work, we need to reset
> the device ownership on the source side by calling RESET_OWNER, and then
> claim it on the dest side by calling SET_OWNER.  We expect not to lose any
> AF_VSOCK connection while this happens.
> 
> RESET_OWNER keeps the guest CID hashed, so that connections survive. That
> leaves the device reachable by a lockless send/cancel path while the worker
> is being torn down: a concurrent vhost_transport_send_pkt() or
> vhost_transport_cancel_pkt() can call vhost_vq_work_queue() as
> vhost_workers_free() frees the worker.  That might cause a use-after-free
> of vq->worker.  In addition, any work queued onto the dying worker leaves
> VHOST_WORK_QUEUED stuck, stalling send_pkt_queue after resume.
> 
> Fence the send/cancel paths around the teardown: send_pkt()/cancel_pkt()
> only kick the worker while the backend is alive.  And reset_owner() calls
> synchronize_rcu() after drop_backends() so in-flight send/cancel finish
> before the worker is freed.
> 
> Signed-off-by: Pavel Tikhomirov <[email protected]>
> Signed-off-by: Andrey Drobyshev <[email protected]>
> ---
>  drivers/vhost/vsock.c | 51 +++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 49 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> index 81d4f7209719..f0a0aa7d3200 100644
> --- a/drivers/vhost/vsock.c
> +++ b/drivers/vhost/vsock.c
> @@ -318,7 +318,14 @@ vhost_transport_send_pkt(struct sk_buff *skb, struct net *net)
>               atomic_inc(&vsock->queued_replies);
>  
>       virtio_vsock_skb_queue_tail(&vsock->send_pkt_queue, skb);
> -     vhost_vq_work_queue(&vsock->vqs[VSOCK_VQ_RX], &vsock->send_pkt_work);
> +
> +     /* Skip the kick once the backend is gone (stop/RESET_OWNER); the skb
> +      * stays queued and vhost_vsock_start() drains it. Pairs with the
> +      * synchronize_rcu() in vhost_vsock_reset_owner().
> +      */
> +     if (data_race(vhost_vq_get_backend(&vsock->vqs[VSOCK_VQ_RX])))
> +             vhost_vq_work_queue(&vsock->vqs[VSOCK_VQ_RX],
> +                                 &vsock->send_pkt_work);
>  
>       rcu_read_unlock();
>       return len;
> @@ -346,7 +353,15 @@ vhost_transport_cancel_pkt(struct vsock_sock *vsk)
>               int new_cnt;
>  
>               new_cnt = atomic_sub_return(cnt, &vsock->queued_replies);
> -             if (new_cnt + cnt >= tx_vq->num && new_cnt < tx_vq->num)
> +
> +             /* Skip the kick once the backend is gone (stop/RESET_OWNER):
> +              * vhost_poll_queue() would touch the worker which is being freed
> +              * by teardown, e.g. on RESET_OWNER.  Pairs with the
> +              * synchronize_rcu() in vhost_vsock_reset_owner().  The TX VQ is
> +              * re-kicked by vhost_vsock_start().
> +              */
> +             if (data_race(vhost_vq_get_backend(tx_vq)) &&
> +                 new_cnt + cnt >= tx_vq->num && new_cnt < tx_vq->num)
>                       vhost_poll_queue(&tx_vq->poll);
>       }
>  
> @@ -903,6 +918,36 @@ static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features)
>       return -EFAULT;
>  }
>  
> +static int vhost_vsock_reset_owner(struct vhost_vsock *vsock)
> +{
> +     struct vhost_iotlb *umem;
> +     long err;
> +
> +     mutex_lock(&vsock->dev.mutex);
> +     err = vhost_dev_check_owner(&vsock->dev);
> +     if (err)
> +             goto done;
> +     umem = vhost_dev_reset_owner_prepare();
> +     if (!umem) {
> +             err = -ENOMEM;
> +             goto done;
> +     }
> +     vhost_vsock_drop_backends(vsock);
> +
> +     /* Let in-flight send_pkt() callers stop touching the worker before the
> +      * flush + free below. Pairs with the backend check in
> +      * vhost_transport_send_pkt().
> +      */
> +     synchronize_rcu();
> +
> +     vhost_vsock_flush(vsock);
> +     vhost_dev_stop(&vsock->dev);
> +     vhost_dev_reset_owner(&vsock->dev, umem);
> +done:
> +     mutex_unlock(&vsock->dev.mutex);
> +     return err;
> +}
> +
>  static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
>                                 unsigned long arg)
>  {
> @@ -946,6 +991,8 @@ static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
>                       return -EOPNOTSUPP;
>               vhost_set_backend_features(&vsock->dev, features);
>               return 0;
> +     case VHOST_RESET_OWNER:
> +             return vhost_vsock_reset_owner(vsock);
>       default:
>               mutex_lock(&vsock->dev.mutex);
>               r = vhost_dev_ioctl(&vsock->dev, ioctl, argp);

-- 
Best regards, Pavel Tikhomirov
Senior Software Developer, Virtuozzo.


Reply via email to