On Sun, Nov 9, 2025 at 3:27 PM Shani Peretz <[email protected]> wrote:
>
> This commit fixes a use-after-free that causes the application to crash
> on shutdown (detected by ASAN).
>
> The vhost library uses a background event dispatch thread that monitors
> fds with epoll. It runs in an infinite loop, waiting for I/O events
> and calling callbacks when they occur.
>
> During cleanup, a race condition existed:
>
>   Main Thread:                    Event Dispatch Thread:
>   1. Remove fds from fdset        while (1) {
>   2. Close file descriptors           epoll_wait() [gets interrupted]
>   3. Free fdset memory                [continues loop]
>   4. Continue...                      Accesses fdset...   CRASH
>                                   }
>
> There isn't explicit cleanup of the fdset structure.
> The fdset structue is allocated with rte_zmalloc() and the memory would

structure

> only be reclaimed at application shutdown when rte_eal_cleanup() is called,
> which invokes rte_eal_memory_detach() to unmap all the hugepage memory.
> Meanwhile, the event dispatch thread could still be running and accessing
> the fdset.
>
> The code had a `destroy` flag that the event dispatch thread checked,
> but it was never set during cleanup, and the code never waited for
> the thread to actually exit before freeing memory.
>
> To fix this, the commit implements `fdset_destroy()` that will set the
> destroy flag, wait for thread termination, and clean up all resources.
>
> The socket.c and vduse.c are updated to call fdset_destroy() when
> the last socket/device is unregistered.
>
> For vduse, reference counting was added to track the number of devices
> using the fdset. The fdset is destroyed when the last device is removed.

I believe the vduse change is unrelated.
Currently, once a vduse device has been created, we never destroy the fdset.

We can support destroying the vduse fdset once, but it should be in a
dedicated patch.
And it might not need to be a fix, even though I don't have a strong opinion.

> Fixes: 0e38b42bf61c ("vhost: manage FD with epoll")
> Cc: [email protected]
>
> Signed-off-by: Shani Peretz <[email protected]>
>
>
>
> ----------
> v2:
> - Extended the fix to vduse.c, added reference counting and mutex to vduse
>   structure to track the number of devices using the fdset
> - Call fdset_destroy() when last device is removed in vduse_device_destroy()
>   and in error paths of vduse_device_create()
> - Added mutex protection when checking/setting the destroy flag to prevent
>   race conditions in both fdset_event_dispatch() and fdset_destroy()
>
> ---
>  lib/vhost/fd_man.c | 45 ++++++++++++++++++++++++++++++++++++-
>  lib/vhost/fd_man.h |  1 +
>  lib/vhost/socket.c |  7 ++++++
>  lib/vhost/vduse.c  | 56 +++++++++++++++++++++++++++++++++++++---------
>  4 files changed, 98 insertions(+), 11 deletions(-)
>
> diff --git a/lib/vhost/fd_man.c b/lib/vhost/fd_man.c
> index f9147edee7..b4597dec75 100644
> --- a/lib/vhost/fd_man.c
> +++ b/lib/vhost/fd_man.c
> @@ -387,9 +387,52 @@ fdset_event_dispatch(void *arg)
>                         }
>                 }
>
> -               if (pfdset->destroy)
> +               pthread_mutex_lock(&pfdset->fd_mutex);
> +               bool should_destroy = pfdset->destroy;
> +               pthread_mutex_unlock(&pfdset->fd_mutex);
> +               if (should_destroy)
>                         break;
>         }
>
>         return 0;
>  }
> +
> +/**
> + * Destroy the fdset and stop its event dispatch thread.
> + */
> +void
> +fdset_destroy(struct fdset *pfdset)
> +{
> +       uint32_t val;
> +       int i;
> +
> +       if (pfdset == NULL)
> +               return;
> +
> +       /* Signal the event dispatch thread to stop */
> +       pthread_mutex_lock(&pfdset->fd_mutex);
> +       pfdset->destroy = true;
> +       pthread_mutex_unlock(&pfdset->fd_mutex);
> +
> +       /* Wait for the event dispatch thread to finish */
> +       rte_thread_join(pfdset->tid, &val);
> +
> +       /* Close the epoll file descriptor */
> +       close(pfdset->epfd);
> +
> +       /* Destroy the mutex */
> +       pthread_mutex_destroy(&pfdset->fd_mutex);
> +
> +       /* Remove from global registry */
> +       pthread_mutex_lock(&fdsets_mutex);
> +       for (i = 0; i < MAX_FDSETS; i++) {
> +               if (fdsets[i] == pfdset) {
> +                       fdsets[i] = NULL;
> +                       break;
> +               }
> +       }
> +       pthread_mutex_unlock(&fdsets_mutex);
> +
> +       /* Free the fdset structure */
> +       rte_free(pfdset);
> +}
> diff --git a/lib/vhost/fd_man.h b/lib/vhost/fd_man.h
> index eadcc6fb42..ed2109f3c8 100644
> --- a/lib/vhost/fd_man.h
> +++ b/lib/vhost/fd_man.h
> @@ -21,5 +21,6 @@ int fdset_add(struct fdset *pfdset, int fd,
>
>  void fdset_del(struct fdset *pfdset, int fd);
>  int fdset_try_del(struct fdset *pfdset, int fd);
> +void fdset_destroy(struct fdset *pfdset);
>
>  #endif
> diff --git a/lib/vhost/socket.c b/lib/vhost/socket.c
> index 9b4f332f94..0240da8ff0 100644
> --- a/lib/vhost/socket.c
> +++ b/lib/vhost/socket.c
> @@ -1141,6 +1141,13 @@ rte_vhost_driver_unregister(const char *path)
>                 count = --vhost_user.vsocket_cnt;
>                 vhost_user.vsockets[i] = vhost_user.vsockets[count];
>                 vhost_user.vsockets[count] = NULL;
> +
> +               /* Check if we need to destroy the vhost fdset */
> +               if (vhost_user.vsocket_cnt == 0 && vhost_user.fdset != NULL) {
> +                       fdset_destroy(vhost_user.fdset);
> +                       vhost_user.fdset = NULL;
> +               }
> +
>                 pthread_mutex_unlock(&vhost_user.mutex);
>                 return 0;
>         }
> diff --git a/lib/vhost/vduse.c b/lib/vhost/vduse.c
> index 68e56843fd..f255f717ab 100644
> --- a/lib/vhost/vduse.c
> +++ b/lib/vhost/vduse.c
> @@ -30,9 +30,15 @@
>
>  struct vduse {
>         struct fdset *fdset;
> +       int device_cnt;
> +       pthread_mutex_t mutex;
>  };
>
> -static struct vduse vduse;
> +static struct vduse vduse = {
> +       .fdset = NULL,
> +       .device_cnt = 0,
> +       .mutex = PTHREAD_MUTEX_INITIALIZER,
> +};
>
>  static const char * const vduse_reqs_str[] = {
>         "VDUSE_GET_VQ_STATE",
> @@ -683,19 +689,16 @@ vduse_device_create(const char *path, bool 
> compliant_ol_flags)
>         const char *name = path + strlen("/dev/vduse/");
>         bool reconnect = false;
>
> -       if (vduse.fdset == NULL) {
> -               vduse.fdset = fdset_init("vduse-evt");
> -               if (vduse.fdset == NULL) {
> -                       VHOST_CONFIG_LOG(path, ERR, "failed to init VDUSE 
> fdset");
> -                       return -1;
> -               }
> -       }
> +       pthread_mutex_lock(&vduse.mutex);
> +       vduse.device_cnt++;
> +       pthread_mutex_unlock(&vduse.mutex);
>
>         control_fd = open(VDUSE_CTRL_PATH, O_RDWR);
>         if (control_fd < 0) {
>                 VHOST_CONFIG_LOG(name, ERR, "Failed to open %s: %s",
>                                 VDUSE_CTRL_PATH, strerror(errno));
> -               return -1;
> +               ret = -1;
> +               goto out_dec_cnt;
>         }
>
>         if (ioctl(control_fd, VDUSE_SET_API_VERSION, &ver)) {
> @@ -845,6 +848,19 @@ vduse_device_create(const char *path, bool 
> compliant_ol_flags)
>
>         dev->cvq = dev->virtqueue[max_queue_pairs * 2];
>
> +       /* Only allocate when we know device creation will succeed */
> +       pthread_mutex_lock(&vduse.mutex);
> +       if (vduse.fdset == NULL) {
> +               vduse.fdset = fdset_init("vduse-evt");
> +               if (vduse.fdset == NULL) {
> +                       VHOST_CONFIG_LOG(path, ERR, "failed to init VDUSE 
> fdset");
> +                       pthread_mutex_unlock(&vduse.mutex);
> +                       ret = -1;
> +                       goto out_log_unmap;
> +               }
> +       }
> +       pthread_mutex_unlock(&vduse.mutex);
> +
>         ret = fdset_add(vduse.fdset, dev->vduse_dev_fd, vduse_events_handler, 
> NULL, dev);
>         if (ret) {
>                 VHOST_CONFIG_LOG(name, ERR, "Failed to add fd %d to vduse 
> fdset",
> @@ -861,6 +877,8 @@ vduse_device_create(const char *path, bool 
> compliant_ol_flags)
>         return 0;
>
>  out_log_unmap:
> +       if (vduse.fdset != NULL)
> +               fdset_del(vduse.fdset, dev->vduse_dev_fd);
>         munmap(dev->reconnect_log, sizeof(*dev->reconnect_log));
>  out_dev_destroy:
>         vhost_destroy_device(vid);
> @@ -870,6 +888,14 @@ vduse_device_create(const char *path, bool 
> compliant_ol_flags)
>         ioctl(control_fd, VDUSE_DESTROY_DEV, name);
>  out_ctrl_close:
>         close(control_fd);
> +out_dec_cnt:
> +       pthread_mutex_lock(&vduse.mutex);
> +       vduse.device_cnt--;
> +       if (vduse.device_cnt == 0 && vduse.fdset != NULL) {
> +               fdset_destroy(vduse.fdset);
> +               vduse.fdset = NULL;
> +       }
> +       pthread_mutex_unlock(&vduse.mutex);
>
>         return ret;
>  }
> @@ -899,7 +925,17 @@ vduse_device_destroy(const char *path)
>
>         vduse_device_stop(dev);
>
> -       fdset_del(vduse.fdset, dev->vduse_dev_fd);
> +       if (vduse.fdset != NULL)
> +               fdset_del(vduse.fdset, dev->vduse_dev_fd);
> +
> +       /* Check if we need to destroy the vduse fdset */
> +       pthread_mutex_lock(&vduse.mutex);
> +       vduse.device_cnt--;
> +       if (vduse.device_cnt == 0 && vduse.fdset != NULL) {
> +               fdset_destroy(vduse.fdset);
> +               vduse.fdset = NULL;
> +       }
> +       pthread_mutex_unlock(&vduse.mutex);
>
>         if (dev->vduse_dev_fd >= 0) {
>                 close(dev->vduse_dev_fd);
> --
> 2.34.1
>

Reply via email to