On Sun, Nov 9, 2025 at 3:27 PM Shani Peretz <[email protected]> wrote:
>
> This commit fixes a use-after-free that causes the application to crash
> on shutdown (detected by ASAN).
>
> The vhost library uses a background event dispatch thread that monitors
> fds with epoll. It runs in an infinite loop, waiting for I/O events
> and calling callbacks when they occur.
>
> During cleanup, a race condition existed:
>
> Main Thread: Event Dispatch Thread:
> 1. Remove fds from fdset while (1) {
> 2. Close file descriptors epoll_wait() [gets interrupted]
> 3. Free fdset memory [continues loop]
> 4. Continue... Accesses fdset... CRASH
> }
>
> There isn't explicit cleanup of the fdset structure.
> The fdset structue is allocated with rte_zmalloc() and the memory would
structure
> only be reclaimed at application shutdown when rte_eal_cleanup() is called,
> which invokes rte_eal_memory_detach() to unmap all the hugepage memory.
> Meanwhile, the event dispatch thread could still be running and accessing
> the fdset.
>
> The code had a `destroy` flag that the event dispatch thread checked,
> but it was never set during cleanup, and the code never waited for
> the thread to actually exit before freeing memory.
>
> To fix this, the commit implements `fdset_destroy()` that will set the
> destroy flag, wait for thread termination, and clean up all resources.
>
> The socket.c and vduse.c are updated to call fdset_destroy() when
> the last socket/device is unregistered.
>
> For vduse, reference counting was added to track the number of devices
> using the fdset. The fdset is destroyed when the last device is removed.
I believe the vduse change is unrelated.
Currently, once a vduse device has been created, we never destroy the fdset.
We can support destroying the vduse fdset once, but it should be in a
dedicated patch.
And it might not need to be a fix, even though I don't have a strong opinion.
> Fixes: 0e38b42bf61c ("vhost: manage FD with epoll")
> Cc: [email protected]
>
> Signed-off-by: Shani Peretz <[email protected]>
>
>
>
> ----------
> v2:
> - Extended the fix to vduse.c, added reference counting and mutex to vduse
> structure to track the number of devices using the fdset
> - Call fdset_destroy() when last device is removed in vduse_device_destroy()
> and in error paths of vduse_device_create()
> - Added mutex protection when checking/setting the destroy flag to prevent
> race conditions in both fdset_event_dispatch() and fdset_destroy()
>
> ---
> lib/vhost/fd_man.c | 45 ++++++++++++++++++++++++++++++++++++-
> lib/vhost/fd_man.h | 1 +
> lib/vhost/socket.c | 7 ++++++
> lib/vhost/vduse.c | 56 +++++++++++++++++++++++++++++++++++++---------
> 4 files changed, 98 insertions(+), 11 deletions(-)
>
> diff --git a/lib/vhost/fd_man.c b/lib/vhost/fd_man.c
> index f9147edee7..b4597dec75 100644
> --- a/lib/vhost/fd_man.c
> +++ b/lib/vhost/fd_man.c
> @@ -387,9 +387,52 @@ fdset_event_dispatch(void *arg)
> }
> }
>
> - if (pfdset->destroy)
> + pthread_mutex_lock(&pfdset->fd_mutex);
> + bool should_destroy = pfdset->destroy;
> + pthread_mutex_unlock(&pfdset->fd_mutex);
> + if (should_destroy)
> break;
> }
>
> return 0;
> }
> +
> +/**
> + * Destroy the fdset and stop its event dispatch thread.
> + */
> +void
> +fdset_destroy(struct fdset *pfdset)
> +{
> + uint32_t val;
> + int i;
> +
> + if (pfdset == NULL)
> + return;
> +
> + /* Signal the event dispatch thread to stop */
> + pthread_mutex_lock(&pfdset->fd_mutex);
> + pfdset->destroy = true;
> + pthread_mutex_unlock(&pfdset->fd_mutex);
> +
> + /* Wait for the event dispatch thread to finish */
> + rte_thread_join(pfdset->tid, &val);
> +
> + /* Close the epoll file descriptor */
> + close(pfdset->epfd);
> +
> + /* Destroy the mutex */
> + pthread_mutex_destroy(&pfdset->fd_mutex);
> +
> + /* Remove from global registry */
> + pthread_mutex_lock(&fdsets_mutex);
> + for (i = 0; i < MAX_FDSETS; i++) {
> + if (fdsets[i] == pfdset) {
> + fdsets[i] = NULL;
> + break;
> + }
> + }
> + pthread_mutex_unlock(&fdsets_mutex);
> +
> + /* Free the fdset structure */
> + rte_free(pfdset);
> +}
> diff --git a/lib/vhost/fd_man.h b/lib/vhost/fd_man.h
> index eadcc6fb42..ed2109f3c8 100644
> --- a/lib/vhost/fd_man.h
> +++ b/lib/vhost/fd_man.h
> @@ -21,5 +21,6 @@ int fdset_add(struct fdset *pfdset, int fd,
>
> void fdset_del(struct fdset *pfdset, int fd);
> int fdset_try_del(struct fdset *pfdset, int fd);
> +void fdset_destroy(struct fdset *pfdset);
>
> #endif
> diff --git a/lib/vhost/socket.c b/lib/vhost/socket.c
> index 9b4f332f94..0240da8ff0 100644
> --- a/lib/vhost/socket.c
> +++ b/lib/vhost/socket.c
> @@ -1141,6 +1141,13 @@ rte_vhost_driver_unregister(const char *path)
> count = --vhost_user.vsocket_cnt;
> vhost_user.vsockets[i] = vhost_user.vsockets[count];
> vhost_user.vsockets[count] = NULL;
> +
> + /* Check if we need to destroy the vhost fdset */
> + if (vhost_user.vsocket_cnt == 0 && vhost_user.fdset != NULL) {
> + fdset_destroy(vhost_user.fdset);
> + vhost_user.fdset = NULL;
> + }
> +
> pthread_mutex_unlock(&vhost_user.mutex);
> return 0;
> }
> diff --git a/lib/vhost/vduse.c b/lib/vhost/vduse.c
> index 68e56843fd..f255f717ab 100644
> --- a/lib/vhost/vduse.c
> +++ b/lib/vhost/vduse.c
> @@ -30,9 +30,15 @@
>
> struct vduse {
> struct fdset *fdset;
> + int device_cnt;
> + pthread_mutex_t mutex;
> };
>
> -static struct vduse vduse;
> +static struct vduse vduse = {
> + .fdset = NULL,
> + .device_cnt = 0,
> + .mutex = PTHREAD_MUTEX_INITIALIZER,
> +};
>
> static const char * const vduse_reqs_str[] = {
> "VDUSE_GET_VQ_STATE",
> @@ -683,19 +689,16 @@ vduse_device_create(const char *path, bool
> compliant_ol_flags)
> const char *name = path + strlen("/dev/vduse/");
> bool reconnect = false;
>
> - if (vduse.fdset == NULL) {
> - vduse.fdset = fdset_init("vduse-evt");
> - if (vduse.fdset == NULL) {
> - VHOST_CONFIG_LOG(path, ERR, "failed to init VDUSE
> fdset");
> - return -1;
> - }
> - }
> + pthread_mutex_lock(&vduse.mutex);
> + vduse.device_cnt++;
> + pthread_mutex_unlock(&vduse.mutex);
>
> control_fd = open(VDUSE_CTRL_PATH, O_RDWR);
> if (control_fd < 0) {
> VHOST_CONFIG_LOG(name, ERR, "Failed to open %s: %s",
> VDUSE_CTRL_PATH, strerror(errno));
> - return -1;
> + ret = -1;
> + goto out_dec_cnt;
> }
>
> if (ioctl(control_fd, VDUSE_SET_API_VERSION, &ver)) {
> @@ -845,6 +848,19 @@ vduse_device_create(const char *path, bool
> compliant_ol_flags)
>
> dev->cvq = dev->virtqueue[max_queue_pairs * 2];
>
> + /* Only allocate when we know device creation will succeed */
> + pthread_mutex_lock(&vduse.mutex);
> + if (vduse.fdset == NULL) {
> + vduse.fdset = fdset_init("vduse-evt");
> + if (vduse.fdset == NULL) {
> + VHOST_CONFIG_LOG(path, ERR, "failed to init VDUSE
> fdset");
> + pthread_mutex_unlock(&vduse.mutex);
> + ret = -1;
> + goto out_log_unmap;
> + }
> + }
> + pthread_mutex_unlock(&vduse.mutex);
> +
> ret = fdset_add(vduse.fdset, dev->vduse_dev_fd, vduse_events_handler,
> NULL, dev);
> if (ret) {
> VHOST_CONFIG_LOG(name, ERR, "Failed to add fd %d to vduse
> fdset",
> @@ -861,6 +877,8 @@ vduse_device_create(const char *path, bool
> compliant_ol_flags)
> return 0;
>
> out_log_unmap:
> + if (vduse.fdset != NULL)
> + fdset_del(vduse.fdset, dev->vduse_dev_fd);
> munmap(dev->reconnect_log, sizeof(*dev->reconnect_log));
> out_dev_destroy:
> vhost_destroy_device(vid);
> @@ -870,6 +888,14 @@ vduse_device_create(const char *path, bool
> compliant_ol_flags)
> ioctl(control_fd, VDUSE_DESTROY_DEV, name);
> out_ctrl_close:
> close(control_fd);
> +out_dec_cnt:
> + pthread_mutex_lock(&vduse.mutex);
> + vduse.device_cnt--;
> + if (vduse.device_cnt == 0 && vduse.fdset != NULL) {
> + fdset_destroy(vduse.fdset);
> + vduse.fdset = NULL;
> + }
> + pthread_mutex_unlock(&vduse.mutex);
>
> return ret;
> }
> @@ -899,7 +925,17 @@ vduse_device_destroy(const char *path)
>
> vduse_device_stop(dev);
>
> - fdset_del(vduse.fdset, dev->vduse_dev_fd);
> + if (vduse.fdset != NULL)
> + fdset_del(vduse.fdset, dev->vduse_dev_fd);
> +
> + /* Check if we need to destroy the vduse fdset */
> + pthread_mutex_lock(&vduse.mutex);
> + vduse.device_cnt--;
> + if (vduse.device_cnt == 0 && vduse.fdset != NULL) {
> + fdset_destroy(vduse.fdset);
> + vduse.fdset = NULL;
> + }
> + pthread_mutex_unlock(&vduse.mutex);
>
> if (dev->vduse_dev_fd >= 0) {
> close(dev->vduse_dev_fd);
> --
> 2.34.1
>