On Wed, May 20, 2026 at 4:20 AM <[email protected]> wrote:

> From: Pravin M Bathija <[email protected]>
>
> Add support for VHOST_USER_ADD_MEM_REG, VHOST_USER_REM_MEM_REG and
> VHOST_USER_GET_MAX_MEM_SLOTS. Refactor memory initialization into
> common helper and add supporting functions for dynamic memory management.
>
> Signed-off-by: Pravin M Bathija <[email protected]>
> Acked-by: Fengchengwen <[email protected]>
> Reviewed-by: Stephen Hemminger <[email protected]>
> Acked-by: Maxime Coquelin <[email protected]>
> ---
>  lib/vhost/vhost_user.c | 255 +++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 255 insertions(+)
>
> diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
> index 94fca8b589..522ba1db82 100644
> --- a/lib/vhost/vhost_user.c
> +++ b/lib/vhost/vhost_user.c
> @@ -71,6 +71,9 @@ VHOST_MESSAGE_HANDLER(VHOST_USER_SET_FEATURES,
> vhost_user_set_features, false, t
>  VHOST_MESSAGE_HANDLER(VHOST_USER_SET_OWNER, vhost_user_set_owner, false,
> true) \
>  VHOST_MESSAGE_HANDLER(VHOST_USER_RESET_OWNER, vhost_user_reset_owner,
> false, false) \
>  VHOST_MESSAGE_HANDLER(VHOST_USER_SET_MEM_TABLE, vhost_user_set_mem_table,
> true, true) \
> +VHOST_MESSAGE_HANDLER(VHOST_USER_GET_MAX_MEM_SLOTS,
> vhost_user_get_max_mem_slots, false, false) \
> +VHOST_MESSAGE_HANDLER(VHOST_USER_ADD_MEM_REG, vhost_user_add_mem_reg,
> true, true) \
> +VHOST_MESSAGE_HANDLER(VHOST_USER_REM_MEM_REG, vhost_user_rem_mem_reg,
> true, true) \
>

The removal request does not expect FDs in ancillary data.
It should be:
VHOST_MESSAGE_HANDLER(VHOST_USER_REM_MEM_REG, vhost_user_rem_mem_reg,
false, true)


>  VHOST_MESSAGE_HANDLER(VHOST_USER_SET_LOG_BASE, vhost_user_set_log_base,
> true, true) \
>  VHOST_MESSAGE_HANDLER(VHOST_USER_SET_LOG_FD, vhost_user_set_log_fd, true,
> true) \
>  VHOST_MESSAGE_HANDLER(VHOST_USER_SET_VRING_NUM, vhost_user_set_vring_num,
> false, true) \
> @@ -1167,6 +1170,24 @@ add_guest_pages(struct virtio_net *dev, struct
> rte_vhost_mem_region *reg,
>         return 0;
>  }
>
> +static void
> +remove_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region
> *reg)
> +{
> +       uint64_t reg_start = reg->host_user_addr;
> +       uint64_t reg_end = reg_start + reg->size;
> +       uint32_t i, j = 0;
> +
> +       for (i = 0; i < dev->nr_guest_pages; i++) {
> +               if (dev->guest_pages[i].host_user_addr >= reg_start &&
> +                   dev->guest_pages[i].host_user_addr < reg_end)
> +                       continue;
> +               if (j != i)
> +                       dev->guest_pages[j] = dev->guest_pages[i];
> +               j++;
> +       }
> +       dev->nr_guest_pages = j;
> +}
> +
>  #ifdef RTE_LIBRTE_VHOST_DEBUG
>  /* TODO: enable it only in debug mode? */
>  static void
> @@ -1591,6 +1612,240 @@ vhost_user_set_mem_table(struct virtio_net **pdev,
>         return RTE_VHOST_MSG_RESULT_ERR;
>  }
>
> +
> +static int
> +vhost_user_get_max_mem_slots(struct virtio_net **pdev __rte_unused,
> +                       struct vhu_msg_context *ctx,
> +                       int main_fd __rte_unused)
> +{
> +       uint32_t max_mem_slots = VHOST_MEMORY_MAX_NREGIONS;
> +
> +       ctx->msg.payload.u64 = max_mem_slots;
> +       ctx->msg.size = sizeof(ctx->msg.payload.u64);
> +       ctx->fd_num = 0;
> +
> +       return RTE_VHOST_MSG_RESULT_REPLY;
> +}
> +
> +/*
> + * Invalidate and re-translate all vring addresses after the memory table
> + * has been modified (add/remove region).
> + *
> + * translate_ring_addresses() may call numa_realloc(), which can
> reallocate
> + * the device structure.  The updated pointer is written back through
> *pdev
> + * so callers must refresh their local "dev" afterwards: dev = *pdev.
> + */
> +static void
> +vhost_user_invalidate_vrings(struct virtio_net **pdev)
> +{
> +       struct virtio_net *dev = *pdev;
> +       uint32_t i;
> +
> +       for (i = 0; i < dev->nr_vring; i++) {
> +               struct vhost_virtqueue *vq = dev->virtqueue[i];
> +
> +               if (!vq)
> +                       continue;
> +
> +               if (vq->desc || vq->avail || vq->used) {
> +                       vq_assert_lock(dev, vq);
> +
> +                       vring_invalidate(dev, vq);
> +
> +                       translate_ring_addresses(&dev, &vq);
> +               }
> +       }
> +
> +       *pdev = dev;
> +}
> +
> +/*
> + * Macro wrapper that performs the compile-time lock assertion with the
> + * correct message ID at the call site, then calls the implementation.
> + */
> +#define dev_invalidate_vrings(pdev, id) do { \
> +       static_assert(id ## _LOCK_ALL_QPS, \
> +               #id " handler is not declared as locking all queue
> pairs"); \
> +       vhost_user_invalidate_vrings(pdev); \
> +} while (0)
> +
> +static int
> +vhost_user_add_mem_reg(struct virtio_net **pdev,
> +                       struct vhu_msg_context *ctx,
> +                       int main_fd __rte_unused)
> +{
> +       struct VhostUserMemoryRegion *region =
> &ctx->msg.payload.memreg.region;
> +       struct virtio_net *dev = *pdev;
> +       uint32_t i;
> +
> +       /* convert first region add to normal memory table set */
> +       if (dev->mem == NULL) {
> +               if (vhost_user_initialize_memory(pdev) < 0)
> +                       goto close_msg_fds;
> +       }
> +
> +       /* make sure new region will fit */
> +       if (dev->mem->nregions >= VHOST_MEMORY_MAX_NREGIONS) {
> +               VHOST_CONFIG_LOG(dev->ifname, ERR, "too many memory
> regions already (%u)",
> +
>  dev->mem->nregions);
> +               goto close_msg_fds;
> +       }
> +
> +       /* make sure supplied memory fd present */
> +       if (ctx->fd_num != 1) {
> +               VHOST_CONFIG_LOG(dev->ifname, ERR, "fd count makes no
> sense (%u)", ctx->fd_num);
> +               goto close_msg_fds;
> +       }
> +
> +       /* Make sure no overlap in guest virtual address space */
> +       for (i = 0; i < dev->mem->nregions; i++) {
> +               struct rte_vhost_mem_region *cur = &dev->mem->regions[i];
> +               uint64_t cur_start = cur->guest_user_addr;
> +               uint64_t cur_end = cur_start + cur->size - 1;
> +               uint64_t new_start = region->userspace_addr;
> +               uint64_t new_end = new_start + region->memory_size - 1;
> +
> +               if (new_end >= cur_start && new_start <= cur_end) {
> +                       VHOST_CONFIG_LOG(dev->ifname, ERR,
> +                               "requested memory region overlaps with
> another region");
> +                       VHOST_CONFIG_LOG(dev->ifname, ERR,
> +                               "\tRequested region address:0x%" PRIx64,
> +                               region->userspace_addr);
> +                       VHOST_CONFIG_LOG(dev->ifname, ERR,
> +                               "\tRequested region size:0x%" PRIx64,
> +                               region->memory_size);
> +                       VHOST_CONFIG_LOG(dev->ifname, ERR,
> +                               "\tOverlapping region address:0x%" PRIx64,
> +                               cur->guest_user_addr);
> +                       VHOST_CONFIG_LOG(dev->ifname, ERR,
> +                               "\tOverlapping region size:0x%" PRIx64,
> +                               cur->size);
> +                       goto close_msg_fds;
> +               }
> +       }
> +
> +       /* New region goes at the end of the contiguous array */
> +       struct rte_vhost_mem_region *reg =
> &dev->mem->regions[dev->mem->nregions];
> +
> +       reg->guest_phys_addr = region->guest_phys_addr;
> +       reg->guest_user_addr = region->userspace_addr;
> +       reg->size            = region->memory_size;
> +       reg->fd              = ctx->fds[0];
> +       ctx->fds[0]          = -1;
> +
> +       if (vhost_user_mmap_region(dev, reg, region->mmap_offset) < 0) {
> +               VHOST_CONFIG_LOG(dev->ifname, ERR, "failed to mmap
> region");
> +               if (reg->mmap_addr) {
> +                       /* mmap succeeded but a later step (e.g.
> add_guest_pages)
> +                        * failed; undo the mapping and any guest-page
> entries.
> +                        */
> +                       remove_guest_pages(dev, reg);
> +                       free_mem_region(reg);
> +               } else {
> +                       close(reg->fd);
> +                       reg->fd = -1;
> +               }
> +               goto close_msg_fds;
> +       }
> +
> +       dev->mem->nregions++;
> +
> +       if (dev->async_copy && rte_vfio_is_enabled("vfio")) {
> +               if (async_dma_map_region(dev, reg, true) < 0)
> +                       goto free_new_region_no_dma;
> +       }
> +
> +       if (dev->postcopy_listening) {
> +               /*
> +                * Cannot use vhost_user_postcopy_register() here because
> it
> +                * reads ctx->msg.payload.memory (SET_MEM_TABLE layout),
> but
> +                * ADD_MEM_REG uses the memreg payload.  Register the
> +                * single new region directly instead.
> +                */
> +               if (vhost_user_postcopy_region_register(dev, reg) < 0)
> +                       goto free_new_region;
> +       }
> +
> +       dev_invalidate_vrings(pdev, VHOST_USER_ADD_MEM_REG);
> +       dev = *pdev;
> +       dump_guest_pages(dev);
> +
> +       /* Reply with the back-end's mapping address per vhost-user spec */
> +       ctx->msg.payload.memreg.region.userspace_addr =
> reg->host_user_addr;
> +       ctx->msg.size = sizeof(ctx->msg.payload.memreg);
> +       ctx->fd_num = 0;
> +
> +       return RTE_VHOST_MSG_RESULT_REPLY;
> +
> +free_new_region:
> +       if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> +               async_dma_map_region(dev, reg, false);
> +free_new_region_no_dma:
> +       remove_guest_pages(dev, reg);
> +       free_mem_region(reg);
> +       dev->mem->nregions--;
> +close_msg_fds:
> +       close_msg_fds(ctx);
> +       return RTE_VHOST_MSG_RESULT_ERR;
> +}
> +
> +static int
> +vhost_user_rem_mem_reg(struct virtio_net **pdev,
> +                       struct vhu_msg_context *ctx,
> +                       int main_fd __rte_unused)
> +{
> +       struct VhostUserMemoryRegion *region =
> &ctx->msg.payload.memreg.region;
> +       struct virtio_net *dev = *pdev;
> +       uint32_t i;
> +
> +       if (dev->mem == NULL || dev->mem->nregions == 0) {
> +               VHOST_CONFIG_LOG(dev->ifname, ERR, "no memory regions to
> remove");
> +               close_msg_fds(ctx);
>
Not needed if properly declared.


> +               return RTE_VHOST_MSG_RESULT_ERR;
> +       }
> +
> +       if (validate_msg_fds(dev, ctx, 0) != 0)
> +               return RTE_VHOST_MSG_RESULT_ERR;
>

With proper declaration, we can remove this check, as it is done in a
generic way.



> +
> +       for (i = 0; i < dev->mem->nregions; i++) {
> +               struct rte_vhost_mem_region *current_region =
> &dev->mem->regions[i];
> +
> +               /*
> +                * According to the vhost-user specification:
> +                * The memory region to be removed is identified by its
> GPA,
> +                * user address and size. The mmap offset is ignored.
> +                */
> +               if (region->userspace_addr ==
> current_region->guest_user_addr
> +                       && region->guest_phys_addr ==
> current_region->guest_phys_addr
> +                       && region->memory_size == current_region->size) {
> +                       if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> +                               async_dma_map_region(dev, current_region,
> false);
> +                       remove_guest_pages(dev, current_region);
>

You are missing the step to clear the IOTLB cache entries matching with
this removed region.
In vhost_user_set_mem_table(), a vhost_user_iotlb_flush_all() call is made,
but that would be kind of a nuclear option for memory hotplug.

I suggest removing only the entries matching the removed area, something
like this:
  if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
      vhost_user_iotlb_cache_remove(dev, current_region->guest_phys_addr,
current_region->size);


> +                       free_mem_region(current_region);
> +
> +                       /* Compact the regions array to keep it contiguous
> */
> +                       if (i < dev->mem->nregions - 1) {
> +                               memmove(&dev->mem->regions[i],
> +                                       &dev->mem->regions[i + 1],
> +                                       (dev->mem->nregions - 1 - i) *
> +                                       sizeof(struct
> rte_vhost_mem_region));
> +
>  memset(&dev->mem->regions[dev->mem->nregions - 1],
> +                                       0, sizeof(struct
> rte_vhost_mem_region));
> +                       }
> +
> +                       dev->mem->nregions--;
> +                       dev_invalidate_vrings(pdev,
> VHOST_USER_REM_MEM_REG);
> +                       dev = *pdev;
> +                       close_msg_fds(ctx);
>
And no need to close FDs, are we are now sure none were provided.


> +                       return RTE_VHOST_MSG_RESULT_OK;
> +               }
> +       }
> +
> +       VHOST_CONFIG_LOG(dev->ifname, ERR, "failed to find region");
> +       close_msg_fds(ctx);
>

Same, no more needed.


> +       return RTE_VHOST_MSG_RESULT_ERR;
> +}
> +
>  static bool
>  vq_is_ready(struct virtio_net *dev, struct vhost_virtqueue *vq)
>  {
> --
> 2.43.0
>
>

Reply via email to