On Sat, Jun 6, 2026 at 4:52 AM <[email protected]> wrote:
>
> From: Pravin M Bathija <[email protected]>
>
> Add support for VHOST_USER_ADD_MEM_REG, VHOST_USER_REM_MEM_REG and
> VHOST_USER_GET_MAX_MEM_SLOTS. Refactor memory initialization into
> common helper and add supporting functions for dynamic memory management.
>
> Signed-off-by: Pravin M Bathija <[email protected]>
> ---
>  lib/vhost/vhost_user.c | 266 +++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 266 insertions(+)
>
> diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
> index 94fca8b589..020c993b29 100644
> --- a/lib/vhost/vhost_user.c
> +++ b/lib/vhost/vhost_user.c
> @@ -71,6 +71,9 @@ VHOST_MESSAGE_HANDLER(VHOST_USER_SET_FEATURES, 
> vhost_user_set_features, false, t
>  VHOST_MESSAGE_HANDLER(VHOST_USER_SET_OWNER, vhost_user_set_owner, false, 
> true) \
>  VHOST_MESSAGE_HANDLER(VHOST_USER_RESET_OWNER, vhost_user_reset_owner, false, 
> false) \
>  VHOST_MESSAGE_HANDLER(VHOST_USER_SET_MEM_TABLE, vhost_user_set_mem_table, 
> true, true) \
> +VHOST_MESSAGE_HANDLER(VHOST_USER_GET_MAX_MEM_SLOTS, 
> vhost_user_get_max_mem_slots, false, false) \
> +VHOST_MESSAGE_HANDLER(VHOST_USER_ADD_MEM_REG, vhost_user_add_mem_reg, true, 
> true) \
> +VHOST_MESSAGE_HANDLER(VHOST_USER_REM_MEM_REG, vhost_user_rem_mem_reg, false, 
> true) \
>  VHOST_MESSAGE_HANDLER(VHOST_USER_SET_LOG_BASE, vhost_user_set_log_base, 
> true, true) \
>  VHOST_MESSAGE_HANDLER(VHOST_USER_SET_LOG_FD, vhost_user_set_log_fd, true, 
> true) \
>  VHOST_MESSAGE_HANDLER(VHOST_USER_SET_VRING_NUM, vhost_user_set_vring_num, 
> false, true) \
> @@ -1167,6 +1170,24 @@ add_guest_pages(struct virtio_net *dev, struct 
> rte_vhost_mem_region *reg,
>         return 0;
>  }
>
> +static void
> +remove_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg)
> +{
> +       uint64_t reg_start = reg->host_user_addr;
> +       uint64_t reg_end = reg_start + reg->size;
> +       uint32_t i, j = 0;
> +
> +       for (i = 0; i < dev->nr_guest_pages; i++) {
> +               if (dev->guest_pages[i].host_user_addr >= reg_start &&
> +                   dev->guest_pages[i].host_user_addr < reg_end)
> +                       continue;
> +               if (j != i)
> +                       dev->guest_pages[j] = dev->guest_pages[i];
> +               j++;
> +       }
> +       dev->nr_guest_pages = j;
> +}
> +
>  #ifdef RTE_LIBRTE_VHOST_DEBUG
>  /* TODO: enable it only in debug mode? */
>  static void
> @@ -1591,6 +1612,251 @@ vhost_user_set_mem_table(struct virtio_net **pdev,
>         return RTE_VHOST_MSG_RESULT_ERR;
>  }
>
> +
> +static int
> +vhost_user_get_max_mem_slots(struct virtio_net **pdev __rte_unused,
> +                       struct vhu_msg_context *ctx,
> +                       int main_fd __rte_unused)
> +{
> +       uint32_t max_mem_slots = VHOST_MEMORY_MAX_NREGIONS;
> +
> +       ctx->msg.payload.u64 = max_mem_slots;
> +       ctx->msg.size = sizeof(ctx->msg.payload.u64);
> +       ctx->fd_num = 0;
> +
> +       return RTE_VHOST_MSG_RESULT_REPLY;
> +}
> +
> +/*
> + * Invalidate and re-translate all vring addresses after the memory table
> + * has been modified (add/remove region).
> + *
> + * translate_ring_addresses() may call numa_realloc(), which can reallocate
> + * the device structure.  The updated pointer is written back through *pdev
> + * so callers must refresh their local "dev" afterwards: dev = *pdev.
> + */
> +static void
> +vhost_user_invalidate_vrings(struct virtio_net **pdev)
> +{
> +       struct virtio_net *dev = *pdev;
> +       uint32_t i;
> +
> +       for (i = 0; i < dev->nr_vring; i++) {
> +               struct vhost_virtqueue *vq = dev->virtqueue[i];
> +
> +               if (!vq)
> +                       continue;
> +
> +               if (vq->desc || vq->avail || vq->used) {
> +                       vq_assert_lock(dev, vq);
> +
> +                       vring_invalidate(dev, vq);
> +
> +                       translate_ring_addresses(&dev, &vq);
> +               }
> +       }
> +
> +       *pdev = dev;
> +}
> +
> +/*
> + * Macro wrapper that performs the compile-time lock assertion with the
> + * correct message ID at the call site, then calls the implementation.
> + */
> +#define dev_invalidate_vrings(pdev, id) do { \
> +       static_assert(id ## _LOCK_ALL_QPS, \
> +               #id " handler is not declared as locking all queue pairs"); \
> +       vhost_user_invalidate_vrings(pdev); \
> +} while (0)
> +
> +static int
> +vhost_user_add_mem_reg(struct virtio_net **pdev,
> +                       struct vhu_msg_context *ctx,
> +                       int main_fd __rte_unused)
> +{
> +       struct VhostUserMemoryRegion *region = 
> &ctx->msg.payload.memreg.region;
> +       struct virtio_net *dev = *pdev;
> +       uint32_t i;
> +
> +       /* convert first region add to normal memory table set */
> +       if (dev->mem == NULL) {
> +               if (vhost_user_initialize_memory(pdev) < 0)
> +                       goto close_msg_fds;
> +       }
> +
> +       /* make sure new region will fit */
> +       if (dev->mem->nregions >= VHOST_MEMORY_MAX_NREGIONS) {
> +               VHOST_CONFIG_LOG(dev->ifname, ERR, "too many memory regions 
> already (%u)",
> +                                                                       
> dev->mem->nregions);
> +               goto close_msg_fds;
> +       }
> +
> +       /* make sure supplied memory fd present */
> +       if (ctx->fd_num != 1) {
> +               VHOST_CONFIG_LOG(dev->ifname, ERR, "fd count makes no sense 
> (%u)", ctx->fd_num);
> +               goto close_msg_fds;
> +       }
> +
> +       /* Make sure no overlap in guest virtual address space */
> +       for (i = 0; i < dev->mem->nregions; i++) {
> +               struct rte_vhost_mem_region *cur = &dev->mem->regions[i];
> +               uint64_t cur_start = cur->guest_user_addr;
> +               uint64_t cur_end = cur_start + cur->size - 1;
> +               uint64_t new_start = region->userspace_addr;
> +               uint64_t new_end = new_start + region->memory_size - 1;
> +
> +               if (new_end >= cur_start && new_start <= cur_end) {
> +                       VHOST_CONFIG_LOG(dev->ifname, ERR,
> +                               "requested memory region overlaps with 
> another region");
> +                       VHOST_CONFIG_LOG(dev->ifname, ERR,
> +                               "\tRequested region address:0x%" PRIx64,
> +                               region->userspace_addr);
> +                       VHOST_CONFIG_LOG(dev->ifname, ERR,
> +                               "\tRequested region size:0x%" PRIx64,
> +                               region->memory_size);
> +                       VHOST_CONFIG_LOG(dev->ifname, ERR,
> +                               "\tOverlapping region address:0x%" PRIx64,
> +                               cur->guest_user_addr);
> +                       VHOST_CONFIG_LOG(dev->ifname, ERR,
> +                               "\tOverlapping region size:0x%" PRIx64,
> +                               cur->size);
> +                       goto close_msg_fds;
> +               }
> +       }
> +
> +       /* New region goes at the end of the contiguous array */
> +       struct rte_vhost_mem_region *reg = 
> &dev->mem->regions[dev->mem->nregions];
> +
> +       reg->guest_phys_addr = region->guest_phys_addr;
> +       reg->guest_user_addr = region->userspace_addr;
> +       reg->size            = region->memory_size;
> +       reg->fd              = ctx->fds[0];
> +       ctx->fds[0]          = -1;
> +
> +       if (vhost_user_mmap_region(dev, reg, region->mmap_offset) < 0) {
> +               VHOST_CONFIG_LOG(dev->ifname, ERR, "failed to mmap region");
> +               if (reg->mmap_addr) {
> +                       /* mmap succeeded but a later step (e.g. 
> add_guest_pages)
> +                        * failed; undo the mapping and any guest-page 
> entries.
> +                        */
> +                       remove_guest_pages(dev, reg);
> +                       free_mem_region(reg);
> +               } else {
> +                       close(reg->fd);
> +                       reg->fd = -1;
> +               }
> +               goto close_msg_fds;
> +       }
> +
> +       dev->mem->nregions++;
> +
> +       if (dev->async_copy && rte_vfio_is_enabled("vfio")) {
> +               if (async_dma_map_region(dev, reg, true) < 0)
> +                       goto free_new_region_no_dma;
> +       }
> +
> +       if (dev->postcopy_listening) {
> +               /*
> +                * Cannot use vhost_user_postcopy_register() here because it
> +                * reads ctx->msg.payload.memory (SET_MEM_TABLE layout), but
> +                * ADD_MEM_REG uses the memreg payload.  Register the
> +                * single new region directly instead.
> +                */
> +               if (vhost_user_postcopy_region_register(dev, reg) < 0)
> +                       goto free_new_region;
> +       }
> +
> +       dev_invalidate_vrings(pdev, VHOST_USER_ADD_MEM_REG);
> +       dev = *pdev;
> +       dump_guest_pages(dev);
> +
> +       /*
> +        * In postcopy mode the front-end expects the back-end to reply with
> +        * the base of the mapped region (see VHOST_USER_SET_MEM_TABLE, which
> +        * applies here accordingly).  No reply is expected otherwise.
> +        *
> +        * translate_ring_addresses() above may have reallocated dev->mem via
> +        * numa_realloc(), so re-derive the region pointer from the refreshed
> +        * dev rather than using the now-stale reg.  The new region is the 
> last
> +        * entry in the contiguous array.
> +        */
> +       if (dev->postcopy_listening) {
> +               reg = &dev->mem->regions[dev->mem->nregions - 1];
> +               ctx->msg.payload.memreg.region.userspace_addr = 
> reg->host_user_addr;
> +               ctx->msg.size = sizeof(ctx->msg.payload.memreg);
> +               ctx->fd_num = 0;
> +               return RTE_VHOST_MSG_RESULT_REPLY;
> +       }

Thanks Stephen, good catch by the AI.
I did some digging into Qemu code, which AI later confirmed, and if
the series was tested,
the test passed "by accident" when postcopy was not enabled because
Qemu would read the
padding field of the payload, and would treat its value as
RTE_VHOST_MSG_RESULT_OK
because it is zero-initialized...


With this fix:
Reviewed-by: Maxime Coquelin <[email protected]>

> +
> +       return RTE_VHOST_MSG_RESULT_OK;
> +
> +free_new_region:
> +       if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> +               async_dma_map_region(dev, reg, false);
> +free_new_region_no_dma:
> +       remove_guest_pages(dev, reg);
> +       free_mem_region(reg);
> +       dev->mem->nregions--;
> +close_msg_fds:
> +       close_msg_fds(ctx);
> +       return RTE_VHOST_MSG_RESULT_ERR;
> +}
> +
> +static int
> +vhost_user_rem_mem_reg(struct virtio_net **pdev,
> +                       struct vhu_msg_context *ctx,
> +                       int main_fd __rte_unused)
> +{
> +       struct VhostUserMemoryRegion *region = 
> &ctx->msg.payload.memreg.region;
> +       struct virtio_net *dev = *pdev;
> +       uint32_t i;
> +
> +       if (dev->mem == NULL || dev->mem->nregions == 0) {
> +               VHOST_CONFIG_LOG(dev->ifname, ERR, "no memory regions to 
> remove");
> +               return RTE_VHOST_MSG_RESULT_ERR;
> +       }
> +
> +       for (i = 0; i < dev->mem->nregions; i++) {
> +               struct rte_vhost_mem_region *current_region = 
> &dev->mem->regions[i];
> +
> +               /*
> +                * According to the vhost-user specification:
> +                * The memory region to be removed is identified by its GPA,
> +                * user address and size. The mmap offset is ignored.
> +                */
> +               if (region->userspace_addr == current_region->guest_user_addr
> +                       && region->guest_phys_addr == 
> current_region->guest_phys_addr
> +                       && region->memory_size == current_region->size) {
> +                       if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> +                               async_dma_map_region(dev, current_region, 
> false);
> +                       if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> +                               vhost_user_iotlb_cache_remove(dev,
> +                                       current_region->guest_phys_addr,
> +                                       current_region->size);
> +                       remove_guest_pages(dev, current_region);
> +                       free_mem_region(current_region);
> +
> +                       /* Compact the regions array to keep it contiguous */
> +                       if (i < dev->mem->nregions - 1) {
> +                               memmove(&dev->mem->regions[i],
> +                                       &dev->mem->regions[i + 1],
> +                                       (dev->mem->nregions - 1 - i) *
> +                                       sizeof(struct rte_vhost_mem_region));
> +                               memset(&dev->mem->regions[dev->mem->nregions 
> - 1],
> +                                       0, sizeof(struct 
> rte_vhost_mem_region));
> +                       }
> +
> +                       dev->mem->nregions--;
> +                       dev_invalidate_vrings(pdev, VHOST_USER_REM_MEM_REG);
> +                       dev = *pdev;
> +                       return RTE_VHOST_MSG_RESULT_OK;
> +               }
> +       }
> +
> +       VHOST_CONFIG_LOG(dev->ifname, ERR, "failed to find region");
> +       return RTE_VHOST_MSG_RESULT_ERR;
> +}
> +
>  static bool
>  vq_is_ready(struct virtio_net *dev, struct vhost_virtqueue *vq)
>  {
> --
> 2.43.0
>

Reply via email to