On Tue, Oct 14, 2025 at 03:52:17PM +0200, Albert Esteve wrote:
> On Mon, Sep 29, 2025 at 8:39 PM Stefan Hajnoczi <[email protected]> wrote:
> >
> > On Wed, Sep 10, 2025 at 01:54:14PM +0200, Albert Esteve wrote:
> > > Add SHMEM_MAP/UNMAP requests to vhost-user for dynamic management of
> > > VIRTIO Shared Memory mappings.
> > >
> > > This implementation introduces VirtioSharedMemoryMapping as a unified
> > > QOM object that manages both the mapping metadata and MemoryRegion
> > > lifecycle. This object provides reference-counted lifecycle management
> > > with automatic cleanup of file descriptors and memory regions
> > > through QOM finalization.
> > >
> > > This request allows backends to dynamically map file descriptors into a
> > > VIRTIO Shared Memory Region identified by their shmid. Maps are created
> > > using memory_region_init_ram_from_fd() with configurable read/write
> > > permissions, and the resulting MemoryRegions are added as subregions to
> > > the shmem container region. The mapped memory is then advertised to the
> > > guest VIRTIO drivers as a base address plus offset for reading and
> > > writting according to the requested mmap flags.
> > >
> > > The backend can unmap memory ranges within a given VIRTIO Shared Memory
> > > Region to free resources. Upon receiving this message, the frontend
> > > removes the MemoryRegion as a subregion and automatically unreferences
> > > the VirtioSharedMemoryMapping object, triggering cleanup if no other
> > > references exist.
> > >
> > > Error handling has been improved to ensure consistent behavior across
> > > handlers that manage their own vhost_user_send_resp() calls. Since
> > > these handlers clear the VHOST_USER_NEED_REPLY_MASK flag, explicit
> > > error checking ensures proper connection closure on failures,
> > > maintaining the expected error flow.
> > >
> > > Note the memory region commit for these operations needs to be delayed
> > > until after we respond to the backend to avoid deadlocks.
> > >
> > > Signed-off-by: Albert Esteve <[email protected]>
> > > ---
> > >  hw/virtio/vhost-user.c                    | 206 +++++++++++++++++++++-
> > >  hw/virtio/virtio.c                        | 198 +++++++++++++++++++++
> > >  include/hw/virtio/virtio.h                | 136 ++++++++++++++
> > >  subprojects/libvhost-user/libvhost-user.c |  70 ++++++++
> > >  subprojects/libvhost-user/libvhost-user.h |  54 ++++++
> > >  5 files changed, 662 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> > > index 1e1d6b0d6e..4783b1904b 100644
> > > --- a/hw/virtio/vhost-user.c
> > > +++ b/hw/virtio/vhost-user.c
> > > @@ -115,6 +115,8 @@ typedef enum VhostUserBackendRequest {
> > >      VHOST_USER_BACKEND_SHARED_OBJECT_ADD = 6,
> > >      VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE = 7,
> > >      VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP = 8,
> > > +    VHOST_USER_BACKEND_SHMEM_MAP = 9,
> > > +    VHOST_USER_BACKEND_SHMEM_UNMAP = 10,
> > >      VHOST_USER_BACKEND_MAX
> > >  }  VhostUserBackendRequest;
> > >
> > > @@ -192,6 +194,23 @@ typedef struct VhostUserShared {
> > >      unsigned char uuid[16];
> > >  } VhostUserShared;
> > >
> > > +/* For the flags field of VhostUserMMap */
> > > +#define VHOST_USER_FLAG_MAP_RW (1u << 0)
> > > +
> > > +typedef struct {
> > > +    /* VIRTIO Shared Memory Region ID */
> > > +    uint8_t shmid;
> > > +    uint8_t padding[7];
> > > +    /* File offset */
> > > +    uint64_t fd_offset;
> > > +    /* Offset within the VIRTIO Shared Memory Region */
> > > +    uint64_t shm_offset;
> > > +    /* Size of the mapping */
> > > +    uint64_t len;
> > > +    /* Flags for the mmap operation, from VHOST_USER_FLAG_MAP_* */
> > > +    uint16_t flags;
> > > +} VhostUserMMap;
> > > +
> > >  typedef struct {
> > >      VhostUserRequest request;
> > >
> > > @@ -224,6 +243,7 @@ typedef union {
> > >          VhostUserInflight inflight;
> > >          VhostUserShared object;
> > >          VhostUserTransferDeviceState transfer_state;
> > > +        VhostUserMMap mmap;
> > >  } VhostUserPayload;
> > >
> > >  typedef struct VhostUserMsg {
> > > @@ -1768,6 +1788,172 @@ 
> > > vhost_user_backend_handle_shared_object_lookup(struct vhost_user *u,
> > >      return 0;
> > >  }
> > >
> > > +/**
> > > + * vhost_user_backend_handle_shmem_map() - Handle SHMEM_MAP backend 
> > > request
> > > + * @dev: vhost device
> > > + * @ioc: QIOChannel for communication
> > > + * @hdr: vhost-user message header
> > > + * @payload: message payload containing mapping details
> > > + * @fd: file descriptor for the shared memory region
> > > + *
> > > + * Handles VHOST_USER_BACKEND_SHMEM_MAP requests from the backend. 
> > > Creates
> > > + * a VhostUserShmemObject to manage the shared memory mapping and adds it
> > > + * to the appropriate VirtIO shared memory region. The 
> > > VhostUserShmemObject
> > > + * serves as an intermediate parent for the MemoryRegion, ensuring proper
> > > + * lifecycle management with reference counting.
> > > + *
> > > + * Returns: 0 on success, negative errno on failure
> > > + */
> > > +static int
> > > +vhost_user_backend_handle_shmem_map(struct vhost_dev *dev,
> > > +                                    QIOChannel *ioc,
> > > +                                    VhostUserHeader *hdr,
> > > +                                    VhostUserPayload *payload,
> > > +                                    int fd)
> > > +{
> > > +    VirtioSharedMemory *shmem;
> > > +    VhostUserMMap *vu_mmap = &payload->mmap;
> > > +    Error *local_err = NULL;
> > > +    g_autoptr(GString) shm_name = g_string_new(NULL);
> > > +
> > > +    if (fd < 0) {
> > > +        error_report("Bad fd for map");
> > > +        return -EBADF;
> > > +    }
> > > +
> > > +    if (QSIMPLEQ_EMPTY(&dev->vdev->shmem_list)) {
> > > +        error_report("Device has no VIRTIO Shared Memory Regions. "
> > > +                     "Requested ID: %d", vu_mmap->shmid);
> > > +        return -EFAULT;
> > > +    }
> > > +
> > > +    shmem = virtio_find_shmem_region(dev->vdev, vu_mmap->shmid);
> > > +    if (!shmem) {
> > > +        error_report("VIRTIO Shared Memory Region at "
> > > +                     "ID %d not found or unitialized", vu_mmap->shmid);
> >
> > uninitialized
> >
> > > +        return -EFAULT;
> > > +    }
> > > +
> > > +    if ((vu_mmap->shm_offset + vu_mmap->len) < vu_mmap->len ||
> > > +        (vu_mmap->shm_offset + vu_mmap->len) > shmem->mr.size) {
> > > +        error_report("Bad offset/len for mmap %" PRIx64 "+%" PRIx64,
> > > +                     vu_mmap->shm_offset, vu_mmap->len);
> > > +        return -EFAULT;
> > > +    }
> > > +
> > > +    g_string_printf(shm_name, "virtio-shm%i-%lu",
> > > +                    vu_mmap->shmid, vu_mmap->shm_offset);
> > > +
> > > +    memory_region_transaction_begin();
> > > +
> > > +    /* Create VirtioSharedMemoryMapping object */
> > > +    VirtioSharedMemoryMapping *mapping = 
> > > virtio_shared_memory_mapping_new(
> > > +        vu_mmap->shmid, fd, vu_mmap->fd_offset, vu_mmap->shm_offset,
> > > +        vu_mmap->len, vu_mmap->flags & VHOST_USER_FLAG_MAP_RW);
> > > +
> > > +    if (!mapping) {
> > > +        memory_region_transaction_commit();
> > > +        return -EFAULT;
> > > +    }
> > > +
> > > +    /* Add the mapping to the shared memory region */
> > > +    if (virtio_add_shmem_map(shmem, mapping) != 0) {
> > > +        error_report("Failed to add shared memory mapping");
> > > +        object_unref(OBJECT(mapping));
> > > +        memory_region_transaction_commit();
> > > +        return -EFAULT;
> > > +    }
> > > +
> > > +    if (hdr->flags & VHOST_USER_NEED_REPLY_MASK) {
> > > +        payload->u64 = 0;
> > > +        hdr->size = sizeof(payload->u64);
> > > +        vhost_user_send_resp(ioc, hdr, payload, &local_err);
> > > +        if (local_err) {
> > > +            error_report_err(local_err);
> > > +            memory_region_transaction_commit();
> > > +            return -EFAULT;
> > > +        }
> > > +    }
> > > +
> > > +    memory_region_transaction_commit();
> > > +
> > > +    return 0;
> > > +}
> > > +
> > > +/**
> > > + * vhost_user_backend_handle_shmem_unmap() - Handle SHMEM_UNMAP backend 
> > > request
> > > + * @dev: vhost device
> > > + * @ioc: QIOChannel for communication
> > > + * @hdr: vhost-user message header
> > > + * @payload: message payload containing unmapping details
> > > + *
> > > + * Handles VHOST_USER_BACKEND_SHMEM_UNMAP requests from the backend. 
> > > Removes
> > > + * the specified memory mapping from the VirtIO shared memory region. 
> > > This
> > > + * automatically unreferences the associated VhostUserShmemObject, which 
> > > may
> > > + * trigger its finalization and cleanup (munmap, close fd) if no other
> > > + * references exist.
> > > + *
> > > + * Returns: 0 on success, negative errno on failure
> > > + */
> > > +static int
> > > +vhost_user_backend_handle_shmem_unmap(struct vhost_dev *dev,
> > > +                                      QIOChannel *ioc,
> > > +                                      VhostUserHeader *hdr,
> > > +                                      VhostUserPayload *payload)
> > > +{
> > > +    VirtioSharedMemory *shmem;
> > > +    VirtioSharedMemoryMapping *mmap = NULL;
> > > +    VhostUserMMap *vu_mmap = &payload->mmap;
> > > +    Error *local_err = NULL;
> > > +
> > > +    if (QSIMPLEQ_EMPTY(&dev->vdev->shmem_list)) {
> > > +        error_report("Device has no VIRTIO Shared Memory Regions. "
> > > +                     "Requested ID: %d", vu_mmap->shmid);
> > > +        return -EFAULT;
> > > +    }
> > > +
> > > +    shmem = virtio_find_shmem_region(dev->vdev, vu_mmap->shmid);
> > > +    if (!shmem) {
> > > +        error_report("VIRTIO Shared Memory Region at "
> > > +                     "ID %d not found or unitialized", vu_mmap->shmid);
> >
> > uninitialized
> >
> > > +        return -EFAULT;
> > > +    }
> > > +
> > > +    if ((vu_mmap->shm_offset + vu_mmap->len) < vu_mmap->len ||
> > > +        (vu_mmap->shm_offset + vu_mmap->len) > shmem->mr.size) {
> > > +        error_report("Bad offset/len for unmmap %" PRIx64 "+%" PRIx64,
> > > +                     vu_mmap->shm_offset, vu_mmap->len);
> > > +        return -EFAULT;
> > > +    }
> > > +
> > > +    mmap = virtio_find_shmem_map(shmem, vu_mmap->shm_offset, 
> > > vu_mmap->len);
> > > +    if (!mmap) {
> > > +        error_report("Shared memory mapping not found at offset %" PRIx64
> > > +                     " with length %" PRIx64,
> > > +                     vu_mmap->shm_offset, vu_mmap->len);
> > > +        return -EFAULT;
> > > +    }
> > > +
> > > +    memory_region_transaction_begin();
> > > +    memory_region_del_subregion(&shmem->mr, mmap->mr);
> > > +    if (hdr->flags & VHOST_USER_NEED_REPLY_MASK) {
> > > +        payload->u64 = 0;
> > > +        hdr->size = sizeof(payload->u64);
> > > +        vhost_user_send_resp(ioc, hdr, payload, &local_err);
> > > +        if (local_err) {
> > > +            error_report_err(local_err);
> > > +            memory_region_transaction_commit();
> > > +            return -EFAULT;
> > > +        }
> > > +    }
> > > +    memory_region_transaction_commit();
> > > +
> > > +    /* Free the MemoryRegion only after vhost_commit */
> > > +    virtio_del_shmem_map(shmem, vu_mmap->shm_offset, vu_mmap->len);
> > > +
> > > +    return 0;
> > > +}
> > > +
> > >  static void close_backend_channel(struct vhost_user *u)
> > >  {
> > >      g_source_destroy(u->backend_src);
> > > @@ -1833,8 +2019,24 @@ static gboolean backend_read(QIOChannel *ioc, 
> > > GIOCondition condition,
> > >                                                               
> > > &payload.object);
> > >          break;
> > >      case VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP:
> > > -        ret = 
> > > vhost_user_backend_handle_shared_object_lookup(dev->opaque, ioc,
> > > -                                                             &hdr, 
> > > &payload);
> > > +        /* Handler manages its own response, check error and close 
> > > connection */
> > > +        if (vhost_user_backend_handle_shared_object_lookup(dev->opaque, 
> > > ioc,
> > > +                                                           &hdr, 
> > > &payload) < 0) {
> > > +            goto err;
> > > +        }
> > > +        break;
> >
> > This looks like a bug fix that is unrelated to this patch. The change
> > doesn't look correct though: the vhost-user response is skipped when
> > vhost_user_backend_handle_shared_object_lookup() returns non-zero and
> > the return value in the payload is replaced with 0 in the -EINVAL and
> > vhost_user_get_shared_object() failure cases.
> 
> What vhost_user_backend_handle_shared_object_lookup() returns, depends
> basically on whether vhost_user_backend_send_dmabuf_fd() succeded or
> not to send a reponse. What you described is what determines the
> return type in the response. But as the comment states, the response
> is sent within the handler. When an error in sending the response
> occurs, we want to close the backend channel to be consistent with
> other messages. That is what this small fix does. On the other hand,
> when the response succeds then the VHOST_USER_NEED_REPLY_MASK flag is
> unset and the response here is skipped, going directly to the
> fdcleanup. I think the logic is correct.

I understand now, thanks!

> 
> >
> > I suggest dropping this and sending a separate patch (independently from
> > this series) that fixes VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP replies.
> 
> I mean, I can do that. It is true that is technically unrelated.

Yes, that way you can explain the reason for the change in the commit
message and it will be easier for reviewers to understand. Keeping
unrelated changes separate also makes backports easier (e.g. including
the fix in QEMU's stable branch or downstream distro packages).

> >
> > > +    case VHOST_USER_BACKEND_SHMEM_MAP:
> > > +        /* Handler manages its own response, check error and close 
> > > connection */
> >
> > This comment is not true: the response is only sent in the success case.
> > Errors do not result in a response being sent because goto err skips
> > vhost_user_send_resp().
> >
> > Please take a look and ensure that a response is sent in all cases.
> 
> OK. True, I mixed other handler's style of early returning with what
> should've been the fix for this. I need to set the return type instead
> and goto the reply when something breaks.
> 
> >
> > > +        if (vhost_user_backend_handle_shmem_map(dev, ioc, &hdr, &payload,
> > > +                                                fd ? fd[0] : -1) < 0) {
> > > +            goto err;
> > > +        }
> > > +        break;
> > > +    case VHOST_USER_BACKEND_SHMEM_UNMAP:
> > > +        /* Handler manages its own response, check error and close 
> > > connection */
> >
> > Same here.
> >
> > > +        if (vhost_user_backend_handle_shmem_unmap(dev, ioc, &hdr, 
> > > &payload) < 0) {
> > > +            goto err;
> > > +        }
> > >          break;
> > >      default:
> > >          error_report("Received unexpected msg type: %d.", hdr.request);
> > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> > > index 9a81ad912e..ba5ffa58bd 100644
> > > --- a/hw/virtio/virtio.c
> > > +++ b/hw/virtio/virtio.c
> > > @@ -3045,6 +3045,181 @@ int virtio_save(VirtIODevice *vdev, QEMUFile *f)
> > >      return vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
> > >  }
> > >
> > > +VirtioSharedMemory *virtio_new_shmem_region(VirtIODevice *vdev, uint8_t 
> > > shmid, uint64_t size)
> > > +{
> > > +    VirtioSharedMemory *elem;
> > > +    g_autofree char *name = NULL;
> > > +
> > > +    elem = g_new0(VirtioSharedMemory, 1);
> > > +    elem->shmid = shmid;
> > > +
> > > +    /* Initialize embedded MemoryRegion as container for shmem mappings 
> > > */
> > > +    name = g_strdup_printf("virtio-shmem-%d", shmid);
> > > +    memory_region_init(&elem->mr, OBJECT(vdev), name, size);
> > > +    QTAILQ_INIT(&elem->mmaps);
> > > +    QSIMPLEQ_INSERT_TAIL(&vdev->shmem_list, elem, entry);
> > > +    return elem;
> > > +}
> > > +
> > > +VirtioSharedMemory *virtio_find_shmem_region(VirtIODevice *vdev, uint8_t 
> > > shmid)
> > > +{
> > > +    VirtioSharedMemory *shmem, *next;
> > > +    QSIMPLEQ_FOREACH_SAFE(shmem, &vdev->shmem_list, entry, next) {
> > > +        if (shmem->shmid == shmid) {
> > > +            return shmem;
> > > +        }
> > > +    }
> > > +    return NULL;
> > > +}
> > > +
> > > +static void virtio_shared_memory_mapping_instance_init(Object *obj)
> > > +{
> > > +    VirtioSharedMemoryMapping *mapping = 
> > > VIRTIO_SHARED_MEMORY_MAPPING(obj);
> > > +
> > > +    mapping->shmid = 0;
> > > +    mapping->fd = -1;
> > > +    mapping->offset = 0;
> > > +    mapping->len = 0;
> > > +    mapping->mr = NULL;
> > > +}
> > > +
> > > +static void virtio_shared_memory_mapping_finalize(Object *obj)
> >
> > The naming is inconsistent, instance_init vs finalize:
> >   .instance_init = virtio_shared_memory_mapping_instance_init,
> >   .instance_finalize = virtio_shared_memory_mapping_finalize,
> >
> > virtio_shared_memory_mapping_instance_finalize() would be consistent.
> >
> > > +{
> > > +    VirtioSharedMemoryMapping *mapping = 
> > > VIRTIO_SHARED_MEMORY_MAPPING(obj);
> > > +
> > > +    /* Clean up MemoryRegion if it exists */
> > > +    if (mapping->mr) {
> > > +        /* Unparent the MemoryRegion to trigger cleanup */
> > > +        object_unparent(OBJECT(mapping->mr));
> > > +        mapping->mr = NULL;
> > > +    }
> > > +
> > > +    /* Close file descriptor */
> > > +    if (mapping->fd >= 0) {
> > > +        close(mapping->fd);
> > > +        mapping->fd = -1;
> > > +    }
> > > +}
> > > +
> > > +VirtioSharedMemoryMapping *virtio_shared_memory_mapping_new(uint8_t 
> > > shmid,
> > > +                                                            int fd,
> > > +                                                            uint64_t 
> > > fd_offset,
> > > +                                                            uint64_t 
> > > shm_offset,
> > > +                                                            uint64_t len,
> > > +                                                            bool 
> > > allow_write)
> > > +{
> > > +    VirtioSharedMemoryMapping *mapping;
> > > +    MemoryRegion *mr;
> > > +    g_autoptr(GString) mr_name = g_string_new(NULL);
> > > +    uint32_t ram_flags;
> > > +    Error *local_err = NULL;
> > > +
> > > +    if (len == 0) {
> > > +        error_report("Shared memory mapping size cannot be zero");
> > > +        return NULL;
> > > +    }
> > > +
> > > +    fd = dup(fd);
> > > +    if (fd < 0) {
> > > +        error_report("Failed to duplicate fd: %s", strerror(errno));
> > > +        return NULL;
> > > +    }
> > > +
> > > +    /* Determine RAM flags */
> > > +    ram_flags = RAM_SHARED;
> > > +    if (!allow_write) {
> > > +        ram_flags |= RAM_READONLY_FD;
> > > +    }
> > > +
> > > +    /* Create the VirtioSharedMemoryMapping */
> > > +    mapping = VIRTIO_SHARED_MEMORY_MAPPING(
> > > +        object_new(TYPE_VIRTIO_SHARED_MEMORY_MAPPING));
> > > +
> > > +    /* Set up object properties */
> > > +    mapping->shmid = shmid;
> > > +    mapping->fd = fd;
> > > +    mapping->offset = shm_offset;
> > > +    mapping->len = len;
> > > +
> > > +    /* Create MemoryRegion as a child of this object */
> > > +    mr = g_new0(MemoryRegion, 1);
> > > +    g_string_printf(mr_name, "virtio-shmem-%d-%" PRIx64, shmid, 
> > > shm_offset);
> > > +
> > > +    /* Initialize MemoryRegion with file descriptor */
> > > +    if (!memory_region_init_ram_from_fd(mr, OBJECT(mapping), 
> > > mr_name->str,
> > > +                                        len, ram_flags, fd, fd_offset,
> > > +                                        &local_err)) {
> > > +        error_report_err(local_err);
> > > +        g_free(mr);
> > > +        close(fd);
> > > +        object_unref(OBJECT(mapping));
> > > +        return NULL;
> > > +    }
> > > +
> > > +    mapping->mr = mr;
> > > +    return mapping;
> > > +}
> > > +
> > > +int virtio_add_shmem_map(VirtioSharedMemory *shmem,
> > > +                         VirtioSharedMemoryMapping *mapping)
> > > +{
> > > +    if (!mapping) {
> > > +        error_report("VirtioSharedMemoryMapping cannot be NULL");
> > > +        return -1;
> > > +    }
> > > +    if (!mapping->mr) {
> > > +        error_report("VirtioSharedMemoryMapping has no MemoryRegion");
> > > +        return -1;
> > > +    }
> > > +
> > > +    /* Validate boundaries against the VIRTIO shared memory region */
> > > +    if (mapping->offset + mapping->len > shmem->mr.size) {
> > > +        error_report("Memory exceeds the shared memory boundaries");
> > > +        return -1;
> > > +    }
> > > +
> > > +    /* Add as subregion to the VIRTIO shared memory */
> > > +    memory_region_add_subregion(&shmem->mr, mapping->offset, 
> > > mapping->mr);
> > > +
> > > +    /* Add to the mapped regions list */
> > > +    QTAILQ_INSERT_TAIL(&shmem->mmaps, mapping, link);
> > > +
> > > +    return 0;
> > > +}
> > > +
> > > +VirtioSharedMemoryMapping *virtio_find_shmem_map(VirtioSharedMemory 
> > > *shmem,
> > > +                                          hwaddr offset, uint64_t size)
> > > +{
> > > +    VirtioSharedMemoryMapping *mapping;
> > > +    QTAILQ_FOREACH(mapping, &shmem->mmaps, link) {
> > > +        if (mapping->offset == offset && mapping->mr->size == size) {
> > > +            return mapping;
> > > +        }
> > > +    }
> > > +    return NULL;
> > > +}
> > > +
> > > +void virtio_del_shmem_map(VirtioSharedMemory *shmem, hwaddr offset,
> > > +                          uint64_t size)
> > > +{
> > > +    VirtioSharedMemoryMapping *mapping = virtio_find_shmem_map(shmem, 
> > > offset, size);
> > > +    if (mapping == NULL) {
> > > +        return;
> > > +    }
> > > +
> > > +    /*
> > > +     * Remove from memory region first
> > > +     */
> > > +    memory_region_del_subregion(&shmem->mr, mapping->mr);
> >
> > There is an identical call in vhost_user_backend_handle_shmem_unmap().
> > It looks like memory_region_del_subregion()'s
> > assert(subregion->container == mr) would fail here when called from
> > vhost_user_backend_handle_shmem_unmap().
> >
> > How does this work?
> 
> Uhm. I was sure I tested this and spent some time trying to figure out
> the logic. But after all, I have to agree with you and tested again to
> make sure. And indeed it fails on that assertion.
> 
> So I think the memory_region_del_subregion() that is added in
> virtio_del_shmem_map() for this revision is necessary as
> virtio_del_shmem_map() is also called from
> virtio_device_instance_finalize() (and virtio_reset() with your next
> comment).

Sounds good.

> 
> So what I will try is to remove memory_region_del_subregion() from the
> vhost_user_backend_handle_shmem_unmap(), probably I can also remove
> the memory_region_transaction_* function calls. That was there for a
> few revisions now so I probably overlooked it when doing the latest
> changes.

By the way, I think I understand the deadlock that required
memory_region_transaction_commit() to come after the vhost-user reply is
sent:

hw/virtio/vhost.c has a MemoryListener that sends
VHOST_USER_SET_MEM_TABLE messages in its .commit() callback
(vhost_commit()). vhost-user backends might not expect a new message
from the frontend before the reply to the request on the backend
channel.

> >
> > > +
> > > +    /*
> > > +     * Remove from list and unref the mapping which will trigger 
> > > automatic cleanup
> > > +     * when the reference count reaches zero.
> > > +     */
> > > +    QTAILQ_REMOVE(&shmem->mmaps, mapping, link);
> > > +    object_unref(OBJECT(mapping));
> > > +}
> > > +
> > >  /* A wrapper for use as a VMState .put function */
> > >  static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
> > >                                const VMStateField *field, JSONWriter 
> > > *vmdesc)
> > > @@ -3521,6 +3696,7 @@ void virtio_init(VirtIODevice *vdev, uint16_t 
> > > device_id, size_t config_size)
> > >              NULL, virtio_vmstate_change, vdev);
> > >      vdev->device_endian = virtio_default_endian();
> > >      vdev->use_guest_notifier_mask = true;
> > > +    QSIMPLEQ_INIT(&vdev->shmem_list);
> > >  }
> > >
> > >  /*
> > > @@ -4032,11 +4208,24 @@ static void 
> > > virtio_device_free_virtqueues(VirtIODevice *vdev)
> > >  static void virtio_device_instance_finalize(Object *obj)
> > >  {
> > >      VirtIODevice *vdev = VIRTIO_DEVICE(obj);
> > > +    VirtioSharedMemory *shmem;
> > >
> > >      virtio_device_free_virtqueues(vdev);
> > >
> > >      g_free(vdev->config);
> > >      g_free(vdev->vector_queues);
> > > +    while (!QSIMPLEQ_EMPTY(&vdev->shmem_list)) {
> > > +        shmem = QSIMPLEQ_FIRST(&vdev->shmem_list);
> > > +        while (!QTAILQ_EMPTY(&shmem->mmaps)) {
> > > +            VirtioSharedMemoryMapping *mapping = 
> > > QTAILQ_FIRST(&shmem->mmaps);
> > > +            virtio_del_shmem_map(shmem, mapping->offset, 
> > > mapping->mr->size);
> > > +        }
> > > +
> > > +        /* Clean up the embedded MemoryRegion */
> > > +        object_unparent(OBJECT(&shmem->mr));
> > > +        QSIMPLEQ_REMOVE_HEAD(&vdev->shmem_list, entry);
> > > +        g_free(shmem);
> > > +    }
> > >  }
> >
> > The semantics across device reset also need to be defined. I think
> > mappings should be deleted when the device is reset, but the VIRTIO
> > Shared Memory Regions should remain. The reason I think this behavior
> > makes sense is that it prevents stale fds remaining open and mapped into
> > the guest (e.g. GPU or virtiofs resources). It seems safer than relying
> > on the backend to unmap explicitly.
> 
> Got it.
> 
> >
> > >
> > >  static const Property virtio_properties[] = {
> > > @@ -4402,9 +4591,18 @@ static const TypeInfo virtio_device_info = {
> > >      .class_size = sizeof(VirtioDeviceClass),
> > >  };
> > >
> > > +static const TypeInfo virtio_shared_memory_mapping_info = {
> > > +    .name = TYPE_VIRTIO_SHARED_MEMORY_MAPPING,
> > > +    .parent = TYPE_OBJECT,
> > > +    .instance_size = sizeof(VirtioSharedMemoryMapping),
> > > +    .instance_init = virtio_shared_memory_mapping_instance_init,
> > > +    .instance_finalize = virtio_shared_memory_mapping_finalize,
> > > +};
> > > +
> > >  static void virtio_register_types(void)
> > >  {
> > >      type_register_static(&virtio_device_info);
> > > +    type_register_static(&virtio_shared_memory_mapping_info);
> > >  }
> > >
> > >  type_init(virtio_register_types)
> > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> > > index c594764f23..8cecb221cb 100644
> > > --- a/include/hw/virtio/virtio.h
> > > +++ b/include/hw/virtio/virtio.h
> > > @@ -98,6 +98,46 @@ enum virtio_device_endian {
> > >      VIRTIO_DEVICE_ENDIAN_BIG,
> > >  };
> > >
> > > +#define TYPE_VIRTIO_SHARED_MEMORY_MAPPING "virtio-shared-memory-mapping"
> > > +OBJECT_DECLARE_SIMPLE_TYPE(VirtioSharedMemoryMapping, 
> > > VIRTIO_SHARED_MEMORY_MAPPING)
> > > +
> > > +/**
> > > + * VirtioSharedMemoryMapping:
> > > + * @parent: Parent QOM object
> > > + * @shmid: VIRTIO Shared Memory Region ID
> > > + * @fd: File descriptor for the shared memory region
> > > + * @offset: Offset within the VIRTIO Shared Memory Region
> > > + * @len: Size of the mapping
> > > + * @mr: MemoryRegion associated with this shared memory mapping
> > > + * @link: List entry for the shared memory region's mapping list
> > > + *
> > > + * A QOM object that represents an individual file descriptor-based 
> > > shared
> > > + * memory mapping within a VIRTIO Shared Memory Region. It manages the
> > > + * MemoryRegion lifecycle and file descriptor cleanup through QOM 
> > > reference
> > > + * counting. When the object is unreferenced and its reference count 
> > > drops
> > > + * to zero, it automatically cleans up the MemoryRegion and closes the 
> > > file
> > > + * descriptor.
> > > + */
> > > +struct VirtioSharedMemoryMapping {
> > > +    Object parent;
> > > +
> > > +    uint8_t shmid;
> > > +    int fd;
> > > +    hwaddr offset;
> > > +    uint64_t len;
> > > +    MemoryRegion *mr;
> > > +    QTAILQ_ENTRY(VirtioSharedMemoryMapping) link;
> > > +};
> > > +
> > > +struct VirtioSharedMemory {
> > > +    uint8_t shmid;
> > > +    MemoryRegion mr;
> > > +    QTAILQ_HEAD(, VirtioSharedMemoryMapping) mmaps;
> > > +    QSIMPLEQ_ENTRY(VirtioSharedMemory) entry;
> > > +};
> > > +
> > > +typedef struct VirtioSharedMemory VirtioSharedMemory;
> > > +
> > >  /**
> > >   * struct VirtIODevice - common VirtIO structure
> > >   * @name: name of the device
> > > @@ -167,6 +207,8 @@ struct VirtIODevice
> > >       */
> > >      EventNotifier config_notifier;
> > >      bool device_iotlb_enabled;
> > > +    /* Shared memory region for mappings. */
> > > +    QSIMPLEQ_HEAD(, VirtioSharedMemory) shmem_list;
> > >  };
> > >
> > >  struct VirtioDeviceClass {
> > > @@ -295,6 +337,100 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue 
> > > *vq);
> > >
> > >  int virtio_save(VirtIODevice *vdev, QEMUFile *f);
> > >
> > > +/**
> > > + * virtio_new_shmem_region() - Create a new shared memory region
> > > + * @vdev: VirtIODevice
> > > + * @shmid: Shared memory ID
> > > + * @size: Size of the shared memory region
> > > + *
> > > + * Creates a new VirtioSharedMemory region for the given device and ID.
> > > + * The returned VirtioSharedMemory is owned by the VirtIODevice and will
> > > + * be automatically freed when the device is destroyed. The caller
> > > + * should not free the returned pointer.
> > > + *
> > > + * Returns: Pointer to the new VirtioSharedMemory region, or NULL on 
> > > failure
> > > + */
> > > +VirtioSharedMemory *virtio_new_shmem_region(VirtIODevice *vdev, uint8_t 
> > > shmid, uint64_t size);
> > > +
> > > +/**
> > > + * virtio_find_shmem_region() - Find an existing shared memory region
> > > + * @vdev: VirtIODevice
> > > + * @shmid: Shared memory ID to find
> > > + *
> > > + * Finds an existing VirtioSharedMemory region by ID. The returned 
> > > pointer
> > > + * is owned by the VirtIODevice and should not be freed by the caller.
> > > + *
> > > + * Returns: Pointer to the VirtioSharedMemory region, or NULL if not 
> > > found
> > > + */
> > > +VirtioSharedMemory *virtio_find_shmem_region(VirtIODevice *vdev, uint8_t 
> > > shmid);
> > > +
> > > +/**
> > > + * virtio_shared_memory_mapping_new() - Create a new 
> > > VirtioSharedMemoryMapping
> > > + * @shmid: VIRTIO Shared Memory Region ID
> > > + * @fd: File descriptor for the shared memory
> > > + * @fd_offset: Offset within the file descriptor
> > > + * @shm_offset: Offset within the VIRTIO Shared Memory Region
> > > + * @len: Size of the mapping
> > > + * @allow_write: Whether to allow write access to the mapping
> > > + *
> > > + * Creates a new VirtioSharedMemoryMapping that manages a shared memory 
> > > mapping.
> > > + * The object will create a MemoryRegion using 
> > > memory_region_init_ram_from_fd()
> > > + * as a child object. When the object is finalized, it will automatically
> > > + * clean up the MemoryRegion and close the file descriptor.
> > > + *
> > > + * Return: A new VirtioSharedMemoryMapping on success, NULL on error.
> > > + */
> > > +VirtioSharedMemoryMapping *virtio_shared_memory_mapping_new(uint8_t 
> > > shmid,
> > > +                                                            int fd,
> > > +                                                            uint64_t 
> > > fd_offset,
> > > +                                                            uint64_t 
> > > shm_offset,
> > > +                                                            uint64_t len,
> > > +                                                            bool 
> > > allow_write);
> > > +
> > > +/**
> > > + * virtio_add_shmem_map() - Add a memory mapping to a shared region
> > > + * @shmem: VirtioSharedMemory region
> > > + * @mapping: VirtioSharedMemoryMapping to add (transfers ownership)
> > > + *
> > > + * Adds a memory mapping to the shared memory region. The 
> > > VirtioSharedMemoryMapping
> > > + * ownership is transferred to the shared memory region and will be 
> > > automatically
> > > + * cleaned up through QOM reference counting when virtio_del_shmem_map() 
> > > is
> > > + * called or when the shared memory region is destroyed.
> > > + *
> > > + * Returns: 0 on success, negative errno on failure
> > > + */
> > > +int virtio_add_shmem_map(VirtioSharedMemory *shmem,
> > > +                         VirtioSharedMemoryMapping *mapping);
> > > +
> > > +/**
> > > + * virtio_find_shmem_map() - Find a memory mapping in a shared region
> > > + * @shmem: VirtioSharedMemory region
> > > + * @offset: Offset within the shared memory region
> > > + * @size: Size of the mapping to find
> > > + *
> > > + * Finds an existing memory mapping that covers the specified range.
> > > + * The returned VirtioSharedMemoryMapping is owned by the 
> > > VirtioSharedMemory
> > > + * region and should not be freed by the caller.
> > > + *
> > > + * Returns: Pointer to the VirtioSharedMemoryMapping, or NULL if not 
> > > found
> > > + */
> > > +VirtioSharedMemoryMapping *virtio_find_shmem_map(VirtioSharedMemory 
> > > *shmem,
> > > +                                          hwaddr offset, uint64_t size);
> > > +
> > > +/**
> > > + * virtio_del_shmem_map() - Remove a memory mapping from a shared region
> > > + * @shmem: VirtioSharedMemory region
> > > + * @offset: Offset of the mapping to remove
> > > + * @size: Size of the mapping to remove
> > > + *
> > > + * Removes a memory mapping from the shared memory region. This will
> > > + * automatically unref the associated VhostUserShmemObject, which may
> > > + * trigger its finalization and cleanup if no other references exist.
> > > + * The mapping's MemoryRegion will be properly unmapped and cleaned up.
> > > + */
> > > +void virtio_del_shmem_map(VirtioSharedMemory *shmem, hwaddr offset,
> > > +                          uint64_t size);
> > > +
> > >  extern const VMStateInfo virtio_vmstate_info;
> > >
> > >  #define VMSTATE_VIRTIO_DEVICE \
> > > diff --git a/subprojects/libvhost-user/libvhost-user.c 
> > > b/subprojects/libvhost-user/libvhost-user.c
> > > index 9c630c2170..034cbfdc3c 100644
> > > --- a/subprojects/libvhost-user/libvhost-user.c
> > > +++ b/subprojects/libvhost-user/libvhost-user.c
> > > @@ -1592,6 +1592,76 @@ vu_rm_shared_object(VuDev *dev, unsigned char 
> > > uuid[UUID_LEN])
> > >      return vu_send_message(dev, &msg);
> > >  }
> > >
> > > +bool
> > > +vu_shmem_map(VuDev *dev, uint8_t shmid, uint64_t fd_offset,
> > > +             uint64_t shm_offset, uint64_t len, uint64_t flags, int fd)
> > > +{
> > > +    VhostUserMsg vmsg = {
> > > +        .request = VHOST_USER_BACKEND_SHMEM_MAP,
> > > +        .size = sizeof(vmsg.payload.mmap),
> > > +        .flags = VHOST_USER_VERSION,
> > > +        .payload.mmap = {
> > > +            .shmid = shmid,
> > > +            .fd_offset = fd_offset,
> > > +            .shm_offset = shm_offset,
> > > +            .len = len,
> > > +            .flags = flags,
> > > +        },
> > > +        .fd_num = 1,
> > > +        .fds[0] = fd,
> > > +    };
> > > +
> > > +    if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SHMEM)) {
> > > +        return false;
> > > +    }
> > > +
> > > +    if (vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_REPLY_ACK)) {
> > > +        vmsg.flags |= VHOST_USER_NEED_REPLY_MASK;
> > > +    }
> > > +
> > > +    pthread_mutex_lock(&dev->backend_mutex);
> > > +    if (!vu_message_write(dev, dev->backend_fd, &vmsg)) {
> > > +        pthread_mutex_unlock(&dev->backend_mutex);
> > > +        return false;
> > > +    }
> > > +
> > > +    /* Also unlocks the backend_mutex */
> > > +    return vu_process_message_reply(dev, &vmsg);
> > > +}
> > > +
> > > +bool
> > > +vu_shmem_unmap(VuDev *dev, uint8_t shmid, uint64_t shm_offset, uint64_t 
> > > len)
> > > +{
> > > +    VhostUserMsg vmsg = {
> > > +        .request = VHOST_USER_BACKEND_SHMEM_UNMAP,
> > > +        .size = sizeof(vmsg.payload.mmap),
> > > +        .flags = VHOST_USER_VERSION,
> > > +        .payload.mmap = {
> > > +            .shmid = shmid,
> > > +            .fd_offset = 0,
> > > +            .shm_offset = shm_offset,
> > > +            .len = len,
> > > +        },
> > > +    };
> > > +
> > > +    if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SHMEM)) {
> > > +        return false;
> > > +    }
> > > +
> > > +    if (vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_REPLY_ACK)) {
> > > +        vmsg.flags |= VHOST_USER_NEED_REPLY_MASK;
> > > +    }
> > > +
> > > +    pthread_mutex_lock(&dev->backend_mutex);
> > > +    if (!vu_message_write(dev, dev->backend_fd, &vmsg)) {
> > > +        pthread_mutex_unlock(&dev->backend_mutex);
> > > +        return false;
> > > +    }
> > > +
> > > +    /* Also unlocks the backend_mutex */
> > > +    return vu_process_message_reply(dev, &vmsg);
> > > +}
> > > +
> > >  static bool
> > >  vu_set_vring_call_exec(VuDev *dev, VhostUserMsg *vmsg)
> > >  {
> > > diff --git a/subprojects/libvhost-user/libvhost-user.h 
> > > b/subprojects/libvhost-user/libvhost-user.h
> > > index 2ffc58c11b..26b710c92d 100644
> > > --- a/subprojects/libvhost-user/libvhost-user.h
> > > +++ b/subprojects/libvhost-user/libvhost-user.h
> > > @@ -69,6 +69,8 @@ enum VhostUserProtocolFeature {
> > >      /* Feature 16 is reserved for VHOST_USER_PROTOCOL_F_STATUS. */
> > >      /* Feature 17 reserved for VHOST_USER_PROTOCOL_F_XEN_MMAP. */
> > >      VHOST_USER_PROTOCOL_F_SHARED_OBJECT = 18,
> > > +    /* Feature 19 is reserved for VHOST_USER_PROTOCOL_F_DEVICE_STATE */
> > > +    VHOST_USER_PROTOCOL_F_SHMEM = 20,
> > >      VHOST_USER_PROTOCOL_F_MAX
> > >  };
> > >
> > > @@ -127,6 +129,8 @@ typedef enum VhostUserBackendRequest {
> > >      VHOST_USER_BACKEND_SHARED_OBJECT_ADD = 6,
> > >      VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE = 7,
> > >      VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP = 8,
> > > +    VHOST_USER_BACKEND_SHMEM_MAP = 9,
> > > +    VHOST_USER_BACKEND_SHMEM_UNMAP = 10,
> > >      VHOST_USER_BACKEND_MAX
> > >  }  VhostUserBackendRequest;
> > >
> > > @@ -186,6 +190,23 @@ typedef struct VhostUserShared {
> > >      unsigned char uuid[UUID_LEN];
> > >  } VhostUserShared;
> > >
> > > +/* For the flags field of VhostUserMMap */
> > > +#define VHOST_USER_FLAG_MAP_RW (1u << 0)
> > > +
> > > +typedef struct {
> > > +    /* VIRTIO Shared Memory Region ID */
> > > +    uint8_t shmid;
> > > +    uint8_t padding[7];
> > > +    /* File offset */
> > > +    uint64_t fd_offset;
> > > +    /* Offset within the VIRTIO Shared Memory Region */
> > > +    uint64_t shm_offset;
> > > +    /* Size of the mapping */
> > > +    uint64_t len;
> > > +    /* Flags for the mmap operation, from VHOST_USER_FLAG_MAP_* */
> > > +    uint16_t flags;
> > > +} VhostUserMMap;
> > > +
> > >  #define VU_PACKED __attribute__((packed))
> > >
> > >  typedef struct VhostUserMsg {
> > > @@ -210,6 +231,7 @@ typedef struct VhostUserMsg {
> > >          VhostUserVringArea area;
> > >          VhostUserInflight inflight;
> > >          VhostUserShared object;
> > > +        VhostUserMMap mmap;
> > >      } payload;
> > >
> > >      int fds[VHOST_MEMORY_BASELINE_NREGIONS];
> > > @@ -593,6 +615,38 @@ bool vu_add_shared_object(VuDev *dev, unsigned char 
> > > uuid[UUID_LEN]);
> > >   */
> > >  bool vu_rm_shared_object(VuDev *dev, unsigned char uuid[UUID_LEN]);
> > >
> > > +/**
> > > + * vu_shmem_map:
> > > + * @dev: a VuDev context
> > > + * @shmid: VIRTIO Shared Memory Region ID
> > > + * @fd_offset: File offset
> > > + * @shm_offset: Offset within the VIRTIO Shared Memory Region
> > > + * @len: Size of the mapping
> > > + * @flags: Flags for the mmap operation
> > > + * @fd: A file descriptor
> > > + *
> > > + * Advertises a new mapping to be made in a given VIRTIO Shared Memory 
> > > Region.
> > > + *
> > > + * Returns: TRUE on success, FALSE on failure.
> > > + */
> > > +bool vu_shmem_map(VuDev *dev, uint8_t shmid, uint64_t fd_offset,
> > > +                  uint64_t shm_offset, uint64_t len, uint64_t flags, int 
> > > fd);
> > > +
> > > +/**
> > > + * vu_shmem_unmap:
> > > + * @dev: a VuDev context
> > > + * @shmid: VIRTIO Shared Memory Region ID
> > > + * @fd_offset: File offset
> > > + * @len: Size of the mapping
> > > + *
> > > + * The front-end un-mmaps a given range in the VIRTIO Shared Memory 
> > > Region
> > > + * with the requested `shmid`.
> > > + *
> > > + * Returns: TRUE on success, FALSE on failure.
> > > + */
> > > +bool vu_shmem_unmap(VuDev *dev, uint8_t shmid, uint64_t shm_offset,
> > > +                    uint64_t len);
> > > +
> > >  /**
> > >   * vu_queue_set_notification:
> > >   * @dev: a VuDev context
> > > --
> > > 2.49.0
> > >
> 

Attachment: signature.asc
Description: PGP signature

Reply via email to