On Tue, Oct 14, 2025 at 5:50 PM Stefan Hajnoczi <[email protected]> wrote:
>
> On Tue, Oct 14, 2025 at 03:52:17PM +0200, Albert Esteve wrote:
> > On Mon, Sep 29, 2025 at 8:39 PM Stefan Hajnoczi <[email protected]> wrote:
> > >
> > > On Wed, Sep 10, 2025 at 01:54:14PM +0200, Albert Esteve wrote:
> > > > Add SHMEM_MAP/UNMAP requests to vhost-user for dynamic management of
> > > > VIRTIO Shared Memory mappings.
> > > >
> > > > This implementation introduces VirtioSharedMemoryMapping as a unified
> > > > QOM object that manages both the mapping metadata and MemoryRegion
> > > > lifecycle. This object provides reference-counted lifecycle management
> > > > with automatic cleanup of file descriptors and memory regions
> > > > through QOM finalization.
> > > >
> > > > This request allows backends to dynamically map file descriptors into a
> > > > VIRTIO Shared Memory Region identified by their shmid. Maps are created
> > > > using memory_region_init_ram_from_fd() with configurable read/write
> > > > permissions, and the resulting MemoryRegions are added as subregions to
> > > > the shmem container region. The mapped memory is then advertised to the
> > > > guest VIRTIO drivers as a base address plus offset for reading and
> > > > > writing according to the requested mmap flags.
> > > >
> > > > The backend can unmap memory ranges within a given VIRTIO Shared Memory
> > > > Region to free resources. Upon receiving this message, the frontend
> > > > removes the MemoryRegion as a subregion and automatically unreferences
> > > > the VirtioSharedMemoryMapping object, triggering cleanup if no other
> > > > references exist.
> > > >
> > > > Error handling has been improved to ensure consistent behavior across
> > > > handlers that manage their own vhost_user_send_resp() calls. Since
> > > > these handlers clear the VHOST_USER_NEED_REPLY_MASK flag, explicit
> > > > error checking ensures proper connection closure on failures,
> > > > maintaining the expected error flow.
> > > >
> > > > Note the memory region commit for these operations needs to be delayed
> > > > until after we respond to the backend to avoid deadlocks.
> > > >
> > > > Signed-off-by: Albert Esteve <[email protected]>
> > > > ---
> > > >  hw/virtio/vhost-user.c                    | 206 +++++++++++++++++++++-
> > > >  hw/virtio/virtio.c                        | 198 +++++++++++++++++++++
> > > >  include/hw/virtio/virtio.h                | 136 ++++++++++++++
> > > >  subprojects/libvhost-user/libvhost-user.c |  70 ++++++++
> > > >  subprojects/libvhost-user/libvhost-user.h |  54 ++++++
> > > >  5 files changed, 662 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> > > > index 1e1d6b0d6e..4783b1904b 100644
> > > > --- a/hw/virtio/vhost-user.c
> > > > +++ b/hw/virtio/vhost-user.c
> > > > @@ -115,6 +115,8 @@ typedef enum VhostUserBackendRequest {
> > > >      VHOST_USER_BACKEND_SHARED_OBJECT_ADD = 6,
> > > >      VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE = 7,
> > > >      VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP = 8,
> > > > +    VHOST_USER_BACKEND_SHMEM_MAP = 9,
> > > > +    VHOST_USER_BACKEND_SHMEM_UNMAP = 10,
> > > >      VHOST_USER_BACKEND_MAX
> > > >  }  VhostUserBackendRequest;
> > > >
> > > > @@ -192,6 +194,23 @@ typedef struct VhostUserShared {
> > > >      unsigned char uuid[16];
> > > >  } VhostUserShared;
> > > >
> > > > +/* For the flags field of VhostUserMMap */
> > > > +#define VHOST_USER_FLAG_MAP_RW (1u << 0)
> > > > +
> > > > +typedef struct {
> > > > +    /* VIRTIO Shared Memory Region ID */
> > > > +    uint8_t shmid;
> > > > +    uint8_t padding[7];
> > > > +    /* File offset */
> > > > +    uint64_t fd_offset;
> > > > +    /* Offset within the VIRTIO Shared Memory Region */
> > > > +    uint64_t shm_offset;
> > > > +    /* Size of the mapping */
> > > > +    uint64_t len;
> > > > +    /* Flags for the mmap operation, from VHOST_USER_FLAG_MAP_* */
> > > > +    uint16_t flags;
> > > > +} VhostUserMMap;
> > > > +
> > > >  typedef struct {
> > > >      VhostUserRequest request;
> > > >
> > > > @@ -224,6 +243,7 @@ typedef union {
> > > >          VhostUserInflight inflight;
> > > >          VhostUserShared object;
> > > >          VhostUserTransferDeviceState transfer_state;
> > > > +        VhostUserMMap mmap;
> > > >  } VhostUserPayload;
> > > >
> > > >  typedef struct VhostUserMsg {
> > > > @@ -1768,6 +1788,172 @@ 
> > > > vhost_user_backend_handle_shared_object_lookup(struct vhost_user *u,
> > > >      return 0;
> > > >  }
> > > >
> > > > +/**
> > > > + * vhost_user_backend_handle_shmem_map() - Handle SHMEM_MAP backend 
> > > > request
> > > > + * @dev: vhost device
> > > > + * @ioc: QIOChannel for communication
> > > > + * @hdr: vhost-user message header
> > > > + * @payload: message payload containing mapping details
> > > > + * @fd: file descriptor for the shared memory region
> > > > + *
> > > > + * Handles VHOST_USER_BACKEND_SHMEM_MAP requests from the backend. 
> > > > Creates
> > > > + * a VhostUserShmemObject to manage the shared memory mapping and adds 
> > > > it
> > > > + * to the appropriate VirtIO shared memory region. The 
> > > > VhostUserShmemObject
> > > > + * serves as an intermediate parent for the MemoryRegion, ensuring 
> > > > proper
> > > > + * lifecycle management with reference counting.
> > > > + *
> > > > + * Returns: 0 on success, negative errno on failure
> > > > + */
> > > > +static int
> > > > +vhost_user_backend_handle_shmem_map(struct vhost_dev *dev,
> > > > +                                    QIOChannel *ioc,
> > > > +                                    VhostUserHeader *hdr,
> > > > +                                    VhostUserPayload *payload,
> > > > +                                    int fd)
> > > > +{
> > > > +    VirtioSharedMemory *shmem;
> > > > +    VhostUserMMap *vu_mmap = &payload->mmap;
> > > > +    Error *local_err = NULL;
> > > > +    g_autoptr(GString) shm_name = g_string_new(NULL);
> > > > +
> > > > +    if (fd < 0) {
> > > > +        error_report("Bad fd for map");
> > > > +        return -EBADF;
> > > > +    }
> > > > +
> > > > +    if (QSIMPLEQ_EMPTY(&dev->vdev->shmem_list)) {
> > > > +        error_report("Device has no VIRTIO Shared Memory Regions. "
> > > > +                     "Requested ID: %d", vu_mmap->shmid);
> > > > +        return -EFAULT;
> > > > +    }
> > > > +
> > > > +    shmem = virtio_find_shmem_region(dev->vdev, vu_mmap->shmid);
> > > > +    if (!shmem) {
> > > > +        error_report("VIRTIO Shared Memory Region at "
> > > > +                     "ID %d not found or unitialized", vu_mmap->shmid);
> > >
> > > uninitialized
> > >
> > > > +        return -EFAULT;
> > > > +    }
> > > > +
> > > > +    if ((vu_mmap->shm_offset + vu_mmap->len) < vu_mmap->len ||
> > > > +        (vu_mmap->shm_offset + vu_mmap->len) > shmem->mr.size) {
> > > > +        error_report("Bad offset/len for mmap %" PRIx64 "+%" PRIx64,
> > > > +                     vu_mmap->shm_offset, vu_mmap->len);
> > > > +        return -EFAULT;
> > > > +    }
> > > > +
> > > > +    g_string_printf(shm_name, "virtio-shm%i-%lu",
> > > > +                    vu_mmap->shmid, vu_mmap->shm_offset);
> > > > +
> > > > +    memory_region_transaction_begin();
> > > > +
> > > > +    /* Create VirtioSharedMemoryMapping object */
> > > > +    VirtioSharedMemoryMapping *mapping = 
> > > > virtio_shared_memory_mapping_new(
> > > > +        vu_mmap->shmid, fd, vu_mmap->fd_offset, vu_mmap->shm_offset,
> > > > +        vu_mmap->len, vu_mmap->flags & VHOST_USER_FLAG_MAP_RW);
> > > > +
> > > > +    if (!mapping) {
> > > > +        memory_region_transaction_commit();
> > > > +        return -EFAULT;
> > > > +    }
> > > > +
> > > > +    /* Add the mapping to the shared memory region */
> > > > +    if (virtio_add_shmem_map(shmem, mapping) != 0) {
> > > > +        error_report("Failed to add shared memory mapping");
> > > > +        object_unref(OBJECT(mapping));
> > > > +        memory_region_transaction_commit();
> > > > +        return -EFAULT;
> > > > +    }
> > > > +
> > > > +    if (hdr->flags & VHOST_USER_NEED_REPLY_MASK) {
> > > > +        payload->u64 = 0;
> > > > +        hdr->size = sizeof(payload->u64);
> > > > +        vhost_user_send_resp(ioc, hdr, payload, &local_err);
> > > > +        if (local_err) {
> > > > +            error_report_err(local_err);
> > > > +            memory_region_transaction_commit();
> > > > +            return -EFAULT;
> > > > +        }
> > > > +    }
> > > > +
> > > > +    memory_region_transaction_commit();
> > > > +
> > > > +    return 0;
> > > > +}
> > > > +
> > > > +/**
> > > > + * vhost_user_backend_handle_shmem_unmap() - Handle SHMEM_UNMAP 
> > > > backend request
> > > > + * @dev: vhost device
> > > > + * @ioc: QIOChannel for communication
> > > > + * @hdr: vhost-user message header
> > > > + * @payload: message payload containing unmapping details
> > > > + *
> > > > + * Handles VHOST_USER_BACKEND_SHMEM_UNMAP requests from the backend. 
> > > > Removes
> > > > + * the specified memory mapping from the VirtIO shared memory region. 
> > > > This
> > > > + * automatically unreferences the associated VhostUserShmemObject, 
> > > > which may
> > > > + * trigger its finalization and cleanup (munmap, close fd) if no other
> > > > + * references exist.
> > > > + *
> > > > + * Returns: 0 on success, negative errno on failure
> > > > + */
> > > > +static int
> > > > +vhost_user_backend_handle_shmem_unmap(struct vhost_dev *dev,
> > > > +                                      QIOChannel *ioc,
> > > > +                                      VhostUserHeader *hdr,
> > > > +                                      VhostUserPayload *payload)
> > > > +{
> > > > +    VirtioSharedMemory *shmem;
> > > > +    VirtioSharedMemoryMapping *mmap = NULL;
> > > > +    VhostUserMMap *vu_mmap = &payload->mmap;
> > > > +    Error *local_err = NULL;
> > > > +
> > > > +    if (QSIMPLEQ_EMPTY(&dev->vdev->shmem_list)) {
> > > > +        error_report("Device has no VIRTIO Shared Memory Regions. "
> > > > +                     "Requested ID: %d", vu_mmap->shmid);
> > > > +        return -EFAULT;
> > > > +    }
> > > > +
> > > > +    shmem = virtio_find_shmem_region(dev->vdev, vu_mmap->shmid);
> > > > +    if (!shmem) {
> > > > +        error_report("VIRTIO Shared Memory Region at "
> > > > +                     "ID %d not found or unitialized", vu_mmap->shmid);
> > >
> > > uninitialized
> > >
> > > > +        return -EFAULT;
> > > > +    }
> > > > +
> > > > +    if ((vu_mmap->shm_offset + vu_mmap->len) < vu_mmap->len ||
> > > > +        (vu_mmap->shm_offset + vu_mmap->len) > shmem->mr.size) {
> > > > +        error_report("Bad offset/len for unmmap %" PRIx64 "+%" PRIx64,
> > > > +                     vu_mmap->shm_offset, vu_mmap->len);
> > > > +        return -EFAULT;
> > > > +    }
> > > > +
> > > > +    mmap = virtio_find_shmem_map(shmem, vu_mmap->shm_offset, 
> > > > vu_mmap->len);
> > > > +    if (!mmap) {
> > > > +        error_report("Shared memory mapping not found at offset %" 
> > > > PRIx64
> > > > +                     " with length %" PRIx64,
> > > > +                     vu_mmap->shm_offset, vu_mmap->len);
> > > > +        return -EFAULT;
> > > > +    }
> > > > +
> > > > +    memory_region_transaction_begin();
> > > > +    memory_region_del_subregion(&shmem->mr, mmap->mr);
> > > > +    if (hdr->flags & VHOST_USER_NEED_REPLY_MASK) {
> > > > +        payload->u64 = 0;
> > > > +        hdr->size = sizeof(payload->u64);
> > > > +        vhost_user_send_resp(ioc, hdr, payload, &local_err);
> > > > +        if (local_err) {
> > > > +            error_report_err(local_err);
> > > > +            memory_region_transaction_commit();
> > > > +            return -EFAULT;
> > > > +        }
> > > > +    }
> > > > +    memory_region_transaction_commit();
> > > > +
> > > > +    /* Free the MemoryRegion only after vhost_commit */
> > > > +    virtio_del_shmem_map(shmem, vu_mmap->shm_offset, vu_mmap->len);
> > > > +
> > > > +    return 0;
> > > > +}
> > > > +
> > > >  static void close_backend_channel(struct vhost_user *u)
> > > >  {
> > > >      g_source_destroy(u->backend_src);
> > > > @@ -1833,8 +2019,24 @@ static gboolean backend_read(QIOChannel *ioc, 
> > > > GIOCondition condition,
> > > >                                                               
> > > > &payload.object);
> > > >          break;
> > > >      case VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP:
> > > > -        ret = 
> > > > vhost_user_backend_handle_shared_object_lookup(dev->opaque, ioc,
> > > > -                                                             &hdr, 
> > > > &payload);
> > > > +        /* Handler manages its own response, check error and close 
> > > > connection */
> > > > +        if 
> > > > (vhost_user_backend_handle_shared_object_lookup(dev->opaque, ioc,
> > > > +                                                           &hdr, 
> > > > &payload) < 0) {
> > > > +            goto err;
> > > > +        }
> > > > +        break;
> > >
> > > This looks like a bug fix that is unrelated to this patch. The change
> > > doesn't look correct though: the vhost-user response is skipped when
> > > vhost_user_backend_handle_shared_object_lookup() returns non-zero and
> > > the return value in the payload is replaced with 0 in the -EINVAL and
> > > vhost_user_get_shared_object() failure cases.
> >
> > What vhost_user_backend_handle_shared_object_lookup() returns depends
> > basically on whether vhost_user_backend_send_dmabuf_fd() succeeded or
> > not in sending a response. What you described is what determines the
> > return value in the response. But as the comment states, the response
> > is sent within the handler. When an error in sending the response
> > occurs, we want to close the backend channel to be consistent with
> > other messages. That is what this small fix does. On the other hand,
> > when the response succeeds then the VHOST_USER_NEED_REPLY_MASK flag is
> > unset and the response here is skipped, going directly to the
> > fdcleanup. I think the logic is correct.
>
> I understand now, thanks!
>
> >
> > >
> > > I suggest dropping this and sending a separate patch (independently from
> > > this series) that fixes VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP replies.
> >
> > I mean, I can do that. It is true that it is technically unrelated.
>
> Yes, that way you can explain the reason for the change in the commit
> message and it will be easier for reviewers to understand. Keeping
> unrelated changes separate also makes backports easier (e.g. including
> the fix in QEMU's stable branch or downstream distro packages).
>
> > >
> > > > +    case VHOST_USER_BACKEND_SHMEM_MAP:
> > > > +        /* Handler manages its own response, check error and close 
> > > > connection */
> > >
> > > This comment is not true: the response is only sent in the success case.
> > > Errors do not result in a response being sent because goto err skips
> > > vhost_user_send_resp().
> > >
> > > Please take a look and ensure that a response is sent in all cases.
> >
> > OK. True, I mixed other handlers' style of early returning with what
> > should've been the fix for this. I need to set the return value instead
> > and goto the reply when something breaks.
> >
> > >
> > > > +        if (vhost_user_backend_handle_shmem_map(dev, ioc, &hdr, 
> > > > &payload,
> > > > +                                                fd ? fd[0] : -1) < 0) {
> > > > +            goto err;
> > > > +        }
> > > > +        break;
> > > > +    case VHOST_USER_BACKEND_SHMEM_UNMAP:
> > > > +        /* Handler manages its own response, check error and close 
> > > > connection */
> > >
> > > Same here.
> > >
> > > > +        if (vhost_user_backend_handle_shmem_unmap(dev, ioc, &hdr, 
> > > > &payload) < 0) {
> > > > +            goto err;
> > > > +        }
> > > >          break;
> > > >      default:
> > > >          error_report("Received unexpected msg type: %d.", hdr.request);
> > > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> > > > index 9a81ad912e..ba5ffa58bd 100644
> > > > --- a/hw/virtio/virtio.c
> > > > +++ b/hw/virtio/virtio.c
> > > > @@ -3045,6 +3045,181 @@ int virtio_save(VirtIODevice *vdev, QEMUFile *f)
> > > >      return vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
> > > >  }
> > > >
> > > > +VirtioSharedMemory *virtio_new_shmem_region(VirtIODevice *vdev, 
> > > > uint8_t shmid, uint64_t size)
> > > > +{
> > > > +    VirtioSharedMemory *elem;
> > > > +    g_autofree char *name = NULL;
> > > > +
> > > > +    elem = g_new0(VirtioSharedMemory, 1);
> > > > +    elem->shmid = shmid;
> > > > +
> > > > +    /* Initialize embedded MemoryRegion as container for shmem 
> > > > mappings */
> > > > +    name = g_strdup_printf("virtio-shmem-%d", shmid);
> > > > +    memory_region_init(&elem->mr, OBJECT(vdev), name, size);
> > > > +    QTAILQ_INIT(&elem->mmaps);
> > > > +    QSIMPLEQ_INSERT_TAIL(&vdev->shmem_list, elem, entry);
> > > > +    return elem;
> > > > +}
> > > > +
> > > > +VirtioSharedMemory *virtio_find_shmem_region(VirtIODevice *vdev, 
> > > > uint8_t shmid)
> > > > +{
> > > > +    VirtioSharedMemory *shmem, *next;
> > > > +    QSIMPLEQ_FOREACH_SAFE(shmem, &vdev->shmem_list, entry, next) {
> > > > +        if (shmem->shmid == shmid) {
> > > > +            return shmem;
> > > > +        }
> > > > +    }
> > > > +    return NULL;
> > > > +}
> > > > +
> > > > +static void virtio_shared_memory_mapping_instance_init(Object *obj)
> > > > +{
> > > > +    VirtioSharedMemoryMapping *mapping = 
> > > > VIRTIO_SHARED_MEMORY_MAPPING(obj);
> > > > +
> > > > +    mapping->shmid = 0;
> > > > +    mapping->fd = -1;
> > > > +    mapping->offset = 0;
> > > > +    mapping->len = 0;
> > > > +    mapping->mr = NULL;
> > > > +}
> > > > +
> > > > +static void virtio_shared_memory_mapping_finalize(Object *obj)
> > >
> > > The naming is inconsistent, instance_init vs finalize:
> > >   .instance_init = virtio_shared_memory_mapping_instance_init,
> > >   .instance_finalize = virtio_shared_memory_mapping_finalize,
> > >
> > > virtio_shared_memory_mapping_instance_finalize() would be consistent.
> > >
> > > > +{
> > > > +    VirtioSharedMemoryMapping *mapping = 
> > > > VIRTIO_SHARED_MEMORY_MAPPING(obj);
> > > > +
> > > > +    /* Clean up MemoryRegion if it exists */
> > > > +    if (mapping->mr) {
> > > > +        /* Unparent the MemoryRegion to trigger cleanup */
> > > > +        object_unparent(OBJECT(mapping->mr));
> > > > +        mapping->mr = NULL;
> > > > +    }
> > > > +
> > > > +    /* Close file descriptor */
> > > > +    if (mapping->fd >= 0) {
> > > > +        close(mapping->fd);
> > > > +        mapping->fd = -1;
> > > > +    }
> > > > +}
> > > > +
> > > > +VirtioSharedMemoryMapping *virtio_shared_memory_mapping_new(uint8_t 
> > > > shmid,
> > > > +                                                            int fd,
> > > > +                                                            uint64_t 
> > > > fd_offset,
> > > > +                                                            uint64_t 
> > > > shm_offset,
> > > > +                                                            uint64_t 
> > > > len,
> > > > +                                                            bool 
> > > > allow_write)
> > > > +{
> > > > +    VirtioSharedMemoryMapping *mapping;
> > > > +    MemoryRegion *mr;
> > > > +    g_autoptr(GString) mr_name = g_string_new(NULL);
> > > > +    uint32_t ram_flags;
> > > > +    Error *local_err = NULL;
> > > > +
> > > > +    if (len == 0) {
> > > > +        error_report("Shared memory mapping size cannot be zero");
> > > > +        return NULL;
> > > > +    }
> > > > +
> > > > +    fd = dup(fd);
> > > > +    if (fd < 0) {
> > > > +        error_report("Failed to duplicate fd: %s", strerror(errno));
> > > > +        return NULL;
> > > > +    }
> > > > +
> > > > +    /* Determine RAM flags */
> > > > +    ram_flags = RAM_SHARED;
> > > > +    if (!allow_write) {
> > > > +        ram_flags |= RAM_READONLY_FD;
> > > > +    }
> > > > +
> > > > +    /* Create the VirtioSharedMemoryMapping */
> > > > +    mapping = VIRTIO_SHARED_MEMORY_MAPPING(
> > > > +        object_new(TYPE_VIRTIO_SHARED_MEMORY_MAPPING));
> > > > +
> > > > +    /* Set up object properties */
> > > > +    mapping->shmid = shmid;
> > > > +    mapping->fd = fd;
> > > > +    mapping->offset = shm_offset;
> > > > +    mapping->len = len;
> > > > +
> > > > +    /* Create MemoryRegion as a child of this object */
> > > > +    mr = g_new0(MemoryRegion, 1);
> > > > +    g_string_printf(mr_name, "virtio-shmem-%d-%" PRIx64, shmid, 
> > > > shm_offset);
> > > > +
> > > > +    /* Initialize MemoryRegion with file descriptor */
> > > > +    if (!memory_region_init_ram_from_fd(mr, OBJECT(mapping), 
> > > > mr_name->str,
> > > > +                                        len, ram_flags, fd, fd_offset,
> > > > +                                        &local_err)) {
> > > > +        error_report_err(local_err);
> > > > +        g_free(mr);
> > > > +        close(fd);
> > > > +        object_unref(OBJECT(mapping));
> > > > +        return NULL;
> > > > +    }
> > > > +
> > > > +    mapping->mr = mr;
> > > > +    return mapping;
> > > > +}
> > > > +
> > > > +int virtio_add_shmem_map(VirtioSharedMemory *shmem,
> > > > +                         VirtioSharedMemoryMapping *mapping)
> > > > +{
> > > > +    if (!mapping) {
> > > > +        error_report("VirtioSharedMemoryMapping cannot be NULL");
> > > > +        return -1;
> > > > +    }
> > > > +    if (!mapping->mr) {
> > > > +        error_report("VirtioSharedMemoryMapping has no MemoryRegion");
> > > > +        return -1;
> > > > +    }
> > > > +
> > > > +    /* Validate boundaries against the VIRTIO shared memory region */
> > > > +    if (mapping->offset + mapping->len > shmem->mr.size) {
> > > > +        error_report("Memory exceeds the shared memory boundaries");
> > > > +        return -1;
> > > > +    }
> > > > +
> > > > +    /* Add as subregion to the VIRTIO shared memory */
> > > > +    memory_region_add_subregion(&shmem->mr, mapping->offset, 
> > > > mapping->mr);
> > > > +
> > > > +    /* Add to the mapped regions list */
> > > > +    QTAILQ_INSERT_TAIL(&shmem->mmaps, mapping, link);
> > > > +
> > > > +    return 0;
> > > > +}
> > > > +
> > > > +VirtioSharedMemoryMapping *virtio_find_shmem_map(VirtioSharedMemory 
> > > > *shmem,
> > > > +                                          hwaddr offset, uint64_t size)
> > > > +{
> > > > +    VirtioSharedMemoryMapping *mapping;
> > > > +    QTAILQ_FOREACH(mapping, &shmem->mmaps, link) {
> > > > +        if (mapping->offset == offset && mapping->mr->size == size) {
> > > > +            return mapping;
> > > > +        }
> > > > +    }
> > > > +    return NULL;
> > > > +}
> > > > +
> > > > +void virtio_del_shmem_map(VirtioSharedMemory *shmem, hwaddr offset,
> > > > +                          uint64_t size)
> > > > +{
> > > > +    VirtioSharedMemoryMapping *mapping = virtio_find_shmem_map(shmem, 
> > > > offset, size);
> > > > +    if (mapping == NULL) {
> > > > +        return;
> > > > +    }
> > > > +
> > > > +    /*
> > > > +     * Remove from memory region first
> > > > +     */
> > > > +    memory_region_del_subregion(&shmem->mr, mapping->mr);
> > >
> > > There is an identical call in vhost_user_backend_handle_shmem_unmap().
> > > It looks like memory_region_del_subregion()'s
> > > assert(subregion->container == mr) would fail here when called from
> > > vhost_user_backend_handle_shmem_unmap().
> > >
> > > How does this work?
> >
> > Uhm. I was sure I tested this and spent some time trying to figure out
> > the logic. But after all, I have to agree with you and tested again to
> > make sure. And indeed it fails on that assertion.
> >
> > So I think the memory_region_del_subregion() that is added in
> > virtio_del_shmem_map() for this revision is necessary as
> > virtio_del_shmem_map() is also called from
> > virtio_device_instance_finalize() (and virtio_reset() with your next
> > comment).
>
> Sounds good.
>
> >
> > So what I will try is to remove memory_region_del_subregion() from the
> > vhost_user_backend_handle_shmem_unmap(), probably I can also remove
> > the memory_region_transaction_* function calls. That was there for a
> > few revisions now so I probably overlooked it when doing the latest
> > changes.
>
> By the way, I think I understand the deadlock that required
> memory_region_transaction_commit() to come after the vhost-user reply is
> sent:
>
> hw/virtio/vhost.c has a MemoryListener that sends
> VHOST_USER_SET_MEM_TABLE messages in its .commit() callback
> (vhost_commit()). vhost-user backends might not expect a new message
> from the frontend before the reply to the request on the backend
> channel.

Yes, correct. Adding and deleting subregions each do a
memory_region_transaction_commit(), which triggers the set_mem_table
before the reply is sent. And it did not fail, it just got stuck.

>
> > >
> > > > +
> > > > +    /*
> > > > +     * Remove from list and unref the mapping which will trigger 
> > > > automatic cleanup
> > > > +     * when the reference count reaches zero.
> > > > +     */
> > > > +    QTAILQ_REMOVE(&shmem->mmaps, mapping, link);
> > > > +    object_unref(OBJECT(mapping));
> > > > +}
> > > > +
> > > >  /* A wrapper for use as a VMState .put function */
> > > >  static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
> > > >                                const VMStateField *field, JSONWriter 
> > > > *vmdesc)
> > > > @@ -3521,6 +3696,7 @@ void virtio_init(VirtIODevice *vdev, uint16_t 
> > > > device_id, size_t config_size)
> > > >              NULL, virtio_vmstate_change, vdev);
> > > >      vdev->device_endian = virtio_default_endian();
> > > >      vdev->use_guest_notifier_mask = true;
> > > > +    QSIMPLEQ_INIT(&vdev->shmem_list);
> > > >  }
> > > >
> > > >  /*
> > > > @@ -4032,11 +4208,24 @@ static void 
> > > > virtio_device_free_virtqueues(VirtIODevice *vdev)
> > > >  static void virtio_device_instance_finalize(Object *obj)
> > > >  {
> > > >      VirtIODevice *vdev = VIRTIO_DEVICE(obj);
> > > > +    VirtioSharedMemory *shmem;
> > > >
> > > >      virtio_device_free_virtqueues(vdev);
> > > >
> > > >      g_free(vdev->config);
> > > >      g_free(vdev->vector_queues);
> > > > +    while (!QSIMPLEQ_EMPTY(&vdev->shmem_list)) {
> > > > +        shmem = QSIMPLEQ_FIRST(&vdev->shmem_list);
> > > > +        while (!QTAILQ_EMPTY(&shmem->mmaps)) {
> > > > +            VirtioSharedMemoryMapping *mapping = 
> > > > QTAILQ_FIRST(&shmem->mmaps);
> > > > +            virtio_del_shmem_map(shmem, mapping->offset, 
> > > > mapping->mr->size);
> > > > +        }
> > > > +
> > > > +        /* Clean up the embedded MemoryRegion */
> > > > +        object_unparent(OBJECT(&shmem->mr));
> > > > +        QSIMPLEQ_REMOVE_HEAD(&vdev->shmem_list, entry);
> > > > +        g_free(shmem);
> > > > +    }
> > > >  }
> > >
> > > The semantics across device reset also need to be defined. I think
> > > mappings should be deleted when the device is reset, but the VIRTIO
> > > Shared Memory Regions should remain. The reason I think this behavior
> > > makes sense is that it prevents stale fds remaining open and mapped into
> > > the guest (e.g. GPU or virtiofs resources). It seems safer than relying
> > > on the backend to unmap explicitly.
> >
> > Got it.
> >
> > >
> > > >
> > > >  static const Property virtio_properties[] = {
> > > > @@ -4402,9 +4591,18 @@ static const TypeInfo virtio_device_info = {
> > > >      .class_size = sizeof(VirtioDeviceClass),
> > > >  };
> > > >
> > > > +static const TypeInfo virtio_shared_memory_mapping_info = {
> > > > +    .name = TYPE_VIRTIO_SHARED_MEMORY_MAPPING,
> > > > +    .parent = TYPE_OBJECT,
> > > > +    .instance_size = sizeof(VirtioSharedMemoryMapping),
> > > > +    .instance_init = virtio_shared_memory_mapping_instance_init,
> > > > +    .instance_finalize = virtio_shared_memory_mapping_finalize,
> > > > +};
> > > > +
> > > >  static void virtio_register_types(void)
> > > >  {
> > > >      type_register_static(&virtio_device_info);
> > > > +    type_register_static(&virtio_shared_memory_mapping_info);
> > > >  }
> > > >
> > > >  type_init(virtio_register_types)
> > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> > > > index c594764f23..8cecb221cb 100644
> > > > --- a/include/hw/virtio/virtio.h
> > > > +++ b/include/hw/virtio/virtio.h
> > > > @@ -98,6 +98,46 @@ enum virtio_device_endian {
> > > >      VIRTIO_DEVICE_ENDIAN_BIG,
> > > >  };
> > > >
> > > > +#define TYPE_VIRTIO_SHARED_MEMORY_MAPPING 
> > > > "virtio-shared-memory-mapping"
> > > > +OBJECT_DECLARE_SIMPLE_TYPE(VirtioSharedMemoryMapping, 
> > > > VIRTIO_SHARED_MEMORY_MAPPING)
> > > > +
> > > > +/**
> > > > + * VirtioSharedMemoryMapping:
> > > > + * @parent: Parent QOM object
> > > > + * @shmid: VIRTIO Shared Memory Region ID
> > > > + * @fd: File descriptor for the shared memory region
> > > > + * @offset: Offset within the VIRTIO Shared Memory Region
> > > > + * @len: Size of the mapping
> > > > + * @mr: MemoryRegion associated with this shared memory mapping
> > > > + * @link: List entry for the shared memory region's mapping list
> > > > + *
> > > > + * A QOM object that represents an individual file descriptor-based 
> > > > shared
> > > > + * memory mapping within a VIRTIO Shared Memory Region. It manages the
> > > > + * MemoryRegion lifecycle and file descriptor cleanup through QOM 
> > > > reference
> > > > + * counting. When the object is unreferenced and its reference count 
> > > > drops
> > > > + * to zero, it automatically cleans up the MemoryRegion and closes the 
> > > > file
> > > > + * descriptor.
> > > > + */
> > > > +struct VirtioSharedMemoryMapping {
> > > > +    Object parent;
> > > > +
> > > > +    uint8_t shmid;
> > > > +    int fd;
> > > > +    hwaddr offset;
> > > > +    uint64_t len;
> > > > +    MemoryRegion *mr;
> > > > +    QTAILQ_ENTRY(VirtioSharedMemoryMapping) link;
> > > > +};
> > > > +
> > > > +struct VirtioSharedMemory {
> > > > +    uint8_t shmid;
> > > > +    MemoryRegion mr;
> > > > +    QTAILQ_HEAD(, VirtioSharedMemoryMapping) mmaps;
> > > > +    QSIMPLEQ_ENTRY(VirtioSharedMemory) entry;
> > > > +};
> > > > +
> > > > +typedef struct VirtioSharedMemory VirtioSharedMemory;
> > > > +
> > > >  /**
> > > >   * struct VirtIODevice - common VirtIO structure
> > > >   * @name: name of the device
> > > > @@ -167,6 +207,8 @@ struct VirtIODevice
> > > >       */
> > > >      EventNotifier config_notifier;
> > > >      bool device_iotlb_enabled;
> > > > +    /* Shared memory region for mappings. */
> > > > +    QSIMPLEQ_HEAD(, VirtioSharedMemory) shmem_list;
> > > >  };
> > > >
> > > >  struct VirtioDeviceClass {
> > > > @@ -295,6 +337,100 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue 
> > > > *vq);
> > > >
> > > >  int virtio_save(VirtIODevice *vdev, QEMUFile *f);
> > > >
> > > > +/**
> > > > + * virtio_new_shmem_region() - Create a new shared memory region
> > > > + * @vdev: VirtIODevice
> > > > + * @shmid: Shared memory ID
> > > > + * @size: Size of the shared memory region
> > > > + *
> > > > + * Creates a new VirtioSharedMemory region for the given device and ID.
> > > > + * The returned VirtioSharedMemory is owned by the VirtIODevice and 
> > > > will
> > > > + * be automatically freed when the device is destroyed. The caller
> > > > + * should not free the returned pointer.
> > > > + *
> > > > + * Returns: Pointer to the new VirtioSharedMemory region, or NULL on 
> > > > failure
> > > > + */
> > > > +VirtioSharedMemory *virtio_new_shmem_region(VirtIODevice *vdev, 
> > > > uint8_t shmid, uint64_t size);
> > > > +
> > > > +/**
> > > > + * virtio_find_shmem_region() - Find an existing shared memory region
> > > > + * @vdev: VirtIODevice
> > > > + * @shmid: Shared memory ID to find
> > > > + *
> > > > + * Finds an existing VirtioSharedMemory region by ID. The returned 
> > > > pointer
> > > > + * is owned by the VirtIODevice and should not be freed by the caller.
> > > > + *
> > > > + * Returns: Pointer to the VirtioSharedMemory region, or NULL if not 
> > > > found
> > > > + */
> > > > +VirtioSharedMemory *virtio_find_shmem_region(VirtIODevice *vdev, 
> > > > uint8_t shmid);
> > > > +
> > > > +/**
> > > > + * virtio_shared_memory_mapping_new() - Create a new 
> > > > VirtioSharedMemoryMapping
> > > > + * @shmid: VIRTIO Shared Memory Region ID
> > > > + * @fd: File descriptor for the shared memory
> > > > + * @fd_offset: Offset within the file descriptor
> > > > + * @shm_offset: Offset within the VIRTIO Shared Memory Region
> > > > + * @len: Size of the mapping
> > > > + * @allow_write: Whether to allow write access to the mapping
> > > > + *
> > > > + * Creates a new VirtioSharedMemoryMapping that manages a shared 
> > > > memory mapping.
> > > > + * The object will create a MemoryRegion using 
> > > > memory_region_init_ram_from_fd()
> > > > + * as a child object. When the object is finalized, it will 
> > > > automatically
> > > > + * clean up the MemoryRegion and close the file descriptor.
> > > > + *
> > > > + * Return: A new VirtioSharedMemoryMapping on success, NULL on error.
> > > > + */
> > > > +VirtioSharedMemoryMapping *virtio_shared_memory_mapping_new(uint8_t 
> > > > shmid,
> > > > +                                                            int fd,
> > > > +                                                            uint64_t 
> > > > fd_offset,
> > > > +                                                            uint64_t 
> > > > shm_offset,
> > > > +                                                            uint64_t 
> > > > len,
> > > > +                                                            bool 
> > > > allow_write);
> > > > +
> > > > +/**
> > > > + * virtio_add_shmem_map() - Add a memory mapping to a shared region
> > > > + * @shmem: VirtioSharedMemory region
> > > > + * @mapping: VirtioSharedMemoryMapping to add (transfers ownership)
> > > > + *
> > > > + * Adds a memory mapping to the shared memory region. The 
> > > > VirtioSharedMemoryMapping
> > > > + * ownership is transferred to the shared memory region and will be 
> > > > automatically
> > > > + * cleaned up through QOM reference counting when 
> > > > virtio_del_shmem_map() is
> > > > + * called or when the shared memory region is destroyed.
> > > > + *
> > > > + * Returns: 0 on success, negative errno on failure
> > > > + */
> > > > +int virtio_add_shmem_map(VirtioSharedMemory *shmem,
> > > > +                         VirtioSharedMemoryMapping *mapping);
> > > > +
> > > > +/**
> > > > + * virtio_find_shmem_map() - Find a memory mapping in a shared region
> > > > + * @shmem: VirtioSharedMemory region
> > > > + * @offset: Offset within the shared memory region
> > > > + * @size: Size of the mapping to find
> > > > + *
> > > > + * Finds an existing memory mapping that covers the specified range.
> > > > + * The returned VirtioSharedMemoryMapping is owned by the 
> > > > VirtioSharedMemory
> > > > + * region and should not be freed by the caller.
> > > > + *
> > > > + * Returns: Pointer to the VirtioSharedMemoryMapping, or NULL if not 
> > > > found
> > > > + */
> > > > +VirtioSharedMemoryMapping *virtio_find_shmem_map(VirtioSharedMemory 
> > > > *shmem,
> > > > +                                          hwaddr offset, uint64_t 
> > > > size);
> > > > +
> > > > +/**
> > > > + * virtio_del_shmem_map() - Remove a memory mapping from a shared 
> > > > region
> > > > + * @shmem: VirtioSharedMemory region
> > > > + * @offset: Offset of the mapping to remove
> > > > + * @size: Size of the mapping to remove
> > > > + *
> > > > + * Removes a memory mapping from the shared memory region. This will
> > > > + * automatically unref the associated VirtioSharedMemoryMapping, which may
> > > > + * trigger its finalization and cleanup if no other references exist.
> > > > + * The mapping's MemoryRegion will be properly unmapped and cleaned up.
> > > > + */
> > > > +void virtio_del_shmem_map(VirtioSharedMemory *shmem, hwaddr offset,
> > > > +                          uint64_t size);
> > > > +
> > > >  extern const VMStateInfo virtio_vmstate_info;
> > > >
> > > >  #define VMSTATE_VIRTIO_DEVICE \
> > > > diff --git a/subprojects/libvhost-user/libvhost-user.c 
> > > > b/subprojects/libvhost-user/libvhost-user.c
> > > > index 9c630c2170..034cbfdc3c 100644
> > > > --- a/subprojects/libvhost-user/libvhost-user.c
> > > > +++ b/subprojects/libvhost-user/libvhost-user.c
> > > > @@ -1592,6 +1592,76 @@ vu_rm_shared_object(VuDev *dev, unsigned char 
> > > > uuid[UUID_LEN])
> > > >      return vu_send_message(dev, &msg);
> > > >  }
> > > >
> > > > +bool
> > > > +vu_shmem_map(VuDev *dev, uint8_t shmid, uint64_t fd_offset,
> > > > +             uint64_t shm_offset, uint64_t len, uint64_t flags, int fd)
> > > > +{
> > > > +    VhostUserMsg vmsg = {
> > > > +        .request = VHOST_USER_BACKEND_SHMEM_MAP,
> > > > +        .size = sizeof(vmsg.payload.mmap),
> > > > +        .flags = VHOST_USER_VERSION,
> > > > +        .payload.mmap = {
> > > > +            .shmid = shmid,
> > > > +            .fd_offset = fd_offset,
> > > > +            .shm_offset = shm_offset,
> > > > +            .len = len,
> > > > +            .flags = flags,
> > > > +        },
> > > > +        .fd_num = 1,
> > > > +        .fds[0] = fd,
> > > > +    };
> > > > +
> > > > +    if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SHMEM)) {
> > > > +        return false;
> > > > +    }
> > > > +
> > > > +    if (vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_REPLY_ACK)) 
> > > > {
> > > > +        vmsg.flags |= VHOST_USER_NEED_REPLY_MASK;
> > > > +    }
> > > > +
> > > > +    pthread_mutex_lock(&dev->backend_mutex);
> > > > +    if (!vu_message_write(dev, dev->backend_fd, &vmsg)) {
> > > > +        pthread_mutex_unlock(&dev->backend_mutex);
> > > > +        return false;
> > > > +    }
> > > > +
> > > > +    /* Also unlocks the backend_mutex */
> > > > +    return vu_process_message_reply(dev, &vmsg);
> > > > +}
> > > > +
> > > > +bool
> > > > +vu_shmem_unmap(VuDev *dev, uint8_t shmid, uint64_t shm_offset, 
> > > > uint64_t len)
> > > > +{
> > > > +    VhostUserMsg vmsg = {
> > > > +        .request = VHOST_USER_BACKEND_SHMEM_UNMAP,
> > > > +        .size = sizeof(vmsg.payload.mmap),
> > > > +        .flags = VHOST_USER_VERSION,
> > > > +        .payload.mmap = {
> > > > +            .shmid = shmid,
> > > > +            .fd_offset = 0,
> > > > +            .shm_offset = shm_offset,
> > > > +            .len = len,
> > > > +        },
> > > > +    };
> > > > +
> > > > +    if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SHMEM)) {
> > > > +        return false;
> > > > +    }
> > > > +
> > > > +    if (vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_REPLY_ACK)) 
> > > > {
> > > > +        vmsg.flags |= VHOST_USER_NEED_REPLY_MASK;
> > > > +    }
> > > > +
> > > > +    pthread_mutex_lock(&dev->backend_mutex);
> > > > +    if (!vu_message_write(dev, dev->backend_fd, &vmsg)) {
> > > > +        pthread_mutex_unlock(&dev->backend_mutex);
> > > > +        return false;
> > > > +    }
> > > > +
> > > > +    /* Also unlocks the backend_mutex */
> > > > +    return vu_process_message_reply(dev, &vmsg);
> > > > +}
> > > > +
> > > >  static bool
> > > >  vu_set_vring_call_exec(VuDev *dev, VhostUserMsg *vmsg)
> > > >  {
> > > > diff --git a/subprojects/libvhost-user/libvhost-user.h 
> > > > b/subprojects/libvhost-user/libvhost-user.h
> > > > index 2ffc58c11b..26b710c92d 100644
> > > > --- a/subprojects/libvhost-user/libvhost-user.h
> > > > +++ b/subprojects/libvhost-user/libvhost-user.h
> > > > @@ -69,6 +69,8 @@ enum VhostUserProtocolFeature {
> > > >      /* Feature 16 is reserved for VHOST_USER_PROTOCOL_F_STATUS. */
> > > >      /* Feature 17 reserved for VHOST_USER_PROTOCOL_F_XEN_MMAP. */
> > > >      VHOST_USER_PROTOCOL_F_SHARED_OBJECT = 18,
> > > > +    /* Feature 19 is reserved for VHOST_USER_PROTOCOL_F_DEVICE_STATE */
> > > > +    VHOST_USER_PROTOCOL_F_SHMEM = 20,
> > > >      VHOST_USER_PROTOCOL_F_MAX
> > > >  };
> > > >
> > > > @@ -127,6 +129,8 @@ typedef enum VhostUserBackendRequest {
> > > >      VHOST_USER_BACKEND_SHARED_OBJECT_ADD = 6,
> > > >      VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE = 7,
> > > >      VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP = 8,
> > > > +    VHOST_USER_BACKEND_SHMEM_MAP = 9,
> > > > +    VHOST_USER_BACKEND_SHMEM_UNMAP = 10,
> > > >      VHOST_USER_BACKEND_MAX
> > > >  }  VhostUserBackendRequest;
> > > >
> > > > @@ -186,6 +190,23 @@ typedef struct VhostUserShared {
> > > >      unsigned char uuid[UUID_LEN];
> > > >  } VhostUserShared;
> > > >
> > > > +/* For the flags field of VhostUserMMap */
> > > > +#define VHOST_USER_FLAG_MAP_RW (1u << 0)
> > > > +
> > > > +typedef struct {
> > > > +    /* VIRTIO Shared Memory Region ID */
> > > > +    uint8_t shmid;
> > > > +    uint8_t padding[7];
> > > > +    /* File offset */
> > > > +    uint64_t fd_offset;
> > > > +    /* Offset within the VIRTIO Shared Memory Region */
> > > > +    uint64_t shm_offset;
> > > > +    /* Size of the mapping */
> > > > +    uint64_t len;
> > > > +    /* Flags for the mmap operation, from VHOST_USER_FLAG_MAP_* */
> > > > +    uint16_t flags;
> > > > +} VhostUserMMap;
> > > > +
> > > >  #define VU_PACKED __attribute__((packed))
> > > >
> > > >  typedef struct VhostUserMsg {
> > > > @@ -210,6 +231,7 @@ typedef struct VhostUserMsg {
> > > >          VhostUserVringArea area;
> > > >          VhostUserInflight inflight;
> > > >          VhostUserShared object;
> > > > +        VhostUserMMap mmap;
> > > >      } payload;
> > > >
> > > >      int fds[VHOST_MEMORY_BASELINE_NREGIONS];
> > > > @@ -593,6 +615,38 @@ bool vu_add_shared_object(VuDev *dev, unsigned 
> > > > char uuid[UUID_LEN]);
> > > >   */
> > > >  bool vu_rm_shared_object(VuDev *dev, unsigned char uuid[UUID_LEN]);
> > > >
> > > > +/**
> > > > + * vu_shmem_map:
> > > > + * @dev: a VuDev context
> > > > + * @shmid: VIRTIO Shared Memory Region ID
> > > > + * @fd_offset: File offset
> > > > + * @shm_offset: Offset within the VIRTIO Shared Memory Region
> > > > + * @len: Size of the mapping
> > > > + * @flags: Flags for the mmap operation
> > > > + * @fd: A file descriptor
> > > > + *
> > > > + * Advertises a new mapping to be made in a given VIRTIO Shared Memory 
> > > > Region.
> > > > + *
> > > > + * Returns: TRUE on success, FALSE on failure.
> > > > + */
> > > > +bool vu_shmem_map(VuDev *dev, uint8_t shmid, uint64_t fd_offset,
> > > > +                  uint64_t shm_offset, uint64_t len, uint64_t flags, 
> > > > int fd);
> > > > +
> > > > +/**
> > > > + * vu_shmem_unmap:
> > > > + * @dev: a VuDev context
> > > > + * @shmid: VIRTIO Shared Memory Region ID
> > > > + * @shm_offset: Offset within the VIRTIO Shared Memory Region
> > > > + * @len: Size of the mapping
> > > > + *
> > > > + * The front-end un-mmaps a given range in the VIRTIO Shared Memory 
> > > > Region
> > > > + * with the requested `shmid`.
> > > > + *
> > > > + * Returns: TRUE on success, FALSE on failure.
> > > > + */
> > > > +bool vu_shmem_unmap(VuDev *dev, uint8_t shmid, uint64_t shm_offset,
> > > > +                    uint64_t len);
> > > > +
> > > >  /**
> > > >   * vu_queue_set_notification:
> > > >   * @dev: a VuDev context
> > > > --
> > > > 2.49.0
> > > >
> >


Reply via email to