On Thu, Dec 4, 2025 at 8:39 AM Wafer Xie <[email protected]> wrote:
>
> Dynamically allocate and free the indirect descriptor table.
>
> Signed-off-by: Wafer Xie <[email protected]>
> ---
> hw/virtio/vhost-shadow-virtqueue.c | 5 +-
> hw/virtio/vhost-shadow-virtqueue.h | 49 ++++++++++-
> hw/virtio/vhost-vdpa.c | 130 ++++++++++++++++++++++++++++-
> 3 files changed, 180 insertions(+), 4 deletions(-)
>
> diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
> index 2481d49345..ecc3245138 100644
> --- a/hw/virtio/vhost-shadow-virtqueue.c
> +++ b/hw/virtio/vhost-shadow-virtqueue.c
> @@ -756,15 +756,18 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
> *
> * @ops: SVQ owner callbacks
> * @ops_opaque: ops opaque pointer
> + * @indirect_ops: Indirect descriptor operations
> */
> VhostShadowVirtqueue *vhost_svq_new(const VhostShadowVirtqueueOps *ops,
> - void *ops_opaque)
> + void *ops_opaque,
> + SVQIndirectOps *indirect_ops)
> {
> VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1);
>
> event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
> svq->ops = ops;
> svq->ops_opaque = ops_opaque;
> + svq->indirect_ops = indirect_ops;
> return svq;
> }
>
> diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
> index 2b36df4dd5..c5d654eae8 100644
> --- a/hw/virtio/vhost-shadow-virtqueue.h
> +++ b/hw/virtio/vhost-shadow-virtqueue.h
> @@ -54,6 +54,49 @@ typedef struct VhostShadowVirtqueueOps {
> VirtQueueAvailCallback avail_handler;
> } VhostShadowVirtqueueOps;
>
> +/**
> + * Callback to allocate indirect descriptor table.
> + *
> + * @svq: Shadow virtqueue
> + * @num: Number of descriptors
> + * @desc: Output pointer to allocated descriptor table
> + * @iova: Output IOVA of the allocated table
> + * @size: Output size of the allocated region
> + * @opaque: Opaque data (vhost_vdpa pointer)
> + *
> + * Returns true on success, false on failure.
> + */
> +typedef bool (*SVQIndirectDescAlloc)(VhostShadowVirtqueue *svq,
> + size_t num,
> + vring_desc_t **desc,
> + hwaddr *iova,
> + size_t *size,
> + void *opaque);
> +
> +/**
> + * Callback to free indirect descriptor table.
> + *
> + * @svq: Shadow virtqueue
> + * @desc: Descriptor table to free
> + * @iova: IOVA of the table
> + * @size: Size of the allocated region
> + * @opaque: Opaque data (vhost_vdpa pointer)
> + */
> +typedef void (*SVQIndirectDescFree)(VhostShadowVirtqueue *svq,
> + vring_desc_t *desc,
> + hwaddr iova,
> + size_t size,
> + void *opaque);
> +
> +/**
> + * Indirect descriptor operations and context
> + */
> +typedef struct SVQIndirectOps {
> + SVQIndirectDescAlloc alloc;
> + SVQIndirectDescFree free;
> + void *opaque; /* Pointer to struct vhost_vdpa */
> +} SVQIndirectOps;
> +
> /* Shadow virtqueue to relay notifications */
> typedef struct VhostShadowVirtqueue {
> /* Shadow vring */
> @@ -104,6 +147,9 @@ typedef struct VhostShadowVirtqueue {
> /* Caller callbacks opaque */
> void *ops_opaque;
>
> + /* Indirect descriptor operations */
> + SVQIndirectOps *indirect_ops;
> +
> /* Next head to expose to the device */
> uint16_t shadow_avail_idx;
>
> @@ -143,7 +189,8 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
> void vhost_svq_stop(VhostShadowVirtqueue *svq);
>
> VhostShadowVirtqueue *vhost_svq_new(const VhostShadowVirtqueueOps *ops,
> - void *ops_opaque);
> + void *ops_opaque,
> + SVQIndirectOps *indirect_ops);
>
> void vhost_svq_free(gpointer vq);
> G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free);
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index 7061b6e1a3..1719993f52 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -584,15 +584,130 @@ static int vhost_vdpa_get_dev_features(struct vhost_dev *dev,
> return ret;
> }
>
> +/**
> + * Allocate indirect descriptor table for SVQ
> + *
> + * @svq: Shadow virtqueue
> + * @num: Number of descriptors needed
> + * @desc: Output pointer to the allocated table
> + * @iova: Output IOVA for the table
> + * @size: Output size of the allocated region
> + * @opaque: Opaque pointer (vhost_vdpa instance)
> + *
> + * Returns true on success, false on failure.
> + */
> +static bool vhost_vdpa_svq_indirect_desc_alloc(VhostShadowVirtqueue *svq,
> + size_t num,
> + vring_desc_t **desc,
> + hwaddr *iova,
> + size_t *size,
> + void *opaque)
> +{
> + struct vhost_vdpa *v = opaque;
> + size_t desc_size = sizeof(vring_desc_t) * num;
> + size_t alloc_size = ROUND_UP(desc_size, qemu_real_host_page_size());
> + DMAMap needle = {
> + .size = alloc_size - 1,
> + .perm = IOMMU_RO,
> + };
> + vring_desc_t *indirect_desc;
> + int r;
> +
> + indirect_desc = mmap(NULL, alloc_size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
> + if (indirect_desc == MAP_FAILED) {
> + error_report("Cannot allocate indirect descriptor table");
> + return false;
> + }
> +
> + r = vhost_iova_tree_map_alloc(v->shared->iova_tree, &needle,
> + (hwaddr)(uintptr_t)indirect_desc);
> + if (unlikely(r != IOVA_OK)) {
> + error_report("Cannot allocate iova for indirect descriptors (%d)", r);
> + goto err_iova_alloc;
> + }
> +
> + r = vhost_vdpa_dma_map(v->shared, v->address_space_id, needle.iova,
> + alloc_size, indirect_desc, true);
Thanks for moving this forward! But the map and unmap operations can
be heavy in vDPA. My idea was to use a statically allocated buffer for
the indirect table first, although then we need to agree on the right
size for it, etc.
Can you profile the impact of this approach vs converting the requests
to chained descriptors? If we find it improves performance, it would
be a great addition.
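Just to make it concrete, what I have in mind is something along these
lines (completely untested sketch; the pool sizes and all names below
are made up for illustration, they are not existing QEMU code). The
whole region would be mmap'ed and DMA-mapped once when the SVQ starts,
so the datapath only grabs a free slot and never calls map/unmap:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define POOL_TABLES     256  /* one slot per in-flight request (example) */
#define DESCS_PER_TABLE 64   /* longest chain accepted as indirect (example) */

struct desc_stub {           /* stand-in for vring_desc_t */
    uint64_t addr;
    uint32_t len;
    uint16_t flags;
    uint16_t next;
};

struct indirect_pool {
    struct desc_stub *base;  /* host VA of the pool, mapped once */
    uint64_t iova_base;      /* IOVA of the pool, mapped once */
    uint32_t free_slots[POOL_TABLES];
    uint32_t n_free;
};

static void pool_init(struct indirect_pool *p, void *mapped_base,
                      uint64_t iova)
{
    p->base = mapped_base;
    p->iova_base = iova;
    p->n_free = POOL_TABLES;
    for (uint32_t i = 0; i < POOL_TABLES; i++) {
        p->free_slots[i] = i;
    }
}

/* Datapath: O(1), no syscalls, no DMA map/unmap. */
static struct desc_stub *pool_get(struct indirect_pool *p, size_t num,
                                  uint64_t *iova)
{
    if (num > DESCS_PER_TABLE || p->n_free == 0) {
        return NULL;  /* caller falls back to chained descriptors */
    }
    uint32_t slot = p->free_slots[--p->n_free];
    *iova = p->iova_base +
            (uint64_t)slot * DESCS_PER_TABLE * sizeof(struct desc_stub);
    return p->base + (size_t)slot * DESCS_PER_TABLE;
}

static void pool_put(struct indirect_pool *p, struct desc_stub *table)
{
    uint32_t slot = (uint32_t)((table - p->base) / DESCS_PER_TABLE);
    p->free_slots[p->n_free++] = slot;
}

The open question is just how big to make POOL_TABLES and
DESCS_PER_TABLE, which is the sizing agreement I mentioned above.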
> + if (unlikely(r != 0)) {
> + error_report("Cannot map indirect descriptors to device: %s (%d)",
> + g_strerror(-r), -r);
> + goto err_dma_map;
> + }
> +
> + *desc = indirect_desc;
> + *iova = needle.iova;
> + *size = alloc_size;
> + return true;
> +
> +err_dma_map:
> + vhost_iova_tree_remove(v->shared->iova_tree, needle);
> +err_iova_alloc:
> + munmap(indirect_desc, alloc_size);
> + return false;
> +}
> +
> +/**
> + * Free indirect descriptor table for SVQ
> + *
> + * @svq: Shadow virtqueue
> + * @desc: Descriptor table to free
> + * @iova: IOVA of the table
> + * @size: Size of the allocated region
> + * @opaque: Opaque pointer (vhost_vdpa instance)
> + */
> +static void vhost_vdpa_svq_indirect_desc_free(VhostShadowVirtqueue *svq,
> + vring_desc_t *desc,
> + hwaddr iova,
> + size_t size,
> + void *opaque)
> +{
> + struct vhost_vdpa *v = opaque;
> + const DMAMap needle = {
> + .translated_addr = (hwaddr)(uintptr_t)desc,
> + };
> + const DMAMap *result;
> + int r;
> +
> + /* Find the mapping in the IOVA tree by HVA */
> + result = vhost_iova_tree_find_iova(v->shared->iova_tree, &needle);
> + if (unlikely(!result)) {
> + error_report("Cannot find indirect descriptor mapping to unmap: "
> + "iova=0x%" HWADDR_PRIx " hva=0x%" HWADDR_PRIx " size=%zu",
> + iova, needle.translated_addr, size);
> + return;
> + }
> +
> + r = vhost_vdpa_dma_unmap(v->shared, v->address_space_id, result->iova,
> + size);
> + if (unlikely(r != 0)) {
> + error_report("Cannot unmap indirect descriptors: "
> + "iova=0x%" HWADDR_PRIx " size=%zu: %s (%d)",
> + result->iova, size, g_strerror(-r), -r);
> + }
> +
> + vhost_iova_tree_remove(v->shared->iova_tree, *result);
> +
> + munmap(desc, size);
> +}
> +
> static void vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v)
> {
> g_autoptr(GPtrArray) shadow_vqs = NULL;
> + SVQIndirectOps *indirect_ops;
> +
> + /* Create indirect descriptor ops */
> + indirect_ops = g_new0(SVQIndirectOps, 1);
> + indirect_ops->alloc = vhost_vdpa_svq_indirect_desc_alloc;
> + indirect_ops->free = vhost_vdpa_svq_indirect_desc_free;
> + indirect_ops->opaque = v;
>
> shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
> for (unsigned n = 0; n < hdev->nvqs; ++n) {
> VhostShadowVirtqueue *svq;
>
> - svq = vhost_svq_new(v->shadow_vq_ops, v->shadow_vq_ops_opaque);
> + svq = vhost_svq_new(v->shadow_vq_ops, v->shadow_vq_ops_opaque,
> + indirect_ops);
> g_ptr_array_add(shadow_vqs, svq);
> }
>
> @@ -789,10 +904,21 @@ static void vhost_vdpa_svq_cleanup(struct vhost_dev *dev)
> {
> struct vhost_vdpa *v = dev->opaque;
> size_t idx;
> + SVQIndirectOps *indirect_ops = NULL;
>
> for (idx = 0; idx < v->shadow_vqs->len; ++idx) {
> - vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, idx));
> + VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, idx);
> +
> + /* Save indirect_ops pointer from first SVQ (all share the same one) */
Why not put it in the shared struct then?
But I don't really get why these are callbacks. What's the issue with
just doing if (indirect) { call the indirect functions }?
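For reference, the shape I'm thinking of is just a flag plus a direct
call, roughly like this (untested sketch; svq->vdpa, use_indirect and
vdpa_indirect_table_get() are made-up names for illustration, not
existing QEMU symbols):

#include <stdbool.h>
#include <stddef.h>

struct vdpa_ctx;                   /* would be struct vhost_vdpa */

struct shadow_vq {
    struct vdpa_ctx *vdpa;         /* set once by vhost-vdpa at init */
    bool use_indirect;             /* indirect descriptors in use? */
};

/* Hypothetical helper in vhost-vdpa.c; stubbed here so the sketch builds. */
static bool vdpa_indirect_table_get(struct vdpa_ctx *v, size_t num,
                                    void **table)
{
    (void)v;
    (void)num;
    *table = NULL;
    return false;
}

static bool svq_prepare_descs(struct shadow_vq *svq, size_t num, void **table)
{
    if (svq->use_indirect && num > 1) {
        /* Plain branch + direct call, no function-pointer ops struct. */
        return vdpa_indirect_table_get(svq->vdpa, num, table);
    }
    *table = NULL;                 /* use chained descriptors instead */
    return true;
}

That would also make the lifetime handling in the cleanup path below
simpler, since there would be no per-SVQ ops struct to free.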
> + if (idx == 0 && svq->indirect_ops) {
> + indirect_ops = svq->indirect_ops;
> + }
> +
> + vhost_svq_stop(svq);
> }
> +
> + g_free(indirect_ops);
> +
> g_ptr_array_free(v->shadow_vqs, true);
> }
>
> --
> 2.34.1
>