> -----Original Message-----
> From: Eugenio Perez Martin <[email protected]>
> Sent: December 4, 2025 16:41
> To: Wafer <[email protected]>
> Cc: [email protected]; [email protected]; [email protected];
> [email protected]; [email protected]; Angus Chen
> <[email protected]>
> Subject: Re: [PATCH 2/4] vdpa: implement the interfaces alloc/free for
> indirect desc
>
> On Thu, Dec 4, 2025 at 8:39 AM Wafer Xie <[email protected]> wrote:
> >
> > Dynamically allocate and free the indirect descriptor table.
> >
> > Signed-off-by: Wafer Xie <[email protected]>
> > ---
> > hw/virtio/vhost-shadow-virtqueue.c | 5 +-
> > hw/virtio/vhost-shadow-virtqueue.h | 49 ++++++++++-
> > hw/virtio/vhost-vdpa.c | 130 ++++++++++++++++++++++++++++-
> > 3 files changed, 180 insertions(+), 4 deletions(-)
> >
> > diff --git a/hw/virtio/vhost-shadow-virtqueue.c
> > b/hw/virtio/vhost-shadow-virtqueue.c
> > index 2481d49345..ecc3245138 100644
> > --- a/hw/virtio/vhost-shadow-virtqueue.c
> > +++ b/hw/virtio/vhost-shadow-virtqueue.c
> > @@ -756,15 +756,18 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
> > *
> > * @ops: SVQ owner callbacks
> > * @ops_opaque: ops opaque pointer
> > + * @indirect_ops: Indirect descriptor operations
> > */
> > VhostShadowVirtqueue *vhost_svq_new(const VhostShadowVirtqueueOps *ops,
> > - void *ops_opaque)
> > + void *ops_opaque,
> > + SVQIndirectOps *indirect_ops)
> > {
> > VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1);
> >
> > event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
> > svq->ops = ops;
> > svq->ops_opaque = ops_opaque;
> > + svq->indirect_ops = indirect_ops;
> > return svq;
> > }
> >
> > diff --git a/hw/virtio/vhost-shadow-virtqueue.h
> > b/hw/virtio/vhost-shadow-virtqueue.h
> > index 2b36df4dd5..c5d654eae8 100644
> > --- a/hw/virtio/vhost-shadow-virtqueue.h
> > +++ b/hw/virtio/vhost-shadow-virtqueue.h
> > @@ -54,6 +54,49 @@ typedef struct VhostShadowVirtqueueOps {
> >     VirtQueueAvailCallback avail_handler;
> > } VhostShadowVirtqueueOps;
> >
> > +/**
> > + * Callback to allocate indirect descriptor table.
> > + *
> > + * @svq: Shadow virtqueue
> > + * @num: Number of descriptors
> > + * @desc: Output pointer to allocated descriptor table
> > + * @iova: Output IOVA of the allocated table
> > + * @size: Output size of the allocated region
> > + * @opaque: Opaque data (vhost_vdpa pointer)
> > + *
> > + * Returns true on success, false on failure.
> > + */
> > +typedef bool (*SVQIndirectDescAlloc)(VhostShadowVirtqueue *svq,
> > + size_t num,
> > + vring_desc_t **desc,
> > + hwaddr *iova,
> > + size_t *size,
> > + void *opaque);
> > +
> > +/**
> > + * Callback to free indirect descriptor table.
> > + *
> > + * @svq: Shadow virtqueue
> > + * @desc: Descriptor table to free
> > + * @iova: IOVA of the table
> > + * @size: Size of the allocated region
> > + * @opaque: Opaque data (vhost_vdpa pointer)
> > + */
> > +typedef void (*SVQIndirectDescFree)(VhostShadowVirtqueue *svq,
> > +                                    vring_desc_t *desc,
> > +                                    hwaddr iova,
> > +                                    size_t size,
> > +                                    void *opaque);
> > +
> > +/**
> > + * Indirect descriptor operations and context
> > + */
> > +typedef struct SVQIndirectOps {
> > +    SVQIndirectDescAlloc alloc;
> > +    SVQIndirectDescFree free;
> > +    void *opaque; /* Pointer to struct vhost_vdpa */
> > +} SVQIndirectOps;
> > +
> > /* Shadow virtqueue to relay notifications */
> > typedef struct VhostShadowVirtqueue {
> > /* Shadow vring */
> > @@ -104,6 +147,9 @@ typedef struct VhostShadowVirtqueue {
> > /* Caller callbacks opaque */
> > void *ops_opaque;
> >
> > + /* Indirect descriptor operations */
> > + SVQIndirectOps *indirect_ops;
> > +
> > /* Next head to expose to the device */
> > uint16_t shadow_avail_idx;
> >
> > @@ -143,7 +189,8 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
> > void vhost_svq_stop(VhostShadowVirtqueue *svq);
> >
> > VhostShadowVirtqueue *vhost_svq_new(const VhostShadowVirtqueueOps *ops,
> > - void *ops_opaque);
> > + void *ops_opaque,
> > + SVQIndirectOps *indirect_ops);
> >
> > void vhost_svq_free(gpointer vq);
> > G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free);
> > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> > index 7061b6e1a3..1719993f52 100644
> > --- a/hw/virtio/vhost-vdpa.c
> > +++ b/hw/virtio/vhost-vdpa.c
> > @@ -584,15 +584,130 @@ static int vhost_vdpa_get_dev_features(struct vhost_dev *dev,
> > return ret;
> > }
> >
> > +/**
> > + * Allocate indirect descriptor table for SVQ
> > + *
> > + * @svq: Shadow virtqueue
> > + * @num: Number of descriptors needed
> > + * @desc: Output pointer to the allocated table
> > + * @iova: Output IOVA for the table
> > + * @size: Output size of the allocated region
> > + * @opaque: Opaque pointer (vhost_vdpa instance)
> > + *
> > + * Returns true on success, false on failure.
> > + */
> > +static bool vhost_vdpa_svq_indirect_desc_alloc(VhostShadowVirtqueue *svq,
> > + size_t num,
> > + vring_desc_t **desc,
> > + hwaddr *iova,
> > + size_t *size,
> > +                                               void *opaque)
> > +{
> > + struct vhost_vdpa *v = opaque;
> > + size_t desc_size = sizeof(vring_desc_t) * num;
> > +    size_t alloc_size = ROUND_UP(desc_size, qemu_real_host_page_size());
> > + DMAMap needle = {
> > + .size = alloc_size - 1,
> > + .perm = IOMMU_RO,
> > + };
> > + vring_desc_t *indirect_desc;
> > + int r;
> > +
> > + indirect_desc = mmap(NULL, alloc_size, PROT_READ | PROT_WRITE,
> > + MAP_SHARED | MAP_ANONYMOUS, -1, 0);
> > + if (indirect_desc == MAP_FAILED) {
> > + error_report("Cannot allocate indirect descriptor table");
> > + return false;
> > + }
> > +
> > + r = vhost_iova_tree_map_alloc(v->shared->iova_tree, &needle,
> > + (hwaddr)(uintptr_t)indirect_desc);
> > + if (unlikely(r != IOVA_OK)) {
> > +        error_report("Cannot allocate iova for indirect descriptors (%d)", r);
> > + goto err_iova_alloc;
> > + }
> > +
> > +    r = vhost_vdpa_dma_map(v->shared, v->address_space_id, needle.iova,
> > + alloc_size, indirect_desc, true);
>
> Thanks for moving this forward! But the map and unmap operations could be
> heavy in vDPA. My idea was to use a statically allocated buffer for the
> indirect table first, but then we need to agree on the right size for it,
> etc.
>
Yes, since virtio indirect descriptors require a contiguous descriptor array
and the size is not fixed, a pool does not really fit. I'm planning to
implement it with a dual-array approach instead; the updated patch will be
ready soon.
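
To make this concrete, here is a rough sketch of the dual-array shape I have
in mind (all names are placeholders, not the final patch): each SVQ gets two
fixed-size descriptor areas that are mmap()ed and DMA-mapped once at start
time, so the hot path only claims a free slot:

    /* Hypothetical sketch; names, sizes and policy are not final. */
    typedef struct SVQIndirectArea {
        vring_desc_t *desc;   /* host VA, mapped once at SVQ start */
        hwaddr iova;          /* IOVA from the one-time dma_map */
        size_t num;           /* capacity in descriptors */
        bool in_use;          /* owned by an in-flight request */
    } SVQIndirectArea;

    /*
     * Claim one of the two pre-mapped areas; no mmap()/dma_map() on the
     * hot path. Returns NULL when both areas are busy or too small, and
     * the caller falls back to chained descriptors.
     */
    static vring_desc_t *svq_indirect_area_get(SVQIndirectArea areas[2],
                                               size_t num, hwaddr *iova)
    {
        for (int i = 0; i < 2; i++) {
            if (!areas[i].in_use && num <= areas[i].num) {
                areas[i].in_use = true;
                *iova = areas[i].iova;
                return areas[i].desc;
            }
        }
        return NULL;
    }

Freeing then reduces to clearing in_use when the used descriptor comes back,
and the DMA mapping stays alive for the lifetime of the SVQ.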
> Can you profile the impact of it vs converting them to chained? If we find we
> can improve the performance then it would be a great addition.
>
Regarding the performance impact versus converting to chained descriptors:
I will include benchmark results comparing chained and indirect descriptors
in the next version of the patch.
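
For reference, the spec-level difference between the two layouts being
compared (virtio 1.x split ring; a0..a2, l0..l2 and the indexes are
placeholders, and endianness conversions are omitted):

    /* Chained: each element consumes a slot in the ring's table. */
    desc[i0] = (vring_desc_t){ .addr = a0, .len = l0,
                               .flags = VRING_DESC_F_NEXT, .next = i1 };
    desc[i1] = (vring_desc_t){ .addr = a1, .len = l1,
                               .flags = VRING_DESC_F_NEXT, .next = i2 };
    desc[i2] = (vring_desc_t){ .addr = a2, .len = l2, .flags = 0 };

    /*
     * Indirect: a single ring slot points at an out-of-ring table that
     * holds the same three descriptors; len is the table size in bytes.
     */
    desc[i0] = (vring_desc_t){ .addr  = table_iova,
                               .len   = 3 * sizeof(vring_desc_t),
                               .flags = VRING_DESC_F_INDIRECT };

Indirect saves ring slots at the cost of building (and, in this patch,
mapping) the out-of-ring table; that is the trade-off the benchmark should
quantify.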
> > + if (unlikely(r != 0)) {
> > + error_report("Cannot map indirect descriptors to device: %s (%d)",
> > + g_strerror(-r), -r);
> > + goto err_dma_map;
> > + }
> > +
> > + *desc = indirect_desc;
> > + *iova = needle.iova;
> > + *size = alloc_size;
> > + return true;
> > +
> > +err_dma_map:
> > + vhost_iova_tree_remove(v->shared->iova_tree, needle);
> > +err_iova_alloc:
> > + munmap(indirect_desc, alloc_size);
> > + return false;
> > +}
> > +
> > +/**
> > + * Free indirect descriptor table for SVQ
> > + *
> > + * @svq: Shadow virtqueue
> > + * @desc: Descriptor table to free
> > + * @iova: IOVA of the table
> > + * @size: Size of the allocated region
> > + * @opaque: Opaque pointer (vhost_vdpa instance)
> > + */
> > +static void vhost_vdpa_svq_indirect_desc_free(VhostShadowVirtqueue *svq,
> > +                                              vring_desc_t *desc,
> > +                                              hwaddr iova,
> > +                                              size_t size,
> > +                                              void *opaque)
> > +{
> > + struct vhost_vdpa *v = opaque;
> > + const DMAMap needle = {
> > + .translated_addr = (hwaddr)(uintptr_t)desc,
> > + };
> > + const DMAMap *result;
> > + int r;
> > +
> > + /* Find the mapping in the IOVA tree by HVA */
> > + result = vhost_iova_tree_find_iova(v->shared->iova_tree, &needle);
> > + if (unlikely(!result)) {
> > + error_report("Cannot find indirect descriptor mapping to unmap: "
> > + "iova=0x%" HWADDR_PRIx " hva=0x%" HWADDR_PRIx "
> size=%zu",
> > + iova, needle.translated_addr, size);
> > + return;
> > + }
> > +
> > +    r = vhost_vdpa_dma_unmap(v->shared, v->address_space_id, result->iova,
> > + size);
> > + if (unlikely(r != 0)) {
> > + error_report("Cannot unmap indirect descriptors: "
> > + "iova=0x%" HWADDR_PRIx " size=%zu: %s (%d)",
> > + result->iova, size, g_strerror(-r), -r);
> > + }
> > +
> > + vhost_iova_tree_remove(v->shared->iova_tree, *result);
> > +
> > + munmap(desc, size);
> > +}
> > +
> > static void vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v)
> > {
> > g_autoptr(GPtrArray) shadow_vqs = NULL;
> > + SVQIndirectOps *indirect_ops;
> > +
> > + /* Create indirect descriptor ops */
> > + indirect_ops = g_new0(SVQIndirectOps, 1);
> > + indirect_ops->alloc = vhost_vdpa_svq_indirect_desc_alloc;
> > + indirect_ops->free = vhost_vdpa_svq_indirect_desc_free;
> > + indirect_ops->opaque = v;
> >
> > shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
> > for (unsigned n = 0; n < hdev->nvqs; ++n) {
> > VhostShadowVirtqueue *svq;
> >
> > -        svq = vhost_svq_new(v->shadow_vq_ops, v->shadow_vq_ops_opaque);
> > +        svq = vhost_svq_new(v->shadow_vq_ops, v->shadow_vq_ops_opaque,
> > + indirect_ops);
> > g_ptr_array_add(shadow_vqs, svq);
> > }
> >
> > @@ -789,10 +904,21 @@ static void vhost_vdpa_svq_cleanup(struct vhost_dev *dev)
> > {
> > struct vhost_vdpa *v = dev->opaque;
> > size_t idx;
> > + SVQIndirectOps *indirect_ops = NULL;
> >
> > for (idx = 0; idx < v->shadow_vqs->len; ++idx) {
> > - vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, idx));
> > +        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, idx);
> > +
> > +        /* Save indirect_ops pointer from first SVQ (all share the same one) */
>
> Why not put it in shared, then?
>
> But I don't really get why they are callbacks. What's the issue with
> if (indirect) { call to indirect functions }?
>
The next version of the patch will use a statically allocated buffer, so no
callbacks will be added.
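
A minimal sketch of the direct-call shape (svq->indirect and both helpers
are placeholder names, not existing code):

    /* Hypothetical direct-call dispatch, no callback indirection. */
    if (svq->indirect) {
        ok = vhost_svq_add_indirect(svq, out_sg, out_num, in_sg, in_num);
    } else {
        ok = vhost_svq_add_chained(svq, out_sg, out_num, in_sg, in_num);
    }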
> > + if (idx == 0 && svq->indirect_ops) {
> > + indirect_ops = svq->indirect_ops;
> > + }
> > +
> > + vhost_svq_stop(svq);
> > }
> > +
> > + g_free(indirect_ops);
> > +
> >     g_ptr_array_free(v->shadow_vqs, true);
> > }
> >
> > --
> > 2.34.1
> >