> -----Original Message-----
> From: Eugenio Perez Martin <[email protected]>
> Sent: 19 December 2025 16:12
> To: Wafer <[email protected]>
> Cc: [email protected]; [email protected]; [email protected];
> [email protected]; [email protected]; Angus Chen
> <[email protected]>
> Subject: Re: [PATCH v3 4/4] vhost: SVQ add the indirect descriptors to
> available ring
> 
> On Tue, Dec 16, 2025 at 2:59 AM Wafer Xie <[email protected]> wrote:
> >
> > From: wafer Xie <[email protected]>
> >
> > Retrieve the target buffer from the indirect buffers by index, write
> > the elements sent by the guest into that buffer's indirect descriptor
> > table, and update freed_head and freed_descs. If freed_descs reaches
> > zero, or the current buffer's freed_descs is less than the number of
> > elements, set the buffer state to SVQ_INDIRECT_BUF_FREEING.
> >
> > Signed-off-by: wafer Xie <[email protected]>
> > ---
> >  hw/virtio/vhost-shadow-virtqueue.c | 257 ++++++++++++++++++++++++++---
> >  1 file changed, 235 insertions(+), 22 deletions(-)
> >
> > diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
> > index adee52f50b..c6064fb839 100644
> > --- a/hw/virtio/vhost-shadow-virtqueue.c
> > +++ b/hw/virtio/vhost-shadow-virtqueue.c
> > @@ -189,36 +189,246 @@ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
> >      return true;
> >  }
> >
> > -static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
> > +/**
> > + * Write descriptors to indirect descriptor table
> > + *
> > + * @svq: The shadow virtqueue
> > + * @sg: Cache for hwaddr
> > + * @iovec: The iovec from the guest
> > + * @num: iovec length
> > + * @addr: Descriptors' GPAs, if backed by guest memory
> > + * @buf: The indirect descriptor buffer
> > + * @offset_idx: Offset for the write position and the next field
> > + *              (0 for out, out_num for in)
> > + * @more_descs: True if more descriptors come in the chain
> > + * @write: True if they are writeable descriptors
> > + *
> > + * Return true if success, false otherwise and print error.
> > + */
> > +static bool vhost_svq_vring_write_indirect_descs(VhostShadowVirtqueue *svq,
> > +                                                  hwaddr *sg,
> > +                                                  const struct iovec *iovec,
> > +                                                  size_t num,
> > +                                                  const hwaddr *addr,
> > +                                                  SVQIndirectDescBuf *buf,
> > +                                                  size_t offset_idx,
> > +                                                  bool more_descs,
> > +                                                  bool write) {
> > +    bool ok;
> > +    uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0;
> > +    vring_desc_t *descs = buf->desc;
> > +    uint16_t i = buf->freed_head + offset_idx;
> > +
> > +    if (num == 0) {
> > +        return true;
> > +    }
> > +
> > +    ok = vhost_svq_translate_addr(svq, sg, iovec, num, addr);
> > +    if (unlikely(!ok)) {
> > +        return false;
> > +    }
> > +
> > +    for (size_t n = 0; n < num; n++) {
> > +        descs[i].addr = cpu_to_le64(sg[n]);
> > +        descs[i].len = cpu_to_le32(iovec[n].iov_len);
> > +        if (more_descs || (n + 1 < num)) {
> > +            descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT);
> > +            descs[i].next = cpu_to_le16(offset_idx + n + 1);
> > +        } else {
> > +            descs[i].flags = flags;
> > +        }
> > +        i++;
> > +    }
> > +
> > +    return true;
> 
> Most of this function is the same as current vhost_svq_vring_write_descs in
> master. Why not make a common one that accepts vring_desc_t * as a
> parameter and make the two of them call it?
> 

Thank you for the suggestion.
I will improve this by following the approach used in
linux/drivers/virtio/virtio_ring.c, function virtqueue_add_desc_split(),
and refactor the code accordingly.
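
Something along these lines, as an untested sketch (the helper name is a
placeholder): a per-descriptor writer shared by both paths, where only
the computation of the next index differs between the two tables.

static uint16_t vhost_svq_add_desc_split(VhostShadowVirtqueue *svq,
                                         vring_desc_t *descs, uint16_t i,
                                         hwaddr addr, uint32_t len,
                                         uint16_t flags, bool indirect)
{
    /*
     * Indirect tables chain through consecutive slots (i is relative to
     * the start of the per-request table); the main ring follows the
     * free list in svq->desc_next[] (i is an absolute ring index).
     */
    uint16_t next = indirect ? i + 1 : le16_to_cpu(svq->desc_next[i]);

    descs[i].addr = cpu_to_le64(addr);
    descs[i].len = cpu_to_le32(len);
    descs[i].flags = cpu_to_le16(flags);
    descs[i].next = cpu_to_le16(next);

    return next;
}

Both vhost_svq_vring_write_descs() and the indirect variant would then be
thin loops over the iovec that call this helper, OR-ing VRING_DESC_F_NEXT
into flags for all but the last descriptor.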

> > +}
> > +
> > +/**
> > + * Add descriptors to SVQ vring using indirect descriptors (dual-buffer)
> > + *
> > + * @svq: The shadow virtqueue
> > + * @out_sg: The out iovec from the guest
> > + * @out_num: The out iovec length
> > + * @out_addr: The out descriptors' GPAs
> > + * @in_sg: The in iovec from the guest
> > + * @in_num: The in iovec length
> > + * @in_addr: The in descriptors' GPAs
> > + * @sgs: Cache for hwaddr
> > + * @buf_idx: Index of the indirect buffer to use
> > + *
> > + * Return true if success, false otherwise and print error.
> > + */
> > +static bool vhost_svq_add_split_indirect(VhostShadowVirtqueue *svq,
> > +                                         const struct iovec *out_sg,
> > +                                         size_t out_num,
> > +                                         const hwaddr *out_addr,
> > +                                         const struct iovec *in_sg,
> > +                                         size_t in_num,
> > +                                         const hwaddr *in_addr,
> > +                                         hwaddr *sgs, int buf_idx) {
> > +    SVQIndirectDescBuf *buf = &svq->indirect_bufs[buf_idx];
> > +    uint16_t start_idx = buf->freed_head;
> > +    size_t total_descs = out_num + in_num;
> > +    hwaddr indirect_iova;
> > +    bool ok;
> > +
> > +    /* Populate indirect descriptor table for out descriptors */
> > +    ok = vhost_svq_vring_write_indirect_descs(svq, sgs, out_sg, out_num,
> > +                                               out_addr, buf, 0,
> > +                                               in_num > 0, false);
> > +    if (unlikely(!ok)) {
> > +        return false;
> > +    }
> > +
> > +    /* Populate indirect descriptor table for in descriptors */
> > +    ok = vhost_svq_vring_write_indirect_descs(svq, sgs, in_sg, in_num,
> > +                                               in_addr, buf, out_num,
> > +                                               false, true);
> > +    if (unlikely(!ok)) {
> > +        return false;
> > +    }
> > +
> > +    /* Calculate IOVA for this indirect descriptor range */
> > +    indirect_iova = buf->iova + start_idx * sizeof(vring_desc_t);
> > +
> > +    /* Add a single descriptor pointing to the indirect table */
> > +    svq->vring.desc[svq->free_head].addr = cpu_to_le64(indirect_iova);
> > +    svq->vring.desc[svq->free_head].len =
> > +            cpu_to_le32(total_descs * sizeof(vring_desc_t));
> > +    svq->vring.desc[svq->free_head].flags =
> > +            cpu_to_le16(VRING_DESC_F_INDIRECT);
> > +
> > +    /* Store indirect descriptor info in desc_state */
> > +    svq->desc_state[svq->free_head].indirect_buf_idx = buf_idx;
> > +
> > +    /* Update buffer state */
> > +    buf->freed_head = start_idx + total_descs;
> > +    buf->freed_descs -= total_descs;
> > +
> > +    /* Move free_head forward */
> > +    svq->free_head = svq->desc_next[svq->free_head];
> > +
> > +    return true;
> 
> Same here, most logic is duplicated in vhost_svq_add_split even when
> vhost_svq_add_split calls vhost_svq_add_split_indirect.
> 

Thank you for the suggestion. I will modify this in the next version of the 
patch.
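
Concretely, the idea would be to have vhost_svq_vring_write_descs() take
the target table and a start index, so both modes share one populate
path. A hypothetical, untested sketch of the v4 shape (the extra
descs/start-index/indirect parameters do not exist yet):

    bool indirect = indirect_buf_idx >= 0;
    SVQIndirectDescBuf *buf = indirect ?
                              &svq->indirect_bufs[indirect_buf_idx] : NULL;
    /* Indirect writes target the per-request slice of the buffer */
    vring_desc_t *descs = indirect ? &buf->desc[buf->freed_head]
                                   : svq->vring.desc;

    /*
     * The start index only matters in indirect mode; chain mode keeps
     * following svq->desc_next[] from svq->free_head as today.
     */
    ok = vhost_svq_vring_write_descs(svq, descs, 0, sgs, out_sg, out_num,
                                     out_addr, in_num > 0, false, indirect);
    if (unlikely(!ok)) {
        return -EINVAL;
    }

    ok = vhost_svq_vring_write_descs(svq, descs, out_num, sgs, in_sg,
                                     in_num, in_addr, false, true, indirect);
    if (unlikely(!ok)) {
        return -EINVAL;
    }

After that, only the single VRING_DESC_F_INDIRECT head descriptor and the
freed_head/freed_descs update remain indirect-specific.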

> > +}
> > +
> > +/**
> > + * Try to get a freed indirect buffer for use
> > + *
> > + * @svq: The shadow virtqueue
> > + * @total_descs: Number of descriptors needed
> > + *
> > + * Returns buffer index (0 to SVQ_NUM_INDIRECT_BUFS-1)
> > + * if available, -1 if none available.
> > + */
> > +static int vhost_svq_get_indirect_buf(VhostShadowVirtqueue *svq,
> > +                                      size_t total_descs) {
> > +    int cur = svq->current_indirect_buf;
> > +    SVQIndirectDescBuf *buf;
> > +
> > +    if (!svq->indirect_enabled) {
> > +        return -1;
> > +    }
> > +
> > +    /* Try current buffer first if it's in FREED state */
> > +    if (cur >= 0) {
> > +        buf = &svq->indirect_bufs[cur];
> > +        if (buf->state == SVQ_INDIRECT_BUF_FREED) {
> > +            /* Check if we have enough free descriptors */
> > +            if (buf->freed_descs >= total_descs) {
> > +                return cur;
> > +            }
> > +            /* Not enough space, switch to FREEING and try next buffer */
> > +            buf->state = SVQ_INDIRECT_BUF_FREEING;
> > +        }
> > +    }
> > +
> > +    /* Try all other buffers */
> > +    for (int i = 0; i < SVQ_NUM_INDIRECT_BUFS; i++) {
> > +        if (i == cur) {
> > +            continue;
> > +        }
> > +        buf = &svq->indirect_bufs[i];
> > +        if (buf->state == SVQ_INDIRECT_BUF_FREED &&
> > +            buf->freed_descs >= total_descs) {
> > +            svq->current_indirect_buf = i;
> > +            return i;
> > +        }
> > +    }
> > +
> 
> Why not store the free indirect descriptors as a linked list instead of a 
> circular
> buffer, the same way as regular descriptors? That way we don't need to
> traverse the whole circular buffer. Also, we reduce the complexity as you don't
> need to keep the state in each descriptor or the
> SVQ_INDIRECT_BUF_FREEING state.
> 

For indirect descriptors, the virtio specification requires the
descriptor table to be contiguous in memory. If a linked list were used,
the free descriptors for one request could be scattered, so they would
need to be collected and reassembled into a contiguous layout when
building each indirect table, which would add extra complexity on the
descriptor insertion path.
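
To make the constraint concrete, the device only ever sees one main-ring
descriptor whose addr/len must cover the whole table as a single
contiguous IOVA range, with sequential next indexes inside it. A minimal
illustration (table_iova and total_descs stand for this patch's
buf->iova-based offset and out_num + in_num):

    /* One main-ring descriptor describes the entire indirect table */
    desc->addr = cpu_to_le64(table_iova);
    desc->len = cpu_to_le32(total_descs * sizeof(vring_desc_t));
    desc->flags = cpu_to_le16(VRING_DESC_F_INDIRECT);

With a linked free list, the entries for one request could end up
scattered across the buffer and would have to be copied back-to-back
before this descriptor could be built; the circular buffer keeps each
allocation contiguous by construction.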

> 
> > +    /* All buffers unavailable, fallback to chain mode */
> > +    return -1;
> > +}
> > +
> > +static int vhost_svq_add_split(VhostShadowVirtqueue *svq,
> >                                  const struct iovec *out_sg, size_t out_num,
> >                                  const hwaddr *out_addr,
> >                                  const struct iovec *in_sg, size_t in_num,
> > -                                const hwaddr *in_addr, unsigned *head)
> > +                                const hwaddr *in_addr, unsigned *head,
> > +                                bool *used_indirect)
> >  {
> >      unsigned avail_idx;
> >      vring_avail_t *avail = svq->vring.avail;
> >      bool ok;
> >      g_autofree hwaddr *sgs = g_new(hwaddr, MAX(out_num, in_num));
> > +    size_t total_descs = out_num + in_num;
> > +    int indirect_buf_idx = -1;
> >
> >      *head = svq->free_head;
> > +    *used_indirect = false;
> >
> >      /* We need some descriptors here */
> >      if (unlikely(!out_num && !in_num)) {
> >          qemu_log_mask(LOG_GUEST_ERROR,
> >                        "Guest provided element with no descriptors");
> > -        return false;
> > +        return -EINVAL;
> >      }
> >
> > -    ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, out_addr,
> > -                                     in_num > 0, false);
> > -    if (unlikely(!ok)) {
> > -        return false;
> > +    /* Try to use indirect descriptors if feature is negotiated and total > 1 */
> > +    if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
> > +        total_descs > 1) {
> > +        indirect_buf_idx = vhost_svq_get_indirect_buf(svq, total_descs);
> >      }
> >
> > -    ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, in_addr,
> false,
> > -                                     true);
> > -    if (unlikely(!ok)) {
> > -        return false;
> > +    if (indirect_buf_idx >= 0) {
> > +        /* Indirect mode: only need 1 main descriptor slot */
> > +        if (unlikely(vhost_svq_available_slots(svq) < 1)) {
> > +            return -ENOSPC;
> > +        }
> > +
> > +        /* Use indirect mode */
> > +        ok = vhost_svq_add_split_indirect(svq, out_sg, out_num, out_addr,
> > +                                          in_sg, in_num, in_addr,
> > +                                          sgs, indirect_buf_idx);
> > +        if (unlikely(!ok)) {
> > +            error_report("indirect error, out_num %zu in_num %zu "
> > +                         "avail index %u head %u",
> > +                         out_num, in_num, svq->shadow_avail_idx, *head);
> > +            return -EINVAL;
> > +        }
> > +        *used_indirect = true;
> > +    } else {
> > +        /* Chain mode: need total_descs descriptor slots */
> > +        if (unlikely(vhost_svq_available_slots(svq) < total_descs)) {
> > +            return -ENOSPC;
> > +        }
> > +
> > +        /* Use direct (chain) mode */
> > +        svq->desc_state[svq->free_head].indirect_buf_idx = -1;
> > +
> > +        ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, out_addr,
> > +                                         in_num > 0, false);
> > +        if (unlikely(!ok)) {
> > +            return -EINVAL;
> > +        }
> > +
> > +        ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, in_addr,
> > +                                         false,
> > +                                         true);
> > +        if (unlikely(!ok)) {
> > +            return -EINVAL;
> > +        }
> >      }
> >
> >      /*
> > @@ -233,7 +443,7 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
> >      smp_wmb();
> >      avail->idx = cpu_to_le16(svq->shadow_avail_idx);
> >
> > -    return true;
> > +    return 0;
> >  }
> >
> >  static void vhost_svq_kick(VhostShadowVirtqueue *svq)
> > @@ -249,7 +459,8 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
> >      if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
> >          uint16_t avail_event = le16_to_cpu(
> >                  *(uint16_t *)(&svq->vring.used->ring[svq->vring.num]));
> > -        needs_kick = vring_need_event(avail_event, svq->shadow_avail_idx, svq->shadow_avail_idx - 1);
> > +        needs_kick = vring_need_event(avail_event, svq->shadow_avail_idx,
> > +                                      svq->shadow_avail_idx - 1);
> >      } else {
> >          needs_kick =
> >                 !(svq->vring.used->flags & cpu_to_le16(VRING_USED_F_NO_NOTIFY));
> > @@ -274,19 +485,21 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
> >  {
> >      unsigned qemu_head;
> >      unsigned ndescs = in_num + out_num;
> > -    bool ok;
> > +    int r;
> > +    bool used_indirect = false;
> >
> > -    if (unlikely(ndescs > vhost_svq_available_slots(svq))) {
> > -        return -ENOSPC;
> > +    r = vhost_svq_add_split(svq, out_sg, out_num, out_addr, in_sg, in_num,
> > +                             in_addr, &qemu_head, &used_indirect);
> > +    if (unlikely(r != 0)) {
> > +        return r;
> >      }
> >
> > -    ok = vhost_svq_add_split(svq, out_sg, out_num, out_addr, in_sg, in_num,
> > -                             in_addr, &qemu_head);
> > -    if (unlikely(!ok)) {
> > -        return -EINVAL;
> > +    /* If using indirect, only 1 main descriptor is used; otherwise ndescs */
> > +    if (used_indirect) {
> > +        svq->num_free -= 1;
> > +    } else {
> > +        svq->num_free -= ndescs;
> >      }
> > -
> > -    svq->num_free -= ndescs;
> >      svq->desc_state[qemu_head].elem = elem;
> >      svq->desc_state[qemu_head].ndescs = ndescs;
> >      vhost_svq_kick(svq);
> > --
> > 2.48.1
> >
