From: wafer Xie <[email protected]>

Retrieve the target buffer from the indirect buffers by index, add the
element sent by the guest to the buffer's indirect descriptor table, and
update freed_head and freed_descs. If the current buffer does not have
enough freed descriptors for the element, switch its state to
SVQ_INDIRECT_BUF_FREEING and try another buffer, falling back to the
direct (chain) mode when no indirect buffer is available.
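For context, the per-buffer bookkeeping used below is defined earlier in
this series; a rough sketch of the shape this patch relies on (field names
are taken from their use in this patch, but the exact types, the enum type
name and the layout are assumptions, not the authoritative definition):

    /* Illustrative only: assumed shape of the indirect buffer state. */
    typedef enum SVQIndirectDescBufState {
        SVQ_INDIRECT_BUF_FREED,    /* whole table reclaimed, ready for reuse */
        SVQ_INDIRECT_BUF_FREEING,  /* in-flight descriptors still pending */
    } SVQIndirectDescBufState;

    typedef struct SVQIndirectDescBuf {
        vring_desc_t *desc;        /* host view of the indirect table */
        hwaddr iova;               /* device view (IOVA) of the same table */
        uint16_t freed_head;       /* next free slot to write at */
        uint16_t freed_descs;      /* free slots remaining in the table */
        SVQIndirectDescBufState state;
    } SVQIndirectDescBuf;

Each guest element consumes a contiguous run of total_descs = out_num +
in_num slots starting at freed_head, and the single VRING_DESC_F_INDIRECT
descriptor placed in the main ring points at
buf->iova + freed_head * sizeof(vring_desc_t) with a length of
total_descs * sizeof(vring_desc_t).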
Signed-off-by: wafer Xie <[email protected]>
---
 hw/virtio/vhost-shadow-virtqueue.c | 257 ++++++++++++++++++++++++++---
 1 file changed, 235 insertions(+), 22 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index adee52f50b..c6064fb839 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -189,36 +189,246 @@ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
     return true;
 }
 
-static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
+/**
+ * Write descriptors to indirect descriptor table
+ *
+ * @svq: The shadow virtqueue
+ * @sg: Cache for hwaddr
+ * @iovec: The iovec from the guest
+ * @num: iovec length
+ * @addr: Descriptors' GPAs, if backed by guest memory
+ * @buf: The indirect descriptor buffer
+ * @offset_idx: Offset for write position
+ *              and next field (0 for out, out_num for in)
+ * @more_descs: True if more descriptors come in the chain
+ * @write: True if they are writeable descriptors
+ *
+ * Return true if success, false otherwise and print error.
+ */
+static bool vhost_svq_vring_write_indirect_descs(VhostShadowVirtqueue *svq,
+                                                 hwaddr *sg,
+                                                 const struct iovec *iovec,
+                                                 size_t num,
+                                                 const hwaddr *addr,
+                                                 SVQIndirectDescBuf *buf,
+                                                 size_t offset_idx,
+                                                 bool more_descs,
+                                                 bool write)
+{
+    bool ok;
+    uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0;
+    vring_desc_t *descs = buf->desc;
+    uint16_t i = buf->freed_head + offset_idx;
+
+    if (num == 0) {
+        return true;
+    }
+
+    ok = vhost_svq_translate_addr(svq, sg, iovec, num, addr);
+    if (unlikely(!ok)) {
+        return false;
+    }
+
+    for (size_t n = 0; n < num; n++) {
+        descs[i].addr = cpu_to_le64(sg[n]);
+        descs[i].len = cpu_to_le32(iovec[n].iov_len);
+        if (more_descs || (n + 1 < num)) {
+            descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT);
+            descs[i].next = cpu_to_le16(offset_idx + n + 1);
+        } else {
+            descs[i].flags = flags;
+        }
+        i++;
+    }
+
+    return true;
+}
+
+/**
+ * Add descriptors to SVQ vring using indirect descriptors (dual-buffer)
+ *
+ * @svq: The shadow virtqueue
+ * @out_sg: The out iovec from the guest
+ * @out_num: The out iovec length
+ * @out_addr: The out descriptors' GPAs
+ * @in_sg: The in iovec from the guest
+ * @in_num: The in iovec length
+ * @in_addr: The in descriptors' GPAs
+ * @sgs: Cache for hwaddr
+ * @buf_idx: Index of the indirect buffer to use
+ *
+ * Return true if success, false otherwise and print error.
+ */
+static bool vhost_svq_add_split_indirect(VhostShadowVirtqueue *svq,
+                                         const struct iovec *out_sg,
+                                         size_t out_num,
+                                         const hwaddr *out_addr,
+                                         const struct iovec *in_sg,
+                                         size_t in_num,
+                                         const hwaddr *in_addr,
+                                         hwaddr *sgs, int buf_idx)
+{
+    SVQIndirectDescBuf *buf = &svq->indirect_bufs[buf_idx];
+    uint16_t start_idx = buf->freed_head;
+    size_t total_descs = out_num + in_num;
+    hwaddr indirect_iova;
+    bool ok;
+
+    /* Populate indirect descriptor table for out descriptors */
+    ok = vhost_svq_vring_write_indirect_descs(svq, sgs, out_sg, out_num,
+                                              out_addr, buf, 0,
+                                              in_num > 0, false);
+    if (unlikely(!ok)) {
+        return false;
+    }
+
+    /* Populate indirect descriptor table for in descriptors */
+    ok = vhost_svq_vring_write_indirect_descs(svq, sgs, in_sg, in_num,
+                                              in_addr, buf, out_num,
+                                              false, true);
+    if (unlikely(!ok)) {
+        return false;
+    }
+
+    /* Calculate IOVA for this indirect descriptor range */
+    indirect_iova = buf->iova + start_idx * sizeof(vring_desc_t);
+
+    /* Add a single descriptor pointing to the indirect table */
+    svq->vring.desc[svq->free_head].addr = cpu_to_le64(indirect_iova);
+    svq->vring.desc[svq->free_head].len =
+        cpu_to_le32(total_descs * sizeof(vring_desc_t));
+    svq->vring.desc[svq->free_head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT);
+
+    /* Store indirect descriptor info in desc_state */
+    svq->desc_state[svq->free_head].indirect_buf_idx = buf_idx;
+
+    /* Update buffer state */
+    buf->freed_head = start_idx + total_descs;
+    buf->freed_descs -= total_descs;
+
+    /* Move free_head forward */
+    svq->free_head = svq->desc_next[svq->free_head];
+
+    return true;
+}
+
+/**
+ * Try to get a freed indirect buffer for use
+ *
+ * @svq: The shadow virtqueue
+ * @total_descs: Number of descriptors needed
+ *
+ * Returns buffer index (0 to SVQ_NUM_INDIRECT_BUFS-1)
+ * if available, -1 if none available.
+ */
+static int vhost_svq_get_indirect_buf(VhostShadowVirtqueue *svq,
+                                      size_t total_descs)
+{
+    int cur = svq->current_indirect_buf;
+    SVQIndirectDescBuf *buf;
+
+    if (!svq->indirect_enabled) {
+        return -1;
+    }
+
+    /* Try current buffer first if it's in FREED state */
+    if (cur >= 0) {
+        buf = &svq->indirect_bufs[cur];
+        if (buf->state == SVQ_INDIRECT_BUF_FREED) {
+            /* Check if we have enough free descriptors */
+            if (buf->freed_descs >= total_descs) {
+                return cur;
+            }
+            /* Not enough space, switch to FREEING and try next buffer */
+            buf->state = SVQ_INDIRECT_BUF_FREEING;
+        }
+    }
+
+    /* Try all other buffers */
+    for (int i = 0; i < SVQ_NUM_INDIRECT_BUFS; i++) {
+        if (i == cur) {
+            continue;
+        }
+        buf = &svq->indirect_bufs[i];
+        if (buf->state == SVQ_INDIRECT_BUF_FREED &&
+            buf->freed_descs >= total_descs) {
+            svq->current_indirect_buf = i;
+            return i;
+        }
+    }
+
+    /* All buffers unavailable, fallback to chain mode */
+    return -1;
+}
+
+static int vhost_svq_add_split(VhostShadowVirtqueue *svq,
                                 const struct iovec *out_sg, size_t out_num,
                                 const hwaddr *out_addr,
                                 const struct iovec *in_sg, size_t in_num,
-                                const hwaddr *in_addr, unsigned *head)
+                                const hwaddr *in_addr, unsigned *head,
+                                bool *used_indirect)
 {
     unsigned avail_idx;
     vring_avail_t *avail = svq->vring.avail;
     bool ok;
     g_autofree hwaddr *sgs = g_new(hwaddr, MAX(out_num, in_num));
+    size_t total_descs = out_num + in_num;
+    int indirect_buf_idx = -1;
 
     *head = svq->free_head;
+    *used_indirect = false;
 
     /* We need some descriptors here */
     if (unlikely(!out_num && !in_num)) {
         qemu_log_mask(LOG_GUEST_ERROR,
                       "Guest provided element with no descriptors");
-        return false;
+        return -EINVAL;
     }
 
-    ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, out_addr,
-                                     in_num > 0, false);
-    if (unlikely(!ok)) {
-        return false;
+    /* Try to use indirect descriptors if feature is negotiated and total > 1 */
+    if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
+        total_descs > 1) {
+        indirect_buf_idx = vhost_svq_get_indirect_buf(svq, total_descs);
     }
 
-    ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, in_addr, false,
-                                     true);
-    if (unlikely(!ok)) {
-        return false;
+    if (indirect_buf_idx >= 0) {
+        /* Indirect mode: only need 1 main descriptor slot */
+        if (unlikely(vhost_svq_available_slots(svq) < 1)) {
+            return -ENOSPC;
+        }
+
+        /* Use indirect mode */
+        ok = vhost_svq_add_split_indirect(svq, out_sg, out_num, out_addr,
+                                          in_sg, in_num, in_addr,
+                                          sgs, indirect_buf_idx);
+        if (unlikely(!ok)) {
+            error_report("indirect error, out_num %zu in_num %zu "
+                         "avail index %u head %u",
+                         out_num, in_num, svq->shadow_avail_idx, *head);
+            return -EINVAL;
+        }
+        *used_indirect = true;
+    } else {
+        /* Chain mode: need total_descs descriptor slots */
+        if (unlikely(vhost_svq_available_slots(svq) < total_descs)) {
+            return -ENOSPC;
+        }
+
+        /* Use direct (chain) mode */
+        svq->desc_state[svq->free_head].indirect_buf_idx = -1;
+
+        ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, out_addr,
+                                         in_num > 0, false);
+        if (unlikely(!ok)) {
+            return -EINVAL;
+        }
+
+        ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, in_addr,
+                                         false,
+                                         true);
+        if (unlikely(!ok)) {
+            return -EINVAL;
+        }
     }
 
     /*
@@ -233,7 +443,7 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
     smp_wmb();
     avail->idx = cpu_to_le16(svq->shadow_avail_idx);
 
-    return true;
+    return 0;
 }
 
 static void vhost_svq_kick(VhostShadowVirtqueue *svq)
@@ -249,7 +459,8 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
     if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
         uint16_t avail_event = le16_to_cpu(
                 *(uint16_t *)(&svq->vring.used->ring[svq->vring.num]));
-        needs_kick = vring_need_event(avail_event, svq->shadow_avail_idx, svq->shadow_avail_idx - 1);
+        needs_kick = vring_need_event(avail_event, svq->shadow_avail_idx,
+                                      svq->shadow_avail_idx - 1);
     } else {
         needs_kick =
                 !(svq->vring.used->flags & cpu_to_le16(VRING_USED_F_NO_NOTIFY));
@@ -274,19 +485,21 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
 {
     unsigned qemu_head;
     unsigned ndescs = in_num + out_num;
-    bool ok;
+    int r;
+    bool used_indirect = false;
 
-    if (unlikely(ndescs > vhost_svq_available_slots(svq))) {
-        return -ENOSPC;
+    r = vhost_svq_add_split(svq, out_sg, out_num, out_addr, in_sg, in_num,
+                            in_addr, &qemu_head, &used_indirect);
+    if (unlikely(r != 0)) {
+        return r;
     }
 
-    ok = vhost_svq_add_split(svq, out_sg, out_num, out_addr, in_sg, in_num,
-                             in_addr, &qemu_head);
-    if (unlikely(!ok)) {
-        return -EINVAL;
+    /* If using indirect, only 1 main descriptor is used; otherwise ndescs */
+    if (used_indirect) {
+        svq->num_free -= 1;
+    } else {
+        svq->num_free -= ndescs;
     }
-
-    svq->num_free -= ndescs;
     svq->desc_state[qemu_head].elem = elem;
     svq->desc_state[qemu_head].ndescs = ndescs;
     vhost_svq_kick(svq);
-- 
2.48.1
