When vhost support VIRTIO_VSOCK_F_MRG_RXBUF feature,
it will merge big packet into rx vq.

Signed-off-by: Yiwen Jiang <jiangyi...@huawei.com>
---
 drivers/vhost/vsock.c             | 111 ++++++++++++++++++++++++++++++--------
 include/linux/virtio_vsock.h      |   1 +
 include/uapi/linux/virtio_vsock.h |   5 ++
 3 files changed, 94 insertions(+), 23 deletions(-)

diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 34bc3ab..dc52b0f 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -22,7 +22,8 @@
 #define VHOST_VSOCK_DEFAULT_HOST_CID   2

 enum {
-       VHOST_VSOCK_FEATURES = VHOST_FEATURES,
+       VHOST_VSOCK_FEATURES = VHOST_FEATURES |
+                       (1ULL << VIRTIO_VSOCK_F_MRG_RXBUF),
 };

 /* Used to track all the vhost_vsock instances on the system. */
@@ -80,6 +81,69 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
        return vsock;
 }

+/* This segment of codes are copied from drivers/vhost/net.c */
+static int get_rx_bufs(struct vhost_virtqueue *vq,
+               struct vring_used_elem *heads, int datalen,
+               unsigned *iovcount, unsigned int quota)
+{
+       unsigned int out, in;
+       int seg = 0;
+       int headcount = 0;
+       unsigned d;
+       int ret;
+       /*
+        * len is always initialized before use since we are always called with
+        * datalen > 0.
+        */
+       u32 uninitialized_var(len);
+
+       while (datalen > 0 && headcount < quota) {
+               if (unlikely(seg >= UIO_MAXIOV)) {
+                       ret = -ENOBUFS;
+                       goto err;
+               }
+
+               ret = vhost_get_vq_desc(vq, vq->iov + seg,
+                               ARRAY_SIZE(vq->iov) - seg, &out,
+                               &in, NULL, NULL);
+               if (unlikely(ret < 0))
+                       goto err;
+
+               d = ret;
+               if (d == vq->num) {
+                       ret = 0;
+                       goto err;
+               }
+
+               if (unlikely(out || in <= 0)) {
+                       vq_err(vq, "unexpected descriptor format for RX: "
+                                       "out %d, in %d\n", out, in);
+                       ret = -EINVAL;
+                       goto err;
+               }
+
+               heads[headcount].id = cpu_to_vhost32(vq, d);
+               len = iov_length(vq->iov + seg, in);
+               heads[headcount].len = cpu_to_vhost32(vq, len);
+               datalen -= len;
+               ++headcount;
+               seg += in;
+       }
+
+       heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen);
+       *iovcount = seg;
+
+       /* Detect overrun */
+       if (unlikely(datalen > 0)) {
+               ret = UIO_MAXIOV + 1;
+               goto err;
+       }
+       return headcount;
+err:
+       vhost_discard_vq_desc(vq, headcount);
+       return ret;
+}
+
 static void
 vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
                            struct vhost_virtqueue *vq)
@@ -87,22 +151,34 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
        struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
        bool added = false;
        bool restart_tx = false;
+       int mergeable;
+       size_t vsock_hlen;

        mutex_lock(&vq->mutex);

        if (!vq->private_data)
                goto out;

+       mergeable = vhost_has_feature(vq, VIRTIO_VSOCK_F_MRG_RXBUF);
+       /*
+        * Guest fill page for rx vq in mergeable case, so it will not
+        * allocate pkt structure, we should reserve size of pkt in advance.
+        */
+       if (likely(mergeable))
+               vsock_hlen = sizeof(struct virtio_vsock_pkt);
+       else
+               vsock_hlen = sizeof(struct virtio_vsock_hdr);
+
        /* Avoid further vmexits, we're already processing the virtqueue */
        vhost_disable_notify(&vsock->dev, vq);

        for (;;) {
                struct virtio_vsock_pkt *pkt;
                struct iov_iter iov_iter;
-               unsigned out, in;
+               unsigned out = 0, in = 0;
                size_t nbytes;
                size_t len;
-               int head;
+               s16 headcount;

                spin_lock_bh(&vsock->send_pkt_list_lock);
                if (list_empty(&vsock->send_pkt_list)) {
@@ -116,16 +192,9 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
                list_del_init(&pkt->list);
                spin_unlock_bh(&vsock->send_pkt_list_lock);

-               head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
-                                        &out, &in, NULL, NULL);
-               if (head < 0) {
-                       spin_lock_bh(&vsock->send_pkt_list_lock);
-                       list_add(&pkt->list, &vsock->send_pkt_list);
-                       spin_unlock_bh(&vsock->send_pkt_list_lock);
-                       break;
-               }
-
-               if (head == vq->num) {
+               headcount = get_rx_bufs(vq, vq->heads, vsock_hlen + pkt->len,
+                               &in, likely(mergeable) ? UIO_MAXIOV : 1);
+               if (headcount <= 0) {
                        spin_lock_bh(&vsock->send_pkt_list_lock);
                        list_add(&pkt->list, &vsock->send_pkt_list);
                        spin_unlock_bh(&vsock->send_pkt_list_lock);
@@ -133,24 +202,20 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
                        /* We cannot finish yet if more buffers snuck in while
                         * re-enabling notify.
                         */
-                       if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
+                       if (!headcount && 
unlikely(vhost_enable_notify(&vsock->dev, vq))) {
                                vhost_disable_notify(&vsock->dev, vq);
                                continue;
                        }
                        break;
                }

-               if (out) {
-                       virtio_transport_free_pkt(pkt);
-                       vq_err(vq, "Expected 0 output buffers, got %u\n", out);
-                       break;
-               }
-
                len = iov_length(&vq->iov[out], in);
                iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len);

-               nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
-               if (nbytes != sizeof(pkt->hdr)) {
+               if (likely(mergeable))
+                       pkt->mrg_rxbuf_hdr.num_buffers = cpu_to_le16(headcount);
+               nbytes = copy_to_iter(&pkt->hdr, vsock_hlen, &iov_iter);
+               if (nbytes != vsock_hlen) {
                        virtio_transport_free_pkt(pkt);
                        vq_err(vq, "Faulted on copying pkt hdr\n");
                        break;
@@ -163,7 +228,7 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
                        break;
                }

-               vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len);
+               vhost_add_used_n(vq, vq->heads, headcount);
                added = true;

                if (pkt->reply) {
diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
index bf84418..da9e1fe 100644
--- a/include/linux/virtio_vsock.h
+++ b/include/linux/virtio_vsock.h
@@ -50,6 +50,7 @@ struct virtio_vsock_sock {

 struct virtio_vsock_pkt {
        struct virtio_vsock_hdr hdr;
+       struct virtio_vsock_mrg_rxbuf_hdr mrg_rxbuf_hdr;
        struct work_struct work;
        struct list_head list;
        /* socket refcnt not held, only use for cancellation */
diff --git a/include/uapi/linux/virtio_vsock.h 
b/include/uapi/linux/virtio_vsock.h
index 1d57ed3..2292f30 100644
--- a/include/uapi/linux/virtio_vsock.h
+++ b/include/uapi/linux/virtio_vsock.h
@@ -63,6 +63,11 @@ struct virtio_vsock_hdr {
        __le32  fwd_cnt;
 } __attribute__((packed));

+/* It add mergeable rx buffers feature */
+struct virtio_vsock_mrg_rxbuf_hdr {
+       __le16  num_buffers;    /* number of mergeable rx buffers */
+} __attribute__((packed));
+
 enum virtio_vsock_type {
        VIRTIO_VSOCK_TYPE_STREAM = 1,
 };
-- 
1.8.3.1


_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Reply via email to