Since virtio-vsock was introduced, the buffers filled by the host
and pushed to the guest using the vring have been queued directly
in a per-socket list. These buffers are preallocated by the guest
with a fixed size (4 KB).

The maximum amount of memory used by each socket should be
controlled by the credit mechanism.
The default credit available per socket is 256 KB, but if the
sender uses only 1 byte per packet, the guest can queue up to
262144 4 KB buffers, using up to 1 GB of memory per socket. In
addition, the guest will continue to fill the vring with new 4 KB
free buffers to avoid starvation of other sockets.
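
Spelled out, the worst case above is:

  256 KB of credit / 1 byte per packet = 262144 packets queued
  262144 packets * 4 KB per buffer     = 1 GB of guest memory per socket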

This patch mitigates this issue by copying the payload of small
packets (< 128 bytes) into the buffer of the last packet queued,
in order to avoid wasting memory.
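
For context, the pathological traffic pattern is easy to produce from
the peer: any application performing 1-byte sends over AF_VSOCK will
do. A minimal host-side sketch (not part of this patch; the guest CID
and port below are made-up values):

  #include <stdio.h>
  #include <unistd.h>
  #include <sys/socket.h>
  #include <linux/vm_sockets.h>

  int main(void)
  {
          struct sockaddr_vm addr = {
                  .svm_family = AF_VSOCK,
                  .svm_cid    = 3,    /* guest CID (assumption) */
                  .svm_port   = 1234, /* guest listening port (assumption) */
          };
          char byte = 'x';
          int i, fd;

          fd = socket(AF_VSOCK, SOCK_STREAM, 0);
          if (fd < 0 || connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
                  perror("vsock");
                  return 1;
          }

          /* Each 1-byte send consumes only 1 byte of the receiver's
           * credit, but until the receiver reads it, it keeps a whole
           * 4 KB preallocated rx buffer queued in the guest (before
           * this patch).
           */
          for (i = 0; i < 256 * 1024; i++)
                  if (send(fd, &byte, 1, 0) != 1)
                          break;

          close(fd);
          return 0;
  }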

Signed-off-by: Stefano Garzarella <sgarz...@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefa...@redhat.com>
Acked-by: Michael S. Tsirkin <m...@redhat.com>
---
 drivers/vhost/vsock.c                   |  2 +
 include/linux/virtio_vsock.h            |  1 +
 net/vmw_vsock/virtio_transport.c        |  1 +
 net/vmw_vsock/virtio_transport_common.c | 60 +++++++++++++++++++++----
 4 files changed, 55 insertions(+), 9 deletions(-)

diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 6a50e1d0529c..6c8390a2af52 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -329,6 +329,8 @@ vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
                return NULL;
        }
 
+       pkt->buf_len = pkt->len;
+
        nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
        if (nbytes != pkt->len) {
                vq_err(vq, "Expected %u byte payload, got %zu bytes\n",
diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
index e223e2632edd..7d973903f52e 100644
--- a/include/linux/virtio_vsock.h
+++ b/include/linux/virtio_vsock.h
@@ -52,6 +52,7 @@ struct virtio_vsock_pkt {
        /* socket refcnt not held, only use for cancellation */
        struct vsock_sock *vsk;
        void *buf;
+       u32 buf_len;
        u32 len;
        u32 off;
        bool reply;
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 0815d1357861..082a30936690 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -307,6 +307,7 @@ static void virtio_vsock_rx_fill(struct virtio_vsock *vsock)
                        break;
                }
 
+               pkt->buf_len = buf_len;
                pkt->len = buf_len;
 
                sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr));
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 6f1a8aff65c5..095221f94786 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -26,6 +26,9 @@
 /* How long to wait for graceful shutdown of a connection */
 #define VSOCK_CLOSE_TIMEOUT (8 * HZ)
 
+/* Threshold for detecting small packets to copy */
+#define GOOD_COPY_LEN  128
+
 static const struct virtio_transport *virtio_transport_get_ops(void)
 {
        const struct vsock_transport *t = vsock_core_get_transport();
@@ -64,6 +67,9 @@ virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info,
                pkt->buf = kmalloc(len, GFP_KERNEL);
                if (!pkt->buf)
                        goto out_pkt;
+
+               pkt->buf_len = len;
+
                err = memcpy_from_msg(pkt->buf, info->msg, len);
                if (err)
                        goto out;
@@ -841,24 +847,60 @@ virtio_transport_recv_connecting(struct sock *sk,
        return err;
 }
 
+static void
+virtio_transport_recv_enqueue(struct vsock_sock *vsk,
+                             struct virtio_vsock_pkt *pkt)
+{
+       struct virtio_vsock_sock *vvs = vsk->trans;
+       bool free_pkt = false;
+
+       pkt->len = le32_to_cpu(pkt->hdr.len);
+       pkt->off = 0;
+
+       spin_lock_bh(&vvs->rx_lock);
+
+       virtio_transport_inc_rx_pkt(vvs, pkt);
+
+       /* Try to copy small packets into the buffer of last packet queued,
+        * to avoid wasting memory queueing the entire buffer with a small
+        * payload.
+        */
+       if (pkt->len <= GOOD_COPY_LEN && !list_empty(&vvs->rx_queue)) {
+               struct virtio_vsock_pkt *last_pkt;
+
+               last_pkt = list_last_entry(&vvs->rx_queue,
+                                          struct virtio_vsock_pkt, list);
+
+               /* If there is space in the last packet queued, we copy the
+                * new packet in its buffer.
+                */
+               if (pkt->len <= last_pkt->buf_len - last_pkt->len) {
+                       memcpy(last_pkt->buf + last_pkt->len, pkt->buf,
+                              pkt->len);
+                       last_pkt->len += pkt->len;
+                       free_pkt = true;
+                       goto out;
+               }
+       }
+
+       list_add_tail(&pkt->list, &vvs->rx_queue);
+
+out:
+       spin_unlock_bh(&vvs->rx_lock);
+       if (free_pkt)
+               virtio_transport_free_pkt(pkt);
+}
+
 static int
 virtio_transport_recv_connected(struct sock *sk,
                                struct virtio_vsock_pkt *pkt)
 {
        struct vsock_sock *vsk = vsock_sk(sk);
-       struct virtio_vsock_sock *vvs = vsk->trans;
        int err = 0;
 
        switch (le16_to_cpu(pkt->hdr.op)) {
        case VIRTIO_VSOCK_OP_RW:
-               pkt->len = le32_to_cpu(pkt->hdr.len);
-               pkt->off = 0;
-
-               spin_lock_bh(&vvs->rx_lock);
-               virtio_transport_inc_rx_pkt(vvs, pkt);
-               list_add_tail(&pkt->list, &vvs->rx_queue);
-               spin_unlock_bh(&vvs->rx_lock);
-
+               virtio_transport_recv_enqueue(vsk, pkt);
                sk->sk_data_ready(sk);
                return err;
        case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
-- 
2.20.1
