Support mergable rx buffers for virtio-net. This helps reduce the amount
of memory the guest kernel has to allocate per rx vq.

Signed-off-by: Sasha Levin <sasha.le...@oracle.com>
---
 tools/kvm/include/kvm/uip.h  |  4 ++--
 tools/kvm/include/kvm/util.h |  3 +++
 tools/kvm/net/uip/core.c     | 54 +++++---------------------------------------
 tools/kvm/net/uip/tcp.c      |  2 +-
 tools/kvm/net/uip/udp.c      |  2 +-
 tools/kvm/util/util.c        | 15 ++++++++++++
 tools/kvm/virtio/net.c       | 37 ++++++++++++++++++++++--------
 7 files changed, 55 insertions(+), 62 deletions(-)

diff --git a/tools/kvm/include/kvm/uip.h b/tools/kvm/include/kvm/uip.h
index ac248d2..fa82f10 100644
--- a/tools/kvm/include/kvm/uip.h
+++ b/tools/kvm/include/kvm/uip.h
@@ -252,7 +252,7 @@ struct uip_tcp_socket {
 };
 
 struct uip_tx_arg {
-       struct virtio_net_hdr *vnet;
+       struct virtio_net_hdr_mrg_rxbuf *vnet;
        struct uip_info *info;
        struct uip_eth *eth;
        int vnet_len;
@@ -332,7 +332,7 @@ static inline u16 uip_eth_hdrlen(struct uip_eth *eth)
 }
 
 int uip_tx(struct iovec *iov, u16 out, struct uip_info *info);
-int uip_rx(struct iovec *iov, u16 in, struct uip_info *info);
+int uip_rx(unsigned char *buffer, u32 length, struct uip_info *info);
 int uip_init(struct uip_info *info);
 
 int uip_tx_do_ipv4_udp_dhcp(struct uip_tx_arg *arg);
diff --git a/tools/kvm/include/kvm/util.h b/tools/kvm/include/kvm/util.h
index 0df9f0d..6f8ac83 100644
--- a/tools/kvm/include/kvm/util.h
+++ b/tools/kvm/include/kvm/util.h
@@ -22,6 +22,7 @@
 #include <sys/param.h>
 #include <sys/types.h>
 #include <linux/types.h>
+#include <sys/uio.h>
 
 #ifdef __GNUC__
 #define NORETURN __attribute__((__noreturn__))
@@ -94,4 +95,6 @@ struct kvm;
 void *mmap_hugetlbfs(struct kvm *kvm, const char *htlbfs_path, u64 size);
 void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *hugetlbfs_path, u64 
size);
 
+int memcpy_toiovecend(const struct iovec *iov, int iovlen, unsigned char 
*kdata, size_t len);
+
 #endif /* KVM__UTIL_H */
diff --git a/tools/kvm/net/uip/core.c b/tools/kvm/net/uip/core.c
index 4e5bb82..d9e9993 100644
--- a/tools/kvm/net/uip/core.c
+++ b/tools/kvm/net/uip/core.c
@@ -7,7 +7,7 @@
 
 int uip_tx(struct iovec *iov, u16 out, struct uip_info *info)
 {
-       struct virtio_net_hdr *vnet;
+       struct virtio_net_hdr_mrg_rxbuf *vnet;
        struct uip_tx_arg arg;
        int eth_len, vnet_len;
        struct uip_eth *eth;
@@ -74,63 +74,21 @@ int uip_tx(struct iovec *iov, u16 out, struct uip_info 
*info)
        return vnet_len + eth_len;
 }
 
-int uip_rx(struct iovec *iov, u16 in, struct uip_info *info)
+int uip_rx(unsigned char *buffer, u32 length, struct uip_info *info)
 {
-       struct virtio_net_hdr *vnet;
-       struct uip_eth *eth;
        struct uip_buf *buf;
-       int vnet_len;
-       int eth_len;
-       char *p;
        int len;
-       int cnt;
-       int i;
 
        /*
         * Sleep until there is a buffer for guest
         */
        buf = uip_buf_get_used(info);
 
-       /*
-        * Fill device to guest buffer, vnet hdr fisrt
-        */
-       vnet_len = iov[0].iov_len;
-       vnet = iov[0].iov_base;
-       if (buf->vnet_len > vnet_len) {
-               len = -1;
-               goto out;
-       }
-       memcpy(vnet, buf->vnet, buf->vnet_len);
-
-       /*
-        * Then, the real eth data
-        * Note: Be sure buf->eth_len is not bigger than the buffer len that 
guest provides
-        */
-       cnt = buf->eth_len;
-       p = buf->eth;
-       for (i = 1; i < in; i++) {
-               eth_len = iov[i].iov_len;
-               eth = iov[i].iov_base;
-               if (cnt > eth_len) {
-                       memcpy(eth, p, eth_len);
-                       cnt -= eth_len;
-                       p += eth_len;
-               } else {
-                       memcpy(eth, p, cnt);
-                       cnt -= cnt;
-                       break;
-               }
-       }
-
-       if (cnt) {
-               pr_warning("uip_rx error");
-               len = -1;
-               goto out;
-       }
+       memcpy(buffer, buf->vnet, buf->vnet_len);
+       memcpy(buffer + buf->vnet_len, buf->eth, buf->eth_len);
 
        len = buf->vnet_len + buf->eth_len;
 
-out:
        uip_buf_set_free(info, buf);
        return len;
 }
@@ -172,8 +130,8 @@ int uip_init(struct uip_info *info)
        }
 
        list_for_each_entry(buf, buf_head, list) {
-               buf->vnet       = malloc(sizeof(struct virtio_net_hdr));
-               buf->vnet_len   = sizeof(struct virtio_net_hdr);
+               buf->vnet       = malloc(sizeof(struct 
virtio_net_hdr_mrg_rxbuf));
+               buf->vnet_len   = sizeof(struct virtio_net_hdr_mrg_rxbuf);
                buf->eth        = malloc(1024*64 + sizeof(struct 
uip_pseudo_hdr));
                buf->eth_len    = 1024*64 + sizeof(struct uip_pseudo_hdr);
 
diff --git a/tools/kvm/net/uip/tcp.c b/tools/kvm/net/uip/tcp.c
index 9044f40..a99b95e 100644
--- a/tools/kvm/net/uip/tcp.c
+++ b/tools/kvm/net/uip/tcp.c
@@ -153,7 +153,7 @@ static int uip_tcp_payload_send(struct uip_tcp_socket *sk, 
u8 flag, u16 payload_
        /*
         * virtio_net_hdr
         */
-       buf->vnet_len   = sizeof(struct virtio_net_hdr);
+       buf->vnet_len   = sizeof(struct virtio_net_hdr_mrg_rxbuf);
        memset(buf->vnet, 0, buf->vnet_len);
 
        buf->eth_len    = ntohs(ip2->len) + uip_eth_hdrlen(&ip2->eth);
diff --git a/tools/kvm/net/uip/udp.c b/tools/kvm/net/uip/udp.c
index 31c417c..083c221 100644
--- a/tools/kvm/net/uip/udp.c
+++ b/tools/kvm/net/uip/udp.c
@@ -142,7 +142,7 @@ int uip_udp_make_pkg(struct uip_info *info, struct 
uip_udp_socket *sk, struct ui
        /*
         * virtio_net_hdr
         */
-       buf->vnet_len   = sizeof(struct virtio_net_hdr);
+       buf->vnet_len   = sizeof(struct virtio_net_hdr_mrg_rxbuf);
        memset(buf->vnet, 0, buf->vnet_len);
 
        buf->eth_len    = ntohs(ip2->len) + uip_eth_hdrlen(&ip2->eth);
diff --git a/tools/kvm/util/util.c b/tools/kvm/util/util.c
index c11a15a..4e0069c 100644
--- a/tools/kvm/util/util.c
+++ b/tools/kvm/util/util.c
@@ -9,6 +9,7 @@
 #include <sys/mman.h>
 #include <sys/stat.h>
 #include <sys/statfs.h>
+#include <linux/kernel.h>
 
 static void report(const char *prefix, const char *err, va_list params)
 {
@@ -131,3 +132,17 @@ void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char 
*hugetlbfs_path, u64 si
                return mmap(NULL, size, PROT_RW, MAP_ANON_NORESERVE, -1, 0);
        }
 }
+
+int memcpy_toiovecend(const struct iovec *iov, int iovlen, unsigned char 
*kdata, size_t len)
+{
+       size_t copy, start_len = len;
+
+       for (; len > 0 && iovlen > 0; ++iov, --iovlen) {
+               copy = min(iov->iov_len, len);
+               memcpy(iov->iov_base, kdata, copy);
+               kdata += copy;
+               len -= copy;
+       }
+
+       return start_len - len;
+}
diff --git a/tools/kvm/virtio/net.c b/tools/kvm/virtio/net.c
index c0a8f12..c99c5a6 100644
--- a/tools/kvm/virtio/net.c
+++ b/tools/kvm/virtio/net.c
@@ -32,8 +32,8 @@
 struct net_dev;
 
 struct net_dev_operations {
-       int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev);
-       int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev);
+       int (*rx)(unsigned char *buffer, u32 length, struct net_dev *ndev);
+       int (*tx)(struct iovec *iov, u16 out, struct net_dev *ndev);
 };
 
 struct net_dev {
@@ -63,6 +63,8 @@ struct net_dev {
 static LIST_HEAD(ndevs);
 static int compat_id = -1;
 
+#define MAX_PACKET_SIZE ((u16)(-1))
+
 static void *virtio_net_rx_thread(void *p)
 {
        struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
@@ -90,9 +92,23 @@ static void *virtio_net_rx_thread(void *p)
                mutex_unlock(&ndev->io_lock[id]);
 
                while (virt_queue__available(vq)) {
+                       unsigned char buffer[MAX_PACKET_SIZE];
+                       struct virtio_net_hdr_mrg_rxbuf *hdr;
+
+                       len = ndev->ops->rx(buffer, MAX_PACKET_SIZE, ndev);
                        head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
-                       len = ndev->ops->rx(iov, in, ndev);
-                       virt_queue__set_used_elem(vq, head, len);
+                       hdr = (void *)iov[0].iov_base;
+                       while (len) {
+                               int copied;
+
+                               copied = memcpy_toiovecend(iov, in, buffer, 
len);
+                               len -= copied;
+                               hdr->num_buffers++;
+                               virt_queue__set_used_elem(vq, head, copied);
+                               if (len == 0)
+                                       break;
+                               head = virt_queue__get_iov(vq, iov, &out, &in, 
kvm);
+                       }
 
                        /* We should interrupt guest right now, otherwise 
latency is huge. */
                        if (virtio_queue__should_signal(vq))
@@ -241,7 +257,7 @@ static bool virtio_net__tap_init(const struct 
virtio_net_params *params,
                goto fail;
        }
 
-       hdr_len = sizeof(struct virtio_net_hdr);
+       hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
        if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0)
                pr_warning("Config tap device TUNSETVNETHDRSZ error");
 
@@ -300,9 +316,9 @@ static inline int tap_ops_tx(struct iovec *iov, u16 out, 
struct net_dev *ndev)
        return writev(ndev->tap_fd, iov, out);
 }
 
-static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
+static inline int tap_ops_rx(unsigned char *buffer, u32 length, struct net_dev 
*ndev)
 {
-       return readv(ndev->tap_fd, iov, in);
+       return read(ndev->tap_fd, buffer, length);
 }
 
 static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
@@ -310,9 +326,9 @@ static inline int uip_ops_tx(struct iovec *iov, u16 out, 
struct net_dev *ndev)
        return uip_tx(iov, out, &ndev->info);
 }
 
-static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
+static inline int uip_ops_rx(unsigned char *buffer, u32 length, struct net_dev 
*ndev)
 {
-       return uip_rx(iov, in, &ndev->info);
+       return uip_rx(buffer, length, &ndev->info);
 }
 
 static struct net_dev_operations tap_ops = {
@@ -347,7 +363,8 @@ static u32 get_host_features(struct kvm *kvm, void *dev)
                | 1UL << VIRTIO_RING_F_EVENT_IDX
                | 1UL << VIRTIO_RING_F_INDIRECT_DESC
                | 1UL << VIRTIO_NET_F_CTRL_VQ
-               | 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0);
+               | 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0)
+               | 1UL << VIRTIO_NET_F_MRG_RXBUF;
 }
 
 static void set_guest_features(struct kvm *kvm, void *dev, u32 features)
-- 
1.8.2.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to