On Mon, Apr 22, 2013 at 08:32:27PM -0400, Sasha Levin wrote:
> Support mergable rx buffers for virtio-net. This helps reduce the amount
> of memory the guest kernel has to allocate per rx vq.

With this, we always do an extra copy of the rx data into a linear buffer.
I am thinking about whether we should keep the non MRG_RXBUF code. For
tap use case, it is fine, user can use vhost. But for uip use case, we
can not use vhost.

> Signed-off-by: Sasha Levin <[email protected]>
> ---
>  tools/kvm/include/kvm/uip.h  |  4 ++--
>  tools/kvm/include/kvm/util.h |  3 +++
>  tools/kvm/net/uip/core.c     | 54 
> +++++---------------------------------------
>  tools/kvm/net/uip/tcp.c      |  2 +-
>  tools/kvm/net/uip/udp.c      |  2 +-
>  tools/kvm/util/util.c        | 15 ++++++++++++
>  tools/kvm/virtio/net.c       | 37 ++++++++++++++++++++++--------
>  7 files changed, 55 insertions(+), 62 deletions(-)
> 
> diff --git a/tools/kvm/include/kvm/uip.h b/tools/kvm/include/kvm/uip.h
> index ac248d2..fa82f10 100644
> --- a/tools/kvm/include/kvm/uip.h
> +++ b/tools/kvm/include/kvm/uip.h
> @@ -252,7 +252,7 @@ struct uip_tcp_socket {
>  };
>  
>  struct uip_tx_arg {
> -     struct virtio_net_hdr *vnet;
> +     struct virtio_net_hdr_mrg_rxbuf *vnet;
>       struct uip_info *info;
>       struct uip_eth *eth;
>       int vnet_len;
> @@ -332,7 +332,7 @@ static inline u16 uip_eth_hdrlen(struct uip_eth *eth)
>  }
>  
>  int uip_tx(struct iovec *iov, u16 out, struct uip_info *info);
> -int uip_rx(struct iovec *iov, u16 in, struct uip_info *info);
> +int uip_rx(unsigned char *buffer, u32 length, struct uip_info *info);
>  int uip_init(struct uip_info *info);
>  
>  int uip_tx_do_ipv4_udp_dhcp(struct uip_tx_arg *arg);
> diff --git a/tools/kvm/include/kvm/util.h b/tools/kvm/include/kvm/util.h
> index 0df9f0d..6f8ac83 100644
> --- a/tools/kvm/include/kvm/util.h
> +++ b/tools/kvm/include/kvm/util.h
> @@ -22,6 +22,7 @@
>  #include <sys/param.h>
>  #include <sys/types.h>
>  #include <linux/types.h>
> +#include <sys/uio.h>
>  
>  #ifdef __GNUC__
>  #define NORETURN __attribute__((__noreturn__))
> @@ -94,4 +95,6 @@ struct kvm;
>  void *mmap_hugetlbfs(struct kvm *kvm, const char *htlbfs_path, u64 size);
>  void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *hugetlbfs_path, 
> u64 size);
>  
> +int memcpy_toiovecend(const struct iovec *iov, int iovlen, unsigned char 
> *kdata, size_t len);
> +
>  #endif /* KVM__UTIL_H */
> diff --git a/tools/kvm/net/uip/core.c b/tools/kvm/net/uip/core.c
> index 4e5bb82..d9e9993 100644
> --- a/tools/kvm/net/uip/core.c
> +++ b/tools/kvm/net/uip/core.c
> @@ -7,7 +7,7 @@
>  
>  int uip_tx(struct iovec *iov, u16 out, struct uip_info *info)
>  {
> -     struct virtio_net_hdr *vnet;
> +     struct virtio_net_hdr_mrg_rxbuf *vnet;
>       struct uip_tx_arg arg;
>       int eth_len, vnet_len;
>       struct uip_eth *eth;
> @@ -74,63 +74,21 @@ int uip_tx(struct iovec *iov, u16 out, struct uip_info 
> *info)
>       return vnet_len + eth_len;
>  }
>  
> -int uip_rx(struct iovec *iov, u16 in, struct uip_info *info)
> +int uip_rx(unsigned char *buffer, u32 length, struct uip_info *info)
>  {
> -     struct virtio_net_hdr *vnet;
> -     struct uip_eth *eth;
>       struct uip_buf *buf;
> -     int vnet_len;
> -     int eth_len;
> -     char *p;
>       int len;
> -     int cnt;
> -     int i;
>  
>       /*
>        * Sleep until there is a buffer for guest
>        */
>       buf = uip_buf_get_used(info);
>  
> -     /*
> -      * Fill device to guest buffer, vnet hdr fisrt
> -      */
> -     vnet_len = iov[0].iov_len;
> -     vnet = iov[0].iov_base;
> -     if (buf->vnet_len > vnet_len) {
> -             len = -1;
> -             goto out;
> -     }
> -     memcpy(vnet, buf->vnet, buf->vnet_len);
> -
> -     /*
> -      * Then, the real eth data
> -      * Note: Be sure buf->eth_len is not bigger than the buffer len that 
> guest provides
> -      */
> -     cnt = buf->eth_len;
> -     p = buf->eth;
> -     for (i = 1; i < in; i++) {
> -             eth_len = iov[i].iov_len;
> -             eth = iov[i].iov_base;
> -             if (cnt > eth_len) {
> -                     memcpy(eth, p, eth_len);
> -                     cnt -= eth_len;
> -                     p += eth_len;
> -             } else {
> -                     memcpy(eth, p, cnt);
> -                     cnt -= cnt;
> -                     break;
> -             }
> -     }
> -
> -     if (cnt) {
> -             pr_warning("uip_rx error");
> -             len = -1;
> -             goto out;
> -     }
> +     memcpy(buffer, buf->vnet, buf->vnet_len);
> +     memcpy(buffer + buf->vnet_len, buf->eth, buf->eth_len);
>  
>       len = buf->vnet_len + buf->eth_len;
>  
> -out:
>       uip_buf_set_free(info, buf);
>       return len;
>  }
> @@ -172,8 +130,8 @@ int uip_init(struct uip_info *info)
>       }
>  
>       list_for_each_entry(buf, buf_head, list) {
> -             buf->vnet       = malloc(sizeof(struct virtio_net_hdr));
> -             buf->vnet_len   = sizeof(struct virtio_net_hdr);
> +             buf->vnet       = malloc(sizeof(struct 
> virtio_net_hdr_mrg_rxbuf));
> +             buf->vnet_len   = sizeof(struct virtio_net_hdr_mrg_rxbuf);
>               buf->eth        = malloc(1024*64 + sizeof(struct 
> uip_pseudo_hdr));
>               buf->eth_len    = 1024*64 + sizeof(struct uip_pseudo_hdr);
>  
> diff --git a/tools/kvm/net/uip/tcp.c b/tools/kvm/net/uip/tcp.c
> index 9044f40..a99b95e 100644
> --- a/tools/kvm/net/uip/tcp.c
> +++ b/tools/kvm/net/uip/tcp.c
> @@ -153,7 +153,7 @@ static int uip_tcp_payload_send(struct uip_tcp_socket 
> *sk, u8 flag, u16 payload_
>       /*
>        * virtio_net_hdr
>        */
> -     buf->vnet_len   = sizeof(struct virtio_net_hdr);
> +     buf->vnet_len   = sizeof(struct virtio_net_hdr_mrg_rxbuf);
>       memset(buf->vnet, 0, buf->vnet_len);
>  
>       buf->eth_len    = ntohs(ip2->len) + uip_eth_hdrlen(&ip2->eth);
> diff --git a/tools/kvm/net/uip/udp.c b/tools/kvm/net/uip/udp.c
> index 31c417c..083c221 100644
> --- a/tools/kvm/net/uip/udp.c
> +++ b/tools/kvm/net/uip/udp.c
> @@ -142,7 +142,7 @@ int uip_udp_make_pkg(struct uip_info *info, struct 
> uip_udp_socket *sk, struct ui
>       /*
>        * virtio_net_hdr
>        */
> -     buf->vnet_len   = sizeof(struct virtio_net_hdr);
> +     buf->vnet_len   = sizeof(struct virtio_net_hdr_mrg_rxbuf);
>       memset(buf->vnet, 0, buf->vnet_len);
>  
>       buf->eth_len    = ntohs(ip2->len) + uip_eth_hdrlen(&ip2->eth);
> diff --git a/tools/kvm/util/util.c b/tools/kvm/util/util.c
> index c11a15a..4e0069c 100644
> --- a/tools/kvm/util/util.c
> +++ b/tools/kvm/util/util.c
> @@ -9,6 +9,7 @@
>  #include <sys/mman.h>
>  #include <sys/stat.h>
>  #include <sys/statfs.h>
> +#include <linux/kernel.h>
>  
>  static void report(const char *prefix, const char *err, va_list params)
>  {
> @@ -131,3 +132,17 @@ void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char 
> *hugetlbfs_path, u64 si
>               return mmap(NULL, size, PROT_RW, MAP_ANON_NORESERVE, -1, 0);
>       }
>  }
> +
> +int memcpy_toiovecend(const struct iovec *iov, int iovlen, unsigned char 
> *kdata, size_t len)
> +{
> +     size_t copy, start_len = len;
> +
> +     for (; len > 0 && iovlen > 0; ++iov, --iovlen) {
> +             copy = min(iov->iov_len, len);
> +             memcpy(iov->iov_base, kdata, copy);
> +             kdata += copy;
> +             len -= copy;
> +     }
> +
> +     return start_len - len;
> +}
> diff --git a/tools/kvm/virtio/net.c b/tools/kvm/virtio/net.c
> index c0a8f12..c99c5a6 100644
> --- a/tools/kvm/virtio/net.c
> +++ b/tools/kvm/virtio/net.c
> @@ -32,8 +32,8 @@
>  struct net_dev;
>  
>  struct net_dev_operations {
> -     int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev);
> -     int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev);
> +     int (*rx)(unsigned char *buffer, u32 length, struct net_dev *ndev);
> +     int (*tx)(struct iovec *iov, u16 out, struct net_dev *ndev);
>  };
>  
>  struct net_dev {
> @@ -63,6 +63,8 @@ struct net_dev {
>  static LIST_HEAD(ndevs);
>  static int compat_id = -1;
>  
> +#define MAX_PACKET_SIZE ((u16)(-1))

Why 64KB - 1? The package can be 65550 bytes which is max TCP or UDP
plus 14 bytes eth header.

> +
>  static void *virtio_net_rx_thread(void *p)
>  {
>       struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
> @@ -90,9 +92,23 @@ static void *virtio_net_rx_thread(void *p)
>               mutex_unlock(&ndev->io_lock[id]);
>  
>               while (virt_queue__available(vq)) {
> +                     unsigned char buffer[MAX_PACKET_SIZE];

The buffer size should at least be:

MAX_PACKET_SIZE + sizeof(struct virtio_net_hdr_mrg_rxbuf)

> +                     struct virtio_net_hdr_mrg_rxbuf *hdr;
> +
> +                     len = ndev->ops->rx(buffer, MAX_PACKET_SIZE, ndev);
>                       head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
> -                     len = ndev->ops->rx(iov, in, ndev);
> -                     virt_queue__set_used_elem(vq, head, len);
> +                     hdr = (void *)iov[0].iov_base;
> +                     while (len) {
> +                             int copied;
> +
> +                             copied = memcpy_toiovecend(iov, in, buffer, 
> len);
> +                             len -= copied;
> +                             hdr->num_buffers++;
> +                             virt_queue__set_used_elem(vq, head, copied);
> +                             if (len == 0)
> +                                     break;
> +                             head = virt_queue__get_iov(vq, iov, &out, &in, 
> kvm);

Here, need to check if we still have available buffer from guest. If not
need to wait for it.

> +                     }
>  
>                       /* We should interrupt guest right now, otherwise 
> latency is huge. */
>                       if (virtio_queue__should_signal(vq))
> @@ -241,7 +257,7 @@ static bool virtio_net__tap_init(const struct 
> virtio_net_params *params,
>               goto fail;
>       }
>  
> -     hdr_len = sizeof(struct virtio_net_hdr);
> +     hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
>       if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0)
>               pr_warning("Config tap device TUNSETVNETHDRSZ error");
>  
> @@ -300,9 +316,9 @@ static inline int tap_ops_tx(struct iovec *iov, u16 out, 
> struct net_dev *ndev)
>       return writev(ndev->tap_fd, iov, out);
>  }
>  
> -static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
> +static inline int tap_ops_rx(unsigned char *buffer, u32 length, struct 
> net_dev *ndev)
>  {
> -     return readv(ndev->tap_fd, iov, in);
> +     return read(ndev->tap_fd, buffer, length);
>  }
>  
>  static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev 
> *ndev)
> @@ -310,9 +326,9 @@ static inline int uip_ops_tx(struct iovec *iov, u16 out, 
> struct net_dev *ndev)
>       return uip_tx(iov, out, &ndev->info);
>  }
>  
> -static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
> +static inline int uip_ops_rx(unsigned char *buffer, u32 length, struct 
> net_dev *ndev)
>  {
> -     return uip_rx(iov, in, &ndev->info);
> +     return uip_rx(buffer, length, &ndev->info);
>  }
>  
>  static struct net_dev_operations tap_ops = {
> @@ -347,7 +363,8 @@ static u32 get_host_features(struct kvm *kvm, void *dev)
>               | 1UL << VIRTIO_RING_F_EVENT_IDX
>               | 1UL << VIRTIO_RING_F_INDIRECT_DESC
>               | 1UL << VIRTIO_NET_F_CTRL_VQ
> -             | 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0);
> +             | 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0)
> +             | 1UL << VIRTIO_NET_F_MRG_RXBUF;
>  }
>  
>  static void set_guest_features(struct kvm *kvm, void *dev, u32 features)
> -- 
> 1.8.2.1
> 

-- 
Asias
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to