From: Anton Ivanov <anton.iva...@cambridgegreys.com>

1. TSO/GSO support where applicable or available
RX - raw and tap
TX - tap only (raw appears to be hitting a bug in the
af_packet family in the kernel resulting in it being
stuck in a -ENOBUFS loop).

This results in TX/RX TCP performance ~ 2-3 times higher
than qemu on same hardware (measured with iperf).

2. Cleanup and unification of the RX/TX code to use the
same skb and msg prep routines.

Adds two new transport arguments applicable to all transports

gro - enable/disable GRO in driver
vec - enable/disable multi-message vector IO

3. Adds change/set device features support. GRO, GSO, SG, etc.
can now be adjusted via ethtool.

Signed-off-by: Anton Ivanov <anton.iva...@cambridgegreys.com>
---
 arch/um/drivers/vector_kern.c       | 167 ++++++++++++++++++++++++++----------
 arch/um/drivers/vector_kern.h       |   1 +
 arch/um/drivers/vector_transports.c |  15 ++--
 arch/um/drivers/vector_user.c       |   5 +-
 4 files changed, 135 insertions(+), 53 deletions(-)

diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c
index f0ea7f98b86c..268862d8f915 100644
--- a/arch/um/drivers/vector_kern.c
+++ b/arch/um/drivers/vector_kern.c
@@ -75,7 +75,7 @@ static void vector_eth_configure(int n, struct arglist *def);
 #define SAFETY_MARGIN 32
 #define DEFAULT_VECTOR_SIZE 64
 #define TX_SMALL_PACKET 128
-#define MAX_IOV_SIZE 8
+#define MAX_IOV_SIZE (MAX_SKB_FRAGS + 1)
 
 static const struct {
        const char string[ETH_GSTRING_LEN];
@@ -162,15 +162,45 @@ static int get_headroom(struct arglist *def)
        return DEFAULT_HEADROOM;
 }
 
+static int get_req_size(struct arglist *def)
+{
+       char *gro = uml_vector_fetch_arg(def, "gro");
+       long result;
+
+       if (gro != NULL) {
+               if (kstrtoul(gro, 10, &result) == 0) {
+                       if (result > 0)
+                               return 65536;
+               }
+       }
+       return get_mtu(def) + ETH_HEADER_OTHER + get_headroom(def) + SAFETY_MARGIN;
+}
+
+
 static int get_transport_options(struct arglist *def)
 {
        char *transport = uml_vector_fetch_arg(def, "transport");
+       char *vector = uml_vector_fetch_arg(def, "vec");
+
+       int vec_rx = VECTOR_RX;
+       int vec_tx = VECTOR_TX;
+       long parsed;
+
+       if (vector != NULL) {
+               if (kstrtoul(vector, 10, &parsed) == 0) {
+                       if (parsed == 0) {
+                               vec_rx = 0;
+                               vec_tx = 0;
+                       }
+               }
+       }
+
 
        if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0)
-               return (VECTOR_RX | VECTOR_BPF);
+               return (vec_rx | VECTOR_BPF);
        if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0)
-               return (VECTOR_TX | VECTOR_RX | VECTOR_BPF);
-       return (VECTOR_TX | VECTOR_RX);
+               return (vec_rx | vec_tx | VECTOR_BPF);
+       return (vec_rx | vec_tx);
 }
 
 
@@ -547,13 +577,59 @@ static struct vector_queue *create_queue(
  * just read into a prepared queue filled with skbuffs.
  */
 
+static struct sk_buff *prep_skb(struct vector_private *vp, struct user_msghdr *msg)
+{
+       int linear = vp->max_packet + vp->headroom + SAFETY_MARGIN;
+       struct sk_buff *result;
+       int iov_index = 0, len;
+       struct iovec *iov = msg->msg_iov;
+       int err, nr_frags, frag;
+       skb_frag_t *skb_frag;
+
+       if (vp->req_size <= linear)
+               len = linear;
+       else
+               len = vp->req_size;
+       result = alloc_skb_with_frags(linear, len - vp->max_packet, 3, &err, GFP_ATOMIC);
+       if (vp->header_size > 0)
+               iov_index++;
+       if (result == NULL) {
+               iov[iov_index].iov_base = NULL;
+               iov[iov_index].iov_len = 0;
+               goto done;
+       }
+       skb_reserve(result, vp->headroom);
+       result->dev = vp->dev;
+       skb_put(result, vp->max_packet);
+       result->data_len = len - vp->max_packet;
+       result->len += len - vp->max_packet;
+       skb_reset_mac_header(result);
+       result->ip_summed = CHECKSUM_NONE;
+       iov[iov_index].iov_base = result->data;
+       iov[iov_index].iov_len = vp->max_packet;
+       iov_index++;
+
+       nr_frags = skb_shinfo(result)->nr_frags;
+       for (frag = 0; frag < nr_frags; frag++) {
+               skb_frag = &skb_shinfo(result)->frags[frag];
+               iov[iov_index].iov_base = skb_frag_address_safe(skb_frag);
+               if (iov[iov_index].iov_base != NULL)
+                       iov[iov_index].iov_len = skb_frag_size(skb_frag);
+               else
+                       iov[iov_index].iov_len = 0;
+               iov_index++;
+       }
+done:
+       msg->msg_iovlen = iov_index;
+       return result;
+}
+
+
 /* Prepare queue for recvmmsg one-shot rx - fill with fresh sk_buffs*/
 
 static void prep_queue_for_rx(struct vector_queue *qi)
 {
        struct vector_private *vp = netdev_priv(qi->dev);
-       struct sk_buff *skb;
-       struct iovec *iov;
        struct mmsghdr *mmsg_vector = qi->mmsg_vector;
        void **skbuff_vector = qi->skbuff_vector;
        int i;
@@ -566,26 +642,7 @@ static void prep_queue_for_rx(struct vector_queue *qi)
                 * This allows us stop faffing around with a "drop buffer"
                 */
 
-               skb = netdev_alloc_skb(
-                       vp->dev,
-                       vp->max_packet + vp->headroom + SAFETY_MARGIN);
-               iov = mmsg_vector->msg_hdr.msg_iov;
-               mmsg_vector->msg_len = 0;
-               if (vp->header_size > 0)
-                       iov++;
-               if (skb != NULL) {
-                       skb_reserve(skb, vp->headroom);
-                       skb->dev = qi->dev;
-                       skb_put(skb, vp->max_packet);
-                       skb_reset_mac_header(skb);
-                       skb->ip_summed = CHECKSUM_NONE;
-                       iov->iov_base = skb->data;
-                       iov->iov_len = vp->max_packet;
-               } else {
-                       iov->iov_base = NULL;
-                       iov->iov_len = 0;
-               }
-               *skbuff_vector = skb;
+               *skbuff_vector = prep_skb(vp, &mmsg_vector->msg_hdr);
                skbuff_vector++;
                mmsg_vector++;
        }
@@ -738,7 +795,7 @@ static int vector_legacy_rx(struct vector_private *vp)
 {
        int pkt_len;
        struct user_msghdr hdr;
-       struct iovec iov[2]; /* header + data use case only */
+       struct iovec iov[2 + MAX_IOV_SIZE]; /* header + data + frags */
        int iovpos = 0;
        struct sk_buff *skb;
        int header_check;
@@ -746,34 +803,25 @@ static int vector_legacy_rx(struct vector_private *vp)
        hdr.msg_name = NULL;
        hdr.msg_namelen = 0;
        hdr.msg_iov = (struct iovec *) &iov;
-       hdr.msg_iovlen = 1;
        hdr.msg_control = NULL;
        hdr.msg_controllen = 0;
        hdr.msg_flags = 0;
 
        if (vp->header_size > 0) {
-               iov[iovpos].iov_base = vp->header_rxbuffer;
-               iov[iovpos].iov_len = vp->rx_header_size;
-               hdr.msg_iovlen++;
-               iovpos++;
+               iov[0].iov_base = vp->header_rxbuffer;
+               iov[0].iov_len = vp->header_size;
        }
 
-       skb = netdev_alloc_skb(vp->dev,
-                       vp->max_packet + vp->headroom + SAFETY_MARGIN);
+       skb = prep_skb(vp, &hdr);
+
        if (skb == NULL) {
                /* Read a packet into drop_buffer and don't do
                 * anything with it.
                 */
                iov[iovpos].iov_base = drop_buffer;
                iov[iovpos].iov_len = DROP_BUFFER_SIZE;
+               hdr.msg_iovlen = 1;
                vp->dev->stats.rx_dropped++;
-       } else {
-               skb_reserve(skb, vp->headroom);
-               skb->dev = vp->dev;
-               skb_put(skb, vp->max_packet);
-               skb_reset_mac_header(skb);
-               iov[iovpos].iov_base = skb->data;
-               iov[iovpos].iov_len = vp->max_packet;
        }
 
        pkt_len = uml_vector_recvmsg(vp->fds->rx_fd, &hdr, 0);
@@ -794,7 +842,7 @@ static int vector_legacy_rx(struct vector_private *vp)
                                        skb->ip_summed = CHECKSUM_UNNECESSARY;
                                }
                        }
-                       skb_trim(skb, pkt_len - vp->rx_header_size);
+                       pskb_trim(skb, pkt_len - vp->rx_header_size);
                        skb->protocol = eth_type_trans(skb, skb->dev);
                        vp->dev->stats.rx_bytes += skb->len;
                        vp->dev->stats.rx_packets++;
@@ -898,7 +946,7 @@ static int vector_mmsg_rx(struct vector_private *vp)
                                        skb->ip_summed = CHECKSUM_UNNECESSARY;
                                }
                        }
-                       skb_trim(skb,
+                       pskb_trim(skb,
                                mmsg_vector->msg_len - vp->rx_header_size);
                        skb->protocol = eth_type_trans(skb, skb->dev);
                        /*
@@ -1109,7 +1157,7 @@ static int vector_net_open(struct net_device *dev)
 
        if ((vp->options & VECTOR_RX) > 0) {
                vp->rx_queue = create_queue(
-                       vp, get_depth(vp->parsed), vp->rx_header_size, 0);
+                       vp, get_depth(vp->parsed), vp->rx_header_size, MAX_IOV_SIZE);
                vp->rx_queue->queue_depth = get_depth(vp->parsed);
        } else {
                vp->header_rxbuffer = kmalloc(vp->rx_header_size, GFP_KERNEL);
@@ -1200,6 +1248,30 @@ static void vector_net_tx_timeout(struct net_device *dev)
        schedule_work(&vp->reset_tx);
 }
 
+static netdev_features_t vector_fix_features(struct net_device *dev,
+       netdev_features_t features)
+{
+       features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
+       return features;
+}
+
+static int vector_set_features(struct net_device *dev,
+       netdev_features_t features)
+{
+       struct vector_private *vp = netdev_priv(dev);
+       /* Adjust buffer sizes for GSO/GRO. Unfortunately, there is
+       * no way to negotiate it on raw sockets, so we can change
+       * only our side.
+       */
+       if (features & NETIF_F_GRO)
+               /* All new frame buffers will be GRO-sized */
+               vp->req_size = 65536;
+       else
+               /* All new frame buffers will be normal sized */
+               vp->req_size = vp->max_packet + vp->headroom + SAFETY_MARGIN;
+       return 0;
+}
+
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static void vector_net_poll_controller(struct net_device *dev)
 {
@@ -1303,6 +1375,8 @@ static const struct net_device_ops vector_netdev_ops = {
        .ndo_tx_timeout         = vector_net_tx_timeout,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
+       .ndo_fix_features       = vector_fix_features,
+       .ndo_set_features       = vector_set_features,
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller = vector_net_poll_controller,
 #endif
@@ -1394,10 +1468,11 @@ static void vector_eth_configure(
                .opened                 = false,
                .transport_data         = NULL,
                .in_write_poll          = false,
-               .coalesce               = 2
+               .coalesce               = 2,
+               .req_size               = get_req_size(def)
                });
 
-       dev->features = NETIF_F_SG;
+       dev->features = dev->hw_features = (NETIF_F_SG | NETIF_F_FRAGLIST);
        tasklet_init(&vp->tx_poll, vector_tx_poll, (unsigned long)vp);
        INIT_WORK(&vp->reset_tx, vector_reset_tx);
 
diff --git a/arch/um/drivers/vector_kern.h b/arch/um/drivers/vector_kern.h
index a9ade0851fda..699696deb396 100644
--- a/arch/um/drivers/vector_kern.h
+++ b/arch/um/drivers/vector_kern.h
@@ -90,6 +90,7 @@ struct vector_private {
        void *transport_data; /* transport specific params if needed */
 
        int max_packet;
+       int req_size; /* different from max packet - used for TSO */
        int headroom;
 
        int options;
diff --git a/arch/um/drivers/vector_transports.c b/arch/um/drivers/vector_transports.c
index 9f07d585f71b..57aa9cb5434c 100644
--- a/arch/um/drivers/vector_transports.c
+++ b/arch/um/drivers/vector_transports.c
@@ -187,9 +187,8 @@ static int raw_verify_header (
 {
        struct virtio_net_hdr *vheader = (struct virtio_net_hdr *) header;
 
-       if (vheader->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
-               printk(KERN_ERR "raw: GSO enabled on interface, please turn off");
-               return -1;      /* GSO, we cannot process this */
+       if ((vheader->gso_type != VIRTIO_NET_HDR_GSO_NONE) && (vp->req_size != 65536)) {
+               printk(KERN_INFO "Incoming GSO frames and GRO disabled on the interface");
        }
        if ((vheader->flags & VIRTIO_NET_HDR_F_DATA_VALID) > 0)
                return 1;
@@ -389,8 +388,9 @@ static int build_raw_transport_data(struct vector_private *vp)
                vp->verify_header = &raw_verify_header;
                vp->header_size = sizeof(struct virtio_net_hdr);
                vp->rx_header_size = sizeof(struct virtio_net_hdr);
-               vp->dev->features |= NETIF_F_HW_CSUM; /* TSO does not work on RAW */
-               printk(KERN_INFO "raw: using vnet headers to offload checksum");
+               vp->dev->hw_features |= (NETIF_F_GRO); /* TSO does not work on RAW */
+               vp->dev->features |= (NETIF_F_RXCSUM | NETIF_F_HW_CSUM | NETIF_F_GRO);
+               printk(KERN_INFO "raw: using vnet headers for tso and tx/rx checksum");
        }
        return 0;
 }
@@ -402,7 +402,10 @@ static int build_tap_transport_data(struct vector_private *vp)
                vp->verify_header = &raw_verify_header;
                vp->header_size = sizeof(struct virtio_net_hdr);
                vp->rx_header_size = sizeof(struct virtio_net_hdr);
-               vp->dev->features |= (NETIF_F_HW_CSUM | NETIF_F_TSO);
+               vp->dev->hw_features |= (NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GRO);
+               vp->dev->features |=
+                       (NETIF_F_RXCSUM | NETIF_F_HW_CSUM |
+                               NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GRO);
+               printk(KERN_INFO "tap/raw: using vnet headers for tso and tx/rx checksum");
        } else {
                return 0; /* do not try to enable tap too if raw failed */
diff --git a/arch/um/drivers/vector_user.c b/arch/um/drivers/vector_user.c
index 9210bf2db569..259c3c639eab 100644
--- a/arch/um/drivers/vector_user.c
+++ b/arch/um/drivers/vector_user.c
@@ -115,7 +115,7 @@ static struct vector_fds *user_init_tap_fds(struct arglist *ifspec)
        struct ifreq ifr;
        int fd = -1;
        struct sockaddr_ll sock;
-       int err = -ENOMEM;
+       int err = -ENOMEM, offload;
        char *iface;
        struct vector_fds *result = NULL;
 
@@ -153,6 +153,9 @@ static struct vector_fds *user_init_tap_fds(struct arglist *ifspec)
                goto tap_cleanup;
        }
 
+       offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6;
+       ioctl(fd, TUNSETOFFLOAD, offload);
+
        /* RAW */
 
        fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
-- 
2.11.0


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
User-mode-linux-devel mailing list
User-mode-linux-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/user-mode-linux-devel

Reply via email to