From 89aa9ca5a7866a542807af2c66fbbaa96c1e0672 Mon Sep 17 00:00:00 2001
From: Vladimir Sokolovsky <[EMAIL PROTECTED]>
Date: Tue, 24 Jun 2008 19:20:41 +0300
Subject: [PATCH] IPOIB:  add LRO support.

Add an "ipoib_use_lro" module parameter to enable LRO and an
"ipoib_lro_max_aggr" module parameter to set the maximum number of
packets to be aggregated.
LRO statistics are accessible through ethtool.

Signed-off-by: Vladimir Sokolovsky <[EMAIL PROTECTED]>
Signed-off-by: Eli Cohen <[EMAIL PROTECTED]>
---

Changes from v1:
Added a per-device use_lro setting, exposed through sysfs.

 drivers/infiniband/ulp/ipoib/Kconfig         |    1 +
 drivers/infiniband/ulp/ipoib/ipoib.h         |   15 ++++
 drivers/infiniband/ulp/ipoib/ipoib_ethtool.c |   50 ++++++++++++++
 drivers/infiniband/ulp/ipoib/ipoib_ib.c      |    8 ++-
 drivers/infiniband/ulp/ipoib/ipoib_main.c    |   94 ++++++++++++++++++++++++++
 5 files changed, 167 insertions(+), 1 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig
index 1f76bad..691525c 100644
--- a/drivers/infiniband/ulp/ipoib/Kconfig
+++ b/drivers/infiniband/ulp/ipoib/Kconfig
@@ -1,6 +1,7 @@
 config INFINIBAND_IPOIB
        tristate "IP-over-InfiniBand"
        depends on NETDEVICES && INET && (IPV6 || IPV6=n)
+       select INET_LRO
        ---help---
          Support for the IP-over-InfiniBand protocol (IPoIB). This
          transports IP packets over InfiniBand so you can use your IB
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 8754b36..3aae34d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -50,6 +50,7 @@
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_pack.h>
 #include <rdma/ib_sa.h>
+#include <linux/inet_lro.h>

 /* constants */

@@ -94,6 +95,9 @@ enum {
        IPOIB_MCAST_FLAG_BUSY     = 2,  /* joining or already joined */
        IPOIB_MCAST_FLAG_ATTACHED = 3,

+       IPOIB_MAX_LRO_DESCRIPTORS = 8,
+       IPOIB_LRO_MAX_AGGR        = 64,
+
        MAX_SEND_CQE              = 16,
        IPOIB_CM_COPYBREAK        = 256,
 };
@@ -248,6 +252,12 @@ struct ipoib_ethtool_st {
        u16     max_coalesced_frames;
 };

+struct ipoib_lro {
+       int     use_lro;        /* nonzero: RX path hands skbs to LRO */
+       struct  net_lro_mgr lro_mgr;    /* LRO state; also holds the stats */
+       struct  net_lro_desc lro_desc[IPOIB_MAX_LRO_DESCRIPTORS];
+};
+
 /*
  * Device private locking: tx_lock protects members used in TX fast
  * path (and we use LLTX so upper layers don't do extra locking).
@@ -334,6 +344,8 @@ struct ipoib_dev_priv {
        int     hca_caps;
        struct ipoib_ethtool_st ethtool;
        struct timer_list poll_timer;
+
+       struct ipoib_lro lro;
 };

 struct ipoib_ah {
@@ -417,6 +429,7 @@ static inline void ipoib_put_ah(struct ipoib_ah *ah)
 int ipoib_open(struct net_device *dev);
 int ipoib_add_pkey_attr(struct net_device *dev);
 int ipoib_add_umcast_attr(struct net_device *dev);
+int ipoib_add_use_lro_attr(struct net_device *dev);

 void ipoib_send(struct net_device *dev, struct sk_buff *skb,
                struct ipoib_ah *address, u32 qpn);
@@ -679,6 +692,8 @@ extern struct ib_sa_client ipoib_sa_client;

 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
 extern int ipoib_debug_level;
+extern int ipoib_use_lro;
+extern int ipoib_lro_max_aggr;

 #define ipoib_dbg(priv, format, arg...)                        \
        do {                                            \
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index 10279b7..79709f0 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -86,11 +86,61 @@ static int ipoib_set_coalesce(struct net_device *dev,
        return 0;
 }

+#define IPOIB_STATS_LEN  ARRAY_SIZE(ipoib_gstrings_stats)
+/* ethtool stat names; order must match ipoib_get_ethtool_stats() */
+static const char ipoib_gstrings_stats[][ETH_GSTRING_LEN] = {
+       "LRO aggregated", "LRO flushed",
+       "LRO avg aggr", "LRO no_desc"
+};
+
+/* ethtool get_strings hook: copy the statistic names for ETH_SS_STATS. */
+static void
+ipoib_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
+{
+       switch (stringset) {
+       case ETH_SS_STATS:
+               memcpy(data, ipoib_gstrings_stats,
+                       sizeof(ipoib_gstrings_stats));
+               break;
+       }
+}
+
+/*
+ * Number of entries in ethtool string set @sset; only ETH_SS_STATS is known.
+ */
+static int ipoib_get_sset_count(struct net_device *dev, int sset)
+{
+       if (sset == ETH_SS_STATS)
+               return IPOIB_STATS_LEN;
+       return -EOPNOTSUPP;
+}
+
+/*
+ * ethtool get_ethtool_stats hook: write the LRO counters into @data in the
+ * same order as the names in ipoib_gstrings_stats[].
+ */
+static void ipoib_get_ethtool_stats(struct net_device *dev,
+                               struct ethtool_stats *stats, uint64_t *data)
+{
+       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct net_lro_mgr *mgr = &priv->lro.lro_mgr;
+
+       data[0] = mgr->stats.aggregated;
+       data[1] = mgr->stats.flushed;
+       /* average aggregation per flush; 0 while nothing has been flushed */
+       data[2] = mgr->stats.flushed ?
+                 mgr->stats.aggregated / mgr->stats.flushed : 0;
+       data[3] = mgr->stats.no_desc;
+}
+
 static const struct ethtool_ops ipoib_ethtool_ops = {
        .get_drvinfo            = ipoib_get_drvinfo,
        .get_tso                = ethtool_op_get_tso,
        .get_coalesce           = ipoib_get_coalesce,
        .set_coalesce           = ipoib_set_coalesce,
+       .get_strings            = ipoib_get_strings,
+       .get_sset_count         = ipoib_get_sset_count,
+       .get_ethtool_stats      = ipoib_get_ethtool_stats,
 };

 void ipoib_set_ethtool_ops(struct net_device *dev)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index eca8518..009b862 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -288,7 +288,10 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
        if (test_bit(IPOIB_FLAG_CSUM, &priv->flags) && likely(wc->csum_ok))
                skb->ip_summed = CHECKSUM_UNNECESSARY;

-       netif_receive_skb(skb);
+       if (priv->lro.use_lro)
+               lro_receive_skb(&priv->lro.lro_mgr, skb, 0);
+       else
+               netif_receive_skb(skb);

 repost:
        if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
@@ -448,6 +451,9 @@ poll_more:
                        goto poll_more;
        }

+       if (priv->lro.use_lro)
+               lro_flush_all(&priv->lro.lro_mgr);
+
        return done;
 }

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index bfe1dbf..f8cce51 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -60,6 +60,17 @@ MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue");
 module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444);
 MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue");

+/* Module-wide LRO defaults; ipoib_lro_setup() copies them per device. */
+int ipoib_use_lro __read_mostly;
+module_param_named(ipoib_use_lro, ipoib_use_lro, int, 0644);
+MODULE_PARM_DESC(ipoib_use_lro,  " Large Receive Offload, 1: enable, "
+               "0: disable, Default = 0");
+
+int ipoib_lro_max_aggr __read_mostly = IPOIB_LRO_MAX_AGGR;
+module_param_named(ipoib_lro_max_aggr, ipoib_lro_max_aggr, int, 0644);
+MODULE_PARM_DESC(ipoib_lro_max_aggr, " LRO: Max packets to be aggregated. "
+               "Default = 64");
+
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
 int ipoib_debug_level;

@@ -936,6 +947,85 @@ static const struct header_ops ipoib_header_ops = {
        .create = ipoib_hard_header,
 };

+/*
+ * inet_lro get_skb_header callback: locate the IPv4 and TCP headers of a
+ * received skb.  Returns 0 and fills *iphdr, *tcph and *hdr_flags when the
+ * packet is checksum-verified TCP over IPv4, or -1 otherwise.
+ */
+static int get_skb_hdr(struct sk_buff *skb, void **iphdr,
+                      void **tcph, u64 *hdr_flags, void *priv)
+{
+       unsigned int ip_len;
+       struct iphdr *iph;
+
+       if (unlikely(skb->protocol != htons(ETH_P_IP)))
+               return -1;
+       if (unlikely(skb->ip_summed != CHECKSUM_UNNECESSARY))
+               return -1;
+
+       skb_reset_network_header(skb);
+       iph = ip_hdr(skb);
+       if (iph->protocol != IPPROTO_TCP)       /* non tcp packet */
+               return -1;
+
+       ip_len = ip_hdrlen(skb);
+       skb_set_transport_header(skb, ip_len);
+       *tcph = tcp_hdr(skb);
+       /* tot_len is big-endian on the wire; convert before comparing */
+       if (ntohs(iph->tot_len) < ip_len + tcp_hdrlen(skb))
+               return -1;
+       *hdr_flags = LRO_IPV4 | LRO_TCP;
+       *iphdr = iph;
+       return 0;
+}
+
+/* sysfs "use_lro" read handler: reports the per-device flag as yes/no. */
+static ssize_t show_use_lro(struct device *d, struct device_attribute *attr,
+                        char *buf)
+{
+       struct net_device *dev = to_net_dev(d);
+       struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+       return sprintf(buf, "%s",
+                      priv->lro.use_lro ? "yes\n" : "no\n");
+}
+
+/*
+ * sysfs "use_lro" write handler: accepts exactly "yes\n" or "no\n" and sets
+ * the per-device flag accordingly; any other input is rejected with -EINVAL.
+ */
+static ssize_t set_use_lro(struct device *d, struct device_attribute *attr,
+                       const char *buf, size_t count)
+{
+       struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(d));
+       int enable = !strcmp(buf, "yes\n");
+
+       if (!enable && strcmp(buf, "no\n"))
+               return -EINVAL;
+       priv->lro.use_lro = enable;
+       return count;
+}
+
+static DEVICE_ATTR(use_lro, S_IWUSR | S_IRUGO, show_use_lro, set_use_lro);
+/* Register the use_lro sysfs attribute on the net device. */
+int ipoib_add_use_lro_attr(struct net_device *dev)
+{
+       return device_create_file(&dev->dev, &dev_attr_use_lro);
+}
+
+static void ipoib_lro_setup(struct ipoib_dev_priv *priv)
+{
+       priv->lro.use_lro = ipoib_use_lro;      /* module-parameter default */
+       priv->lro.lro_mgr.max_aggr = ipoib_lro_max_aggr;
+       priv->lro.lro_mgr.max_desc = IPOIB_MAX_LRO_DESCRIPTORS;
+       priv->lro.lro_mgr.lro_arr = priv->lro.lro_desc; /* embedded array */
+       priv->lro.lro_mgr.get_skb_header = get_skb_hdr;
+       priv->lro.lro_mgr.features = LRO_F_NAPI;
+       priv->lro.lro_mgr.dev = priv->dev;      /* set by ipoib_setup() */
+       priv->lro.lro_mgr.ip_summed = CHECKSUM_UNNECESSARY;
+       priv->lro.lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY;
+}
+
 static void ipoib_setup(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -975,6 +1065,8 @@ static void ipoib_setup(struct net_device *dev)

        priv->dev = dev;

+       ipoib_lro_setup(priv);
+
        spin_lock_init(&priv->lock);
        spin_lock_init(&priv->tx_lock);

@@ -1204,6 +1296,8 @@ static struct net_device *ipoib_add_port(const char *format,
                goto sysfs_failed;
        if (ipoib_add_umcast_attr(priv->dev))
                goto sysfs_failed;
+       if (ipoib_add_use_lro_attr(priv->dev))
+               goto sysfs_failed;
        if (device_create_file(&priv->dev->dev, &dev_attr_create_child))
                goto sysfs_failed;
        if (device_create_file(&priv->dev->dev, &dev_attr_delete_child))
--
1.5.5.1

_______________________________________________
general mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to