Author: sephe
Date: Tue Aug  2 06:36:47 2016
New Revision: 303656
URL: https://svnweb.freebsd.org/changeset/base/303656

Log:
  tcp/lro: Implement hash table for LRO entries.
  
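  Matching entries are now looked up in a hash bucket instead of by
  walking the entire active list for each received segment.  This
  significantly improves HTTP workload performance and reduces HTTP
  workload latency.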
  
  Reviewed by:  rrs, gallatin, hps
  Obtained from:        rrs, gallatin
  Sponsored by: Netflix (rrs, gallatin), Microsoft (sephe)
  Differential Revision:        https://reviews.freebsd.org/D6689

Modified:
  head/sys/netinet/tcp_lro.c
  head/sys/netinet/tcp_lro.h

Modified: head/sys/netinet/tcp_lro.c
==============================================================================
--- head/sys/netinet/tcp_lro.c  Tue Aug  2 03:05:59 2016        (r303655)
+++ head/sys/netinet/tcp_lro.c  Tue Aug  2 06:36:47 2016        (r303656)
@@ -68,19 +68,24 @@ static MALLOC_DEFINE(M_LRO, "LRO", "LRO 
 #endif
 
 static void    tcp_lro_rx_done(struct lro_ctrl *lc);
+static int     tcp_lro_rx2(struct lro_ctrl *lc, struct mbuf *m,
+                   uint32_t csum, int use_hash);
 
 static __inline void
-tcp_lro_active_insert(struct lro_ctrl *lc, struct lro_entry *le)
+tcp_lro_active_insert(struct lro_ctrl *lc, struct lro_head *bucket,
+    struct lro_entry *le)
 {
 
        LIST_INSERT_HEAD(&lc->lro_active, le, next);
+       LIST_INSERT_HEAD(bucket, le, hash_next);
 }
 
 static __inline void
 tcp_lro_active_remove(struct lro_entry *le)
 {
 
-       LIST_REMOVE(le, next);
+       LIST_REMOVE(le, next);          /* active list */
+       LIST_REMOVE(le, hash_next);     /* hash bucket */
 }
 
 int
@@ -95,7 +100,7 @@ tcp_lro_init_args(struct lro_ctrl *lc, s
 {
        struct lro_entry *le;
        size_t size;
-       unsigned i;
+       unsigned i, elements;
 
        lc->lro_bad_csum = 0;
        lc->lro_queued = 0;
@@ -110,6 +115,18 @@ tcp_lro_init_args(struct lro_ctrl *lc, s
        LIST_INIT(&lc->lro_free);
        LIST_INIT(&lc->lro_active);
 
+       /* create hash table to accelerate entry lookup */
+       if (lro_entries > lro_mbufs)
+               elements = lro_entries;
+       else
+               elements = lro_mbufs;
+       lc->lro_hash = phashinit_flags(elements, M_LRO, &lc->lro_hashsz,
+           HASH_NOWAIT);
+       if (lc->lro_hash == NULL) {
+               memset(lc, 0, sizeof(*lc));
+               return (ENOMEM);
+       }
+
        /* compute size to allocate */
        size = (lro_mbufs * sizeof(struct lro_mbuf_sort)) +
            (lro_entries * sizeof(*le));
@@ -147,6 +164,13 @@ tcp_lro_free(struct lro_ctrl *lc)
                m_freem(le->m_head);
        }
 
+       /* free hash table */
+       if (lc->lro_hash != NULL) {
+               free(lc->lro_hash, M_LRO);
+               lc->lro_hash = NULL;
+       }
+       lc->lro_hashsz = 0;
+
        /* free mbuf array, if any */
        for (x = 0; x != lc->lro_mbuf_count; x++)
                m_freem(lc->lro_mbuf_data[x].mb);
@@ -487,7 +511,7 @@ tcp_lro_flush_all(struct lro_ctrl *lc)
                }
 
                /* add packet to LRO engine */
-               if (tcp_lro_rx(lc, mb, 0) != 0) {
+               if (tcp_lro_rx2(lc, mb, 0, 0) != 0) {
                        /* input packet to network layer */
                        (*lc->ifp->if_input)(lc->ifp, mb);
                        lc->lro_queued++;
@@ -561,8 +585,8 @@ tcp_lro_rx_ipv4(struct lro_ctrl *lc, str
 }
 #endif
 
-int
-tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
+static int
+tcp_lro_rx2(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum, int use_hash)
 {
        struct lro_entry *le;
        struct ether_header *eh;
@@ -578,6 +602,7 @@ tcp_lro_rx(struct lro_ctrl *lc, struct m
        tcp_seq seq;
        int error, ip_len, l;
        uint16_t eh_type, tcp_data_len;
+       struct lro_head *bucket;
 
        /* We expect a contiguous header [eh, ip, tcp]. */
 
@@ -670,8 +695,41 @@ tcp_lro_rx(struct lro_ctrl *lc, struct m
 
        seq = ntohl(th->th_seq);
 
+       if (!use_hash) {
+               bucket = &lc->lro_hash[0];
+       } else if (M_HASHTYPE_ISHASH(m)) {
+               bucket = &lc->lro_hash[m->m_pkthdr.flowid % lc->lro_hashsz];
+       } else {
+               uint32_t hash;
+
+               switch (eh_type) {
+#ifdef INET
+               case ETHERTYPE_IP:
+                       hash = ip4->ip_src.s_addr + ip4->ip_dst.s_addr;
+                       break;
+#endif
+#ifdef INET6
+               case ETHERTYPE_IPV6:
+                       hash = ip6->ip6_src.s6_addr32[0] +
+                           ip6->ip6_dst.s6_addr32[0];
+                       hash += ip6->ip6_src.s6_addr32[1] +
+                           ip6->ip6_dst.s6_addr32[1];
+                       hash += ip6->ip6_src.s6_addr32[2] +
+                           ip6->ip6_dst.s6_addr32[2];
+                       hash += ip6->ip6_src.s6_addr32[3] +
+                           ip6->ip6_dst.s6_addr32[3];
+                       break;
+#endif
+               default:
+                       hash = 0;
+                       break;
+               }
+               hash += th->th_sport + th->th_dport;
+               bucket = &lc->lro_hash[hash % lc->lro_hashsz];
+       }
+
        /* Try to find a matching previous segment. */
-       LIST_FOREACH(le, &lc->lro_active, next) {
+       LIST_FOREACH(le, bucket, hash_next) {
                if (le->eh_type != eh_type)
                        continue;
                if (le->source_port != th->th_sport ||
@@ -779,7 +837,7 @@ tcp_lro_rx(struct lro_ctrl *lc, struct m
        /* Start a new segment chain. */
        le = LIST_FIRST(&lc->lro_free);
        LIST_REMOVE(le, next);
-       tcp_lro_active_insert(lc, le);
+       tcp_lro_active_insert(lc, bucket, le);
        getmicrotime(&le->mtime);
 
        /* Start filling in details. */
@@ -837,6 +895,13 @@ tcp_lro_rx(struct lro_ctrl *lc, struct m
        return (0);
 }
 
+int
+tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
+{
+
+       return tcp_lro_rx2(lc, m, csum, 1);
+}
+
 void
 tcp_lro_queue_mbuf(struct lro_ctrl *lc, struct mbuf *mb)
 {

Modified: head/sys/netinet/tcp_lro.h
==============================================================================
--- head/sys/netinet/tcp_lro.h  Tue Aug  2 03:05:59 2016        (r303655)
+++ head/sys/netinet/tcp_lro.h  Tue Aug  2 06:36:47 2016        (r303656)
@@ -40,6 +40,7 @@
 
 struct lro_entry {
        LIST_ENTRY(lro_entry)   next;
+       LIST_ENTRY(lro_entry)   hash_next;
        struct mbuf             *m_head;
        struct mbuf             *m_tail;
        union {
@@ -95,6 +96,8 @@ struct lro_ctrl {
        unsigned short  lro_ackcnt_lim;         /* max # of aggregated ACKs */
        unsigned        lro_length_lim;         /* max len of aggregated data */
 
+       u_long          lro_hashsz;
+       struct lro_head *lro_hash;
        struct lro_head lro_active;
        struct lro_head lro_free;
 };
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscribe@freebsd.org"

Reply via email to