When a SYNFLOOD targets a non-SO_REUSEPORT listener, multiple
CPUs contend on sk->sk_refcnt and sk->sk_wmem_alloc updates.

By letting listeners use the SOCK_RCU_FREE infrastructure,
we can relax TCP_LISTEN lookup rules and avoid touching sk_refcnt.

Note that we still use SLAB_DESTROY_BY_RCU rules for other sockets,
only listeners are impacted by this change.

Peak performance under SYNFLOOD is increased by ~33% :

On my test machine, I could process 3.2 Mpps instead of 2.4 Mpps

The most expensive functions are now skb_set_owner_w() and sock_wfree(),
contending on sk->sk_wmem_alloc when cooking SYNACKs and freeing them.

Signed-off-by: Eric Dumazet <eduma...@google.com>
---
 include/net/inet6_hashtables.h | 12 ++++---
 include/net/inet_hashtables.h  | 40 ++++++++++++++---------
 net/dccp/ipv4.c                |  7 ++--
 net/dccp/ipv6.c                |  7 ++--
 net/ipv4/inet_diag.c           |  3 +-
 net/ipv4/inet_hashtables.c     | 73 +++++++++++++++---------------------------
 net/ipv4/tcp_ipv4.c            | 66 +++++++++++++++++++-------------------
 net/ipv6/inet6_hashtables.c    | 56 +++++++++-----------------------
 net/ipv6/tcp_ipv6.c            | 27 +++++++++-------
 net/netfilter/xt_socket.c      |  6 ++--
 10 files changed, 134 insertions(+), 163 deletions(-)

diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index 28332bdac333..b87becacd9d3 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -66,13 +66,15 @@ static inline struct sock *__inet6_lookup(struct net *net,
                                          const __be16 sport,
                                          const struct in6_addr *daddr,
                                          const u16 hnum,
-                                         const int dif)
+                                         const int dif,
+                                         bool *refcounted)
 {
        struct sock *sk = __inet6_lookup_established(net, hashinfo, saddr,
                                                sport, daddr, hnum, dif);
+       *refcounted = true;
        if (sk)
                return sk;
-
+       *refcounted = false;
        return inet6_lookup_listener(net, hashinfo, skb, doff, saddr, sport,
                                     daddr, hnum, dif);
 }
@@ -81,17 +83,19 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
                                              struct sk_buff *skb, int doff,
                                              const __be16 sport,
                                              const __be16 dport,
-                                             int iif)
+                                             int iif,
+                                             bool *refcounted)
 {
        struct sock *sk = skb_steal_sock(skb);
 
+       *refcounted = true;
        if (sk)
                return sk;
 
        return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
                              doff, &ipv6_hdr(skb)->saddr, sport,
                              &ipv6_hdr(skb)->daddr, ntohs(dport),
-                             iif);
+                             iif, refcounted);
 }
 
 struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index a77acee93aaf..0574493e3899 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -100,14 +100,10 @@ struct inet_bind_hashbucket {
 
 /*
  * Sockets can be hashed in established or listening table
- * We must use different 'nulls' end-of-chain value for listening
- * hash table, or we might find a socket that was closed and
- * reallocated/inserted into established hash table
  */
-#define LISTENING_NULLS_BASE (1U << 29)
 struct inet_listen_hashbucket {
        spinlock_t              lock;
-       struct hlist_nulls_head head;
+       struct hlist_head       head;
 };
 
 /* This is for listening sockets, thus all sockets which possess wildcards. */
@@ -304,14 +300,20 @@ static inline struct sock *__inet_lookup(struct net *net,
                                         struct sk_buff *skb, int doff,
                                         const __be32 saddr, const __be16 sport,
                                         const __be32 daddr, const __be16 dport,
-                                        const int dif)
+                                        const int dif,
+                                        bool *refcounted)
 {
        u16 hnum = ntohs(dport);
-       struct sock *sk = __inet_lookup_established(net, hashinfo,
-                               saddr, sport, daddr, hnum, dif);
+       struct sock *sk;
 
-       return sk ? : __inet_lookup_listener(net, hashinfo, skb, doff, saddr,
-                                            sport, daddr, hnum, dif);
+       sk = __inet_lookup_established(net, hashinfo, saddr, sport,
+                                      daddr, hnum, dif);
+       *refcounted = true;
+       if (sk)
+               return sk;
+       *refcounted = false;
+       return __inet_lookup_listener(net, hashinfo, skb, doff, saddr,
+                                     sport, daddr, hnum, dif);
 }
 
 static inline struct sock *inet_lookup(struct net *net,
@@ -322,10 +324,13 @@ static inline struct sock *inet_lookup(struct net *net,
                                       const int dif)
 {
        struct sock *sk;
+       bool refcounted;
 
        sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
-                          dport, dif);
+                          dport, dif, &refcounted);
 
+       if (sk && !refcounted && !atomic_inc_not_zero(&sk->sk_refcnt))
+               sk = NULL;
        return sk;
 }
 
@@ -333,17 +338,20 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
                                             struct sk_buff *skb,
                                             int doff,
                                             const __be16 sport,
-                                            const __be16 dport)
+                                            const __be16 dport,
+                                            bool *refcounted)
 {
        struct sock *sk = skb_steal_sock(skb);
        const struct iphdr *iph = ip_hdr(skb);
 
+       *refcounted = true;
        if (sk)
                return sk;
-       else
-               return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
-                                    doff, iph->saddr, sport,
-                                    iph->daddr, dport, inet_iif(skb));
+
+       return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
+                            doff, iph->saddr, sport,
+                            iph->daddr, dport, inet_iif(skb),
+                            refcounted);
 }
 
 u32 sk_ehashfn(const struct sock *sk);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 9c67a961ba53..6438c5a7efc4 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -764,6 +764,7 @@ static int dccp_v4_rcv(struct sk_buff *skb)
 {
        const struct dccp_hdr *dh;
        const struct iphdr *iph;
+       bool refcounted;
        struct sock *sk;
        int min_cov;
 
@@ -801,7 +802,7 @@ static int dccp_v4_rcv(struct sk_buff *skb)
 
 lookup:
        sk = __inet_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
-                              dh->dccph_sport, dh->dccph_dport);
+                              dh->dccph_sport, dh->dccph_dport, &refcounted);
        if (!sk) {
                dccp_pr_debug("failed to look up flow ID in table and "
                              "get corresponding socket\n");
@@ -830,6 +831,7 @@ lookup:
                        goto lookup;
                }
                sock_hold(sk);
+               refcounted = true;
                nsk = dccp_check_req(sk, skb, req);
                if (!nsk) {
                        reqsk_put(req);
@@ -886,7 +888,8 @@ discard_it:
        return 0;
 
 discard_and_relse:
-       sock_put(sk);
+       if (refcounted)
+               sock_put(sk);
        goto discard_it;
 }
 
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 4663a01d5039..71bf1deba4c5 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -642,6 +642,7 @@ discard:
 static int dccp_v6_rcv(struct sk_buff *skb)
 {
        const struct dccp_hdr *dh;
+       bool refcounted;
        struct sock *sk;
        int min_cov;
 
@@ -670,7 +671,7 @@ static int dccp_v6_rcv(struct sk_buff *skb)
 lookup:
        sk = __inet6_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
                                dh->dccph_sport, dh->dccph_dport,
-                               inet6_iif(skb));
+                               inet6_iif(skb), &refcounted);
        if (!sk) {
                dccp_pr_debug("failed to look up flow ID in table and "
                              "get corresponding socket\n");
@@ -699,6 +700,7 @@ lookup:
                        goto lookup;
                }
                sock_hold(sk);
+               refcounted = true;
                nsk = dccp_check_req(sk, skb, req);
                if (!nsk) {
                        reqsk_put(req);
@@ -752,7 +754,8 @@ discard_it:
        return 0;
 
 discard_and_relse:
-       sock_put(sk);
+       if (refcounted)
+               sock_put(sk);
        goto discard_it;
 }
 
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index ea8df527b279..bd591eb81ec9 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -775,13 +775,12 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
 
                for (i = s_i; i < INET_LHTABLE_SIZE; i++) {
                        struct inet_listen_hashbucket *ilb;
-                       struct hlist_nulls_node *node;
                        struct sock *sk;
 
                        num = 0;
                        ilb = &hashinfo->listening_hash[i];
                        spin_lock_bh(&ilb->lock);
-                       sk_nulls_for_each(sk, node, &ilb->head) {
+                       sk_for_each(sk, &ilb->head) {
                                struct inet_sock *inet = inet_sk(sk);
 
                                if (!net_eq(sock_net(sk), net))
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 387338d71dcd..98ba03b6f87d 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -198,13 +198,13 @@ static inline int compute_score(struct sock *sk, struct net *net,
 }
 
 /*
- * Don't inline this cruft. Here are some nice properties to exploit here. The
- * BSD API does not allow a listening sock to specify the remote port nor the
+ * Here are some nice properties to exploit here. The BSD API
+ * does not allow a listening sock to specify the remote port nor the
  * remote address for the connection. So always assume those are both
  * wildcarded during the search since they can never be otherwise.
  */
 
-
+/* called with rcu_read_lock() : No refcount taken on the socket */
 struct sock *__inet_lookup_listener(struct net *net,
                                    struct inet_hashinfo *hashinfo,
                                    struct sk_buff *skb, int doff,
@@ -212,37 +212,27 @@ struct sock *__inet_lookup_listener(struct net *net,
                                    const __be32 daddr, const unsigned short hnum,
                                    const int dif)
 {
-       struct sock *sk, *result;
-       struct hlist_nulls_node *node;
        unsigned int hash = inet_lhashfn(net, hnum);
        struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
-       int score, hiscore, matches = 0, reuseport = 0;
-       bool select_ok = true;
+       int score, hiscore = 0, matches = 0, reuseport = 0;
+       struct sock *sk, *result = NULL;
        u32 phash = 0;
 
-begin:
-       result = NULL;
-       hiscore = 0;
-       sk_nulls_for_each_rcu(sk, node, &ilb->head) {
+       sk_for_each_rcu(sk, &ilb->head) {
                score = compute_score(sk, net, hnum, daddr, dif);
                if (score > hiscore) {
-                       result = sk;
-                       hiscore = score;
                        reuseport = sk->sk_reuseport;
                        if (reuseport) {
                                phash = inet_ehashfn(net, daddr, hnum,
                                                     saddr, sport);
-                               if (select_ok) {
-                                       struct sock *sk2;
-                                       sk2 = reuseport_select_sock(sk, phash,
-                                                                   skb, doff);
-                                       if (sk2) {
-                                               result = sk2;
-                                               goto found;
-                                       }
-                               }
+                               result = reuseport_select_sock(sk, phash,
+                                                              skb, doff);
+                               if (result)
+                                       return result;
                                matches = 1;
                        }
+                       result = sk;
+                       hiscore = score;
                } else if (score == hiscore && reuseport) {
                        matches++;
                        if (reciprocal_scale(phash, matches) == 0)
@@ -250,24 +240,6 @@ begin:
                        phash = next_pseudo_random32(phash);
                }
        }
-       /*
-        * if the nulls value we got at the end of this lookup is
-        * not the expected one, we must restart lookup.
-        * We probably met an item that was moved to another chain.
-        */
-       if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE)
-               goto begin;
-       if (result) {
-found:
-               if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
-                       result = NULL;
-               else if (unlikely(compute_score(result, net, hnum, daddr,
-                                 dif) < hiscore)) {
-                       sock_put(result);
-                       select_ok = false;
-                       goto begin;
-               }
-       }
        return result;
 }
 EXPORT_SYMBOL_GPL(__inet_lookup_listener);
@@ -508,7 +480,8 @@ int __inet_hash(struct sock *sk, struct sock *osk,
                if (err)
                        goto unlock;
        }
-       __sk_nulls_add_node_rcu(sk, &ilb->head);
+       hlist_add_head_rcu(&sk->sk_node, &ilb->head);
+       sock_set_flag(sk, SOCK_RCU_FREE);
        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 unlock:
        spin_unlock(&ilb->lock);
@@ -535,20 +508,25 @@ void inet_unhash(struct sock *sk)
 {
        struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
        spinlock_t *lock;
+       bool listener = false;
        int done;
 
        if (sk_unhashed(sk))
                return;
 
-       if (sk->sk_state == TCP_LISTEN)
+       if (sk->sk_state == TCP_LISTEN) {
+               lock = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)].lock;
-       else
+               listener = true;
+       } else {
                lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
-
+       }
        spin_lock_bh(lock);
        if (rcu_access_pointer(sk->sk_reuseport_cb))
                reuseport_detach_sock(sk);
-       done = __sk_nulls_del_node_init_rcu(sk);
+       if (listener)
+               done = __sk_del_node_init(sk);
+       else
+               done = __sk_nulls_del_node_init_rcu(sk);
        if (done)
                sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
        spin_unlock_bh(lock);
@@ -684,9 +662,8 @@ void inet_hashinfo_init(struct inet_hashinfo *h)
 
        for (i = 0; i < INET_LHTABLE_SIZE; i++) {
                spin_lock_init(&h->listening_hash[i].lock);
-               INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].head,
-                                     i + LISTENING_NULLS_BASE);
-               }
+               INIT_HLIST_HEAD(&h->listening_hash[i].head);
+       }
 }
 EXPORT_SYMBOL_GPL(inet_hashinfo_init);
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ad450509029b..e5f924b29946 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -628,6 +628,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
 
        net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
 #ifdef CONFIG_TCP_MD5SIG
+       rcu_read_lock();
        hash_location = tcp_parse_md5sig_option(th);
        if (sk && sk_fullsock(sk)) {
                key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
@@ -646,16 +647,18 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
                                             ntohs(th->source), inet_iif(skb));
                /* don't send rst if it can't find key */
                if (!sk1)
-                       return;
-               rcu_read_lock();
+                       goto out;
+
                key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
                                        &ip_hdr(skb)->saddr, AF_INET);
                if (!key)
-                       goto release_sk1;
+                       goto out;
+
 
                genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
                if (genhash || memcmp(hash_location, newhash, 16) != 0)
-                       goto release_sk1;
+                       goto out;
+
        }
 
        if (key) {
@@ -698,11 +701,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
        TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
 
 #ifdef CONFIG_TCP_MD5SIG
-release_sk1:
-       if (sk1) {
-               rcu_read_unlock();
-               sock_put(sk1);
-       }
+out:
+       rcu_read_unlock();
 #endif
 }
 
@@ -1538,11 +1538,12 @@ EXPORT_SYMBOL(tcp_prequeue);
 
 int tcp_v4_rcv(struct sk_buff *skb)
 {
+       struct net *net = dev_net(skb->dev);
        const struct iphdr *iph;
        const struct tcphdr *th;
+       bool refcounted;
        struct sock *sk;
        int ret;
-       struct net *net = dev_net(skb->dev);
 
        if (skb->pkt_type != PACKET_HOST)
                goto discard_it;
@@ -1588,7 +1589,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
 
 lookup:
        sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
-                              th->dest);
+                              th->dest, &refcounted);
        if (!sk)
                goto no_tcp_socket;
 
@@ -1609,7 +1610,11 @@ process:
                        inet_csk_reqsk_queue_drop_and_put(sk, req);
                        goto lookup;
                }
+               /* We own a reference on the listener, increase it again
+                * as we might lose it too soon.
+                */
                sock_hold(sk);
+               refcounted = true;
                nsk = tcp_check_req(sk, skb, req, false);
                if (!nsk) {
                        reqsk_put(req);
@@ -1665,7 +1670,8 @@ process:
        bh_unlock_sock(sk);
 
 put_and_return:
-       sock_put(sk);
+       if (refcounted)
+               sock_put(sk);
 
        return ret;
 
@@ -1688,7 +1694,8 @@ discard_it:
        return 0;
 
 discard_and_relse:
-       sock_put(sk);
+       if (refcounted)
+               sock_put(sk);
        goto discard_it;
 
 do_time_wait:
@@ -1712,6 +1719,7 @@ do_time_wait:
                if (sk2) {
                        inet_twsk_deschedule_put(inet_twsk(sk));
                        sk = sk2;
+                       refcounted = false;
                        goto process;
                }
                /* Fall through to ACK */
@@ -1845,17 +1853,17 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock);
  */
 static void *listening_get_next(struct seq_file *seq, void *cur)
 {
-       struct inet_connection_sock *icsk;
-       struct hlist_nulls_node *node;
-       struct sock *sk = cur;
-       struct inet_listen_hashbucket *ilb;
        struct tcp_iter_state *st = seq->private;
        struct net *net = seq_file_net(seq);
+       struct inet_listen_hashbucket *ilb;
+       struct inet_connection_sock *icsk;
+       struct sock *sk = cur;
 
        if (!sk) {
+get_head:
                ilb = &tcp_hashinfo.listening_hash[st->bucket];
                spin_lock_bh(&ilb->lock);
-               sk = sk_nulls_head(&ilb->head);
+               sk = sk_head(&ilb->head);
                st->offset = 0;
                goto get_sk;
        }
@@ -1863,28 +1871,20 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
        ++st->num;
        ++st->offset;
 
-       sk = sk_nulls_next(sk);
+       sk = sk_next(sk);
 get_sk:
-       sk_nulls_for_each_from(sk, node) {
+       sk_for_each_from(sk) {
                if (!net_eq(sock_net(sk), net))
                        continue;
-               if (sk->sk_family == st->family) {
-                       cur = sk;
-                       goto out;
-               }
+               if (sk->sk_family == st->family)
+                       return sk;
                icsk = inet_csk(sk);
        }
        spin_unlock_bh(&ilb->lock);
        st->offset = 0;
-       if (++st->bucket < INET_LHTABLE_SIZE) {
-               ilb = &tcp_hashinfo.listening_hash[st->bucket];
-               spin_lock_bh(&ilb->lock);
-               sk = sk_nulls_head(&ilb->head);
-               goto get_sk;
-       }
-       cur = NULL;
-out:
-       return cur;
+       if (++st->bucket < INET_LHTABLE_SIZE)
+               goto get_head;
+       return NULL;
 }
 
 static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index e6ef6ce1ed74..607da088344d 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -120,6 +120,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
        return score;
 }
 
+/* called with rcu_read_lock() */
 struct sock *inet6_lookup_listener(struct net *net,
                struct inet_hashinfo *hashinfo,
                struct sk_buff *skb, int doff,
@@ -127,38 +128,27 @@ struct sock *inet6_lookup_listener(struct net *net,
                const __be16 sport, const struct in6_addr *daddr,
                const unsigned short hnum, const int dif)
 {
-       struct sock *sk;
-       const struct hlist_nulls_node *node;
-       struct sock *result;
-       int score, hiscore, matches = 0, reuseport = 0;
-       bool select_ok = true;
-       u32 phash = 0;
        unsigned int hash = inet_lhashfn(net, hnum);
        struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
+       int score, hiscore = 0, matches = 0, reuseport = 0;
+       struct sock *sk, *result = NULL;
+       u32 phash = 0;
 
-begin:
-       result = NULL;
-       hiscore = 0;
-       sk_nulls_for_each(sk, node, &ilb->head) {
+       sk_for_each_rcu(sk, &ilb->head) {
                score = compute_score(sk, net, hnum, daddr, dif);
                if (score > hiscore) {
                        hiscore = score;
-                       result = sk;
                        reuseport = sk->sk_reuseport;
                        if (reuseport) {
                                phash = inet6_ehashfn(net, daddr, hnum,
                                                      saddr, sport);
-                               if (select_ok) {
-                                       struct sock *sk2;
-                                       sk2 = reuseport_select_sock(sk, phash,
-                                                                   skb, doff);
-                                       if (sk2) {
-                                               result = sk2;
-                                               goto found;
-                                       }
-                               }
+                               result = reuseport_select_sock(sk, phash,
+                                                              skb, doff);
+                               if (result)
+                                       return result;
                                matches = 1;
                        }
+                       result = sk;
                } else if (score == hiscore && reuseport) {
                        matches++;
                        if (reciprocal_scale(phash, matches) == 0)
@@ -166,24 +156,6 @@ begin:
                        phash = next_pseudo_random32(phash);
                }
        }
-       /*
-        * if the nulls value we got at the end of this lookup is
-        * not the expected one, we must restart lookup.
-        * We probably met an item that was moved to another chain.
-        */
-       if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE)
-               goto begin;
-       if (result) {
-found:
-               if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
-                       result = NULL;
-               else if (unlikely(compute_score(result, net, hnum, daddr,
-                                 dif) < hiscore)) {
-                       sock_put(result);
-                       select_ok = false;
-                       goto begin;
-               }
-       }
        return result;
 }
 EXPORT_SYMBOL_GPL(inet6_lookup_listener);
@@ -195,10 +167,12 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
                          const int dif)
 {
        struct sock *sk;
+       bool refcounted;
 
        sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
-                           ntohs(dport), dif);
-
+                           ntohs(dport), dif, &refcounted);
+       if (sk && !refcounted && !atomic_inc_not_zero(&sk->sk_refcnt))
+               sk = NULL;
        return sk;
 }
 EXPORT_SYMBOL_GPL(inet6_lookup);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 711d209f9124..f0422e782731 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -858,6 +858,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
                return;
 
 #ifdef CONFIG_TCP_MD5SIG
+       rcu_read_lock();
        hash_location = tcp_parse_md5sig_option(th);
        if (sk && sk_fullsock(sk)) {
                key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
@@ -875,16 +876,15 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
                                           th->source, &ipv6h->daddr,
                                           ntohs(th->source), tcp_v6_iif(skb));
                if (!sk1)
-                       return;
+                       goto out;
 
-               rcu_read_lock();
                key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
                if (!key)
-                       goto release_sk1;
+                       goto out;
 
                genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
                if (genhash || memcmp(hash_location, newhash, 16) != 0)
-                       goto release_sk1;
+                       goto out;
        }
 #endif
 
@@ -898,11 +898,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
        tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
 
 #ifdef CONFIG_TCP_MD5SIG
-release_sk1:
-       if (sk1) {
-               rcu_read_unlock();
-               sock_put(sk1);
-       }
+out:
+       rcu_read_unlock();
 #endif
 }
 
@@ -1351,6 +1348,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
 {
        const struct tcphdr *th;
        const struct ipv6hdr *hdr;
+       bool refcounted;
        struct sock *sk;
        int ret;
        struct net *net = dev_net(skb->dev);
@@ -1381,7 +1379,8 @@ static int tcp_v6_rcv(struct sk_buff *skb)
 
 lookup:
        sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
-                               th->source, th->dest, inet6_iif(skb));
+                               th->source, th->dest, inet6_iif(skb),
+                               &refcounted);
        if (!sk)
                goto no_tcp_socket;
 
@@ -1404,6 +1403,7 @@ process:
                        goto lookup;
                }
                sock_hold(sk);
+               refcounted = true;
                nsk = tcp_check_req(sk, skb, req, false);
                if (!nsk) {
                        reqsk_put(req);
@@ -1460,7 +1460,8 @@ process:
        bh_unlock_sock(sk);
 
 put_and_return:
-       sock_put(sk);
+       if (refcounted)
+               sock_put(sk);
        return ret ? -1 : 0;
 
 no_tcp_socket:
@@ -1483,7 +1484,8 @@ discard_it:
        return 0;
 
 discard_and_relse:
-       sock_put(sk);
+       if (refcounted)
+               sock_put(sk);
        goto discard_it;
 
 do_time_wait:
@@ -1514,6 +1516,7 @@ do_time_wait:
                        inet_twsk_deschedule_put(tw);
                        sk = sk2;
                        tcp_v6_restore_cb(skb);
+                       refcounted = false;
                        goto process;
                }
                /* Fall through to ACK */
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 49d14ecad444..b10ade272b50 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -120,9 +120,9 @@ xt_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff,
 {
        switch (protocol) {
        case IPPROTO_TCP:
-               return __inet_lookup(net, &tcp_hashinfo, skb, doff,
-                                    saddr, sport, daddr, dport,
-                                    in->ifindex);
+               return inet_lookup(net, &tcp_hashinfo, skb, doff,
+                                  saddr, sport, daddr, dport,
+                                  in->ifindex);
        case IPPROTO_UDP:
                return udp4_lib_lookup(net, saddr, sport, daddr, dport,
                                       in->ifindex);
-- 
2.8.0.rc3.226.g39d4020

Reply via email to