We must be careful to avoid leaking such sockets outside
the RCU section containing the early demux call; we clear
them on nonlocal delivery.

For IPv4 we clear the noref sk even for multicast traffic entering
the ip_mr_input() path; we will lose the mcast early demux
optimization when the host is acting as a multicast router, but
that helps to keep the code simple.

Also update all iptables/nftables extensions that can
run in the input chain and can transmit the skb outside
such path, namely TEE, nft_dup and nfqueue.

Signed-off-by: Paolo Abeni <pab...@redhat.com>
---
 net/ipv4/ip_input.c              | 8 ++++++++
 net/ipv4/netfilter/nf_dup_ipv4.c | 3 +++
 net/ipv6/ip6_input.c             | 4 ++++
 net/ipv6/netfilter/nf_dup_ipv6.c | 3 +++
 net/netfilter/nf_queue.c         | 3 +++
 5 files changed, 21 insertions(+)

diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index fa2dc8f692c6..5690ef09da28 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -351,6 +351,14 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
                }
        }
 
+       /* Since the sk has no reference to the socket, we must
+        * clear it before escaping this RCU section.
+        * The sk is just a hint and we know we are not going to use
+        * it outside the input path.
+        */
+       if (skb_dst(skb)->input != ip_local_deliver)
+               skb_clear_noref_sk(skb);
+
 #ifdef CONFIG_IP_ROUTE_CLASSID
        if (unlikely(skb_dst(skb)->tclassid)) {
                struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct);
diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c
index 39895b9ddeb9..bf8b78492fc8 100644
--- a/net/ipv4/netfilter/nf_dup_ipv4.c
+++ b/net/ipv4/netfilter/nf_dup_ipv4.c
@@ -71,6 +71,9 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum,
        nf_reset(skb);
        nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
 #endif
+       /* Avoid leaking noref sk outside the input path */
+       skb_clear_noref_sk(skb);
+
        /*
         * If we are in PREROUTING/INPUT, decrease the TTL to mitigate potential
         * loops between two hosts.
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 9ee208a348f5..e15ec2d36b9e 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -68,6 +68,10 @@ int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
        if (!skb_valid_dst(skb))
                ip6_route_input(skb);
 
+       /* see comment on ipv4 edmux */
+       if (skb_dst(skb)->input != ip6_input)
+               skb_clear_noref_sk(skb);
+
        return dst_input(skb);
 }
 
diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c
index 4a7ddeddbaab..939f6a2238f9 100644
--- a/net/ipv6/netfilter/nf_dup_ipv6.c
+++ b/net/ipv6/netfilter/nf_dup_ipv6.c
@@ -60,6 +60,9 @@ void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum,
        nf_reset(skb);
        nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
 #endif
+       /* Avoid leaking noref sk outside the input path */
+       skb_clear_noref_sk(skb);
+
        if (hooknum == NF_INET_PRE_ROUTING ||
            hooknum == NF_INET_LOCAL_IN) {
                struct ipv6hdr *iph = ipv6_hdr(skb);
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index f7e21953b1de..100eff08cb51 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -145,6 +145,9 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
                .size   = sizeof(*entry) + afinfo->route_key_size,
        };
 
+       /* Avoid leaking noref sk outside the input path */
+       skb_clear_noref_sk(skb);
+
        nf_queue_entry_get_refs(entry);
        skb_dst_force(skb);
        afinfo->saveroute(skb, entry);
-- 
2.13.5

Reply via email to