Re: [PATCH net] ipv4: ipv6: netfilter: Adjust the frag mem limit when truesize changes

2018-12-05 Thread David Miller
From: Jiri Wiesner 
Date: Wed, 5 Dec 2018 16:55:29 +0100

> The *_frag_reasm() functions are susceptible to miscalculating the byte
> count of packet fragments in case the truesize of a head buffer changes.
> The truesize member may be changed by the call to skb_unclone(), leaving
> the fragment memory limit counter unbalanced even if all fragments are
> processed. This miscalculation goes unnoticed as long as the network
> namespace which holds the counter is not destroyed.
> 
> Should an attempt be made to destroy a network namespace that holds an
> unbalanced fragment memory limit counter the cleanup of the namespace
> never finishes. The thread handling the cleanup gets stuck in
> inet_frags_exit_net() waiting for the percpu counter to reach zero. The
> thread is usually in running state with a stacktrace similar to:
> 
>  PID: 1073   TASK: 880626711440  CPU: 1   COMMAND: "kworker/u48:4"
>   #5 [880621563d48] _raw_spin_lock at 815f5480
>   #6 [880621563d48] inet_evict_bucket at 8158020b
>   #7 [880621563d80] inet_frags_exit_net at 8158051c
>   #8 [880621563db0] ops_exit_list at 814f5856
>   #9 [880621563dd8] cleanup_net at 814f67c0
>  #10 [880621563e38] process_one_work at 81096f14
> 
> It is not possible to create new network namespaces, and processes
> that call unshare() end up being stuck in uninterruptible sleep state
> waiting to acquire the net_mutex.
> 
> The bug was observed in the IPv6 netfilter code by Per Sundstrom.
> I thank him for his analysis of the problem. The parts of this patch
> that apply to IPv4 and IPv6 fragment reassembly are preemptive measures.
> 
> Signed-off-by: Jiri Wiesner 
> Reported-by: Per Sundstrom 

Nice catch.

Applied and queued up for -stable, thanks!


Re: [PATCH net] ipv4: ipv6: netfilter: Adjust the frag mem limit when truesize changes

2018-12-05 Thread Peter Oskolkov
On Wed, Dec 5, 2018 at 7:57 AM Jiri Wiesner  wrote:
>
> The *_frag_reasm() functions are susceptible to miscalculating the byte
> count of packet fragments in case the truesize of a head buffer changes.
> The truesize member may be changed by the call to skb_unclone(), leaving
> the fragment memory limit counter unbalanced even if all fragments are
> processed. This miscalculation goes unnoticed as long as the network
> namespace which holds the counter is not destroyed.
>
> Should an attempt be made to destroy a network namespace that holds an
> unbalanced fragment memory limit counter the cleanup of the namespace
> never finishes. The thread handling the cleanup gets stuck in
> inet_frags_exit_net() waiting for the percpu counter to reach zero. The
> thread is usually in running state with a stacktrace similar to:
>
>  PID: 1073   TASK: 880626711440  CPU: 1   COMMAND: "kworker/u48:4"
>   #5 [880621563d48] _raw_spin_lock at 815f5480
>   #6 [880621563d48] inet_evict_bucket at 8158020b
>   #7 [880621563d80] inet_frags_exit_net at 8158051c
>   #8 [880621563db0] ops_exit_list at 814f5856
>   #9 [880621563dd8] cleanup_net at 814f67c0
>  #10 [880621563e38] process_one_work at 81096f14
>
> It is not possible to create new network namespaces, and processes
> that call unshare() end up being stuck in uninterruptible sleep state
> waiting to acquire the net_mutex.
>
> The bug was observed in the IPv6 netfilter code by Per Sundstrom.
> I thank him for his analysis of the problem. The parts of this patch
> that apply to IPv4 and IPv6 fragment reassembly are preemptive measures.
>
> Signed-off-by: Jiri Wiesner 
> Reported-by: Per Sundstrom 
> ---

Acked-by: Peter Oskolkov 

>  net/ipv4/ip_fragment.c  | 7 +++
>  net/ipv6/netfilter/nf_conntrack_reasm.c | 8 +++-
>  net/ipv6/reassembly.c   | 8 +++-
>  3 files changed, 21 insertions(+), 2 deletions(-)
>
> diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
> index d6ee343fdb86..aa0b22697998 100644
> --- a/net/ipv4/ip_fragment.c
> +++ b/net/ipv4/ip_fragment.c
> @@ -515,6 +515,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff 
> *skb,
> struct rb_node *rbn;
> int len;
> int ihlen;
> +   int delta;
> int err;
> u8 ecn;
>
> @@ -556,10 +557,16 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff 
> *skb,
> if (len > 65535)
> goto out_oversize;
>
> +   delta = - head->truesize;
> +
> /* Head of list must not be cloned. */
> if (skb_unclone(head, GFP_ATOMIC))
> goto out_nomem;
>
> +   delta += head->truesize;
> +   if (delta)
> +   add_frag_mem_limit(qp->q.net, delta);
> +
> /* If the first fragment is fragmented itself, we split
>  * it to two chunks: the first with data and paged part
>  * and the second, holding only fragments. */
> diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c 
> b/net/ipv6/netfilter/nf_conntrack_reasm.c
> index d219979c3e52..181da2c40f9a 100644
> --- a/net/ipv6/netfilter/nf_conntrack_reasm.c
> +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
> @@ -341,7 +341,7 @@ static bool
>  nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev,  struct 
> net_device *dev)
>  {
> struct sk_buff *fp, *head = fq->q.fragments;
> -   intpayload_len;
> +   intpayload_len, delta;
> u8 ecn;
>
> inet_frag_kill(&fq->q);
> @@ -363,10 +363,16 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff 
> *prev,  struct net_devic
> return false;
> }
>
> +   delta = - head->truesize;
> +
> /* Head of list must not be cloned. */
> if (skb_unclone(head, GFP_ATOMIC))
> return false;
>
> +   delta += head->truesize;
> +   if (delta)
> +   add_frag_mem_limit(fq->q.net, delta);
> +
> /* If the first fragment is fragmented itself, we split
>  * it to two chunks: the first with data and paged part
>  * and the second, holding only fragments. */
> diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
> index 5c3c92713096..aa26c45486d9 100644
> --- a/net/ipv6/reassembly.c
> +++ b/net/ipv6/reassembly.c
> @@ -281,7 +281,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct 
> sk_buff *prev,
>  {
> struct net *net = container_of(fq->q.net, struct net, ipv6.frags);
> struct sk_buff *fp, *head = fq->q.fragments;
> -   intpayload_len;
> +   intpayload_len, delta;
> unsigned int nhoff;
> int sum_truesize;
> u8 ecn;
> @@ -322,10 +322,16 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct 
> sk_buff *prev,
> if (payload_len > IPV6_MAXPLEN)
> goto out_oversize;
>
> +   delta = - head->truesize;
> +
> /* Head of list must not be cloned. */
> if (sk

[PATCH net] ipv4: ipv6: netfilter: Adjust the frag mem limit when truesize changes

2018-12-05 Thread Jiri Wiesner
The *_frag_reasm() functions are susceptible to miscalculating the byte
count of packet fragments in case the truesize of a head buffer changes.
The truesize member may be changed by the call to skb_unclone(), leaving
the fragment memory limit counter unbalanced even if all fragments are
processed. This miscalculation goes unnoticed as long as the network
namespace which holds the counter is not destroyed.

Should an attempt be made to destroy a network namespace that holds an
unbalanced fragment memory limit counter the cleanup of the namespace
never finishes. The thread handling the cleanup gets stuck in
inet_frags_exit_net() waiting for the percpu counter to reach zero. The
thread is usually in running state with a stacktrace similar to:

 PID: 1073   TASK: 880626711440  CPU: 1   COMMAND: "kworker/u48:4"
  #5 [880621563d48] _raw_spin_lock at 815f5480
  #6 [880621563d48] inet_evict_bucket at 8158020b
  #7 [880621563d80] inet_frags_exit_net at 8158051c
  #8 [880621563db0] ops_exit_list at 814f5856
  #9 [880621563dd8] cleanup_net at 814f67c0
 #10 [880621563e38] process_one_work at 81096f14

It is not possible to create new network namespaces, and processes
that call unshare() end up being stuck in uninterruptible sleep state
waiting to acquire the net_mutex.

The bug was observed in the IPv6 netfilter code by Per Sundstrom.
I thank him for his analysis of the problem. The parts of this patch
that apply to IPv4 and IPv6 fragment reassembly are preemptive measures.

Signed-off-by: Jiri Wiesner 
Reported-by: Per Sundstrom 
---
 net/ipv4/ip_fragment.c  | 7 +++
 net/ipv6/netfilter/nf_conntrack_reasm.c | 8 +++-
 net/ipv6/reassembly.c   | 8 +++-
 3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index d6ee343fdb86..aa0b22697998 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -515,6 +515,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff 
*skb,
struct rb_node *rbn;
int len;
int ihlen;
+   int delta;
int err;
u8 ecn;
 
@@ -556,10 +557,16 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff 
*skb,
if (len > 65535)
goto out_oversize;
 
+   delta = - head->truesize;
+
/* Head of list must not be cloned. */
if (skb_unclone(head, GFP_ATOMIC))
goto out_nomem;
 
+   delta += head->truesize;
+   if (delta)
+   add_frag_mem_limit(qp->q.net, delta);
+
/* If the first fragment is fragmented itself, we split
 * it to two chunks: the first with data and paged part
 * and the second, holding only fragments. */
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c 
b/net/ipv6/netfilter/nf_conntrack_reasm.c
index d219979c3e52..181da2c40f9a 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -341,7 +341,7 @@ static bool
 nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev,  struct 
net_device *dev)
 {
struct sk_buff *fp, *head = fq->q.fragments;
-   intpayload_len;
+   intpayload_len, delta;
u8 ecn;
 
inet_frag_kill(&fq->q);
@@ -363,10 +363,16 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff 
*prev,  struct net_devic
return false;
}
 
+   delta = - head->truesize;
+
/* Head of list must not be cloned. */
if (skb_unclone(head, GFP_ATOMIC))
return false;
 
+   delta += head->truesize;
+   if (delta)
+   add_frag_mem_limit(fq->q.net, delta);
+
/* If the first fragment is fragmented itself, we split
 * it to two chunks: the first with data and paged part
 * and the second, holding only fragments. */
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 5c3c92713096..aa26c45486d9 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -281,7 +281,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct 
sk_buff *prev,
 {
struct net *net = container_of(fq->q.net, struct net, ipv6.frags);
struct sk_buff *fp, *head = fq->q.fragments;
-   intpayload_len;
+   intpayload_len, delta;
unsigned int nhoff;
int sum_truesize;
u8 ecn;
@@ -322,10 +322,16 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct 
sk_buff *prev,
if (payload_len > IPV6_MAXPLEN)
goto out_oversize;
 
+   delta = - head->truesize;
+
/* Head of list must not be cloned. */
if (skb_unclone(head, GFP_ATOMIC))
goto out_oom;
 
+   delta += head->truesize;
+   if (delta)
+   add_frag_mem_limit(fq->q.net, delta);
+
/* If the first fragment is fragmented itself, we split
 * it to two chunks: the first with data and paged part
 * an