Please queue up the following networking bug fixes for 3.0-stable Thanks!
>From fbfcf8f098139cea98c7d3305aee8b2d1dd1c696 Mon Sep 17 00:00:00 2001 From: Steffen Klassert <[email protected]> Date: Tue, 4 Sep 2012 00:03:29 +0000 Subject: [PATCH 01/31] xfrm: Workaround incompatibility of ESN and async crypto
[ Upstream commit 3b59df46a449ec9975146d71318c4777ad086744 ] ESN for esp is defined in RFC 4303. This RFC assumes that the sequence number counters are always up to date. However, this is not true if an async crypto algorithm is employed. If the sequence number counters are not up to date on sequence number check, we may incorrectly update the upper 32 bit of the sequence number. This leads to a DOS. We workaround this by comparing the upper sequence number, (used for authentication) with the upper sequence number computed after the async processing. We drop the packet if these numbers are different. To do this, we introduce a recheck function that does this check in the ESN case. Signed-off-by: Steffen Klassert <[email protected]> Acked-by: Herbert Xu <[email protected]> Signed-off-by: David S. Miller <[email protected]> --- include/net/xfrm.h | 3 +++ net/xfrm/xfrm_input.c | 2 +- net/xfrm/xfrm_replay.c | 15 +++++++++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index b203e14..921f627 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -269,6 +269,9 @@ struct xfrm_replay { int (*check)(struct xfrm_state *x, struct sk_buff *skb, __be32 net_seq); + int (*recheck)(struct xfrm_state *x, + struct sk_buff *skb, + __be32 net_seq); void (*notify)(struct xfrm_state *x, int event); int (*overflow)(struct xfrm_state *x, struct sk_buff *skb); }; diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 54a0dc2..ab2bb42 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -212,7 +212,7 @@ resume: /* only the first xfrm gets the encap type */ encap_type = 0; - if (async && x->repl->check(x, skb, seq)) { + if (async && x->repl->recheck(x, skb, seq)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR); goto drop_unlock; } diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 3235023..379c176 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -437,6 +437,18 @@ err: return -EINVAL; } +static int xfrm_replay_recheck_esn(struct xfrm_state *x, + struct sk_buff *skb, __be32 net_seq) +{ + if (unlikely(XFRM_SKB_CB(skb)->seq.input.hi != + htonl(xfrm_replay_seqhi(x, net_seq)))) { + x->stats.replay_window++; + return -EINVAL; + } + + return xfrm_replay_check_esn(x, skb, net_seq); +} + static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) { unsigned int bitnr, nr, i; @@ -508,6 +520,7 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) static struct xfrm_replay xfrm_replay_legacy = { .advance = xfrm_replay_advance, .check = xfrm_replay_check, + .recheck = xfrm_replay_check, .notify = xfrm_replay_notify, .overflow = xfrm_replay_overflow, }; @@ -515,6 +528,7 @@ static struct xfrm_replay xfrm_replay_legacy = { static struct xfrm_replay xfrm_replay_bmp = { .advance = xfrm_replay_advance_bmp, .check = xfrm_replay_check_bmp, + .recheck = xfrm_replay_check_bmp, .notify = xfrm_replay_notify_bmp, .overflow = xfrm_replay_overflow_bmp, }; @@ -522,6 +536,7 @@ static struct xfrm_replay xfrm_replay_bmp = { static struct xfrm_replay xfrm_replay_esn = { .advance = xfrm_replay_advance_esn, .check = xfrm_replay_check_esn, + .recheck = xfrm_replay_recheck_esn, .notify = xfrm_replay_notify_bmp, .overflow = xfrm_replay_overflow_esn, }; -- 1.7.11.4 >From f356c026150463a7289d3fd9ca76b489c52abec6 Mon Sep 17 00:00:00 2001 From: Mathias Krause <[email protected]> Date: Thu, 13 Sep 2012 11:41:26 +0000 Subject: [PATCH 02/31] xfrm_user: return error pointer instead of NULL [ Upstream commit 864745d291b5ba80ea0bd0edcbe67273de368836 ] When dump_one_state() returns an error, e.g. because of a too small buffer to dump the whole xfrm state, xfrm_state_netlink() returns NULL instead of an error pointer. But its callers expect an error pointer and therefore continue to operate on a NULL skbuff. This could lead to a privilege escalation (execution of user code in kernel context) if the attacker has CAP_NET_ADMIN and is able to map address 0. Signed-off-by: Mathias Krause <[email protected]> Acked-by: Steffen Klassert <[email protected]> Signed-off-by: David S. Miller <[email protected]> --- net/xfrm/xfrm_user.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index c658cb3..dbd2852 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -862,6 +862,7 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb, { struct xfrm_dump_info info; struct sk_buff *skb; + int err; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!skb) @@ -872,9 +873,10 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb, info.nlmsg_seq = seq; info.nlmsg_flags = 0; - if (dump_one_state(x, 0, &info)) { + err = dump_one_state(x, 0, &info); + if (err) { kfree_skb(skb); - return NULL; + return ERR_PTR(err); } return skb; -- 1.7.11.4 >From eba2ba4a166058cdf0ff8e19780e8274a9faf2cf Mon Sep 17 00:00:00 2001 From: Mathias Krause <[email protected]> Date: Fri, 14 Sep 2012 09:58:32 +0000 Subject: [PATCH 03/31] xfrm_user: return error pointer instead of NULL #2 [ Upstream commit c25463722509fef0ed630b271576a8c9a70236f3 ] When dump_one_policy() returns an error, e.g. because of a too small buffer to dump the whole xfrm policy, xfrm_policy_netlink() returns NULL instead of an error pointer. But its caller expects an error pointer and therefore continues to operate on a NULL skbuff. Signed-off-by: Mathias Krause <[email protected]> Acked-by: Steffen Klassert <[email protected]> Signed-off-by: David S. Miller <[email protected]> --- net/xfrm/xfrm_user.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index dbd2852..59801ce 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1531,6 +1531,7 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, { struct xfrm_dump_info info; struct sk_buff *skb; + int err; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb) @@ -1541,9 +1542,10 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, info.nlmsg_seq = seq; info.nlmsg_flags = 0; - if (dump_one_policy(xp, dir, 0, &info) < 0) { + err = dump_one_policy(xp, dir, 0, &info); + if (err) { kfree_skb(skb); - return NULL; + return ERR_PTR(err); } return skb; -- 1.7.11.4 >From bfa1b065ac9f5f692b6cf7d3e9ab50182f7d46cc Mon Sep 17 00:00:00 2001 From: Li RongQing <[email protected]> Date: Mon, 17 Sep 2012 22:40:10 +0000 Subject: [PATCH 04/31] xfrm: fix a read lock imbalance in make_blackhole [ Upstream commit 433a19548061bb5457b6ab77ed7ea58ca6e43ddb ] if xfrm_policy_get_afinfo returns 0, it has already released the read lock, xfrm_policy_put_afinfo should not be called again. Signed-off-by: Li RongQing <[email protected]> Signed-off-by: David S. Miller <[email protected]> --- net/xfrm/xfrm_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 0c0e40e..7c8e0cb 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1759,7 +1759,7 @@ static struct dst_entry *make_blackhole(struct net *net, u16 family, if (!afinfo) { dst_release(dst_orig); - ret = ERR_PTR(-EINVAL); + return ERR_PTR(-EINVAL); } else { ret = afinfo->blackhole_route(net, dst_orig); } -- 1.7.11.4 >From be06abfd704d41fa2b1c9b748bd7a894294b59e2 Mon Sep 17 00:00:00 2001 From: Mathias Krause <[email protected]> Date: Wed, 19 Sep 2012 11:33:38 +0000 Subject: [PATCH 05/31] xfrm_user: fix info leak in copy_to_user_auth() [ Upstream commit 4c87308bdea31a7b4828a51f6156e6f721a1fcc9 ] copy_to_user_auth() fails to initialize the remainder of alg_name and therefore discloses up to 54 bytes of heap memory via netlink to userland. Use strncpy() instead of strcpy() to fill the trailing bytes of alg_name with null bytes. Signed-off-by: Mathias Krause <[email protected]> Acked-by: Steffen Klassert <[email protected]> Signed-off-by: David S. Miller <[email protected]> --- net/xfrm/xfrm_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 59801ce..d78ebe2 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -742,7 +742,7 @@ static int copy_to_user_auth(struct xfrm_algo_auth *auth, struct sk_buff *skb) return -EMSGSIZE; algo = nla_data(nla); - strcpy(algo->alg_name, auth->alg_name); + strncpy(algo->alg_name, auth->alg_name, sizeof(algo->alg_name)); memcpy(algo->alg_key, auth->alg_key, (auth->alg_key_len + 7) / 8); algo->alg_key_len = auth->alg_key_len; -- 1.7.11.4 >From ba79115820f6ad23382a21a8ef0959d78f7d9e51 Mon Sep 17 00:00:00 2001 From: Mathias Krause <[email protected]> Date: Wed, 19 Sep 2012 11:33:39 +0000 Subject: [PATCH 06/31] xfrm_user: fix info leak in copy_to_user_state() [ Upstream commit f778a636713a435d3a922c60b1622a91136560c1 ] The memory reserved to dump the xfrm state includes the padding bytes of struct xfrm_usersa_info added by the compiler for alignment (7 for amd64, 3 for i386). Add an explicit memset(0) before filling the buffer to avoid the info leak. Signed-off-by: Mathias Krause <[email protected]> Acked-by: Steffen Klassert <[email protected]> Signed-off-by: David S. Miller <[email protected]> --- net/xfrm/xfrm_user.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index d78ebe2..8e878eb 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -689,6 +689,7 @@ out: static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) { + memset(p, 0, sizeof(*p)); memcpy(&p->id, &x->id, sizeof(p->id)); memcpy(&p->sel, &x->sel, sizeof(p->sel)); memcpy(&p->lft, &x->lft, sizeof(p->lft)); -- 1.7.11.4 >From 5d1c5bd6579fa7335c3bd5dad8d40dc06ad4402f Mon Sep 17 00:00:00 2001 From: Mathias Krause <[email protected]> Date: Wed, 19 Sep 2012 11:33:40 +0000 Subject: [PATCH 07/31] xfrm_user: fix info leak in copy_to_user_policy() [ Upstream commit 7b789836f434c87168eab067cfbed1ec4783dffd ] The memory reserved to dump the xfrm policy includes multiple padding bytes added by the compiler for alignment (padding bytes in struct xfrm_selector and struct xfrm_userpolicy_info). Add an explicit memset(0) before filling the buffer to avoid the heap info leak. Signed-off-by: Mathias Krause <[email protected]> Acked-by: Steffen Klassert <[email protected]> Signed-off-by: David S. Miller <[email protected]> --- net/xfrm/xfrm_user.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 8e878eb..b5215b4 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1300,6 +1300,7 @@ static void copy_from_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy static void copy_to_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p, int dir) { + memset(p, 0, sizeof(*p)); memcpy(&p->sel, &xp->selector, sizeof(p->sel)); memcpy(&p->lft, &xp->lft, sizeof(p->lft)); memcpy(&p->curlft, &xp->curlft, sizeof(p->curlft)); -- 1.7.11.4 >From d69e52e3a58dc5f29bcbe7a4e9d040044ea7b53b Mon Sep 17 00:00:00 2001 From: Mathias Krause <[email protected]> Date: Wed, 19 Sep 2012 11:33:41 +0000 Subject: [PATCH 08/31] xfrm_user: fix info leak in copy_to_user_tmpl() [ Upstream commit 1f86840f897717f86d523a13e99a447e6a5d2fa5 ] The memory used for the template copy is a local stack variable. As struct xfrm_user_tmpl contains multiple holes added by the compiler for alignment, not initializing the memory will lead to leaking stack bytes to userland. Add an explicit memset(0) to avoid the info leak. Initial version of the patch by Brad Spengler. Cc: Brad Spengler <[email protected]> Signed-off-by: Mathias Krause <[email protected]> Acked-by: Steffen Klassert <[email protected]> Signed-off-by: David S. Miller <[email protected]> --- net/xfrm/xfrm_user.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index b5215b4..de4874f 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1405,6 +1405,7 @@ static int copy_to_user_tmpl(struct xfrm_policy *xp, struct sk_buff *skb) struct xfrm_user_tmpl *up = &vec[i]; struct xfrm_tmpl *kp = &xp->xfrm_vec[i]; + memset(up, 0, sizeof(*up)); memcpy(&up->id, &kp->id, sizeof(up->id)); up->family = kp->encap_family; memcpy(&up->saddr, &kp->saddr, sizeof(up->saddr)); -- 1.7.11.4 >From 33b6cf7f83d3a8bdef1582c4098b25919e96a925 Mon Sep 17 00:00:00 2001 From: Mathias Krause <[email protected]> Date: Wed, 19 Sep 2012 11:33:43 +0000 Subject: [PATCH 09/31] xfrm_user: don't copy esn replay window twice for new states [ Upstream commit e3ac104d41a97b42316915020ba228c505447d21 ] The ESN replay window was already fully initialized in xfrm_alloc_replay_state_esn(). No need to copy it again. Cc: Steffen Klassert <[email protected]> Signed-off-by: Mathias Krause <[email protected]> Acked-by: Steffen Klassert <[email protected]> Signed-off-by: David S. Miller <[email protected]> --- net/xfrm/xfrm_user.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index de4874f..7be5d6a 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -442,10 +442,11 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info * * somehow made shareable and move it to xfrm_state.c - JHS * */ -static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs) +static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs, + int update_esn) { struct nlattr *rp = attrs[XFRMA_REPLAY_VAL]; - struct nlattr *re = attrs[XFRMA_REPLAY_ESN_VAL]; + struct nlattr *re = update_esn ? attrs[XFRMA_REPLAY_ESN_VAL] : NULL; struct nlattr *lt = attrs[XFRMA_LTIME_VAL]; struct nlattr *et = attrs[XFRMA_ETIMER_THRESH]; struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH]; @@ -555,7 +556,7 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, goto error; /* override default values from above */ - xfrm_update_ae_params(x, attrs); + xfrm_update_ae_params(x, attrs, 0); return x; @@ -1801,7 +1802,7 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, goto out; spin_lock_bh(&x->lock); - xfrm_update_ae_params(x, attrs); + xfrm_update_ae_params(x, attrs, 1); spin_unlock_bh(&x->lock); c.event = nlh->nlmsg_type; -- 1.7.11.4 >From 019ccfa316085ca0049d4d50a571365a5a722816 Mon Sep 17 00:00:00 2001 From: Mathias Krause <[email protected]> Date: Thu, 20 Sep 2012 10:01:49 +0000 Subject: [PATCH 10/31] xfrm_user: ensure user supplied esn replay window is valid [ Upstream commit ecd7918745234e423dd87fcc0c077da557909720 ] The current code fails to ensure that the netlink message actually contains as many bytes as the header indicates. If a user creates a new state or updates an existing one but does not supply the bytes for the whole ESN replay window, the kernel copies random heap bytes into the replay bitmap, the ones happen to follow the XFRMA_REPLAY_ESN_VAL netlink attribute. This leads to following issues: 1. The replay window has random bits set confusing the replay handling code later on. 2. A malicious user could use this flaw to leak up to ~3.5kB of heap memory when she has access to the XFRM netlink interface (requires CAP_NET_ADMIN). Known users of the ESN replay window are strongSwan and Steffen's iproute2 patch (<http://patchwork.ozlabs.org/patch/85962/>). The latter uses the interface with a bitmap supplied while the former does not. strongSwan is therefore prone to run into issue 1. To fix both issues without breaking existing userland allow using the XFRMA_REPLAY_ESN_VAL netlink attribute with either an empty bitmap or a fully specified one. For the former case we initialize the in-kernel bitmap with zero, for the latter we copy the user supplied bitmap. For state updates the full bitmap must be supplied. To prevent overflows in the bitmap length calculation the maximum size of bmp_len is limited to 128 by this patch -- resulting in a maximum replay window of 4096 packets. This should be sufficient for all real life scenarios (RFC 4303 recommends a default replay window size of 64). Cc: Steffen Klassert <[email protected]> Cc: Martin Willi <[email protected]> Cc: Ben Hutchings <[email protected]> Signed-off-by: Mathias Krause <[email protected]> Signed-off-by: David S. Miller <[email protected]> --- include/linux/xfrm.h | 2 ++ net/xfrm/xfrm_user.c | 31 +++++++++++++++++++++++++------ 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 22e61fd..28e493b 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -84,6 +84,8 @@ struct xfrm_replay_state { __u32 bitmap; }; +#define XFRMA_REPLAY_ESN_MAX 4096 + struct xfrm_replay_state_esn { unsigned int bmp_len; __u32 oseq; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 7be5d6a..05f82e6 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -123,9 +123,21 @@ static inline int verify_replay(struct xfrm_usersa_info *p, struct nlattr **attrs) { struct nlattr *rt = attrs[XFRMA_REPLAY_ESN_VAL]; + struct xfrm_replay_state_esn *rs; - if ((p->flags & XFRM_STATE_ESN) && !rt) - return -EINVAL; + if (p->flags & XFRM_STATE_ESN) { + if (!rt) + return -EINVAL; + + rs = nla_data(rt); + + if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8) + return -EINVAL; + + if (nla_len(rt) < xfrm_replay_state_esn_len(rs) && + nla_len(rt) != sizeof(*rs)) + return -EINVAL; + } if (!rt) return 0; @@ -370,14 +382,15 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es struct nlattr *rp) { struct xfrm_replay_state_esn *up; + int ulen; if (!replay_esn || !rp) return 0; up = nla_data(rp); + ulen = xfrm_replay_state_esn_len(up); - if (xfrm_replay_state_esn_len(replay_esn) != - xfrm_replay_state_esn_len(up)) + if (nla_len(rp) < ulen || xfrm_replay_state_esn_len(replay_esn) != ulen) return -EINVAL; return 0; @@ -388,22 +401,28 @@ static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn struct nlattr *rta) { struct xfrm_replay_state_esn *p, *pp, *up; + int klen, ulen; if (!rta) return 0; up = nla_data(rta); + klen = xfrm_replay_state_esn_len(up); + ulen = nla_len(rta) >= klen ? klen : sizeof(*up); - p = kmemdup(up, xfrm_replay_state_esn_len(up), GFP_KERNEL); + p = kzalloc(klen, GFP_KERNEL); if (!p) return -ENOMEM; - pp = kmemdup(up, xfrm_replay_state_esn_len(up), GFP_KERNEL); + pp = kzalloc(klen, GFP_KERNEL); if (!pp) { kfree(p); return -ENOMEM; } + memcpy(p, up, ulen); + memcpy(pp, up, ulen); + *replay_esn = p; *preplay_esn = pp; -- 1.7.11.4 >From 347ed54cf912b1b30b0690e7ecf4d55f57e52a5d Mon Sep 17 00:00:00 2001 From: htbegin <[email protected]> Date: Mon, 1 Oct 2012 16:42:43 +0000 Subject: [PATCH 11/31] net: ethernet: davinci_cpdma: decrease the desc count when cleaning up the remaining packets [ Upstream commit ffb5ba90017505a19e238e986e6d33f09e4df765 ] chan->count is used by rx channel. If the desc count is not updated by the clean up loop in cpdma_chan_stop, the value written to the rxfree register in cpdma_chan_start will be incorrect. Signed-off-by: Tao Hou <[email protected]> Signed-off-by: David S. Miller <[email protected]> --- drivers/net/davinci_cpdma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/davinci_cpdma.c b/drivers/net/davinci_cpdma.c index ae47f23..6b67c52 100644 --- a/drivers/net/davinci_cpdma.c +++ b/drivers/net/davinci_cpdma.c @@ -849,6 +849,7 @@ int cpdma_chan_stop(struct cpdma_chan *chan) next_dma = desc_read(desc, hw_next); chan->head = desc_from_phys(pool, next_dma); + chan->count--; chan->stats.teardown_dequeue++; /* issue callback without locks held */ -- 1.7.11.4 >From fb1629e895b2a019eddd0201221c45a1ea55b364 Mon Sep 17 00:00:00 2001 From: Florian Fainelli <[email protected]> Date: Mon, 10 Sep 2012 14:06:58 +0200 Subject: [PATCH 12/31] ixp4xx_hss: fix build failure due to missing linux/module.h inclusion [ Upstream commit 0b836ddde177bdd5790ade83772860940bd481ea ] Commit 36a1211970193ce215de50ed1e4e1272bc814df1 (netprio_cgroup.h: dont include module.h from other includes) made the following build error on ixp4xx_hss pop up: CC [M] drivers/net/wan/ixp4xx_hss.o drivers/net/wan/ixp4xx_hss.c:1412:20: error: expected ';', ',' or ')' before string constant drivers/net/wan/ixp4xx_hss.c:1413:25: error: expected ';', ',' or ')' before string constant drivers/net/wan/ixp4xx_hss.c:1414:21: error: expected ';', ',' or ')' before string constant drivers/net/wan/ixp4xx_hss.c:1415:19: error: expected ';', ',' or ')' before string constant make[8]: *** [drivers/net/wan/ixp4xx_hss.o] Error 1 This was previously hidden because ixp4xx_hss includes linux/hdlc.h which includes linux/netdevice.h which includes linux/netprio_cgroup.h which used to include linux/module.h. The real issue was actually present since the initial commit that added this driver since it uses macros from linux/module.h without including this file. Signed-off-by: Florian Fainelli <[email protected]> Signed-off-by: David S. Miller <[email protected]> --- drivers/net/wan/ixp4xx_hss.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wan/ixp4xx_hss.c b/drivers/net/wan/ixp4xx_hss.c index f1e1643..78c51ab 100644 --- a/drivers/net/wan/ixp4xx_hss.c +++ b/drivers/net/wan/ixp4xx_hss.c @@ -8,6 +8,7 @@ * as published by the Free Software Foundation. */ +#include <linux/module.h> #include <linux/bitops.h> #include <linux/cdev.h> #include <linux/dma-mapping.h> -- 1.7.11.4 >From 2eb076f12f8a1985d05f381657cd7fab4fe2c003 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov <[email protected]> Date: Fri, 14 Sep 2012 05:50:03 +0000 Subject: [PATCH 13/31] netxen: check for root bus in netxen_mask_aer_correctable [ Upstream commit e4d1aa40e363ed3e0486aeeeb0d173f7f822737e ] Add a check if pdev->bus->self == NULL (root bus). When attaching a netxen NIC to a VM it can be on the root bus and the guest would crash in netxen_mask_aer_correctable() because of a NULL pointer dereference if CONFIG_PCIEAER is present. Signed-off-by: Nikolay Aleksandrov <[email protected]> Signed-off-by: David S. Miller <[email protected]> --- drivers/net/netxen/netxen_nic_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c index c0788a3..78d5b67 100644 --- a/drivers/net/netxen/netxen_nic_main.c +++ b/drivers/net/netxen/netxen_nic_main.c @@ -1288,6 +1288,10 @@ static void netxen_mask_aer_correctable(struct netxen_adapter *adapter) struct pci_dev *root = pdev->bus->self; u32 aer_pos; + /* root bus? */ + if (!root) + return; + if (adapter->ahw.board_type != NETXEN_BRDTYPE_P3_4_GB_MM && adapter->ahw.board_type != NETXEN_BRDTYPE_P3_10G_TP) return; -- 1.7.11.4 >From e0d110b06c0f409eae090cbd426f373ab401da51 Mon Sep 17 00:00:00 2001 From: Eric Dumazet <[email protected]> Date: Tue, 11 Sep 2012 13:11:12 +0000 Subject: [PATCH 14/31] net-sched: sch_cbq: avoid infinite loop [ Upstream commit bdfc87f7d1e253e0a61e2fc6a75ea9d76f7fc03a ] Its possible to setup a bad cbq configuration leading to an infinite loop in cbq_classify() DEV_OUT=eth0 ICMP="match ip protocol 1 0xff" U32="protocol ip u32" DST="match ip dst" tc qdisc add dev $DEV_OUT root handle 1: cbq avpkt 1000 \ bandwidth 100mbit tc class add dev $DEV_OUT parent 1: classid 1:1 cbq \ rate 512kbit allot 1500 prio 5 bounded isolated tc filter add dev $DEV_OUT parent 1: prio 3 $U32 \ $ICMP $DST 192.168.3.234 flowid 1: Reported-by: Denys Fedoryschenko <[email protected]> Tested-by: Denys Fedoryschenko <[email protected]> Signed-off-by: Eric Dumazet <[email protected]> Signed-off-by: David S. Miller <[email protected]> --- net/sched/sch_cbq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 24d94c0..599f67a 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -250,10 +250,11 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL) cl = defmap[TC_PRIO_BESTEFFORT]; - if (cl == NULL || cl->level >= head->level) + if (cl == NULL) goto fallback; } - + if (cl->level >= head->level) + goto fallback; #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_QUEUED: -- 1.7.11.4 >From 4030f00a52841cab583bc1cb9945d1a4bd151779 Mon Sep 17 00:00:00 2001 From: Paolo Valente <[email protected]> Date: Sat, 15 Sep 2012 00:41:35 +0000 Subject: [PATCH 15/31] pkt_sched: fix virtual-start-time update in QFQ [ Upstream commit 71261956973ba9e0637848a5adb4a5819b4bae83 ] If the old timestamps of a class, say cl, are stale when the class becomes active, then QFQ may assign to cl a much higher start time than the maximum value allowed. This may happen when QFQ assigns to the start time of cl the finish time of a group whose classes are characterized by a higher value of the ratio max_class_pkt/weight_of_the_class with respect to that of cl. Inserting a class with a too high start time into the bucket list corrupts the data structure and may eventually lead to crashes. This patch limits the maximum start time assigned to a class. Signed-off-by: Paolo Valente <[email protected]> Signed-off-by: David S. Miller <[email protected]> --- net/sched/sch_qfq.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 1033434..f86bc72 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -829,7 +829,10 @@ static void qfq_update_start(struct qfq_sched *q, struct qfq_class *cl) if (mask) { struct qfq_group *next = qfq_ffs(q, mask); if (qfq_gt(roundedF, next->F)) { - cl->S = next->F; + if (qfq_gt(limit, next->F)) + cl->S = next->F; + else /* preserve timestamp correctness */ + cl->S = limit; return; } } -- 1.7.11.4 >From 8d17536638dd0b5a9cdfa9faa183e172f6c3e2d1 Mon Sep 17 00:00:00 2001 From: Lennart Sorensen <[email protected]> Date: Fri, 7 Sep 2012 12:14:02 +0000 Subject: [PATCH 16/31] sierra_net: Endianess bug fix. [ Upstream commit 2120c52da6fe741454a60644018ad2a6abd957ac ] I discovered I couldn't get sierra_net to work on a powerpc. Turns out the firmware attribute check assumes the system is little endian and hence fails because the attributes is a 16 bit value. Signed-off-by: Len Sorensen <[email protected]> Signed-off-by: David S. Miller <[email protected]> --- drivers/net/usb/sierra_net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/sierra_net.c b/drivers/net/usb/sierra_net.c index 864448b..e773250 100644 --- a/drivers/net/usb/sierra_net.c +++ b/drivers/net/usb/sierra_net.c @@ -678,7 +678,7 @@ static int sierra_net_get_fw_attr(struct usbnet *dev, u16 *datap) return -EIO; } - *datap = *attrdata; + *datap = le16_to_cpu(*attrdata); kfree(attrdata); return result; -- 1.7.11.4 >From 4eb9e407ce619f4f242f0d4101ddf76af1efe38a Mon Sep 17 00:00:00 2001 From: Antonio Quartulli <[email protected]> Date: Tue, 2 Oct 2012 06:14:17 +0000 Subject: [PATCH 17/31] 8021q: fix mac_len recomputation in vlan_untag() [ Upstream commit 5316cf9a5197eb80b2800e1acadde287924ca975 ] skb_reset_mac_len() relies on the value of the skb->network_header pointer, therefore we must wait for such pointer to be recalculated before computing the new mac_len value. Signed-off-by: Antonio Quartulli <[email protected]> Signed-off-by: David S. Miller <[email protected]> --- net/8021q/vlan_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 27263fb..c177f9e 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -106,7 +106,6 @@ static struct sk_buff *vlan_reorder_header(struct sk_buff *skb) return NULL; memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN); skb->mac_header += VLAN_HLEN; - skb_reset_mac_len(skb); return skb; } @@ -173,6 +172,8 @@ struct sk_buff *vlan_untag(struct sk_buff *skb) skb_reset_network_header(skb); skb_reset_transport_header(skb); + skb_reset_mac_len(skb); + return skb; err_free: -- 1.7.11.4 >From 75758f947536d431fd0f03aa7a83b1cddc468eaa Mon Sep 17 00:00:00 2001 From: Gao feng <[email protected]> Date: Wed, 19 Sep 2012 19:25:34 +0000 Subject: [PATCH 18/31] ipv6: release reference of ip6_null_entry's dst entry in __ip6_del_rt [ Upstream commit 6825a26c2dc21eb4f8df9c06d3786ddec97cf53b ] as we hold dst_entry before we call __ip6_del_rt, so we should alse call dst_release not only return -ENOENT when the rt6_info is ip6_null_entry. and we already hold the dst entry, so I think it's safe to call dst_release out of the write-read lock. Signed-off-by: Gao feng <[email protected]> Signed-off-by: David S. Miller <[email protected]> --- net/ipv6/route.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7c5b4cb..9172568 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1399,17 +1399,18 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) struct fib6_table *table; struct net *net = dev_net(rt->rt6i_dev); - if (rt == net->ipv6.ip6_null_entry) - return -ENOENT; + if (rt == net->ipv6.ip6_null_entry) { + err = -ENOENT; + goto out; + } table = rt->rt6i_table; write_lock_bh(&table->tb6_lock); - err = fib6_del(rt, info); - dst_release(&rt->dst); - write_unlock_bh(&table->tb6_lock); +out: + dst_release(&rt->dst); return err; } -- 1.7.11.4 >From 63ff71994c8a35fc2213bf2fe4bb83920a7daa42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= <[email protected]> Date: Fri, 14 Sep 2012 04:59:52 +0000 Subject: [PATCH 19/31] tcp: flush DMA queue before sk_wait_data if rcv_wnd is zero [ Upstream commit 15c041759bfcd9ab0a4e43f1c16e2644977d0467 ] If recv() syscall is called for a TCP socket so that - IOAT DMA is used - MSG_WAITALL flag is used - requested length is bigger than sk_rcvbuf - enough data has already arrived to bring rcv_wnd to zero then when tcp_recvmsg() gets to calling sk_wait_data(), receive window can be still zero while sk_async_wait_queue exhausts enough space to keep it zero. As this queue isn't cleaned until the tcp_service_net_dma() call, sk_wait_data() cannot receive any data and blocks forever. If zero receive window and non-empty sk_async_wait_queue is detected before calling sk_wait_data(), process the queue first. Signed-off-by: Michal Kubecek <[email protected]> Signed-off-by: David S. Miller <[email protected]> --- net/ipv4/tcp.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e57df66..dd3af6c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1592,8 +1592,14 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } #ifdef CONFIG_NET_DMA - if (tp->ucopy.dma_chan) - dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); + if (tp->ucopy.dma_chan) { + if (tp->rcv_wnd == 0 && + !skb_queue_empty(&sk->sk_async_wait_queue)) { + tcp_service_net_dma(sk, true); + tcp_cleanup_rbuf(sk, copied); + } else + dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); + } #endif if (copied >= target) { /* Do not sleep, just process backlog. */ -- 1.7.11.4 >From a060208a2146f5e45fe0f6196d0636771752913d Mon Sep 17 00:00:00 2001 From: Thomas Graf <[email protected]> Date: Mon, 3 Sep 2012 04:27:42 +0000 Subject: [PATCH 20/31] sctp: Don't charge for data in sndbuf again when transmitting packet [ Upstream commit 4c3a5bdae293f75cdf729c6c00124e8489af2276 ] SCTP charges wmem_alloc via sctp_set_owner_w() in sctp_sendmsg() and via skb_set_owner_w() in sctp_packet_transmit(). If a sender runs out of sndbuf it will sleep in sctp_wait_for_sndbuf() and expects to be waken up by __sctp_write_space(). Buffer space charged via sctp_set_owner_w() is released in sctp_wfree() which calls __sctp_write_space() directly. Buffer space charged via skb_set_owner_w() is released via sock_wfree() which calls sk->sk_write_space() _if_ SOCK_USE_WRITE_QUEUE is not set. sctp_endpoint_init() sets SOCK_USE_WRITE_QUEUE on all sockets. Therefore if sctp_packet_transmit() manages to queue up more than sndbuf bytes, sctp_wait_for_sndbuf() will never be woken up again unless it is interrupted by a signal. This could be fixed by clearing the SOCK_USE_WRITE_QUEUE flag but ... Charging for the data twice does not make sense in the first place, it leads to overcharging sndbuf by a factor 2. Therefore this patch only charges a single byte in wmem_alloc when transmitting an SCTP packet to ensure that the socket stays alive until the packet has been released. This means that control chunks are no longer accounted for in wmem_alloc which I believe is not a problem as skb->truesize will typically lead to overcharging anyway and thus compensates for any control overhead. Signed-off-by: Thomas Graf <[email protected]> CC: Vlad Yasevich <[email protected]> CC: Neil Horman <[email protected]> CC: David Miller <[email protected]> Acked-by: Vlad Yasevich <[email protected]> Signed-off-by: David S. Miller <[email protected]> --- net/sctp/output.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/net/sctp/output.c b/net/sctp/output.c index 8fc4dcd..32ba8d0 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -334,6 +334,25 @@ finish: return retval; } +static void sctp_packet_release_owner(struct sk_buff *skb) +{ + sk_free(skb->sk); +} + +static void sctp_packet_set_owner_w(struct sk_buff *skb, struct sock *sk) +{ + skb_orphan(skb); + skb->sk = sk; + skb->destructor = sctp_packet_release_owner; + + /* + * The data chunks have already been accounted for in sctp_sendmsg(), + * therefore only reserve a single byte to keep socket around until + * the packet has been transmitted. + */ + atomic_inc(&sk->sk_wmem_alloc); +} + /* All packets are sent to the network through this function from * sctp_outq_tail(). * @@ -375,7 +394,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) /* Set the owning socket so that we know where to get the * destination IP address. */ - skb_set_owner_w(nskb, sk); + sctp_packet_set_owner_w(nskb, sk); if (!sctp_transport_dst_check(tp)) { sctp_transport_route(tp, NULL, sctp_sk(sk)); -- 1.7.11.4 >From f9612f65dae1e7391357329b6d86dbe1db7ec615 Mon Sep 17 00:00:00 2001 From: Xiaodong Xu <[email protected]> Date: Sat, 22 Sep 2012 00:09:32 +0000 Subject: [PATCH 21/31] pppoe: drop PPPOX_ZOMBIEs in pppoe_release [ Upstream commit 2b018d57ff18e5405823e5cb59651a5b4d946d7b ] When PPPOE is running over a virtual ethernet interface (e.g., a bonding interface) and the user tries to delete the interface in case the PPPOE state is ZOMBIE, the kernel will loop forever while unregistering net_device for the reference count is not decreased to zero which should have been done with dev_put(). Signed-off-by: Xiaodong Xu <[email protected]> Signed-off-by: David S. Miller <[email protected]> --- drivers/net/pppoe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c index bc9a4bb..1161584 100644 --- a/drivers/net/pppoe.c +++ b/drivers/net/pppoe.c @@ -576,7 +576,7 @@ static int pppoe_release(struct socket *sock) po = pppox_sk(sk); - if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) { + if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND | PPPOX_ZOMBIE)) { dev_put(po->pppoe_dev); po->pppoe_dev = NULL; } -- 1.7.11.4 >From 821b1989e0f88d0dcd86e83834f10c914d830b4d Mon Sep 17 00:00:00 2001 From: Chema Gonzalez <[email protected]> Date: Fri, 7 Sep 2012 13:40:50 +0000 Subject: [PATCH 22/31] net: small bug on rxhash calculation [ Upstream commit 6862234238e84648c305526af2edd98badcad1e0 ] In the current rxhash calculation function, while the sorting of the ports/addrs is coherent (you get the same rxhash for packets sharing the same 4-tuple, in both directions), ports and addrs are sorted independently. This implies packets from a connection between the same addresses but crossed ports hash to the same rxhash. For example, traffic between A=S:l and B=L:s is hashed (in both directions) from {L, S, {s, l}}. The same rxhash is obtained for packets between C=S:s and D=L:l. This patch ensures that you either swap both addrs and ports, or you swap none. Traffic between A and B, and traffic between C and D, get their rxhash from different sources ({L, S, {l, s}} for A<->B, and {L, S, {s, l}} for C<->D) The patch is co-written with Eric Dumazet <[email protected]> Signed-off-by: Chema Gonzalez <[email protected]> Signed-off-by: Eric Dumazet <[email protected]> Signed-off-by: David S. Miller <[email protected]> --- net/core/dev.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index d8bc889..df6ee28 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2559,16 +2559,17 @@ __u32 __skb_get_rxhash(struct sk_buff *skb) poff = proto_ports_offset(ip_proto); if (poff >= 0) { nhoff += ihl * 4 + poff; - if (pskb_may_pull(skb, nhoff + 4)) { + if (pskb_may_pull(skb, nhoff + 4)) ports.v32 = * (__force u32 *) (skb->data + nhoff); - if (ports.v16[1] < ports.v16[0]) - swap(ports.v16[0], ports.v16[1]); - } } /* get a consistent hash (same value on both flow directions) */ - if (addr2 < addr1) + if (addr2 < addr1 || + (addr2 == addr1 && + ports.v16[1] < ports.v16[0])) { swap(addr1, addr2); + swap(ports.v16[0], ports.v16[1]); + } hash = jhash_3words(addr1, addr2, ports.v32, hashrnd); if (!hash) -- 1.7.11.4 >From 57a58bcf994a28a9a5cdeffbc14628b63a7ee745 Mon Sep 17 00:00:00 2001 From: Eric Dumazet <[email protected]> Date: Mon, 24 Sep 2012 07:00:11 +0000 Subject: [PATCH 23/31] net: guard tcp_set_keepalive() to tcp sockets [ Upstream commit 3e10986d1d698140747fcfc2761ec9cb64c1d582 ] Its possible to use RAW sockets to get a crash in tcp_set_keepalive() / sk_reset_timer() Fix is to make sure socket is a SOCK_STREAM one. Reported-by: Dave Jones <[email protected]> Signed-off-by: Eric Dumazet <[email protected]> Signed-off-by: David S. Miller <[email protected]> --- net/core/sock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/sock.c b/net/core/sock.c index 56623ad..3da11ba 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -594,7 +594,8 @@ set_rcvbuf: case SO_KEEPALIVE: #ifdef CONFIG_INET - if (sk->sk_protocol == IPPROTO_TCP) + if (sk->sk_protocol == IPPROTO_TCP && + sk->sk_type == SOCK_STREAM) tcp_set_keepalive(sk, valbool); #endif sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool); -- 1.7.11.4 >From fbc1e35d3cbcc617aa0230b2a1f3c92b01c10226 Mon Sep 17 00:00:00 2001 From: Eric Dumazet <[email protected]> Date: Sat, 22 Sep 2012 00:08:29 +0000 Subject: [PATCH 24/31] ipv4: raw: fix icmp_filter() [ Upstream commit ab43ed8b7490cb387782423ecf74aeee7237e591 ] icmp_filter() should not modify its input, or else its caller would need to recompute ip_hdr() if skb->head is reallocated. Use skb_header_pointer() instead of pskb_may_pull() and change the prototype to make clear both sk and skb are const. Signed-off-by: Eric Dumazet <[email protected]> Signed-off-by: David S. Miller <[email protected]> --- net/ipv4/raw.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index c9893d4..3d8bb18 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -130,18 +130,20 @@ found: * 0 - deliver * 1 - block */ -static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb) +static int icmp_filter(const struct sock *sk, const struct sk_buff *skb) { - int type; + struct icmphdr _hdr; + const struct icmphdr *hdr; - if (!pskb_may_pull(skb, sizeof(struct icmphdr))) + hdr = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_hdr), &_hdr); + if (!hdr) return 1; - type = icmp_hdr(skb)->type; - if (type < 32) { + if (hdr->type < 32) { __u32 data = raw_sk(sk)->filter.data; - return ((1 << type) & data) != 0; + return ((1U << hdr->type) & data) != 0; } /* Do not block unknown ICMP types */ -- 1.7.11.4 >From 2165251a96ecdb893a155e9aa6cbcf8d011bad66 Mon Sep 17 00:00:00 2001 From: Eric Dumazet <[email protected]> Date: Tue, 25 Sep 2012 07:03:40 +0000 Subject: [PATCH 25/31] ipv6: raw: fix icmpv6_filter() [ Upstream commit 1b05c4b50edbddbdde715c4a7350629819f6655e ] icmpv6_filter() should not modify its input, or else its caller would need to recompute ipv6_hdr() if skb->head is reallocated. Use skb_header_pointer() instead of pskb_may_pull() and change the prototype to make clear both sk and skb are const. Also, if icmpv6 header cannot be found, do not deliver the packet, as we do in IPv4. Signed-off-by: Eric Dumazet <[email protected]> Signed-off-by: David S. Miller <[email protected]> --- net/ipv6/raw.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index cc7313b..fb812a6 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -106,21 +106,20 @@ found: * 0 - deliver * 1 - block */ -static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb) +static int icmpv6_filter(const struct sock *sk, const struct sk_buff *skb) { - struct icmp6hdr *icmph; - struct raw6_sock *rp = raw6_sk(sk); - - if (pskb_may_pull(skb, sizeof(struct icmp6hdr))) { - __u32 *data = &rp->filter.data[0]; - int bit_nr; + struct icmp6hdr *_hdr; + const struct icmp6hdr *hdr; - icmph = (struct icmp6hdr *) skb->data; - bit_nr = icmph->icmp6_type; + hdr = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_hdr), &_hdr); + if (hdr) { + const __u32 *data = &raw6_sk(sk)->filter.data[0]; + unsigned int type = hdr->icmp6_type; - return (data[bit_nr >> 5] & (1 << (bit_nr & 31))) != 0; + return (data[type >> 5] & (1U << (type & 31))) != 0; } - return 0; + return 1; } #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) -- 1.7.11.4 >From 5eb098563ad94d3cfeb980a9cf2967b996772e41 Mon Sep 17 00:00:00 2001 From: Eric Dumazet <[email protected]> Date: Tue, 25 Sep 2012 22:01:28 +0200 Subject: [PATCH 26/31] ipv6: mip6: fix mip6_mh_filter() [ Upstream commit 96af69ea2a83d292238bdba20e4508ee967cf8cb ] mip6_mh_filter() should not modify its input, or else its caller would need to recompute ipv6_hdr() if skb->head is reallocated. Use skb_header_pointer() instead of pskb_may_pull() Signed-off-by: Eric Dumazet <[email protected]> Signed-off-by: David S. Miller <[email protected]> --- net/ipv6/mip6.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 43242e6..42853c4 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -84,28 +84,30 @@ static int mip6_mh_len(int type) static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb) { - struct ip6_mh *mh; + struct ip6_mh _hdr; + const struct ip6_mh *mh; - if (!pskb_may_pull(skb, (skb_transport_offset(skb)) + 8) || - !pskb_may_pull(skb, (skb_transport_offset(skb) + - ((skb_transport_header(skb)[1] + 1) << 3)))) + mh = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_hdr), &_hdr); + if (!mh) return -1; - mh = (struct ip6_mh *)skb_transport_header(skb); + if (((mh->ip6mh_hdrlen + 1) << 3) > skb->len) + return -1; if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) { LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n", mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type)); - mip6_param_prob(skb, 0, ((&mh->ip6mh_hdrlen) - - skb_network_header(skb))); + mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_hdrlen) + + skb_network_header_len(skb)); return -1; } if (mh->ip6mh_proto != IPPROTO_NONE) { LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n", mh->ip6mh_proto); - mip6_param_prob(skb, 0, ((&mh->ip6mh_proto) - - skb_network_header(skb))); + mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_proto) + + skb_network_header_len(skb)); return -1; } -- 1.7.11.4 >From eeb21572bc3366a7775f74ec83365ea4d09d93cd Mon Sep 17 00:00:00 2001 From: Eric Dumazet <[email protected]> Date: Tue, 4 Sep 2012 15:54:55 -0400 Subject: [PATCH 27/31] l2tp: fix a typo in l2tp_eth_dev_recv() [ Upstream commit c0cc88a7627c333de50b07b7c60b1d49d9d2e6cc ] While investigating l2tp bug, I hit a bug in eth_type_trans(), because not enough bytes were pulled in skb head. Signed-off-by: Eric Dumazet <[email protected]> Signed-off-by: David S. Miller <[email protected]> --- net/l2tp/l2tp_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 3c55f63..2cef50b 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -132,7 +132,7 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, printk("\n"); } - if (!pskb_may_pull(skb, sizeof(ETH_HLEN))) + if (!pskb_may_pull(skb, ETH_HLEN)) goto error; secpath_reset(skb); -- 1.7.11.4 >From c146f2c3dec9577daac2bbead6bc85a2b28eab8b Mon Sep 17 00:00:00 2001 From: Alan Cox <[email protected]> Date: Tue, 4 Sep 2012 04:13:18 +0000 Subject: [PATCH 28/31] netrom: copy_datagram_iovec can fail [ Upstream commit 6cf5c951175abcec4da470c50565cc0afe6cd11d ] Check for an error from this and if so bail properly. Signed-off-by: Alan Cox <[email protected]> Signed-off-by: David S. Miller <[email protected]> --- net/netrom/af_netrom.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 732152f..f156382 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1170,7 +1170,12 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock, msg->msg_flags |= MSG_TRUNC; } - skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + er = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + if (er < 0) { + skb_free_datagram(sk, skb); + release_sock(sk); + return er; + } if (sax != NULL) { sax->sax25_family = AF_NETROM; -- 1.7.11.4 >From 581e7922078a0b23b87d38e234a2cac7eebc3878 Mon Sep 17 00:00:00 2001 From: Ed Cashin <[email protected]> Date: Wed, 19 Sep 2012 15:49:00 +0000 Subject: [PATCH 29/31] net: do not disable sg for packets requiring no checksum [ Upstream commit c0d680e577ff171e7b37dbdb1b1bf5451e851f04 ] A change in a series of VLAN-related changes appears to have inadvertently disabled the use of the scatter gather feature of network cards for transmission of non-IP ethernet protocols like ATA over Ethernet (AoE). Below is a reference to the commit that introduces a "harmonize_features" function that turns off scatter gather when the NIC does not support hardware checksumming for the ethernet protocol of an sk buff. commit f01a5236bd4b140198fbcc550f085e8361fd73fa Author: Jesse Gross <[email protected]> Date: Sun Jan 9 06:23:31 2011 +0000 net offloading: Generalize netif_get_vlan_features(). The can_checksum_protocol function is not equipped to consider a protocol that does not require checksumming. Calling it for a protocol that requires no checksum is inappropriate. The patch below has harmonize_features call can_checksum_protocol when the protocol needs a checksum, so that the network layer is not forced to perform unnecessary skb linearization on the transmission of AoE packets. Unnecessary linearization results in decreased performance and increased memory pressure, as reported here: http://www.spinics.net/lists/linux-mm/msg15184.html The problem has probably not been widely experienced yet, because only recently has the kernel.org-distributed aoe driver acquired the ability to use payloads of over a page in size, with the patchset recently included in the mm tree: https://lkml.org/lkml/2012/8/28/140 The coraid.com-distributed aoe driver already could use payloads of greater than a page in size, but its users generally do not use the newest kernels. Signed-off-by: Ed Cashin <[email protected]> Signed-off-by: David S. Miller <[email protected]> --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index df6ee28..5b84eaf 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2038,7 +2038,8 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol) static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features) { - if (!can_checksum_protocol(features, protocol)) { + if (skb->ip_summed != CHECKSUM_NONE && + !can_checksum_protocol(features, protocol)) { features &= ~NETIF_F_ALL_CSUM; features &= ~NETIF_F_SG; } else if (illegal_highdma(skb->dev, skb)) { -- 1.7.11.4 >From 983751e7e6bf9cb3af43ac7ae5c4b725567e73f2 Mon Sep 17 00:00:00 2001 From: Ed Cashin <[email protected]> Date: Wed, 19 Sep 2012 15:46:39 +0000 Subject: [PATCH 30/31] aoe: assert AoE packets marked as requiring no checksum [ Upstream commit 8babe8cc6570ed896b7b596337eb8fe730c3ff45 ] In order for the network layer to see that AoE requires no checksumming in a generic way, the packets must be marked as requiring no checksum, so we make this requirement explicit with the assertion. Signed-off-by: Ed Cashin <[email protected]> Signed-off-by: David S. Miller <[email protected]> --- drivers/block/aoe/aoecmd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index de0435e..887f68f 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -35,6 +35,7 @@ new_skb(ulong len) skb_reset_mac_header(skb); skb_reset_network_header(skb); skb->protocol = __constant_htons(ETH_P_AOE); + skb_checksum_none_assert(skb); } return skb; } -- 1.7.11.4 >From e13f9bb51ccf1d3673dafe69f30e5a9de751476a Mon Sep 17 00:00:00 2001 From: Matt Carlson <[email protected]> Date: Mon, 28 Nov 2011 09:41:03 +0000 Subject: [PATCH 31/31] tg3: Fix TSO CAP for 5704 devs w / ASF enabled [ Upstream commit cf9ecf4b631f649a964fa611f1a5e8874f2a76db ] On the earliest TSO capable devices, TSO was accomplished through firmware. The TSO cannot coexist with ASF management firmware though. The tg3 driver determines whether or not ASF is enabled by calling tg3_get_eeprom_hw_cfg(), which checks a particular bit of NIC memory. Commit dabc5c670d3f86d15ee4f42ab38ec5bd2682487d, entitled "tg3: Move TSO_CAPABLE assignment", accidentally moved the code that determines TSO capabilities earlier than the call to tg3_get_eeprom_hw_cfg(). As a consequence, the driver was attempting to determine TSO capabilities before it had all the data it needed to make the decision. This patch fixes the problem by revisiting and reevaluating the decision after tg3_get_eeprom_hw_cfg() is called. Signed-off-by: Matt Carlson <[email protected]> Signed-off-by: Michael Chan <[email protected]> Signed-off-by: David S. Miller <[email protected]> --- drivers/net/tg3.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index c4ab8a7..85931ca 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -13647,9 +13647,13 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) if (tg3_flag(tp, HW_TSO_1) || tg3_flag(tp, HW_TSO_2) || tg3_flag(tp, HW_TSO_3) || - (tp->fw_needed && !tg3_flag(tp, ENABLE_ASF))) + tp->fw_needed) { + /* For firmware TSO, assume ASF is disabled. + * We'll disable TSO later if we discover ASF + * is enabled in tg3_get_eeprom_hw_cfg(). + */ tg3_flag_set(tp, TSO_CAPABLE); - else { + } else { tg3_flag_clear(tp, TSO_CAPABLE); tg3_flag_clear(tp, TSO_BUG); tp->fw_needed = NULL; @@ -13887,6 +13891,12 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) */ tg3_get_eeprom_hw_cfg(tp); + if (tp->fw_needed && tg3_flag(tp, ENABLE_ASF)) { + tg3_flag_clear(tp, TSO_CAPABLE); + tg3_flag_clear(tp, TSO_BUG); + tp->fw_needed = NULL; + } + if (tg3_flag(tp, ENABLE_APE)) { /* Allow reads and writes to the * APE register and memory space. -- 1.7.11.4
