Please add the following networking fixes to 2.6.39-stable

In particular, the inet_diag fix is serious, as the problem allows any
user to hang a box.

Thanks!

>From baa589a82de63861d8b8d18814f24deed30502ae Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <[email protected]>
Date: Tue, 24 May 2011 10:20:17 +0200
Subject: [PATCH 01/16] netfilter: ipset: Use proper timeout value to jiffies
 conversion

[ Upstream commit 249ddc79a38a8918ad53ac22606ca8af694344a5 ]

Signed-off-by: Jozsef Kadlecsik <[email protected]>
Signed-off-by: Pablo Neira Ayuso <[email protected]>
---
 include/linux/netfilter/ipset/ip_set_timeout.h |   18 ++++++++++--------
 1 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h
index 9f30c5f..bcdd40a 100644
--- a/include/linux/netfilter/ipset/ip_set_timeout.h
+++ b/include/linux/netfilter/ipset/ip_set_timeout.h
@@ -45,7 +45,7 @@ ip_set_timeout_test(unsigned long timeout)
 {
        return timeout != IPSET_ELEM_UNSET &&
               (timeout == IPSET_ELEM_PERMANENT ||
-               time_after(timeout, jiffies));
+               time_is_after_jiffies(timeout));
 }
 
 static inline bool
@@ -53,7 +53,7 @@ ip_set_timeout_expired(unsigned long timeout)
 {
        return timeout != IPSET_ELEM_UNSET &&
               timeout != IPSET_ELEM_PERMANENT &&
-              time_before(timeout, jiffies);
+              time_is_before_jiffies(timeout);
 }
 
 static inline unsigned long
@@ -64,7 +64,7 @@ ip_set_timeout_set(u32 timeout)
        if (!timeout)
                return IPSET_ELEM_PERMANENT;
 
-       t = timeout * HZ + jiffies;
+       t = msecs_to_jiffies(timeout * 1000) + jiffies;
        if (t == IPSET_ELEM_UNSET || t == IPSET_ELEM_PERMANENT)
                /* Bingo! */
                t++;
@@ -75,7 +75,8 @@ ip_set_timeout_set(u32 timeout)
 static inline u32
 ip_set_timeout_get(unsigned long timeout)
 {
-       return timeout == IPSET_ELEM_PERMANENT ? 0 : (timeout - jiffies)/HZ;
+       return timeout == IPSET_ELEM_PERMANENT ? 0 : 
+               jiffies_to_msecs(timeout - jiffies)/1000;
 }
 
 #else
@@ -89,14 +90,14 @@ static inline bool
 ip_set_timeout_test(unsigned long timeout)
 {
        return timeout == IPSET_ELEM_PERMANENT ||
-              time_after(timeout, jiffies);
+              time_is_after_jiffies(timeout);
 }
 
 static inline bool
 ip_set_timeout_expired(unsigned long timeout)
 {
        return timeout != IPSET_ELEM_PERMANENT &&
-              time_before(timeout, jiffies);
+              time_is_before_jiffies(timeout);
 }
 
 static inline unsigned long
@@ -107,7 +108,7 @@ ip_set_timeout_set(u32 timeout)
        if (!timeout)
                return IPSET_ELEM_PERMANENT;
 
-       t = timeout * HZ + jiffies;
+       t = msecs_to_jiffies(timeout * 1000) + jiffies;
        if (t == IPSET_ELEM_PERMANENT)
                /* Bingo! :-) */
                t++;
@@ -118,7 +119,8 @@ ip_set_timeout_set(u32 timeout)
 static inline u32
 ip_set_timeout_get(unsigned long timeout)
 {
-       return timeout == IPSET_ELEM_PERMANENT ? 0 : (timeout - jiffies)/HZ;
+       return timeout == IPSET_ELEM_PERMANENT ? 0 :
+               jiffies_to_msecs(timeout - jiffies)/1000;
 }
 #endif /* ! IP_SET_BITMAP_TIMEOUT */
 
-- 
1.7.5.4


>From 30357d1cec07320f46627db61889f885f3688458 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= <[email protected]>
Date: Thu, 26 May 2011 00:42:57 +0000
Subject: [PATCH 02/16] net: fix ETHTOOL_SFEATURES compatibility with old
 ethtool_ops.set_flags
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit fd0daf9d58f6b3342d07c5f6bbfb304dbe5db4ec ]

Current code squashes flags to bool - this makes set_flags fail whenever
some ETH_FLAG_* equivalent features are set. Fix this.

Signed-off-by: Michał Mirosław <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
---
 net/core/ethtool.c |   25 ++++++++++++++++++++++++-
 1 files changed, 24 insertions(+), 1 deletions(-)

diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index f337525..76ed645 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -231,6 +231,29 @@ static int ethtool_set_feature_compat(struct net_device *dev,
        return 1;
 }
 
+static int ethtool_set_flags_compat(struct net_device *dev,
+       int (*legacy_set)(struct net_device *, u32),
+       struct ethtool_set_features_block *features, u32 mask)
+{
+       u32 value;
+
+       if (!legacy_set)
+               return 0;
+
+       if (!(features[0].valid & mask))
+               return 0;
+
+       value = dev->features & ~features[0].valid;
+       value |= features[0].requested;
+
+       features[0].valid &= ~mask;
+
+       if (legacy_set(dev, value & mask) < 0)
+               netdev_info(dev, "Legacy flags change failed\n");
+
+       return 1;
+}
+
 static int ethtool_set_features_compat(struct net_device *dev,
        struct ethtool_set_features_block *features)
 {
@@ -247,7 +270,7 @@ static int ethtool_set_features_compat(struct net_device *dev,
                features, NETIF_F_ALL_TSO);
        compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_rx_csum,
                features, NETIF_F_RXCSUM);
-       compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_flags,
+       compat |= ethtool_set_flags_compat(dev, dev->ethtool_ops->set_flags,
                features, flags_dup_features);
 
        return compat;
-- 
1.7.5.4


>From 73b6eb4e70dc6af2aacd14ea2995d666f75d9911 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <[email protected]>
Date: Tue, 24 May 2011 10:20:18 +0200
Subject: [PATCH 03/16] netfilter: ipset: remove unused variable from
 type_pf_tdel()

[ Upstream commit b141c242ff978b63cdf0f3d1a767a5152750166b ]

Variable 'ret' is set in type_pf_tdel() but not used, remove.

Signed-off-by: Jozsef Kadlecsik <[email protected]>
Signed-off-by: Pablo Neira Ayuso <[email protected]>
---
 include/linux/netfilter/ipset/ip_set_ahash.h |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/netfilter/ipset/ip_set_ahash.h b/include/linux/netfilter/ipset/ip_set_ahash.h
index a0196ac..ac3c822 100644
--- a/include/linux/netfilter/ipset/ip_set_ahash.h
+++ b/include/linux/netfilter/ipset/ip_set_ahash.h
@@ -839,7 +839,7 @@ type_pf_tdel(struct ip_set *set, void *value, u32 timeout)
        struct htable *t = h->table;
        const struct type_pf_elem *d = value;
        struct hbucket *n;
-       int i, ret = 0;
+       int i;
        struct type_pf_elem *data;
        u32 key;
 
@@ -850,7 +850,7 @@ type_pf_tdel(struct ip_set *set, void *value, u32 timeout)
                if (!type_pf_data_equal(data, d))
                        continue;
                if (type_pf_data_expired(data))
-                       ret = -IPSET_ERR_EXIST;
+                       return -IPSET_ERR_EXIST;
                if (i != n->pos - 1)
                        /* Not last one */
                        type_pf_data_copy(data, ahash_tdata(n, n->pos - 1));
-- 
1.7.5.4


>From 0266ba0fb9d39bacdc9ef53f817b5c1f0bb24fdd Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <[email protected]>
Date: Tue, 24 May 2011 10:20:19 +0200
Subject: [PATCH 04/16] netfilter: ipset: fix ip_set_flush return code

[ Upstream commit 9184a9cba622d9e38462ce11dff7da094b4fea84 ]

ip_set_flush returned -EPROTO instead of -IPSET_ERR_PROTOCOL, fixed

Signed-off-by: Jozsef Kadlecsik <[email protected]>
Signed-off-by: Pablo Neira Ayuso <[email protected]>
---
 net/netfilter/ipset/ip_set_core.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 72d1ac6..8041bef 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -815,7 +815,7 @@ ip_set_flush(struct sock *ctnl, struct sk_buff *skb,
        ip_set_id_t i;
 
        if (unlikely(protocol_failed(attr)))
-               return -EPROTO;
+               return -IPSET_ERR_PROTOCOL;
 
        if (!attr[IPSET_ATTR_SETNAME]) {
                for (i = 0; i < ip_set_max; i++)
-- 
1.7.5.4


>From ac8da642f31961c5dbc7b6cf4053fc9c78720b05 Mon Sep 17 00:00:00 2001
From: Joe Perches <[email protected]>
Date: Sat, 21 May 2011 07:48:39 +0000
Subject: [PATCH 05/16] bug.h: Add WARN_RATELIMIT

[ Upstream commit b3eec79b0776e5340a3db75b34953977c7e5086e ]

Add a generic mechanism to ratelimit WARN(foo, fmt, ...) messages
using a hidden per call site static struct ratelimit_state.

Also add an __WARN_RATELIMIT variant to be able to use a specific
struct ratelimit_state.

Signed-off-by: Joe Perches <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
---
 include/asm-generic/bug.h |   16 ++++++++++++++++
 1 files changed, 16 insertions(+), 0 deletions(-)

diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index e5a3f58..12b250c 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -165,6 +165,22 @@ extern void warn_slowpath_null(const char *file, const int line);
 #define WARN_ON_RATELIMIT(condition, state)                    \
                WARN_ON((condition) && __ratelimit(state))
 
+#define __WARN_RATELIMIT(condition, state, format...)          \
+({                                                             \
+       int rtn = 0;                                            \
+       if (unlikely(__ratelimit(state)))                       \
+               rtn = WARN(condition, format);                  \
+       rtn;                                                    \
+})
+
+#define WARN_RATELIMIT(condition, format...)                   \
+({                                                             \
+       static DEFINE_RATELIMIT_STATE(_rs,                      \
+                                     DEFAULT_RATELIMIT_INTERVAL,       \
+                                     DEFAULT_RATELIMIT_BURST); \
+       __WARN_RATELIMIT(condition, &_rs, format);              \
+})
+
 /*
  * WARN_ON_SMP() is for cases that the warning is either
  * meaningless for !SMP or may even cause failures.
-- 
1.7.5.4


>From a5d102e51f39a86721569072135e801e0afee3a2 Mon Sep 17 00:00:00 2001
From: Joe Perches <[email protected]>
Date: Sat, 21 May 2011 07:48:40 +0000
Subject: [PATCH 06/16] net: filter: Use WARN_RATELIMIT

[ Upstream commit 6c4a5cb219520c7bc937ee186ca53f03733bd09f ]

A mis-configured filter can spam the logs with lots of stack traces.

Rate-limit the warnings and add printout of the bogus filter information.

Original-patch-by: Ben Greear <[email protected]>
Signed-off-by: Joe Perches <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
---
 net/core/filter.c |    4 +++-
 1 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index afb8afb..c788b37 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -409,7 +409,9 @@ load_b:
                        continue;
                }
                default:
-                       WARN_ON(1);
+                       WARN_RATELIMIT(1, "Unknown code:%u jt:%u tf:%u k:%u\n",
+                                      fentry->code, fentry->jt,
+                                      fentry->jf, fentry->k);
                        return 0;
                }
        }
-- 
1.7.5.4


>From f6f81b9cf25027b0a8f25a7107df748bdaa29523 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <[email protected]>
Date: Tue, 24 May 2011 16:15:41 -0400
Subject: [PATCH 07/16] bug.h: Fix build with CONFIG_PRINTK disabled.

[ Upstream commit 6b3678354647a653e669746c05765f05d2b90239 ]

Based upon an email by Joe Perches.

Reported-by: Randy Dunlap <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
Acked-by: Randy Dunlap <[email protected]>
---
 include/asm-generic/bug.h |   21 +++++++++++++++++++++
 1 files changed, 21 insertions(+), 0 deletions(-)

diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index 12b250c..9178484 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -162,6 +162,8 @@ extern void warn_slowpath_null(const char *file, const int line);
        unlikely(__ret_warn_once);                              \
 })
 
+#ifdef CONFIG_PRINTK
+
 #define WARN_ON_RATELIMIT(condition, state)                    \
                WARN_ON((condition) && __ratelimit(state))
 
@@ -181,6 +183,25 @@ extern void warn_slowpath_null(const char *file, const int line);
        __WARN_RATELIMIT(condition, &_rs, format);              \
 })
 
+#else
+
+#define WARN_ON_RATELIMIT(condition, state)                    \
+       WARN_ON(condition)
+
+#define __WARN_RATELIMIT(condition, state, format...)          \
+({                                                             \
+       int rtn = WARN(condition, format);                      \
+       rtn;                                                    \
+})
+
+#define WARN_RATELIMIT(condition, format...)                   \
+({                                                             \
+       int rtn = WARN(condition, format);                      \
+       rtn;                                                    \
+})
+
+#endif
+
 /*
  * WARN_ON_SMP() is for cases that the warning is either
  * meaningless for !SMP or may even cause failures.
-- 
1.7.5.4


>From d8a772dfa635993b44829c502074c79b3bc405f9 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <[email protected]>
Date: Thu, 26 May 2011 15:00:31 -0400
Subject: [PATCH 08/16] bug.h: Move ratelimit warn interfaces to ratelimit.h

[ Upstream commit 86e4ca66e81bba0f8640f1fa19b8b8f72cbd0561 ]

As reported by Ingo Molnar, we still have configuration combinations
where use of the WARN_RATELIMIT interfaces break the build because
dependencies don't get met.

Instead of going down the long road of trying to make it so that
ratelimit.h can get included by kernel.h or asm-generic/bug.h,
just move the interface into ratelimit.h and make users have
to include that.

Reported-by: Ingo Molnar <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
Acked-by: Randy Dunlap <[email protected]>
---
 include/asm-generic/bug.h |   40 ----------------------------------------
 include/linux/ratelimit.h |   40 ++++++++++++++++++++++++++++++++++++++++
 net/core/filter.c         |    1 +
 3 files changed, 41 insertions(+), 40 deletions(-)

diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index 9178484..dfb0ec6 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -162,46 +162,6 @@ extern void warn_slowpath_null(const char *file, const int line);
        unlikely(__ret_warn_once);                              \
 })
 
-#ifdef CONFIG_PRINTK
-
-#define WARN_ON_RATELIMIT(condition, state)                    \
-               WARN_ON((condition) && __ratelimit(state))
-
-#define __WARN_RATELIMIT(condition, state, format...)          \
-({                                                             \
-       int rtn = 0;                                            \
-       if (unlikely(__ratelimit(state)))                       \
-               rtn = WARN(condition, format);                  \
-       rtn;                                                    \
-})
-
-#define WARN_RATELIMIT(condition, format...)                   \
-({                                                             \
-       static DEFINE_RATELIMIT_STATE(_rs,                      \
-                                     DEFAULT_RATELIMIT_INTERVAL,       \
-                                     DEFAULT_RATELIMIT_BURST); \
-       __WARN_RATELIMIT(condition, &_rs, format);              \
-})
-
-#else
-
-#define WARN_ON_RATELIMIT(condition, state)                    \
-       WARN_ON(condition)
-
-#define __WARN_RATELIMIT(condition, state, format...)          \
-({                                                             \
-       int rtn = WARN(condition, format);                      \
-       rtn;                                                    \
-})
-
-#define WARN_RATELIMIT(condition, format...)                   \
-({                                                             \
-       int rtn = WARN(condition, format);                      \
-       rtn;                                                    \
-})
-
-#endif
-
 /*
  * WARN_ON_SMP() is for cases that the warning is either
  * meaningless for !SMP or may even cause failures.
diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h
index 03ff67b..2f00715 100644
--- a/include/linux/ratelimit.h
+++ b/include/linux/ratelimit.h
@@ -41,4 +41,44 @@ extern struct ratelimit_state printk_ratelimit_state;
 extern int ___ratelimit(struct ratelimit_state *rs, const char *func);
 #define __ratelimit(state) ___ratelimit(state, __func__)
 
+#ifdef CONFIG_PRINTK
+
+#define WARN_ON_RATELIMIT(condition, state)                    \
+               WARN_ON((condition) && __ratelimit(state))
+
+#define __WARN_RATELIMIT(condition, state, format...)          \
+({                                                             \
+       int rtn = 0;                                            \
+       if (unlikely(__ratelimit(state)))                       \
+               rtn = WARN(condition, format);                  \
+       rtn;                                                    \
+})
+
+#define WARN_RATELIMIT(condition, format...)                   \
+({                                                             \
+       static DEFINE_RATELIMIT_STATE(_rs,                      \
+                                     DEFAULT_RATELIMIT_INTERVAL,       \
+                                     DEFAULT_RATELIMIT_BURST); \
+       __WARN_RATELIMIT(condition, &_rs, format);              \
+})
+
+#else
+
+#define WARN_ON_RATELIMIT(condition, state)                    \
+       WARN_ON(condition)
+
+#define __WARN_RATELIMIT(condition, state, format...)          \
+({                                                             \
+       int rtn = WARN(condition, format);                      \
+       rtn;                                                    \
+})
+
+#define WARN_RATELIMIT(condition, format...)                   \
+({                                                             \
+       int rtn = WARN(condition, format);                      \
+       rtn;                                                    \
+})
+
+#endif
+
 #endif /* _LINUX_RATELIMIT_H */
diff --git a/net/core/filter.c b/net/core/filter.c
index c788b37..1238cbd 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -38,6 +38,7 @@
 #include <asm/unaligned.h>
 #include <linux/filter.h>
 #include <linux/reciprocal_div.h>
+#include <linux/ratelimit.h>
 
 enum {
        BPF_S_RET_K = 1,
-- 
1.7.5.4


>From 85e5741d01dc977f88b1ee286273893a1e364668 Mon Sep 17 00:00:00 2001
From: Hans Schillstrom <[email protected]>
Date: Tue, 24 May 2011 14:11:05 +0200
Subject: [PATCH 09/16] IPVS: bug in ip_vs_ftp, same list head used in all
 netns.

[ Upstream commit c74c0bfe0b61cf41a897c2444c038e0d3f600556 ]

When ip_vs was adapted to netns, the ftp application was not adapted
correctly.
This is a fix to avoid kernel errors. In the long term another solution
might be chosen, i.e. the ports that the ftp app uses should be per netns.

Signed-off-by: Hans Schillstrom <[email protected]>
Acked-by: Julian Anastasov <[email protected]>
Signed-off-by: Pablo Neira Ayuso <[email protected]>
---
 include/net/ip_vs.h            |    3 ++-
 net/netfilter/ipvs/ip_vs_ftp.c |   27 +++++++++++++++++++--------
 2 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 86aefed..e5bf4c8 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -802,7 +802,8 @@ struct netns_ipvs {
        struct list_head        rs_table[IP_VS_RTAB_SIZE];
        /* ip_vs_app */
        struct list_head        app_list;
-
+       /* ip_vs_ftp */
+       struct ip_vs_app        *ftp_app;
        /* ip_vs_proto */
        #define IP_VS_PROTO_TAB_SIZE    32      /* must be power of 2 */
        struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE];
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 6b5dd6d..af63553 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -411,25 +411,35 @@ static struct ip_vs_app ip_vs_ftp = {
 static int __net_init __ip_vs_ftp_init(struct net *net)
 {
        int i, ret;
-       struct ip_vs_app *app = &ip_vs_ftp;
+       struct ip_vs_app *app;
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       app = kmemdup(&ip_vs_ftp, sizeof(struct ip_vs_app), GFP_KERNEL);
+       if (!app)
+               return -ENOMEM;
+       INIT_LIST_HEAD(&app->a_list);
+       INIT_LIST_HEAD(&app->incs_list);
+       ipvs->ftp_app = app;
 
        ret = register_ip_vs_app(net, app);
        if (ret)
-               return ret;
+               goto err_exit;
 
        for (i=0; i<IP_VS_APP_MAX_PORTS; i++) {
                if (!ports[i])
                        continue;
                ret = register_ip_vs_app_inc(net, app, app->protocol, ports[i]);
                if (ret)
-                       break;
+                       goto err_unreg;
                pr_info("%s: loaded support on port[%d] = %d\n",
                        app->name, i, ports[i]);
        }
+       return 0;
 
-       if (ret)
-               unregister_ip_vs_app(net, app);
-
+err_unreg:
+       unregister_ip_vs_app(net, app);
+err_exit:
+       kfree(ipvs->ftp_app);
        return ret;
 }
 /*
@@ -437,9 +447,10 @@ static int __net_init __ip_vs_ftp_init(struct net *net)
  */
 static void __ip_vs_ftp_exit(struct net *net)
 {
-       struct ip_vs_app *app = &ip_vs_ftp;
+       struct netns_ipvs *ipvs = net_ipvs(net);
 
-       unregister_ip_vs_app(net, app);
+       unregister_ip_vs_app(net, ipvs->ftp_app);
+       kfree(ipvs->ftp_app);
 }
 
 static struct pernet_operations ip_vs_ftp_ops = {
-- 
1.7.5.4


>From 452af3cfcca516324a287a0d610c73d987c5d092 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <[email protected]>
Date: Thu, 26 May 2011 17:27:11 +0000
Subject: [PATCH 10/16] inetpeer: fix race in unused_list manipulations

[ Upstream commit 686a7e32ca7fdd819eb9606abd3db52b77d1479f ]

Several crashes in cleanup_once() were reported in recent kernels.

Commit d6cc1d642de9 (inetpeer: various changes) added a race in
unlink_from_unused().

One way to avoid taking unused_peers.lock before doing the list_empty()
test is to catch 0->1 refcnt transitions, using full barrier atomic
operations variants (atomic_cmpxchg() and atomic_inc_return()) instead
of previous atomic_inc() and atomic_add_unless() variants.

We then call unlink_from_unused() only for the owner of the 0->1
transition.

Add a new atomic_add_unless_return() static helper

With help from Arun Sharma.

Refs: https://bugzilla.kernel.org/show_bug.cgi?id=32772

Reported-by: Arun Sharma <[email protected]>
Reported-by: Maximilian Engelhardt <[email protected]>
Reported-by: Yann Dupont <[email protected]>
Reported-by: Denys Fedoryshchenko <[email protected]>
Signed-off-by: Eric Dumazet <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
---
 net/ipv4/inetpeer.c |   42 +++++++++++++++++++++++++++---------------
 1 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 9df4e63..ce616d9 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -154,11 +154,9 @@ void __init inet_initpeers(void)
 /* Called with or without local BH being disabled. */
 static void unlink_from_unused(struct inet_peer *p)
 {
-       if (!list_empty(&p->unused)) {
-               spin_lock_bh(&unused_peers.lock);
-               list_del_init(&p->unused);
-               spin_unlock_bh(&unused_peers.lock);
-       }
+       spin_lock_bh(&unused_peers.lock);
+       list_del_init(&p->unused);
+       spin_unlock_bh(&unused_peers.lock);
 }
 
 static int addr_compare(const struct inetpeer_addr *a,
@@ -205,6 +203,20 @@ static int addr_compare(const struct inetpeer_addr *a,
        u;                                                      \
 })
 
+static bool atomic_add_unless_return(atomic_t *ptr, int a, int u, int *newv)
+{
+       int cur, old = atomic_read(ptr);
+
+       while (old != u) {
+               *newv = old + a;
+               cur = atomic_cmpxchg(ptr, old, *newv);
+               if (cur == old)
+                       return true;
+               old = cur;
+       }
+       return false;
+}
+
 /*
  * Called with rcu_read_lock()
  * Because we hold no lock against a writer, its quite possible we fall
@@ -213,7 +225,8 @@ static int addr_compare(const struct inetpeer_addr *a,
  * We exit from this function if number of links exceeds PEER_MAXDEPTH
  */
 static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr,
-                                   struct inet_peer_base *base)
+                                   struct inet_peer_base *base,
+                                   int *newrefcnt)
 {
        struct inet_peer *u = rcu_dereference(base->root);
        int count = 0;
@@ -226,7 +239,7 @@ static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr,
                         * distinction between an unused entry (refcnt=0) and
                         * a freed one.
                         */
-                       if (unlikely(!atomic_add_unless(&u->refcnt, 1, -1)))
+                       if (!atomic_add_unless_return(&u->refcnt, 1, -1, newrefcnt))
                                u = NULL;
                        return u;
                }
@@ -465,22 +478,23 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create)
        struct inet_peer_base *base = family_to_base(daddr->family);
        struct inet_peer *p;
        unsigned int sequence;
-       int invalidated;
+       int invalidated, newrefcnt = 0;
 
        /* Look up for the address quickly, lockless.
         * Because of a concurrent writer, we might not find an existing entry.
         */
        rcu_read_lock();
        sequence = read_seqbegin(&base->lock);
-       p = lookup_rcu(daddr, base);
+       p = lookup_rcu(daddr, base, &newrefcnt);
        invalidated = read_seqretry(&base->lock, sequence);
        rcu_read_unlock();
 
        if (p) {
-               /* The existing node has been found.
+found:         /* The existing node has been found.
                 * Remove the entry from unused list if it was there.
                 */
-               unlink_from_unused(p);
+               if (newrefcnt == 1)
+                       unlink_from_unused(p);
                return p;
        }
 
@@ -494,11 +508,9 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create)
        write_seqlock_bh(&base->lock);
        p = lookup(daddr, stack, base);
        if (p != peer_avl_empty) {
-               atomic_inc(&p->refcnt);
+               newrefcnt = atomic_inc_return(&p->refcnt);
                write_sequnlock_bh(&base->lock);
-               /* Remove the entry from unused list if it was there. */
-               unlink_from_unused(p);
-               return p;
+               goto found;
        }
        p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL;
        if (p) {
-- 
1.7.5.4


>From f23d9dfddc4185d15e64635174e11f9c1c1e430b Mon Sep 17 00:00:00 2001
From: Alexander Holler <[email protected]>
Date: Tue, 7 Jun 2011 00:51:35 -0700
Subject: [PATCH 11/16] bridge: provide a cow_metrics method for fake_ops

[ Upstream commit 6407d74c5106bb362b4087693688afd34942b094 ]

Like in commit 0972ddb237 (provide cow_metrics() methods to blackhole
dst_ops), we must provide a cow_metrics for bridges fake_dst_ops as
well.

This fixes a regression coming from commits 62fa8a846d7d (net: Implement
read-only protection and COW'ing of metrics.) and 33eb9873a28 (bridge:
initialize fake_rtable metrics)

ip link set mybridge mtu 1234
-->
[  136.546243] Pid: 8415, comm: ip Tainted: P
2.6.39.1-00006-g40545b7 #103 ASUSTeK Computer Inc.         V1Sn
        /V1Sn
[  136.546256] EIP: 0060:[<00000000>] EFLAGS: 00010202 CPU: 0
[  136.546268] EIP is at 0x0
[  136.546273] EAX: f14a389c EBX: 000005d4 ECX: f80d32c0 EDX: f80d1da1
[  136.546279] ESI: f14a3000 EDI: f255bf10 EBP: f15c3b54 ESP: f15c3b48
[  136.546285]  DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068
[  136.546293] Process ip (pid: 8415, ti=f15c2000 task=f4741f80
task.ti=f15c2000)
[  136.546297] Stack:
[  136.546301]  f80c658f f14a3000 ffffffed f15c3b64 c12cb9c8 f80d1b80
ffffffa1 f15c3bbc
[  136.546315]  c12da347 c12d9c7d 00000000 f7670b00 00000000 f80d1b80
ffffffa6 f15c3be4
[  136.546329]  00000004 f14a3000 f255bf20 00000008 f15c3bbc c11d6cae
00000000 00000000
[  136.546343] Call Trace:
[  136.546359]  [<f80c658f>] ? br_change_mtu+0x5f/0x80 [bridge]
[  136.546372]  [<c12cb9c8>] dev_set_mtu+0x38/0x80
[  136.546381]  [<c12da347>] do_setlink+0x1a7/0x860
[  136.546390]  [<c12d9c7d>] ? rtnl_fill_ifinfo+0x9bd/0xc70
[  136.546400]  [<c11d6cae>] ? nla_parse+0x6e/0xb0
[  136.546409]  [<c12db931>] rtnl_newlink+0x361/0x510
[  136.546420]  [<c1023240>] ? vmalloc_sync_all+0x100/0x100
[  136.546429]  [<c1362762>] ? error_code+0x5a/0x60
[  136.546438]  [<c12db5d0>] ? rtnl_configure_link+0x80/0x80
[  136.546446]  [<c12db27a>] rtnetlink_rcv_msg+0xfa/0x210
[  136.546454]  [<c12db180>] ? __rtnl_unlock+0x20/0x20
[  136.546463]  [<c12ee0fe>] netlink_rcv_skb+0x8e/0xb0
[  136.546471]  [<c12daf1c>] rtnetlink_rcv+0x1c/0x30
[  136.546479]  [<c12edafa>] netlink_unicast+0x23a/0x280
[  136.546487]  [<c12ede6b>] netlink_sendmsg+0x26b/0x2f0
[  136.546497]  [<c12bb828>] sock_sendmsg+0xc8/0x100
[  136.546508]  [<c10adf61>] ? __alloc_pages_nodemask+0xe1/0x750
[  136.546517]  [<c11d0602>] ? _copy_from_user+0x42/0x60
[  136.546525]  [<c12c5e4c>] ? verify_iovec+0x4c/0xc0
[  136.546534]  [<c12bd805>] sys_sendmsg+0x1c5/0x200
[  136.546542]  [<c10c2150>] ? __do_fault+0x310/0x410
[  136.546549]  [<c10c2c46>] ? do_wp_page+0x1d6/0x6b0
[  136.546557]  [<c10c47d1>] ? handle_pte_fault+0xe1/0x720
[  136.546565]  [<c12bd1af>] ? sys_getsockname+0x7f/0x90
[  136.546574]  [<c10c4ec1>] ? handle_mm_fault+0xb1/0x180
[  136.546582]  [<c1023240>] ? vmalloc_sync_all+0x100/0x100
[  136.546589]  [<c10233b3>] ? do_page_fault+0x173/0x3d0
[  136.546596]  [<c12bd87b>] ? sys_recvmsg+0x3b/0x60
[  136.546605]  [<c12bdd83>] sys_socketcall+0x293/0x2d0
[  136.546614]  [<c13629d0>] sysenter_do_call+0x12/0x26
[  136.546619] Code:  Bad EIP value.
[  136.546627] EIP: [<00000000>] 0x0 SS:ESP 0068:f15c3b48
[  136.546645] CR2: 0000000000000000
[  136.546652] ---[ end trace 6909b560e78934fa ]---

Signed-off-by: Alexander Holler <[email protected]>
Signed-off-by: Eric Dumazet <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
---
 net/bridge/br_netfilter.c |    6 ++++++
 1 files changed, 6 insertions(+), 0 deletions(-)

diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 5f9c091..ecaaa00 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -104,10 +104,16 @@ static void fake_update_pmtu(struct dst_entry *dst, u32 mtu)
 {
 }
 
+static u32 *fake_cow_metrics(struct dst_entry *dst, unsigned long old)
+{
+       return NULL;
+}
+
 static struct dst_ops fake_dst_ops = {
        .family =               AF_INET,
        .protocol =             cpu_to_be16(ETH_P_IP),
        .update_pmtu =          fake_update_pmtu,
+       .cow_metrics =          fake_cow_metrics,
 };
 
 /*
-- 
1.7.5.4


>From 21d93118c34b2deb1eeca842e072dfe97eb4a1a8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <[email protected]>
Date: Mon, 6 Jun 2011 22:42:06 -0700
Subject: [PATCH 12/16] af_packet: prevent information leak

[ Upstream commit 13fcb7bd322164c67926ffe272846d4860196dc6 ]

In 2.6.27, commit 393e52e33c6c2 (packet: deliver VLAN TCI to userspace)
added a small information leak.

Add padding field and make sure it's zeroed before copy to user.

Signed-off-by: Eric Dumazet <[email protected]>
CC: Patrick McHardy <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
---
 include/linux/if_packet.h |    2 ++
 net/packet/af_packet.c    |    2 ++
 2 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h
index 72bfa5a..8e37fba 100644
--- a/include/linux/if_packet.h
+++ b/include/linux/if_packet.h
@@ -62,6 +62,7 @@ struct tpacket_auxdata {
        __u16           tp_mac;
        __u16           tp_net;
        __u16           tp_vlan_tci;
+       __u16           tp_padding;
 };
 
 /* Rx ring - header status */
@@ -100,6 +101,7 @@ struct tpacket2_hdr {
        __u32           tp_sec;
        __u32           tp_nsec;
        __u16           tp_vlan_tci;
+       __u16           tp_padding;
 };
 
 #define TPACKET2_HDRLEN                (TPACKET_ALIGN(sizeof(struct tpacket2_hdr)) + sizeof(struct sockaddr_ll))
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index b5362e9..04098c2 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -799,6 +799,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
                h.h2->tp_sec = ts.tv_sec;
                h.h2->tp_nsec = ts.tv_nsec;
                h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
+               h.h2->tp_padding = 0;
                hdrlen = sizeof(*h.h2);
                break;
        default:
@@ -1727,6 +1728,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
                aux.tp_net = skb_network_offset(skb);
                aux.tp_vlan_tci = vlan_tx_tag_get(skb);
 
+               aux.tp_padding = 0;
                put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
        }
 
-- 
1.7.5.4


>From e615bf10a59b2f5625cac993e15977586a367626 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <[email protected]>
Date: Fri, 17 Jun 2011 16:25:39 -0400
Subject: [PATCH 13/16] inet_diag: fix inet_diag_bc_audit()

[ Upstream commit eeb1497277d6b1a0a34ed36b97e18f2bd7d6de0d ]

A malicious user or buggy application can inject code and trigger an
infinite loop in inet_diag_bc_audit().

Also make sure each instruction is aligned on a 4-byte boundary, to avoid
unaligned accesses.

Reported-by: Dan Rosenberg <[email protected]>
Signed-off-by: Eric Dumazet <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
---
 net/ipv4/inet_diag.c |   14 ++++++--------
 1 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 2ada171..65c23d9 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -437,7 +437,7 @@ static int valid_cc(const void *bc, int len, int cc)
                        return 0;
                if (cc == len)
                        return 1;
-               if (op->yes < 4)
+               if (op->yes < 4 || op->yes & 3)
                        return 0;
                len -= op->yes;
                bc  += op->yes;
@@ -447,11 +447,11 @@ static int valid_cc(const void *bc, int len, int cc)
 
 static int inet_diag_bc_audit(const void *bytecode, int bytecode_len)
 {
-       const unsigned char *bc = bytecode;
+       const void *bc = bytecode;
        int  len = bytecode_len;
 
        while (len > 0) {
-               struct inet_diag_bc_op *op = (struct inet_diag_bc_op *)bc;
+               const struct inet_diag_bc_op *op = bc;
 
 //printk("BC: %d %d %d {%d} / %d\n", op->code, op->yes, op->no, op[1].no, len);
                switch (op->code) {
@@ -462,22 +462,20 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len)
                case INET_DIAG_BC_S_LE:
                case INET_DIAG_BC_D_GE:
                case INET_DIAG_BC_D_LE:
-                       if (op->yes < 4 || op->yes > len + 4)
-                               return -EINVAL;
                case INET_DIAG_BC_JMP:
-                       if (op->no < 4 || op->no > len + 4)
+                       if (op->no < 4 || op->no > len + 4 || op->no & 3)
                                return -EINVAL;
                        if (op->no < len &&
                            !valid_cc(bytecode, bytecode_len, len - op->no))
                                return -EINVAL;
                        break;
                case INET_DIAG_BC_NOP:
-                       if (op->yes < 4 || op->yes > len + 4)
-                               return -EINVAL;
                        break;
                default:
                        return -EINVAL;
                }
+               if (op->yes < 4 || op->yes > len + 4 || op->yes & 3)
+                       return -EINVAL;
                bc  += op->yes;
                len -= op->yes;
        }
-- 
1.7.5.4


>From ed1cde6af9502a1955e390875a702f3be2da02de Mon Sep 17 00:00:00 2001
From: Marcus Meissner <[email protected]>
Date: Wed, 1 Jun 2011 21:05:22 -0700
Subject: [PATCH 14/16] net/ipv4: Check for mistakenly passed in non-IPv4
 address

[ Upstream commit d0733d2e29b652b2e7b1438ececa732e4eed98eb ]

Check against mistakenly passing in IPv6 addresses (which would result
in an INADDR_ANY bind) or similar incompatible sockaddrs.

Signed-off-by: Marcus Meissner <[email protected]>
Cc: Reinhard Max <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
---
 net/ipv4/af_inet.c |    3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 807d83c..6bab50e 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -464,6 +464,9 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
        if (addr_len < sizeof(struct sockaddr_in))
                goto out;
 
+       if (addr->sin_family != AF_INET)
+               goto out;
+
        chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
 
        /* Not specified by any standard per-se, however it breaks too
-- 
1.7.5.4


>From 426f44b84f96244914a642bb63d40e1e0f064672 Mon Sep 17 00:00:00 2001
From: Xufeng Zhang <[email protected]>
Date: Tue, 21 Jun 2011 10:43:39 +0000
Subject: [PATCH 15/16] ipv6/udp: Use the correct variable to determine
 non-blocking condition

[ Upstream commit 32c90254ed4a0c698caa0794ebb4de63fcc69631 ]

The udpv6_recvmsg() function is not using the correct variable to determine
whether or not the socket is in non-blocking operation; this will lead
to unexpected behavior when a UDP checksum error occurs.

Consider a non-blocking udp receive scenario: when udpv6_recvmsg() is
called by sock_common_recvmsg(), MSG_DONTWAIT bit of flags variable in
udpv6_recvmsg() is cleared by "flags & ~MSG_DONTWAIT" in this call:

    err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
                   flags & ~MSG_DONTWAIT, &addr_len);

i.e. with udpv6_recvmsg() getting these values:

        int noblock = flags & MSG_DONTWAIT
        int flags = flags & ~MSG_DONTWAIT

So, when a UDP checksum error occurs, the execution will go to
csum_copy_err, and then the problem happens:

    csum_copy_err:
            ...............
            if (flags & MSG_DONTWAIT)
                    return -EAGAIN;
            goto try_again;
            ...............

But it will always go to try_again as MSG_DONTWAIT has been cleared
from flags at call time -- only noblock contains the original value
of MSG_DONTWAIT, so the test should be:

            if (noblock)
                    return -EAGAIN;

This is also consistent with what the ipv4/udp code does.

Signed-off-by: Xufeng Zhang <[email protected]>
Signed-off-by: Paul Gortmaker <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
---
 net/ipv6/udp.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 9e305d74..f3ffc19 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -453,7 +453,7 @@ csum_copy_err:
        }
        unlock_sock_fast(sk, slow);
 
-       if (flags & MSG_DONTWAIT)
+       if (noblock)
                return -EAGAIN;
        goto try_again;
 }
-- 
1.7.5.4


>From c40d1703fad9852603d7a84156168b46ae2abbcf Mon Sep 17 00:00:00 2001
From: Xufeng Zhang <[email protected]>
Date: Tue, 21 Jun 2011 10:43:40 +0000
Subject: [PATCH 16/16] udp/recvmsg: Clear MSG_TRUNC flag when starting over
 for a new packet

[ Upstream commit 9cfaa8def1c795a512bc04f2aec333b03724ca2e ]

Consider this scenario: When the size of the first received UDP packet
is bigger than the receive buffer, the MSG_TRUNC bit is set in msg->msg_flags.
However, if a checksum error happens and this is a blocking socket, it will
go to the try_again loop to receive the next packet.  But if the size of the
next UDP packet is smaller than the receive buffer, the MSG_TRUNC flag should
not be set; because the MSG_TRUNC bit is not cleared in msg->msg_flags before
receiving the next packet, MSG_TRUNC is still set, which is wrong.

Fix this problem by clearing MSG_TRUNC flag when starting over for a
new packet.

Signed-off-by: Xufeng Zhang <[email protected]>
Signed-off-by: Paul Gortmaker <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
---
 net/ipv4/udp.c |    3 +++
 net/ipv6/udp.c |    3 +++
 2 files changed, 6 insertions(+), 0 deletions(-)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f87a8eb..0e33e34 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1241,6 +1241,9 @@ csum_copy_err:
 
        if (noblock)
                return -EAGAIN;
+
+       /* starting over for a new packet */
+       msg->msg_flags &= ~MSG_TRUNC;
        goto try_again;
 }
 
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index f3ffc19..b31a2f3 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -455,6 +455,9 @@ csum_copy_err:
 
        if (noblock)
                return -EAGAIN;
+
+       /* starting over for a new packet */
+       msg->msg_flags &= ~MSG_TRUNC;
        goto try_again;
 }
 
-- 
1.7.5.4

_______________________________________________
stable mailing list
[email protected]
http://linux.kernel.org/mailman/listinfo/stable

Reply via email to