[PATCH 20/53] ipset: remove unused function __ip_set_get_netlink

2017-05-01 Thread Pablo Neira Ayuso
From: Aaron Conole <acon...@bytheb.org>

There are no in-tree callers.

Signed-off-by: Aaron Conole <acon...@bytheb.org>
Acked-by: Jozsef Kadlecsik <kad...@blackhole.kfki.hu>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 net/netfilter/ipset/ip_set_core.c | 8 
 1 file changed, 8 deletions(-)

diff --git a/net/netfilter/ipset/ip_set_core.c 
b/net/netfilter/ipset/ip_set_core.c
index cb120c3c040e..2b87d9fd3f72 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -501,14 +501,6 @@ __ip_set_put(struct ip_set *set)
  * a separate reference counter
  */
 static inline void
-__ip_set_get_netlink(struct ip_set *set)
-{
-   write_lock_bh(_set_ref_lock);
-   set->ref_netlink++;
-   write_unlock_bh(_set_ref_lock);
-}
-
-static inline void
 __ip_set_put_netlink(struct ip_set *set)
 {
write_lock_bh(_set_ref_lock);
-- 
2.1.4



[PATCH 15/53] netfilter: ip6_tables: Remove unnecessary comments

2017-05-01 Thread Pablo Neira Ayuso
From: Arushi Singhal 

These comments are obsolete and should go, as there is no longer a set of
rules per CPU.

Signed-off-by: Arushi Singhal 
---
 net/ipv6/netfilter/ip6_tables.c | 9 -
 1 file changed, 9 deletions(-)

diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index d862e3471935..1f90644056ac 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -51,15 +51,6 @@ void *ip6t_alloc_initial_table(const struct xt_table *info)
 }
 EXPORT_SYMBOL_GPL(ip6t_alloc_initial_table);
 
-/*
-   We keep a set of rules for each CPU, so we can avoid write-locking
-   them in the softirq when updating the counters and therefore
-   only need to read-lock in the softirq; doing a write_lock_bh() in user
-   context stops packets coming through and allows user context to read
-   the counters or update the rules.
-
-   Hence the start of any table is given by get_table() below.  */
-
 /* Returns whether matches rule or not. */
 /* Performance critical - called for every packet */
 static inline bool
-- 
2.1.4



[PATCH 25/53] netfilter: nft_ct: allow to set ctnetlink event types of a connection

2017-05-01 Thread Pablo Neira Ayuso
From: Florian Westphal <f...@strlen.de>

By default the kernel emits all ctnetlink events for a connection.
This patch allows selecting the types of events to generate.

This can be used to e.g. only send DESTROY events but no NEW/UPDATE ones
and will work even if sysctl net.netfilter.nf_conntrack_events is set to 0.

This was already possible via iptables' CT target, but the nft version has
the advantage that it can also be used with already-established conntracks.

The added nf_ct_is_template() check isn't a bug fix as we only support
mark and labels (and unlike ecache the conntrack core doesn't copy those).

Signed-off-by: Florian Westphal <f...@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h |  2 ++
 net/netfilter/nft_ct.c   | 25 -
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/netfilter/nf_tables.h 
b/include/uapi/linux/netfilter/nf_tables.h
index 8f3842690d17..683f6f88fcac 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -901,6 +901,7 @@ enum nft_rt_attributes {
  * @NFT_CT_BYTES: conntrack bytes
  * @NFT_CT_AVGPKT: conntrack average bytes per packet
  * @NFT_CT_ZONE: conntrack zone
+ * @NFT_CT_EVENTMASK: ctnetlink events to be generated for this conntrack
  */
 enum nft_ct_keys {
NFT_CT_STATE,
@@ -921,6 +922,7 @@ enum nft_ct_keys {
NFT_CT_BYTES,
NFT_CT_AVGPKT,
NFT_CT_ZONE,
+   NFT_CT_EVENTMASK,
 };
 
 /**
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 6c6fd48b024c..a34ceb38fc55 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -264,7 +264,7 @@ static void nft_ct_set_eval(const struct nft_expr *expr,
struct nf_conn *ct;
 
ct = nf_ct_get(skb, );
-   if (ct == NULL)
+   if (ct == NULL || nf_ct_is_template(ct))
return;
 
switch (priv->key) {
@@ -284,6 +284,22 @@ static void nft_ct_set_eval(const struct nft_expr *expr,
  NF_CT_LABELS_MAX_SIZE / sizeof(u32));
break;
 #endif
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+   case NFT_CT_EVENTMASK: {
+   struct nf_conntrack_ecache *e = nf_ct_ecache_find(ct);
+   u32 ctmask = regs->data[priv->sreg];
+
+   if (e) {
+   if (e->ctmask != ctmask)
+   e->ctmask = ctmask;
+   break;
+   }
+
+   if (ctmask && !nf_ct_is_confirmed(ct))
+   nf_ct_ecache_ext_add(ct, ctmask, 0, GFP_ATOMIC);
+   break;
+   }
+#endif
default:
break;
}
@@ -539,6 +555,13 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
len = sizeof(u16);
break;
 #endif
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+   case NFT_CT_EVENTMASK:
+   if (tb[NFTA_CT_DIRECTION])
+   return -EINVAL;
+   len = sizeof(u32);
+   break;
+#endif
default:
return -EOPNOTSUPP;
}
-- 
2.1.4



[PATCH 24/53] netfilter: remove nf_ct_is_untracked

2017-05-01 Thread Pablo Neira Ayuso
From: Florian Westphal <f...@strlen.de>

This function is now obsolete and always returns false.
This change has no effect on generated code.

Signed-off-by: Florian Westphal <f...@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 include/net/ip_vs.h   |  4 ++--
 include/net/netfilter/nf_conntrack.h  |  5 -
 include/net/netfilter/nf_conntrack_core.h |  2 +-
 net/ipv4/netfilter/nf_nat_l3proto_ipv4.c  |  4 
 net/ipv4/netfilter/nf_socket_ipv4.c   |  2 +-
 net/ipv6/netfilter/nf_nat_l3proto_ipv6.c  |  4 
 net/netfilter/ipvs/ip_vs_ftp.c|  3 +--
 net/netfilter/ipvs/ip_vs_nfct.c   |  4 ++--
 net/netfilter/ipvs/ip_vs_xmit.c   |  8 
 net/netfilter/nf_conntrack_netlink.c  | 12 +---
 net/netfilter/xt_HMARK.c  |  2 +-
 net/netfilter/xt_cluster.c|  3 ---
 net/netfilter/xt_connlabel.c  |  2 +-
 net/netfilter/xt_connmark.c   |  4 ++--
 net/netfilter/xt_ipvs.c   |  2 +-
 net/openvswitch/conntrack.c   |  5 -
 16 files changed, 17 insertions(+), 49 deletions(-)

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 9a75d9933e63..632082300e77 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -1555,7 +1555,7 @@ static inline void ip_vs_notrack(struct sk_buff *skb)
enum ip_conntrack_info ctinfo;
struct nf_conn *ct = nf_ct_get(skb, );
 
-   if (!ct || !nf_ct_is_untracked(ct)) {
+   if (ct) {
nf_conntrack_put(>ct_general);
nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
}
@@ -1616,7 +1616,7 @@ static inline bool ip_vs_conn_uses_conntrack(struct 
ip_vs_conn *cp,
if (!(cp->flags & IP_VS_CONN_F_NFCT))
return false;
ct = nf_ct_get(skb, );
-   if (ct && !nf_ct_is_untracked(ct))
+   if (ct)
return true;
 #endif
return false;
diff --git a/include/net/netfilter/nf_conntrack.h 
b/include/net/netfilter/nf_conntrack.h
index 012b99f563e5..4978a82b75fa 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -273,11 +273,6 @@ static inline int nf_ct_is_dying(const struct nf_conn *ct)
return test_bit(IPS_DYING_BIT, >status);
 }
 
-static inline int nf_ct_is_untracked(const struct nf_conn *ct)
-{
-   return false;
-}
-
 /* Packet is received from loopback */
 static inline bool nf_is_loopback_packet(const struct sk_buff *skb)
 {
diff --git a/include/net/netfilter/nf_conntrack_core.h 
b/include/net/netfilter/nf_conntrack_core.h
index 84ec7ca5f195..81d7f8a30945 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -65,7 +65,7 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb)
struct nf_conn *ct = (struct nf_conn *)skb_nfct(skb);
int ret = NF_ACCEPT;
 
-   if (ct && !nf_ct_is_untracked(ct)) {
+   if (ct) {
if (!nf_ct_is_confirmed(ct))
ret = __nf_conntrack_confirm(skb);
if (likely(ret == NF_ACCEPT))
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c 
b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index 6f5e8d01b876..e3bfa6a169f0 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -264,10 +264,6 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
if (!ct)
return NF_ACCEPT;
 
-   /* Don't try to NAT if this packet is not conntracked */
-   if (nf_ct_is_untracked(ct))
-   return NF_ACCEPT;
-
nat = nf_ct_nat_ext_add(ct);
if (nat == NULL)
return NF_ACCEPT;
diff --git a/net/ipv4/netfilter/nf_socket_ipv4.c 
b/net/ipv4/netfilter/nf_socket_ipv4.c
index a83d558e1aae..e9293bdebba0 100644
--- a/net/ipv4/netfilter/nf_socket_ipv4.c
+++ b/net/ipv4/netfilter/nf_socket_ipv4.c
@@ -139,7 +139,7 @@ struct sock *nf_sk_lookup_slow_v4(struct net *net, const 
struct sk_buff *skb,
 * SNAT-ted connection.
 */
ct = nf_ct_get(skb, );
-   if (ct && !nf_ct_is_untracked(ct) &&
+   if (ct &&
((iph->protocol != IPPROTO_ICMP &&
  ctinfo == IP_CT_ESTABLISHED_REPLY) ||
 (iph->protocol == IPPROTO_ICMP &&
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c 
b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index e0be97e636a4..922b5aef273c 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -273,10 +273,6 @@ nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
if (!ct)
return NF_ACCEPT;
 
-   /* Don't try to NAT if this packet is not conntracked */
-   if (nf_ct_is_untracked(ct))
-   return NF_ACCEPT;
-
nat = nf_ct_nat_ext_add(ct);
if (nat == NULL)
return NF_ACCEPT;
di

[PATCH 29/53] netfilter: helpers: remove data_len usage for inkernel helpers

2017-05-01 Thread Pablo Neira Ayuso
From: Florian Westphal <f...@strlen.de>

No need to track this for inkernel helpers anymore as
NF_CT_HELPER_BUILD_BUG_ON checks do this now.

All inkernel helpers know what kind of structure they
stored in helper->data.

Signed-off-by: Florian Westphal <f...@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 include/net/netfilter/nf_conntrack_helper.h | 11 ++-
 net/netfilter/nf_conntrack_ftp.c|  6 ++
 net/netfilter/nf_conntrack_h323_main.c  |  4 
 net/netfilter/nf_conntrack_helper.c |  6 ++
 net/netfilter/nf_conntrack_irc.c|  2 +-
 net/netfilter/nf_conntrack_pptp.c   |  1 -
 net/netfilter/nf_conntrack_sane.c   |  6 ++
 net/netfilter/nf_conntrack_sip.c| 12 
 net/netfilter/nf_conntrack_tftp.c   |  4 ++--
 9 files changed, 19 insertions(+), 33 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_helper.h 
b/include/net/netfilter/nf_conntrack_helper.h
index 29539ed1008f..e04fa7691e5d 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -29,9 +29,6 @@ struct nf_conntrack_helper {
struct module *me;  /* pointer to self */
const struct nf_conntrack_expect_policy *expect_policy;
 
-   /* length of internal data, ie. sizeof(struct nf_ct_*_master) */
-   size_t data_len;
-
/* Tuple of things we will help (compared against server response) */
struct nf_conntrack_tuple tuple;
 
@@ -49,7 +46,11 @@ struct nf_conntrack_helper {
unsigned int expect_class_max;
 
unsigned int flags;
-   unsigned int queue_num; /* For user-space helpers. */
+
+   /* For user-space helpers: */
+   unsigned int queue_num;
+   /* length of userspace private data stored in nf_conn_help->data */
+   u16 data_len;
 };
 
 /* Must be kept in sync with the classes defined by helpers */
@@ -82,7 +83,7 @@ void nf_ct_helper_init(struct nf_conntrack_helper *helper,
   u16 l3num, u16 protonum, const char *name,
   u16 default_port, u16 spec_port, u32 id,
   const struct nf_conntrack_expect_policy *exp_pol,
-  u32 expect_class_max, u32 data_len,
+  u32 expect_class_max,
   int (*help)(struct sk_buff *skb, unsigned int protoff,
   struct nf_conn *ct,
   enum ip_conntrack_info ctinfo),
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 58e1256cd05d..f0e9a7511e1a 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -591,12 +591,10 @@ static int __init nf_conntrack_ftp_init(void)
for (i = 0; i < ports_c; i++) {
nf_ct_helper_init([2 * i], AF_INET, IPPROTO_TCP, "ftp",
  FTP_PORT, ports[i], ports[i], _exp_policy,
- 0, sizeof(struct nf_ct_ftp_master), help,
- nf_ct_ftp_from_nlattr, THIS_MODULE);
+ 0, help, nf_ct_ftp_from_nlattr, THIS_MODULE);
nf_ct_helper_init([2 * i + 1], AF_INET6, IPPROTO_TCP, "ftp",
  FTP_PORT, ports[i], ports[i], _exp_policy,
- 0, sizeof(struct nf_ct_ftp_master), help,
- nf_ct_ftp_from_nlattr, THIS_MODULE);
+ 0, help, nf_ct_ftp_from_nlattr, THIS_MODULE);
}
 
ret = nf_conntrack_helpers_register(ftp, ports_c * 2);
diff --git a/net/netfilter/nf_conntrack_h323_main.c 
b/net/netfilter/nf_conntrack_h323_main.c
index e98204349efe..3bcdc718484e 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -637,7 +637,6 @@ static const struct nf_conntrack_expect_policy 
h245_exp_policy = {
 static struct nf_conntrack_helper nf_conntrack_helper_h245 __read_mostly = {
.name   = "H.245",
.me = THIS_MODULE,
-   .data_len   = sizeof(struct nf_ct_h323_master),
.tuple.src.l3num= AF_UNSPEC,
.tuple.dst.protonum = IPPROTO_UDP,
.help   = h245_help,
@@ -1215,7 +1214,6 @@ static struct nf_conntrack_helper 
nf_conntrack_helper_q931[] __read_mostly = {
{
.name   = "Q.931",
.me = THIS_MODULE,
-   .data_len   = sizeof(struct nf_ct_h323_master),
.tuple.src.l3num= AF_INET,
.tuple.src.u.tcp.port   = cpu_to_be16(Q931_PORT),
.tuple.dst.protonum = IPPROTO_TCP,
@@ -1800,7 +1798,6 @@ static struct nf_conntrack_helper 
nf_conntrack_helper_ras[] __read_

[PATCH 17/53] netfilter: nat: remove rcu_read_lock in __nf_nat_decode_session.

2017-05-01 Thread Pablo Neira Ayuso
From: Taehee Yoo <ap420...@gmail.com>

__nf_nat_decode_session is called from nf_nat_decode_session as decodefn.
nf_nat_decode_session already takes rcu_read_lock before calling decodefn, so
the rcu_read_lock in __nf_nat_decode_session can be removed.

Signed-off-by: Taehee Yoo <ap420...@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 net/netfilter/nf_nat_core.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 376c1b36f222..fb0e65411785 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -71,11 +71,10 @@ static void __nf_nat_decode_session(struct sk_buff *skb, 
struct flowi *fl)
if (ct == NULL)
return;
 
-   family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
-   rcu_read_lock();
+   family = nf_ct_l3num(ct);
l3proto = __nf_nat_l3proto_find(family);
if (l3proto == NULL)
-   goto out;
+   return;
 
dir = CTINFO2DIR(ctinfo);
if (dir == IP_CT_DIR_ORIGINAL)
@@ -84,8 +83,6 @@ static void __nf_nat_decode_session(struct sk_buff *skb, 
struct flowi *fl)
statusbit = IPS_SRC_NAT;
 
l3proto->decode_session(skb, ct, dir, statusbit, fl);
-out:
-   rcu_read_unlock();
 }
 
 int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int 
family)
-- 
2.1.4



[PATCH 30/53] netfilter: remove last traces of variable-sized extensions

2017-05-01 Thread Pablo Neira Ayuso
From: Florian Westphal <f...@strlen.de>

get rid of the (now unused) nf_ct_ext_add_length define and also
rename the function to plain nf_ct_ext_add().

Signed-off-by: Florian Westphal <f...@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 include/net/netfilter/nf_conntrack_extend.h |  8 +---
 net/netfilter/nf_conntrack_extend.c | 16 +++-
 2 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_extend.h 
b/include/net/netfilter/nf_conntrack_extend.h
index 1c3035dda31f..4ec645c8b647 100644
--- a/include/net/netfilter/nf_conntrack_extend.h
+++ b/include/net/netfilter/nf_conntrack_extend.h
@@ -86,13 +86,7 @@ static inline void nf_ct_ext_free(struct nf_conn *ct)
 }
 
 /* Add this type, returns pointer to data or NULL. */
-void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id,
-size_t var_alloc_len, gfp_t gfp);
-
-#define nf_ct_ext_add(ct, id, gfp) \
-   ((id##_TYPE *)__nf_ct_ext_add_length((ct), (id), 0, (gfp)))
-#define nf_ct_ext_add_length(ct, id, len, gfp) \
-   ((id##_TYPE *)__nf_ct_ext_add_length((ct), (id), (len), (gfp)))
+void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp);
 
 #define NF_CT_EXT_F_PREALLOC   0x0001
 
diff --git a/net/netfilter/nf_conntrack_extend.c 
b/net/netfilter/nf_conntrack_extend.c
index 008299b7f78f..b5879a9c748d 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -44,8 +44,7 @@ void __nf_ct_ext_destroy(struct nf_conn *ct)
 EXPORT_SYMBOL(__nf_ct_ext_destroy);
 
 static void *
-nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id,
-size_t var_alloc_len, gfp_t gfp)
+nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, gfp_t gfp)
 {
unsigned int off, len;
struct nf_ct_ext_type *t;
@@ -59,8 +58,8 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id,
}
 
off = ALIGN(sizeof(struct nf_ct_ext), t->align);
-   len = off + t->len + var_alloc_len;
-   alloc_size = t->alloc_size + var_alloc_len;
+   len = off + t->len;
+   alloc_size = t->alloc_size;
rcu_read_unlock();
 
*ext = kzalloc(alloc_size, gfp);
@@ -73,8 +72,7 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id,
return (void *)(*ext) + off;
 }
 
-void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id,
-size_t var_alloc_len, gfp_t gfp)
+void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
 {
struct nf_ct_ext *old, *new;
int newlen, newoff;
@@ -85,7 +83,7 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum 
nf_ct_ext_id id,
 
old = ct->ext;
if (!old)
-   return nf_ct_ext_create(>ext, id, var_alloc_len, gfp);
+   return nf_ct_ext_create(>ext, id, gfp);
 
if (__nf_ct_ext_exist(old, id))
return NULL;
@@ -98,7 +96,7 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum 
nf_ct_ext_id id,
}
 
newoff = ALIGN(old->len, t->align);
-   newlen = newoff + t->len + var_alloc_len;
+   newlen = newoff + t->len;
rcu_read_unlock();
 
new = __krealloc(old, newlen, gfp);
@@ -115,7 +113,7 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum 
nf_ct_ext_id id,
memset((void *)new + newoff, 0, newlen - newoff);
return (void *)new + newoff;
 }
-EXPORT_SYMBOL(__nf_ct_ext_add_length);
+EXPORT_SYMBOL(nf_ct_ext_add);
 
 static void update_alloc_size(struct nf_ct_ext_type *type)
 {
-- 
2.1.4



[PATCH 26/53] netfilter: conntrack: move helper struct to nf_conntrack_helper.h

2017-05-01 Thread Pablo Neira Ayuso
From: Florian Westphal <f...@strlen.de>

The definition of struct nf_conn_help is not needed in nf_conntrack.h.

Signed-off-by: Florian Westphal <f...@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 include/net/netfilter/nf_conntrack.h| 19 ---
 include/net/netfilter/nf_conntrack_helper.h | 17 +
 2 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack.h 
b/include/net/netfilter/nf_conntrack.h
index 4978a82b75fa..8ece3612d0cd 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -50,25 +50,6 @@ union nf_conntrack_expect_proto {
 #define NF_CT_ASSERT(x)
 #endif
 
-struct nf_conntrack_helper;
-
-/* Must be kept in sync with the classes defined by helpers */
-#define NF_CT_MAX_EXPECT_CLASSES   4
-
-/* nf_conn feature for connections that have a helper */
-struct nf_conn_help {
-   /* Helper. if any */
-   struct nf_conntrack_helper __rcu *helper;
-
-   struct hlist_head expectations;
-
-   /* Current number of expected connections */
-   u8 expecting[NF_CT_MAX_EXPECT_CLASSES];
-
-   /* private helper information. */
-   char data[];
-};
-
 #include 
 #include 
 
diff --git a/include/net/netfilter/nf_conntrack_helper.h 
b/include/net/netfilter/nf_conntrack_helper.h
index 1eaac1f4cd6a..15d746558665 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -52,6 +52,23 @@ struct nf_conntrack_helper {
unsigned int queue_num; /* For user-space helpers. */
 };
 
+/* Must be kept in sync with the classes defined by helpers */
+#define NF_CT_MAX_EXPECT_CLASSES   4
+
+/* nf_conn feature for connections that have a helper */
+struct nf_conn_help {
+   /* Helper. if any */
+   struct nf_conntrack_helper __rcu *helper;
+
+   struct hlist_head expectations;
+
+   /* Current number of expected connections */
+   u8 expecting[NF_CT_MAX_EXPECT_CLASSES];
+
+   /* private helper information. */
+   char data[];
+};
+
 struct nf_conntrack_helper *__nf_conntrack_helper_find(const char *name,
   u16 l3num, u8 protonum);
 
-- 
2.1.4



[PATCH 33/53] netfilter: ecache: reduce struct size from 32 to 24 bytes

2017-05-01 Thread Pablo Neira Ayuso
From: Florian Westphal <f...@strlen.de>

Only "cache" needs to use ulong (it's used with set_bit()); "missed" can use
u16.  Also add a build-time assertion to ensure the event bits fit.

Signed-off-by: Florian Westphal <f...@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 include/net/netfilter/nf_conntrack_ecache.h| 4 ++--
 include/uapi/linux/netfilter/nf_conntrack_common.h | 3 +++
 net/netfilter/nf_conntrack_ecache.c| 3 +++
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_ecache.h 
b/include/net/netfilter/nf_conntrack_ecache.h
index 12d967b58726..2a10c6570fcc 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -20,11 +20,11 @@ enum nf_ct_ecache_state {
 
 struct nf_conntrack_ecache {
unsigned long cache;/* bitops want long */
-   unsigned long missed;   /* missed events */
+   u16 missed; /* missed events */
u16 ctmask; /* bitmask of ct events to be delivered 
*/
u16 expmask;/* bitmask of expect events to be 
delivered */
+   enum nf_ct_ecache_state state:8;/* ecache state */
u32 portid; /* netlink portid of destroyer */
-   enum nf_ct_ecache_state state;  /* ecache state */
 };
 
 static inline struct nf_conntrack_ecache *
diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h 
b/include/uapi/linux/netfilter/nf_conntrack_common.h
index b4a0a1940118..a8072cc7fa0b 100644
--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
@@ -119,6 +119,9 @@ enum ip_conntrack_events {
IPCT_NATSEQADJ = IPCT_SEQADJ,
IPCT_SECMARK,   /* new security mark has been set */
IPCT_LABEL, /* new connlabel has been set */
+#ifdef __KERNEL__
+   __IPCT_MAX
+#endif
 };
 
 enum ip_conntrack_expect_events {
diff --git a/net/netfilter/nf_conntrack_ecache.c 
b/net/netfilter/nf_conntrack_ecache.c
index 6161e92d2980..515212948125 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -420,6 +420,9 @@ int nf_conntrack_ecache_init(void)
int ret = nf_ct_extend_register(_extend);
if (ret < 0)
pr_err("nf_ct_event: Unable to register event extension.\n");
+
+   BUILD_BUG_ON(__IPCT_MAX >= 16); /* ctmask, missed use u16 */
+
return ret;
 }
 
-- 
2.1.4



[PATCH 28/53] netfilter: nfnetlink_cthelper: reject too large userspace allocation requests

2017-05-01 Thread Pablo Neira Ayuso
From: Florian Westphal <f...@strlen.de>

Userspace should not abuse the kernel to store large amounts of data,
reject requests larger than the private area can accommodate.

Signed-off-by: Florian Westphal <f...@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 net/netfilter/nfnetlink_cthelper.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nfnetlink_cthelper.c 
b/net/netfilter/nfnetlink_cthelper.c
index 9a50bf93dd16..eef7120e1f74 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -104,7 +104,7 @@ nfnl_cthelper_from_nlattr(struct nlattr *attr, struct 
nf_conn *ct)
if (help->helper->data_len == 0)
return -EINVAL;
 
-   memcpy(help->data, nla_data(attr), help->helper->data_len);
+   nla_memcpy(help->data, nla_data(attr), sizeof(help->data));
return 0;
 }
 
@@ -216,6 +216,7 @@ nfnl_cthelper_create(const struct nlattr * const tb[],
 {
struct nf_conntrack_helper *helper;
struct nfnl_cthelper *nfcth;
+   unsigned int size;
int ret;
 
if (!tb[NFCTH_TUPLE] || !tb[NFCTH_POLICY] || !tb[NFCTH_PRIV_DATA_LEN])
@@ -231,7 +232,12 @@ nfnl_cthelper_create(const struct nlattr * const tb[],
goto err1;
 
strncpy(helper->name, nla_data(tb[NFCTH_NAME]), NF_CT_HELPER_NAME_LEN);
-   helper->data_len = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN]));
+   size = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN]));
+   if (size > FIELD_SIZEOF(struct nf_conn_help, data)) {
+   ret = -ENOMEM;
+   goto err2;
+   }
+
helper->flags |= NF_CT_HELPER_F_USERSPACE;
memcpy(>tuple, tuple, sizeof(struct nf_conntrack_tuple));
 
-- 
2.1.4



[PATCH 27/53] netfilter: helper: add build-time asserts for helper data size

2017-05-01 Thread Pablo Neira Ayuso
From: Florian Westphal <f...@strlen.de>

add a 32 byte scratch area in the helper struct instead of relying
on variable sized helpers plus compile-time asserts to let us know
if 32 bytes aren't enough anymore.

Not having variable sized helpers will later allow adding a BUILD_BUG_ON
for the total size of conntrack extensions -- the helper extension is
the only one that doesn't have a fixed size.

The (useless!) NF_CT_HELPER_BUILD_BUG_ON(0); are added so that in case
someone adds a new helper and copy-pastes from one that doesn't store
private data at least some indication that this macro should be used
somehow is there...

Signed-off-by: Florian Westphal <f...@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 include/net/netfilter/nf_conntrack_helper.h | 5 -
 net/netfilter/nf_conntrack_amanda.c | 2 ++
 net/netfilter/nf_conntrack_ftp.c| 2 ++
 net/netfilter/nf_conntrack_h323_main.c  | 2 ++
 net/netfilter/nf_conntrack_netbios_ns.c | 2 ++
 net/netfilter/nf_conntrack_pptp.c   | 2 ++
 net/netfilter/nf_conntrack_sane.c   | 2 ++
 net/netfilter/nf_conntrack_sip.c| 2 ++
 net/netfilter/nf_conntrack_tftp.c   | 2 ++
 9 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/include/net/netfilter/nf_conntrack_helper.h 
b/include/net/netfilter/nf_conntrack_helper.h
index 15d746558665..29539ed1008f 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -66,9 +66,12 @@ struct nf_conn_help {
u8 expecting[NF_CT_MAX_EXPECT_CLASSES];
 
/* private helper information. */
-   char data[];
+   char data[32] __aligned(8);
 };
 
+#define NF_CT_HELPER_BUILD_BUG_ON(structsize) \
+   BUILD_BUG_ON((structsize) > FIELD_SIZEOF(struct nf_conn_help, data))
+
 struct nf_conntrack_helper *__nf_conntrack_helper_find(const char *name,
   u16 l3num, u8 protonum);
 
diff --git a/net/netfilter/nf_conntrack_amanda.c 
b/net/netfilter/nf_conntrack_amanda.c
index 57a26cc90c9f..03d2ccffa9fa 100644
--- a/net/netfilter/nf_conntrack_amanda.c
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -207,6 +207,8 @@ static int __init nf_conntrack_amanda_init(void)
 {
int ret, i;
 
+   NF_CT_HELPER_BUILD_BUG_ON(0);
+
for (i = 0; i < ARRAY_SIZE(search); i++) {
search[i].ts = textsearch_prepare(ts_algo, search[i].string,
  search[i].len,
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 4aecef4a89fb..58e1256cd05d 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -577,6 +577,8 @@ static int __init nf_conntrack_ftp_init(void)
 {
int i, ret = 0;
 
+   NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_ftp_master));
+
ftp_buffer = kmalloc(65536, GFP_KERNEL);
if (!ftp_buffer)
return -ENOMEM;
diff --git a/net/netfilter/nf_conntrack_h323_main.c 
b/net/netfilter/nf_conntrack_h323_main.c
index f65d93639d12..e98204349efe 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -1836,6 +1836,8 @@ static int __init nf_conntrack_h323_init(void)
 {
int ret;
 
+   NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_h323_master));
+
h323_buffer = kmalloc(65536, GFP_KERNEL);
if (!h323_buffer)
return -ENOMEM;
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c 
b/net/netfilter/nf_conntrack_netbios_ns.c
index 4c8f30a3d6d2..496ce173f0c1 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -58,6 +58,8 @@ static struct nf_conntrack_helper helper __read_mostly = {
 
 static int __init nf_conntrack_netbios_ns_init(void)
 {
+   NF_CT_HELPER_BUILD_BUG_ON(0);
+
exp_policy.timeout = timeout;
return nf_conntrack_helper_register();
 }
diff --git a/net/netfilter/nf_conntrack_pptp.c 
b/net/netfilter/nf_conntrack_pptp.c
index f60a4755d71e..34fac4c52c4c 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -607,6 +607,8 @@ static struct nf_conntrack_helper pptp __read_mostly = {
 
 static int __init nf_conntrack_pptp_init(void)
 {
+   NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_pptp_master));
+
return nf_conntrack_helper_register();
 }
 
diff --git a/net/netfilter/nf_conntrack_sane.c 
b/net/netfilter/nf_conntrack_sane.c
index 9dcb9ee9b97d..1121db08d048 100644
--- a/net/netfilter/nf_conntrack_sane.c
+++ b/net/netfilter/nf_conntrack_sane.c
@@ -184,6 +184,8 @@ static int __init nf_conntrack_sane_init(void)
 {
int i, ret = 0;
 
+   NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_sane_master));
+
sane_buffer = kmalloc(65536, GFP_KERNEL);
if (!sane_buffer)
return -ENOMEM;
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netf

[PATCH 32/53] netfilter: allow early drop of assured conntracks

2017-05-01 Thread Pablo Neira Ayuso
From: Florian Westphal <f...@strlen.de>

If insertion of a new conntrack fails because the table is full, the kernel
searches the next buckets of the hash slot where the new connection
was supposed to be inserted for an entry that hasn't seen traffic
in the reply direction (non-assured). If it finds one, that entry is
dropped and the new connection entry is allocated.

Allow the conntrack gc worker to also remove *assured* conntracks if
resources are low.

Do this by querying the l4 tracker, e.g. tcp connections are now dropped
if they are no longer established (e.g. in finwait).

This could be refined further, e.g. by adding 'soft' established timeout
(i.e., a timeout that is only used once we get close to resource
exhaustion).

Cc: Jozsef Kadlecsik <kad...@blackhole.kfki.hu>
Signed-off-by: Florian Westphal <f...@strlen.de>
Acked-by: Jozsef Kadlecsik <kad...@blackhole.kfki.hu>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l4proto.h |  3 ++
 net/netfilter/nf_conntrack_core.c| 49 
 net/netfilter/nf_conntrack_proto_dccp.c  | 16 +
 net/netfilter/nf_conntrack_proto_sctp.c  | 16 +
 net/netfilter/nf_conntrack_proto_tcp.c   | 18 ++
 5 files changed, 102 insertions(+)

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h 
b/include/net/netfilter/nf_conntrack_l4proto.h
index 85e993e278d5..7032e044bbe2 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -58,6 +58,9 @@ struct nf_conntrack_l4proto {
 unsigned int dataoff,
 u_int8_t pf, unsigned int hooknum);
 
+   /* called by gc worker if table is full */
+   bool (*can_early_drop)(const struct nf_conn *ct);
+
/* Print out the per-protocol part of the tuple. Return like seq_* */
void (*print_tuple)(struct seq_file *s,
const struct nf_conntrack_tuple *);
diff --git a/net/netfilter/nf_conntrack_core.c 
b/net/netfilter/nf_conntrack_core.c
index 62368b05cef5..f9245dbfe435 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -76,6 +76,7 @@ struct conntrack_gc_work {
struct delayed_work dwork;
u32 last_bucket;
boolexiting;
+   boolearly_drop;
longnext_gc_run;
 };
 
@@ -951,10 +952,30 @@ static noinline int early_drop(struct net *net, unsigned 
int _hash)
return false;
 }
 
+static bool gc_worker_skip_ct(const struct nf_conn *ct)
+{
+   return !nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct);
+}
+
+static bool gc_worker_can_early_drop(const struct nf_conn *ct)
+{
+   const struct nf_conntrack_l4proto *l4proto;
+
+   if (!test_bit(IPS_ASSURED_BIT, >status))
+   return true;
+
+   l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+   if (l4proto->can_early_drop && l4proto->can_early_drop(ct))
+   return true;
+
+   return false;
+}
+
 static void gc_worker(struct work_struct *work)
 {
unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u);
unsigned int i, goal, buckets = 0, expired_count = 0;
+   unsigned int nf_conntrack_max95 = 0;
struct conntrack_gc_work *gc_work;
unsigned int ratio, scanned = 0;
unsigned long next_run;
@@ -963,6 +984,8 @@ static void gc_worker(struct work_struct *work)
 
goal = nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV;
i = gc_work->last_bucket;
+   if (gc_work->early_drop)
+   nf_conntrack_max95 = nf_conntrack_max / 100u * 95u;
 
do {
struct nf_conntrack_tuple_hash *h;
@@ -979,6 +1002,8 @@ static void gc_worker(struct work_struct *work)
i = 0;
 
hlist_nulls_for_each_entry_rcu(h, n, _hash[i], hnnode) {
+   struct net *net;
+
tmp = nf_ct_tuplehash_to_ctrack(h);
 
scanned++;
@@ -987,6 +1012,27 @@ static void gc_worker(struct work_struct *work)
expired_count++;
continue;
}
+
+   if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp))
+   continue;
+
+   net = nf_ct_net(tmp);
+   if (atomic_read(>ct.count) < nf_conntrack_max95)
+   continue;
+
+   /* need to take reference to avoid possible races */
+   if (!atomic_inc_not_zero(>ct_general.use))
+   continue;
+
+   if (gc_worker_skip_ct(tmp)) {
+ 

[PATCH 31/53] netfilter: conntrack: use u8 for extension sizes again

2017-05-01 Thread Pablo Neira Ayuso
From: Florian Westphal <f...@strlen.de>

commit 223b02d923ecd7c84cf9780bb3686f455d279279
("netfilter: nf_conntrack: reserve two bytes for nf_ct_ext->len")
had to increase size of the extension offsets because total size of the
extensions had increased to a point where u8 did overflow.

3 years later we've managed to diet extensions a bit and we no longer
need u16.  Furthermore we can now add a compile-time assertion for this
problem.

Signed-off-by: Florian Westphal <f...@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 include/net/netfilter/nf_conntrack_extend.h |  4 ++--
 net/netfilter/nf_conntrack_core.c   | 33 +
 2 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_extend.h 
b/include/net/netfilter/nf_conntrack_extend.h
index 4ec645c8b647..5fc908dc9f32 100644
--- a/include/net/netfilter/nf_conntrack_extend.h
+++ b/include/net/netfilter/nf_conntrack_extend.h
@@ -43,8 +43,8 @@ enum nf_ct_ext_id {
 /* Extensions: optional stuff which isn't permanently in struct. */
 struct nf_ct_ext {
struct rcu_head rcu;
-   u16 offset[NF_CT_EXT_NUM];
-   u16 len;
+   u8 offset[NF_CT_EXT_NUM];
+   u8 len;
char data[0];
 };
 
diff --git a/net/netfilter/nf_conntrack_core.c 
b/net/netfilter/nf_conntrack_core.c
index 03150f60714d..62368b05cef5 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1804,12 +1804,45 @@ EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
 module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
  _conntrack_htable_size, 0600);
 
+static unsigned int total_extension_size(void)
+{
+   /* remember to add new extensions below */
+   BUILD_BUG_ON(NF_CT_EXT_NUM > 9);
+
+   return sizeof(struct nf_ct_ext) +
+  sizeof(struct nf_conn_help)
+#if IS_ENABLED(CONFIG_NF_NAT)
+   + sizeof(struct nf_conn_nat)
+#endif
+   + sizeof(struct nf_conn_seqadj)
+   + sizeof(struct nf_conn_acct)
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+   + sizeof(struct nf_conntrack_ecache)
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+   + sizeof(struct nf_conn_tstamp)
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+   + sizeof(struct nf_conn_timeout)
+#endif
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+   + sizeof(struct nf_conn_labels)
+#endif
+#if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY)
+   + sizeof(struct nf_conn_synproxy)
+#endif
+   ;
+};
+
 int nf_conntrack_init_start(void)
 {
int max_factor = 8;
int ret = -ENOMEM;
int i;
 
+   /* struct nf_ct_ext uses u8 to store offsets/size */
+   BUILD_BUG_ON(total_extension_size() > 255u);
+
seqcount_init(_conntrack_generation);
 
for (i = 0; i < CONNTRACK_LOCKS; i++)
-- 
2.1.4



[PATCH 34/53] netfilter: ipvs: fix incorrect conflict resolution

2017-05-01 Thread Pablo Neira Ayuso
From: Florian Westphal <f...@strlen.de>

The commit ab8bc7ed864b9c4f1fcb00a22bbe4e0f66ce8003
("netfilter: remove nf_ct_is_untracked")
changed the line
   if (ct && !nf_ct_is_untracked(ct) && nfct_nat(ct)) {
   to
   if (ct && nfct_nat(ct)) {

meanwhile, the commit 41390895e50bc4f28abe384c6b35ac27464a20ec
("netfilter: ipvs: don't check for presence of nat extension")
from ipvs-next had changed the same line to

  if (ct && !nf_ct_is_untracked(ct) && (ct->status & IPS_NAT_MASK)) {

When ipvs-next got merged into nf-next, the merge resolution took
the first version, dropping the conversion of nfct_nat().

While this doesn't cause a problem at the moment, it will once we stop
adding the nat extension by default.

Signed-off-by: Florian Westphal <f...@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 net/netfilter/ipvs/ip_vs_ftp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index af3a9bbdf2ae..fb780be76d15 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -260,8 +260,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct 
ip_vs_conn *cp,
buf_len = strlen(buf);
 
ct = nf_ct_get(skb, );
-   if (ct && nfct_nat(ct)) {
+   if (ct && (ct->status & IPS_NAT_MASK)) {
bool mangled;
+
/* If mangling fails this function will return 0
 * which will cause the packet to be dropped.
 * Mangling can only fail under memory pressure,
-- 
2.1.4



[PATCH 37/53] ipvs: convert to use pernet nf_hook api

2017-05-01 Thread Pablo Neira Ayuso
From: Florian Westphal <f...@strlen.de>

nf_(un)register_hooks has to maintain an internal hook list to add/remove
those hooks from net namespaces as they are added/deleted.

ipvs already uses pernet_ops, so we can switch to the (more recent)
pernet hook api instead.

Compile tested only.

Signed-off-by: Florian Westphal <f...@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 net/netfilter/ipvs/ip_vs_core.c | 19 +--
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index b4a746d0e39b..d2d7bdf1d510 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -2200,6 +2200,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
 static int __net_init __ip_vs_init(struct net *net)
 {
struct netns_ipvs *ipvs;
+   int ret;
 
ipvs = net_generic(net, ip_vs_net_id);
if (ipvs == NULL)
@@ -2231,11 +2232,17 @@ static int __net_init __ip_vs_init(struct net *net)
if (ip_vs_sync_net_init(ipvs) < 0)
goto sync_fail;
 
+   ret = nf_register_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
+   if (ret < 0)
+   goto hook_fail;
+
return 0;
 /*
  * Error handling
  */
 
+hook_fail:
+   ip_vs_sync_net_cleanup(ipvs);
 sync_fail:
ip_vs_conn_net_cleanup(ipvs);
 conn_fail:
@@ -2255,6 +2262,7 @@ static void __net_exit __ip_vs_cleanup(struct net *net)
 {
struct netns_ipvs *ipvs = net_ipvs(net);
 
+   nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
ip_vs_service_net_cleanup(ipvs);/* ip_vs_flush() with locks */
ip_vs_conn_net_cleanup(ipvs);
ip_vs_app_net_cleanup(ipvs);
@@ -2315,24 +2323,16 @@ static int __init ip_vs_init(void)
if (ret < 0)
goto cleanup_sub;
 
-   ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
-   if (ret < 0) {
-   pr_err("can't register hooks.\n");
-   goto cleanup_dev;
-   }
-
ret = ip_vs_register_nl_ioctl();
if (ret < 0) {
pr_err("can't register netlink/ioctl.\n");
-   goto cleanup_hooks;
+   goto cleanup_dev;
}
 
pr_info("ipvs loaded.\n");
 
return ret;
 
-cleanup_hooks:
-   nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
 cleanup_dev:
unregister_pernet_device(_core_dev_ops);
 cleanup_sub:
@@ -2349,7 +2349,6 @@ static int __init ip_vs_init(void)
 static void __exit ip_vs_cleanup(void)
 {
ip_vs_unregister_nl_ioctl();
-   nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
unregister_pernet_device(_core_dev_ops);
unregister_pernet_subsys(_core_ops);   /* free ip_vs struct */
ip_vs_conn_cleanup();
-- 
2.1.4



[PATCH 35/53] netfilter: tcp: Use TCP_MAX_WSCALE instead of literal 14

2017-05-01 Thread Pablo Neira Ayuso
From: Gao Feng <f...@ikuai8.com>

The window scale may be enlarged from 14 to 15 according to the IETF
draft https://tools.ietf.org/html/draft-nishida-tcpm-maxwin-03.

Use the macro TCP_MAX_WSCALE to support it easily with TCP stack in
the future.

Signed-off-by: Gao Feng <f...@ikuai8.com>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 net/netfilter/nf_conntrack_proto_tcp.c | 7 +++
 net/netfilter/nf_synproxy_core.c   | 4 ++--
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/net/netfilter/nf_conntrack_proto_tcp.c 
b/net/netfilter/nf_conntrack_proto_tcp.c
index d0c0a31dfe74..d61a68759dea 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -419,10 +419,9 @@ static void tcp_options(const struct sk_buff *skb,
 && opsize == TCPOLEN_WINDOW) {
state->td_scale = *(u_int8_t *)ptr;
 
-   if (state->td_scale > 14) {
-   /* See RFC1323 */
-   state->td_scale = 14;
-   }
+   if (state->td_scale > TCP_MAX_WSCALE)
+   state->td_scale = TCP_MAX_WSCALE;
+
state->flags |=
IP_CT_TCP_FLAG_WINDOW_SCALE;
}
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index abe03e869f7b..a504e87c6ddf 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -66,8 +66,8 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned 
int doff,
case TCPOPT_WINDOW:
if (opsize == TCPOLEN_WINDOW) {
opts->wscale = *ptr;
-   if (opts->wscale > 14)
-   opts->wscale = 14;
+   if (opts->wscale > TCP_MAX_WSCALE)
+   opts->wscale = TCP_MAX_WSCALE;
opts->options |= XT_SYNPROXY_OPT_WSCALE;
}
break;
-- 
2.1.4



[PATCH 38/53] netfilter: decnet: only register hooks in init namespace

2017-05-01 Thread Pablo Neira Ayuso
From: Florian Westphal <f...@strlen.de>

Looks like decnet isn't namespacified in the first place, so restrict hook
registration to the initial namespace.

Prepares for eventual removal of legacy nf_register_hook() api.

Signed-off-by: Florian Westphal <f...@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 net/decnet/netfilter/dn_rtmsg.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index 85f2fdc360c2..f44303a40105 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -134,7 +134,7 @@ static int __init dn_rtmsg_init(void)
return -ENOMEM;
}
 
-   rv = nf_register_hook(_ops);
+   rv = nf_register_net_hook(_net, _ops);
if (rv) {
netlink_kernel_release(dnrmg);
}
@@ -144,7 +144,7 @@ static int __init dn_rtmsg_init(void)
 
 static void __exit dn_rtmsg_fini(void)
 {
-   nf_unregister_hook(_ops);
+   nf_unregister_net_hook(_net, _ops);
netlink_kernel_release(dnrmg);
 }
 
-- 
2.1.4



[PATCH 36/53] netfilter: synproxy: only register hooks when needed

2017-05-01 Thread Pablo Neira Ayuso
From: Florian Westphal <f...@strlen.de>

Defer registration of the synproxy hooks until the first SYNPROXY rule is
added.  Also means we only register hooks in namespaces that need it.

Signed-off-by: Florian Westphal <f...@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 include/net/netfilter/nf_conntrack_synproxy.h |  2 +
 net/ipv4/netfilter/ipt_SYNPROXY.c | 73 ++-
 net/ipv6/netfilter/ip6t_SYNPROXY.c| 73 ++-
 3 files changed, 80 insertions(+), 68 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_synproxy.h 
b/include/net/netfilter/nf_conntrack_synproxy.h
index b0ca402c1f72..a2fcb5271726 100644
--- a/include/net/netfilter/nf_conntrack_synproxy.h
+++ b/include/net/netfilter/nf_conntrack_synproxy.h
@@ -52,6 +52,8 @@ struct synproxy_stats {
 struct synproxy_net {
struct nf_conn  *tmpl;
struct synproxy_stats __percpu  *stats;
+   unsigned inthook_ref4;
+   unsigned inthook_ref6;
 };
 
 extern unsigned int synproxy_net_id;
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c 
b/net/ipv4/netfilter/ipt_SYNPROXY.c
index 3240a2614e82..c308ee0ee0bc 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -409,19 +409,56 @@ static unsigned int ipv4_synproxy_hook(void *priv,
return NF_ACCEPT;
 }
 
+static struct nf_hook_ops ipv4_synproxy_ops[] __read_mostly = {
+   {
+   .hook   = ipv4_synproxy_hook,
+   .pf = NFPROTO_IPV4,
+   .hooknum= NF_INET_LOCAL_IN,
+   .priority   = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+   },
+   {
+   .hook   = ipv4_synproxy_hook,
+   .pf = NFPROTO_IPV4,
+   .hooknum= NF_INET_POST_ROUTING,
+   .priority   = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+   },
+};
+
 static int synproxy_tg4_check(const struct xt_tgchk_param *par)
 {
+   struct synproxy_net *snet = synproxy_pernet(par->net);
const struct ipt_entry *e = par->entryinfo;
+   int err;
 
if (e->ip.proto != IPPROTO_TCP ||
e->ip.invflags & XT_INV_PROTO)
return -EINVAL;
 
-   return nf_ct_netns_get(par->net, par->family);
+   err = nf_ct_netns_get(par->net, par->family);
+   if (err)
+   return err;
+
+   if (snet->hook_ref4 == 0) {
+   err = nf_register_net_hooks(par->net, ipv4_synproxy_ops,
+   ARRAY_SIZE(ipv4_synproxy_ops));
+   if (err) {
+   nf_ct_netns_put(par->net, par->family);
+   return err;
+   }
+   }
+
+   snet->hook_ref4++;
+   return err;
 }
 
 static void synproxy_tg4_destroy(const struct xt_tgdtor_param *par)
 {
+   struct synproxy_net *snet = synproxy_pernet(par->net);
+
+   snet->hook_ref4--;
+   if (snet->hook_ref4 == 0)
+   nf_unregister_net_hooks(par->net, ipv4_synproxy_ops,
+   ARRAY_SIZE(ipv4_synproxy_ops));
nf_ct_netns_put(par->net, par->family);
 }
 
@@ -436,46 +473,14 @@ static struct xt_target synproxy_tg4_reg __read_mostly = {
.me = THIS_MODULE,
 };
 
-static struct nf_hook_ops ipv4_synproxy_ops[] __read_mostly = {
-   {
-   .hook   = ipv4_synproxy_hook,
-   .pf = NFPROTO_IPV4,
-   .hooknum= NF_INET_LOCAL_IN,
-   .priority   = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
-   },
-   {
-   .hook   = ipv4_synproxy_hook,
-   .pf = NFPROTO_IPV4,
-   .hooknum= NF_INET_POST_ROUTING,
-   .priority   = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
-   },
-};
-
 static int __init synproxy_tg4_init(void)
 {
-   int err;
-
-   err = nf_register_hooks(ipv4_synproxy_ops,
-   ARRAY_SIZE(ipv4_synproxy_ops));
-   if (err < 0)
-   goto err1;
-
-   err = xt_register_target(_tg4_reg);
-   if (err < 0)
-   goto err2;
-
-   return 0;
-
-err2:
-   nf_unregister_hooks(ipv4_synproxy_ops, ARRAY_SIZE(ipv4_synproxy_ops));
-err1:
-   return err;
+   return xt_register_target(_tg4_reg);
 }
 
 static void __exit synproxy_tg4_exit(void)
 {
xt_unregister_target(_tg4_reg);
-   nf_unregister_hooks(ipv4_synproxy_ops, ARRAY_SIZE(ipv4_synproxy_ops));
 }
 
 module_init(synproxy_tg4_init);
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c 
b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index 4ef1ddd4bbbd..1252537f215f 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -430,20 +430,57 @@ static unsigned int ipv

[PATCH 23/53] netfilter: kill the fake untracked conntrack objects

2017-05-01 Thread Pablo Neira Ayuso
From: Florian Westphal <f...@strlen.de>

resurrect an old patch from Pablo Neira to remove the untracked objects.

Currently, there are four possible states of an skb wrt. conntrack.

1. No conntrack attached, ct is NULL.
2. Normal (kmem cache allocated) ct attached.
3. a template (kmalloc'd), not in any hash tables at any point in time
4. the 'untracked' conntrack, a percpu nf_conn object, tagged via
   IPS_UNTRACKED_BIT in ct->status.

Untracked is supposed to be identical to case 1.  It exists only
so users can check

-m conntrack --ctstate UNTRACKED vs.
-m conntrack --ctstate INVALID

e.g. an attempt to set connmark on an INVALID or UNTRACKED conntrack is
supposed to be a no-op.

Thus currently we need to check
 ct == NULL || nf_ct_is_untracked(ct)

in a lot of places in order to avoid altering untracked objects.

The other consequence of the percpu untracked object is that all
-j NOTRACK (and, later, kfree_skb of such skbs) result in an atomic op
(inc/dec the untracked conntracks refcount).

This adds a new kernel-private ctinfo state, IP_CT_UNTRACKED, to
make the distinction instead.

The (few) places that care about packet invalid (ct is NULL) vs.
packet untracked now need to test ct == NULL vs. ctinfo == IP_CT_UNTRACKED,
but all other places can omit the nf_ct_is_untracked() check.

Signed-off-by: Florian Westphal <f...@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 include/net/ip_vs.h|  6 +--
 include/net/netfilter/nf_conntrack.h   | 10 +
 include/uapi/linux/netfilter/nf_conntrack_common.h |  6 ++-
 net/ipv4/netfilter/nf_dup_ipv4.c   |  3 +-
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c |  3 +-
 net/ipv6/netfilter/nf_dup_ipv6.c   |  3 +-
 net/netfilter/nf_conntrack_core.c  | 48 +++---
 net/netfilter/nf_nat_core.c|  3 --
 net/netfilter/nft_ct.c | 14 +++
 net/netfilter/xt_CT.c  | 16 
 net/netfilter/xt_conntrack.c   | 11 +++--
 net/netfilter/xt_state.c   | 13 +++---
 12 files changed, 39 insertions(+), 97 deletions(-)

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 8a4a57b887fb..9a75d9933e63 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -1556,12 +1556,8 @@ static inline void ip_vs_notrack(struct sk_buff *skb)
struct nf_conn *ct = nf_ct_get(skb, );
 
if (!ct || !nf_ct_is_untracked(ct)) {
-   struct nf_conn *untracked;
-
nf_conntrack_put(>ct_general);
-   untracked = nf_ct_untracked_get();
-   nf_conntrack_get(>ct_general);
-   nf_ct_set(skb, untracked, IP_CT_NEW);
+   nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
}
 #endif
 }
diff --git a/include/net/netfilter/nf_conntrack.h 
b/include/net/netfilter/nf_conntrack.h
index 19605878da47..012b99f563e5 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -243,14 +243,6 @@ extern s32 (*nf_ct_nat_offset)(const struct nf_conn *ct,
   enum ip_conntrack_dir dir,
   u32 seq);
 
-/* Fake conntrack entry for untracked connections */
-DECLARE_PER_CPU_ALIGNED(struct nf_conn, nf_conntrack_untracked);
-static inline struct nf_conn *nf_ct_untracked_get(void)
-{
-   return raw_cpu_ptr(_conntrack_untracked);
-}
-void nf_ct_untracked_status_or(unsigned long bits);
-
 /* Iterate over all conntracks: if iter returns true, it's deleted. */
 void nf_ct_iterate_cleanup(struct net *net,
   int (*iter)(struct nf_conn *i, void *data),
@@ -283,7 +275,7 @@ static inline int nf_ct_is_dying(const struct nf_conn *ct)
 
 static inline int nf_ct_is_untracked(const struct nf_conn *ct)
 {
-   return test_bit(IPS_UNTRACKED_BIT, >status);
+   return false;
 }
 
 /* Packet is received from loopback */
diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h 
b/include/uapi/linux/netfilter/nf_conntrack_common.h
index 6a8e33dd4ecb..b4a0a1940118 100644
--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
@@ -28,12 +28,14 @@ enum ip_conntrack_info {
/* only for userspace compatibility */
 #ifndef __KERNEL__
IP_CT_NEW_REPLY = IP_CT_NUMBER,
+#else
+   IP_CT_UNTRACKED = 7,
 #endif
 };
 
 #define NF_CT_STATE_INVALID_BIT(1 << 0)
 #define NF_CT_STATE_BIT(ctinfo)(1 << ((ctinfo) % 
IP_CT_IS_REPLY + 1))
-#define NF_CT_STATE_UNTRACKED_BIT  (1 << (IP_CT_NUMBER + 1))
+#define NF_CT_STATE_UNTRACKED_BIT  (1 << (IP_CT_UNTRACKED + 1))
 
 /* Bitset representing status of connection. */
 enum ip_conntrack_status {
@@ -94,7 +96,7 @@ enum ip_conntrack_status {
IPS_TEMPLATE_B

[PATCH 39/53] ebtables: remove nf_hook_register usage

2017-05-01 Thread Pablo Neira Ayuso
From: Florian Westphal <f...@strlen.de>

Similar to ip_register_table, pass nf_hook_ops to ebt_register_table().
This allows hook registration to be handled via pernet_ops as well and lets
us avoid use of the legacy register_hook api.

Signed-off-by: Florian Westphal <f...@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 include/linux/netfilter_bridge/ebtables.h |  6 ++-
 net/bridge/netfilter/ebtable_broute.c |  4 +-
 net/bridge/netfilter/ebtable_filter.c | 15 ++--
 net/bridge/netfilter/ebtable_nat.c| 15 ++--
 net/bridge/netfilter/ebtables.c   | 61 +++
 5 files changed, 50 insertions(+), 51 deletions(-)

diff --git a/include/linux/netfilter_bridge/ebtables.h 
b/include/linux/netfilter_bridge/ebtables.h
index 984b2112c77b..a30efb437e6d 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -109,8 +109,10 @@ struct ebt_table {
 #define EBT_ALIGN(s) (((s) + (__alignof__(struct _xt_align)-1)) & \
 ~(__alignof__(struct _xt_align)-1))
 extern struct ebt_table *ebt_register_table(struct net *net,
-   const struct ebt_table *table);
-extern void ebt_unregister_table(struct net *net, struct ebt_table *table);
+   const struct ebt_table *table,
+   const struct nf_hook_ops *);
+extern void ebt_unregister_table(struct net *net, struct ebt_table *table,
+const struct nf_hook_ops *);
 extern unsigned int ebt_do_table(struct sk_buff *skb,
 const struct nf_hook_state *state,
 struct ebt_table *table);
diff --git a/net/bridge/netfilter/ebtable_broute.c 
b/net/bridge/netfilter/ebtable_broute.c
index 8fe36dc3aab2..2585b100ebbb 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -65,13 +65,13 @@ static int ebt_broute(struct sk_buff *skb)
 
 static int __net_init broute_net_init(struct net *net)
 {
-   net->xt.broute_table = ebt_register_table(net, _table);
+   net->xt.broute_table = ebt_register_table(net, _table, NULL);
return PTR_ERR_OR_ZERO(net->xt.broute_table);
 }
 
 static void __net_exit broute_net_exit(struct net *net)
 {
-   ebt_unregister_table(net, net->xt.broute_table);
+   ebt_unregister_table(net, net->xt.broute_table, NULL);
 }
 
 static struct pernet_operations broute_net_ops = {
diff --git a/net/bridge/netfilter/ebtable_filter.c 
b/net/bridge/netfilter/ebtable_filter.c
index 593a1bdc079e..f22ef7c21913 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -93,13 +93,13 @@ static struct nf_hook_ops ebt_ops_filter[] __read_mostly = {
 
 static int __net_init frame_filter_net_init(struct net *net)
 {
-   net->xt.frame_filter = ebt_register_table(net, _filter);
+   net->xt.frame_filter = ebt_register_table(net, _filter, 
ebt_ops_filter);
return PTR_ERR_OR_ZERO(net->xt.frame_filter);
 }
 
 static void __net_exit frame_filter_net_exit(struct net *net)
 {
-   ebt_unregister_table(net, net->xt.frame_filter);
+   ebt_unregister_table(net, net->xt.frame_filter, ebt_ops_filter);
 }
 
 static struct pernet_operations frame_filter_net_ops = {
@@ -109,20 +109,11 @@ static struct pernet_operations frame_filter_net_ops = {
 
 static int __init ebtable_filter_init(void)
 {
-   int ret;
-
-   ret = register_pernet_subsys(_filter_net_ops);
-   if (ret < 0)
-   return ret;
-   ret = nf_register_hooks(ebt_ops_filter, ARRAY_SIZE(ebt_ops_filter));
-   if (ret < 0)
-   unregister_pernet_subsys(_filter_net_ops);
-   return ret;
+   return register_pernet_subsys(_filter_net_ops);
 }
 
 static void __exit ebtable_filter_fini(void)
 {
-   nf_unregister_hooks(ebt_ops_filter, ARRAY_SIZE(ebt_ops_filter));
unregister_pernet_subsys(_filter_net_ops);
 }
 
diff --git a/net/bridge/netfilter/ebtable_nat.c 
b/net/bridge/netfilter/ebtable_nat.c
index eb33919821ee..2f7a4f314406 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -93,13 +93,13 @@ static struct nf_hook_ops ebt_ops_nat[] __read_mostly = {
 
 static int __net_init frame_nat_net_init(struct net *net)
 {
-   net->xt.frame_nat = ebt_register_table(net, _nat);
+   net->xt.frame_nat = ebt_register_table(net, _nat, ebt_ops_nat);
return PTR_ERR_OR_ZERO(net->xt.frame_nat);
 }
 
 static void __net_exit frame_nat_net_exit(struct net *net)
 {
-   ebt_unregister_table(net, net->xt.frame_nat);
+   ebt_unregister_table(net, net->xt.frame_nat, ebt_ops_nat);
 }
 
 static struct pernet_operations frame_nat_net_ops = {
@@ -109,20 +109,11 @@ static struct pernet_operations frame_nat_net_ops = {
 
 s

[PATCH 43/53] netfilter: conntrack: handle initial extension alloc via krealloc

2017-05-01 Thread Pablo Neira Ayuso
From: Florian Westphal <f...@strlen.de>

krealloc(NULL, ..) is the same as kmalloc(), so we can avoid special-casing
the initial allocation after the prealloc removal (we had to use
->alloc_len as the initial allocation size).

This also means we do not zero the preallocated memory anymore; only
offsets[].  Existing code makes sure the new (used) extension space gets
zeroed out.

Signed-off-by: Florian Westphal <f...@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 net/netfilter/nf_conntrack_extend.c | 51 +++--
 1 file changed, 15 insertions(+), 36 deletions(-)

diff --git a/net/netfilter/nf_conntrack_extend.c 
b/net/netfilter/nf_conntrack_extend.c
index 5c66816eb965..68ae1be08ed8 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -44,49 +44,24 @@ void __nf_ct_ext_destroy(struct nf_conn *ct)
 }
 EXPORT_SYMBOL(__nf_ct_ext_destroy);
 
-static void *
-nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, gfp_t gfp)
-{
-   unsigned int off, len, alloc;
-   struct nf_ct_ext_type *t;
-
-   rcu_read_lock();
-   t = rcu_dereference(nf_ct_ext_types[id]);
-   if (!t) {
-   rcu_read_unlock();
-   return NULL;
-   }
-
-   off = ALIGN(sizeof(struct nf_ct_ext), t->align);
-   len = off + t->len;
-   rcu_read_unlock();
-
-   alloc = max(len, NF_CT_EXT_PREALLOC);
-   *ext = kzalloc(alloc, gfp);
-   if (!*ext)
-   return NULL;
-
-   (*ext)->offset[id] = off;
-   (*ext)->len = len;
-
-   return (void *)(*ext) + off;
-}
-
 void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
 {
+   unsigned int newlen, newoff, oldlen, alloc;
struct nf_ct_ext *old, *new;
-   int newlen, newoff;
struct nf_ct_ext_type *t;
 
/* Conntrack must not be confirmed to avoid races on reallocation. */
NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
 
old = ct->ext;
-   if (!old)
-   return nf_ct_ext_create(>ext, id, gfp);
 
-   if (__nf_ct_ext_exist(old, id))
-   return NULL;
+   if (old) {
+   if (__nf_ct_ext_exist(old, id))
+   return NULL;
+   oldlen = old->len;
+   } else {
+   oldlen = sizeof(*new);
+   }
 
rcu_read_lock();
t = rcu_dereference(nf_ct_ext_types[id]);
@@ -95,15 +70,19 @@ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id 
id, gfp_t gfp)
return NULL;
}
 
-   newoff = ALIGN(old->len, t->align);
+   newoff = ALIGN(oldlen, t->align);
newlen = newoff + t->len;
rcu_read_unlock();
 
-   new = __krealloc(old, newlen, gfp);
+   alloc = max(newlen, NF_CT_EXT_PREALLOC);
+   new = __krealloc(old, alloc, gfp);
if (!new)
return NULL;
 
-   if (new != old) {
+   if (!old) {
+   memset(new->offset, 0, sizeof(new->offset));
+   ct->ext = new;
+   } else if (new != old) {
kfree_rcu(old, rcu);
rcu_assign_pointer(ct->ext, new);
}
-- 
2.1.4



[PATCH 42/53] netfilter: conntrack: mark extension structs as const

2017-05-01 Thread Pablo Neira Ayuso
From: Florian Westphal <f...@strlen.de>

Signed-off-by: Florian Westphal <f...@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 include/net/netfilter/nf_conntrack_extend.h | 4 ++--
 net/netfilter/nf_conntrack_acct.c   | 2 +-
 net/netfilter/nf_conntrack_ecache.c | 2 +-
 net/netfilter/nf_conntrack_extend.c | 4 ++--
 net/netfilter/nf_conntrack_helper.c | 2 +-
 net/netfilter/nf_conntrack_labels.c | 2 +-
 net/netfilter/nf_conntrack_seqadj.c | 2 +-
 net/netfilter/nf_conntrack_timeout.c| 2 +-
 net/netfilter/nf_conntrack_timestamp.c  | 2 +-
 9 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_extend.h 
b/include/net/netfilter/nf_conntrack_extend.h
index dd776bf9e2fa..b01f73fb4dcb 100644
--- a/include/net/netfilter/nf_conntrack_extend.h
+++ b/include/net/netfilter/nf_conntrack_extend.h
@@ -99,6 +99,6 @@ struct nf_ct_ext_type {
u8 align;
 };
 
-int nf_ct_extend_register(struct nf_ct_ext_type *type);
-void nf_ct_extend_unregister(struct nf_ct_ext_type *type);
+int nf_ct_extend_register(const struct nf_ct_ext_type *type);
+void nf_ct_extend_unregister(const struct nf_ct_ext_type *type);
 #endif /* _NF_CONNTRACK_EXTEND_H */
diff --git a/net/netfilter/nf_conntrack_acct.c 
b/net/netfilter/nf_conntrack_acct.c
index 45da11afa785..866916712905 100644
--- a/net/netfilter/nf_conntrack_acct.c
+++ b/net/netfilter/nf_conntrack_acct.c
@@ -55,7 +55,7 @@ seq_print_acct(struct seq_file *s, const struct nf_conn *ct, 
int dir)
 };
 EXPORT_SYMBOL_GPL(seq_print_acct);
 
-static struct nf_ct_ext_type acct_extend __read_mostly = {
+static const struct nf_ct_ext_type acct_extend = {
.len= sizeof(struct nf_conn_acct),
.align  = __alignof__(struct nf_conn_acct),
.id = NF_CT_EXT_ACCT,
diff --git a/net/netfilter/nf_conntrack_ecache.c 
b/net/netfilter/nf_conntrack_ecache.c
index 515212948125..caac41ad9483 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -347,7 +347,7 @@ static struct ctl_table event_sysctl_table[] = {
 };
 #endif /* CONFIG_SYSCTL */
 
-static struct nf_ct_ext_type event_extend __read_mostly = {
+static const struct nf_ct_ext_type event_extend = {
.len= sizeof(struct nf_conntrack_ecache),
.align  = __alignof__(struct nf_conntrack_ecache),
.id = NF_CT_EXT_ECACHE,
diff --git a/net/netfilter/nf_conntrack_extend.c 
b/net/netfilter/nf_conntrack_extend.c
index 2e4b41bc67a0..5c66816eb965 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -116,7 +116,7 @@ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id 
id, gfp_t gfp)
 EXPORT_SYMBOL(nf_ct_ext_add);
 
 /* This MUST be called in process context. */
-int nf_ct_extend_register(struct nf_ct_ext_type *type)
+int nf_ct_extend_register(const struct nf_ct_ext_type *type)
 {
int ret = 0;
 
@@ -134,7 +134,7 @@ int nf_ct_extend_register(struct nf_ct_ext_type *type)
 EXPORT_SYMBOL_GPL(nf_ct_extend_register);
 
 /* This MUST be called in process context. */
-void nf_ct_extend_unregister(struct nf_ct_ext_type *type)
+void nf_ct_extend_unregister(const struct nf_ct_ext_type *type)
 {
mutex_lock(_ct_ext_type_mutex);
RCU_INIT_POINTER(nf_ct_ext_types[type->id], NULL);
diff --git a/net/netfilter/nf_conntrack_helper.c 
b/net/netfilter/nf_conntrack_helper.c
index 8239b4406f56..a57a52f173f7 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -535,7 +535,7 @@ void nf_conntrack_helpers_unregister(struct 
nf_conntrack_helper *helper,
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_helpers_unregister);
 
-static struct nf_ct_ext_type helper_extend __read_mostly = {
+static const struct nf_ct_ext_type helper_extend = {
.len= sizeof(struct nf_conn_help),
.align  = __alignof__(struct nf_conn_help),
.id = NF_CT_EXT_HELPER,
diff --git a/net/netfilter/nf_conntrack_labels.c 
b/net/netfilter/nf_conntrack_labels.c
index bcab8bde7312..adf219859901 100644
--- a/net/netfilter/nf_conntrack_labels.c
+++ b/net/netfilter/nf_conntrack_labels.c
@@ -82,7 +82,7 @@ void nf_connlabels_put(struct net *net)
 }
 EXPORT_SYMBOL_GPL(nf_connlabels_put);
 
-static struct nf_ct_ext_type labels_extend __read_mostly = {
+static const struct nf_ct_ext_type labels_extend = {
.len= sizeof(struct nf_conn_labels),
.align  = __alignof__(struct nf_conn_labels),
.id = NF_CT_EXT_LABELS,
diff --git a/net/netfilter/nf_conntrack_seqadj.c 
b/net/netfilter/nf_conntrack_seqadj.c
index ef7063eced7c..a975efd6b8c3 100644
--- a/net/netfilter/nf_conntrack_seqadj.c
+++ b/net/netfilter/nf_conntrack_seqadj.c
@@ -231,7 +231,7 @@ s32 nf_ct_seq_offset(const struct nf_conn *ct,
 }
 EXPORT_SYMBOL_GPL(nf_ct_seq_offset);
 
-static struct nf_ct_ext_type nf_ct_seqadj_extend __read_mostly = {
+static const struct nf_ct_ext_type nf_ct_seqadj_ex

[PATCH 41/53] netfilter: conntrack: remove prealloc support

2017-05-01 Thread Pablo Neira Ayuso
From: Florian Westphal <f...@strlen.de>

It was used by the nat extension, but since commit
7c9664351980 ("netfilter: move nat hlist_head to nf_conn") it's only needed
for connections that use the MASQUERADE target or a nat helper.

Also it seems a lot easier to preallocate a fixed size instead.

With default settings, conntrack first adds the ecache extension (sysctl
defaults to 1), so we get 40 (ct extension header) + 24 (ecache) == 64 bytes
on x86_64 for the initial allocation.

Followup patches can constify the extension structs and avoid
the initial zeroing of the entire extension area.

Signed-off-by: Florian Westphal <f...@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 include/net/netfilter/nf_conntrack_extend.h |  6 
 net/netfilter/nf_conntrack_extend.c | 49 +++--
 net/netfilter/nf_nat_core.c |  1 -
 3 files changed, 4 insertions(+), 52 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_extend.h 
b/include/net/netfilter/nf_conntrack_extend.h
index 5fc908dc9f32..dd776bf9e2fa 100644
--- a/include/net/netfilter/nf_conntrack_extend.h
+++ b/include/net/netfilter/nf_conntrack_extend.h
@@ -88,21 +88,15 @@ static inline void nf_ct_ext_free(struct nf_conn *ct)
 /* Add this type, returns pointer to data or NULL. */
 void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp);
 
-#define NF_CT_EXT_F_PREALLOC   0x0001
-
 struct nf_ct_ext_type {
/* Destroys relationships (can be NULL). */
void (*destroy)(struct nf_conn *ct);
 
enum nf_ct_ext_id id;
 
-   unsigned int flags;
-
/* Length and min alignment. */
u8 len;
u8 align;
-   /* initial size of nf_ct_ext. */
-   u8 alloc_size;
 };
 
 int nf_ct_extend_register(struct nf_ct_ext_type *type);
diff --git a/net/netfilter/nf_conntrack_extend.c 
b/net/netfilter/nf_conntrack_extend.c
index b5879a9c748d..2e4b41bc67a0 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -18,6 +18,7 @@
 
 static struct nf_ct_ext_type __rcu *nf_ct_ext_types[NF_CT_EXT_NUM];
 static DEFINE_MUTEX(nf_ct_ext_type_mutex);
+#define NF_CT_EXT_PREALLOC 128u /* conntrack events are on by default */
 
 void __nf_ct_ext_destroy(struct nf_conn *ct)
 {
@@ -46,9 +47,8 @@ EXPORT_SYMBOL(__nf_ct_ext_destroy);
 static void *
 nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, gfp_t gfp)
 {
-   unsigned int off, len;
+   unsigned int off, len, alloc;
struct nf_ct_ext_type *t;
-   size_t alloc_size;
 
rcu_read_lock();
t = rcu_dereference(nf_ct_ext_types[id]);
@@ -59,10 +59,10 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id 
id, gfp_t gfp)
 
off = ALIGN(sizeof(struct nf_ct_ext), t->align);
len = off + t->len;
-   alloc_size = t->alloc_size;
rcu_read_unlock();
 
-   *ext = kzalloc(alloc_size, gfp);
+   alloc = max(len, NF_CT_EXT_PREALLOC);
+   *ext = kzalloc(alloc, gfp);
if (!*ext)
return NULL;
 
@@ -115,41 +115,6 @@ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id 
id, gfp_t gfp)
 }
 EXPORT_SYMBOL(nf_ct_ext_add);
 
-static void update_alloc_size(struct nf_ct_ext_type *type)
-{
-   int i, j;
-   struct nf_ct_ext_type *t1, *t2;
-   enum nf_ct_ext_id min = 0, max = NF_CT_EXT_NUM - 1;
-
-   /* unnecessary to update all types */
-   if ((type->flags & NF_CT_EXT_F_PREALLOC) == 0) {
-   min = type->id;
-   max = type->id;
-   }
-
-   /* This assumes that extended areas in conntrack for the types
-  whose NF_CT_EXT_F_PREALLOC bit set are allocated in order */
-   for (i = min; i <= max; i++) {
-   t1 = rcu_dereference_protected(nf_ct_ext_types[i],
-   lockdep_is_held(_ct_ext_type_mutex));
-   if (!t1)
-   continue;
-
-   t1->alloc_size = ALIGN(sizeof(struct nf_ct_ext), t1->align) +
-t1->len;
-   for (j = 0; j < NF_CT_EXT_NUM; j++) {
-   t2 = rcu_dereference_protected(nf_ct_ext_types[j],
-   lockdep_is_held(_ct_ext_type_mutex));
-   if (t2 == NULL || t2 == t1 ||
-   (t2->flags & NF_CT_EXT_F_PREALLOC) == 0)
-   continue;
-
-   t1->alloc_size = ALIGN(t1->alloc_size, t2->align)
-+ t2->len;
-   }
-   }
-}
-
 /* This MUST be called in process context. */
 int nf_ct_extend_register(struct nf_ct_ext_type *type)
 {
@@ -161,12 +126,7 @@ int nf_ct_extend_register(struct nf_ct_ext_type *type)
goto out;
}
 
-   /* This ensures that nf_ct_ext_create() can allocate enough area
-  before updating 

[PATCH 53/53] netfilter: nf_ct_ext: invoke destroy even when ext is not attached

2017-05-01 Thread Pablo Neira Ayuso
From: Liping Zhang <zlpnob...@gmail.com>

For NF_NAT_MANIP_SRC, we will insert the ct to the nat_bysource_table,
then remove it from the nat_bysource_table via nat_extend->destroy.

But now, the nat extension is attached on demand, so if the nat extension
is not attached, we will not be notified when the ct is destroyed, i.e.
we may fail to remove ct from the nat_bysource_table.

So just keep it simple, even if the extension is not attached, we will
still invoke the related ext->destroy. And this will also preserve the
flexibility for the future extension.

Fixes: 9a08ecfe74d7 ("netfilter: don't attach a nat extension by default")
Signed-off-by: Liping Zhang <zlpnob...@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 include/net/netfilter/nf_conntrack_extend.h | 7 +--
 net/netfilter/nf_conntrack_extend.c | 8 ++--
 2 files changed, 3 insertions(+), 12 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_extend.h 
b/include/net/netfilter/nf_conntrack_extend.h
index b01f73fb4dcb..4944bc9153cf 100644
--- a/include/net/netfilter/nf_conntrack_extend.h
+++ b/include/net/netfilter/nf_conntrack_extend.h
@@ -69,12 +69,7 @@ static inline void *__nf_ct_ext_find(const struct nf_conn 
*ct, u8 id)
((id##_TYPE *)__nf_ct_ext_find((ext), (id)))
 
 /* Destroy all relationships */
-void __nf_ct_ext_destroy(struct nf_conn *ct);
-static inline void nf_ct_ext_destroy(struct nf_conn *ct)
-{
-   if (ct->ext)
-   __nf_ct_ext_destroy(ct);
-}
+void nf_ct_ext_destroy(struct nf_conn *ct);
 
 /* Free operation. If you want to free a object referred from private area,
  * please implement __nf_ct_ext_free() and call it.
diff --git a/net/netfilter/nf_conntrack_extend.c 
b/net/netfilter/nf_conntrack_extend.c
index 68ae1be08ed8..6c605e88ebae 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -20,16 +20,12 @@ static struct nf_ct_ext_type __rcu 
*nf_ct_ext_types[NF_CT_EXT_NUM];
 static DEFINE_MUTEX(nf_ct_ext_type_mutex);
 #define NF_CT_EXT_PREALLOC 128u /* conntrack events are on by default */
 
-void __nf_ct_ext_destroy(struct nf_conn *ct)
+void nf_ct_ext_destroy(struct nf_conn *ct)
 {
unsigned int i;
struct nf_ct_ext_type *t;
-   struct nf_ct_ext *ext = ct->ext;
 
for (i = 0; i < NF_CT_EXT_NUM; i++) {
-   if (!__nf_ct_ext_exist(ext, i))
-   continue;
-
rcu_read_lock();
t = rcu_dereference(nf_ct_ext_types[i]);
 
@@ -42,7 +38,7 @@ void __nf_ct_ext_destroy(struct nf_conn *ct)
rcu_read_unlock();
}
 }
-EXPORT_SYMBOL(__nf_ct_ext_destroy);
+EXPORT_SYMBOL(nf_ct_ext_destroy);
 
 void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
 {
-- 
2.1.4



[PATCH 45/53] netfilter: pptp: attach nat extension when needed

2017-05-01 Thread Pablo Neira Ayuso
From: Florian Westphal <f...@strlen.de>

Make sure the nat extension gets added if the master conntrack is subject to
NAT.  This will be required once the nat core stops adding it by default.

Signed-off-by: Florian Westphal <f...@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 net/ipv4/netfilter/nf_nat_pptp.c  | 25 +
 net/netfilter/nf_conntrack_pptp.c | 12 ++--
 2 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 211fee5fe59d..8a69363b4884 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -49,9 +49,14 @@ static void pptp_nat_expected(struct nf_conn *ct,
const struct nf_ct_pptp_master *ct_pptp_info;
const struct nf_nat_pptp *nat_pptp_info;
struct nf_nat_range range;
+   struct nf_conn_nat *nat;
 
+   nat = nf_ct_nat_ext_add(ct);
+   if (WARN_ON_ONCE(!nat))
+   return;
+
+   nat_pptp_info = &nat->help.nat_pptp_info;
ct_pptp_info = nfct_help_data(master);
-   nat_pptp_info = _nat(master)->help.nat_pptp_info;
 
/* And here goes the grand finale of corrosion... */
if (exp->dir == IP_CT_DIR_ORIGINAL) {
@@ -120,13 +125,17 @@ pptp_outbound_pkt(struct sk_buff *skb,
 
 {
struct nf_ct_pptp_master *ct_pptp_info;
+   struct nf_conn_nat *nat = nfct_nat(ct);
struct nf_nat_pptp *nat_pptp_info;
u_int16_t msg;
__be16 new_callid;
unsigned int cid_off;
 
+   if (WARN_ON_ONCE(!nat))
+   return NF_DROP;
+
+   nat_pptp_info = &nat->help.nat_pptp_info;
ct_pptp_info = nfct_help_data(ct);
-   nat_pptp_info = _nat(ct)->help.nat_pptp_info;
 
new_callid = ct_pptp_info->pns_call_id;
 
@@ -191,11 +200,15 @@ pptp_exp_gre(struct nf_conntrack_expect *expect_orig,
 struct nf_conntrack_expect *expect_reply)
 {
const struct nf_conn *ct = expect_orig->master;
+   struct nf_conn_nat *nat = nfct_nat(ct);
struct nf_ct_pptp_master *ct_pptp_info;
struct nf_nat_pptp *nat_pptp_info;
 
+   if (WARN_ON_ONCE(!nat))
+   return;
+
+   nat_pptp_info = &nat->help.nat_pptp_info;
ct_pptp_info = nfct_help_data(ct);
-   nat_pptp_info = _nat(ct)->help.nat_pptp_info;
 
/* save original PAC call ID in nat_info */
nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id;
@@ -223,11 +236,15 @@ pptp_inbound_pkt(struct sk_buff *skb,
 union pptp_ctrl_union *pptpReq)
 {
const struct nf_nat_pptp *nat_pptp_info;
+   struct nf_conn_nat *nat = nfct_nat(ct);
u_int16_t msg;
__be16 new_pcid;
unsigned int pcid_off;
 
-   nat_pptp_info = _nat(ct)->help.nat_pptp_info;
+   if (WARN_ON_ONCE(!nat))
+   return NF_DROP;
+
+   nat_pptp_info = &nat->help.nat_pptp_info;
new_pcid = nat_pptp_info->pns_call_id;
 
switch (msg = ntohs(ctlh->messageType)) {
diff --git a/net/netfilter/nf_conntrack_pptp.c 
b/net/netfilter/nf_conntrack_pptp.c
index 126031909fc7..6959e93063d4 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -263,7 +263,7 @@ static int exp_gre(struct nf_conn *ct, __be16 callid, 
__be16 peer_callid)
goto out_put_both;
 }
 
-static inline int
+static int
 pptp_inbound_pkt(struct sk_buff *skb, unsigned int protoff,
 struct PptpControlHeader *ctlh,
 union pptp_ctrl_union *pptpReq,
@@ -391,7 +391,7 @@ pptp_inbound_pkt(struct sk_buff *skb, unsigned int protoff,
return NF_ACCEPT;
 }
 
-static inline int
+static int
 pptp_outbound_pkt(struct sk_buff *skb, unsigned int protoff,
  struct PptpControlHeader *ctlh,
  union pptp_ctrl_union *pptpReq,
@@ -523,6 +523,14 @@ conntrack_pptp_help(struct sk_buff *skb, unsigned int 
protoff,
int ret;
u_int16_t msg;
 
+#if IS_ENABLED(CONFIG_NF_NAT)
+   if (!nf_ct_is_confirmed(ct) && (ct->status & IPS_NAT_MASK)) {
+   struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT);
+
+   if (!nat && !nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC))
+   return NF_DROP;
+   }
+#endif
/* don't do any tracking before tcp handshake complete */
if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY)
return NF_ACCEPT;
-- 
2.1.4



[PATCH 52/53] netfilter: snmp: avoid stack size warning

2017-05-01 Thread Pablo Neira Ayuso
From: Florian Westphal <f...@strlen.de>

net/ipv4/netfilter/nf_nat_snmp_basic.c:1158:1: warning: the frame size
of 1160 bytes is larger than 1024 bytes

Signed-off-by: Florian Westphal <f...@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 net/ipv4/netfilter/nf_nat_snmp_basic.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c 
b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index da04b9c33ef3..d5b1e0b3f687 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -827,8 +827,8 @@ static unsigned char snmp_object_decode(struct asn1_ctx 
*ctx,
return 1;
 }
 
-static unsigned char snmp_request_decode(struct asn1_ctx *ctx,
-struct snmp_request *request)
+static unsigned char noinline_for_stack
+snmp_request_decode(struct asn1_ctx *ctx, struct snmp_request *request)
 {
unsigned int cls, con, tag;
unsigned char *end;
@@ -920,10 +920,10 @@ static inline void mangle_address(unsigned char *begin,
}
 }
 
-static unsigned char snmp_trap_decode(struct asn1_ctx *ctx,
- struct snmp_v1_trap *trap,
- const struct oct1_map *map,
- __sum16 *check)
+static unsigned char noinline_for_stack
+snmp_trap_decode(struct asn1_ctx *ctx, struct snmp_v1_trap *trap,
+const struct oct1_map *map,
+__sum16 *check)
 {
unsigned int cls, con, tag, len;
unsigned char *end;
-- 
2.1.4



[PATCH 46/53] netfilter: don't attach a nat extension by default

2017-05-01 Thread Pablo Neira Ayuso
From: Florian Westphal <f...@strlen.de>

Nowadays the NAT extension only stores the interface index
(used to purge connections that got masqueraded when the interface goes down)
and pptp nat information.

Previous patches moved nf_ct_nat_ext_add to those places that need it.

Signed-off-by: Florian Westphal <f...@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 include/net/netfilter/nf_nat.h   | 2 +-
 net/ipv4/netfilter/nf_nat_l3proto_ipv4.c | 4 +---
 net/ipv6/netfilter/nf_nat_l3proto_ipv6.c | 4 +---
 net/netfilter/nf_nat_core.c  | 6 --
 4 files changed, 3 insertions(+), 13 deletions(-)

diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index c327a431a6f3..05c82a1a4267 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -67,7 +67,7 @@ static inline bool nf_nat_oif_changed(unsigned int hooknum,
 {
 #if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV4) || \
 IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV6)
-   return nat->masq_index && hooknum == NF_INET_POST_ROUTING &&
+   return nat && nat->masq_index && hooknum == NF_INET_POST_ROUTING &&
   CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL &&
   nat->masq_index != out->ifindex;
 #else
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c 
b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index e3bfa6a169f0..feedd759ca80 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -264,9 +264,7 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
if (!ct)
return NF_ACCEPT;
 
-   nat = nf_ct_nat_ext_add(ct);
-   if (nat == NULL)
-   return NF_ACCEPT;
+   nat = nfct_nat(ct);
 
switch (ctinfo) {
case IP_CT_RELATED:
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c 
b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index 922b5aef273c..bf3ad3e7b647 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -273,9 +273,7 @@ nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
if (!ct)
return NF_ACCEPT;
 
-   nat = nf_ct_nat_ext_add(ct);
-   if (nat == NULL)
-   return NF_ACCEPT;
+   nat = nfct_nat(ct);
 
switch (ctinfo) {
case IP_CT_RELATED:
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 86eeacbb4793..ec9e6d8101b9 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -408,12 +408,6 @@ nf_nat_setup_info(struct nf_conn *ct,
  enum nf_nat_manip_type maniptype)
 {
struct nf_conntrack_tuple curr_tuple, new_tuple;
-   struct nf_conn_nat *nat;
-
-   /* nat helper or nfctnetlink also setup binding */
-   nat = nf_ct_nat_ext_add(ct);
-   if (nat == NULL)
-   return NF_ACCEPT;
 
NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC ||
 maniptype == NF_NAT_MANIP_DST);
-- 
2.1.4



[PATCH 51/53] netfilter: nf_queue: only call synchronize_net twice if nf_queue is active

2017-05-01 Thread Pablo Neira Ayuso
From: Florian Westphal <f...@strlen.de>

nf_unregister_net_hook(s) can avoid a second call to synchronize_net,
provided there is no nfqueue active in that net namespace (which is
the common case).

This also gets rid of the extra arg to nf_queue_nf_hook_drop(), normally
this gets called during netns cleanup so no packets should be queued.

For the rare case of base chain being unregistered or module removal
while nfqueue is in use the extra hiccup due to the packet drops isn't
a big deal.

Signed-off-by: Florian Westphal <f...@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 include/net/netfilter/nf_queue.h |  3 +--
 net/netfilter/core.c | 21 -
 net/netfilter/nf_internals.h |  2 +-
 net/netfilter/nf_queue.c |  7 +--
 net/netfilter/nfnetlink_queue.c  | 18 --
 5 files changed, 27 insertions(+), 24 deletions(-)

diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index 09948d10e38e..4454719ff849 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -24,8 +24,7 @@ struct nf_queue_entry {
 struct nf_queue_handler {
int (*outfn)(struct nf_queue_entry *entry,
 unsigned int queuenum);
-   void(*nf_hook_drop)(struct net *net,
-   const struct nf_hook_entry *hooks);
+   unsigned int(*nf_hook_drop)(struct net *net);
 };
 
 void nf_register_queue_handler(struct net *net, const struct nf_queue_handler 
*qh);
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index b5d908851cc8..552d606e57ca 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -162,14 +162,17 @@ __nf_unregister_net_hook(struct net *net, const struct 
nf_hook_ops *reg)
 void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
 {
struct nf_hook_entry *p = __nf_unregister_net_hook(net, reg);
+   unsigned int nfq;
 
if (!p)
return;
 
synchronize_net();
-   nf_queue_nf_hook_drop(net, p);
+
/* other cpu might still process nfqueue verdict that used reg */
-   synchronize_net();
+   nfq = nf_queue_nf_hook_drop(net);
+   if (nfq)
+   synchronize_net();
kfree(p);
 }
 EXPORT_SYMBOL(nf_unregister_net_hook);
@@ -198,7 +201,7 @@ void nf_unregister_net_hooks(struct net *net, const struct 
nf_hook_ops *reg,
 unsigned int hookcount)
 {
struct nf_hook_entry *to_free[16];
-   unsigned int i, n;
+   unsigned int i, n, nfq;
 
do {
n = min_t(unsigned int, hookcount, ARRAY_SIZE(to_free));
@@ -208,12 +211,12 @@ void nf_unregister_net_hooks(struct net *net, const 
struct nf_hook_ops *reg,
 
synchronize_net();
 
-   for (i = 0; i < n; i++) {
-   if (to_free[i])
-   nf_queue_nf_hook_drop(net, to_free[i]);
-   }
-
-   synchronize_net();
+   /* need 2nd synchronize_net() if nfqueue is used, skb
+* can get reinjected right before nf_queue_hook_drop()
+*/
+   nfq = nf_queue_nf_hook_drop(net);
+   if (nfq)
+   synchronize_net();
 
for (i = 0; i < n; i++)
kfree(to_free[i]);
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index c46d214d5323..bfa742da83af 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -14,7 +14,7 @@
 /* nf_queue.c */
 int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
 struct nf_hook_entry **entryp, unsigned int verdict);
-void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry);
+unsigned int nf_queue_nf_hook_drop(struct net *net);
 int __init netfilter_queue_init(void);
 
 /* nf_log.c */
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 4a7662486f44..043850c9d154 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -96,15 +96,18 @@ void nf_queue_entry_get_refs(struct nf_queue_entry *entry)
 }
 EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
 
-void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry)
+unsigned int nf_queue_nf_hook_drop(struct net *net)
 {
const struct nf_queue_handler *qh;
+   unsigned int count = 0;
 
rcu_read_lock();
qh = rcu_dereference(net->nf.queue_handler);
if (qh)
-   qh->nf_hook_drop(net, entry);
+   count = qh->nf_hook_drop(net);
rcu_read_unlock();
+
+   return count;
 }
 
 static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index d09ab49e102a..dd8ec5b0fcd9 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ 

[PATCH 48/53] ipvs: change comparison on sync_refresh_period

2017-05-01 Thread Pablo Neira Ayuso
From: Aaron Conole 

The sync_refresh_period variable is unsigned, so it can never be < 0.

Signed-off-by: Aaron Conole 
Signed-off-by: Simon Horman 
---
 net/netfilter/ipvs/ip_vs_sync.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 30d6b2cc00a0..0e5b64a75da0 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -520,7 +520,7 @@ static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs,
if (!(cp->flags & IP_VS_CONN_F_TEMPLATE) &&
pkts % sync_period != sysctl_sync_threshold(ipvs))
return 0;
-   } else if (sync_refresh_period <= 0 &&
+   } else if (!sync_refresh_period &&
   pkts != sysctl_sync_threshold(ipvs))
return 0;
 
-- 
2.1.4



[PATCH 47/53] ipvs: remove unused function ip_vs_set_state_timeout

2017-05-01 Thread Pablo Neira Ayuso
From: Aaron Conole 

There are no in-tree callers of this function and it isn't exported.

Signed-off-by: Aaron Conole 
Signed-off-by: Simon Horman 
---
 include/net/ip_vs.h  |  2 --
 net/netfilter/ipvs/ip_vs_proto.c | 22 --
 2 files changed, 24 deletions(-)

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 632082300e77..4f4f786255ef 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -1349,8 +1349,6 @@ int ip_vs_protocol_init(void);
 void ip_vs_protocol_cleanup(void);
 void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags);
 int *ip_vs_create_timeout_table(int *table, int size);
-int ip_vs_set_state_timeout(int *table, int num, const char *const *names,
-   const char *name, int to);
 void ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
   const struct sk_buff *skb, int offset,
   const char *msg);
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index 8ae480715cea..ca880a3ad033 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -193,28 +193,6 @@ ip_vs_create_timeout_table(int *table, int size)
 }
 
 
-/*
- * Set timeout value for state specified by name
- */
-int
-ip_vs_set_state_timeout(int *table, int num, const char *const *names,
-   const char *name, int to)
-{
-   int i;
-
-   if (!table || !name || !to)
-   return -EINVAL;
-
-   for (i = 0; i < num; i++) {
-   if (strcmp(names[i], name))
-   continue;
-   table[i] = to * HZ;
-   return 0;
-   }
-   return -ENOENT;
-}
-
-
 const char * ip_vs_state_name(__u16 proto, int state)
 {
struct ip_vs_protocol *pp = ip_vs_proto_get(proto);
-- 
2.1.4



[PATCH 44/53] netfilter: masquerade: attach nat extension if not present

2017-05-01 Thread Pablo Neira Ayuso
From: Florian Westphal <f...@strlen.de>

Currently the nat extension is always attached as soon as nat module is
loaded.  However, most NAT uses do not need the nat extension anymore.

Prepare to remove the add-nat-by-default by making those places that need
it attach it if it's not present yet.

Signed-off-by: Florian Westphal <f...@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 net/ipv4/netfilter/nf_nat_masquerade_ipv4.c | 5 +++--
 net/ipv6/netfilter/nf_nat_masquerade_ipv6.c | 5 -
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c 
b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
index ea91058b5f6f..dc1dea15c1b4 100644
--- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
@@ -37,7 +37,6 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int 
hooknum,
NF_CT_ASSERT(hooknum == NF_INET_POST_ROUTING);
 
ct = nf_ct_get(skb, );
-   nat = nfct_nat(ct);
 
NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
ctinfo == IP_CT_RELATED_REPLY));
@@ -56,7 +55,9 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int 
hooknum,
return NF_DROP;
}
 
-   nat->masq_index = out->ifindex;
+   nat = nf_ct_nat_ext_add(ct);
+   if (nat)
+   nat->masq_index = out->ifindex;
 
/* Transfer from original range. */
memset(_addr, 0, sizeof(newrange.min_addr));
diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c 
b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
index 051b6a6bfff6..2297c9f073ba 100644
--- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
@@ -30,6 +30,7 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct 
nf_nat_range *range,
   const struct net_device *out)
 {
enum ip_conntrack_info ctinfo;
+   struct nf_conn_nat *nat;
struct in6_addr src;
struct nf_conn *ct;
struct nf_nat_range newrange;
@@ -42,7 +43,9 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct 
nf_nat_range *range,
   _hdr(skb)->daddr, 0, ) < 0)
return NF_DROP;
 
-   nfct_nat(ct)->masq_index = out->ifindex;
+   nat = nf_ct_nat_ext_add(ct);
+   if (nat)
+   nat->masq_index = out->ifindex;
 
newrange.flags  = range->flags | NF_NAT_RANGE_MAP_IPS;
newrange.min_addr.in6   = src;
-- 
2.1.4



[PATCH 50/53] netfilter: nf_log: don't call synchronize_rcu in nf_log_unset

2017-05-01 Thread Pablo Neira Ayuso
From: Florian Westphal <f...@strlen.de>

nf_log_unregister() (which is what gets called in the logger backends
module exit paths) does a (required, module is removed) synchronize_rcu().

But nf_log_unset() is only called from pernet exit handlers. It doesn't
free any memory so there appears to be no need to call synchronize_rcu.

v2: Liping Zhang points out that nf_log_unregister() needs to be called
after pernet unregister, else rmmod would become unsafe.

Signed-off-by: Florian Westphal <f...@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 net/netfilter/nf_log.c| 1 -
 net/netfilter/nfnetlink_log.c | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index cc32727e3f32..8bb152a7cca4 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -71,7 +71,6 @@ void nf_log_unset(struct net *net, const struct nf_logger 
*logger)
RCU_INIT_POINTER(net->nf.nf_loggers[i], NULL);
}
mutex_unlock(_log_mutex);
-   synchronize_rcu();
 }
 EXPORT_SYMBOL(nf_log_unset);
 
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 896741206a50..da9704971a83 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -1140,10 +1140,10 @@ static int __init nfnetlink_log_init(void)
 
 static void __exit nfnetlink_log_fini(void)
 {
-   nf_log_unregister(_logger);
nfnetlink_subsys_unregister(_subsys);
netlink_unregister_notifier(_rtnl_notifier);
unregister_pernet_subsys(_log_net_ops);
+   nf_log_unregister(_logger);
 }
 
 MODULE_DESCRIPTION("netfilter userspace logging");
-- 
2.1.4



[PATCH 49/53] netfilter: batch synchronize_net calls during hook unregister

2017-05-01 Thread Pablo Neira Ayuso
From: Florian Westphal <f...@strlen.de>

synchronize_net is expensive and slows down netns cleanup a lot.

We have two APIs to unregister a hook:
nf_unregister_net_hook (which calls synchronize_net())
and
nf_unregister_net_hooks (calls nf_unregister_net_hook in a loop)

Make nf_unregister_net_hook a wrapper around new helper
__nf_unregister_net_hook, which unlinks the hook but does not free it.

Then, we can call that helper in nf_unregister_net_hooks and then
call synchronize_net() only once.

Andrey Konovalov reports this change improves syzkaller fuzzing speed at
least twice.

Signed-off-by: Florian Westphal <f...@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 net/netfilter/core.c | 46 --
 1 file changed, 40 insertions(+), 6 deletions(-)

diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index a87a6f8a74d8..b5d908851cc8 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -126,14 +126,15 @@ int nf_register_net_hook(struct net *net, const struct 
nf_hook_ops *reg)
 }
 EXPORT_SYMBOL(nf_register_net_hook);
 
-void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
+static struct nf_hook_entry *
+__nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
 {
struct nf_hook_entry __rcu **pp;
struct nf_hook_entry *p;
 
pp = nf_hook_entry_head(net, reg);
if (WARN_ON_ONCE(!pp))
-   return;
+   return NULL;
 
mutex_lock(_hook_mutex);
for (; (p = nf_entry_dereference(*pp)) != NULL; pp = >next) {
@@ -145,7 +146,7 @@ void nf_unregister_net_hook(struct net *net, const struct 
nf_hook_ops *reg)
mutex_unlock(_hook_mutex);
if (!p) {
WARN(1, "nf_unregister_net_hook: hook not found!\n");
-   return;
+   return NULL;
}
 #ifdef CONFIG_NETFILTER_INGRESS
if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
@@ -154,6 +155,17 @@ void nf_unregister_net_hook(struct net *net, const struct 
nf_hook_ops *reg)
 #ifdef HAVE_JUMP_LABEL
static_key_slow_dec(_hooks_needed[reg->pf][reg->hooknum]);
 #endif
+
+   return p;
+}
+
+void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
+{
+   struct nf_hook_entry *p = __nf_unregister_net_hook(net, reg);
+
+   if (!p)
+   return;
+
synchronize_net();
nf_queue_nf_hook_drop(net, p);
/* other cpu might still process nfqueue verdict that used reg */
@@ -183,10 +195,32 @@ int nf_register_net_hooks(struct net *net, const struct 
nf_hook_ops *reg,
 EXPORT_SYMBOL(nf_register_net_hooks);
 
 void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
-unsigned int n)
+unsigned int hookcount)
 {
-   while (n-- > 0)
-   nf_unregister_net_hook(net, &reg[n]);
+   struct nf_hook_entry *to_free[16];
+   unsigned int i, n;
+
+   do {
+   n = min_t(unsigned int, hookcount, ARRAY_SIZE(to_free));
+
+   for (i = 0; i < n; i++)
+   to_free[i] = __nf_unregister_net_hook(net, &reg[i]);
+
+   synchronize_net();
+
+   for (i = 0; i < n; i++) {
+   if (to_free[i])
+   nf_queue_nf_hook_drop(net, to_free[i]);
+   }
+
+   synchronize_net();
+
+   for (i = 0; i < n; i++)
+   kfree(to_free[i]);
+
+   reg += n;
+   hookcount -= n;
+   } while (hookcount > 0);
 }
 EXPORT_SYMBOL(nf_unregister_net_hooks);
 
-- 
2.1.4



[PATCH 40/53] netfilter: SYNPROXY: Return NF_STOLEN instead of NF_DROP during handshaking

2017-05-01 Thread Pablo Neira Ayuso
From: Gao Feng <f...@ikuai8.com>

The current SYNPROXY code returns NF_DROP during normal TCP handshaking,
which is not friendly to the caller, because nf_hook_slow treats
NF_DROP as an error and returns -EPERM.
As a result, the top-level caller may think it has hit an error.

For example, the following code is from cfv_rx_poll():
err = netif_receive_skb(skb);
if (unlikely(err)) {
++cfv->ndev->stats.rx_dropped;
} else {
++cfv->ndev->stats.rx_packets;
cfv->ndev->stats.rx_bytes += skb_len;
}
When SYNPROXY returns NF_DROP, then netif_receive_skb returns -EPERM.
As a result, the cfv driver would treat it as an error, and increase
the rx_dropped counter.

So use NF_STOLEN instead of NF_DROP now, because no error has actually
occurred, and free the skb directly.

Signed-off-by: Gao Feng <f...@ikuai8.com>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 net/ipv4/netfilter/ipt_SYNPROXY.c  | 21 ++---
 net/ipv6/netfilter/ip6t_SYNPROXY.c | 20 ++--
 2 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c 
b/net/ipv4/netfilter/ipt_SYNPROXY.c
index c308ee0ee0bc..af2b69b6895f 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -293,12 +293,16 @@ synproxy_tg4(struct sk_buff *skb, const struct 
xt_action_param *par)
  XT_SYNPROXY_OPT_ECN);
 
synproxy_send_client_synack(net, skb, th, );
-   return NF_DROP;
-
+   consume_skb(skb);
+   return NF_STOLEN;
} else if (th->ack && !(th->fin || th->rst || th->syn)) {
/* ACK from client */
-   synproxy_recv_client_ack(net, skb, th, , ntohl(th->seq));
-   return NF_DROP;
+   if (synproxy_recv_client_ack(net, skb, th, , 
ntohl(th->seq))) {
+   consume_skb(skb);
+   return NF_STOLEN;
+   } else {
+   return NF_DROP;
+   }
}
 
return XT_CONTINUE;
@@ -367,10 +371,13 @@ static unsigned int ipv4_synproxy_hook(void *priv,
 * number match the one of first SYN.
 */
if (synproxy_recv_client_ack(net, skb, th, ,
-ntohl(th->seq) + 1))
+ntohl(th->seq) + 1)) {
this_cpu_inc(snet->stats->cookie_retrans);
-
-   return NF_DROP;
+   consume_skb(skb);
+   return NF_STOLEN;
+   } else {
+   return NF_DROP;
+   }
}
 
synproxy->isn = ntohl(th->ack_seq);
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c 
b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index 1252537f215f..d3c4daa708b9 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -307,12 +307,17 @@ synproxy_tg6(struct sk_buff *skb, const struct 
xt_action_param *par)
  XT_SYNPROXY_OPT_ECN);
 
synproxy_send_client_synack(net, skb, th, );
-   return NF_DROP;
+   consume_skb(skb);
+   return NF_STOLEN;
 
} else if (th->ack && !(th->fin || th->rst || th->syn)) {
/* ACK from client */
-   synproxy_recv_client_ack(net, skb, th, , ntohl(th->seq));
-   return NF_DROP;
+   if (synproxy_recv_client_ack(net, skb, th, , 
ntohl(th->seq))) {
+   consume_skb(skb);
+   return NF_STOLEN;
+   } else {
+   return NF_DROP;
+   }
}
 
return XT_CONTINUE;
@@ -388,10 +393,13 @@ static unsigned int ipv6_synproxy_hook(void *priv,
 * number match the one of first SYN.
 */
if (synproxy_recv_client_ack(net, skb, th, ,
-ntohl(th->seq) + 1))
+ntohl(th->seq) + 1)) {
this_cpu_inc(snet->stats->cookie_retrans);
-
-   return NF_DROP;
+   consume_skb(skb);
+   return NF_STOLEN;
+   } else {
+   return NF_DROP;
+   }
}
 
synproxy->isn = ntohl(th->ack_seq);
-- 
2.1.4



[PATCH 16/53] netfilter: udplite: Remove duplicated udplite4/6 declaration

2017-05-01 Thread Pablo Neira Ayuso
From: Gao Feng 

There are two nf_conntrack_l4proto_udplite4/6 declarations in the header
files nf_conntrack_ipv4/6.h. Remove the one which is not enclosed by the
macro CONFIG_NF_CT_PROTO_UDPLITE.

Signed-off-by: Gao Feng 
---
 include/net/netfilter/ipv4/nf_conntrack_ipv4.h | 1 -
 include/net/netfilter/ipv6/nf_conntrack_ipv6.h | 1 -
 2 files changed, 2 deletions(-)

diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h 
b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
index 6ff32815641b..919e4e8af327 100644
--- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
+++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
@@ -14,7 +14,6 @@ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4;
 
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4;
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4;
-extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4;
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp;
 #ifdef CONFIG_NF_CT_PROTO_DCCP
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4;
diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h 
b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
index c59b82456f89..eaea968f8657 100644
--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
@@ -5,7 +5,6 @@ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6;
 
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6;
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6;
-extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6;
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
 #ifdef CONFIG_NF_CT_PROTO_DCCP
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6;
-- 
2.1.4



[PATCH 22/53] netfilter: ecache: Refine the nf_ct_deliver_cached_events

2017-05-01 Thread Pablo Neira Ayuso
From: Gao Feng <f...@ikuai8.com>

1. Remove the standalone !events check so that a missed event is
delivered even when no new event has happened.

Consider this case:
1) nf_ct_deliver_cached_events is invoked for the first time, the
event fails to be delivered, and missed is set.
2) nf_ct_deliver_cached_events is invoked again, but no new event
has happened.
The missed event is then really lost.

With this check removed, the missed event is sent again.
This is fine when there is no missed event, because the later check
!((events | missed) & e->ctmask) handles that case.

2. Correct the return value check of notify->fcn.
When the event is sent successfully, it returns 0, not a positive value.

Signed-off-by: Gao Feng <f...@ikuai8.com>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 net/netfilter/nf_conntrack_ecache.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nf_conntrack_ecache.c 
b/net/netfilter/nf_conntrack_ecache.c
index 22fc32143e9c..6161e92d2980 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -195,7 +195,7 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct)
 
events = xchg(>cache, 0);
 
-   if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct) || !events)
+   if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct))
goto out_unlock;
 
/* We make a copy of the missed event cache without taking
@@ -212,7 +212,7 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct)
 
ret = notify->fcn(events | missed, );
 
-   if (likely(ret >= 0 && !missed))
+   if (likely(ret == 0 && !missed))
goto out_unlock;
 
spin_lock_bh(>lock);
-- 
2.1.4



[PATCH 19/53] netfilter: nf_conntrack: remove double assignment

2017-05-01 Thread Pablo Neira Ayuso
From: Aaron Conole <acon...@bytheb.org>

The protonet pointer will unconditionally be rewritten, so just do the
needed assignment first.

Signed-off-by: Aaron Conole <acon...@bytheb.org>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 net/netfilter/nf_conntrack_proto.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/netfilter/nf_conntrack_proto.c 
b/net/netfilter/nf_conntrack_proto.c
index 1329e090fd5e..2de6c1fe3261 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -441,9 +441,8 @@ EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister_one);
 void nf_ct_l4proto_pernet_unregister_one(struct net *net,
 struct nf_conntrack_l4proto *l4proto)
 {
-   struct nf_proto_net *pn = NULL;
+   struct nf_proto_net *pn = nf_ct_l4proto_net(net, l4proto);
 
-   pn = nf_ct_l4proto_net(net, l4proto);
if (pn == NULL)
return;
 
-- 
2.1.4



[PATCH 10/53] netfilter: Add nfnl_msg_type() helper function

2017-05-01 Thread Pablo Neira Ayuso
Add and use nfnl_msg_type() function to replace opencoded nfnetlink
message type. I suggested this change, Arushi Singhal made an initial
patch to address this but was missing several spots.

Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 include/linux/netfilter/nfnetlink.h  |  5 +
 net/netfilter/ipset/ip_set_core.c|  2 +-
 net/netfilter/nf_conntrack_netlink.c | 16 +---
 net/netfilter/nf_tables_api.c| 20 +---
 net/netfilter/nf_tables_trace.c  |  3 ++-
 net/netfilter/nfnetlink_acct.c   |  2 +-
 net/netfilter/nfnetlink_cthelper.c   |  2 +-
 net/netfilter/nfnetlink_cttimeout.c  |  4 ++--
 net/netfilter/nfnetlink_log.c|  2 +-
 net/netfilter/nfnetlink_queue.c  |  2 +-
 net/netfilter/nft_compat.c   |  2 +-
 11 files changed, 33 insertions(+), 27 deletions(-)

diff --git a/include/linux/netfilter/nfnetlink.h 
b/include/linux/netfilter/nfnetlink.h
index 1b49209dd5c7..996711d8a7b4 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -41,6 +41,11 @@ int nfnetlink_set_err(struct net *net, u32 portid, u32 
group, int error);
 int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid,
  int flags);
 
+static inline u16 nfnl_msg_type(u8 subsys, u8 msg_type)
+{
+   return subsys << 8 | msg_type;
+}
+
 void nfnl_lock(__u8 subsys_id);
 void nfnl_unlock(__u8 subsys_id);
 #ifdef CONFIG_PROVE_LOCKING
diff --git a/net/netfilter/ipset/ip_set_core.c 
b/net/netfilter/ipset/ip_set_core.c
index c296f9b606d4..731ba9c0cf9b 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -769,7 +769,7 @@ start_msg(struct sk_buff *skb, u32 portid, u32 seq, 
unsigned int flags,
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
 
-   nlh = nlmsg_put(skb, portid, seq, cmd | (NFNL_SUBSYS_IPSET << 8),
+   nlh = nlmsg_put(skb, portid, seq, nfnl_msg_type(NFNL_SUBSYS_IPSET, cmd),
sizeof(*nfmsg), flags);
if (!nlh)
return NULL;
diff --git a/net/netfilter/nf_conntrack_netlink.c 
b/net/netfilter/nf_conntrack_netlink.c
index cd0a6d270ebe..773d2187a5ea 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -467,7 +467,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 
seq, u32 type,
struct nlattr *nest_parms;
unsigned int flags = portid ? NLM_F_MULTI : 0, event;
 
-   event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_NEW);
+   event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_NEW);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
if (nlh == NULL)
goto nlmsg_failure;
@@ -652,7 +652,7 @@ ctnetlink_conntrack_event(unsigned int events, struct 
nf_ct_event *item)
if (skb == NULL)
goto errout;
 
-   type |= NFNL_SUBSYS_CTNETLINK << 8;
+   type = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, type);
nlh = nlmsg_put(skb, item->portid, 0, type, sizeof(*nfmsg), flags);
if (nlh == NULL)
goto nlmsg_failure;
@@ -1983,7 +1983,8 @@ ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 
portid, u32 seq,
struct nfgenmsg *nfmsg;
unsigned int flags = portid ? NLM_F_MULTI : 0, event;
 
-   event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_GET_STATS_CPU);
+   event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK,
+ IPCTNL_MSG_CT_GET_STATS_CPU);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
if (nlh == NULL)
goto nlmsg_failure;
@@ -2066,7 +2067,7 @@ ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 
portid, u32 seq, u32 type,
unsigned int flags = portid ? NLM_F_MULTI : 0, event;
unsigned int nr_conntracks = atomic_read(>ct.count);
 
-   event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_GET_STATS);
+   event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_GET_STATS);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
if (nlh == NULL)
goto nlmsg_failure;
@@ -2576,7 +2577,7 @@ ctnetlink_exp_fill_info(struct sk_buff *skb, u32 portid, 
u32 seq,
struct nfgenmsg *nfmsg;
unsigned int flags = portid ? NLM_F_MULTI : 0;
 
-   event |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
+   event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_EXP, event);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
if (nlh == NULL)
goto nlmsg_failure;
@@ -2627,7 +2628,7 @@ ctnetlink_expect_event(unsigned int events, struct 
nf_exp_event *item)
if (skb == NULL)
goto errout;
 
-   type |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
+   type = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_EXP, type);
nlh = nlmsg_put(skb, item->portid, 0, type, sizeof(*nfmsg), flag

[PATCH 02/53] netfilter: ipvs: Replace kzalloc with kcalloc.

2017-05-01 Thread Pablo Neira Ayuso
From: Varsha Rao 

Replace kzalloc with kcalloc, as kcalloc is preferred over kzalloc for
allocating an array. This patch fixes the checkpatch issue.

Signed-off-by: Varsha Rao 
---
 net/netfilter/ipvs/ip_vs_sync.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index b03c28084f81..30d6b2cc00a0 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1849,7 +1849,7 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct 
ipvs_sync_daemon_cfg *c,
if (state == IP_VS_STATE_MASTER) {
struct ipvs_master_sync_state *ms;
 
-   ipvs->ms = kzalloc(count * sizeof(ipvs->ms[0]), GFP_KERNEL);
+   ipvs->ms = kcalloc(count, sizeof(ipvs->ms[0]), GFP_KERNEL);
if (!ipvs->ms)
goto out;
ms = ipvs->ms;
@@ -1862,7 +1862,7 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct 
ipvs_sync_daemon_cfg *c,
ms->ipvs = ipvs;
}
} else {
-   array = kzalloc(count * sizeof(struct task_struct *),
+   array = kcalloc(count, sizeof(struct task_struct *),
GFP_KERNEL);
if (!array)
goto out;
-- 
2.1.4



[PATCH 21/53] netfilter: nf_nat: Fix return NF_DROP in nfnetlink_parse_nat_setup

2017-05-01 Thread Pablo Neira Ayuso
From: Gao Feng <f...@ikuai8.com>

__nf_nat_alloc_null_binding invokes nf_nat_setup_info, which may
return NF_DROP when memory is exhausted, so convert NF_DROP to -ENOMEM
to make ctnetlink happy. Otherwise ctnetlink_setup_nat treats it as a
success when an NF_DROP error has actually happened.

Signed-off-by: Gao Feng <f...@ikuai8.com>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 net/netfilter/nf_nat_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index fb0e65411785..5e35643da650 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -806,7 +806,7 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct,
 
/* No NAT information has been passed, allocate the null-binding */
if (attr == NULL)
-   return __nf_nat_alloc_null_binding(ct, manip);
+   return __nf_nat_alloc_null_binding(ct, manip) == NF_DROP ? 
-ENOMEM : 0;
 
err = nfnetlink_parse_nat(attr, ct, , l3proto);
if (err < 0)
-- 
2.1.4



[PATCH 12/53] netfilter: Use seq_puts()/seq_putc() where possible

2017-05-01 Thread Pablo Neira Ayuso
From: simran singhal <singhalsimr...@gmail.com>

For strings without format specifiers, use seq_puts(). For
seq_printf("\n"), use seq_putc('\n').

Signed-off-by: simran singhal <singhalsimr...@gmail.com>
Acked-by: Simon Horman <horms+rene...@verge.net.au>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 net/netfilter/ipvs/ip_vs_ctl.c  | 8 
 net/netfilter/nf_conntrack_expect.c | 4 ++--
 net/netfilter/nf_conntrack_standalone.c | 6 +++---
 net/netfilter/nf_log.c  | 4 ++--
 net/netfilter/nf_synproxy_core.c| 6 +++---
 net/netfilter/xt_recent.c   | 2 +-
 6 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 541aa7694775..c578b6c0dc41 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2130,8 +2130,8 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v)
 /*   01234567 01234567 01234567 0123456701234567 0123456701234567 
*/
seq_puts(seq,
 "   Total Incoming Outgoing Incoming 
Outgoing\n");
-   seq_printf(seq,
-  "   Conns  Packets  PacketsBytes
Bytes\n");
+   seq_puts(seq,
+"   Conns  Packets  PacketsBytes
Bytes\n");
 
ip_vs_copy_stats(, _ipvs(net)->tot_stats);
seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n\n",
@@ -2178,8 +2178,8 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, 
void *v)
 /*   01234567 01234567 01234567 0123456701234567 0123456701234567 
*/
seq_puts(seq,
 "   Total Incoming Outgoing Incoming 
Outgoing\n");
-   seq_printf(seq,
-  "CPUConns  Packets  PacketsBytes
Bytes\n");
+   seq_puts(seq,
+"CPUConns  Packets  PacketsBytes
Bytes\n");
 
for_each_possible_cpu(i) {
struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
diff --git a/net/netfilter/nf_conntrack_expect.c 
b/net/netfilter/nf_conntrack_expect.c
index 71d136469be0..7f12c8a78112 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -549,7 +549,7 @@ static int exp_seq_show(struct seq_file *s, void *v)
seq_printf(s, "%ld ", timer_pending(>timeout)
   ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
else
-   seq_printf(s, "- ");
+   seq_puts(s, "- ");
seq_printf(s, "l3proto = %u proto=%u ",
   expect->tuple.src.l3num,
   expect->tuple.dst.protonum);
@@ -559,7 +559,7 @@ static int exp_seq_show(struct seq_file *s, void *v)
   expect->tuple.dst.protonum));
 
if (expect->flags & NF_CT_EXPECT_PERMANENT) {
-   seq_printf(s, "PERMANENT");
+   seq_puts(s, "PERMANENT");
delim = ",";
}
if (expect->flags & NF_CT_EXPECT_INACTIVE) {
diff --git a/net/netfilter/nf_conntrack_standalone.c 
b/net/netfilter/nf_conntrack_standalone.c
index 2256147dcaad..ccb5cb9043e0 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -250,7 +250,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
goto release;
 
if (!(test_bit(IPS_SEEN_REPLY_BIT, >status)))
-   seq_printf(s, "[UNREPLIED] ");
+   seq_puts(s, "[UNREPLIED] ");
 
print_tuple(s, >tuplehash[IP_CT_DIR_REPLY].tuple,
l3proto, l4proto);
@@ -261,7 +261,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
goto release;
 
if (test_bit(IPS_ASSURED_BIT, >status))
-   seq_printf(s, "[ASSURED] ");
+   seq_puts(s, "[ASSURED] ");
 
if (seq_has_overflowed(s))
goto release;
@@ -350,7 +350,7 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v)
const struct ip_conntrack_stat *st = v;
 
if (v == SEQ_START_TOKEN) {
-   seq_printf(seq, "entries  searched found new invalid ignore 
delete delete_list insert insert_failed drop early_drop icmp_error  expect_new 
expect_create expect_delete search_restart\n");
+   seq_puts(seq, "entries  searched found new invalid ignore 
delete delete_list insert insert_failed drop early_drop icmp_error  expect_new 
expect_create expect_delete search_restart\n");
return 0;
}
 
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 8d85a0598b60..cc32727e3f32 100644
--- a/net/netfilte

[PATCH 13/53] net: netfilter: Use list_{next/prev}_entry instead of list_entry

2017-05-01 Thread Pablo Neira Ayuso
From: simran singhal <singhalsimr...@gmail.com>

This patch replaces list_entry with list_prev_entry as it makes the
code clearer to read.

Signed-off-by: simran singhal <singhalsimr...@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 7ba76da96cc2..22e191ad4468 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1905,7 +1905,7 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, 
struct net *net,
goto nla_put_failure;
 
if ((event != NFT_MSG_DELRULE) && (rule->list.prev != >rules)) {
-   prule = list_entry(rule->list.prev, struct nft_rule, list);
+   prule = list_prev_entry(rule, list);
if (nla_put_be64(skb, NFTA_RULE_POSITION,
 cpu_to_be64(prule->handle),
 NFTA_RULE_PAD))
-- 
2.1.4



[PATCH 11/53] netfilter: Remove unnecessary cast on void pointer

2017-05-01 Thread Pablo Neira Ayuso
From: simran singhal <singhalsimr...@gmail.com>

The following Coccinelle script was used to detect this:
@r@
expression x;
void* e;
type T;
identifier f;
@@
(
  *((T *)e)
|
  ((T *)x)[...]
|
  ((T*)x)->f
|

- (T*)
  e
)

Unnecessary parentheses are also removed.

Signed-off-by: simran singhal <singhalsimr...@gmail.com>
Reviewed-by: Stephen Hemminger <step...@networkplumber.org>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 net/bridge/netfilter/ebtables.c |  2 +-
 net/ipv4/netfilter/arp_tables.c | 21 -
 net/ipv4/netfilter/ip_tables.c  | 20 
 net/ipv6/netfilter/ip6_tables.c | 20 
 net/netfilter/ipset/ip_set_bitmap_gen.h |  5 ++---
 net/netfilter/ipset/ip_set_core.c   |  2 +-
 net/netfilter/nf_conntrack_proto.c  |  2 +-
 net/netfilter/nft_set_hash.c|  2 +-
 net/netfilter/xt_hashlimit.c| 10 +-
 9 files changed, 35 insertions(+), 49 deletions(-)

diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 79b69917f521..bdc629eb0207 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1713,7 +1713,7 @@ static int compat_copy_entry_to_user(struct ebt_entry *e, 
void __user **dstptr,
if (*size < sizeof(*ce))
return -EINVAL;
 
-   ce = (struct ebt_entry __user *)*dstptr;
+   ce = *dstptr;
if (copy_to_user(ce, e, sizeof(*ce)))
return -EFAULT;
 
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index f17dab1dee6e..0bc3c3d73e61 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -309,8 +309,7 @@ static int mark_source_chains(const struct xt_table_info 
*newinfo,
 */
for (hook = 0; hook < NF_ARP_NUMHOOKS; hook++) {
unsigned int pos = newinfo->hook_entry[hook];
-   struct arpt_entry *e
-   = (struct arpt_entry *)(entry0 + pos);
+   struct arpt_entry *e = entry0 + pos;
 
if (!(valid_hooks & (1 << hook)))
continue;
@@ -354,14 +353,12 @@ static int mark_source_chains(const struct xt_table_info 
*newinfo,
if (pos == oldpos)
goto next;
 
-   e = (struct arpt_entry *)
-   (entry0 + pos);
+   e = entry0 + pos;
} while (oldpos == pos + e->next_offset);
 
/* Move along one */
size = e->next_offset;
-   e = (struct arpt_entry *)
-   (entry0 + pos + size);
+   e = entry0 + pos + size;
if (pos + size >= newinfo->size)
return 0;
e->counters.pcnt = pos;
@@ -376,16 +373,14 @@ static int mark_source_chains(const struct xt_table_info 
*newinfo,
if (!xt_find_jump_offset(offsets, 
newpos,
 
newinfo->number))
return 0;
-   e = (struct arpt_entry *)
-   (entry0 + newpos);
+   e = entry0 + newpos;
} else {
/* ... this is a fallthru */
newpos = pos + e->next_offset;
if (newpos >= newinfo->size)
return 0;
}
-   e = (struct arpt_entry *)
-   (entry0 + newpos);
+   e = entry0 + newpos;
e->counters.pcnt = pos;
pos = newpos;
}
@@ -681,7 +676,7 @@ static int copy_entries_to_user(unsigned int total_size,
for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
const struct xt_entry_target *t;
 
-   e = (struct arpt_entry *)(loc_cpu_entry + off);
+   e = loc_cpu_entry + off;
if (copy_to_user(userptr + off, e, sizeof(*e))) {
ret = -EFAULT;
goto free_counters;
@@ -1128,7 +1123,7 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, 
void **dstptr,
int h;
 
origsize = *size;
-   de = (struct arpt_entry *)*dstptr;
+   de = *

[PATCH 04/53] netfilter: nf_tables: add nft_is_base_chain() helper

2017-05-01 Thread Pablo Neira Ayuso
This new helper function allows us to check if this is a basechain.

Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  5 +
 net/netfilter/nf_tables_api.c | 30 +++---
 net/netfilter/nf_tables_netdev.c  |  2 +-
 net/netfilter/nft_compat.c| 11 ++-
 4 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h 
b/include/net/netfilter/nf_tables.h
index f713a053f89d..028faec8fc27 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -911,6 +911,11 @@ static inline struct nft_base_chain *nft_base_chain(const 
struct nft_chain *chai
return container_of(chain, struct nft_base_chain, chain);
 }
 
+static inline bool nft_is_base_chain(const struct nft_chain *chain)
+{
+   return chain->flags & NFT_BASE_CHAIN;
+}
+
 int __nft_release_basechain(struct nft_ctx *ctx);
 
 unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 2d822d2fd830..bf52acfe4eff 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -144,7 +144,7 @@ static int nf_tables_register_hooks(struct net *net,
unsigned int hook_nops)
 {
if (table->flags & NFT_TABLE_F_DORMANT ||
-   !(chain->flags & NFT_BASE_CHAIN))
+   !nft_is_base_chain(chain))
return 0;
 
return nf_register_net_hooks(net, nft_base_chain(chain)->ops,
@@ -157,7 +157,7 @@ static void nf_tables_unregister_hooks(struct net *net,
   unsigned int hook_nops)
 {
if (table->flags & NFT_TABLE_F_DORMANT ||
-   !(chain->flags & NFT_BASE_CHAIN))
+   !nft_is_base_chain(chain))
return;
 
nf_unregister_net_hooks(net, nft_base_chain(chain)->ops, hook_nops);
@@ -587,7 +587,7 @@ static void _nf_tables_table_disable(struct net *net,
list_for_each_entry(chain, >chains, list) {
if (!nft_is_active_next(net, chain))
continue;
-   if (!(chain->flags & NFT_BASE_CHAIN))
+   if (!nft_is_base_chain(chain))
continue;
 
if (cnt && i++ == cnt)
@@ -608,7 +608,7 @@ static int nf_tables_table_enable(struct net *net,
list_for_each_entry(chain, >chains, list) {
if (!nft_is_active_next(net, chain))
continue;
-   if (!(chain->flags & NFT_BASE_CHAIN))
+   if (!nft_is_base_chain(chain))
continue;
 
err = nf_register_net_hooks(net, nft_base_chain(chain)->ops,
@@ -1007,7 +1007,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, 
struct net *net,
if (nla_put_string(skb, NFTA_CHAIN_NAME, chain->name))
goto nla_put_failure;
 
-   if (chain->flags & NFT_BASE_CHAIN) {
+   if (nft_is_base_chain(chain)) {
const struct nft_base_chain *basechain = nft_base_chain(chain);
const struct nf_hook_ops *ops = >ops[0];
struct nlattr *nest;
@@ -1226,7 +1226,7 @@ static void nf_tables_chain_destroy(struct nft_chain 
*chain)
 {
BUG_ON(chain->use > 0);
 
-   if (chain->flags & NFT_BASE_CHAIN) {
+   if (nft_is_base_chain(chain)) {
struct nft_base_chain *basechain = nft_base_chain(chain);
 
module_put(basechain->type->owner);
@@ -1364,8 +1364,8 @@ static int nf_tables_newchain(struct net *net, struct 
sock *nlsk,
}
 
if (nla[NFTA_CHAIN_POLICY]) {
-   if ((chain != NULL &&
-   !(chain->flags & NFT_BASE_CHAIN)))
+   if (chain != NULL &&
+   !nft_is_base_chain(chain))
return -EOPNOTSUPP;
 
if (chain == NULL &&
@@ -1396,7 +1396,7 @@ static int nf_tables_newchain(struct net *net, struct 
sock *nlsk,
struct nft_chain_hook hook;
struct nf_hook_ops *ops;
 
-   if (!(chain->flags & NFT_BASE_CHAIN))
+   if (!nft_is_base_chain(chain))
return -EBUSY;
 
err = nft_chain_parse_hook(net, nla, afi, ,
@@ -1433,7 +1433,7 @@ static int nf_tables_newchain(struct net *net, struct 
sock *nlsk,
}
 
if (nla[NFTA_CHAIN_COUNTERS]) {
-   if (!(chain->flags & NFT_BASE_CHAIN))
+   if (!nft_is_base_chain(chain))
return -EOPNOTSUPP;
 
stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
@@ -4708,7 +4708,7 @@ static void nft_chain_co

[PATCH 01/53] netfilter: ipvs: don't check for presence of nat extension

2017-05-01 Thread Pablo Neira Ayuso
From: Florian Westphal 

Check for the NAT status bits; they are set once conntrack needs NAT in
the source or reply direction. This is slightly faster than nfct_nat(),
as that has to check the extension area.

Signed-off-by: Florian Westphal 
---
 net/netfilter/ipvs/ip_vs_ftp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index d30c327bb578..2e2bf7428cd1 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -260,7 +260,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct 
ip_vs_conn *cp,
buf_len = strlen(buf);
 
ct = nf_ct_get(skb, );
-   if (ct && !nf_ct_is_untracked(ct) && nfct_nat(ct)) {
+   if (ct && !nf_ct_is_untracked(ct) && (ct->status & 
IPS_NAT_MASK)) {
/* If mangling fails this function will return 0
 * which will cause the packet to be dropped.
 * Mangling can only fail under memory pressure,
-- 
2.1.4



[PATCH 03/53] ipvs: remove unused variable

2017-05-01 Thread Pablo Neira Ayuso
From: Arushi Singhal 

This patch uses the following coccinelle script to remove
a variable that was simply used to store the return
value of a function call before returning it:

@@
identifier len,f;
@@

-int len;
 ... when != len
 when strict
-len =
+return
f(...);
-return len;

Signed-off-by: Arushi Singhal 
Signed-off-by: Simon Horman 
---
 net/netfilter/ipvs/ip_vs_ftp.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 2e2bf7428cd1..6caf4459e981 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -482,11 +482,8 @@ static struct pernet_operations ip_vs_ftp_ops = {
 
 static int __init ip_vs_ftp_init(void)
 {
-   int rv;
-
-   rv = register_pernet_subsys(_vs_ftp_ops);
/* rcu_barrier() is called by netns on error */
-   return rv;
+   return register_pernet_subsys(_vs_ftp_ops);
 }
 
 /*
-- 
2.1.4



[PATCH 05/53] netfilter: expect: Make sure the max_expected limit is effective

2017-05-01 Thread Pablo Neira Ayuso
From: Gao Feng <f...@ikuai8.com>

Because expecting, a member of nf_conn_help, is a u8, it overflows
after reaching U8_MAX (255). So the limit does not work when
max_expected is configured to exceed 255 in the expect policy.

Now add a check for max_expected and return -EINVAL when it exceeds
the limit.

Signed-off-by: Gao Feng <f...@ikuai8.com>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 include/net/netfilter/nf_conntrack_expect.h | 1 +
 net/netfilter/nf_conntrack_helper.c | 3 +++
 net/netfilter/nf_conntrack_irc.c| 6 ++
 net/netfilter/nfnetlink_cthelper.c  | 6 ++
 4 files changed, 16 insertions(+)

diff --git a/include/net/netfilter/nf_conntrack_expect.h 
b/include/net/netfilter/nf_conntrack_expect.h
index 65cc2cb005d9..e84df8d3bf37 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -73,6 +73,7 @@ struct nf_conntrack_expect_policy {
 };
 
 #define NF_CT_EXPECT_CLASS_DEFAULT 0
+#define NF_CT_EXPECT_MAX_CNT   255
 
 int nf_conntrack_expect_pernet_init(struct net *net);
 void nf_conntrack_expect_pernet_fini(struct net *net);
diff --git a/net/netfilter/nf_conntrack_helper.c 
b/net/netfilter/nf_conntrack_helper.c
index 6dc44d9b4190..752a977e9eef 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -385,6 +385,9 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper 
*me)
BUG_ON(me->expect_class_max >= NF_CT_MAX_EXPECT_CLASSES);
BUG_ON(strlen(me->name) > NF_CT_HELPER_NAME_LEN - 1);
 
+   if (me->expect_policy->max_expected > NF_CT_EXPECT_MAX_CNT)
+   return -EINVAL;
+
mutex_lock(_ct_helper_mutex);
hlist_for_each_entry(cur, _ct_helper_hash[h], hnode) {
if (nf_ct_tuple_src_mask_cmp(>tuple, >tuple, )) {
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index 1972a149f958..1a5af4d4af2d 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -243,6 +243,12 @@ static int __init nf_conntrack_irc_init(void)
return -EINVAL;
}
 
+   if (max_dcc_channels > NF_CT_EXPECT_MAX_CNT) {
+   pr_err("max_dcc_channels must not be more than %u\n",
+  NF_CT_EXPECT_MAX_CNT);
+   return -EINVAL;
+   }
+
irc_exp_policy.max_expected = max_dcc_channels;
irc_exp_policy.timeout = dcc_timeout;
 
diff --git a/net/netfilter/nfnetlink_cthelper.c 
b/net/netfilter/nfnetlink_cthelper.c
index d45558178da5..d5025cc25df3 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -150,6 +150,9 @@ nfnl_cthelper_expect_policy(struct 
nf_conntrack_expect_policy *expect_policy,
nla_data(tb[NFCTH_POLICY_NAME]), NF_CT_HELPER_NAME_LEN);
expect_policy->max_expected =
ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX]));
+   if (expect_policy->max_expected > NF_CT_EXPECT_MAX_CNT)
+   return -EINVAL;
+
expect_policy->timeout =
ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_TIMEOUT]));
 
@@ -290,6 +293,9 @@ nfnl_cthelper_update_policy_one(const struct 
nf_conntrack_expect_policy *policy,
 
new_policy->max_expected =
ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX]));
+   if (new_policy->max_expected > NF_CT_EXPECT_MAX_CNT)
+   return -EINVAL;
+
new_policy->timeout =
ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_TIMEOUT]));
 
-- 
2.1.4



Re: [GIT PULL 0/2] Third Round of IPVS Updates for v4.12

2017-05-01 Thread Pablo Neira Ayuso
On Fri, Apr 28, 2017 at 12:11:57PM +0200, Simon Horman wrote:
> Hi Pablo,
> 
> please consider these enhancements to IPVS for v4.12.
> If it is too late for v4.12 then please consider them for v4.13.
> 
> * Remove unused function
> * Correct comparison of unsigned value

Pulled, thanks Simon.


Re: [GIT PULL v2 0/1] IPVS Fixes for v4.11

2017-04-28 Thread Pablo Neira Ayuso
On Fri, Apr 28, 2017 at 12:11:53PM +0200, Simon Horman wrote:
> Hi Pablo,
> 
> please consider this fix to IPVS for v4.11.
> Or if it is too late for v4.11 please consider it for v4.12.
> I would also like it considered for stable.
> 
> * Explicitly forbid ipv6 service/dest creation if ipv6 mod is disabled
>   to avoid oops caused by IPVS accessing IPv6 routing code in such
>   circumstances.
> 
> Change since v1 of pull request:
> * Rebase on nf
> * Correct URL; it should be ipvs not ipvs-next
> 
> 
> The following changes since commit 9dd2ab609eef736d5639e0de1bcc2e71e714b28e:
> 
>   netfilter: Wrong icmp6 checksum for ICMPV6_TIME_EXCEED in reverse SNATv6 
> path (2017-04-25 11:10:38 +0200)
> 
> are available in the git repository at:
> 
>   http://git.kernel.org/pub/scm/linux/kernel/git/horms/ipvs.git 
> ipvs-fixes-for-v4.11

Pulled into nf, thanks Simon.


Re: [PATCH net v3] bridge: ebtables: fix reception of frames DNAT-ed to bridge device/port

2017-04-25 Thread Pablo Neira Ayuso
On Wed, Apr 19, 2017 at 09:47:33PM +0200, Linus Lüssing wrote:
> When trying to redirect bridged frames to the bridge device itself or
> a bridge port (brouting) via the dnat target then this currently fails:
> 
> The ethernet destination of the frame is dnat'ed to the MAC address of
> the bridge device or port just fine. However, the IP code drops it in
> the beginning of ip_input.c/ip_rcv() as the dnat target left
> the skb->pkt_type as PACKET_OTHERHOST.
> 
> Fixing this by resetting skb->pkt_type to an appropriate type after
> dnat'ing.

Applied, thanks.

One comment below.
> @@ -18,11 +19,32 @@ static unsigned int
>  ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par)
>  {
>   const struct ebt_nat_info *info = par->targinfo;
> + struct net_device *dev;
>  
>   if (!skb_make_writable(skb, 0))
>   return EBT_DROP;
>  
>   ether_addr_copy(eth_hdr(skb)->h_dest, info->mac);
> +
> + if (is_multicast_ether_addr(info->mac)) {
> + if (is_broadcast_ether_addr(info->mac))
> + skb->pkt_type = PACKET_BROADCAST;
> + else
> + skb->pkt_type = PACKET_MULTICAST;
> + } else {
> + rcu_read_lock();

I'm going to manually remove this explicit rcu_read_lock() here, no
need to resend. We're guaranteed to run from packet path with read
side lock from netfilter hooks. So we just save some cycles from
running this unnecessary nesting.

Let me know if I'm missing anything. Thanks!


Re: [PATCH net-next v5 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch

2017-04-24 Thread Pablo Neira Ayuso
On Mon, Apr 24, 2017 at 08:49:00AM -0400, Jamal Hadi Salim wrote:
> On 17-04-24 05:14 AM, Simon Horman wrote:
> [..]
> 
> >Jamal, I am confused about why are you so concerned about the space
> >consumed by this attribute, it's per-message, right? Is it the bigger
> >picture you are worried about - a similar per-entry flag at some point in
> >the future?
> 
> 
> To me the two worries are one and the same.
> 
> Jiri strongly believes (from a big picture view) we must use
> TLVs for extensibility.
> While I agree with him in general i have strong reservations
> in this case because i can get both extensibility and
> build for performance with using a flag bitmask as the
> content of the TLV.
> 
> A TLV consumes 64 bits minimum. It doesnt matter if we decide
> to use a u8 or a u16, we are still sending 64 bits on that
> TLV with the rest being PADding. Not to be melodramatic, but
> the worst case scenario of putting everything in a TLV for 32
> flags is using about 30x more space than using a bitmask.
> 
> Yes, space is important and if i can express upto 32 flags
> with one TLV rather than 32 TLVs i choose one TLV.
> I am always looking for ways to filter out crap i dont need
> when i do stats collection. I have numerous wounds from fdb
> entries which decided to use a TLV per flag.
> 
> The design approach we have used in netlink is: flags start
> as a bitmap (whether they are on main headers or TLVs); they may be
> complemented with a bitmask/selector (refer to IFLINK messages).
> 
> Let's look at this specific patch I have been sending. I have already
> changed it 3 times, which involved a churn of 3 different flags.
> If you had asked me in the beginning, I would've scratched my head
> thinking of a near-term use for bit #3, #4, etc.
> 
> I am fine with the counter-Postel view of having the kernel
> validate that appropriate bits are set as long as we dont make
> user space to now start learning how to play acrobatics.

Jamal, what performance concern do you have in building this error
message? TLVs are the most flexible way. And this is the error path, so we
should build this message rarely, only if the user sends us something
incorrect, so why bother...


Re: [GIT 0/3] Second Round of IPVS Updates for v4.12

2017-04-15 Thread Pablo Neira Ayuso
On Fri, Apr 14, 2017 at 02:06:25AM +0200, Pablo Neira Ayuso wrote:
> On Fri, Apr 14, 2017 at 08:51:19AM +0900, Simon Horman wrote:
> > On Fri, Apr 14, 2017 at 01:01:34AM +0200, Pablo Neira Ayuso wrote:
> > > Hi Simon,
> > > 
> > > On Mon, Apr 10, 2017 at 09:58:32AM -0700, Simon Horman wrote:
> > > > Hi Pablo,
> > > > 
> > > > please consider these clean-ups and enhancements to IPVS for v4.12.
> > > > 
> > > > * Removal of unused variable
> > > > * Use kzalloc where appropriate
> > > > * More efficient detection of presence of NAT extension
> > > > 
> > > > 
> > > > The following changes since commit 
> > > > 592d42ac7fd36408979e09bf2f170f2595dab7b8:
> > > > 
> > > >   Merge branch 'qed-IOV-cleanups' (2017-03-21 19:02:38 -0700)
> > > > 
> > > > are available in the git repository at:
> > > > 
> > > >   https://git.kernel.org/pub/scm/linux/kernel/git/horms/ipvs-next.git 
> > > > ipvs2-for-v4.12
> > > 
> > > This says:
> > > 
> > > $ git pull
> > > https://git.kernel.org/pub/scm/linux/kernel/git/horms/ipvs-next.git 
> > > ipvs2-for-v4.12
> > > fatal: Couldn't find remote ref ipvs2-for-v4.12
> > > 
> > > I don't see any tag for this name in:
> > > 
> > > https://git.kernel.org/pub/scm/linux/kernel/git/horms/ipvs-next.git/refs/tags
> > 
> > Sorry about that, it looks like I forgot to push the tag.
> > It should be there now.
> 
> I'm hitting a conflict between this and what I have in nf-next.git.
> 
> Please have a look if you can; otherwise I will check tomorrow with a
> fresher mind.

Fixed this here.

Pulled, thanks.


Re: [PATCH nf-next] ipset: remove unused function __ip_set_get_netlink

2017-04-15 Thread Pablo Neira Ayuso
On Fri, Apr 14, 2017 at 04:15:41PM +0200, Jozsef Kadlecsik wrote:
> Hi Pablo,
> 
> On Fri, 14 Apr 2017, Pablo Neira Ayuso wrote:
> 
> > On Mon, Apr 10, 2017 at 03:52:37PM -0400, Aaron Conole wrote:
> > > There are no in-tree callers.
> > 
> > @Jozsef, let me know if I should just take this to save you a pull
> > request.
> 
> Just take it, thank you.
> 
> Acked-by: Jozsef Kadlecsik <kad...@blackhole.kfki.hu>

Applied, thanks Jozsef.


[PATCH 3/9] netfilter: helper: Add the rcu lock when call __nf_conntrack_helper_find

2017-04-13 Thread Pablo Neira Ayuso
From: Gao Feng <f...@ikuai8.com>

When invoking __nf_conntrack_helper_find, the rcu lock must be held to
make sure the helper module is not unloaded.

Currently two callers, nf_conntrack_helper_try_module_get and
ctnetlink_create_expect, don't hold the rcu lock. The remaining
callers, ctnetlink_change_helper, ctnetlink_create_conntrack,
and ctnetlink_glue_attach_expect, already hold the rcu lock
or spin_lock_bh.

Remove the rcu lock from the functions nf_ct_helper_expectfn_find_by_name
and nf_ct_helper_expectfn_find_by_symbol. They return a pointer
that needs rcu protection, so the rcu lock should be held by their
callers, not inside these two functions.

Signed-off-by: Gao Feng <f...@ikuai8.com>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 net/netfilter/nf_conntrack_helper.c  | 17 -
 net/netfilter/nf_conntrack_netlink.c | 10 --
 2 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/net/netfilter/nf_conntrack_helper.c 
b/net/netfilter/nf_conntrack_helper.c
index 6dc44d9b4190..4eeb3418366a 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -158,16 +158,25 @@ nf_conntrack_helper_try_module_get(const char *name, u16 
l3num, u8 protonum)
 {
struct nf_conntrack_helper *h;
 
+   rcu_read_lock();
+
h = __nf_conntrack_helper_find(name, l3num, protonum);
 #ifdef CONFIG_MODULES
if (h == NULL) {
-   if (request_module("nfct-helper-%s", name) == 0)
+   rcu_read_unlock();
+   if (request_module("nfct-helper-%s", name) == 0) {
+   rcu_read_lock();
h = __nf_conntrack_helper_find(name, l3num, protonum);
+   } else {
+   return h;
+   }
}
 #endif
if (h != NULL && !try_module_get(h->me))
h = NULL;
 
+   rcu_read_unlock();
+
return h;
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_helper_try_module_get);
@@ -311,38 +320,36 @@ void nf_ct_helper_expectfn_unregister(struct 
nf_ct_helper_expectfn *n)
 }
 EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_unregister);
 
+/* Caller should hold the rcu lock */
 struct nf_ct_helper_expectfn *
 nf_ct_helper_expectfn_find_by_name(const char *name)
 {
struct nf_ct_helper_expectfn *cur;
bool found = false;
 
-   rcu_read_lock();
list_for_each_entry_rcu(cur, _ct_helper_expectfn_list, head) {
if (!strcmp(cur->name, name)) {
found = true;
break;
}
}
-   rcu_read_unlock();
return found ? cur : NULL;
 }
 EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_find_by_name);
 
+/* Caller should hold the rcu lock */
 struct nf_ct_helper_expectfn *
 nf_ct_helper_expectfn_find_by_symbol(const void *symbol)
 {
struct nf_ct_helper_expectfn *cur;
bool found = false;
 
-   rcu_read_lock();
list_for_each_entry_rcu(cur, _ct_helper_expectfn_list, head) {
if (cur->expectfn == symbol) {
found = true;
break;
}
}
-   rcu_read_unlock();
return found ? cur : NULL;
 }
 EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_find_by_symbol);
diff --git a/net/netfilter/nf_conntrack_netlink.c 
b/net/netfilter/nf_conntrack_netlink.c
index 59ee27deb9a0..06d28ac663df 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -3133,23 +3133,27 @@ ctnetlink_create_expect(struct net *net,
return -ENOENT;
ct = nf_ct_tuplehash_to_ctrack(h);
 
+   rcu_read_lock();
if (cda[CTA_EXPECT_HELP_NAME]) {
const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]);
 
helper = __nf_conntrack_helper_find(helpname, u3,
nf_ct_protonum(ct));
if (helper == NULL) {
+   rcu_read_unlock();
 #ifdef CONFIG_MODULES
if (request_module("nfct-helper-%s", helpname) < 0) {
err = -EOPNOTSUPP;
goto err_ct;
}
+   rcu_read_lock();
helper = __nf_conntrack_helper_find(helpname, u3,
nf_ct_protonum(ct));
if (helper) {
err = -EAGAIN;
-   goto err_ct;
+   goto err_rcu;
}
+   rcu_read_unlock();
 #endif
err = -EOPNOTSUPP;
goto err_ct;
@@ -3159,11 +3163,13 @@ ctnetlink_create_expect(struct net *net,
exp = ctnetlink_alloc_expect(cda, ct, helper, , );
if (IS_ERR(exp)) {
e

[PATCH 7/9] netfilter: nf_ct_expect: use proper RCU list traversal/update APIs

2017-04-13 Thread Pablo Neira Ayuso
From: Liping Zhang <zlpnob...@gmail.com>

We should use proper RCU list APIs to manipulate help->expectations,
as we can dump the conntrack's expectations via nfnetlink, i.e. in
ctnetlink_exp_ct_dump_table(), where only rcu_read_lock is acquired.

So for list traversal, use hlist_for_each_entry_rcu; for list add/del,
use hlist_add_head_rcu and hlist_del_rcu.

Signed-off-by: Liping Zhang <zlpnob...@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 net/netfilter/nf_conntrack_expect.c  | 4 ++--
 net/netfilter/nf_conntrack_netlink.c | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/netfilter/nf_conntrack_expect.c 
b/net/netfilter/nf_conntrack_expect.c
index 4b2e1fb28bb4..d80073037856 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -57,7 +57,7 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect 
*exp,
hlist_del_rcu(>hnode);
net->ct.expect_count--;
 
-   hlist_del(>lnode);
+   hlist_del_rcu(>lnode);
master_help->expecting[exp->class]--;
 
nf_ct_expect_event_report(IPEXP_DESTROY, exp, portid, report);
@@ -363,7 +363,7 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect 
*exp)
/* two references : one for hash insert, one for the timer */
atomic_add(2, >use);
 
-   hlist_add_head(>lnode, _help->expectations);
+   hlist_add_head_rcu(>lnode, _help->expectations);
master_help->expecting[exp->class]++;
 
hlist_add_head_rcu(>hnode, _ct_expect_hash[h]);
diff --git a/net/netfilter/nf_conntrack_netlink.c 
b/net/netfilter/nf_conntrack_netlink.c
index f78eadba343d..dc7dfd68fafe 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2680,8 +2680,8 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct 
netlink_callback *cb)
last = (struct nf_conntrack_expect *)cb->args[1];
for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) {
 restart:
-   hlist_for_each_entry(exp, _ct_expect_hash[cb->args[0]],
-hnode) {
+   hlist_for_each_entry_rcu(exp, _ct_expect_hash[cb->args[0]],
+hnode) {
if (l3proto && exp->tuple.src.l3num != l3proto)
continue;
 
@@ -2732,7 +2732,7 @@ ctnetlink_exp_ct_dump_table(struct sk_buff *skb, struct 
netlink_callback *cb)
rcu_read_lock();
last = (struct nf_conntrack_expect *)cb->args[1];
 restart:
-   hlist_for_each_entry(exp, >expectations, lnode) {
+   hlist_for_each_entry_rcu(exp, >expectations, lnode) {
if (l3proto && exp->tuple.src.l3num != l3proto)
continue;
if (cb->args[1]) {
-- 
2.1.4



[PATCH 9/9] netfilter: ipt_CLUSTERIP: Fix wrong conntrack netns refcnt usage

2017-04-13 Thread Pablo Neira Ayuso
From: Gao Feng <f...@ikuai8.com>

The current code wrongly invokes nf_ct_netns_get in the destroy routine;
it should use nf_ct_netns_put, not nf_ct_netns_get.
This could prevent some modules from being unloaded.

Fixes: ecb2421b5ddf ("netfilter: add and use nf_ct_netns_get/put")
Signed-off-by: Gao Feng <f...@ikuai8.com>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 net/ipv4/netfilter/ipt_CLUSTERIP.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c 
b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 52f26459efc3..9b8841316e7b 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -461,7 +461,7 @@ static void clusterip_tg_destroy(const struct 
xt_tgdtor_param *par)
 
clusterip_config_put(cipinfo->config);
 
-   nf_ct_netns_get(par->net, par->family);
+   nf_ct_netns_put(par->net, par->family);
 }
 
 #ifdef CONFIG_COMPAT
-- 
2.1.4



[PATCH 1/9] netfilter: xt_TCPMSS: add more sanity tests on tcph->doff

2017-04-13 Thread Pablo Neira Ayuso
From: Eric Dumazet <eduma...@google.com>

Denys provided an awesome KASAN report pointing to a
use-after-free in xt_TCPMSS.

I have provided three patches to fix this issue, either in xt_TCPMSS or
in xt_tcpudp.c. It seems xt_TCPMSS patch has the smallest possible
impact.

Signed-off-by: Eric Dumazet <eduma...@google.com>
Reported-by: Denys Fedoryshchenko <nuclear...@nuclearcat.com>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 net/netfilter/xt_TCPMSS.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 27241a767f17..c64aca611ac5 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -104,7 +104,7 @@ tcpmss_mangle_packet(struct sk_buff *skb,
tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
tcp_hdrlen = tcph->doff * 4;
 
-   if (len < tcp_hdrlen)
+   if (len < tcp_hdrlen || tcp_hdrlen < sizeof(struct tcphdr))
return -1;
 
if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
@@ -152,6 +152,10 @@ tcpmss_mangle_packet(struct sk_buff *skb,
if (len > tcp_hdrlen)
return 0;
 
+   /* tcph->doff has 4 bits, do not wrap it to 0 */
+   if (tcp_hdrlen >= 15 * 4)
+   return 0;
+
/*
 * MSS Option not found ?! add it..
 */
-- 
2.1.4



[PATCH 4/9] netfilter: ctnetlink: make it safer when checking the ct helper name

2017-04-13 Thread Pablo Neira Ayuso
From: Liping Zhang <zlpnob...@gmail.com>

One CPU is doing ctnetlink_change_helper(), while another CPU is doing
unhelp() at the same time. So even if help->helper is not NULL at first,
the later statement strcmp(help->helper->name, ...) may still access
the NULL pointer.

So we must use rcu_read_lock and rcu_dereference to prevent such a _bad_
thing from happening.

Fixes: f95d7a46bc57 ("netfilter: ctnetlink: Fix regression in CTA_HELP 
processing")
Signed-off-by: Liping Zhang <zlpnob...@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 net/netfilter/nf_conntrack_netlink.c | 15 ++-
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/net/netfilter/nf_conntrack_netlink.c 
b/net/netfilter/nf_conntrack_netlink.c
index 06d28ac663df..f9c643bc1a8e 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1488,11 +1488,16 @@ static int ctnetlink_change_helper(struct nf_conn *ct,
 * treat the second attempt as a no-op instead of returning
 * an error.
 */
-   if (help && help->helper &&
-   !strcmp(help->helper->name, helpname))
-   return 0;
-   else
-   return -EBUSY;
+   err = -EBUSY;
+   if (help) {
+   rcu_read_lock();
+   helper = rcu_dereference(help->helper);
+   if (helper && !strcmp(helper->name, helpname))
+   err = 0;
+   rcu_read_unlock();
+   }
+
+   return err;
}
 
if (!strcmp(helpname, "")) {
-- 
2.1.4



[PATCH 0/9] Netfilter fixes for net

2017-04-13 Thread Pablo Neira Ayuso
Hi David,

The following patchset contains Netfilter fixes for your net tree,
they are:

1) Missing TCP header sanity check in TCPMSS target, from Eric Dumazet.

2) Incorrect event message type for related conntracks created via
   ctnetlink, from Liping Zhang.

3) Fix incorrect rcu locking when handling helpers from ctnetlink,
   from Gao feng.

4) Fix missing rcu locking when updating helper, from Liping Zhang.

5) Fix missing read_lock_bh when iterating over list of device addresses
   from TPROXY and redirect, also from Liping.

6) Fix crash when trying to dump expectations from conntrack with no
   helper via ctnetlink, from Liping.

7) Missing RCU protection for expectation list update given ctnetlink
   iterates over the list under rcu read lock side, from Liping too.

8) Don't dump autogenerated seed in nft_hash to userspace, this is
   very confusing to the user, again from Liping.

9) Fix wrong conntrack netns module refcount in ipt_CLUSTERIP,
   from Gao feng.

You can pull these changes from:

  git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git

Thanks!



The following changes since commit 0b9aefea860063bb39e36bd7fe6c7087fed0ba87:

  tcp: minimize false-positives on TCP/GRO check (2017-04-03 18:43:41 -0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git HEAD

for you to fetch changes up to fe50543c194e2e1aee2f3eba41fcafd187b3dbde:

  netfilter: ipt_CLUSTERIP: Fix wrong conntrack netns refcnt usage (2017-04-13 
23:21:40 +0200)


Eric Dumazet (1):
  netfilter: xt_TCPMSS: add more sanity tests on tcph->doff

Gao Feng (2):
  netfilter: helper: Add the rcu lock when call __nf_conntrack_helper_find
  netfilter: ipt_CLUSTERIP: Fix wrong conntrack netns refcnt usage

Liping Zhang (6):
  netfilter: ctnetlink: using bit to represent the ct event
  netfilter: ctnetlink: make it safer when checking the ct helper name
  netfilter: make it safer during the inet6_dev->addr_list traversal
  netfilter: ctnetlink: skip dumping expect when nfct_help(ct) is NULL
  netfilter: nf_ct_expect: use proper RCU list traversal/update APIs
  netfilter: nft_hash: do not dump the auto generated seed

 net/ipv4/netfilter/ipt_CLUSTERIP.c   |  2 +-
 net/netfilter/nf_conntrack_expect.c  |  4 ++--
 net/netfilter/nf_conntrack_helper.c  | 17 ++-
 net/netfilter/nf_conntrack_netlink.c | 41 +---
 net/netfilter/nf_nat_redirect.c  |  2 ++
 net/netfilter/nft_hash.c | 10 ++---
 net/netfilter/xt_TCPMSS.c|  6 +-
 net/netfilter/xt_TPROXY.c|  5 -
 8 files changed, 62 insertions(+), 25 deletions(-)


[PATCH 8/9] netfilter: nft_hash: do not dump the auto generated seed

2017-04-13 Thread Pablo Neira Ayuso
From: Liping Zhang <zlpnob...@gmail.com>

This can prevent the nft utility from printing out the auto generated
seed to the user, which is unnecessary and confusing.

Fixes: cb1b69b0b15b ("netfilter: nf_tables: add hash expression")
Signed-off-by: Liping Zhang <zlpnob...@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 net/netfilter/nft_hash.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index eb2721af898d..c4dad1254ead 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -21,6 +21,7 @@ struct nft_hash {
enum nft_registers  sreg:8;
enum nft_registers  dreg:8;
u8  len;
+   boolautogen_seed:1;
u32 modulus;
u32 seed;
u32 offset;
@@ -82,10 +83,12 @@ static int nft_hash_init(const struct nft_ctx *ctx,
if (priv->offset + priv->modulus - 1 < priv->offset)
return -EOVERFLOW;
 
-   if (tb[NFTA_HASH_SEED])
+   if (tb[NFTA_HASH_SEED]) {
priv->seed = ntohl(nla_get_be32(tb[NFTA_HASH_SEED]));
-   else
+   } else {
+   priv->autogen_seed = true;
get_random_bytes(>seed, sizeof(priv->seed));
+   }
 
return nft_validate_register_load(priv->sreg, len) &&
   nft_validate_register_store(ctx, priv->dreg, NULL,
@@ -105,7 +108,8 @@ static int nft_hash_dump(struct sk_buff *skb,
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_HASH_MODULUS, htonl(priv->modulus)))
goto nla_put_failure;
-   if (nla_put_be32(skb, NFTA_HASH_SEED, htonl(priv->seed)))
+   if (!priv->autogen_seed &&
+   nla_put_be32(skb, NFTA_HASH_SEED, htonl(priv->seed)))
goto nla_put_failure;
if (priv->offset != 0)
if (nla_put_be32(skb, NFTA_HASH_OFFSET, htonl(priv->offset)))
-- 
2.1.4



[PATCH 6/9] netfilter: ctnetlink: skip dumping expect when nfct_help(ct) is NULL

2017-04-13 Thread Pablo Neira Ayuso
From: Liping Zhang <zlpnob...@gmail.com>

For IPCTNL_MSG_EXP_GET, if the CTA_EXPECT_MASTER attr is specified, then
the NLM_F_DUMP request will dump the expectations related to this
connection tracking.

But we forget to check whether the conntrack has nf_conn_help or not,
so if nfct_help(ct) is NULL, an oops will happen:

 BUG: unable to handle kernel NULL pointer dereference at 0008
 IP: ctnetlink_exp_ct_dump_table+0xf9/0x1e0 [nf_conntrack_netlink]
 Call Trace:
  ? ctnetlink_exp_ct_dump_table+0x75/0x1e0 [nf_conntrack_netlink]
  netlink_dump+0x124/0x2a0
  __netlink_dump_start+0x161/0x190
  ctnetlink_dump_exp_ct+0x16c/0x1bc [nf_conntrack_netlink]
  ? ctnetlink_exp_fill_info.constprop.33+0xf0/0xf0 [nf_conntrack_netlink]
  ? ctnetlink_glue_seqadj+0x20/0x20 [nf_conntrack_netlink]
  ctnetlink_get_expect+0x32e/0x370 [nf_conntrack_netlink]
  ? debug_lockdep_rcu_enabled+0x1d/0x20
  nfnetlink_rcv_msg+0x60a/0x6a9 [nfnetlink]
  ? nfnetlink_rcv_msg+0x1b9/0x6a9 [nfnetlink]
  [...]

Signed-off-by: Liping Zhang <zlpnob...@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 net/netfilter/nf_conntrack_netlink.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/net/netfilter/nf_conntrack_netlink.c 
b/net/netfilter/nf_conntrack_netlink.c
index f9c643bc1a8e..f78eadba343d 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2794,6 +2794,12 @@ static int ctnetlink_dump_exp_ct(struct net *net, struct 
sock *ctnl,
return -ENOENT;
 
ct = nf_ct_tuplehash_to_ctrack(h);
+   /* No expectation linked to this connection tracking. */
+   if (!nfct_help(ct)) {
+   nf_ct_put(ct);
+   return 0;
+   }
+
c.data = ct;
 
err = netlink_dump_start(ctnl, skb, nlh, );
-- 
2.1.4



[PATCH 5/9] netfilter: make it safer during the inet6_dev->addr_list traversal

2017-04-13 Thread Pablo Neira Ayuso
From: Liping Zhang <zlpnob...@gmail.com>

inet6_dev->addr_list is protected by inet6_dev->lock, so only using
rcu_read_lock is not enough, we should acquire read_lock_bh(&idev->lock)
before the inet6_dev->addr_list traversal.

Signed-off-by: Liping Zhang <zlpnob...@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 net/netfilter/nf_nat_redirect.c | 2 ++
 net/netfilter/xt_TPROXY.c   | 5 -
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c
index d43869879fcf..86067560a318 100644
--- a/net/netfilter/nf_nat_redirect.c
+++ b/net/netfilter/nf_nat_redirect.c
@@ -101,11 +101,13 @@ nf_nat_redirect_ipv6(struct sk_buff *skb, const struct 
nf_nat_range *range,
rcu_read_lock();
idev = __in6_dev_get(skb->dev);
if (idev != NULL) {
+   read_lock_bh(>lock);
list_for_each_entry(ifa, >addr_list, if_list) {
newdst = ifa->addr;
addr = true;
break;
}
+   read_unlock_bh(>lock);
}
rcu_read_unlock();
 
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 80cb7babeb64..df7f1df00330 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -393,7 +393,8 @@ tproxy_laddr6(struct sk_buff *skb, const struct in6_addr 
*user_laddr,
 
rcu_read_lock();
indev = __in6_dev_get(skb->dev);
-   if (indev)
+   if (indev) {
+   read_lock_bh(>lock);
list_for_each_entry(ifa, >addr_list, if_list) {
if (ifa->flags & (IFA_F_TENTATIVE | IFA_F_DEPRECATED))
continue;
@@ -401,6 +402,8 @@ tproxy_laddr6(struct sk_buff *skb, const struct in6_addr 
*user_laddr,
laddr = >addr;
break;
}
+   read_unlock_bh(>lock);
+   }
rcu_read_unlock();
 
return laddr ? laddr : daddr;
-- 
2.1.4



[PATCH 2/9] netfilter: ctnetlink: using bit to represent the ct event

2017-04-13 Thread Pablo Neira Ayuso
From: Liping Zhang <zlpnob...@gmail.com>

Otherwise, creating a new conntrack via nfnetlink:
  # conntrack -I -p udp -s 1.1.1.1 -d 2.2.2.2 -t 10 --sport 10 --dport 20

will emit the wrong ct events (where UPDATE should be NEW):
  # conntrack -E
  [UPDATE] udp  17 10 src=1.1.1.1 dst=2.2.2.2 sport=10 dport=20
  [UNREPLIED] src=2.2.2.2 dst=1.1.1.1 sport=20 dport=10 mark=0

Signed-off-by: Liping Zhang <zlpnob...@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 net/netfilter/nf_conntrack_netlink.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nf_conntrack_netlink.c 
b/net/netfilter/nf_conntrack_netlink.c
index 908d858034e4..59ee27deb9a0 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1929,9 +1929,9 @@ static int ctnetlink_new_conntrack(struct net *net, 
struct sock *ctnl,
 
err = 0;
if (test_bit(IPS_EXPECTED_BIT, >status))
-   events = IPCT_RELATED;
+   events = 1 << IPCT_RELATED;
else
-   events = IPCT_NEW;
+   events = 1 << IPCT_NEW;
 
if (cda[CTA_LABELS] &&
ctnetlink_attach_labels(ct, cda) == 0)
-- 
2.1.4



Re: [PATCH nf-next] ipvs: remove unused function ip_vs_set_state_timeout

2017-04-13 Thread Pablo Neira Ayuso
On Mon, Apr 10, 2017 at 03:50:44PM -0400, Aaron Conole wrote:
> There are no in-tree callers of this function and it isn't exported.

Simon, let me know if you want to take this, or just add your
Signed-off-by.

Thanks!

> Signed-off-by: Aaron Conole 
> ---
>  include/net/ip_vs.h  |  2 --
>  net/netfilter/ipvs/ip_vs_proto.c | 22 --
>  2 files changed, 24 deletions(-)
> 
> diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
> index 8a4a57b8..c76fedb 100644
> --- a/include/net/ip_vs.h
> +++ b/include/net/ip_vs.h
> @@ -1349,8 +1349,6 @@ int ip_vs_protocol_init(void);
>  void ip_vs_protocol_cleanup(void);
>  void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags);
>  int *ip_vs_create_timeout_table(int *table, int size);
> -int ip_vs_set_state_timeout(int *table, int num, const char *const *names,
> - const char *name, int to);
>  void ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
>  const struct sk_buff *skb, int offset,
>  const char *msg);
> diff --git a/net/netfilter/ipvs/ip_vs_proto.c 
> b/net/netfilter/ipvs/ip_vs_proto.c
> index 8ae4807..ca880a3 100644
> --- a/net/netfilter/ipvs/ip_vs_proto.c
> +++ b/net/netfilter/ipvs/ip_vs_proto.c
> @@ -193,28 +193,6 @@ ip_vs_create_timeout_table(int *table, int size)
>  }
>  
>  
> -/*
> - *   Set timeout value for state specified by name
> - */
> -int
> -ip_vs_set_state_timeout(int *table, int num, const char *const *names,
> - const char *name, int to)
> -{
> - int i;
> -
> - if (!table || !name || !to)
> - return -EINVAL;
> -
> - for (i = 0; i < num; i++) {
> - if (strcmp(names[i], name))
> - continue;
> - table[i] = to * HZ;
> - return 0;
> - }
> - return -ENOENT;
> -}
> -
> -
>  const char * ip_vs_state_name(__u16 proto, int state)
>  {
>   struct ip_vs_protocol *pp = ip_vs_proto_get(proto);
> -- 
> 2.9.3
> 


Re: [GIT 0/3] Second Round of IPVS Updates for v4.12

2017-04-13 Thread Pablo Neira Ayuso
On Fri, Apr 14, 2017 at 08:51:19AM +0900, Simon Horman wrote:
> On Fri, Apr 14, 2017 at 01:01:34AM +0200, Pablo Neira Ayuso wrote:
> > Hi Simon,
> > 
> > On Mon, Apr 10, 2017 at 09:58:32AM -0700, Simon Horman wrote:
> > > Hi Pablo,
> > > 
> > > please consider these clean-ups and enhancements to IPVS for v4.12.
> > > 
> > > * Removal of unused variable
> > > * Use kzalloc where appropriate
> > > * More efficient detection of presence of NAT extension
> > > 
> > > 
> > > The following changes since commit 
> > > 592d42ac7fd36408979e09bf2f170f2595dab7b8:
> > > 
> > >   Merge branch 'qed-IOV-cleanups' (2017-03-21 19:02:38 -0700)
> > > 
> > > are available in the git repository at:
> > > 
> > >   https://git.kernel.org/pub/scm/linux/kernel/git/horms/ipvs-next.git 
> > > ipvs2-for-v4.12
> > 
> > This says:
> > 
> > $ git pull
> > https://git.kernel.org/pub/scm/linux/kernel/git/horms/ipvs-next.git 
> > ipvs2-for-v4.12
> > fatal: Couldn't find remote ref ipvs2-for-v4.12
> > 
> > I don't see any tag for this name in:
> > 
> > https://git.kernel.org/pub/scm/linux/kernel/git/horms/ipvs-next.git/refs/tags
> 
> Sorry about that, it looks like I forgot to push the tag.
> It should be there now.

I'm hitting a conflict between this and what I have in nf-next.git.

Please have a look if you can; otherwise I will check tomorrow with a
fresher mind.


Re: [PATCH nf-next] ipset: remove unused function __ip_set_get_netlink

2017-04-13 Thread Pablo Neira Ayuso
On Mon, Apr 10, 2017 at 03:52:37PM -0400, Aaron Conole wrote:
> There are no in-tree callers.

@Jozsef, let me know if I should just take this to save you a pull
request.

Thanks.

> Signed-off-by: Aaron Conole 
> ---
>  net/netfilter/ipset/ip_set_core.c | 8 
>  1 file changed, 8 deletions(-)
> 
> diff --git a/net/netfilter/ipset/ip_set_core.c 
> b/net/netfilter/ipset/ip_set_core.c
> index c296f9b..68ba531 100644
> --- a/net/netfilter/ipset/ip_set_core.c
> +++ b/net/netfilter/ipset/ip_set_core.c
> @@ -501,14 +501,6 @@ __ip_set_put(struct ip_set *set)
>   * a separate reference counter
>   */
>  static inline void
> -__ip_set_get_netlink(struct ip_set *set)
> -{
> - write_lock_bh(&ip_set_ref_lock);
> - set->ref_netlink++;
> - write_unlock_bh(&ip_set_ref_lock);
> -}
> -
> -static inline void
>  __ip_set_put_netlink(struct ip_set *set)
>  {
>   write_lock_bh(&ip_set_ref_lock);
> -- 
> 2.9.3
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH nf-next] nf_conntrack: remove double assignment

2017-04-13 Thread Pablo Neira Ayuso
On Wed, Apr 12, 2017 at 04:32:54PM -0400, Aaron Conole wrote:
> The protonet pointer will unconditionally be rewritten, so just do the
> needed assignment first.

Also applied, thanks.


Re: [PATCH nf-next] nf_tables: remove double return statement

2017-04-13 Thread Pablo Neira Ayuso
Applied, thanks.


Re: [GIT 0/3] Second Round of IPVS Updates for v4.12

2017-04-13 Thread Pablo Neira Ayuso
Hi Simon,

On Mon, Apr 10, 2017 at 09:58:32AM -0700, Simon Horman wrote:
> Hi Pablo,
> 
> please consider these clean-ups and enhancements to IPVS for v4.12.
> 
> * Removal unused variable
> * Use kzalloc where appropriate
> * More efficient detection of presence of NAT extension
> 
> 
> The following changes since commit 592d42ac7fd36408979e09bf2f170f2595dab7b8:
> 
>   Merge branch 'qed-IOV-cleanups' (2017-03-21 19:02:38 -0700)
> 
> are available in the git repository at:
> 
>   https://git.kernel.org/pub/scm/linux/kernel/git/horms/ipvs-next.git 
> ipvs2-for-v4.12

This says:

$ git pull
https://git.kernel.org/pub/scm/linux/kernel/git/horms/ipvs-next.git 
ipvs2-for-v4.12
fatal: Couldn't find remote ref ipvs2-for-v4.12

I don't see any tag for this name in:

https://git.kernel.org/pub/scm/linux/kernel/git/horms/ipvs-next.git/refs/tags


Re: [PATCH v3 1/5] netlink: extended ACK reporting

2017-04-11 Thread Pablo Neira Ayuso
On Tue, Apr 11, 2017 at 08:25:57AM -0600, David Ahern wrote:
> On 4/11/17 1:02 AM, Johannes Berg wrote:
> > On Tue, 2017-04-11 at 08:59 +0200, Pablo Neira Ayuso wrote:
> >> CAP_ACK means: trim off the payload that the netlink error message
> >> is embedding, just like ICMP error does.
> >>
> >> What is exactly your concern? If the user explicitly requests this
> >> via socket option for this socket, then we're expecting they do the
> >> right handling for what they're asking for.
> > 
> > I think David's concern was that when you want to parse the ACK in a
> > library (or application), you may not easily know if the application
> > (or library) requested capping.
> 
> exactly.

Then, the library needs to be extended to enable this handling to
modify the way it needs to handle errors, together with the
setsockopt().


Re: [PATCH v3 1/5] netlink: extended ACK reporting

2017-04-11 Thread Pablo Neira Ayuso
On Mon, Apr 10, 2017 at 09:35:27AM -0600, David Ahern wrote:
> On 4/10/17 9:30 AM, Johannes Berg wrote:
> > On Mon, 2017-04-10 at 09:26 -0600, David Ahern wrote:
> >> On 4/8/17 2:24 PM, Johannes Berg wrote:
> >>> @@ -2300,14 +2332,35 @@ void netlink_ack(struct sk_buff *in_skb,
> >>> struct nlmsghdr *nlh, int err)
> >>> NLMSG_ERROR, payload, 0);
> >>>   errmsg = nlmsg_data(rep);
> >>>   errmsg->error = err;
> >>> - memcpy(&errmsg->msg, nlh, payload > sizeof(*errmsg) ?
> >>> nlh->nlmsg_len : sizeof(*nlh));
> >>> + memcpy(&errmsg->msg, nlh,
> >>> +!(nlk->flags & NETLINK_F_CAP_ACK) ? nlh->nlmsg_len
> >>> +  : sizeof(*nlh));
> >>> +
> >>
> >> generically this makes userspace parsing more problematic: the
> >> parsing layer may not know if the socket option has been set to
> >> precisely know the size of errmsg->msg and how much data needs to be
> >> skipped to get to the new attributes.
> > 
> > Yes, I know. I'd hope that userspace can remember that per socket - I
> > don't see a good other way to do this.
> > 
> > If we insert the TLVs in front of, or instead of (with a TLV containing
> > it), the request message then at least libnl's debugging will need to
> > be changed.
> > 
> > As it is, I can assume that libnl will not set the CAP setting, and
> > everything works fine even if I don't change libnl, which makes things
> > easier.
> > 
> > Do you have any better ideas?
> 
> NETLINK_F_CAP_ACK and NETLINK_F_EXT_ACK should be incompatible -- if one
> is set the other can not be set. CAP_ACK means abbreviate the response
> and EXT_ACK means give me more data.

CAP_ACK means: trim off the payload that the netlink error message
is embedding, just like ICMP error does.

What is exactly your concern? If the user explicitly requests this via
socket option for this socket, then we're expecting they do the right
handling for what they're asking for.


Re: [PATCH] net: netfilter: ipvs: Replace explicit NULL comparison

2017-04-10 Thread Pablo Neira Ayuso
Arushi,

On Sun, Apr 09, 2017 at 06:21:51AM +0800, kbuild test robot wrote:
> Hi Arushi,
> 
> [auto build test WARNING on ipvs-next/master]
> [also build test WARNING on v4.11-rc5 next-20170407]
> [if your patch is applied to the wrong git tree, please drop us a note to 
> help improve the system]
> 
> url:
> https://github.com/0day-ci/linux/commits/Arushi-Singhal/net-netfilter-ipvs-Replace-explicit-NULL-comparison/20170409-044710
> base:   https://git.kernel.org/pub/scm/linux/kernel/git/horms/ipvs-next.git 
> master
> config: i386-randconfig-x002-201715 (attached as .config)
> compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
> reproduce:
> # save the attached .config to linux build tree
> make ARCH=i386 
> 
> All warnings (new ones prefixed by >>):
> 
>net/netfilter/ipvs/ip_vs_proto.c: In function 'ip_vs_protocol_net_cleanup':
> >> net/netfilter/ipvs/ip_vs_proto.c:350:3: warning: suggest parentheses 
> >> around assignment used as truth value [-Wparentheses]
>   while (pd = ipvs->proto_data_table[i])
>   ^

This is bad, you have to be more careful in what you do. This is not a
speed coding contest.

Showing careful patchset handling, even if you submit fewer of them, is
much preferred in my opinion.


Re: [PATCH] net: netfilter: Replace explicit NULL comparisons

2017-04-10 Thread Pablo Neira Ayuso
On Sun, Apr 09, 2017 at 09:12:18AM +0530, Arushi Singhal wrote:
> On Sun, Apr 9, 2017 at 1:44 AM, Pablo Neira Ayuso <pa...@netfilter.org>
> wrote:
> 
> > On Sat, Apr 08, 2017 at 08:21:56PM +0200, Jan Engelhardt wrote:
> > > On Saturday 2017-04-08 19:21, Arushi Singhal wrote:
> > >
> > > >Replace explicit NULL comparison with ! operator to simplify code.
> > >
> > > I still wouldn't do this, for the same reason as before. Comparing to
> > > NULL explicitly more or less gave an extra guarantee that the other
> > > operand was also a pointer.
> >
> > > Arushi, where does it say in the coding style that this is preferred?
> 
> This is reported by checkpatch.pl script.

I don't find it in the coding style. I think this is what it stands as
preference in this case IMO. Otherwise, it would be good to get the
kernel coding style document in sync with it, including the reason why
this way of expressing things is cleaner. We have to justify the changes.


Re: [PATCH net] netfilter: xt_TCPMSS: add more sanity tests on tcph->doff

2017-04-08 Thread Pablo Neira Ayuso
On Mon, Apr 03, 2017 at 10:55:11AM -0700, Eric Dumazet wrote:
> From: Eric Dumazet 
> 
> Denys provided an awesome KASAN report pointing to an use
> after free in xt_TCPMSS
> 
> I have provided three patches to fix this issue, either in xt_TCPMSS or
> in xt_tcpudp.c. It seems xt_TCPMSS patch has the smallest possible
> impact.

Applied to nf.git, thanks!


Re: [PATCH] net: netfilter: Replace explicit NULL comparisons

2017-04-08 Thread Pablo Neira Ayuso
On Sat, Apr 08, 2017 at 08:21:56PM +0200, Jan Engelhardt wrote:
> On Saturday 2017-04-08 19:21, Arushi Singhal wrote:
> 
> >Replace explicit NULL comparison with ! operator to simplify code.
> 
> I still wouldn't do this, for the same reason as before. Comparing to 
> NULL explicitly more or less gave an extra guarantee that the other 
> operand was also a pointer.

Arushi, where does it say in the coding style that this is preferred?


Re: [PATCH] net: ipv6: Remove unneccessary comments

2017-04-08 Thread Pablo Neira Ayuso
On Sat, Apr 08, 2017 at 09:19:30PM +0530, Arushi Singhal wrote:
> This comments are obsolete and should go, as there are no set of rules per
> CPU anymore.

Applied, thanks.


Re: [RFC 0/3] netlink: extended error reporting

2017-04-07 Thread Pablo Neira Ayuso
On Fri, Apr 07, 2017 at 09:29:17PM +0200, Johannes Berg wrote:
> On Fri, 2017-04-07 at 21:21 +0200, Pablo Neira Ayuso wrote:
> > I think the most flexible way is to pass the container error
> > structure to nla_parse() so it sets this for you. This would also
> > save tons of "malformed attribute" strings.
> 
> Yes, for sure. The thing is we'll probalby have to pass down the
> request skb *and* the error struct so that we can get the offset, and
> this seems like the generic thing that we really should try to get the
> most information generated.

We only need to store the pointer to the attribute in the error
container structure. We can calculate the offset from nl_err() by
passing the skbuff as parameter there, right?


Re: [RFC 0/3] netlink: extended error reporting

2017-04-07 Thread Pablo Neira Ayuso
On Fri, Apr 07, 2017 at 12:20:53PM -0700, David Miller wrote:
[...]
> Let's just discuss the UAPI, since people complain we don't talk
> about that enough :-)  For those playing at home it is three new
> attributes returned in a netlink ACK when the application asks
> for the extended response:
> 
>   NLMSGERR_ATTR_MSG   string  Extended error string
>   NLMSGERR_ATTR_OFFS  u32 Byte offset to netlink element causing 
> error
>   NLMSGERR_ATTR_CODE  u32 Subsystem specific error code
>   NLMSGERR_ATTR_ATTR  u16 Netlink attribute triggering error or 
> missing

I think it would be good to have a definition that caps the maximum
string length at something reasonable. This can be added in a
follow-up patch BTW. That way, people will come back to us and request
larger strings with a reason why they need more room for this.

In general, my main concern with strings is that they can be used
more freely than these u32 offsets and error codes, and
specifically how inconsistent these strings will look across
different netlink subsystems.

Anyway, as long as this is optional (not every subsystem is forced to
use strings) I'm fine with it :).


Re: [RFC 0/3] netlink: extended error reporting

2017-04-07 Thread Pablo Neira Ayuso
On Fri, Apr 07, 2017 at 12:22:23PM -0700, David Miller wrote:
> From: Johannes Berg <johan...@sipsolutions.net>
> Date: Fri, 07 Apr 2017 21:09:45 +0200
> 
> > On Fri, 2017-04-07 at 21:06 +0200, Pablo Neira Ayuso wrote:
> >> On Fri, Apr 07, 2017 at 08:59:12PM +0200, Johannes Berg wrote:
> >> [...]
> >> > Heh. I think I really want to solve - at least partially -
> >> > nla_parse()
> >> > to see that it can be done this way. It'd be nice to even transform
> >> > all
> >> > the callers (I generated half of these patches with spatch anyway)
> >> > to
> >> > have at least that.
> >> 
> >> We can just have a modified version of nla_parse that deals with
> >> this.
> > 
> > Yes, but we need to figure out a good way to have the offset.
> > 
> > We also need to see if we want to *force* having the offset. In some
> > sense that'd be useful, in another it might be very complicated to fill
> > it in at all times, if for example errors come from lower layers like
> > drivers.
> 
> It has to be optional, some kinds of errors don't have an exact
> context per-se.
> 
> Also another way to look at this is that we're providing a lot of
> new power and expressability.  So even if only one aspect of the
> new error reporting is used it's a positive step forward.
> 
> So allow offset "0" meaning "unspecified".

Instead, we can just not send the offset attribute to userspace if
it's not specified. So missing attribute means "unspecified".

I'm always a bit worried about this "0 means something" semantics :)


Re: [RFC 0/3] netlink: extended error reporting

2017-04-07 Thread Pablo Neira Ayuso
On Fri, Apr 07, 2017 at 09:09:45PM +0200, Johannes Berg wrote:
> On Fri, 2017-04-07 at 21:06 +0200, Pablo Neira Ayuso wrote:
> > On Fri, Apr 07, 2017 at 08:59:12PM +0200, Johannes Berg wrote:
> > [...]
> > > Heh. I think I really want to solve - at least partially -
> > > nla_parse()
> > > to see that it can be done this way. It'd be nice to even transform
> > > all
> > > the callers (I generated half of these patches with spatch anyway)
> > > to
> > > have at least that.
> > 
> > We can just have a modified version of nla_parse that deals with
> > this.
> 
> Yes, but we need to figure out a good way to have the offset.
> 
> We also need to see if we want to *force* having the offset. In some
> sense that'd be useful, in another it might be very complicated to fill
> it in at all times, if for example errors come from lower layers like
> drivers.

For my use cases in netfilter, the attributes and a specific error
code should be enough to figure out what is wrong. I will not need
strings.

BTW, we may not have an offset, eg. EINVAL because of missing
attribute. Given we have different requirements, I would leave it to
each subsystem to decide what netlink error attributes are specified.

> (It's ultimately always going to be optional since we'll continue
> returning errors without *any* extended error information - likely
> indefinitely - but if we have a wrong attribute, should we always have
> an offset? Would be nice, but could be difficult in practice)
> 
> > We can probably use struct nla_policy to place the extended error
> > code or the string (as we discussed I don't need string errors, but
> > I'm fine if other people find it useful).
> 
> I don't think for the error strings really would be useful for
> nla_parse() or a policy - we can return something generic like
> "malformed attribute" there as a string, and hopefully point to the
> attribute/offset from there generically. I just really want to see how
> to actually do that.

I think the most flexible way is to pass the container error structure
to nla_parse() so it sets this for you. This would also save tons of
"malformed attribute" strings.


Re: [RFC 0/3] netlink: extended error reporting

2017-04-07 Thread Pablo Neira Ayuso
On Fri, Apr 07, 2017 at 08:59:12PM +0200, Johannes Berg wrote:
[...]
> Heh. I think I really want to solve - at least partially - nla_parse()
> to see that it can be done this way. It'd be nice to even transform all
> the callers (I generated half of these patches with spatch anyway) to
> have at least that.

We can just have a modified version of nla_parse that deals with this.
We can probably use struct nla_policy to place the extended error code
or the string (as we discussed I don't need string errors, but I'm
fine if other people find it useful).

Thanks!


Re: [RFC 0/3] netlink: extended error reporting

2017-04-07 Thread Pablo Neira Ayuso
On Fri, Apr 07, 2017 at 11:53:15AM -0700, David Miller wrote:
> From: Johannes Berg <johan...@sipsolutions.net>
> Date: Fri,  7 Apr 2017 20:26:17 +0200
> 
> > So this is my first draft of what we'd talked about at netconf.
> > I'm not super happy with the way we have to pass the extended
> > error struct, but I don't see a way to implement reporting any
> > dynamic information (like error offsets) in any other way.
> > 
> > Alexander Shishkin had a nice way of reporting static extended
> > error data, but that isn't really suitable for reporting the
> > offset or even reporting the broken attribute from nla_parse().
> > 
> > Speaking of nla_parse(), that'll be somewhat complicated to do
> > since we'll have to track the offsets of where we're parsing,
> > but it might be possible since the nlattrs are just pointers
> > into the message, so (optionally?) passing the skb as well can
> > allow us to fill the offset information.
> 
> I like it, nice work.
> 
> I know people want dynamically generated strings and stuff, and we can
> get there, but I prefer that the first thing we commit is super simple.
> 
> Someone gave me a hard time about the fact that we've been talking
> about this idea for years but nothing ever happens.
> 
> I'm tempted to apply this as-is just to show that person that things
> do in fact happen eventually :-)

We can just send follow up patches to refine, I think it's a good
start, Johannes?

BTW, for this co-authored effort in designing this:

Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>

Thanks!


Re: [PATCH] net: ipv6: Removed unnecessary parenthesis

2017-04-07 Thread Pablo Neira Ayuso
On Wed, Mar 29, 2017 at 02:32:43PM +0530, Arushi Singhal wrote:
> Removed parentheses on the right hand side of assignment, as they are
> not required. The following coccinelle script was used to fix this
> issue:
> 
> @@
> local idexpression id;
> expression e;
> @@
> 
> id =
> -(
> e
> -)

You sent me two patches for this:

http://patchwork.ozlabs.org/patch/744661/
http://patchwork.ozlabs.org/patch/744702/

The follow-up one does not indicate v2, so I'm confused. I'm not sure
whether this is a resend or a new version.

Another question that is not clear from your description is whether you
had a look at the entire tree to find more spots like this, or you just
found this one and updated it.

Ideally, it would be better to fix everything in one go, so we don't
have to deal with oneliner patches that just fix one single spot.

Thanks!


Re: [PATCH 1/4] net: netfilter:Remove exceptional & on function name

2017-04-07 Thread Pablo Neira Ayuso
On Sun, Apr 02, 2017 at 02:52:12PM +0530, Arushi Singhal wrote:
> Remove & from function pointers to conform to the style found elsewhere
> in the file. Done using the following semantic patch
> 
> // <smpl>
> @r@
> identifier f;
> @@
> 
> f(...) { ... }
> @@
> identifier r.f;
> @@
> 
> - &f
> + f
> // </smpl>

I have collapsed these four patches.

You only need to send one patch per logical update.

I also detected that you missed one spot in nft_hash.c, I manually
updated this but next time you have to be more careful.

Thanks!


Re: [Outreachy kernel] [PATCH] net: ipv6: netfilter: Format block comments.

2017-04-07 Thread Pablo Neira Ayuso
On Wed, Mar 29, 2017 at 02:09:43PM +0530, Arushi Singhal wrote:
> Fix checkpatch warnings:
> WARNING: Block comments use a trailing */ on a separate line
> WARNING: Block comments use * on subsequent lines
> 
> Signed-off-by: Arushi Singhal 
> ---
>  net/ipv6/netfilter/ip6_tables.c | 16 
>  1 file changed, 8 insertions(+), 8 deletions(-)
> 
> diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
> index b8cb61c27aa1..ac69ce3bfa1e 100644
> --- a/net/ipv6/netfilter/ip6_tables.c
> +++ b/net/ipv6/netfilter/ip6_tables.c
> @@ -51,14 +51,14 @@ void *ip6t_alloc_initial_table(const struct xt_table 
> *info)
>  }
>  EXPORT_SYMBOL_GPL(ip6t_alloc_initial_table);
>  
> -/*
> -   We keep a set of rules for each CPU, so we can avoid write-locking
> -   them in the softirq when updating the counters and therefore
> -   only need to read-lock in the softirq; doing a write_lock_bh() in user
> -   context stops packets coming through and allows user context to read
> -   the counters or update the rules.
> -
> -   Hence the start of any table is given by get_table() below.  */
> +/* We keep a set of rules for each CPU, so we can avoid write-locking
> + * them in the softirq when updating the counters and therefore
> + * only need to read-lock in the softirq; doing a write_lock_bh() in user
> + * context stops packets coming through and allows user context to read
> + * the counters or update the rules.
> + *
> + * Hence the start of any table is given by get_table() below.
> + */

This comment is obsolete and should go.

We have no set of rules per CPU anymore.


Re: [PATCH] net: netfilter: Use list_{next/prev}_entry instead of list_entry

2017-04-07 Thread Pablo Neira Ayuso
On Wed, Mar 29, 2017 at 11:15:40AM +0530, simran singhal wrote:
> This patch replace list_entry with list_prev_entry as it makes the
> code more clear to read.

Also applied, thanks.


Re: [PATCH] net: netfilter: Use seq_puts()/seq_putc() where possible

2017-04-07 Thread Pablo Neira Ayuso
On Wed, Mar 29, 2017 at 03:25:17AM +0530, simran singhal wrote:
> For string without format specifiers, use seq_puts(). For
> seq_printf("\n"), use seq_putc('\n').

Applied, thanks.


Re: [PATCH v2] net: Remove unnecessary cast on void pointer

2017-04-07 Thread Pablo Neira Ayuso
On Wed, Mar 29, 2017 at 12:35:16AM +0530, simran singhal wrote:
> The following Coccinelle script was used to detect this:
> @r@
> expression x;
> void* e;
> type T;
> identifier f;
> @@
> (
>   *((T *)e)
> |
>   ((T *)x)[...]
> |
>   ((T*)x)->f
> |
> 
> - (T*)
>   e
> )
> 
> Unnecessary parantheses are also remove.

Applied, thanks.


Re: [PATCH] net: ipv4: netfilter: Remove unused function nf_nat_need_gre()

2017-04-06 Thread Pablo Neira Ayuso
On Sat, Apr 01, 2017 at 07:06:33PM +0530, simran singhal wrote:
> The function nf_nat_need_gre() on being called, simply returns
> back. The function doesn't have FIXME code around.
> Hence, nf_nat_need_gre() and its calls have been removed.
> 
> Signed-off-by: simran singhal 
> ---
>  net/ipv4/netfilter/nf_nat_pptp.c  | 2 --
>  net/ipv4/netfilter/nf_nat_proto_gre.c | 6 --
>  2 files changed, 8 deletions(-)
> 
> diff --git a/net/ipv4/netfilter/nf_nat_pptp.c 
> b/net/ipv4/netfilter/nf_nat_pptp.c
> index b3ca21b..747e737 100644
> --- a/net/ipv4/netfilter/nf_nat_pptp.c
> +++ b/net/ipv4/netfilter/nf_nat_pptp.c
> @@ -282,8 +282,6 @@ pptp_inbound_pkt(struct sk_buff *skb,
>  
>  static int __init nf_nat_helper_pptp_init(void)
>  {
> - nf_nat_need_gre();
> -
>   BUG_ON(nf_nat_pptp_hook_outbound != NULL);
>   RCU_INIT_POINTER(nf_nat_pptp_hook_outbound, pptp_outbound_pkt);
>  
> diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c 
> b/net/ipv4/netfilter/nf_nat_proto_gre.c
> index edf0500..c020a4d 100644
> --- a/net/ipv4/netfilter/nf_nat_proto_gre.c
> +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
> @@ -142,9 +142,3 @@ static void __exit nf_nat_proto_gre_fini(void)
>  
>  module_init(nf_nat_proto_gre_init);
>  module_exit(nf_nat_proto_gre_fini);
> -
> -void nf_nat_need_gre(void)
> -{
> - return;
> -}
> -EXPORT_SYMBOL_GPL(nf_nat_need_gre);

There is a good reason why we have this :)

By digging out and doing a bit of software archeology work via the
'git annotate' you know this triggers an explicit module dependency.


Re: [Outreachy kernel] [PATCH] net: netfilter: Remove typedef from "typedef struct bitstr_t".

2017-04-06 Thread Pablo Neira Ayuso
On Tue, Mar 28, 2017 at 11:54:13PM +0530, Arushi Singhal wrote:
> This patch removes typedefs from struct and renames it from "typedef struct
> bitstr_t" to "struct bitstr" as per kernel coding standards."
> 
> Signed-off-by: Arushi Singhal 
> ---
>  net/netfilter/nf_conntrack_h323_asn1.c | 80 
> +-
>  1 file changed, 40 insertions(+), 40 deletions(-)
> 
> diff --git a/net/netfilter/nf_conntrack_h323_asn1.c 
> b/net/netfilter/nf_conntrack_h323_asn1.c
> index fb8cf238a76f..4502c0d6071d 100644
> --- a/net/netfilter/nf_conntrack_h323_asn1.c
> +++ b/net/netfilter/nf_conntrack_h323_asn1.c
> @@ -91,41 +91,41 @@ struct field {
>  };
>  
>  /* Bit Stream */
> -typedef struct {
> +struct bitstr {

Same thing here, I'd suggest you use 'struct h323_bitstr' instead.

Thanks!


Re: [Outreachy kernel] [PATCH v3] net: netfilter: Add nfnl_msg_type() helper function

2017-04-06 Thread Pablo Neira Ayuso
Hi,

On Tue, Mar 28, 2017 at 10:27:32PM +0530, Arushi Singhal wrote:
> To remove complexity of code the function is added in nfnetlink.h
> to make code more clear and readable.
> This is opencoded in a way that makes it error prone for future
> netfilter netlink subsystems.
> 
> Signed-off-by: Arushi Singhal <arushisinghal19971...@gmail.com>
> ---
> changes in v3
>  -make the subject more clear.
> 
>  include/linux/netfilter/nfnetlink.h  |  6 ++
>  net/netfilter/nf_conntrack_netlink.c | 12 +++-
>  net/netfilter/nfnetlink_acct.c   |  2 +-
>  net/netfilter/nfnetlink_cthelper.c   |  2 +-
>  net/netfilter/nfnetlink_cttimeout.c  |  4 ++--
>  5 files changed, 17 insertions(+), 9 deletions(-)
> 
> diff --git a/include/linux/netfilter/nfnetlink.h 
> b/include/linux/netfilter/nfnetlink.h
> index 1b49209dd5c7..9a36a7c3145d 100644
> --- a/include/linux/netfilter/nfnetlink.h
> +++ b/include/linux/netfilter/nfnetlink.h
> @@ -50,6 +50,12 @@ static inline bool lockdep_nfnl_is_held(__u8 subsys_id)
>  {
>   return true;
>  }
> +
> +static inline u16 nfnl_msg_type(u8 subsys, u8 msg_type)
> +{
> + return subsys << 8 | msg_type;
> +}

This is not right. You have placed this new function definition inside
the CONFIG_PROVE_LOCKING.

So this is only defined iff CONFIG_PROVE_LOCKING is set on.

>  #endif /* CONFIG_PROVE_LOCKING */
>  
>  /*
> diff --git a/net/netfilter/nf_conntrack_netlink.c 
> b/net/netfilter/nf_conntrack_netlink.c
> index aa344c5868c5..67f6f88a3e92 100644
> --- a/net/netfilter/nf_conntrack_netlink.c
> +++ b/net/netfilter/nf_conntrack_netlink.c
> @@ -467,7 +467,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 
> seq, u32 type,
>   struct nlattr *nest_parms;
>   unsigned int flags = portid ? NLM_F_MULTI : 0, event;
>  
> - event = NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_NEW;

I can find many more spots to be replaced via:

git grep NFNL_SUBSYS_ net/netfilter/

Patch attached.
>From 1f03a770eb030480968c9cb29be85e3d1cbadf3e Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pa...@netfilter.org>
Date: Tue, 28 Mar 2017 22:27:32 +0530
Subject: [PATCH] netfilter: Add nfnl_msg_type() helper function

Add and use nfnl_msg_type() function to replace opencoded nfnetlink
message type. I suggested this change, Arushi Singhal made an initial
patch to address this but was missing several spots.

Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 include/linux/netfilter/nfnetlink.h  |  5 +
 net/netfilter/ipset/ip_set_core.c|  2 +-
 net/netfilter/nf_conntrack_netlink.c | 16 +---
 net/netfilter/nf_tables_api.c| 17 -
 net/netfilter/nf_tables_trace.c  |  3 ++-
 net/netfilter/nfnetlink_acct.c   |  2 +-
 net/netfilter/nfnetlink_cthelper.c   |  2 +-
 net/netfilter/nfnetlink_cttimeout.c  |  4 ++--
 net/netfilter/nfnetlink_log.c|  2 +-
 net/netfilter/nfnetlink_queue.c  |  2 +-
 net/netfilter/nft_compat.c   |  2 +-
 11 files changed, 32 insertions(+), 25 deletions(-)

diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 1b49209dd5c7..996711d8a7b4 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -41,6 +41,11 @@ int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error);
 int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid,
 		  int flags);
 
+static inline u16 nfnl_msg_type(u8 subsys, u8 msg_type)
+{
+	return subsys << 8 | msg_type;
+}
+
 void nfnl_lock(__u8 subsys_id);
 void nfnl_unlock(__u8 subsys_id);
 #ifdef CONFIG_PROVE_LOCKING
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index c296f9b606d4..731ba9c0cf9b 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -769,7 +769,7 @@ start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags,
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
 
-	nlh = nlmsg_put(skb, portid, seq, cmd | (NFNL_SUBSYS_IPSET << 8),
+	nlh = nlmsg_put(skb, portid, seq, nfnl_msg_type(NFNL_SUBSYS_IPSET, cmd),
 			sizeof(*nfmsg), flags);
 	if (!nlh)
 		return NULL;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index cd0a6d270ebe..773d2187a5ea 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -467,7 +467,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
 	struct nlattr *nest_parms;
 	unsigned int flags = portid ? NLM_F_MULTI : 0, event;
 
-	event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_NEW);
+	event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_NEW);
 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
 	if (nlh == NULL)
 		goto nlmsg_failure;
@@ -

Re: [PATCH v2] net: netfilter: Remove multiple assignment.

2017-04-06 Thread Pablo Neira Ayuso
Hi Arushi,

On Tue, Mar 28, 2017 at 04:03:27AM +0530, Arushi Singhal wrote:
> This patch removes multiple assignments to follow the kernel coding
> style as also reported by checkpatch.pl.
> Done using coccinelle.
> @@
> identifier i1,i2;
> constant c;
> @@
> - i1=i2=c;
> + i1=c;
> + i2=i1;

I see more multiple assignments like this under:

net/netfilter/
net/ipv4/netfilter/
net/ipv6/netfilter/
net/bridge/netfilter/

So I would prefer to either fix them all or none.

Thanks!


Re: [PATCH 1/2] net: netfilter: Remove typedef from "typedef struct field_t"

2017-04-06 Thread Pablo Neira Ayuso
On Sat, Mar 25, 2017 at 05:57:55PM +0530, Arushi Singhal wrote:
> This patch removes typedefs from struct and renames it from "typedef struct
> field_t" to "struct field" as per kernel coding standards."
> 
> Signed-off-by: Arushi Singhal 
> ---
>  net/netfilter/nf_conntrack_h323_asn1.c | 68 
> +-
>  1 file changed, 34 insertions(+), 34 deletions(-)
> 
> diff --git a/net/netfilter/nf_conntrack_h323_asn1.c 
> b/net/netfilter/nf_conntrack_h323_asn1.c
> index 89b2e46925c4..fb8cf238a76f 100644
> --- a/net/netfilter/nf_conntrack_h323_asn1.c
> +++ b/net/netfilter/nf_conntrack_h323_asn1.c
> @@ -77,7 +77,7 @@
>  
>  
>  /* ASN.1 Field Structure */
> -typedef struct field_t {
> +struct field {

Probably better if you rename this to 'struct h323_field' to make sure
compilation doesn't break due to structure name pollution. And we also
got a report from kbuild robot that would be good to investigate.


Re: [PATCH] net: netfilter: Replace explicit NULL comparison with ! operator

2017-04-04 Thread Pablo Neira Ayuso
On Tue, Apr 04, 2017 at 01:41:11PM -0400, Simon Horman wrote:
> On Wed, Mar 29, 2017 at 03:45:01PM +0530, Arushi Singhal wrote:
> > Replace explicit NULL comparison with ! operator to simplify code.
> > 
> > Signed-off-by: Arushi Singhal 
> > ---
> >  net/netfilter/ipvs/ip_vs_ctl.c |  8 ++---
> >  net/netfilter/ipvs/ip_vs_proto.c   |  8 ++---
> 
> I count 18 instances of "!= NULL" in net/netfilter/ipvs/ip_vs_proto but this
> patch only seems to update 8 of them. I would prefer to fix all or none of
> them.

Agreed.

Please address all instances and resubmit.


Re: [PATCH v2] net: netfilter: remove unused variable

2017-03-30 Thread Pablo Neira Ayuso
On Thu, Mar 30, 2017 at 07:38:08PM +0530, Arushi Singhal wrote:
> On Thu, Mar 30, 2017 at 6:25 PM, Simon Horman  wrote:
> 
> > On Wed, Mar 29, 2017 at 08:27:52PM +0530, Arushi Singhal wrote:
> > > This patch uses the following coccinelle script to remove
> > > a variable that was simply used to store the return
> > > value of a function call before returning it:
> > >
> > > @@
> > > identifier len,f;
> > > @@
> > >
> > > -int len;
> > >  ... when != len
> > >  when strict
> > > -len =
> > > +return
> > > f(...);
> > > -return len;
> > >
> > > Signed-off-by: Arushi Singhal 
> >
> > Applied with patch prefix updated to "ipvs:".
> >
> Hi
> Thanks
> How can I see my accepted patches of netfilter.

You can use links from patchwork:

http://patchwork.ozlabs.org/project/netfilter-devel/


[PATCH 6/8] netfilter: nf_ct_ext: fix possible panic after nf_ct_extend_unregister

2017-03-29 Thread Pablo Neira Ayuso
From: Liping Zhang <zlpnob...@gmail.com>

If one cpu is doing nf_ct_extend_unregister while another cpu is doing
__nf_ct_ext_add_length, then we may hit BUG_ON(t == NULL). Moreover,
there's no synchronize_rcu invocation after set nf_ct_ext_types[id] to
NULL, so it's possible that we may access invalid pointer.

But actually, most of the ct extends are built-in, so the problem listed
above will not happen. However, there are two exceptions: NF_CT_EXT_NAT
and NF_CT_EXT_SYNPROXY.

For _EXT_NAT, the panic will not happen, since adding the nat extend and
unregistering the nat extend are located in the same file(nf_nat_core.c),
this means that after the nat module is removed, we cannot add the nat
extend too.

For _EXT_SYNPROXY, synproxy extend may be added by init_conntrack, while
synproxy extend unregister will be done by synproxy_core_exit. So after
nf_synproxy_core.ko is removed, we may still try to add the synproxy
extend, then kernel panic may happen.

I know it's very hard to reproduce this issue, but I can play a tricky
game to make it happen very easily :)

Step 1. Enable SYNPROXY for tcp dport 1234 at FORWARD hook:
  # iptables -I FORWARD -p tcp --dport 1234 -j SYNPROXY
Step 2. Queue the syn packet to the userspace at raw table OUTPUT hook.
Also note, in the userspace we only add a 20s' delay, then
reinject the syn packet to the kernel:
  # iptables -t raw -I OUTPUT -p tcp --syn -j NFQUEUE --queue-num 1
Step 3. Using "nc 2.2.2.2 1234" to connect the server.
Step 4. Now remove the nf_synproxy_core.ko quickly:
  # iptables -F FORWARD
  # rmmod ipt_SYNPROXY
  # rmmod nf_synproxy_core
Step 5. After 20s' delay, the syn packet is reinjected to the kernel.

Now you will see the panic like this:
  kernel BUG at net/netfilter/nf_conntrack_extend.c:91!
  Call Trace:
   ? __nf_ct_ext_add_length+0x53/0x3c0 [nf_conntrack]
   init_conntrack+0x12b/0x600 [nf_conntrack]
   nf_conntrack_in+0x4cc/0x580 [nf_conntrack]
   ipv4_conntrack_local+0x48/0x50 [nf_conntrack_ipv4]
   nf_reinject+0x104/0x270
   nfqnl_recv_verdict+0x3e1/0x5f9 [nfnetlink_queue]
   ? nfqnl_recv_verdict+0x5/0x5f9 [nfnetlink_queue]
   ? nla_parse+0xa0/0x100
   nfnetlink_rcv_msg+0x175/0x6a9 [nfnetlink]
   [...]

One possible solution is to make NF_CT_EXT_SYNPROXY extend built-in, i.e.
introduce nf_conntrack_synproxy.c and only do ct extend register and
unregister in it, similar to nf_conntrack_timeout.c.

But having such an obscure restriction of nf_ct_extend_unregister is not a
good idea, so we should invoke synchronize_rcu after setting nf_ct_ext_types
to NULL, and check the NULL pointer when doing __nf_ct_ext_add_length. Then
it will be easier if we add new ct extend in the future.

Last, we use kfree_rcu to free nf_ct_ext, so rcu_barrier() is no longer
necessary, remove it too.

Signed-off-by: Liping Zhang <zlpnob...@gmail.com>
Acked-by: Florian Westphal <f...@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 net/netfilter/nf_conntrack_extend.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/nf_conntrack_extend.c 
b/net/netfilter/nf_conntrack_extend.c
index 02bcf00c2492..008299b7f78f 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -53,7 +53,11 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id 
id,
 
rcu_read_lock();
t = rcu_dereference(nf_ct_ext_types[id]);
-   BUG_ON(t == NULL);
+   if (!t) {
+   rcu_read_unlock();
+   return NULL;
+   }
+
off = ALIGN(sizeof(struct nf_ct_ext), t->align);
len = off + t->len + var_alloc_len;
alloc_size = t->alloc_size + var_alloc_len;
@@ -88,7 +92,10 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum 
nf_ct_ext_id id,
 
rcu_read_lock();
t = rcu_dereference(nf_ct_ext_types[id]);
-   BUG_ON(t == NULL);
+   if (!t) {
+   rcu_read_unlock();
+   return NULL;
+   }
 
newoff = ALIGN(old->len, t->align);
newlen = newoff + t->len + var_alloc_len;
@@ -175,6 +182,6 @@ void nf_ct_extend_unregister(struct nf_ct_ext_type *type)
RCU_INIT_POINTER(nf_ct_ext_types[type->id], NULL);
update_alloc_size(type);
mutex_unlock(&nf_ct_ext_type_mutex);
-   rcu_barrier(); /* Wait for completion of call_rcu()'s */
+   synchronize_rcu();
 }
 EXPORT_SYMBOL_GPL(nf_ct_extend_unregister);
-- 
2.1.4



[PATCH 8/8] netfilter: nfnetlink_queue: fix secctx memory leak

2017-03-29 Thread Pablo Neira Ayuso
From: Liping Zhang <zlpnob...@gmail.com>

We must call security_release_secctx to free the memory returned by
security_secid_to_secctx, otherwise memory may be leaked forever.

Fixes: ef493bd930ae ("netfilter: nfnetlink_queue: add security context information")
Signed-off-by: Liping Zhang <zlpnob...@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 net/netfilter/nfnetlink_queue.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 3ee0b8a000a4..933509ebf3d3 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -443,7 +443,7 @@ nfqnl_build_packet_message(struct net *net, struct 
nfqnl_instance *queue,
skb = alloc_skb(size, GFP_ATOMIC);
if (!skb) {
skb_tx_error(entskb);
-   return NULL;
+   goto nlmsg_failure;
}
 
nlh = nlmsg_put(skb, 0, 0,
@@ -452,7 +452,7 @@ nfqnl_build_packet_message(struct net *net, struct 
nfqnl_instance *queue,
if (!nlh) {
skb_tx_error(entskb);
kfree_skb(skb);
-   return NULL;
+   goto nlmsg_failure;
}
nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = entry->state.pf;
@@ -598,12 +598,17 @@ nfqnl_build_packet_message(struct net *net, struct 
nfqnl_instance *queue,
}
 
nlh->nlmsg_len = skb->len;
+   if (seclen)
+   security_release_secctx(secdata, seclen);
return skb;
 
 nla_put_failure:
skb_tx_error(entskb);
kfree_skb(skb);
net_err_ratelimited("nf_queue: error creating packet message\n");
+nlmsg_failure:
+   if (seclen)
+   security_release_secctx(secdata, seclen);
return NULL;
 }
 
-- 
2.1.4



[PATCH 7/8] netfilter: nf_nat_snmp: Fix panic when snmp_trap_helper fails to register

2017-03-29 Thread Pablo Neira Ayuso
From: Gao Feng <f...@ikuai8.com>

In commit 93557f53e1fb ("netfilter: nf_conntrack: nf_conntrack snmp
helper"), snmp_helper was replaced by nf_nat_snmp_hook, so snmp_helper
is never registered. But the error path still tries to unregister
snmp_helper, which can cause a panic.

Now remove the useless snmp_helper and the unregister call in the
error handler.

Fixes: 93557f53e1fb ("netfilter: nf_conntrack: nf_conntrack snmp helper")
Signed-off-by: Gao Feng <f...@ikuai8.com>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 net/ipv4/netfilter/nf_nat_snmp_basic.c | 19 +--
 1 file changed, 1 insertion(+), 18 deletions(-)

diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c 
b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 5a8f7c360887..53e49f5011d3 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -1260,16 +1260,6 @@ static const struct nf_conntrack_expect_policy 
snmp_exp_policy = {
.timeout= 180,
 };
 
-static struct nf_conntrack_helper snmp_helper __read_mostly = {
-   .me = THIS_MODULE,
-   .help   = help,
-   .expect_policy  = &snmp_exp_policy,
-   .name   = "snmp",
-   .tuple.src.l3num= AF_INET,
-   .tuple.src.u.udp.port   = cpu_to_be16(SNMP_PORT),
-   .tuple.dst.protonum = IPPROTO_UDP,
-};
-
 static struct nf_conntrack_helper snmp_trap_helper __read_mostly = {
.me = THIS_MODULE,
.help   = help,
@@ -1288,17 +1278,10 @@ static struct nf_conntrack_helper snmp_trap_helper 
__read_mostly = {
 
 static int __init nf_nat_snmp_basic_init(void)
 {
-   int ret = 0;
-
BUG_ON(nf_nat_snmp_hook != NULL);
RCU_INIT_POINTER(nf_nat_snmp_hook, help);
 
-   ret = nf_conntrack_helper_register(&snmp_trap_helper);
-   if (ret < 0) {
-   nf_conntrack_helper_unregister(&snmp_helper);
-   return ret;
-   }
-   return ret;
+   return nf_conntrack_helper_register(&snmp_trap_helper);
 }
 
 static void __exit nf_nat_snmp_basic_fini(void)
-- 
2.1.4



[PATCH 3/8] netfilter: nfnl_cthelper: Fix memory leak

2017-03-29 Thread Pablo Neira Ayuso
From: Jeffy Chen <jeffy.c...@rock-chips.com>

We have memory leaks of nf_conntrack_helper & expect_policy.

Signed-off-by: Jeffy Chen <jeffy.c...@rock-chips.com>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 net/netfilter/nfnetlink_cthelper.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/nfnetlink_cthelper.c 
b/net/netfilter/nfnetlink_cthelper.c
index 90f291e27eb1..2b987d2a77bc 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -216,7 +216,7 @@ nfnl_cthelper_create(const struct nlattr * const tb[],
 
ret = nfnl_cthelper_parse_expect_policy(helper, tb[NFCTH_POLICY]);
if (ret < 0)
-   goto err;
+   goto err1;
 
strncpy(helper->name, nla_data(tb[NFCTH_NAME]), NF_CT_HELPER_NAME_LEN);
helper->data_len = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN]));
@@ -247,10 +247,12 @@ nfnl_cthelper_create(const struct nlattr * const tb[],
 
ret = nf_conntrack_helper_register(helper);
if (ret < 0)
-   goto err;
+   goto err2;
 
return 0;
-err:
+err2:
+   kfree(helper->expect_policy);
+err1:
kfree(helper);
return ret;
 }
@@ -696,6 +698,8 @@ static int nfnl_cthelper_del(struct net *net, struct sock 
*nfnl,
 
found = true;
nf_conntrack_helper_unregister(cur);
+   kfree(cur->expect_policy);
+   kfree(cur);
}
}
/* Make sure we return success if we flush and there is no helpers */
@@ -759,6 +763,8 @@ static void __exit nfnl_cthelper_exit(void)
continue;
 
nf_conntrack_helper_unregister(cur);
+   kfree(cur->expect_policy);
+   kfree(cur);
}
}
 }
-- 
2.1.4



[PATCH 5/8] netfilter: nfnl_cthelper: fix a race when walk the nf_ct_helper_hash table

2017-03-29 Thread Pablo Neira Ayuso
From: Liping Zhang <zlpnob...@gmail.com>

The nf_ct_helper_hash table is protected by nf_ct_helper_mutex, while
nfct_helper operations are protected by nfnl_lock(NFNL_SUBSYS_CTHELPER).
So it's possible that one CPU is walking nf_ct_helper_hash for
cthelper add/get/del while another CPU is doing nf_conntrack_helpers_unregister
at the same time. This is dangerous, and may cause a use-after-free error.

Note that the delete operation will flush all cthelpers added via nfnetlink,
so using RCU for protection is not easy.

Now introduce a dummy list to record all the cthelpers added via
nfnetlink, so that we can walk the dummy list instead of walking
nf_ct_helper_hash. Also, keep nfnl_cthelper_dump_table unchanged, since it
may be invoked without nfnl_lock(NFNL_SUBSYS_CTHELPER) held.

Signed-off-by: Liping Zhang <zlpnob...@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
---
 net/netfilter/nfnetlink_cthelper.c | 177 +
 1 file changed, 81 insertions(+), 96 deletions(-)

diff --git a/net/netfilter/nfnetlink_cthelper.c 
b/net/netfilter/nfnetlink_cthelper.c
index 2b987d2a77bc..d45558178da5 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -32,6 +32,13 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Pablo Neira Ayuso <pa...@netfilter.org>");
 MODULE_DESCRIPTION("nfnl_cthelper: User-space connection tracking helpers");
 
+struct nfnl_cthelper {
+   struct list_head            list;
+   struct nf_conntrack_helper  helper;
+};
+
+static LIST_HEAD(nfnl_cthelper_list);
+
 static int
 nfnl_userspace_cthelper(struct sk_buff *skb, unsigned int protoff,
struct nf_conn *ct, enum ip_conntrack_info ctinfo)
@@ -205,14 +212,16 @@ nfnl_cthelper_create(const struct nlattr * const tb[],
 struct nf_conntrack_tuple *tuple)
 {
struct nf_conntrack_helper *helper;
+   struct nfnl_cthelper *nfcth;
int ret;
 
if (!tb[NFCTH_TUPLE] || !tb[NFCTH_POLICY] || !tb[NFCTH_PRIV_DATA_LEN])
return -EINVAL;
 
-   helper = kzalloc(sizeof(struct nf_conntrack_helper), GFP_KERNEL);
-   if (helper == NULL)
+   nfcth = kzalloc(sizeof(*nfcth), GFP_KERNEL);
+   if (nfcth == NULL)
return -ENOMEM;
+   helper = &nfcth->helper;
 
ret = nfnl_cthelper_parse_expect_policy(helper, tb[NFCTH_POLICY]);
if (ret < 0)
@@ -249,11 +258,12 @@ nfnl_cthelper_create(const struct nlattr * const tb[],
if (ret < 0)
goto err2;
 
+   list_add_tail(&nfcth->list, &nfnl_cthelper_list);
return 0;
 err2:
kfree(helper->expect_policy);
 err1:
-   kfree(helper);
+   kfree(nfcth);
return ret;
 }
 
@@ -379,7 +389,8 @@ static int nfnl_cthelper_new(struct net *net, struct sock 
*nfnl,
const char *helper_name;
struct nf_conntrack_helper *cur, *helper = NULL;
struct nf_conntrack_tuple tuple;
-   int ret = 0, i;
+   struct nfnl_cthelper *nlcth;
+   int ret = 0;
 
if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE])
return -EINVAL;
@@ -390,31 +401,22 @@ static int nfnl_cthelper_new(struct net *net, struct sock 
*nfnl,
if (ret < 0)
return ret;
 
-   rcu_read_lock();
-   for (i = 0; i < nf_ct_helper_hsize && !helper; i++) {
-   hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) {
+   list_for_each_entry(nlcth, &nfnl_cthelper_list, list) {
+   cur = &nlcth->helper;
 
-   /* skip non-userspace conntrack helpers. */
-   if (!(cur->flags & NF_CT_HELPER_F_USERSPACE))
-   continue;
+   if (strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN))
+   continue;
 
-   if (strncmp(cur->name, helper_name,
-   NF_CT_HELPER_NAME_LEN) != 0)
-   continue;
+   if ((tuple.src.l3num != cur->tuple.src.l3num ||
+tuple.dst.protonum != cur->tuple.dst.protonum))
+   continue;
 
-   if ((tuple.src.l3num != cur->tuple.src.l3num ||
-tuple.dst.protonum != cur->tuple.dst.protonum))
-   continue;
+   if (nlh->nlmsg_flags & NLM_F_EXCL)
+   return -EEXIST;
 
-   if (nlh->nlmsg_flags & NLM_F_EXCL) {
-   ret = -EEXIST;
-   goto err;
-   }
-   helper = cur;
-   break;
-   }
+   helper = cur;
+   break;
}
-   rcu_read_unlock();
 
if (helper == NULL)
ret = nfnl_cthelper_create(tb, &tuple);
@@ -422,9 +424

<    5   6   7   8   9   10   11   12   13   14   >