Re: [PATCH 00/50] Netfilter/IPVS updates for net-next

2016-12-07 Thread David Miller
From: Pablo Neira Ayuso 
Date: Wed,  7 Dec 2016 22:52:06 +0100

> The following patchset contains a large Netfilter update for net-next,
> to summarise:
 ...
> You can pull these changes from:
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git

Pulled, thanks a lot Pablo.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 09/50] netfilter: nf_conntrack_tuple_common.h: fix #include

2016-12-07 Thread Pablo Neira Ayuso
From: Davide Caratti 

To allow usage of enum ip_conntrack_dir in include/net/netns/conntrack.h,
this patch encloses #include <linux/netfilter.h> in a #ifndef __KERNEL__
directive, so that compiler errors caused by unwanted inclusion of
include/linux/netfilter.h are avoided.
In addition, an #include <linux/netfilter/nf_conntrack_common.h> line has
been added so that the CTINFO2DIR macro resolves correctly.

Signed-off-by: Davide Caratti 
Acked-by: Mikko Rapeli 
Signed-off-by: Pablo Neira Ayuso 
---
 include/uapi/linux/netfilter/nf_conntrack_tuple_common.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h 
b/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h
index a9c3834abdd4..526b42496b78 100644
--- a/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h
@@ -2,7 +2,10 @@
 #define _NF_CONNTRACK_TUPLE_COMMON_H
 
 #include 
+#ifndef __KERNEL__
 #include 
+#endif
+#include  /* IP_CT_IS_REPLY */
 
 enum ip_conntrack_dir {
IP_CT_DIR_ORIGINAL,
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 27/50] netfilter: nft_fib_ipv4: initialize *dest to zero

2016-12-07 Thread Pablo Neira Ayuso
From: Liping Zhang 

Otherwise, if the fib lookup fails, *dest will be filled with a garbage value,
so reverse path filtering will not work properly:
 # nft add rule x prerouting fib saddr oif eq 0 drop

Fixes: f6d0cbcf09c5 ("netfilter: nf_tables: add fib expression")
Signed-off-by: Liping Zhang 
Acked-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
---
 net/ipv4/netfilter/nft_fib_ipv4.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c 
b/net/ipv4/netfilter/nft_fib_ipv4.c
index bfffa742f397..258136364f5e 100644
--- a/net/ipv4/netfilter/nft_fib_ipv4.c
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -122,6 +122,8 @@ void nft_fib4_eval(const struct nft_expr *expr, struct 
nft_regs *regs,
fl4.saddr = get_saddr(iph->daddr);
}
 
+   *dest = 0;
+
if (fib_lookup(nft_net(pkt), , , FIB_LOOKUP_IGNORE_LINKSTATE))
return;
 
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 36/50] netfilter: nft_quota: dump consumed quota

2016-12-07 Thread Pablo Neira Ayuso
Add a new attribute NFTA_QUOTA_CONSUMED that displays the amount of
quota that has been already consumed. This allows us to restore the
internal state of the quota object between reboots as well as to monitor
how wasted it is.

This patch changes the logic to account for the consumed bytes, instead
of the bytes that remain to be consumed.

Signed-off-by: Pablo Neira Ayuso 
---
 include/uapi/linux/netfilter/nf_tables.h |  2 ++
 net/netfilter/nft_quota.c| 21 -
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/include/uapi/linux/netfilter/nf_tables.h 
b/include/uapi/linux/netfilter/nf_tables.h
index 1043ce4250c5..3d47582caa80 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -988,12 +988,14 @@ enum nft_quota_flags {
  *
  * @NFTA_QUOTA_BYTES: quota in bytes (NLA_U16)
  * @NFTA_QUOTA_FLAGS: flags (NLA_U32)
+ * @NFTA_QUOTA_CONSUMED: quota already consumed in bytes (NLA_U64)
  */
 enum nft_quota_attributes {
NFTA_QUOTA_UNSPEC,
NFTA_QUOTA_BYTES,
NFTA_QUOTA_FLAGS,
NFTA_QUOTA_PAD,
+   NFTA_QUOTA_CONSUMED,
__NFTA_QUOTA_MAX
 };
 #define NFTA_QUOTA_MAX (__NFTA_QUOTA_MAX - 1)
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index 09ce72b1d6bf..0d344209803a 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -18,20 +18,20 @@
 struct nft_quota {
u64 quota;
boolinvert;
-   atomic64_t  remain;
+   atomic64_t  consumed;
 };
 
 static inline bool nft_overquota(struct nft_quota *priv,
-const struct nft_pktinfo *pkt)
+const struct sk_buff *skb)
 {
-   return atomic64_sub_return(pkt->skb->len, >remain) < 0;
+   return atomic64_add_return(skb->len, >consumed) >= priv->quota;
 }
 
 static inline void nft_quota_do_eval(struct nft_quota *priv,
 struct nft_regs *regs,
 const struct nft_pktinfo *pkt)
 {
-   if (nft_overquota(priv, pkt) ^ priv->invert)
+   if (nft_overquota(priv, pkt->skb) ^ priv->invert)
regs->verdict.code = NFT_BREAK;
 }
 
@@ -70,7 +70,7 @@ static int nft_quota_do_init(const struct nlattr * const tb[],
 
priv->quota = quota;
priv->invert = (flags & NFT_QUOTA_F_INV) ? true : false;
-   atomic64_set(>remain, quota);
+   atomic64_set(>consumed, 0);
 
return 0;
 }
@@ -86,9 +86,20 @@ static int nft_quota_obj_init(const struct nlattr * const 
tb[],
 static int nft_quota_do_dump(struct sk_buff *skb, const struct nft_quota *priv)
 {
u32 flags = priv->invert ? NFT_QUOTA_F_INV : 0;
+   u64 consumed;
+
+   consumed = atomic64_read(>consumed);
+   /* Since we inconditionally increment consumed quota for each packet
+* that we see, don't go over the quota boundary in what we send to
+* userspace.
+*/
+   if (consumed > priv->quota)
+   consumed = priv->quota;
 
if (nla_put_be64(skb, NFTA_QUOTA_BYTES, cpu_to_be64(priv->quota),
 NFTA_QUOTA_PAD) ||
+   nla_put_be64(skb, NFTA_QUOTA_CONSUMED, cpu_to_be64(consumed),
+NFTA_QUOTA_PAD) ||
nla_put_be32(skb, NFTA_QUOTA_FLAGS, htonl(flags)))
goto nla_put_failure;
return 0;
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 43/50] netfilter: rpfilter: bypass ipv4 lbcast packets with zeronet source

2016-12-07 Thread Pablo Neira Ayuso
From: Liping Zhang 

Otherwise, DHCP Discover packets (0.0.0.0->255.255.255.255) may be
dropped incorrectly.

Signed-off-by: Liping Zhang 
Acked-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
---
 net/ipv4/netfilter/ipt_rpfilter.c |  8 +---
 net/ipv4/netfilter/nft_fib_ipv4.c | 13 +++--
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/net/ipv4/netfilter/ipt_rpfilter.c 
b/net/ipv4/netfilter/ipt_rpfilter.c
index 59b49945b481..f273098e48fd 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -83,10 +83,12 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct 
xt_action_param *par)
return true ^ invert;
 
iph = ip_hdr(skb);
-   if (ipv4_is_multicast(iph->daddr)) {
-   if (ipv4_is_zeronet(iph->saddr))
-   return ipv4_is_local_multicast(iph->daddr) ^ invert;
+   if (ipv4_is_zeronet(iph->saddr)) {
+   if (ipv4_is_lbcast(iph->daddr) ||
+   ipv4_is_local_multicast(iph->daddr))
+   return true ^ invert;
}
+
flow.flowi4_iif = LOOPBACK_IFINDEX;
flow.daddr = iph->saddr;
flow.saddr = rpfilter_get_saddr(iph->daddr);
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c 
b/net/ipv4/netfilter/nft_fib_ipv4.c
index 258136364f5e..965b1a161369 100644
--- a/net/ipv4/netfilter/nft_fib_ipv4.c
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -101,12 +101,13 @@ void nft_fib4_eval(const struct nft_expr *expr, struct 
nft_regs *regs,
}
 
iph = ip_hdr(pkt->skb);
-   if (ipv4_is_multicast(iph->daddr) &&
-   ipv4_is_zeronet(iph->saddr) &&
-   ipv4_is_local_multicast(iph->daddr)) {
-   nft_fib_store_result(dest, priv->result, pkt,
-get_ifindex(pkt->skb->dev));
-   return;
+   if (ipv4_is_zeronet(iph->saddr)) {
+   if (ipv4_is_lbcast(iph->daddr) ||
+   ipv4_is_local_multicast(iph->daddr)) {
+   nft_fib_store_result(dest, priv->result, pkt,
+get_ifindex(pkt->skb->dev));
+   return;
+   }
}
 
if (priv->flags & NFTA_FIB_F_MARK)
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 06/50] netfilter: built-in NAT support for UDPlite

2016-12-07 Thread Pablo Neira Ayuso
From: Davide Caratti 

CONFIG_NF_NAT_PROTO_UDPLITE is no longer a tristate. When set to y, NAT
support for the UDPlite protocol is built into nf_nat.ko.

footprint test:

(nf_nat_proto_)   |udplite || nf_nat
--+++
no builtin| 408048 || 2241312
UDPLITE builtin   |   -|| 2577256

Signed-off-by: Davide Caratti 
Signed-off-by: Pablo Neira Ayuso 
---
 include/net/netfilter/nf_nat_l4proto.h |  3 +++
 net/netfilter/Kconfig  |  2 +-
 net/netfilter/Makefile |  5 ++---
 net/netfilter/nf_nat_core.c|  4 
 net/netfilter/nf_nat_proto_udplite.c   | 35 +-
 5 files changed, 11 insertions(+), 38 deletions(-)

diff --git a/include/net/netfilter/nf_nat_l4proto.h 
b/include/net/netfilter/nf_nat_l4proto.h
index 2cbaf3856e21..3923150f2a1e 100644
--- a/include/net/netfilter/nf_nat_l4proto.h
+++ b/include/net/netfilter/nf_nat_l4proto.h
@@ -60,6 +60,9 @@ extern const struct nf_nat_l4proto nf_nat_l4proto_dccp;
 #ifdef CONFIG_NF_NAT_PROTO_SCTP
 extern const struct nf_nat_l4proto nf_nat_l4proto_sctp;
 #endif
+#ifdef CONFIG_NF_NAT_PROTO_UDPLITE
+extern const struct nf_nat_l4proto nf_nat_l4proto_udplite;
+#endif
 
 bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple,
 enum nf_nat_manip_type maniptype,
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index ad72edf1f6ec..496e1dcbd003 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -389,7 +389,7 @@ config NF_NAT_PROTO_DCCP
default NF_NAT && NF_CT_PROTO_DCCP
 
 config NF_NAT_PROTO_UDPLITE
-   tristate
+   bool
depends on NF_NAT && NF_CT_PROTO_UDPLITE
default NF_NAT && NF_CT_PROTO_UDPLITE
 
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 02ef6decf94d..3b97d89df2cd 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -45,8 +45,10 @@ obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o
 nf_nat-y   := nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \
   nf_nat_proto_udp.o nf_nat_proto_tcp.o nf_nat_helper.o
 
+# NAT protocols (nf_nat)
 nf_nat-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o
 nf_nat-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o
+nf_nat-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o
 
 # generic transport layer logging
 obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o
@@ -57,9 +59,6 @@ obj-$(CONFIG_NF_LOG_NETDEV) += nf_log_netdev.o
 obj-$(CONFIG_NF_NAT) += nf_nat.o
 obj-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o
 
-# NAT protocols (nf_nat)
-obj-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o
-
 # NAT helpers
 obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o
 obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 80858bd110cc..94b14c5a8b17 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -690,6 +690,10 @@ int nf_nat_l3proto_register(const struct nf_nat_l3proto 
*l3proto)
RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_SCTP],
 _nat_l4proto_sctp);
 #endif
+#ifdef CONFIG_NF_NAT_PROTO_UDPLITE
+   RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_UDPLITE],
+_nat_l4proto_udplite);
+#endif
mutex_unlock(_nat_proto_mutex);
 
RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], l3proto);
diff --git a/net/netfilter/nf_nat_proto_udplite.c 
b/net/netfilter/nf_nat_proto_udplite.c
index 58340c97bd83..366bfbfd82a1 100644
--- a/net/netfilter/nf_nat_proto_udplite.c
+++ b/net/netfilter/nf_nat_proto_udplite.c
@@ -8,11 +8,9 @@
  */
 
 #include 
-#include 
 #include 
 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -64,7 +62,7 @@ udplite_manip_pkt(struct sk_buff *skb,
return true;
 }
 
-static const struct nf_nat_l4proto nf_nat_l4proto_udplite = {
+const struct nf_nat_l4proto nf_nat_l4proto_udplite = {
.l4proto= IPPROTO_UDPLITE,
.manip_pkt  = udplite_manip_pkt,
.in_range   = nf_nat_l4proto_in_range,
@@ -73,34 +71,3 @@ static const struct nf_nat_l4proto nf_nat_l4proto_udplite = {
.nlattr_to_range= nf_nat_l4proto_nlattr_to_range,
 #endif
 };
-
-static int __init nf_nat_proto_udplite_init(void)
-{
-   int err;
-
-   err = nf_nat_l4proto_register(NFPROTO_IPV4, _nat_l4proto_udplite);
-   if (err < 0)
-   goto err1;
-   err = nf_nat_l4proto_register(NFPROTO_IPV6, _nat_l4proto_udplite);
-   if (err < 0)
-   goto err2;
-   return 0;
-
-err2:
-   nf_nat_l4proto_unregister(NFPROTO_IPV4, _nat_l4proto_udplite);
-err1:
-   return err;
-}
-
-static void __exit nf_nat_proto_udplite_fini(void)
-{
-   nf_nat_l4proto_unregister(NFPROTO_IPV6, _nat_l4proto_udplite);
-   

[PATCH 38/50] netfilter: nf_tables: notify internal updates of stateful objects

2016-12-07 Thread Pablo Neira Ayuso
Introduce nf_tables_obj_notify() to notify internal state changes in
stateful objects. This is used by the quota object to report depletion
in a follow up patch.

Signed-off-by: Pablo Neira Ayuso 
---
 include/net/netfilter/nf_tables.h |  4 
 net/netfilter/nf_tables_api.c | 31 +++
 2 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h 
b/include/net/netfilter/nf_tables.h
index 6f7d6a1dc09c..339e374c28b5 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -969,6 +969,10 @@ struct nft_object *nf_tables_obj_lookup(const struct 
nft_table *table,
const struct nlattr *nla, u32 objtype,
u8 genmask);
 
+int nft_obj_notify(struct net *net, struct nft_table *table,
+  struct nft_object *obj, u32 portid, u32 seq,
+  int event, int family, int report, gfp_t gfp);
+
 /**
  * struct nft_object_type - stateful object type
  *
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index bfc015af366a..9d2ed3f520ef 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4282,38 +4282,45 @@ static int nf_tables_delobj(struct net *net, struct 
sock *nlsk,
return nft_delobj(, obj);
 }
 
-static int nf_tables_obj_notify(const struct nft_ctx *ctx,
-   struct nft_object *obj, int event)
+int nft_obj_notify(struct net *net, struct nft_table *table,
+  struct nft_object *obj, u32 portid, u32 seq, int event,
+  int family, int report, gfp_t gfp)
 {
struct sk_buff *skb;
int err;
 
-   if (!ctx->report &&
-   !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
+   if (!report &&
+   !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
return 0;
 
err = -ENOBUFS;
-   skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+   skb = nlmsg_new(NLMSG_GOODSIZE, gfp);
if (skb == NULL)
goto err;
 
-   err = nf_tables_fill_obj_info(skb, ctx->net, ctx->portid, ctx->seq,
- event, 0, ctx->afi->family, ctx->table,
- obj, false);
+   err = nf_tables_fill_obj_info(skb, net, portid, seq, event, 0, family,
+ table, obj, false);
if (err < 0) {
kfree_skb(skb);
goto err;
}
 
-   err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
-ctx->report, GFP_KERNEL);
+   err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, gfp);
 err:
if (err < 0) {
-   nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES,
- err);
+   nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
}
return err;
 }
+EXPORT_SYMBOL_GPL(nft_obj_notify);
+
+static int nf_tables_obj_notify(const struct nft_ctx *ctx,
+   struct nft_object *obj, int event)
+{
+   return nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid,
+ ctx->seq, event, ctx->afi->family, ctx->report,
+ GFP_KERNEL);
+}
 
 static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
   u32 portid, u32 seq)
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 23/50] netfilter: x_tables: pass xt_counters struct instead of packet counter

2016-12-07 Thread Pablo Neira Ayuso
From: Florian Westphal 

On SMP we overload the packet counter (unsigned long) to contain
percpu offset.  Hide this from callers and pass xt_counters address
instead.

Preparation patch to allocate the percpu counters in page-sized batch
chunks.

Signed-off-by: Florian Westphal 
Acked-by: Eric Dumazet 
Signed-off-by: Pablo Neira Ayuso 
---
 include/linux/netfilter/x_tables.h | 6 +-
 net/ipv4/netfilter/arp_tables.c| 4 ++--
 net/ipv4/netfilter/ip_tables.c | 4 ++--
 net/ipv6/netfilter/ip6_tables.c| 5 ++---
 net/netfilter/x_tables.c   | 9 +
 5 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/include/linux/netfilter/x_tables.h 
b/include/linux/netfilter/x_tables.h
index cd4eaf8df445..6e61edeb68e3 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -430,11 +430,7 @@ static inline unsigned long xt_percpu_counter_alloc(void)
 
return 0;
 }
-static inline void xt_percpu_counter_free(u64 pcnt)
-{
-   if (nr_cpu_ids > 1)
-   free_percpu((void __percpu *) (unsigned long) pcnt);
-}
+void xt_percpu_counter_free(struct xt_counters *cnt);
 
 static inline struct xt_counters *
 xt_get_this_cpu_counter(struct xt_counters *cnt)
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 848a0704b28f..019f8e8dda6d 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -439,7 +439,7 @@ find_check_entry(struct arpt_entry *e, const char *name, 
unsigned int size)
 err:
module_put(t->u.kernel.target->me);
 out:
-   xt_percpu_counter_free(e->counters.pcnt);
+   xt_percpu_counter_free(>counters);
 
return ret;
 }
@@ -519,7 +519,7 @@ static inline void cleanup_entry(struct arpt_entry *e)
if (par.target->destroy != NULL)
par.target->destroy();
module_put(par.target->me);
-   xt_percpu_counter_free(e->counters.pcnt);
+   xt_percpu_counter_free(>counters);
 }
 
 /* Checks and translates the user-supplied table segment (held in
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 46815c8a60d7..acc9a0c45bdf 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -582,7 +582,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, 
const char *name,
cleanup_match(ematch, net);
}
 
-   xt_percpu_counter_free(e->counters.pcnt);
+   xt_percpu_counter_free(>counters);
 
return ret;
 }
@@ -670,7 +670,7 @@ cleanup_entry(struct ipt_entry *e, struct net *net)
if (par.target->destroy != NULL)
par.target->destroy();
module_put(par.target->me);
-   xt_percpu_counter_free(e->counters.pcnt);
+   xt_percpu_counter_free(>counters);
 }
 
 /* Checks and translates the user-supplied table segment (held in
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 6ff42b8301cc..88b56a98905b 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -612,7 +612,7 @@ find_check_entry(struct ip6t_entry *e, struct net *net, 
const char *name,
cleanup_match(ematch, net);
}
 
-   xt_percpu_counter_free(e->counters.pcnt);
+   xt_percpu_counter_free(>counters);
 
return ret;
 }
@@ -699,8 +699,7 @@ static void cleanup_entry(struct ip6t_entry *e, struct net 
*net)
if (par.target->destroy != NULL)
par.target->destroy();
module_put(par.target->me);
-
-   xt_percpu_counter_free(e->counters.pcnt);
+   xt_percpu_counter_free(>counters);
 }
 
 /* Checks and translates the user-supplied table segment (held in
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index ad818e52859b..0580029eb0ee 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1615,6 +1615,15 @@ void xt_proto_fini(struct net *net, u_int8_t af)
 }
 EXPORT_SYMBOL_GPL(xt_proto_fini);
 
+void xt_percpu_counter_free(struct xt_counters *counters)
+{
+   unsigned long pcnt = counters->pcnt;
+
+   if (nr_cpu_ids > 1)
+   free_percpu((void __percpu *)pcnt);
+}
+EXPORT_SYMBOL_GPL(xt_percpu_counter_free);
+
 static int __net_init xt_net_init(struct net *net)
 {
int i;
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 44/50] netfilter: nat: skip checksum on offload SCTP packets

2016-12-07 Thread Pablo Neira Ayuso
From: Davide Caratti 

SCTP GSO and hardware can do CRC32c computation after netfilter processing,
so we can avoid calling sctp_compute_checksum() on skb if skb->ip_summed
is equal to CHECKSUM_PARTIAL. Moreover, set skb->ip_summed to CHECKSUM_NONE
when the NAT code computes the CRC, to prevent offloaders from computing
it again (on ixgbe this resulted in a transmission with wrong L4 checksum).

Signed-off-by: Davide Caratti 
Signed-off-by: Pablo Neira Ayuso 
---
 net/netfilter/nf_nat_proto_sctp.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nf_nat_proto_sctp.c 
b/net/netfilter/nf_nat_proto_sctp.c
index 2e14108ff697..31d358691af0 100644
--- a/net/netfilter/nf_nat_proto_sctp.c
+++ b/net/netfilter/nf_nat_proto_sctp.c
@@ -47,7 +47,10 @@ sctp_manip_pkt(struct sk_buff *skb,
hdr->dest = tuple->dst.u.sctp.port;
}
 
-   hdr->checksum = sctp_compute_cksum(skb, hdroff);
+   if (skb->ip_summed != CHECKSUM_PARTIAL) {
+   hdr->checksum = sctp_compute_cksum(skb, hdroff);
+   skb->ip_summed = CHECKSUM_NONE;
+   }
 
return true;
 }
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 28/50] netfilter: nft_payload: layer 4 checksum adjustment for pseudoheader fields

2016-12-07 Thread Pablo Neira Ayuso
This patch adds a new flag that signals the kernel to update layer 4
checksum if the packet field belongs to the layer 4 pseudoheader. This
implicitly provides stateless NAT 1:1 that is useful under very specific
usecases.

Since rules mangling layer 3 fields that are part of the pseudoheader
may potentially convey any layer 4 packet, we have to deal with the
layer 4 checksum adjustment using protocol specific code.

This patch adds support for TCP, UDP and ICMPv6, since they include the
pseudoheader in the layer 4 checksum calculation. ICMP doesn't, so we
can skip it.

Signed-off-by: Pablo Neira Ayuso 
---
 include/net/netfilter/nf_tables_core.h   |   1 +
 include/uapi/linux/netfilter/nf_tables.h |   6 ++
 net/netfilter/nft_payload.c  | 107 +--
 3 files changed, 109 insertions(+), 5 deletions(-)

diff --git a/include/net/netfilter/nf_tables_core.h 
b/include/net/netfilter/nf_tables_core.h
index 862373d4ea9d..8f690effec37 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -45,6 +45,7 @@ struct nft_payload_set {
enum nft_registers  sreg:8;
u8  csum_type;
u8  csum_offset;
+   u8  csum_flags;
 };
 
 extern const struct nft_expr_ops nft_payload_fast_ops;
diff --git a/include/uapi/linux/netfilter/nf_tables.h 
b/include/uapi/linux/netfilter/nf_tables.h
index 14e5f619167e..f030e59aa2ec 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -659,6 +659,10 @@ enum nft_payload_csum_types {
NFT_PAYLOAD_CSUM_INET,
 };
 
+enum nft_payload_csum_flags {
+   NFT_PAYLOAD_L4CSUM_PSEUDOHDR = (1 << 0),
+};
+
 /**
  * enum nft_payload_attributes - nf_tables payload expression netlink 
attributes
  *
@@ -669,6 +673,7 @@ enum nft_payload_csum_types {
  * @NFTA_PAYLOAD_SREG: source register to load data from (NLA_U32: 
nft_registers)
  * @NFTA_PAYLOAD_CSUM_TYPE: checksum type (NLA_U32)
  * @NFTA_PAYLOAD_CSUM_OFFSET: checksum offset relative to base (NLA_U32)
+ * @NFTA_PAYLOAD_CSUM_FLAGS: checksum flags (NLA_U32)
  */
 enum nft_payload_attributes {
NFTA_PAYLOAD_UNSPEC,
@@ -679,6 +684,7 @@ enum nft_payload_attributes {
NFTA_PAYLOAD_SREG,
NFTA_PAYLOAD_CSUM_TYPE,
NFTA_PAYLOAD_CSUM_OFFSET,
+   NFTA_PAYLOAD_CSUM_FLAGS,
__NFTA_PAYLOAD_MAX
 };
 #define NFTA_PAYLOAD_MAX   (__NFTA_PAYLOAD_MAX - 1)
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 98fb5d7b8087..36d2b1096546 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2008-2009 Patrick McHardy 
+ * Copyright (c) 2016 Pablo Neira Ayuso 
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -17,6 +18,10 @@
 #include 
 #include 
 #include 
+/* For layer 4 checksum field offset. */
+#include 
+#include 
+#include 
 
 /* add vlan header into the user buffer for if tag was removed by offloads */
 static bool
@@ -164,6 +169,87 @@ const struct nft_expr_ops nft_payload_fast_ops = {
.dump   = nft_payload_dump,
 };
 
+static inline void nft_csum_replace(__sum16 *sum, __wsum fsum, __wsum tsum)
+{
+   *sum = csum_fold(csum_add(csum_sub(~csum_unfold(*sum), fsum), tsum));
+   if (*sum == 0)
+   *sum = CSUM_MANGLED_0;
+}
+
+static bool nft_payload_udp_checksum(struct sk_buff *skb, unsigned int thoff)
+{
+   struct udphdr *uh, _uh;
+
+   uh = skb_header_pointer(skb, thoff, sizeof(_uh), &_uh);
+   if (!uh)
+   return false;
+
+   return uh->check;
+}
+
+static int nft_payload_l4csum_offset(const struct nft_pktinfo *pkt,
+struct sk_buff *skb,
+unsigned int *l4csum_offset)
+{
+   switch (pkt->tprot) {
+   case IPPROTO_TCP:
+   *l4csum_offset = offsetof(struct tcphdr, check);
+   break;
+   case IPPROTO_UDP:
+   if (!nft_payload_udp_checksum(skb, pkt->xt.thoff))
+   return -1;
+   /* Fall through. */
+   case IPPROTO_UDPLITE:
+   *l4csum_offset = offsetof(struct udphdr, check);
+   break;
+   case IPPROTO_ICMPV6:
+   *l4csum_offset = offsetof(struct icmp6hdr, icmp6_cksum);
+   break;
+   default:
+   return -1;
+   }
+
+   *l4csum_offset += pkt->xt.thoff;
+   return 0;
+}
+
+static int nft_payload_l4csum_update(const struct nft_pktinfo *pkt,
+struct sk_buff *skb,
+__wsum fsum, __wsum tsum)
+{
+   int l4csum_offset;
+   __sum16 sum;
+
+   /* If we cannot determine layer 4 checksum offset or this packet 

[PATCH 15/50] netfilter: nat: add dependencies on conntrack module

2016-12-07 Thread Pablo Neira Ayuso
From: Florian Westphal 

MASQUERADE, S/DNAT and REDIRECT already call functions that depend on the
conntrack module.

However, since the conntrack hooks are now registered in a lazy fashion
(i.e., only when needed) a symbol reference is not enough.

Thus, when something is added to a nat table, make sure that it will see
packets by calling nf_ct_netns_get() which will register the conntrack
hooks in the current netns.

An alternative would be to add these dependencies to the NAT table.

However, that has problems when using non-modular builds -- we might
register e.g. ipv6 conntrack before its initcall has run, leading to NULL
deref crashes since its per-netns storage has not yet been allocated.

Adding the dependency in the modules instead has the advantage that nat
table also does not register its hooks until rules are added.

Signed-off-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
---
 net/ipv4/netfilter/ipt_MASQUERADE.c |  8 +++-
 net/netfilter/xt_NETMAP.c   | 11 +--
 net/netfilter/xt_REDIRECT.c | 12 ++--
 net/netfilter/xt_nat.c  | 18 +-
 4 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c 
b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 34cfb9b0bc0a..a03e4e7ef5f9 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -41,7 +41,7 @@ static int masquerade_tg_check(const struct xt_tgchk_param 
*par)
pr_debug("bad rangesize %u\n", mr->rangesize);
return -EINVAL;
}
-   return 0;
+   return nf_ct_netns_get(par->net, par->family);
 }
 
 static unsigned int
@@ -59,6 +59,11 @@ masquerade_tg(struct sk_buff *skb, const struct 
xt_action_param *par)
  xt_out(par));
 }
 
+static void masquerade_tg_destroy(const struct xt_tgdtor_param *par)
+{
+   nf_ct_netns_put(par->net, par->family);
+}
+
 static struct xt_target masquerade_tg_reg __read_mostly = {
.name   = "MASQUERADE",
.family = NFPROTO_IPV4,
@@ -67,6 +72,7 @@ static struct xt_target masquerade_tg_reg __read_mostly = {
.table  = "nat",
.hooks  = 1 << NF_INET_POST_ROUTING,
.checkentry = masquerade_tg_check,
+   .destroy= masquerade_tg_destroy,
.me = THIS_MODULE,
 };
 
diff --git a/net/netfilter/xt_NETMAP.c b/net/netfilter/xt_NETMAP.c
index 94d0b5411192..e45a01255e70 100644
--- a/net/netfilter/xt_NETMAP.c
+++ b/net/netfilter/xt_NETMAP.c
@@ -60,7 +60,12 @@ static int netmap_tg6_checkentry(const struct xt_tgchk_param 
*par)
 
if (!(range->flags & NF_NAT_RANGE_MAP_IPS))
return -EINVAL;
-   return 0;
+   return nf_ct_netns_get(par->net, par->family);
+}
+
+static void netmap_tg_destroy(const struct xt_tgdtor_param *par)
+{
+   nf_ct_netns_put(par->net, par->family);
 }
 
 static unsigned int
@@ -111,7 +116,7 @@ static int netmap_tg4_check(const struct xt_tgchk_param 
*par)
pr_debug("bad rangesize %u.\n", mr->rangesize);
return -EINVAL;
}
-   return 0;
+   return nf_ct_netns_get(par->net, par->family);
 }
 
 static struct xt_target netmap_tg_reg[] __read_mostly = {
@@ -127,6 +132,7 @@ static struct xt_target netmap_tg_reg[] __read_mostly = {
  (1 << NF_INET_LOCAL_OUT) |
  (1 << NF_INET_LOCAL_IN),
.checkentry = netmap_tg6_checkentry,
+   .destroy= netmap_tg_destroy,
.me = THIS_MODULE,
},
{
@@ -141,6 +147,7 @@ static struct xt_target netmap_tg_reg[] __read_mostly = {
  (1 << NF_INET_LOCAL_OUT) |
  (1 << NF_INET_LOCAL_IN),
.checkentry = netmap_tg4_check,
+   .destroy= netmap_tg_destroy,
.me = THIS_MODULE,
},
 };
diff --git a/net/netfilter/xt_REDIRECT.c b/net/netfilter/xt_REDIRECT.c
index 651dce65a30b..98a4c6d4f1cb 100644
--- a/net/netfilter/xt_REDIRECT.c
+++ b/net/netfilter/xt_REDIRECT.c
@@ -40,7 +40,13 @@ static int redirect_tg6_checkentry(const struct 
xt_tgchk_param *par)
 
if (range->flags & NF_NAT_RANGE_MAP_IPS)
return -EINVAL;
-   return 0;
+
+   return nf_ct_netns_get(par->net, par->family);
+}
+
+static void redirect_tg_destroy(const struct xt_tgdtor_param *par)
+{
+   nf_ct_netns_put(par->net, par->family);
 }
 
 /* FIXME: Take multiple ranges --RR */
@@ -56,7 +62,7 @@ static int redirect_tg4_check(const struct xt_tgchk_param 
*par)
pr_debug("bad rangesize %u.\n", mr->rangesize);
return -EINVAL;
}
-   return 0;
+   return nf_ct_netns_get(par->net, par->family);
 }
 
 static unsigned int
@@ -72,6 +78,7 @@ static struct xt_target redirect_tg_reg[] 

[PATCH 00/50] Netfilter/IPVS updates for net-next

2016-12-07 Thread Pablo Neira Ayuso
Hi David,

The following patchset contains a large Netfilter update for net-next,
to summarise:

1) Add support for stateful objects. This series provides a nf_tables
   native alternative to the extended accounting infrastructure for
   nf_tables. Two initial stateful objects are supported: counters and
   quotas. Objects are identified by a user-defined name, you can fetch
   and reset them anytime. You can also use maps to allow fast lookups
   using any arbitrary key combination. More info at:

   http://marc.info/?l=netfilter-devel&m=148029128323837&w=2

2) On-demand registration of nf_conntrack and defrag hooks per netns.
   Register nf_conntrack hooks if we have a stateful ruleset, ie.
   state-based filtering or NAT. The new nf_conntrack_default_on sysctl
   enables this from newly created netnamespaces. Default behaviour is not
   modified. Patches from Florian Westphal.

3) Allocate 4k chunks and then use these for x_tables counter allocation
   requests, this improves ruleset load time and also datapath ruleset
   evaluation, patches from Florian Westphal.

4) Add support for ebpf to the existing x_tables bpf extension.
   From Willem de Bruijn.

5) Update layer 4 checksum if any of the pseudoheader fields is updated.
   This provides a limited form of 1:1 stateless NAT that makes sense in
   specific scenarios, e.g. load balancing.

6) Add support to flush sets in nf_tables. This series comes with a new
   set->ops->deactivate_one() indirection given that we have to walk
   over the list of set elements, then deactivate them one by one.
   The existing set->ops->deactivate() performs an element lookup that
   we don't need.

7) Two patches to avoid cloning packets, thus speed up packet forwarding
   via nft_fwd from ingress. From Florian Westphal.

8) Two IPVS patches via Simon Horman: Decrement ttl in all modes to
   prevent infinite loops, patch from Dwip Banerjee. And one minor
   refactoring from Gao feng.

9) Revisit recent log support for nf_tables netdev families: One patch
   to ensure that we correctly handle non-ethernet packets. Another
   patch to add missing logger definition for netdev. Patches from
   Liping Zhang.

10) Three patches for nft_fib, one to address insufficient register
initialization and another to solve incorrect (although harmless)
byteswap operation. Moreover update xt_rpfilter and nft_fib to match
lbcast packets with zeronet as source, eg. DHCP Discover packets
(0.0.0.0 -> 255.255.255.255). Also from Liping Zhang.

11) Built-in DCCP, SCTP and UDPlite conntrack and NAT support, from
Davide Caratti. While DCCP is rather hopeless lately, and UDPlite has
been broken in many-cast mode for some little time, let's give them a
chance by placing them at the same level as other existing protocols.
Thus, users don't explicitly have to modprobe support for this and
NAT rules work for them. Some people point to the lack of support in
SOHO Linux-based routers that make deployment of new protocols harder.
I guess other middleboxes out there on the Internet are also to blame.
Anyway, let's see if this has any impact in the midrun.

12) Skip SCTP software checksum calculation if the NIC comes
with SCTP checksum offload support. From Davide Caratti.

13) Initial core factoring to prepare conversion to hook array. Three
patches from Aaron Conole.

14) Gao Feng made a wrong conversion to switch in the xt_multiport
extension in a patch coming in the previous batch. Fix it in this
batch.

15) Get vmalloc call in sync with kmalloc flags to avoid a warning
and likely OOM killer intervention from x_tables. From Marcelo
Ricardo Leitner.

16) Update Arturo Borrero's email address in all source code headers.

You can pull these changes from:

  git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git

Thanks!



The following changes since commit adc176c5472214971d77c1a61c83db9b01e9cdc7:

  ipv6 addrconf: Implemented enhanced DAD (RFC7527) (2016-12-03 23:21:37 -0500)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git HEAD

for you to fetch changes up to 73c25fb139337ac4fe1695ae3c056961855594db:

  netfilter: nft_quota: allow to restore consumed quota (2016-12-07 14:40:53 
+0100)


Aaron Conole (3):
  netfilter: introduce accessor functions for hook entries
  netfilter: decouple nf_hook_entry and nf_hook_ops
  netfilter: convert while loops to for loops

Arturo Borrero Gonzalez (1):
  netfilter: update Arturo Borrero Gonzalez email address

Davide Caratti (8):
  netfilter: built-in NAT support for DCCP
  netfilter: built-in NAT support for SCTP
  netfilter: built-in NAT support for UDPlite
  netfilter: nf_conntrack_tuple_common.h: fix #include
  netfilter: conntrack: built-in support for DCCP

[PATCH 22/50] netfilter: convert while loops to for loops

2016-12-07 Thread Pablo Neira Ayuso
From: Aaron Conole 

This is to facilitate converting from a singly-linked list to an array
of elements.

Signed-off-by: Aaron Conole 
Signed-off-by: Pablo Neira Ayuso 
---
 net/bridge/br_netfilter_hooks.c | 8 
 net/netfilter/core.c| 6 ++
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index adad2eed29e6..b12501a77f18 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -1008,10 +1008,10 @@ int br_nf_hook_thresh(unsigned int hook, struct net 
*net,
struct nf_hook_state state;
int ret;
 
-   elem = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]);
-
-   while (elem && (nf_hook_entry_priority(elem) <= NF_BR_PRI_BRNF))
-   elem = rcu_dereference(elem->next);
+   for (elem = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]);
+elem && nf_hook_entry_priority(elem) <= NF_BR_PRI_BRNF;
+elem = rcu_dereference(elem->next))
+   ;
 
if (!elem)
return okfn(net, sk, skb);
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 2bb46e2d8d30..ce6adfae521a 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -107,10 +107,9 @@ int nf_register_net_hook(struct net *net, const struct 
nf_hook_ops *reg)
mutex_lock(&nf_hook_mutex);
 
/* Find the spot in the list */
-   while ((p = nf_entry_dereference(*pp)) != NULL) {
+   for (; (p = nf_entry_dereference(*pp)) != NULL; pp = &p->next) {
if (reg->priority < nf_hook_entry_priority(p))
break;
-   pp = &p->next;
}
rcu_assign_pointer(entry->next, p);
rcu_assign_pointer(*pp, entry);
@@ -137,12 +136,11 @@ void nf_unregister_net_hook(struct net *net, const struct 
nf_hook_ops *reg)
return;
 
mutex_lock(&nf_hook_mutex);
-   while ((p = nf_entry_dereference(*pp)) != NULL) {
+   for (; (p = nf_entry_dereference(*pp)) != NULL; pp = &p->next) {
if (nf_hook_entry_ops(p) == reg) {
rcu_assign_pointer(*pp, p->next);
break;
}
-   pp = &p->next;
}
mutex_unlock(&nf_hook_mutex);
if (!p) {
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 16/50] netfilter: nf_tables: add conntrack dependencies for nat/masq/redir expressions

2016-12-07 Thread Pablo Neira Ayuso
From: Florian Westphal 

so that conntrack core will add the needed hooks in this namespace.

Signed-off-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
---
 net/ipv4/netfilter/nft_masq_ipv4.c  |  7 +++
 net/ipv4/netfilter/nft_redir_ipv4.c |  7 +++
 net/ipv6/netfilter/nft_masq_ipv6.c  |  7 +++
 net/ipv6/netfilter/nft_redir_ipv6.c |  7 +++
 net/netfilter/nft_masq.c|  2 +-
 net/netfilter/nft_nat.c | 11 ++-
 net/netfilter/nft_redir.c   |  2 +-
 7 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c 
b/net/ipv4/netfilter/nft_masq_ipv4.c
index 4d69f99b8707..a0ea8aad1bf1 100644
--- a/net/ipv4/netfilter/nft_masq_ipv4.c
+++ b/net/ipv4/netfilter/nft_masq_ipv4.c
@@ -35,12 +35,19 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr,
&range, nft_out(pkt));
 }
 
+static void
+nft_masq_ipv4_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+   nf_ct_netns_put(ctx->net, NFPROTO_IPV4);
+}
+
 static struct nft_expr_type nft_masq_ipv4_type;
 static const struct nft_expr_ops nft_masq_ipv4_ops = {
.type   = &nft_masq_ipv4_type,
.size   = NFT_EXPR_SIZE(sizeof(struct nft_masq)),
.eval   = nft_masq_ipv4_eval,
.init   = nft_masq_init,
+   .destroy= nft_masq_ipv4_destroy,
.dump   = nft_masq_dump,
.validate   = nft_masq_validate,
 };
diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c 
b/net/ipv4/netfilter/nft_redir_ipv4.c
index 62c18e68ac58..1650ed23c15d 100644
--- a/net/ipv4/netfilter/nft_redir_ipv4.c
+++ b/net/ipv4/netfilter/nft_redir_ipv4.c
@@ -38,12 +38,19 @@ static void nft_redir_ipv4_eval(const struct nft_expr *expr,
regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &mr, nft_hook(pkt));
 }
 
+static void
+nft_redir_ipv4_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+   nf_ct_netns_put(ctx->net, NFPROTO_IPV4);
+}
+
 static struct nft_expr_type nft_redir_ipv4_type;
 static const struct nft_expr_ops nft_redir_ipv4_ops = {
.type   = &nft_redir_ipv4_type,
.size   = NFT_EXPR_SIZE(sizeof(struct nft_redir)),
.eval   = nft_redir_ipv4_eval,
.init   = nft_redir_init,
+   .destroy= nft_redir_ipv4_destroy,
.dump   = nft_redir_dump,
.validate   = nft_redir_validate,
 };
diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c 
b/net/ipv6/netfilter/nft_masq_ipv6.c
index 93d758f70334..6c5b5b1830a7 100644
--- a/net/ipv6/netfilter/nft_masq_ipv6.c
+++ b/net/ipv6/netfilter/nft_masq_ipv6.c
@@ -36,12 +36,19 @@ static void nft_masq_ipv6_eval(const struct nft_expr *expr,
nft_out(pkt));
 }
 
+static void
+nft_masq_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+   nf_ct_netns_put(ctx->net, NFPROTO_IPV6);
+}
+
 static struct nft_expr_type nft_masq_ipv6_type;
 static const struct nft_expr_ops nft_masq_ipv6_ops = {
.type   = &nft_masq_ipv6_type,
.size   = NFT_EXPR_SIZE(sizeof(struct nft_masq)),
.eval   = nft_masq_ipv6_eval,
.init   = nft_masq_init,
+   .destroy= nft_masq_ipv6_destroy,
.dump   = nft_masq_dump,
.validate   = nft_masq_validate,
 };
diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c 
b/net/ipv6/netfilter/nft_redir_ipv6.c
index 2850fcd8583f..f5ac080fc084 100644
--- a/net/ipv6/netfilter/nft_redir_ipv6.c
+++ b/net/ipv6/netfilter/nft_redir_ipv6.c
@@ -39,12 +39,19 @@ static void nft_redir_ipv6_eval(const struct nft_expr *expr,
nf_nat_redirect_ipv6(pkt->skb, &range, nft_hook(pkt));
 }
 
+static void
+nft_redir_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+   nf_ct_netns_put(ctx->net, NFPROTO_IPV6);
+}
+
 static struct nft_expr_type nft_redir_ipv6_type;
 static const struct nft_expr_ops nft_redir_ipv6_ops = {
.type   = &nft_redir_ipv6_type,
.size   = NFT_EXPR_SIZE(sizeof(struct nft_redir)),
.eval   = nft_redir_ipv6_eval,
.init   = nft_redir_init,
+   .destroy= nft_redir_ipv6_destroy,
.dump   = nft_redir_dump,
.validate   = nft_redir_validate,
 };
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index bf92de01410f..11ce016cd479 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -77,7 +77,7 @@ int nft_masq_init(const struct nft_ctx *ctx,
}
}
 
-   return 0;
+   return nf_ct_netns_get(ctx->net, ctx->afi->family);
 }
 EXPORT_SYMBOL_GPL(nft_masq_init);
 
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index ee2d71753746..19a7bf3236f9 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ 

[PATCH 32/50] netfilter: nf_tables: add stateful objects

2016-12-07 Thread Pablo Neira Ayuso
This patch augments nf_tables to support stateful objects. This new
infrastructure allows you to create, dump and delete stateful objects,
that are identified by a user-defined name.

This patch adds the generic infrastructure, follow up patches add
support for two stateful objects: counters and quotas.

This patch provides a native infrastructure for nf_tables to replace
nfacct, the extended accounting infrastructure for iptables.

Signed-off-by: Pablo Neira Ayuso 
---
 include/net/netfilter/nf_tables.h|  79 +
 include/uapi/linux/netfilter/nf_tables.h |  29 ++
 net/netfilter/nf_tables_api.c| 516 +++
 3 files changed, 624 insertions(+)

diff --git a/include/net/netfilter/nf_tables.h 
b/include/net/netfilter/nf_tables.h
index 32970cba184a..903cd618f50e 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -875,6 +875,7 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void 
*priv);
  * @list: used internally
  * @chains: chains in the table
  * @sets: sets in the table
+ * @objects: stateful objects in the table
  * @hgenerator: handle generator state
  * @use: number of chain references to this table
  * @flags: table flag (see enum nft_table_flags)
@@ -885,6 +886,7 @@ struct nft_table {
struct list_headlist;
struct list_headchains;
struct list_headsets;
+   struct list_headobjects;
u64 hgenerator;
u32 use;
u16 flags:14,
@@ -935,6 +937,73 @@ int nft_verdict_dump(struct sk_buff *skb, int type,
 const struct nft_verdict *v);
 
 /**
+ * struct nft_object - nf_tables stateful object
+ *
+ * @list: table stateful object list node
+ * @type: pointer to object type
+ * @data: pointer to object data
+ * @name: name of this stateful object
+ * @genmask: generation mask
+ * @use: number of references to this stateful object
+ * @data: object data, layout depends on type
+ */
+struct nft_object {
+   struct list_headlist;
+   charname[NFT_OBJ_MAXNAMELEN];
+   u32 genmask:2,
+   use:30;
+   /* runtime data below here */
+   const struct nft_object_type*type ____cacheline_aligned;
+   unsigned char   data[]
+   __attribute__((aligned(__alignof__(u64))));
+};
+
+static inline void *nft_obj_data(const struct nft_object *obj)
+{
+   return (void *)obj->data;
+}
+
+#define nft_expr_obj(expr) *((struct nft_object **)nft_expr_priv(expr))
+
+struct nft_object *nf_tables_obj_lookup(const struct nft_table *table,
+   const struct nlattr *nla, u32 objtype,
+   u8 genmask);
+
+/**
+ * struct nft_object_type - stateful object type
+ *
+ * @eval: stateful object evaluation function
+ * @list: list node in list of object types
+ * @type: stateful object numeric type
+ * @size: stateful object size
+ * @owner: module owner
+ * @maxattr: maximum netlink attribute
+ * @policy: netlink attribute policy
+ * @init: initialize object from netlink attributes
+ * @destroy: release existing stateful object
+ * @dump: netlink dump stateful object
+ */
+struct nft_object_type {
+   void(*eval)(struct nft_object *obj,
+   struct nft_regs *regs,
+   const struct nft_pktinfo *pkt);
+   struct list_headlist;
+   u32 type;
+   unsigned intsize;
+   unsigned intmaxattr;
+   struct module   *owner;
+   const struct nla_policy *policy;
+   int (*init)(const struct nlattr * const 
tb[],
+   struct nft_object *obj);
+   void(*destroy)(struct nft_object *obj);
+   int (*dump)(struct sk_buff *skb,
+   const struct nft_object *obj);
+};
+
+int nft_register_obj(struct nft_object_type *obj_type);
+void nft_unregister_obj(struct nft_object_type *obj_type);
+
+/**
  * struct nft_traceinfo - nft tracing information and state
  *
  * @pkt: pktinfo currently processed
@@ -981,6 +1050,9 @@ void nft_trace_notify(struct nft_traceinfo *info);
 #define MODULE_ALIAS_NFT_SET() \
MODULE_ALIAS("nft-set")
 
+#define MODULE_ALIAS_NFT_OBJ(type) \
+   MODULE_ALIAS("nft-obj-" __stringify(type))
+
 /*
  * The gencursor defines two generations, the currently 

[PATCH 14/50] netfilter: add and use nf_ct_netns_get/put

2016-12-07 Thread Pablo Neira Ayuso
From: Florian Westphal 

currently aliased to try_module_get/_put.
Will be changed in next patch when we add functions to make use of ->net
argument to store usercount per l3proto tracker.

This is needed to avoid registering the conntrack hooks in all netns and
later only enable connection tracking in those that need conntrack.

Signed-off-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
---
 include/net/netfilter/nf_conntrack.h |  4 
 net/ipv4/netfilter/ipt_CLUSTERIP.c   |  4 ++--
 net/ipv4/netfilter/ipt_SYNPROXY.c|  4 ++--
 net/ipv6/netfilter/ip6t_SYNPROXY.c   |  4 ++--
 net/netfilter/nf_conntrack_proto.c   | 12 
 net/netfilter/nft_ct.c   | 26 +-
 net/netfilter/xt_CONNSECMARK.c   |  4 ++--
 net/netfilter/xt_CT.c|  6 +++---
 net/netfilter/xt_connbytes.c |  4 ++--
 net/netfilter/xt_connlabel.c |  6 +++---
 net/netfilter/xt_connlimit.c |  6 +++---
 net/netfilter/xt_connmark.c  |  8 
 net/netfilter/xt_conntrack.c |  4 ++--
 net/netfilter/xt_helper.c|  4 ++--
 net/netfilter/xt_state.c |  4 ++--
 15 files changed, 58 insertions(+), 42 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack.h 
b/include/net/netfilter/nf_conntrack.h
index d9d52c020a70..5916aa9ab3f0 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -181,6 +181,10 @@ static inline void nf_ct_put(struct nf_conn *ct)
 int nf_ct_l3proto_try_module_get(unsigned short l3proto);
 void nf_ct_l3proto_module_put(unsigned short l3proto);
 
+/* load module; enable/disable conntrack in this namespace */
+int nf_ct_netns_get(struct net *net, u8 nfproto);
+void nf_ct_netns_put(struct net *net, u8 nfproto);
+
 /*
  * Allocate a hashtable of hlist_head (if nulls == 0),
  * or hlist_nulls_head (if nulls == 1)
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c 
b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index e6e206fa86c8..21db00d0362b 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -419,7 +419,7 @@ static int clusterip_tg_check(const struct xt_tgchk_param 
*par)
}
cipinfo->config = config;
 
-   ret = nf_ct_l3proto_try_module_get(par->family);
+   ret = nf_ct_netns_get(par->net, par->family);
if (ret < 0)
pr_info("cannot load conntrack support for proto=%u\n",
par->family);
@@ -444,7 +444,7 @@ static void clusterip_tg_destroy(const struct 
xt_tgdtor_param *par)
 
clusterip_config_put(cipinfo->config);
 
-   nf_ct_l3proto_module_put(par->family);
+   nf_ct_netns_put(par->net, par->family);
 }
 
 #ifdef CONFIG_COMPAT
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c 
b/net/ipv4/netfilter/ipt_SYNPROXY.c
index 361411688221..30c0de53e254 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -418,12 +418,12 @@ static int synproxy_tg4_check(const struct xt_tgchk_param 
*par)
e->ip.invflags & XT_INV_PROTO)
return -EINVAL;
 
-   return nf_ct_l3proto_try_module_get(par->family);
+   return nf_ct_netns_get(par->net, par->family);
 }
 
 static void synproxy_tg4_destroy(const struct xt_tgdtor_param *par)
 {
-   nf_ct_l3proto_module_put(par->family);
+   nf_ct_netns_put(par->net, par->family);
 }
 
 static struct xt_target synproxy_tg4_reg __read_mostly = {
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c 
b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index 99a1216287c8..98c8dd38575a 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -440,12 +440,12 @@ static int synproxy_tg6_check(const struct xt_tgchk_param 
*par)
e->ipv6.invflags & XT_INV_PROTO)
return -EINVAL;
 
-   return nf_ct_l3proto_try_module_get(par->family);
+   return nf_ct_netns_get(par->net, par->family);
 }
 
 static void synproxy_tg6_destroy(const struct xt_tgdtor_param *par)
 {
-   nf_ct_l3proto_module_put(par->family);
+   nf_ct_netns_put(par->net, par->family);
 }
 
 static struct xt_target synproxy_tg6_reg __read_mostly = {
diff --git a/net/netfilter/nf_conntrack_proto.c 
b/net/netfilter/nf_conntrack_proto.c
index b218e70b2f74..948f1e2fc80b 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -125,6 +125,18 @@ void nf_ct_l3proto_module_put(unsigned short l3proto)
 }
 EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put);
 
+int nf_ct_netns_get(struct net *net, u8 nfproto)
+{
+   return nf_ct_l3proto_try_module_get(nfproto);
+}
+EXPORT_SYMBOL_GPL(nf_ct_netns_get);
+
+void nf_ct_netns_put(struct net *net, u8 nfproto)
+{
+   nf_ct_l3proto_module_put(nfproto);
+}
+EXPORT_SYMBOL_GPL(nf_ct_netns_put);
+
 struct nf_conntrack_l4proto *
 nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num)
 {
diff --git a/net/netfilter/nft_ct.c 

[PATCH 26/50] netfilter: nft_fib: convert htonl to ntohl properly

2016-12-07 Thread Pablo Neira Ayuso
From: Liping Zhang 

Actually ntohl and htonl are identical, so this doesn't affect
anything, but it is conceptually wrong.

Signed-off-by: Liping Zhang 
Acked-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
---
 net/ipv4/netfilter/nft_fib_ipv4.c | 2 +-
 net/ipv6/netfilter/nft_fib_ipv6.c | 2 +-
 net/netfilter/nft_fib.c   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c 
b/net/ipv4/netfilter/nft_fib_ipv4.c
index 1b49966484b3..bfffa742f397 100644
--- a/net/ipv4/netfilter/nft_fib_ipv4.c
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -198,7 +198,7 @@ nft_fib4_select_ops(const struct nft_ctx *ctx,
if (!tb[NFTA_FIB_RESULT])
return ERR_PTR(-EINVAL);
 
-   result = htonl(nla_get_be32(tb[NFTA_FIB_RESULT]));
+   result = ntohl(nla_get_be32(tb[NFTA_FIB_RESULT]));
 
switch (result) {
case NFT_FIB_RESULT_OIF:
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c 
b/net/ipv6/netfilter/nft_fib_ipv6.c
index d526bb594956..c947aad8bcc6 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -235,7 +235,7 @@ nft_fib6_select_ops(const struct nft_ctx *ctx,
if (!tb[NFTA_FIB_RESULT])
return ERR_PTR(-EINVAL);
 
-   result = htonl(nla_get_be32(tb[NFTA_FIB_RESULT]));
+   result = ntohl(nla_get_be32(tb[NFTA_FIB_RESULT]));
 
switch (result) {
case NFT_FIB_RESULT_OIF:
diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
index 249c9b80c150..29a4906adc27 100644
--- a/net/netfilter/nft_fib.c
+++ b/net/netfilter/nft_fib.c
@@ -86,7 +86,7 @@ int nft_fib_init(const struct nft_ctx *ctx, const struct 
nft_expr *expr,
if ((priv->flags & (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) == 0)
return -EINVAL;
 
-   priv->result = htonl(nla_get_be32(tb[NFTA_FIB_RESULT]));
+   priv->result = ntohl(nla_get_be32(tb[NFTA_FIB_RESULT]));
priv->dreg = nft_parse_register(tb[NFTA_FIB_DREG]);
 
switch (priv->result) {
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 39/50] netfilter: nft_quota: add depleted flag for objects

2016-12-07 Thread Pablo Neira Ayuso
Notify on depleted quota objects. The NFT_QUOTA_F_DEPLETED flag
indicates we have reached overquota.

Add pointer to table from nft_object, so we can use it when sending the
depletion notification to userspace.

Signed-off-by: Pablo Neira Ayuso 
---
 include/net/netfilter/nf_tables.h|  2 ++
 include/uapi/linux/netfilter/nf_tables.h |  1 +
 net/netfilter/nf_tables_api.c|  1 +
 net/netfilter/nft_quota.c| 36 +---
 4 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h 
b/include/net/netfilter/nf_tables.h
index 339e374c28b5..ce6fb6e83b32 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -940,6 +940,7 @@ int nft_verdict_dump(struct sk_buff *skb, int type,
  * struct nft_object - nf_tables stateful object
  *
  * @list: table stateful object list node
+ * @table: table this object belongs to
  * @type: pointer to object type
  * @data: pointer to object data
  * @name: name of this stateful object
@@ -950,6 +951,7 @@ int nft_verdict_dump(struct sk_buff *skb, int type,
 struct nft_object {
struct list_headlist;
charname[NFT_OBJ_MAXNAMELEN];
+   struct nft_table*table;
u32 genmask:2,
use:30;
/* runtime data below here */
diff --git a/include/uapi/linux/netfilter/nf_tables.h 
b/include/uapi/linux/netfilter/nf_tables.h
index 399eac1eee91..4864caca1e8e 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -983,6 +983,7 @@ enum nft_queue_attributes {
 
 enum nft_quota_flags {
NFT_QUOTA_F_INV = (1 << 0),
+   NFT_QUOTA_F_DEPLETED= (1 << 1),
 };
 
 /**
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 9d2ed3f520ef..c5419701ca79 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4075,6 +4075,7 @@ static int nf_tables_newobj(struct net *net, struct sock 
*nlsk,
err = PTR_ERR(obj);
goto err1;
}
+   obj->table = table;
nla_strlcpy(obj->name, nla[NFTA_OBJ_NAME], NFT_OBJ_MAXNAMELEN);
 
err = nft_trans_obj_add(&ctx, NFT_MSG_NEWOBJ, obj);
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index 5d25f57497cb..7f27ebdce7ab 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -17,7 +17,7 @@
 
 struct nft_quota {
u64 quota;
-   boolinvert;
+   unsigned long   flags;
atomic64_t  consumed;
 };
 
@@ -27,11 +27,16 @@ static inline bool nft_overquota(struct nft_quota *priv,
return atomic64_add_return(skb->len, &priv->consumed) >= priv->quota;
 }
 
+static inline bool nft_quota_invert(struct nft_quota *priv)
+{
+   return priv->flags & NFT_QUOTA_F_INV;
+}
+
 static inline void nft_quota_do_eval(struct nft_quota *priv,
 struct nft_regs *regs,
 const struct nft_pktinfo *pkt)
 {
-   if (nft_overquota(priv, pkt->skb) ^ priv->invert)
+   if (nft_overquota(priv, pkt->skb) ^ nft_quota_invert(priv))
regs->verdict.code = NFT_BREAK;
 }
 
@@ -40,19 +45,29 @@ static const struct nla_policy 
nft_quota_policy[NFTA_QUOTA_MAX + 1] = {
[NFTA_QUOTA_FLAGS]  = { .type = NLA_U32 },
 };
 
+#define NFT_QUOTA_DEPLETED_BIT 1   /* From NFT_QUOTA_F_DEPLETED. */
+
 static void nft_quota_obj_eval(struct nft_object *obj,
   struct nft_regs *regs,
   const struct nft_pktinfo *pkt)
 {
struct nft_quota *priv = nft_obj_data(obj);
+   bool overquota;
 
-   nft_quota_do_eval(priv, regs, pkt);
+   overquota = nft_overquota(priv, pkt->skb);
+   if (overquota ^ nft_quota_invert(priv))
+   regs->verdict.code = NFT_BREAK;
+
+   if (overquota &&
+   !test_and_set_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags))
+   nft_obj_notify(nft_net(pkt), obj->table, obj, 0, 0,
+  NFT_MSG_NEWOBJ, nft_pf(pkt), 0, GFP_ATOMIC);
 }
 
 static int nft_quota_do_init(const struct nlattr * const tb[],
 struct nft_quota *priv)
 {
-   u32 flags = 0;
+   unsigned long flags = 0;
u64 quota;
 
if (!tb[NFTA_QUOTA_BYTES])
@@ -66,10 +81,12 @@ static int nft_quota_do_init(const struct nlattr * const 
tb[],
flags = ntohl(nla_get_be32(tb[NFTA_QUOTA_FLAGS]));
if (flags & ~NFT_QUOTA_F_INV)
return -EINVAL;
+   if (flags & NFT_QUOTA_F_DEPLETED)
+   return -EOPNOTSUPP;
}
 
priv->quota = quota;
-   priv->invert = (flags & NFT_QUOTA_F_INV) ? true : false;
+   priv->flags = flags;
  

[PATCH 31/50] netfilter: add and use nf_fwd_netdev_egress

2016-12-07 Thread Pablo Neira Ayuso
From: Florian Westphal 

... so we can use current skb instead of working with a clone.

Signed-off-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
---
 include/net/netfilter/nf_dup_netdev.h |  1 +
 net/netfilter/nf_dup_netdev.c | 33 +
 net/netfilter/nft_fwd_netdev.c|  4 ++--
 3 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/include/net/netfilter/nf_dup_netdev.h 
b/include/net/netfilter/nf_dup_netdev.h
index 397dcae349f9..3e919356bedf 100644
--- a/include/net/netfilter/nf_dup_netdev.h
+++ b/include/net/netfilter/nf_dup_netdev.h
@@ -2,5 +2,6 @@
 #define _NF_DUP_NETDEV_H_
 
 void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif);
+void nf_fwd_netdev_egress(const struct nft_pktinfo *pkt, int oif);
 
 #endif
diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c
index 44ae986c383f..c9d7f95768ab 100644
--- a/net/netfilter/nf_dup_netdev.c
+++ b/net/netfilter/nf_dup_netdev.c
@@ -14,6 +14,29 @@
 #include 
 #include 
 
+static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev)
+{
+   if (skb_mac_header_was_set(skb))
+   skb_push(skb, skb->mac_len);
+
+   skb->dev = dev;
+   dev_queue_xmit(skb);
+}
+
+void nf_fwd_netdev_egress(const struct nft_pktinfo *pkt, int oif)
+{
+   struct net_device *dev;
+
+   dev = dev_get_by_index_rcu(nft_net(pkt), oif);
+   if (!dev) {
+   kfree_skb(pkt->skb);
+   return;
+   }
+
+   nf_do_netdev_egress(pkt->skb, dev);
+}
+EXPORT_SYMBOL_GPL(nf_fwd_netdev_egress);
+
 void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif)
 {
struct net_device *dev;
@@ -24,14 +47,8 @@ void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int 
oif)
return;
 
skb = skb_clone(pkt->skb, GFP_ATOMIC);
-   if (skb == NULL)
-   return;
-
-   if (skb_mac_header_was_set(skb))
-   skb_push(skb, skb->mac_len);
-
-   skb->dev = dev;
-   dev_queue_xmit(skb);
+   if (skb)
+   nf_do_netdev_egress(skb, dev);
 }
 EXPORT_SYMBOL_GPL(nf_dup_netdev_egress);
 
diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c
index 763ebc3e0b2b..ce13a50b9189 100644
--- a/net/netfilter/nft_fwd_netdev.c
+++ b/net/netfilter/nft_fwd_netdev.c
@@ -26,8 +26,8 @@ static void nft_fwd_netdev_eval(const struct nft_expr *expr,
struct nft_fwd_netdev *priv = nft_expr_priv(expr);
int oif = regs->data[priv->sreg_dev];
 
-   nf_dup_netdev_egress(pkt, oif);
-   regs->verdict.code = NF_DROP;
+   nf_fwd_netdev_egress(pkt, oif);
+   regs->verdict.code = NF_STOLEN;
 }
 
 static const struct nla_policy nft_fwd_netdev_policy[NFTA_FWD_MAX + 1] = {
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 25/50] netfilter: x_tables: pack percpu counter allocations

2016-12-07 Thread Pablo Neira Ayuso
From: Florian Westphal 

instead of allocating each xt_counter individually, allocate 4k chunks
and then use these for counter allocation requests.

This should speed up rule evaluation by increasing data locality,
also speeds up ruleset loading because we reduce calls to the percpu
allocator.

As Eric points out we can't use PAGE_SIZE, page_allocator would fail on
arches with 64k page size.

Suggested-by: Eric Dumazet 
Signed-off-by: Florian Westphal 
Acked-by: Eric Dumazet 
Signed-off-by: Pablo Neira Ayuso 
---
 include/linux/netfilter/x_tables.h |  7 ++-
 net/ipv4/netfilter/arp_tables.c|  9 ++---
 net/ipv4/netfilter/ip_tables.c |  9 ++---
 net/ipv6/netfilter/ip6_tables.c|  9 ++---
 net/netfilter/x_tables.c   | 33 -
 5 files changed, 48 insertions(+), 19 deletions(-)

diff --git a/include/linux/netfilter/x_tables.h 
b/include/linux/netfilter/x_tables.h
index 05a94bd32c55..5117e4d2ddfa 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -403,8 +403,13 @@ static inline unsigned long ifname_compare_aligned(const 
char *_a,
return ret;
 }
 
+struct xt_percpu_counter_alloc_state {
+   unsigned int off;
+   const char __percpu *mem;
+};
 
-bool xt_percpu_counter_alloc(struct xt_counters *counters);
+bool xt_percpu_counter_alloc(struct xt_percpu_counter_alloc_state *state,
+struct xt_counters *counter);
 void xt_percpu_counter_free(struct xt_counters *cnt);
 
 static inline struct xt_counters *
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 808deb275ceb..1258a9ab62ef 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -411,13 +411,14 @@ static inline int check_target(struct arpt_entry *e, 
const char *name)
 }
 
 static inline int
-find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
+find_check_entry(struct arpt_entry *e, const char *name, unsigned int size,
+struct xt_percpu_counter_alloc_state *alloc_state)
 {
struct xt_entry_target *t;
struct xt_target *target;
int ret;
 
-   if (!xt_percpu_counter_alloc(&e->counters))
+   if (!xt_percpu_counter_alloc(alloc_state, &e->counters))
return -ENOMEM;
 
t = arpt_get_target(e);
@@ -525,6 +526,7 @@ static inline void cleanup_entry(struct arpt_entry *e)
 static int translate_table(struct xt_table_info *newinfo, void *entry0,
   const struct arpt_replace *repl)
 {
+   struct xt_percpu_counter_alloc_state alloc_state = { 0 };
struct arpt_entry *iter;
unsigned int *offsets;
unsigned int i;
@@ -587,7 +589,8 @@ static int translate_table(struct xt_table_info *newinfo, 
void *entry0,
/* Finally, each sanity check must pass */
i = 0;
xt_entry_foreach(iter, entry0, newinfo->size) {
-   ret = find_check_entry(iter, repl->name, repl->size);
+   ret = find_check_entry(iter, repl->name, repl->size,
+  &alloc_state);
if (ret != 0)
break;
++i;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index a48430d3420f..308b456723f0 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -531,7 +531,8 @@ static int check_target(struct ipt_entry *e, struct net 
*net, const char *name)
 
 static int
 find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
-unsigned int size)
+unsigned int size,
+struct xt_percpu_counter_alloc_state *alloc_state)
 {
struct xt_entry_target *t;
struct xt_target *target;
@@ -540,7 +541,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, 
const char *name,
struct xt_mtchk_param mtpar;
struct xt_entry_match *ematch;
 
-   if (!xt_percpu_counter_alloc(&e->counters))
+   if (!xt_percpu_counter_alloc(alloc_state, &e->counters))
return -ENOMEM;
 
j = 0;
@@ -676,6 +677,7 @@ static int
 translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
const struct ipt_replace *repl)
 {
+   struct xt_percpu_counter_alloc_state alloc_state = { 0 };
struct ipt_entry *iter;
unsigned int *offsets;
unsigned int i;
@@ -735,7 +737,8 @@ translate_table(struct net *net, struct xt_table_info 
*newinfo, void *entry0,
/* Finally, each sanity check must pass */
i = 0;
xt_entry_foreach(iter, entry0, newinfo->size) {
-   ret = find_check_entry(iter, net, repl->name, repl->size);
+   ret = find_check_entry(iter, net, repl->name, repl->size,
+  &alloc_state);
if (ret != 0)
 

[PATCH 20/50] netfilter: introduce accessor functions for hook entries

2016-12-07 Thread Pablo Neira Ayuso
From: Aaron Conole 

This allows easier future refactoring.

Signed-off-by: Aaron Conole 
Signed-off-by: Pablo Neira Ayuso 
---
 include/linux/netfilter.h   | 27 +++
 net/bridge/br_netfilter_hooks.c |  2 +-
 net/netfilter/core.c| 10 --
 net/netfilter/nf_queue.c|  5 ++---
 4 files changed, 34 insertions(+), 10 deletions(-)

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 69230140215b..575aa198097e 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -79,6 +79,33 @@ struct nf_hook_entry {
const struct nf_hook_ops*orig_ops;
 };
 
+static inline void
+nf_hook_entry_init(struct nf_hook_entry *entry,const struct 
nf_hook_ops *ops)
+{
+   entry->next = NULL;
+   entry->ops = *ops;
+   entry->orig_ops = ops;
+}
+
+static inline int
+nf_hook_entry_priority(const struct nf_hook_entry *entry)
+{
+   return entry->ops.priority;
+}
+
+static inline int
+nf_hook_entry_hookfn(const struct nf_hook_entry *entry, struct sk_buff *skb,
+struct nf_hook_state *state)
+{
+   return entry->ops.hook(entry->ops.priv, skb, state);
+}
+
+static inline const struct nf_hook_ops *
+nf_hook_entry_ops(const struct nf_hook_entry *entry)
+{
+   return entry->orig_ops;
+}
+
 static inline void nf_hook_state_init(struct nf_hook_state *p,
  unsigned int hook,
  u_int8_t pf,
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 83d937f4415e..adad2eed29e6 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -1010,7 +1010,7 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net,
 
elem = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]);
 
-   while (elem && (elem->ops.priority <= NF_BR_PRI_BRNF))
+   while (elem && (nf_hook_entry_priority(elem) <= NF_BR_PRI_BRNF))
elem = rcu_dereference(elem->next);
 
if (!elem)
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index de30e08d58f2..2bb46e2d8d30 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -102,15 +102,13 @@ int nf_register_net_hook(struct net *net, const struct 
nf_hook_ops *reg)
if (!entry)
return -ENOMEM;
 
-   entry->orig_ops = reg;
-   entry->ops  = *reg;
-   entry->next = NULL;
+   nf_hook_entry_init(entry, reg);
 
mutex_lock(&nf_hook_mutex);
 
/* Find the spot in the list */
while ((p = nf_entry_dereference(*pp)) != NULL) {
-   if (reg->priority < p->orig_ops->priority)
+   if (reg->priority < nf_hook_entry_priority(p))
break;
pp = &p->next;
}
@@ -140,7 +138,7 @@ void nf_unregister_net_hook(struct net *net, const struct 
nf_hook_ops *reg)
 
mutex_lock(&nf_hook_mutex);
while ((p = nf_entry_dereference(*pp)) != NULL) {
-   if (p->orig_ops == reg) {
+   if (nf_hook_entry_ops(p) == reg) {
rcu_assign_pointer(*pp, p->next);
break;
}
@@ -311,7 +309,7 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state 
*state,
int ret;
 
do {
-   verdict = entry->ops.hook(entry->ops.priv, skb, state);
+   verdict = nf_hook_entry_hookfn(entry, skb, state);
switch (verdict & NF_VERDICT_MASK) {
case NF_ACCEPT:
entry = rcu_dereference(entry->next);
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 77cba9f6ccb6..4a7662486f44 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -185,7 +185,7 @@ static unsigned int nf_iterate(struct sk_buff *skb,
 
do {
 repeat:
-   verdict = (*entryp)->ops.hook((*entryp)->ops.priv, skb, state);
+   verdict = nf_hook_entry_hookfn((*entryp), skb, state);
if (verdict != NF_ACCEPT) {
if (verdict != NF_REPEAT)
return verdict;
@@ -200,7 +200,6 @@ static unsigned int nf_iterate(struct sk_buff *skb,
 void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 {
struct nf_hook_entry *hook_entry = entry->hook;
-   struct nf_hook_ops *elem = &hook_entry->ops;
struct sk_buff *skb = entry->skb;
const struct nf_afinfo *afinfo;
int err;
@@ -209,7 +208,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int 
verdict)
 
/* Continue traversal iff userspace said ok... */
if (verdict == NF_REPEAT)
-   verdict = elem->hook(elem->priv, skb, &entry->state);
+   verdict = nf_hook_entry_hookfn(hook_entry, skb, &entry->state);
 
if (verdict == NF_ACCEPT) {
afinfo = nf_get_afinfo(entry->state.pf);
-- 

[PATCH 24/50] netfilter: x_tables: pass xt_counters struct to counter allocator

2016-12-07 Thread Pablo Neira Ayuso
From: Florian Westphal 

Keeps some noise away from a followup patch.

Signed-off-by: Florian Westphal 
Acked-by: Eric Dumazet 
Signed-off-by: Pablo Neira Ayuso 
---
 include/linux/netfilter/x_tables.h | 27 +--
 net/ipv4/netfilter/arp_tables.c|  5 +
 net/ipv4/netfilter/ip_tables.c |  5 +
 net/ipv6/netfilter/ip6_tables.c|  5 +
 net/netfilter/x_tables.c   | 30 ++
 5 files changed, 34 insertions(+), 38 deletions(-)

diff --git a/include/linux/netfilter/x_tables.h 
b/include/linux/netfilter/x_tables.h
index 6e61edeb68e3..05a94bd32c55 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -404,32 +404,7 @@ static inline unsigned long ifname_compare_aligned(const 
char *_a,
 }
 
 
-/* On SMP, ip(6)t_entry->counters.pcnt holds address of the
- * real (percpu) counter.  On !SMP, its just the packet count,
- * so nothing needs to be done there.
- *
- * xt_percpu_counter_alloc returns the address of the percpu
- * counter, or 0 on !SMP. We force an alignment of 16 bytes
- * so that bytes/packets share a common cache line.
- *
- * Hence caller must use IS_ERR_VALUE to check for error, this
- * allows us to return 0 for single core systems without forcing
- * callers to deal with SMP vs. NONSMP issues.
- */
-static inline unsigned long xt_percpu_counter_alloc(void)
-{
-   if (nr_cpu_ids > 1) {
-   void __percpu *res = __alloc_percpu(sizeof(struct xt_counters),
-   sizeof(struct xt_counters));
-
-   if (res == NULL)
-   return -ENOMEM;
-
-   return (__force unsigned long) res;
-   }
-
-   return 0;
-}
+bool xt_percpu_counter_alloc(struct xt_counters *counters);
 void xt_percpu_counter_free(struct xt_counters *cnt);
 
 static inline struct xt_counters *
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 019f8e8dda6d..808deb275ceb 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -415,13 +415,10 @@ find_check_entry(struct arpt_entry *e, const char *name, 
unsigned int size)
 {
struct xt_entry_target *t;
struct xt_target *target;
-   unsigned long pcnt;
int ret;
 
-   pcnt = xt_percpu_counter_alloc();
-   if (IS_ERR_VALUE(pcnt))
+   if (!xt_percpu_counter_alloc(&e->counters))
return -ENOMEM;
-   e->counters.pcnt = pcnt;
 
t = arpt_get_target(e);
target = xt_request_find_target(NFPROTO_ARP, t->u.user.name,
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index acc9a0c45bdf..a48430d3420f 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -539,12 +539,9 @@ find_check_entry(struct ipt_entry *e, struct net *net, 
const char *name,
unsigned int j;
struct xt_mtchk_param mtpar;
struct xt_entry_match *ematch;
-   unsigned long pcnt;
 
-   pcnt = xt_percpu_counter_alloc();
-   if (IS_ERR_VALUE(pcnt))
+   if (!xt_percpu_counter_alloc(&e->counters))
return -ENOMEM;
-   e->counters.pcnt = pcnt;
 
j = 0;
mtpar.net   = net;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 88b56a98905b..a5a92083fd62 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -570,12 +570,9 @@ find_check_entry(struct ip6t_entry *e, struct net *net, 
const char *name,
unsigned int j;
struct xt_mtchk_param mtpar;
struct xt_entry_match *ematch;
-   unsigned long pcnt;
 
-   pcnt = xt_percpu_counter_alloc();
-   if (IS_ERR_VALUE(pcnt))
+   if (!xt_percpu_counter_alloc(&e->counters))
return -ENOMEM;
-   e->counters.pcnt = pcnt;
 
j = 0;
mtpar.net   = net;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 0580029eb0ee..be5e83047594 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1615,6 +1615,36 @@ void xt_proto_fini(struct net *net, u_int8_t af)
 }
 EXPORT_SYMBOL_GPL(xt_proto_fini);
 
+/**
+ * xt_percpu_counter_alloc - allocate x_tables rule counter
+ *
+ * @counter: pointer to counter struct inside the ip(6)/arpt_entry struct
+ *
+ * On SMP, the packet counter [ ip(6)t_entry->counters.pcnt ] will then
+ * contain the address of the real (percpu) counter.
+ *
+ * Rule evaluation needs to use xt_get_this_cpu_counter() helper
+ * to fetch the real percpu counter.
+ *
+ * returns false on error.
+ */
+bool xt_percpu_counter_alloc(struct xt_counters *counter)
+{
+   void __percpu *res;
+
+   if (nr_cpu_ids <= 1)
+   return true;
+
+   res = __alloc_percpu(sizeof(struct xt_counters),
+sizeof(struct xt_counters));
+   if (!res)
+   

[PATCH 42/50] netfilter: nf_tables: allow to filter stateful object dumps by type

2016-12-07 Thread Pablo Neira Ayuso
This patch adds the netlink code to filter out dump of stateful objects,
through the NFTA_OBJ_TYPE netlink attribute.

Signed-off-by: Pablo Neira Ayuso 
---
 net/netfilter/nf_tables_api.c | 50 +++
 1 file changed, 50 insertions(+)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index b4db5bf4c135..b04d4ee1d533 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4183,12 +4183,18 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, 
struct net *net,
return -1;
 }
 
+struct nft_obj_filter {
+   char table[NFT_OBJ_MAXNAMELEN];
+   u32 type;
+};
+
 static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
 {
const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
const struct nft_af_info *afi;
const struct nft_table *table;
unsigned int idx = 0, s_idx = cb->args[0];
+   struct nft_obj_filter *filter = cb->data;
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
struct nft_object *obj;
@@ -4213,6 +4219,13 @@ static int nf_tables_dump_obj(struct sk_buff *skb, 
struct netlink_callback *cb)
if (idx > s_idx)
memset(&cb->args[1], 0,
   sizeof(cb->args) - 
sizeof(cb->args[0]));
+   if (filter->table[0] &&
+   strcmp(filter->table, table->name))
+   goto cont;
+   if (filter->type != NFT_OBJECT_UNSPEC &&
+   obj->type->type != filter->type)
+   goto cont;
+
if (nf_tables_fill_obj_info(skb, net, 
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFT_MSG_NEWOBJ,
@@ -4233,6 +4246,31 @@ static int nf_tables_dump_obj(struct sk_buff *skb, 
struct netlink_callback *cb)
return skb->len;
 }
 
+static int nf_tables_dump_obj_done(struct netlink_callback *cb)
+{
+   kfree(cb->data);
+
+   return 0;
+}
+
+static struct nft_obj_filter *
+nft_obj_filter_alloc(const struct nlattr * const nla[])
+{
+   struct nft_obj_filter *filter;
+
+   filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+   if (!filter)
+   return ERR_PTR(-ENOMEM);
+
+   if (nla[NFTA_OBJ_TABLE])
+   nla_strlcpy(filter->table, nla[NFTA_OBJ_TABLE],
+   NFT_TABLE_MAXNAMELEN);
+   if (nla[NFTA_OBJ_TYPE])
+   filter->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
+
+   return filter;
+}
+
 static int nf_tables_getobj(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -4251,7 +4289,19 @@ static int nf_tables_getobj(struct net *net, struct sock 
*nlsk,
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.dump = nf_tables_dump_obj,
+   .done = nf_tables_dump_obj_done,
};
+
+   if (nla[NFTA_OBJ_TABLE] ||
+   nla[NFTA_OBJ_TYPE]) {
+   struct nft_obj_filter *filter;
+
+   filter = nft_obj_filter_alloc(nla);
+   if (IS_ERR(filter))
+   return -ENOMEM;
+
+   c.data = filter;
+   }
return netlink_dump_start(nlsk, skb, nlh, &c);
}
 
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 34/50] netfilter: nft_quota: add stateful object type

2016-12-07 Thread Pablo Neira Ayuso
Register a new quota stateful object type into the new stateful object
infrastructure.

Signed-off-by: Pablo Neira Ayuso 
---
 include/uapi/linux/netfilter/nf_tables.h |  1 +
 net/netfilter/nft_quota.c| 96 +++-
 2 files changed, 84 insertions(+), 13 deletions(-)

diff --git a/include/uapi/linux/netfilter/nf_tables.h 
b/include/uapi/linux/netfilter/nf_tables.h
index e352ef65d753..ad0577ba5d2a 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1187,6 +1187,7 @@ enum nft_fib_flags {
 
 #define NFT_OBJECT_UNSPEC  0
 #define NFT_OBJECT_COUNTER 1
+#define NFT_OBJECT_QUOTA   2
 
 /**
  * enum nft_object_attributes - nf_tables stateful object netlink attributes
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index c00104c07095..09ce72b1d6bf 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -27,12 +27,10 @@ static inline bool nft_overquota(struct nft_quota *priv,
return atomic64_sub_return(pkt->skb->len, &priv->remain) < 0;
 }
 
-static void nft_quota_eval(const struct nft_expr *expr,
-  struct nft_regs *regs,
-  const struct nft_pktinfo *pkt)
+static inline void nft_quota_do_eval(struct nft_quota *priv,
+struct nft_regs *regs,
+const struct nft_pktinfo *pkt)
 {
-   struct nft_quota *priv = nft_expr_priv(expr);
-
if (nft_overquota(priv, pkt) ^ priv->invert)
regs->verdict.code = NFT_BREAK;
 }
@@ -42,11 +40,18 @@ static const struct nla_policy 
nft_quota_policy[NFTA_QUOTA_MAX + 1] = {
[NFTA_QUOTA_FLAGS]  = { .type = NLA_U32 },
 };
 
-static int nft_quota_init(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nlattr * const tb[])
+static void nft_quota_obj_eval(struct nft_object *obj,
+  struct nft_regs *regs,
+  const struct nft_pktinfo *pkt)
+{
+   struct nft_quota *priv = nft_obj_data(obj);
+
+   nft_quota_do_eval(priv, regs, pkt);
+}
+
+static int nft_quota_do_init(const struct nlattr * const tb[],
+struct nft_quota *priv)
 {
-   struct nft_quota *priv = nft_expr_priv(expr);
u32 flags = 0;
u64 quota;
 
@@ -70,9 +75,16 @@ static int nft_quota_init(const struct nft_ctx *ctx,
return 0;
 }
 
-static int nft_quota_dump(struct sk_buff *skb, const struct nft_expr *expr)
+static int nft_quota_obj_init(const struct nlattr * const tb[],
+ struct nft_object *obj)
+{
+   struct nft_quota *priv = nft_obj_data(obj);
+
+   return nft_quota_do_init(tb, priv);
+}
+
+static int nft_quota_do_dump(struct sk_buff *skb, const struct nft_quota *priv)
 {
-   const struct nft_quota *priv = nft_expr_priv(expr);
u32 flags = priv->invert ? NFT_QUOTA_F_INV : 0;
 
if (nla_put_be64(skb, NFTA_QUOTA_BYTES, cpu_to_be64(priv->quota),
@@ -85,6 +97,49 @@ static int nft_quota_dump(struct sk_buff *skb, const struct 
nft_expr *expr)
return -1;
 }
 
+static int nft_quota_obj_dump(struct sk_buff *skb, const struct nft_object 
*obj)
+{
+   struct nft_quota *priv = nft_obj_data(obj);
+
+   return nft_quota_do_dump(skb, priv);
+}
+
+static struct nft_object_type nft_quota_obj __read_mostly = {
+   .type   = NFT_OBJECT_QUOTA,
+   .size   = sizeof(struct nft_quota),
+   .maxattr= NFTA_QUOTA_MAX,
+   .policy = nft_quota_policy,
+   .init   = nft_quota_obj_init,
+   .eval   = nft_quota_obj_eval,
+   .dump   = nft_quota_obj_dump,
+   .owner  = THIS_MODULE,
+};
+
+static void nft_quota_eval(const struct nft_expr *expr,
+  struct nft_regs *regs,
+  const struct nft_pktinfo *pkt)
+{
+   struct nft_quota *priv = nft_expr_priv(expr);
+
+   nft_quota_do_eval(priv, regs, pkt);
+}
+
+static int nft_quota_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+   struct nft_quota *priv = nft_expr_priv(expr);
+
+   return nft_quota_do_init(tb, priv);
+}
+
+static int nft_quota_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+   const struct nft_quota *priv = nft_expr_priv(expr);
+
+   return nft_quota_do_dump(skb, priv);
+}
+
 static struct nft_expr_type nft_quota_type;
 static const struct nft_expr_ops nft_quota_ops = {
.type   = &nft_quota_type,
@@ -105,12 +160,26 @@ static struct nft_expr_type nft_quota_type __read_mostly 
= {
 
 static int __init nft_quota_module_init(void)
 {
-return nft_register_expr(&nft_quota_type);
+   int err;
+
+   err = nft_register_obj(&nft_quota_obj);
+   if (err 

[PATCH 29/50] netfilter: xt_multiport: Fix wrong unmatch result with multiple ports

2016-12-07 Thread Pablo Neira Ayuso
From: Gao Feng 

I lost one test case in the last commit for xt_multiport.
For example, the rule is "-m multiport --dports 22,80,443".
When the first port does not match but the second one does, the current code
does not return the right result.
It returns false immediately when the first port does not match.

Fixes: dd2602d00f80 ("netfilter: xt_multiport: Use switch case instead
of multiple condition checks")
Signed-off-by: Gao Feng 
Signed-off-by: Pablo Neira Ayuso 
---
 net/netfilter/xt_multiport.c | 26 +++---
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c
index ec06fb1cb16f..1cde0e4985b7 100644
--- a/net/netfilter/xt_multiport.c
+++ b/net/netfilter/xt_multiport.c
@@ -44,12 +44,18 @@ ports_match_v1(const struct xt_multiport_v1 *minfo,
 
switch (minfo->flags) {
case XT_MULTIPORT_SOURCE:
-   return (src >= s && src <= e) ^ minfo->invert;
+   if (src >= s && src <= e)
+   return true ^ minfo->invert;
+   break;
case XT_MULTIPORT_DESTINATION:
-   return (dst >= s && dst <= e) ^ minfo->invert;
+   if (dst >= s && dst <= e)
+   return true ^ minfo->invert;
+   break;
case XT_MULTIPORT_EITHER:
-   return ((dst >= s && dst <= e) ||
-   (src >= s && src <= e)) ^ minfo->invert;
+   if ((dst >= s && dst <= e) ||
+   (src >= s && src <= e))
+   return true ^ minfo->invert;
+   break;
default:
break;
}
@@ -59,11 +65,17 @@ ports_match_v1(const struct xt_multiport_v1 *minfo,
 
switch (minfo->flags) {
case XT_MULTIPORT_SOURCE:
-   return (src == s) ^ minfo->invert;
+   if (src == s)
+   return true ^ minfo->invert;
+   break;
case XT_MULTIPORT_DESTINATION:
-   return (dst == s) ^ minfo->invert;
+   if (dst == s)
+   return true ^ minfo->invert;
+   break;
case XT_MULTIPORT_EITHER:
-   return (src == s || dst == s) ^ minfo->invert;
+   if (src == s || dst == s)
+   return true ^ minfo->invert;
+   break;
default:
break;
}
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 30/50] netfilter: ingress: translate 0 nf_hook_slow retval to -1

2016-12-07 Thread Pablo Neira Ayuso
From: Florian Westphal 

The caller assumes that < 0 means that skb was stolen (or free'd).

All other return values continue skb processing.

nf_hook_slow returns 3 different return value types:

A) a (negative) errno value: the skb was dropped (NF_DROP, e.g.
by iptables '-j DROP' rule).

B) 0. The skb was stolen by the hook or queued to userspace.

C) 1. all hooks returned NF_ACCEPT so the caller should invoke
   the okfn so packet processing can continue.

nft ingress facility currently doesn't have the 'okfn' that
the NF_HOOK() macros use; there is no nfqueue support either.

So 1 means that nf_hook_ingress() caller should go on processing the skb.

In order to allow use of NF_STOLEN from ingress we need to translate
this to an errno number, else we'd crash because we continue with
already-free'd (or about to be free-d) skb.

The errno value isn't checked; it's just important that it's less than 0,
so return -1.

Signed-off-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
---
 include/linux/netfilter_ingress.h | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/include/linux/netfilter_ingress.h 
b/include/linux/netfilter_ingress.h
index 2dc3b49b804a..59476061de86 100644
--- a/include/linux/netfilter_ingress.h
+++ b/include/linux/netfilter_ingress.h
@@ -19,6 +19,7 @@ static inline int nf_hook_ingress(struct sk_buff *skb)
 {
struct nf_hook_entry *e = rcu_dereference(skb->dev->nf_hooks_ingress);
struct nf_hook_state state;
+   int ret;
 
/* Must recheck the ingress hook head, in the event it became NULL
 * after the check in nf_hook_ingress_active evaluated to true.
@@ -29,7 +30,11 @@ static inline int nf_hook_ingress(struct sk_buff *skb)
nf_hook_state_init(&state, NF_NETDEV_INGRESS,
   NFPROTO_NETDEV, skb->dev, NULL, NULL,
   dev_net(skb->dev), NULL);
-   return nf_hook_slow(skb, &state, e);
+   ret = nf_hook_slow(skb, &state, e);
+   if (ret == 0)
+   return -1;
+
+   return ret;
 }
 
 static inline void nf_hook_ingress_init(struct net_device *dev)
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 40/50] netfilter: nf_tables: add stateful object reference to set elements

2016-12-07 Thread Pablo Neira Ayuso
This patch allows you to refer to stateful objects from set elements.
This provides the infrastructure to create maps where the right hand
side of the mapping is a stateful object.

This allows us to build dictionaries of stateful objects, that you can
use to perform fast lookups using any arbitrary key combination.

Signed-off-by: Pablo Neira Ayuso 
---
 include/net/netfilter/nf_tables.h|  9 
 include/uapi/linux/netfilter/nf_tables.h |  8 
 net/netfilter/nf_tables_api.c| 72 +++-
 3 files changed, 79 insertions(+), 10 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h 
b/include/net/netfilter/nf_tables.h
index ce6fb6e83b32..85f0f03f1e87 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -326,6 +326,7 @@ void nft_unregister_set(struct nft_set_ops *ops);
  * @name: name of the set
  * @ktype: key type (numeric type defined by userspace, not used in the 
kernel)
  * @dtype: data type (verdict or numeric type defined by userspace)
+ * @objtype: object type (see NFT_OBJECT_* definitions)
  * @size: maximum set size
  * @nelems: number of elements
  * @ndeact: number of deactivated elements queued for removal
@@ -347,6 +348,7 @@ struct nft_set {
char name[NFT_SET_MAXNAMELEN];
u32 ktype;
u32 dtype;
+   u32 objtype;
u32 size;
atomic_t nelems;
u32 ndeact;
@@ -416,6 +418,7 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct 
nft_set *set,
  * @NFT_SET_EXT_EXPIRATION: element expiration time
  * @NFT_SET_EXT_USERDATA: user data associated with the element
  * @NFT_SET_EXT_EXPR: expression assiociated with the element
+ * @NFT_SET_EXT_OBJREF: stateful object reference associated with element
  * @NFT_SET_EXT_NUM: number of extension types
  */
 enum nft_set_extensions {
@@ -426,6 +429,7 @@ enum nft_set_extensions {
NFT_SET_EXT_EXPIRATION,
NFT_SET_EXT_USERDATA,
NFT_SET_EXT_EXPR,
+   NFT_SET_EXT_OBJREF,
NFT_SET_EXT_NUM
 };
 
@@ -554,6 +558,11 @@ static inline struct nft_set_ext *nft_set_elem_ext(const 
struct nft_set *set,
return elem + set->ops->elemsize;
 }
 
+static inline struct nft_object **nft_set_ext_obj(const struct nft_set_ext 
*ext)
+{
+   return nft_set_ext(ext, NFT_SET_EXT_OBJREF);
+}
+
 void *nft_set_elem_init(const struct nft_set *set,
const struct nft_set_ext_tmpl *tmpl,
const u32 *key, const u32 *data,
diff --git a/include/uapi/linux/netfilter/nf_tables.h 
b/include/uapi/linux/netfilter/nf_tables.h
index 4864caca1e8e..a6b52dbff08c 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -255,6 +255,7 @@ enum nft_rule_compat_attributes {
  * @NFT_SET_MAP: set is used as a dictionary
  * @NFT_SET_TIMEOUT: set uses timeouts
  * @NFT_SET_EVAL: set contains expressions for evaluation
+ * @NFT_SET_OBJECT: set contains stateful objects
  */
 enum nft_set_flags {
NFT_SET_ANONYMOUS   = 0x1,
@@ -263,6 +264,7 @@ enum nft_set_flags {
NFT_SET_MAP = 0x8,
NFT_SET_TIMEOUT = 0x10,
NFT_SET_EVAL= 0x20,
+   NFT_SET_OBJECT  = 0x40,
 };
 
 /**
@@ -304,6 +306,7 @@ enum nft_set_desc_attributes {
  * @NFTA_SET_TIMEOUT: default timeout value (NLA_U64)
  * @NFTA_SET_GC_INTERVAL: garbage collection interval (NLA_U32)
  * @NFTA_SET_USERDATA: user data (NLA_BINARY)
+ * @NFTA_SET_OBJ_TYPE: stateful object type (NLA_U32: NFT_OBJECT_*)
  */
 enum nft_set_attributes {
NFTA_SET_UNSPEC,
@@ -321,6 +324,7 @@ enum nft_set_attributes {
NFTA_SET_GC_INTERVAL,
NFTA_SET_USERDATA,
NFTA_SET_PAD,
+   NFTA_SET_OBJ_TYPE,
__NFTA_SET_MAX
 };
 #define NFTA_SET_MAX   (__NFTA_SET_MAX - 1)
@@ -344,6 +348,7 @@ enum nft_set_elem_flags {
  * @NFTA_SET_ELEM_EXPIRATION: expiration time (NLA_U64)
  * @NFTA_SET_ELEM_USERDATA: user data (NLA_BINARY)
  * @NFTA_SET_ELEM_EXPR: expression (NLA_NESTED: nft_expr_attributes)
+ * @NFTA_SET_ELEM_OBJREF: stateful object reference (NLA_STRING)
  */
 enum nft_set_elem_attributes {
NFTA_SET_ELEM_UNSPEC,
@@ -355,6 +360,7 @@ enum nft_set_elem_attributes {
NFTA_SET_ELEM_USERDATA,
NFTA_SET_ELEM_EXPR,
NFTA_SET_ELEM_PAD,
+   NFTA_SET_ELEM_OBJREF,
__NFTA_SET_ELEM_MAX
 };
 #define NFTA_SET_ELEM_MAX  (__NFTA_SET_ELEM_MAX - 1)
@@ -1207,6 +1213,8 @@ enum nft_fib_flags {
 #define NFT_OBJECT_UNSPEC  0
 #define NFT_OBJECT_COUNTER 1
 #define NFT_OBJECT_QUOTA   2
+#define __NFT_OBJECT_MAX   3
+#define NFT_OBJECT_MAX (__NFT_OBJECT_MAX - 1)
 
 /**

[PATCH 08/50] netfilter: nfnetlink_log: add "nf-logger-5-1" module alias name

2016-12-07 Thread Pablo Neira Ayuso
From: Liping Zhang 

So we can autoload nfnetlink_log.ko when the user adds an nft log
group X rule in the netdev family.

Signed-off-by: Liping Zhang 
Signed-off-by: Pablo Neira Ayuso 
---
 net/netfilter/nfnetlink_log.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 763cb4d54e8d..200922bb2036 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -1152,6 +1152,7 @@ MODULE_ALIAS_NF_LOGGER(AF_INET, 1);
 MODULE_ALIAS_NF_LOGGER(AF_INET6, 1);
 MODULE_ALIAS_NF_LOGGER(AF_BRIDGE, 1);
 MODULE_ALIAS_NF_LOGGER(3, 1); /* NFPROTO_ARP */
+MODULE_ALIAS_NF_LOGGER(5, 1); /* NFPROTO_NETDEV */
 
 module_init(nfnetlink_log_init);
 module_exit(nfnetlink_log_fini);
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 41/50] netfilter: nft_objref: support for stateful object maps

2016-12-07 Thread Pablo Neira Ayuso
This patch allows us to refer to stateful object dictionaries; the
source register indicates the key data used to look up the
corresponding stateful object. We can refer to these maps through names or,
alternatively, the map transaction id. This allows us to refer to both
anonymous and named maps.

Signed-off-by: Pablo Neira Ayuso 
---
 include/uapi/linux/netfilter/nf_tables.h |   6 ++
 net/netfilter/nf_tables_api.c|   4 ++
 net/netfilter/nft_objref.c   | 116 ++-
 3 files changed, 125 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/netfilter/nf_tables.h 
b/include/uapi/linux/netfilter/nf_tables.h
index a6b52dbff08c..881d49e94569 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1153,11 +1153,17 @@ enum nft_fwd_attributes {
  *
  * @NFTA_OBJREF_IMM_TYPE: object type for immediate reference (NLA_U32: 
nft_register)
  * @NFTA_OBJREF_IMM_NAME: object name for immediate reference (NLA_STRING)
+ * @NFTA_OBJREF_SET_SREG: source register of the data to look for (NLA_U32: 
nft_registers)
+ * @NFTA_OBJREF_SET_NAME: name of the set where to look for (NLA_STRING)
+ * @NFTA_OBJREF_SET_ID: id of the set where to look for in this transaction 
(NLA_U32)
  */
 enum nft_objref_attributes {
NFTA_OBJREF_UNSPEC,
NFTA_OBJREF_IMM_TYPE,
NFTA_OBJREF_IMM_NAME,
+   NFTA_OBJREF_SET_SREG,
+   NFTA_OBJREF_SET_NAME,
+   NFTA_OBJREF_SET_ID,
__NFTA_OBJREF_MAX
 };
 #define NFTA_OBJREF_MAX(__NFTA_OBJREF_MAX - 1)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 8228714c42d5..b4db5bf4c135 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2504,6 +2504,7 @@ struct nft_set *nf_tables_set_lookup(const struct 
nft_table *table,
}
return ERR_PTR(-ENOENT);
 }
+EXPORT_SYMBOL_GPL(nf_tables_set_lookup);
 
 struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
  const struct nlattr *nla,
@@ -2522,6 +2523,7 @@ struct nft_set *nf_tables_set_lookup_byid(const struct 
net *net,
}
return ERR_PTR(-ENOENT);
 }
+EXPORT_SYMBOL_GPL(nf_tables_set_lookup_byid);
 
 static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
const char *name)
@@ -3124,6 +3126,7 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct 
nft_set *set,
list_add_tail_rcu(&binding->list, &set->bindings);
return 0;
 }
+EXPORT_SYMBOL_GPL(nf_tables_bind_set);
 
 void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
  struct nft_set_binding *binding)
@@ -3134,6 +3137,7 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, 
struct nft_set *set,
nft_is_active(ctx->net, set))
nf_tables_set_destroy(ctx, set);
 }
+EXPORT_SYMBOL_GPL(nf_tables_unbind_set);
 
 const struct nft_set_ext_type nft_set_ext_types[] = {
[NFT_SET_EXT_KEY]   = {
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index 23820f796aad..415a65ba2b85 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -81,14 +81,128 @@ static const struct nft_expr_ops nft_objref_ops = {
.dump   = nft_objref_dump,
 };
 
+struct nft_objref_map {
+   struct nft_set  *set;
+   enum nft_registers  sreg:8;
+   struct nft_set_binding  binding;
+};
+
+static void nft_objref_map_eval(const struct nft_expr *expr,
+   struct nft_regs *regs,
+   const struct nft_pktinfo *pkt)
+{
+   struct nft_objref_map *priv = nft_expr_priv(expr);
+   const struct nft_set *set = priv->set;
+   const struct nft_set_ext *ext;
+   struct nft_object *obj;
+   bool found;
+
+   found = set->ops->lookup(nft_net(pkt), set, >data[priv->sreg],
+);
+   if (!found) {
+   regs->verdict.code = NFT_BREAK;
+   return;
+   }
+   obj = *nft_set_ext_obj(ext);
+   obj->type->eval(obj, regs, pkt);
+}
+
+static int nft_objref_map_init(const struct nft_ctx *ctx,
+  const struct nft_expr *expr,
+  const struct nlattr * const tb[])
+{
+   struct nft_objref_map *priv = nft_expr_priv(expr);
+   u8 genmask = nft_genmask_next(ctx->net);
+   struct nft_set *set;
+   int err;
+
+   set = nf_tables_set_lookup(ctx->table, tb[NFTA_OBJREF_SET_NAME], 
genmask);
+   if (IS_ERR(set)) {
+   if (tb[NFTA_OBJREF_SET_ID]) {
+   set = nf_tables_set_lookup_byid(ctx->net,
+   tb[NFTA_OBJREF_SET_ID],
+   genmask);
+   }
+   if (IS_ERR(set))
+  

[PATCH 49/50] netfilter: xt_bpf: support ebpf

2016-12-07 Thread Pablo Neira Ayuso
From: Willem de Bruijn 

Add support for attaching an eBPF object by file descriptor.

The iptables binary can be called with a path to an elf object or a
pinned bpf object. Also pass the mode and path to the kernel to be
able to return it later for iptables dump and save.

Signed-off-by: Willem de Bruijn 
Signed-off-by: Pablo Neira Ayuso 
---
 include/uapi/linux/netfilter/xt_bpf.h | 21 
 net/netfilter/xt_bpf.c| 96 +--
 2 files changed, 101 insertions(+), 16 deletions(-)

diff --git a/include/uapi/linux/netfilter/xt_bpf.h 
b/include/uapi/linux/netfilter/xt_bpf.h
index 1fad2c27ac32..b97725af2ac0 100644
--- a/include/uapi/linux/netfilter/xt_bpf.h
+++ b/include/uapi/linux/netfilter/xt_bpf.h
@@ -2,9 +2,11 @@
 #define _XT_BPF_H
 
 #include 
+#include 
 #include 
 
 #define XT_BPF_MAX_NUM_INSTR   64
+#define XT_BPF_PATH_MAX(XT_BPF_MAX_NUM_INSTR * sizeof(struct 
sock_filter))
 
 struct bpf_prog;
 
@@ -16,4 +18,23 @@ struct xt_bpf_info {
struct bpf_prog *filter __attribute__((aligned(8)));
 };
 
+enum xt_bpf_modes {
+   XT_BPF_MODE_BYTECODE,
+   XT_BPF_MODE_FD_PINNED,
+   XT_BPF_MODE_FD_ELF,
+};
+
+struct xt_bpf_info_v1 {
+   __u16 mode;
+   __u16 bpf_program_num_elem;
+   __s32 fd;
+   union {
+   struct sock_filter bpf_program[XT_BPF_MAX_NUM_INSTR];
+   char path[XT_BPF_PATH_MAX];
+   };
+
+   /* only used in the kernel */
+   struct bpf_prog *filter __attribute__((aligned(8)));
+};
+
 #endif /*_XT_BPF_H */
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index dffee9d47ec4..2dedaa23ab0a 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -10,6 +10,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -20,15 +21,15 @@ MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_bpf");
 MODULE_ALIAS("ip6t_bpf");
 
-static int bpf_mt_check(const struct xt_mtchk_param *par)
+static int __bpf_mt_check_bytecode(struct sock_filter *insns, __u16 len,
+  struct bpf_prog **ret)
 {
-   struct xt_bpf_info *info = par->matchinfo;
struct sock_fprog_kern program;
 
-   program.len = info->bpf_program_num_elem;
-   program.filter = info->bpf_program;
+   program.len = len;
+   program.filter = insns;
 
-   if (bpf_prog_create(>filter, )) {
+   if (bpf_prog_create(ret, )) {
pr_info("bpf: check failed: parse error\n");
return -EINVAL;
}
@@ -36,6 +37,42 @@ static int bpf_mt_check(const struct xt_mtchk_param *par)
return 0;
 }
 
+static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret)
+{
+   struct bpf_prog *prog;
+
+   prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
+   if (IS_ERR(prog))
+   return PTR_ERR(prog);
+
+   *ret = prog;
+   return 0;
+}
+
+static int bpf_mt_check(const struct xt_mtchk_param *par)
+{
+   struct xt_bpf_info *info = par->matchinfo;
+
+   return __bpf_mt_check_bytecode(info->bpf_program,
+  info->bpf_program_num_elem,
+  >filter);
+}
+
+static int bpf_mt_check_v1(const struct xt_mtchk_param *par)
+{
+   struct xt_bpf_info_v1 *info = par->matchinfo;
+
+   if (info->mode == XT_BPF_MODE_BYTECODE)
+   return __bpf_mt_check_bytecode(info->bpf_program,
+  info->bpf_program_num_elem,
+  >filter);
+   else if (info->mode == XT_BPF_MODE_FD_PINNED ||
+info->mode == XT_BPF_MODE_FD_ELF)
+   return __bpf_mt_check_fd(info->fd, >filter);
+   else
+   return -EINVAL;
+}
+
 static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
const struct xt_bpf_info *info = par->matchinfo;
@@ -43,31 +80,58 @@ static bool bpf_mt(const struct sk_buff *skb, struct 
xt_action_param *par)
return BPF_PROG_RUN(info->filter, skb);
 }
 
+static bool bpf_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
+{
+   const struct xt_bpf_info_v1 *info = par->matchinfo;
+
+   return !!bpf_prog_run_save_cb(info->filter, (struct sk_buff *) skb);
+}
+
 static void bpf_mt_destroy(const struct xt_mtdtor_param *par)
 {
const struct xt_bpf_info *info = par->matchinfo;
+
+   bpf_prog_destroy(info->filter);
+}
+
+static void bpf_mt_destroy_v1(const struct xt_mtdtor_param *par)
+{
+   const struct xt_bpf_info_v1 *info = par->matchinfo;
+
bpf_prog_destroy(info->filter);
 }
 
-static struct xt_match bpf_mt_reg __read_mostly = {
-   .name   = "bpf",
-   .revision   = 0,
-   .family = NFPROTO_UNSPEC,
-   .checkentry = bpf_mt_check,
-   .match  = bpf_mt,
-   .destroy= bpf_mt_destroy,
-   

[PATCH 48/50] netfilter: x_tables: avoid warn and OOM killer on vmalloc call

2016-12-07 Thread Pablo Neira Ayuso
From: Marcelo Ricardo Leitner 

Andrey Konovalov reported that this vmalloc call is based on a
userspace request and that it's spewing traces, which may flood the logs
and cause DoS if abused.

Florian Westphal also mentioned that this call should not trigger OOM
killer.

This patch brings the vmalloc call in sync with kmalloc: it disables the
warn trace on allocation failure and also disables OOM killer invocation.

Note, however, that under such a stress situation, other places may
still trigger OOM killer invocation.

Reported-by: Andrey Konovalov 
Cc: Florian Westphal 
Signed-off-by: Marcelo Ricardo Leitner 
Signed-off-by: Pablo Neira Ayuso 
---
 net/netfilter/x_tables.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index f6ce4a7036e6..2ff499680cc6 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -959,7 +959,9 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size)
if (sz <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
info = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
if (!info) {
-   info = vmalloc(sz);
+   info = __vmalloc(sz, GFP_KERNEL | __GFP_NOWARN |
+__GFP_NORETRY | __GFP_HIGHMEM,
+PAGE_KERNEL);
if (!info)
return NULL;
}
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 46/50] netfilter: nft_set: introduce nft_{hash, rbtree}_deactivate_one()

2016-12-07 Thread Pablo Neira Ayuso
This new function allows us to deactivate one single element; this is
required by the set flush command that comes in a follow-up patch.

Signed-off-by: Pablo Neira Ayuso 
---
 net/netfilter/nft_set_hash.c   | 24 +---
 net/netfilter/nft_set_rbtree.c | 11 ++-
 2 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index a3dface3e6e6..73f7687c5656 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -167,6 +167,19 @@ static void nft_hash_activate(const struct net *net, const 
struct nft_set *set,
nft_set_elem_clear_busy(>ext);
 }
 
+static bool nft_hash_deactivate_one(const struct net *net,
+   const struct nft_set *set, void *priv)
+{
+   struct nft_hash_elem *he = priv;
+
+   if (!nft_set_elem_mark_busy(>ext) ||
+   !nft_is_active(net, >ext)) {
+   nft_set_elem_change_active(net, set, >ext);
+   return true;
+   }
+   return false;
+}
+
 static void *nft_hash_deactivate(const struct net *net,
 const struct nft_set *set,
 const struct nft_set_elem *elem)
@@ -181,13 +194,10 @@ static void *nft_hash_deactivate(const struct net *net,
 
rcu_read_lock();
he = rhashtable_lookup_fast(>ht, , nft_hash_params);
-   if (he != NULL) {
-   if (!nft_set_elem_mark_busy(>ext) ||
-   !nft_is_active(net, >ext))
-   nft_set_elem_change_active(net, set, >ext);
-   else
-   he = NULL;
-   }
+   if (he != NULL &&
+   !nft_hash_deactivate_one(net, set, he))
+   he = NULL;
+
rcu_read_unlock();
 
return he;
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 36493a7cae88..5580bb64dc0f 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -171,6 +171,15 @@ static void nft_rbtree_activate(const struct net *net,
nft_set_elem_change_active(net, set, >ext);
 }
 
+static bool nft_rbtree_deactivate_one(const struct net *net,
+ const struct nft_set *set, void *priv)
+{
+   struct nft_rbtree_elem *rbe = priv;
+
+   nft_set_elem_change_active(net, set, >ext);
+   return true;
+}
+
 static void *nft_rbtree_deactivate(const struct net *net,
   const struct nft_set *set,
   const struct nft_set_elem *elem)
@@ -204,7 +213,7 @@ static void *nft_rbtree_deactivate(const struct net *net,
parent = parent->rb_right;
continue;
}
-   nft_set_elem_change_active(net, set, >ext);
+   nft_rbtree_deactivate_one(net, set, rbe);
return rbe;
}
}
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 45/50] netfilter: nf_tables: constify struct nft_ctx * parameter in nft_trans_alloc()

2016-12-07 Thread Pablo Neira Ayuso
Context is not modified by nft_trans_alloc(), so constify it.

Signed-off-by: Pablo Neira Ayuso 
---
 net/netfilter/nf_tables_api.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index b04d4ee1d533..b42059795819 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -111,8 +111,8 @@ static void nft_ctx_init(struct nft_ctx *ctx,
ctx->seq= nlh->nlmsg_seq;
 }
 
-static struct nft_trans *nft_trans_alloc(struct nft_ctx *ctx, int msg_type,
-u32 size)
+static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx,
+int msg_type, u32 size)
 {
struct nft_trans *trans;
 
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 47/50] netfilter: nf_tables: support for set flushing

2016-12-07 Thread Pablo Neira Ayuso
This patch adds support for set flushing, that consists of walking over
the set elements if the NFTA_SET_ELEM_LIST_ELEMENTS attribute is set.
This patch requires the following changes:

1) Add set->ops->deactivate_one() operation: This allows us to
   deactivate an element from the set element walk path, given we can
   skip the lookup that happens in ->deactivate().

2) Add a new nft_trans_alloc_gfp() function since we need to allocate
   transactions using GFP_ATOMIC, given that the set walk path runs with
   rcu_read_lock held.

Signed-off-by: Pablo Neira Ayuso 
---
 include/net/netfilter/nf_tables.h |  6 -
 net/netfilter/nf_tables_api.c | 55 ++-
 net/netfilter/nft_set_hash.c  |  1 +
 net/netfilter/nft_set_rbtree.c|  1 +
 4 files changed, 56 insertions(+), 7 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h 
b/include/net/netfilter/nf_tables.h
index 85f0f03f1e87..924325c46aab 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -259,7 +259,8 @@ struct nft_expr;
  * @lookup: look up an element within the set
  * @insert: insert new element into set
  * @activate: activate new element in the next generation
- * @deactivate: deactivate element in the next generation
+ * @deactivate: lookup for element and deactivate it in the next generation
+ * @deactivate_one: deactivate element in the next generation
  * @remove: remove element from set
  * @walk: iterate over all set elemeennts
  * @privsize: function to return size of set private data
@@ -294,6 +295,9 @@ struct nft_set_ops {
void *  (*deactivate)(const struct net *net,
  const struct nft_set *set,
  const struct nft_set_elem 
*elem);
+   bool(*deactivate_one)(const struct net *net,
+ const struct nft_set 
*set,
+ void *priv);
void(*remove)(const struct nft_set *set,
  const struct nft_set_elem 
*elem);
void(*walk)(const struct nft_ctx *ctx,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index b42059795819..a019a87e58ee 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -111,12 +111,12 @@ static void nft_ctx_init(struct nft_ctx *ctx,
ctx->seq= nlh->nlmsg_seq;
 }
 
-static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx,
-int msg_type, u32 size)
+static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx,
+int msg_type, u32 size, gfp_t gfp)
 {
struct nft_trans *trans;
 
-   trans = kzalloc(sizeof(struct nft_trans) + size, GFP_KERNEL);
+   trans = kzalloc(sizeof(struct nft_trans) + size, gfp);
if (trans == NULL)
return NULL;
 
@@ -126,6 +126,12 @@ static struct nft_trans *nft_trans_alloc(const struct 
nft_ctx *ctx,
return trans;
 }
 
+static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx,
+int msg_type, u32 size)
+{
+   return nft_trans_alloc_gfp(ctx, msg_type, size, GFP_KERNEL);
+}
+
 static void nft_trans_destroy(struct nft_trans *trans)
 {
list_del(>list);
@@ -3876,6 +3882,34 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct 
nft_set *set,
return err;
 }
 
+static int nft_flush_set(const struct nft_ctx *ctx,
+const struct nft_set *set,
+const struct nft_set_iter *iter,
+const struct nft_set_elem *elem)
+{
+   struct nft_trans *trans;
+   int err;
+
+   trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM,
+   sizeof(struct nft_trans_elem), GFP_ATOMIC);
+   if (!trans)
+   return -ENOMEM;
+
+   if (!set->ops->deactivate_one(ctx->net, set, elem->priv)) {
+   err = -ENOENT;
+   goto err1;
+   }
+
+   nft_trans_elem_set(trans) = (struct nft_set *)set;
+   nft_trans_elem(trans) = *((struct nft_set_elem *)elem);
+   list_add_tail(>list, >net->nft.commit_list);
+
+   return 0;
+err1:
+   kfree(trans);
+   return err;
+}
+
 static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -3886,9 +3920,6 @@ static int nf_tables_delsetelem(struct net *net, struct 
sock *nlsk,
struct nft_ctx ctx;
int rem, err = 0;
 
-   if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] 

[PATCH 11/50] netfilter: conntrack: built-in support for SCTP

2016-12-07 Thread Pablo Neira Ayuso
From: Davide Caratti 

CONFIG_NF_CT_PROTO_SCTP is no longer a tristate. When set to y, connection
tracking support for the SCTP protocol is built into nf_conntrack.ko.

footprint test:
$ ls -l net/netfilter/nf_conntrack{_proto_sctp,}.ko \
net/ipv4/netfilter/nf_conntrack_ipv4.ko \
net/ipv6/netfilter/nf_conntrack_ipv6.ko

(builtin)||  sctp  |  ipv4  |  ipv6  | nf_conntrack
-+++++--
none || 498243 | 828755 | 828676 | 6141434
SCTP ||   -| 829254 | 829175 | 6547872

Signed-off-by: Davide Caratti 
Signed-off-by: Pablo Neira Ayuso 
---
 include/net/netfilter/ipv4/nf_conntrack_ipv4.h |  3 +
 include/net/netfilter/ipv6/nf_conntrack_ipv6.h |  3 +
 include/net/netns/conntrack.h  | 13 +
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c |  3 +
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c |  3 +
 net/netfilter/Kconfig  |  7 +--
 net/netfilter/Makefile |  2 +-
 net/netfilter/nf_conntrack_proto_sctp.c| 76 +++---
 8 files changed, 38 insertions(+), 72 deletions(-)

diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h 
b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
index c2f155fd9299..5f1fc15a51fb 100644
--- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
+++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
@@ -18,6 +18,9 @@ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp;
 #ifdef CONFIG_NF_CT_PROTO_DCCP
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4;
 #endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+extern struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4;
+#endif
 
 int nf_conntrack_ipv4_compat_init(void);
 void nf_conntrack_ipv4_compat_fini(void);
diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h 
b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
index 5ec66c0d21c4..f70d191a8820 100644
--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
@@ -9,6 +9,9 @@ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
 #ifdef CONFIG_NF_CT_PROTO_DCCP
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6;
 #endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+extern struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6;
+#endif
 
 #include 
 extern struct ctl_table nf_ct_ipv6_sysctl_table[];
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 440b781baf0b..17724c62de97 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -9,6 +9,9 @@
 #ifdef CONFIG_NF_CT_PROTO_DCCP
 #include 
 #endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+#include 
+#endif
 #include 
 
 struct ctl_table_header;
@@ -59,6 +62,13 @@ struct nf_dccp_net {
 };
 #endif
 
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+struct nf_sctp_net {
+   struct nf_proto_net pn;
+   unsigned int timeouts[SCTP_CONNTRACK_MAX];
+};
+#endif
+
 struct nf_ip_net {
struct nf_generic_net   generic;
struct nf_tcp_net   tcp;
@@ -68,6 +78,9 @@ struct nf_ip_net {
 #ifdef CONFIG_NF_CT_PROTO_DCCP
struct nf_dccp_net  dccp;
 #endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+   struct nf_sctp_net  sctp;
+#endif
 };
 
 struct ct_pcpu {
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c 
b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index cb3cf770b00c..0a9d354ef314 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -343,6 +343,9 @@ static struct nf_conntrack_l4proto *builtin_l4proto4[] = {
 #ifdef CONFIG_NF_CT_PROTO_DCCP
_conntrack_l4proto_dccp4,
 #endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+   _conntrack_l4proto_sctp4,
+#endif
 };
 
 static int ipv4_net_init(struct net *net)
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c 
b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index f52338d02951..1d8daafb1685 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -343,6 +343,9 @@ static struct nf_conntrack_l4proto *builtin_l4proto6[] = {
 #ifdef CONFIG_NF_CT_PROTO_DCCP
_conntrack_l4proto_dccp6,
 #endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+   _conntrack_l4proto_sctp6,
+#endif
 };
 
 static int ipv6_net_init(struct net *net)
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 27a3d8c8f8ce..29c0bf0a315d 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -159,15 +159,14 @@ config NF_CT_PROTO_GRE
tristate
 
 config NF_CT_PROTO_SCTP
-   tristate 'SCTP protocol connection tracking support'
+   bool 'SCTP protocol connection tracking support'
depends on NETFILTER_ADVANCED
-   default IP_SCTP
+   default y
help
  With this option enabled, the layer 3 independent connection
  tracking code will be able to do state tracking on SCTP connections.
 
- If you want to 

[PATCH 19/50] netfilter: defrag: only register defrag functionality if needed

2016-12-07 Thread Pablo Neira Ayuso
From: Florian Westphal 

nf_defrag modules for ipv4 and ipv6 export an empty stub function.
Any module that needs the defragmentation hooks registered simply 'calls'
this empty function to create a phony module dependency -- modprobe will
then load the defrag module too.

This extends netfilter ipv4/ipv6 defragmentation modules to delay the hook
registration until the functionality is requested within a network namespace
instead of module load time for all namespaces.

Hooks are only un-registered on module unload or when a namespace that used
such defrag functionality exits.

We have to use struct net for this as the register hooks can be called
before netns initialization here from the ipv4/ipv6 conntrack module
init path.

There is no unregister functionality support, defrag will always be
active once it was requested inside a net namespace.

The reason is that defrag has an impact on nft and iptables rulesets
(without defrag we might see fragments).

Signed-off-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
---
 include/net/netfilter/ipv4/nf_defrag_ipv4.h|  3 +-
 include/net/netfilter/ipv6/nf_defrag_ipv6.h|  3 +-
 include/net/netns/netfilter.h  |  6 
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c |  7 -
 net/ipv4/netfilter/nf_defrag_ipv4.c| 41 +++--
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c |  7 -
 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c  | 42 +++---
 net/netfilter/xt_TPROXY.c  | 15 ++---
 net/netfilter/xt_socket.c  | 33 +---
 9 files changed, 136 insertions(+), 21 deletions(-)

diff --git a/include/net/netfilter/ipv4/nf_defrag_ipv4.h 
b/include/net/netfilter/ipv4/nf_defrag_ipv4.h
index f01ef208dff6..db405f70e538 100644
--- a/include/net/netfilter/ipv4/nf_defrag_ipv4.h
+++ b/include/net/netfilter/ipv4/nf_defrag_ipv4.h
@@ -1,6 +1,7 @@
 #ifndef _NF_DEFRAG_IPV4_H
 #define _NF_DEFRAG_IPV4_H
 
-void nf_defrag_ipv4_enable(void);
+struct net;
+int nf_defrag_ipv4_enable(struct net *);
 
 #endif /* _NF_DEFRAG_IPV4_H */
diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h 
b/include/net/netfilter/ipv6/nf_defrag_ipv6.h
index ddf162f7966f..7664efe37974 100644
--- a/include/net/netfilter/ipv6/nf_defrag_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h
@@ -1,7 +1,8 @@
 #ifndef _NF_DEFRAG_IPV6_H
 #define _NF_DEFRAG_IPV6_H
 
-void nf_defrag_ipv6_enable(void);
+struct net;
+int nf_defrag_ipv6_enable(struct net *);
 
 int nf_ct_frag6_init(void);
 void nf_ct_frag6_cleanup(void);
diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h
index 58487b1cc99a..cea396b53a60 100644
--- a/include/net/netns/netfilter.h
+++ b/include/net/netns/netfilter.h
@@ -17,5 +17,11 @@ struct netns_nf {
struct ctl_table_header *nf_log_dir_header;
 #endif
struct nf_hook_entry __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
+   booldefrag_ipv4;
+#endif
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
+   booldefrag_ipv6;
+#endif
 };
 #endif
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c 
b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 6f375443a74b..fcfd071f4705 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -325,6 +325,12 @@ static int ipv4_hooks_register(struct net *net)
if (cnet->users > 1)
goto out_unlock;
 
+   err = nf_defrag_ipv4_enable(net);
+   if (err) {
+   cnet->users = 0;
+   goto out_unlock;
+   }
+
err = nf_register_net_hooks(net, ipv4_conntrack_ops,
ARRAY_SIZE(ipv4_conntrack_ops));
 
@@ -422,7 +428,6 @@ static int __init nf_conntrack_l3proto_ipv4_init(void)
int ret = 0;
 
need_conntrack();
-   nf_defrag_ipv4_enable();
 
ret = nf_register_sockopt(_getorigdst);
if (ret < 0) {
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c 
b/net/ipv4/netfilter/nf_defrag_ipv4.c
index d88da36b383c..49bd6a54404f 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -11,6 +11,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -22,6 +23,8 @@
 #endif
 #include 
 
+static DEFINE_MUTEX(defrag4_mutex);
+
 static int nf_ct_ipv4_gather_frags(struct net *net, struct sk_buff *skb,
   u_int32_t user)
 {
@@ -102,18 +105,50 @@ static struct nf_hook_ops ipv4_defrag_ops[] = {
},
 };
 
+static void __net_exit defrag4_net_exit(struct net *net)
+{
+   if (net->nf.defrag_ipv4) {
+   nf_unregister_net_hooks(net, ipv4_defrag_ops,
+   ARRAY_SIZE(ipv4_defrag_ops));
+   net->nf.defrag_ipv4 = false;
+   }
+}
+
+static struct pernet_operations 

[PATCH 21/50] netfilter: decouple nf_hook_entry and nf_hook_ops

2016-12-07 Thread Pablo Neira Ayuso
From: Aaron Conole 

During nfhook traversal we only need a very small subset of
nf_hook_ops members.

We need:
- next element
- hook function to call
- hook function priv argument

Bridge netfilter also needs 'thresh'; can be obtained via ->orig_ops.

nf_hook_entry struct is now 32 bytes on x86_64.

A followup patch will turn the run-time list into an array that only
stores hook functions plus their priv arguments, eliminating the ->next
element.

Suggested-by: Florian Westphal 
Signed-off-by: Aaron Conole 
Signed-off-by: Pablo Neira Ayuso 
---
 include/linux/netfilter.h | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 575aa198097e..a4b97be30b28 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -75,7 +75,8 @@ struct nf_hook_ops {
 
 struct nf_hook_entry {
struct nf_hook_entry __rcu  *next;
-   struct nf_hook_ops  ops;
+   nf_hookfn   *hook;
+   void*priv;
const struct nf_hook_ops*orig_ops;
 };
 
@@ -83,21 +84,22 @@ static inline void
 nf_hook_entry_init(struct nf_hook_entry *entry,const struct 
nf_hook_ops *ops)
 {
entry->next = NULL;
-   entry->ops = *ops;
+   entry->hook = ops->hook;
+   entry->priv = ops->priv;
entry->orig_ops = ops;
 }
 
 static inline int
 nf_hook_entry_priority(const struct nf_hook_entry *entry)
 {
-   return entry->ops.priority;
+   return entry->orig_ops->priority;
 }
 
 static inline int
 nf_hook_entry_hookfn(const struct nf_hook_entry *entry, struct sk_buff *skb,
 struct nf_hook_state *state)
 {
-   return entry->ops.hook(entry->ops.priv, skb, state);
+   return entry->hook(entry->priv, skb, state);
 }
 
 static inline const struct nf_hook_ops *
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 35/50] netfilter: nf_tables: add stateful object reference expression

2016-12-07 Thread Pablo Neira Ayuso
This new expression allows us to refer to existing stateful objects from
rules.

Signed-off-by: Pablo Neira Ayuso 
---
 include/uapi/linux/netfilter/nf_tables.h |  14 
 net/netfilter/Kconfig|   6 ++
 net/netfilter/Makefile   |   1 +
 net/netfilter/nft_objref.c   | 112 +++
 4 files changed, 133 insertions(+)
 create mode 100644 net/netfilter/nft_objref.c

diff --git a/include/uapi/linux/netfilter/nf_tables.h 
b/include/uapi/linux/netfilter/nf_tables.h
index ad0577ba5d2a..1043ce4250c5 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1138,6 +1138,20 @@ enum nft_fwd_attributes {
 #define NFTA_FWD_MAX   (__NFTA_FWD_MAX - 1)
 
 /**
+ * enum nft_objref_attributes - nf_tables stateful object expression netlink 
attributes
+ *
+ * @NFTA_OBJREF_IMM_TYPE: object type for immediate reference (NLA_U32: 
nft_register)
+ * @NFTA_OBJREF_IMM_NAME: object name for immediate reference (NLA_STRING)
+ */
+enum nft_objref_attributes {
+   NFTA_OBJREF_UNSPEC,
+   NFTA_OBJREF_IMM_TYPE,
+   NFTA_OBJREF_IMM_NAME,
+   __NFTA_OBJREF_MAX
+};
+#define NFTA_OBJREF_MAX(__NFTA_OBJREF_MAX - 1)
+
+/**
  * enum nft_gen_attributes - nf_tables ruleset generation attributes
  *
  * @NFTA_GEN_ID: Ruleset generation ID (NLA_U32)
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index def4be06cda6..63729b489c2c 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -551,6 +551,12 @@ config NFT_NAT
  This option adds the "nat" expression that you can use to perform
  typical Network Address Translation (NAT) packet transformations.
 
+config NFT_OBJREF
+   tristate "Netfilter nf_tables stateful object reference module"
+   help
+ This option adds the "objref" expression that allows you to refer to
+ stateful objects, such as counters and quotas.
+
 config NFT_QUEUE
depends on NETFILTER_NETLINK_QUEUE
tristate "Netfilter nf_tables queue module"
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index e4c8c1d7aaed..ca30d1960f1d 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -88,6 +88,7 @@ obj-$(CONFIG_NFT_NUMGEN)  += nft_numgen.o
 obj-$(CONFIG_NFT_CT)   += nft_ct.o
 obj-$(CONFIG_NFT_LIMIT)+= nft_limit.o
 obj-$(CONFIG_NFT_NAT)  += nft_nat.o
+obj-$(CONFIG_NFT_OBJREF)   += nft_objref.o
 obj-$(CONFIG_NFT_QUEUE)+= nft_queue.o
 obj-$(CONFIG_NFT_QUOTA)+= nft_quota.o
 obj-$(CONFIG_NFT_REJECT)   += nft_reject.o
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
new file mode 100644
index ..23820f796aad
--- /dev/null
+++ b/net/netfilter/nft_objref.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2012-2016 Pablo Neira Ayuso 
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define nft_objref_priv(expr)  *((struct nft_object **)nft_expr_priv(expr))
+
+static void nft_objref_eval(const struct nft_expr *expr,
+   struct nft_regs *regs,
+   const struct nft_pktinfo *pkt)
+{
+   struct nft_object *obj = nft_objref_priv(expr);
+
+   obj->type->eval(obj, regs, pkt);
+}
+
+static int nft_objref_init(const struct nft_ctx *ctx,
+  const struct nft_expr *expr,
+  const struct nlattr * const tb[])
+{
+   struct nft_object *obj = nft_objref_priv(expr);
+   u8 genmask = nft_genmask_next(ctx->net);
+   u32 objtype;
+
+   if (!tb[NFTA_OBJREF_IMM_NAME] ||
+   !tb[NFTA_OBJREF_IMM_TYPE])
+   return -EINVAL;
+
+   objtype = ntohl(nla_get_be32(tb[NFTA_OBJREF_IMM_TYPE]));
+   obj = nf_tables_obj_lookup(ctx->table, tb[NFTA_OBJREF_IMM_NAME], 
objtype,
+  genmask);
+   if (IS_ERR(obj))
+   return -ENOENT;
+
+   nft_objref_priv(expr) = obj;
+   obj->use++;
+
+   return 0;
+}
+
+static int nft_objref_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+   const struct nft_object *obj = nft_objref_priv(expr);
+
+   if (nla_put_string(skb, NFTA_OBJREF_IMM_NAME, obj->name) ||
+   nla_put_be32(skb, NFTA_OBJREF_IMM_TYPE, htonl(obj->type->type)))
+   goto nla_put_failure;
+
+   return 0;
+
+nla_put_failure:
+   return -1;
+}
+
+static void nft_objref_destroy(const struct nft_ctx *ctx,
+  const struct nft_expr *expr)
+{
+   struct nft_object *obj = nft_objref_priv(expr);
+
+   obj->use--;
+}
+
+static struct nft_expr_type nft_objref_type;
+static const struct 

[PATCH 01/50] ipvs: Use IS_ERR_OR_NULL(svc) instead of IS_ERR(svc) || svc == NULL

2016-12-07 Thread Pablo Neira Ayuso
From: Gao Feng 

This minor refactoring does not change the logic of function
ip_vs_genl_dump_dests.

Signed-off-by: Gao Feng 
Acked-by: Julian Anastasov 
Signed-off-by: Simon Horman 
---
 net/netfilter/ipvs/ip_vs_ctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 6b85ded4f91d..217e0105b5e0 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3260,7 +3260,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
 
 
svc = ip_vs_genl_find_service(ipvs, attrs[IPVS_CMD_ATTR_SERVICE]);
-   if (IS_ERR(svc) || svc == NULL)
+   if (IS_ERR_OR_NULL(svc))
goto out_err;
 
/* Dump the destinations */
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 05/50] netfilter: built-in NAT support for SCTP

2016-12-07 Thread Pablo Neira Ayuso
From: Davide Caratti 

CONFIG_NF_NAT_PROTO_SCTP is no longer a tristate. When set to y, NAT
support for the SCTP protocol is built into nf_nat.ko.

footprint test:

(nf_nat_proto_)   | sctp   || nf_nat
------------------+--------++--------
no builtin        | 428344 || 2241312
SCTP builtin      |   -    || 2597032

Signed-off-by: Davide Caratti 
Signed-off-by: Pablo Neira Ayuso 
---
 include/net/netfilter/nf_nat_l4proto.h |  3 +++
 net/netfilter/Kconfig  |  2 +-
 net/netfilter/Makefile |  2 +-
 net/netfilter/nf_nat_core.c|  4 
 net/netfilter/nf_nat_proto_sctp.c  | 35 +-
 5 files changed, 10 insertions(+), 36 deletions(-)

diff --git a/include/net/netfilter/nf_nat_l4proto.h 
b/include/net/netfilter/nf_nat_l4proto.h
index 92b147be00ef..2cbaf3856e21 100644
--- a/include/net/netfilter/nf_nat_l4proto.h
+++ b/include/net/netfilter/nf_nat_l4proto.h
@@ -57,6 +57,9 @@ extern const struct nf_nat_l4proto nf_nat_l4proto_unknown;
 #ifdef CONFIG_NF_NAT_PROTO_DCCP
 extern const struct nf_nat_l4proto nf_nat_l4proto_dccp;
 #endif
+#ifdef CONFIG_NF_NAT_PROTO_SCTP
+extern const struct nf_nat_l4proto nf_nat_l4proto_sctp;
+#endif
 
 bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple,
 enum nf_nat_manip_type maniptype,
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 13092e5cd245..ad72edf1f6ec 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -394,7 +394,7 @@ config NF_NAT_PROTO_UDPLITE
default NF_NAT && NF_CT_PROTO_UDPLITE
 
 config NF_NAT_PROTO_SCTP
-   tristate
+   bool
default NF_NAT && NF_CT_PROTO_SCTP
depends on NF_NAT && NF_CT_PROTO_SCTP
select LIBCRC32C
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 9ea0c98e51e6..02ef6decf94d 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -46,6 +46,7 @@ nf_nat-y  := nf_nat_core.o nf_nat_proto_unknown.o 
nf_nat_proto_common.o \
   nf_nat_proto_udp.o nf_nat_proto_tcp.o nf_nat_helper.o
 
 nf_nat-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o
+nf_nat-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o
 
 # generic transport layer logging
 obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o
@@ -58,7 +59,6 @@ obj-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o
 
 # NAT protocols (nf_nat)
 obj-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o
-obj-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o
 
 # NAT helpers
 obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 69b121d11275..80858bd110cc 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -686,6 +686,10 @@ int nf_nat_l3proto_register(const struct nf_nat_l3proto *l3proto)
RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_DCCP],
 &nf_nat_l4proto_dccp);
 #endif
+#ifdef CONFIG_NF_NAT_PROTO_SCTP
+   RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_SCTP],
+&nf_nat_l4proto_sctp);
+#endif
mutex_unlock(&nf_nat_proto_mutex);
 
RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], l3proto);
diff --git a/net/netfilter/nf_nat_proto_sctp.c 
b/net/netfilter/nf_nat_proto_sctp.c
index cbc7ade1487b..2e14108ff697 100644
--- a/net/netfilter/nf_nat_proto_sctp.c
+++ b/net/netfilter/nf_nat_proto_sctp.c
@@ -7,9 +7,7 @@
  */
 
 #include 
-#include 
 #include 
-#include 
 #include 
 
 #include 
@@ -54,7 +52,7 @@ sctp_manip_pkt(struct sk_buff *skb,
return true;
 }
 
-static const struct nf_nat_l4proto nf_nat_l4proto_sctp = {
+const struct nf_nat_l4proto nf_nat_l4proto_sctp = {
.l4proto= IPPROTO_SCTP,
.manip_pkt  = sctp_manip_pkt,
.in_range   = nf_nat_l4proto_in_range,
@@ -63,34 +61,3 @@ static const struct nf_nat_l4proto nf_nat_l4proto_sctp = {
.nlattr_to_range= nf_nat_l4proto_nlattr_to_range,
 #endif
 };
-
-static int __init nf_nat_proto_sctp_init(void)
-{
-   int err;
-
-   err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_sctp);
-   if (err < 0)
-   goto err1;
-   err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_sctp);
-   if (err < 0)
-   goto err2;
-   return 0;
-
-err2:
-   nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_sctp);
-err1:
-   return err;
-}
-
-static void __exit nf_nat_proto_sctp_exit(void)
-{
-   nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_sctp);
-   nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_sctp);
-}
-
-module_init(nf_nat_proto_sctp_init);
-module_exit(nf_nat_proto_sctp_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SCTP NAT protocol helper");
-MODULE_AUTHOR("Patrick McHardy ");
-- 
2.1.4

--
To 

[PATCH 10/50] netfilter: conntrack: built-in support for DCCP

2016-12-07 Thread Pablo Neira Ayuso
From: Davide Caratti 

CONFIG_NF_CT_PROTO_DCCP is no longer a tristate. When set to y, connection
tracking support for the DCCP protocol is built into nf_conntrack.ko.

footprint test:
$ ls -l net/netfilter/nf_conntrack{_proto_dccp,}.ko \
net/ipv4/netfilter/nf_conntrack_ipv4.ko \
net/ipv6/netfilter/nf_conntrack_ipv6.ko

(builtin)||  dccp  |  ipv4  |  ipv6  | nf_conntrack
---------++--------+--------+--------+--------------
none     || 469140 | 828755 | 828676 | 6141434
DCCP     ||   -    | 830566 | 829935 | 6533526

Signed-off-by: Davide Caratti 
Signed-off-by: Pablo Neira Ayuso 
---
 include/linux/netfilter/nf_conntrack_dccp.h|  2 +-
 include/net/netfilter/ipv4/nf_conntrack_ipv4.h |  3 +
 include/net/netfilter/ipv6/nf_conntrack_ipv6.h |  3 +
 include/net/netns/conntrack.h  | 14 +
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c |  3 +
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c |  3 +
 net/netfilter/Kconfig  |  6 +-
 net/netfilter/Makefile |  3 +-
 net/netfilter/nf_conntrack_proto_dccp.c| 79 --
 9 files changed, 41 insertions(+), 75 deletions(-)

diff --git a/include/linux/netfilter/nf_conntrack_dccp.h 
b/include/linux/netfilter/nf_conntrack_dccp.h
index 40dcc82058d1..ff721d7325cf 100644
--- a/include/linux/netfilter/nf_conntrack_dccp.h
+++ b/include/linux/netfilter/nf_conntrack_dccp.h
@@ -25,7 +25,7 @@ enum ct_dccp_roles {
 #define CT_DCCP_ROLE_MAX   (__CT_DCCP_ROLE_MAX - 1)
 
 #ifdef __KERNEL__
-#include 
+#include 
 
 struct nf_ct_dccp {
u_int8_trole[IP_CT_DIR_MAX];
diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h 
b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
index 981c327374da..c2f155fd9299 100644
--- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
+++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
@@ -15,6 +15,9 @@ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4;
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4;
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4;
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp;
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4;
+#endif
 
 int nf_conntrack_ipv4_compat_init(void);
 void nf_conntrack_ipv4_compat_fini(void);
diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h 
b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
index a4c993685795..5ec66c0d21c4 100644
--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
@@ -6,6 +6,9 @@ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6;
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6;
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6;
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6;
+#endif
 
 #include 
 extern struct ctl_table nf_ct_ipv6_sysctl_table[];
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 3d06d94d2e52..440b781baf0b 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -6,6 +6,9 @@
 #include 
 #include 
 #include 
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+#include 
+#endif
 #include 
 
 struct ctl_table_header;
@@ -48,12 +51,23 @@ struct nf_icmp_net {
unsigned int timeout;
 };
 
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+struct nf_dccp_net {
+   struct nf_proto_net pn;
+   int dccp_loose;
+   unsigned int dccp_timeout[CT_DCCP_MAX + 1];
+};
+#endif
+
 struct nf_ip_net {
struct nf_generic_net   generic;
struct nf_tcp_net   tcp;
struct nf_udp_net   udp;
struct nf_icmp_net  icmp;
struct nf_icmp_net  icmpv6;
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+   struct nf_dccp_net  dccp;
+#endif
 };
 
 struct ct_pcpu {
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c 
b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 7130ed5dc1fa..cb3cf770b00c 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -340,6 +340,9 @@ static struct nf_conntrack_l4proto *builtin_l4proto4[] = {
&nf_conntrack_l4proto_tcp4,
&nf_conntrack_l4proto_udp4,
&nf_conntrack_l4proto_icmp,
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+   &nf_conntrack_l4proto_dccp4,
+#endif
 };
 
 static int ipv4_net_init(struct net *net)
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c 
b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 500be28ff563..f52338d02951 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -340,6 +340,9 @@ static struct nf_conntrack_l4proto *builtin_l4proto6[] = {
&nf_conntrack_l4proto_tcp6,
&nf_conntrack_l4proto_udp6,
&nf_conntrack_l4proto_icmpv6,

[PATCH 04/50] netfilter: built-in NAT support for DCCP

2016-12-07 Thread Pablo Neira Ayuso
From: Davide Caratti 

CONFIG_NF_NAT_PROTO_DCCP is no longer a tristate. When set to y, NAT
support for the DCCP protocol is built into nf_nat.ko.

footprint test:

(nf_nat_proto_)   | dccp   || nf_nat
------------------+--------++--------
no builtin        | 409800 || 2241312
DCCP builtin      |   -    || 2578968

Signed-off-by: Davide Caratti 
Signed-off-by: Pablo Neira Ayuso 
---
 include/net/netfilter/nf_nat_l4proto.h |  3 +++
 net/netfilter/Kconfig  |  2 +-
 net/netfilter/Makefile |  3 ++-
 net/netfilter/nf_nat_core.c|  4 
 net/netfilter/nf_nat_proto_dccp.c  | 36 +-
 5 files changed, 11 insertions(+), 37 deletions(-)

diff --git a/include/net/netfilter/nf_nat_l4proto.h 
b/include/net/netfilter/nf_nat_l4proto.h
index 12f4cc841b6e..92b147be00ef 100644
--- a/include/net/netfilter/nf_nat_l4proto.h
+++ b/include/net/netfilter/nf_nat_l4proto.h
@@ -54,6 +54,9 @@ extern const struct nf_nat_l4proto nf_nat_l4proto_udp;
 extern const struct nf_nat_l4proto nf_nat_l4proto_icmp;
 extern const struct nf_nat_l4proto nf_nat_l4proto_icmpv6;
 extern const struct nf_nat_l4proto nf_nat_l4proto_unknown;
+#ifdef CONFIG_NF_NAT_PROTO_DCCP
+extern const struct nf_nat_l4proto nf_nat_l4proto_dccp;
+#endif
 
 bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple,
 enum nf_nat_manip_type maniptype,
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 44410d30d461..13092e5cd245 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -384,7 +384,7 @@ config NF_NAT_NEEDED
default y
 
 config NF_NAT_PROTO_DCCP
-   tristate
+   bool
depends on NF_NAT && NF_CT_PROTO_DCCP
default NF_NAT && NF_CT_PROTO_DCCP
 
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 5bbf767672ec..9ea0c98e51e6 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -45,6 +45,8 @@ obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o
 nf_nat-y   := nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \
   nf_nat_proto_udp.o nf_nat_proto_tcp.o nf_nat_helper.o
 
+nf_nat-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o
+
 # generic transport layer logging
 obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o
 
@@ -55,7 +57,6 @@ obj-$(CONFIG_NF_NAT) += nf_nat.o
 obj-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o
 
 # NAT protocols (nf_nat)
-obj-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o
 obj-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o
 obj-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o
 
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 5b9c884a452e..69b121d11275 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -682,6 +682,10 @@ int nf_nat_l3proto_register(const struct nf_nat_l3proto *l3proto)
 &nf_nat_l4proto_tcp);
RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_UDP],
 &nf_nat_l4proto_udp);
+#ifdef CONFIG_NF_NAT_PROTO_DCCP
+   RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_DCCP],
+&nf_nat_l4proto_dccp);
+#endif
mutex_unlock(&nf_nat_proto_mutex);
 
RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], l3proto);
diff --git a/net/netfilter/nf_nat_proto_dccp.c 
b/net/netfilter/nf_nat_proto_dccp.c
index 15c47b246d0d..269fcd5dc34c 100644
--- a/net/netfilter/nf_nat_proto_dccp.c
+++ b/net/netfilter/nf_nat_proto_dccp.c
@@ -10,8 +10,6 @@
  */
 
 #include 
-#include 
-#include 
 #include 
 #include 
 
@@ -73,7 +71,7 @@ dccp_manip_pkt(struct sk_buff *skb,
return true;
 }
 
-static const struct nf_nat_l4proto nf_nat_l4proto_dccp = {
+const struct nf_nat_l4proto nf_nat_l4proto_dccp = {
.l4proto= IPPROTO_DCCP,
.manip_pkt  = dccp_manip_pkt,
.in_range   = nf_nat_l4proto_in_range,
@@ -82,35 +80,3 @@ static const struct nf_nat_l4proto nf_nat_l4proto_dccp = {
.nlattr_to_range= nf_nat_l4proto_nlattr_to_range,
 #endif
 };
-
-static int __init nf_nat_proto_dccp_init(void)
-{
-   int err;
-
-   err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_dccp);
-   if (err < 0)
-   goto err1;
-   err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_dccp);
-   if (err < 0)
-   goto err2;
-   return 0;
-
-err2:
-   nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_dccp);
-err1:
-   return err;
-}
-
-static void __exit nf_nat_proto_dccp_fini(void)
-{
-   nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_dccp);
-   nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_dccp);
-
-}
-
-module_init(nf_nat_proto_dccp_init);
-module_exit(nf_nat_proto_dccp_fini);
-
-MODULE_AUTHOR("Patrick McHardy ");
-MODULE_DESCRIPTION("DCCP 

[PATCH 03/50] netfilter: update Arturo Borrero Gonzalez email address

2016-12-07 Thread Pablo Neira Ayuso
From: Arturo Borrero Gonzalez 

The email address has changed, let's update the copyright statements.

Signed-off-by: Arturo Borrero Gonzalez 
Signed-off-by: Pablo Neira Ayuso 
---
 net/ipv4/netfilter/nft_masq_ipv4.c  | 4 ++--
 net/ipv4/netfilter/nft_redir_ipv4.c | 4 ++--
 net/ipv6/netfilter/nft_masq_ipv6.c  | 4 ++--
 net/ipv6/netfilter/nft_redir_ipv6.c | 4 ++--
 net/netfilter/nft_masq.c| 4 ++--
 net/netfilter/nft_redir.c   | 4 ++--
 6 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c 
b/net/ipv4/netfilter/nft_masq_ipv4.c
index 4f697e431811..4d69f99b8707 100644
--- a/net/ipv4/netfilter/nft_masq_ipv4.c
+++ b/net/ipv4/netfilter/nft_masq_ipv4.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014 Arturo Borrero Gonzalez 
+ * Copyright (c) 2014 Arturo Borrero Gonzalez 
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -77,5 +77,5 @@ module_init(nft_masq_ipv4_module_init);
 module_exit(nft_masq_ipv4_module_exit);
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arturo Borrero Gonzalez ");
+MODULE_AUTHOR("Arturo Borrero Gonzalez 
+ * Copyright (c) 2014 Arturo Borrero Gonzalez 
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -71,5 +71,5 @@ module_init(nft_redir_ipv4_module_init);
 module_exit(nft_redir_ipv4_module_exit);
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arturo Borrero Gonzalez ");
+MODULE_AUTHOR("Arturo Borrero Gonzalez ");
 MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "redir");
diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c 
b/net/ipv6/netfilter/nft_masq_ipv6.c
index a2aff1277b40..93d758f70334 100644
--- a/net/ipv6/netfilter/nft_masq_ipv6.c
+++ b/net/ipv6/netfilter/nft_masq_ipv6.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014 Arturo Borrero Gonzalez 
+ * Copyright (c) 2014 Arturo Borrero Gonzalez 
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -78,5 +78,5 @@ module_init(nft_masq_ipv6_module_init);
 module_exit(nft_masq_ipv6_module_exit);
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arturo Borrero Gonzalez ");
+MODULE_AUTHOR("Arturo Borrero Gonzalez ");
 MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "masq");
diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c 
b/net/ipv6/netfilter/nft_redir_ipv6.c
index bfcd5af6bc15..2850fcd8583f 100644
--- a/net/ipv6/netfilter/nft_redir_ipv6.c
+++ b/net/ipv6/netfilter/nft_redir_ipv6.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014 Arturo Borrero Gonzalez 
+ * Copyright (c) 2014 Arturo Borrero Gonzalez 
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -72,5 +72,5 @@ module_init(nft_redir_ipv6_module_init);
 module_exit(nft_redir_ipv6_module_exit);
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arturo Borrero Gonzalez ");
+MODULE_AUTHOR("Arturo Borrero Gonzalez ");
 MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "redir");
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index 81b5ad6165ac..bf92de01410f 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014 Arturo Borrero Gonzalez 
+ * Copyright (c) 2014 Arturo Borrero Gonzalez 
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -105,4 +105,4 @@ int nft_masq_dump(struct sk_buff *skb, const struct 
nft_expr *expr)
 EXPORT_SYMBOL_GPL(nft_masq_dump);
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arturo Borrero Gonzalez ");
+MODULE_AUTHOR("Arturo Borrero Gonzalez ");
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index 03f7bf40ae75..967e09b099b2 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014 Arturo Borrero Gonzalez 
+ * Copyright (c) 2014 Arturo Borrero Gonzalez 

Re: [PATCH nft v2] datatype: Display pre-defined inet_service values in host byte order

2016-12-07 Thread Pablo Neira Ayuso
Hi Elise,

On Wed, Dec 07, 2016 at 05:03:31PM -0200, Elise Lennion wrote:
> nft describe displays, to the user, which values are available for a selector,
> then the values should be in host byte order.
> 
> Reported-by: Pablo Neira Ayuso 
> Fixes: ccc5da470e76 ("datatype: Replace getnameinfo() by internal lookup 
> table")
> Signed-off-by: Elise Lennion 
> ---
> 
>  v2: Created a function to convert different types and number of bytes
> 
>  include/datatype.h |  3 ++-
>  src/datatype.c | 27 ---
>  src/expression.c   |  3 ++-
>  3 files changed, 28 insertions(+), 5 deletions(-)
> 
> diff --git a/include/datatype.h b/include/datatype.h
> index d4fe817..a7db1df 100644
> --- a/include/datatype.h
> +++ b/include/datatype.h
> @@ -191,7 +191,8 @@ extern struct error_record *symbolic_constant_parse(const 
> struct expr *sym,
>  extern void symbolic_constant_print(const struct symbol_table *tbl,
>   const struct expr *expr, bool quotes);
>  extern void symbol_table_print(const struct symbol_table *tbl,
> -const struct datatype *dtype);
> +const struct datatype *dtype,
> +enum byteorder byteorder);
>  
>  extern struct symbol_table *rt_symbol_table_init(const char *filename);
>  extern void rt_symbol_table_free(struct symbol_table *tbl);
> diff --git a/src/datatype.c b/src/datatype.c
> index b5d73bc..c884171 100644
> --- a/src/datatype.c
> +++ b/src/datatype.c
> @@ -180,15 +180,36 @@ void symbolic_constant_print(const struct symbol_table 
> *tbl,
>   printf("%s", s->identifier);
>  }
>  
> +static void big_to_host_byteorder(void *value, const unsigned int nbytes)
> +{
> + unsigned char *p = value;
> + unsigned int i;
> +
> + for (i = 0; i < nbytes / 2; i++) {
> + p[i] ^= p[nbytes - 1 - i];
> + p[nbytes - 1 - i] ^= p[i];
> + p[i] ^= p[nbytes - 1 - i];
> + }

Sorry, probably I should have specified a bit more.

I suggest you use libgmp for this, that allows any arbitrary word
size. See mpz_init() to initialize the variable, then
mpz_switch_byteorder() to change byteorder. Then, export it via
mpz_export_data() back to fixed standard datatypes.

libgmp is well documented, please have a look at it.

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[nf-next:master 36/48] net/netfilter/nft_counter.c:131:9: error: implicit declaration of function 'cmpxchg64'

2016-12-07 Thread kbuild test robot
tree:   https://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git master
head:   2c16d60332643e90d4fa244f4a706c454b8c7569
commit: 43da04a593d8b2626f1cf4b56efe9402f6b53652 [36/48] netfilter: nf_tables: 
atomic dump and reset for stateful objects
config: sh-allmodconfig (attached as .config)
compiler: sh4-linux-gnu-gcc (Debian 6.1.1-9) 6.1.1 20160705
reproduce:
wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
git checkout 43da04a593d8b2626f1cf4b56efe9402f6b53652
# save the attached .config to linux build tree
make.cross ARCH=sh 

All errors (new ones prefixed by >>):

   net/netfilter/nft_counter.c: In function '__nft_counter_reset':
>> net/netfilter/nft_counter.c:131:9: error: implicit declaration of function 
>> 'cmpxchg64' [-Werror=implicit-function-declaration]
  ret = cmpxchg64(counter, old, 0);
^
   cc1: some warnings being treated as errors

vim +/cmpxchg64 +131 net/netfilter/nft_counter.c

   125  static u64 __nft_counter_reset(u64 *counter)
   126  {
   127  u64 ret, old;
   128  
   129  do {
   130  old = *counter;
 > 131  ret = cmpxchg64(counter, old, 0);
   132  } while (ret != old);
   133  
   134  return ret;

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


[PATCH nf-next] netfilter: nft_quota: allow to restore consumed quota

2016-12-07 Thread Pablo Neira Ayuso
Allow restoring the consumed quota; this is useful for preserving the
quota state across reboots.

Signed-off-by: Pablo Neira Ayuso 
---
N.B: Just this one more patch on the current pile on nf-next, and I'll be
preparing the pull request for David.

 net/netfilter/nft_quota.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index 7f27ebdce7ab..bd6efc53f26d 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -43,6 +43,7 @@ static inline void nft_quota_do_eval(struct nft_quota *priv,
 static const struct nla_policy nft_quota_policy[NFTA_QUOTA_MAX + 1] = {
[NFTA_QUOTA_BYTES]  = { .type = NLA_U64 },
[NFTA_QUOTA_FLAGS]  = { .type = NLA_U32 },
+   [NFTA_QUOTA_CONSUMED]   = { .type = NLA_U64 },
 };
 
 #define NFT_QUOTA_DEPLETED_BIT 1   /* From NFT_QUOTA_F_DEPLETED. */
@@ -68,7 +69,7 @@ static int nft_quota_do_init(const struct nlattr * const tb[],
 struct nft_quota *priv)
 {
unsigned long flags = 0;
-   u64 quota;
+   u64 quota, consumed = 0;
 
if (!tb[NFTA_QUOTA_BYTES])
return -EINVAL;
@@ -77,6 +78,12 @@ static int nft_quota_do_init(const struct nlattr * const 
tb[],
if (quota > S64_MAX)
return -EOVERFLOW;
 
+   if (tb[NFTA_QUOTA_CONSUMED]) {
+   consumed = be64_to_cpu(nla_get_be64(tb[NFTA_QUOTA_CONSUMED]));
+   if (consumed > quota)
+   return -EINVAL;
+   }
+
if (tb[NFTA_QUOTA_FLAGS]) {
flags = ntohl(nla_get_be32(tb[NFTA_QUOTA_FLAGS]));
if (flags & ~NFT_QUOTA_F_INV)
@@ -87,7 +94,7 @@ static int nft_quota_do_init(const struct nlattr * const tb[],
 
priv->quota = quota;
priv->flags = flags;
-   atomic64_set(&priv->consumed, 0);
+   atomic64_set(&priv->consumed, consumed);
 
return 0;
 }
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html