[PATCH] netfilter: xt_TEE: Fix potential deadlock when TEE target is inserted

2017-09-03 Thread Taehee Yoo
When the xt_TEE target is inserted, lockdep warns about a possible
deadlock. To avoid the deadlock, register_netdevice_notifier()
should be called only from the init routine.

reproduce command is :
   # iptables -I INPUT -j TEE --oif enp3s0 --gateway 192.168.0.1

warning message is :

[  115.182917] WARNING: possible circular locking dependency detected
[  115.189846] 4.13.0-rc1+ #68 Not tainted
[  115.194141] --
[  115.201065] iptables/1283 is trying to acquire lock:
[  115.206627]  (rtnl_mutex){+.+.+.}, at: [] 
rtnl_lock+0x17/0x20
[  115.214842]
[  115.214842] but task is already holding lock:
[  115.221378]  (sk_lock-AF_INET){+.+.+.}, at: [] 
ip_setsockopt+0x6d/0xb0
[  115.230462]
[  115.230462] which lock already depends on the new lock.
[  115.230462]
[  115.239627]
[  115.239627] the existing dependency chain (in reverse order) is:
[  115.248012]
[  115.248012] -> #1 (sk_lock-AF_INET){+.+.+.}:
[  115.254472]lock_acquire+0x190/0x370
[  115.259165]lock_sock_nested+0xb8/0x100
[  115.264148]do_ip_setsockopt.isra.16+0x140/0x24f0
[  115.270125]ip_setsockopt+0x34/0xb0
[  115.274742]udp_setsockopt+0x1b/0x30
[  115.279455]sock_common_setsockopt+0x78/0xf0
[  115.284937]SyS_setsockopt+0x11c/0x220
[  115.289835]do_syscall_64+0x187/0x410
[  115.294638]return_from_SYSCALL_64+0x0/0x7a
[  115.300025]
[  115.300025] -> #0 (rtnl_mutex){+.+.+.}:
[  115.306030]__lock_acquire+0x4114/0x47c0
[  115.311132]lock_acquire+0x190/0x370
[  115.315844]__mutex_lock+0xef/0x1460
[  115.320555]mutex_lock_nested+0x1b/0x20
[  115.325558]rtnl_lock+0x17/0x20
[  115.329785]register_netdevice_notifier+0x6f/0x4f0
[  115.335851]tee_tg_check+0x19b/0x260
[  115.340562]xt_check_target+0x1f5/0x6c0
[  115.345569]find_check_entry.isra.7+0x62f/0x960
[  115.351353]translate_table+0xcf2/0x1830
[  115.356454]do_ipt_set_ctl+0x1ff/0x3a0
[  115.361362]nf_setsockopt+0x61/0xc0
[  115.365977]ip_setsockopt+0x82/0xb0
[  115.370592]raw_setsockopt+0x73/0xa0
[  115.375304]sock_common_setsockopt+0x78/0xf0
[  115.380793]SyS_setsockopt+0x11c/0x220
[  115.385701]entry_SYSCALL_64_fastpath+0x1c/0xb1
[  115.391478]
[  115.391478] other info that might help us debug this:
[  115.391478]
[  115.400511]  Possible unsafe locking scenario:
[  115.400511]
[  115.407176]CPU0CPU1
[  115.412270]
[  115.417364]   lock(sk_lock-AF_INET);
[  115.421394]lock(rtnl_mutex);
[  115.427760]lock(sk_lock-AF_INET);
[  115.434723]   lock(rtnl_mutex);
[  115.438267]
[  115.438267]  *** DEADLOCK ***

[ ... ]

Signed-off-by: Taehee Yoo 
---
 include/uapi/linux/netfilter/xt_TEE.h |  3 +-
 net/netfilter/xt_TEE.c| 90 ++-
 2 files changed, 59 insertions(+), 34 deletions(-)

diff --git a/include/uapi/linux/netfilter/xt_TEE.h 
b/include/uapi/linux/netfilter/xt_TEE.h
index 0109202..4b7eae4 100644
--- a/include/uapi/linux/netfilter/xt_TEE.h
+++ b/include/uapi/linux/netfilter/xt_TEE.h
@@ -2,10 +2,11 @@
 #define _XT_TEE_TARGET_H
 
 #include 
+#include 
 
 struct xt_tee_tginfo {
union nf_inet_addr gw;
-   char oif[16];
+   char oif[IFNAMSIZ];
 
/* used internally by the kernel */
struct xt_tee_priv *priv __attribute__((aligned(8)));
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 86b0580..98fac9f 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -12,19 +12,20 @@
  */
 #include 
 #include 
-#include 
 #include 
-#include 
 #include 
 #include 
 #include 
 
 struct xt_tee_priv {
-   struct notifier_block   notifier;
struct xt_tee_tginfo*tginfo;
+   struct net  *net;
+   struct list_headlist;
int oif;
 };
 
+static LIST_HEAD(tee_tg_list);
+static DEFINE_MUTEX(list_mutex);
 static const union nf_inet_addr tee_zero_address;
 
 static unsigned int
@@ -55,59 +56,69 @@ static int tee_netdev_event(struct notifier_block *this, 
unsigned long event,
void *ptr)
 {
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+   struct net *net = dev_net(dev);
struct xt_tee_priv *priv;
 
-   priv = container_of(this, struct xt_tee_priv, notifier);
-   switch (event) {
-   case NETDEV_REGISTER:
-   if (!strcmp(dev->name, priv->tginfo->oif))
-   priv->oif = dev->ifindex;
-   break;
-   case NETDEV_UNREGISTER:
-   if (dev->ifindex == priv->oif)
-   priv->oif = -1;
-   break;
-   case NETDEV_CHANGENAME:
-   if (!strcmp(dev->name, priv->tginfo->oif))
-

[PATCH] netfilter: ipt_CLUSTERIP: Fix potential deadlock when CLUSTERIP target is inserted

2017-09-03 Thread Taehee Yoo
When the ipt_CLUSTERIP target is inserted, lockdep warns about a
possible deadlock. To avoid the deadlock,
register_netdevice_notifier() should be called only from the init routine.

reproduce command is :
   # iptables -A INPUT -p tcp -i enp3s0 -d 192.168.0.5 --dport 80 \
-j CLUSTERIP --new --hashmode sourceip \
--clustermac 01:00:5e:00:00:20 --total-nodes 2 --local-node 1

warning message is :

[  148.751110] WARNING: possible circular locking dependency detected
[  148.758037] 4.13.0-rc1+ #71 Not tainted
[  148.762334] --

[ ... ]

the existing dependency chain (in reverse order) is:
[  148.816203]
-> #1 (sk_lock-AF_INET){+.+.+.}:
[  148.822686]lock_acquire+0x190/0x370
[  148.827401]lock_sock_nested+0xb8/0x100
[  148.832405]do_ip_setsockopt.isra.16+0x140/0x24f0
[  148.838380]ip_setsockopt+0x34/0xb0
[  148.842988]udp_setsockopt+0x1b/0x30
[  148.847692]sock_common_setsockopt+0x78/0xf0
[  148.853182]SyS_setsockopt+0x11c/0x220
[  148.858089]do_syscall_64+0x187/0x410
[  148.862901]return_from_SYSCALL_64+0x0/0x7a
[  148.868289]
-> #0 (rtnl_mutex){+.+.+.}:
[  148.874303]__lock_acquire+0x4114/0x47c0
[  148.879405]lock_acquire+0x190/0x370
[  148.884109]__mutex_lock+0xef/0x1460
[  148.20]mutex_lock_nested+0x1b/0x20
[  148.893824]rtnl_lock+0x17/0x20
[  148.898052]register_netdevice_notifier+0x6f/0x4f0
[  148.904127]clusterip_tg_check+0xbf0/0x13e0
[  148.909519]xt_check_target+0x1f5/0x6c0
[  148.914525]find_check_entry.isra.7+0x62f/0x960
[  148.920308]translate_table+0xcf2/0x1830
[  148.925410]do_ipt_set_ctl+0x1ff/0x3a0
[  148.930320]nf_setsockopt+0x61/0xc0
[  148.934933]ip_setsockopt+0x82/0xb0
[  148.939548]raw_setsockopt+0x73/0xa0
[  148.944260]sock_common_setsockopt+0x78/0xf0
[  148.949749]SyS_setsockopt+0x11c/0x220
[  148.954658]entry_SYSCALL_64_fastpath+0x1c/0xb1
[  148.960435]
other info that might help us debug this:

[  148.969459]  Possible unsafe locking scenario:

[  148.976124]CPU0CPU1
[  148.981218]
[  148.986312]   lock(sk_lock-AF_INET);
[  148.990343]lock(rtnl_mutex);
[  148.996708]lock(sk_lock-AF_INET);
[  149.003559]   lock(rtnl_mutex);
[  149.007103]
*** DEADLOCK ***

[ ... ]

Signed-off-by: Taehee Yoo 
---
 net/ipv4/netfilter/ipt_CLUSTERIP.c | 70 +-
 1 file changed, 39 insertions(+), 31 deletions(-)

diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c 
b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 6637e8b..c31f188 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -59,7 +59,6 @@ struct clusterip_config {
struct rcu_head rcu;
 
char ifname[IFNAMSIZ];  /* device ifname */
-   struct notifier_block notifier; /* refresh c->ifindex in it */
 };
 
 #ifdef CONFIG_PROC_FS
@@ -73,6 +72,7 @@ struct clusterip_net {
/* lock protects the configs list */
spinlock_t lock;
 
+   struct notifier_block notifier;
 #ifdef CONFIG_PROC_FS
struct proc_dir_entry *procdir;
 #endif
@@ -111,8 +111,6 @@ clusterip_config_entry_put(struct net *net, struct 
clusterip_config *c)
spin_unlock(>lock);
local_bh_enable();
 
-   unregister_netdevice_notifier(>notifier);
-
/* In case anyone still accesses the file, the open/close
 * functions are also incrementing the refcount on their own,
 * so it's safe to remove the entry even if it's in use. */
@@ -176,32 +174,37 @@ clusterip_netdev_event(struct notifier_block *this, 
unsigned long event,
   void *ptr)
 {
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+   struct net *net = dev_net(dev);
+   struct clusterip_net *cn = net_generic(net, clusterip_net_id);
struct clusterip_config *c;
 
-   c = container_of(this, struct clusterip_config, notifier);
-   switch (event) {
-   case NETDEV_REGISTER:
-   if (!strcmp(dev->name, c->ifname)) {
-   c->ifindex = dev->ifindex;
-   dev_mc_add(dev, c->clustermac);
-   }
-   break;
-   case NETDEV_UNREGISTER:
-   if (dev->ifindex == c->ifindex) {
-   dev_mc_del(dev, c->clustermac);
-   c->ifindex = -1;
-   }
-   break;
-   case NETDEV_CHANGENAME:
-   if (!strcmp(dev->name, c->ifname)) {
-   c->ifindex = dev->ifindex;
-   dev_mc_add(dev, c->clustermac);
-   } else if (dev->ifindex == c->ifindex) 

Re: [PATCH] netfilter: xt_TEE: Fix potential deadlock when TEE target is inserted

2017-09-03 Thread Jan Engelhardt

On Sunday 2017-09-03 16:30, Taehee Yoo wrote:

>When xt_TEE target is inserted, lockdep warns about possible
>DEADLOCK situation. to avoid deadlock situation
>the register_netdevice_notifier() should be called by only init routine.
>
>+#include 
> 
> struct xt_tee_tginfo {
>   union nf_inet_addr gw;
>-  char oif[16];
>+  char oif[IFNAMSIZ];

This should not be done, as xt_tee_tginfo is exported to userspace.
(It also has nothing to do with fixing the deadlock, really.)

>+  case NETDEV_UNREGISTER:
>+  if ((dev->ifindex == priv->oif) &&

redundant new parenthesis group
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


On ulogd2, nfacct and sqlite3

2017-09-03 Thread DEXTER
Hi Guys!

I was searching for a _simple_ way to account traffic per host and found
numerous methods just by googling, but none of them were simple.
Then I stumbled upon ulogd2 and this page:
https://home.regit.org/2012/07/flow-accounting-with-netfilter-and-ulogd2/

Which is almost something I want, but the output should be in sqlite3.
So I tried to configure ulogd2 with sqlite3 output on Ubuntu 16.04, and
it just did not want to work.

Here are the issues I found (after all my remaining hair has fallen out):

- First, this is what comes up in the logs after starting ulogd:
"type mismatch between SQLITE3 and NFACCT in stack"
This completely misguided me on where to look for the issue. (~40% hair
loss)

- Then I realized (after a couple of hours trying to find out what was
going on) that Ubuntu starts ulogd2 with the --uid ulog option. When I
manually started ulogd without the --uid option it was able to write to the
sqlite file, but not with --uid. I think this is probably because it cannot
communicate with the kernel after the setgid/setuid, because it is
missing the appropriate capability (maybe CAP_NET_ADMIN? idk.). (~30%
hair loss)

- The docs containing the sqlite3.table file are completely missing the
table where the nfacct data can be written. Looking at
pgsql-ulogd2.sql.gz, I found that the table columns should be something
like this:
INSERT INTO nfacct
(sum_name,sum_pkts,sum_bytes,oob_time_sec,oob_time_usec) (~5% hair loss)

- Ulogd logs this every pollinterval: unknown type 32816 for sum
This is because in ulogd_output_SQLITE3.c ULOGD_RET_RAW is only handled
in the default: section of the sqlite3_interp function. I don't know why
the sum is RET_RAW, and I also don't know what that sum is even supposed
to be (because there are already sum_pkts and sum_bytes). (~5% hair loss)

- Data is written to the sqlite database even if sum_pkts and
sum_bytes are 0. There should be at least an option, either on the input
side (nfacct) or the output side (sqlite3), so that if the sums are 0 they
are not written to the database. (It just uses up space and I'm only
interested in hourly/daily/monthly/yearly traffic sums). (~6% hair loss)

Now the only thing needed for me is to regularly consolidate the sqlite
database and also show the data on a somewhat pretty web page. This has
nothing to do with ulogd and sqlite or you guys but since I couldn't
find one, I probably need to write my own. (~14% hair loss).

I hope I managed to collect all the issues I found while trying to make
this work, and I hope that someone in the future doesn't have to go
through this and will have the _justworks_ magic feeling.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] netfilter: xt_TEE: Fix potential deadlock when TEE target is inserted

2017-09-03 Thread Taehee Yoo
2017-09-04 0:32 GMT+09:00 Jan Engelhardt :
>
> On Sunday 2017-09-03 16:30, Taehee Yoo wrote:
>
>>When xt_TEE target is inserted, lockdep warns about possible
>>DEADLOCK situation. to avoid deadlock situation
>>the register_netdevice_notifier() should be called by only init routine.
>>
>>+#include 
>>
>> struct xt_tee_tginfo {
>>   union nf_inet_addr gw;
>>-  char oif[16];
>>+  char oif[IFNAMSIZ];
>
> This should not be done, as xt_tee_tginfo is exported to userspace.
> (It also has nothing to do with fixing the deadlock, really.)
>
>>+  case NETDEV_UNREGISTER:
>>+  if ((dev->ifindex == priv->oif) &&
>
> redundant new parenthesis group

Thank you for your review!

I will send v2 patch.

Thanks
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V2] netfilter: xt_TEE: Fix potential deadlock when TEE target is inserted

2017-09-03 Thread Taehee Yoo
When the xt_TEE target is inserted, lockdep warns about a possible
deadlock. To avoid the deadlock, register_netdevice_notifier()
should be called only from the init routine.

reproduce command is :
   # iptables -I INPUT -j TEE --oif enp3s0 --gateway 192.168.0.1

warning message is :

[  115.182917] WARNING: possible circular locking dependency detected
[  115.189846] 4.13.0-rc1+ #68 Not tainted
[  115.194141] --
[  115.201065] iptables/1283 is trying to acquire lock:
[  115.206627]  (rtnl_mutex){+.+.+.}, at: [] 
rtnl_lock+0x17/0x20
[  115.214842]
[  115.214842] but task is already holding lock:
[  115.221378]  (sk_lock-AF_INET){+.+.+.}, at: [] 
ip_setsockopt+0x6d/0xb0
[  115.230462]
[  115.230462] which lock already depends on the new lock.
[  115.230462]
[  115.239627]
[  115.239627] the existing dependency chain (in reverse order) is:
[  115.248012]
[  115.248012] -> #1 (sk_lock-AF_INET){+.+.+.}:
[  115.254472]lock_acquire+0x190/0x370
[  115.259165]lock_sock_nested+0xb8/0x100
[  115.264148]do_ip_setsockopt.isra.16+0x140/0x24f0
[  115.270125]ip_setsockopt+0x34/0xb0
[  115.274742]udp_setsockopt+0x1b/0x30
[  115.279455]sock_common_setsockopt+0x78/0xf0
[  115.284937]SyS_setsockopt+0x11c/0x220
[  115.289835]do_syscall_64+0x187/0x410
[  115.294638]return_from_SYSCALL_64+0x0/0x7a
[  115.300025]
[  115.300025] -> #0 (rtnl_mutex){+.+.+.}:
[  115.306030]__lock_acquire+0x4114/0x47c0
[  115.311132]lock_acquire+0x190/0x370
[  115.315844]__mutex_lock+0xef/0x1460
[  115.320555]mutex_lock_nested+0x1b/0x20
[  115.325558]rtnl_lock+0x17/0x20
[  115.329785]register_netdevice_notifier+0x6f/0x4f0
[  115.335851]tee_tg_check+0x19b/0x260
[  115.340562]xt_check_target+0x1f5/0x6c0
[  115.345569]find_check_entry.isra.7+0x62f/0x960
[  115.351353]translate_table+0xcf2/0x1830
[  115.356454]do_ipt_set_ctl+0x1ff/0x3a0
[  115.361362]nf_setsockopt+0x61/0xc0
[  115.365977]ip_setsockopt+0x82/0xb0
[  115.370592]raw_setsockopt+0x73/0xa0
[  115.375304]sock_common_setsockopt+0x78/0xf0
[  115.380793]SyS_setsockopt+0x11c/0x220
[  115.385701]entry_SYSCALL_64_fastpath+0x1c/0xb1
[  115.391478]
[  115.391478] other info that might help us debug this:
[  115.391478]
[  115.400511]  Possible unsafe locking scenario:
[  115.400511]
[  115.407176]CPU0CPU1
[  115.412270]
[  115.417364]   lock(sk_lock-AF_INET);
[  115.421394]lock(rtnl_mutex);
[  115.427760]lock(sk_lock-AF_INET);
[  115.434723]   lock(rtnl_mutex);
[  115.438267]
[  115.438267]  *** DEADLOCK ***

[ ... ]

Signed-off-by: Taehee Yoo 
---

V2:
 - Do not modify the xt_TEE.h

V1:
 - Initial version


 net/netfilter/xt_TEE.c | 89 +++---
 1 file changed, 56 insertions(+), 33 deletions(-)

diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 86b0580..2aebbc0 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -12,19 +12,20 @@
  */
 #include 
 #include 
-#include 
 #include 
-#include 
 #include 
 #include 
 #include 
 
 struct xt_tee_priv {
-   struct notifier_block   notifier;
struct xt_tee_tginfo*tginfo;
+   struct net  *net;
+   struct list_headlist;
int oif;
 };
 
+static LIST_HEAD(tee_tg_list);
+static DEFINE_MUTEX(list_mutex);
 static const union nf_inet_addr tee_zero_address;
 
 static unsigned int
@@ -55,59 +56,68 @@ static int tee_netdev_event(struct notifier_block *this, 
unsigned long event,
void *ptr)
 {
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+   struct net *net = dev_net(dev);
struct xt_tee_priv *priv;
 
-   priv = container_of(this, struct xt_tee_priv, notifier);
-   switch (event) {
-   case NETDEV_REGISTER:
-   if (!strcmp(dev->name, priv->tginfo->oif))
-   priv->oif = dev->ifindex;
-   break;
-   case NETDEV_UNREGISTER:
-   if (dev->ifindex == priv->oif)
-   priv->oif = -1;
-   break;
-   case NETDEV_CHANGENAME:
-   if (!strcmp(dev->name, priv->tginfo->oif))
-   priv->oif = dev->ifindex;
-   else if (dev->ifindex == priv->oif)
-   priv->oif = -1;
-   break;
+   mutex_lock(_mutex);
+   list_for_each_entry(priv, _tg_list, list) {
+   switch (event) {
+   case NETDEV_REGISTER:
+   if (!strcmp(dev->name, priv->tginfo->oif) &&
+   net_eq(net, priv->net))
+   priv->oif = dev->ifindex;
+ 

[PATCH 02/47] netfilter: nf_tables: keep chain counters away from hot path

2017-09-03 Thread Pablo Neira Ayuso
These chain counters are only used by the iptables-compat tool, which
allows users to use the x_tables extensions from the existing nf_tables
framework. This patch makes nf_tables ~5% faster for the general use case,
i.e. native nft users, where no chain counters are used at all.

Signed-off-by: Pablo Neira Ayuso 
---
 include/net/netfilter/nf_tables_core.h |  2 ++
 net/netfilter/nf_tables_api.c  | 11 +++
 net/netfilter/nf_tables_core.c | 26 ++
 3 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/include/net/netfilter/nf_tables_core.h 
b/include/net/netfilter/nf_tables_core.h
index 8f690effec37..424684c33771 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -49,6 +49,8 @@ struct nft_payload_set {
 };
 
 extern const struct nft_expr_ops nft_payload_fast_ops;
+
+extern struct static_key_false nft_counters_enabled;
 extern struct static_key_false nft_trace_enabled;
 
 #endif /* _NET_NF_TABLES_CORE_H */
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 7843efa33c59..7fbf0070aba1 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1240,6 +1240,8 @@ static void nf_tables_chain_destroy(struct nft_chain 
*chain)
 
module_put(basechain->type->owner);
free_percpu(basechain->stats);
+   if (basechain->stats)
+   static_branch_dec(_counters_enabled);
if (basechain->ops[0].dev != NULL)
dev_put(basechain->ops[0].dev);
kfree(basechain);
@@ -1504,14 +1506,7 @@ static int nf_tables_newchain(struct net *net, struct 
sock *nlsk,
return PTR_ERR(stats);
}
basechain->stats = stats;
-   } else {
-   stats = netdev_alloc_pcpu_stats(struct nft_stats);
-   if (stats == NULL) {
-   nft_chain_release_hook();
-   kfree(basechain);
-   return -ENOMEM;
-   }
-   rcu_assign_pointer(basechain->stats, stats);
+   static_branch_inc(_counters_enabled);
}
 
hookfn = hook.type->hooks[hook.num];
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 65dbeadcb118..c5bab08b0d73 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -114,6 +114,22 @@ static bool nft_payload_fast_eval(const struct nft_expr 
*expr,
return true;
 }
 
+DEFINE_STATIC_KEY_FALSE(nft_counters_enabled);
+
+static noinline void nft_update_chain_stats(const struct nft_chain *chain,
+   const struct nft_pktinfo *pkt)
+{
+   struct nft_stats *stats;
+
+   local_bh_disable();
+   stats = this_cpu_ptr(rcu_dereference(nft_base_chain(chain)->stats));
+   u64_stats_update_begin(>syncp);
+   stats->pkts++;
+   stats->bytes += pkt->skb->len;
+   u64_stats_update_end(>syncp);
+   local_bh_enable();
+}
+
 struct nft_jumpstack {
const struct nft_chain  *chain;
const struct nft_rule   *rule;
@@ -130,7 +146,6 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
struct nft_regs regs;
unsigned int stackptr = 0;
struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
-   struct nft_stats *stats;
int rulenum;
unsigned int gencursor = nft_genmask_cur(net);
struct nft_traceinfo info;
@@ -220,13 +235,8 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
nft_trace_packet(, basechain, NULL, -1,
 NFT_TRACETYPE_POLICY);
 
-   rcu_read_lock_bh();
-   stats = this_cpu_ptr(rcu_dereference(nft_base_chain(basechain)->stats));
-   u64_stats_update_begin(>syncp);
-   stats->pkts++;
-   stats->bytes += pkt->skb->len;
-   u64_stats_update_end(>syncp);
-   rcu_read_unlock_bh();
+   if (static_branch_unlikely(_counters_enabled))
+   nft_update_chain_stats(basechain, pkt);
 
return nft_base_chain(basechain)->policy;
 }
-- 
2.1.4


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 08/47] netfilter: expect: add and use nf_ct_expect_iterate helpers

2017-09-03 Thread Pablo Neira Ayuso
From: Florian Westphal 

We have several spots that open-code an expect walk; add a helper
that is similar to nf_ct_iterate_destroy/nf_ct_iterate_cleanup.

Signed-off-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
---
 include/net/netfilter/nf_conntrack_expect.h |  5 +++
 net/netfilter/nf_conntrack_expect.c | 54 +
 net/netfilter/nf_conntrack_helper.c | 34 +++-
 net/netfilter/nf_conntrack_netlink.c| 63 ++---
 4 files changed, 95 insertions(+), 61 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_expect.h 
b/include/net/netfilter/nf_conntrack_expect.h
index 2ba54feaccd8..818def00 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -107,6 +107,11 @@ void nf_ct_remove_expectations(struct nf_conn *ct);
 void nf_ct_unexpect_related(struct nf_conntrack_expect *exp);
 bool nf_ct_remove_expect(struct nf_conntrack_expect *exp);
 
+void nf_ct_expect_iterate_destroy(bool (*iter)(struct nf_conntrack_expect *e, 
void *data), void *data);
+void nf_ct_expect_iterate_net(struct net *net,
+ bool (*iter)(struct nf_conntrack_expect *e, void 
*data),
+  void *data, u32 portid, int report);
+
 /* Allocate space for an expectation: this is mandatory before calling
nf_ct_expect_related.  You will have to call put afterwards. */
 struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me);
diff --git a/net/netfilter/nf_conntrack_expect.c 
b/net/netfilter/nf_conntrack_expect.c
index 2c63808bea96..dad2c0c22ad5 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -474,6 +474,60 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect 
*expect,
 }
 EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);
 
+void nf_ct_expect_iterate_destroy(bool (*iter)(struct nf_conntrack_expect *e, 
void *data),
+ void *data)
+{
+   struct nf_conntrack_expect *exp;
+   const struct hlist_node *next;
+   unsigned int i;
+
+   spin_lock_bh(_conntrack_expect_lock);
+
+   for (i = 0; i < nf_ct_expect_hsize; i++) {
+   hlist_for_each_entry_safe(exp, next,
+ _ct_expect_hash[i],
+ hnode) {
+   if (iter(exp, data) && del_timer(>timeout)) {
+   nf_ct_unlink_expect(exp);
+   nf_ct_expect_put(exp);
+   }
+   }
+   }
+
+   spin_unlock_bh(_conntrack_expect_lock);
+}
+EXPORT_SYMBOL_GPL(nf_ct_expect_iterate_destroy);
+
+void nf_ct_expect_iterate_net(struct net *net,
+ bool (*iter)(struct nf_conntrack_expect *e, void 
*data),
+ void *data,
+ u32 portid, int report)
+{
+   struct nf_conntrack_expect *exp;
+   const struct hlist_node *next;
+   unsigned int i;
+
+   spin_lock_bh(_conntrack_expect_lock);
+
+   for (i = 0; i < nf_ct_expect_hsize; i++) {
+   hlist_for_each_entry_safe(exp, next,
+ _ct_expect_hash[i],
+ hnode) {
+
+   if (!net_eq(nf_ct_exp_net(exp), net))
+   continue;
+
+   if (iter(exp, data) && del_timer(>timeout)) {
+   nf_ct_unlink_expect_report(exp, portid, report);
+   nf_ct_expect_put(exp);
+   }
+   }
+   }
+
+   spin_unlock_bh(_conntrack_expect_lock);
+}
+EXPORT_SYMBOL_GPL(nf_ct_expect_iterate_net);
+
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 struct ct_expect_iter_state {
struct seq_net_private p;
diff --git a/net/netfilter/nf_conntrack_helper.c 
b/net/netfilter/nf_conntrack_helper.c
index 9129bb3b5153..551a1eddf0fa 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -437,12 +437,22 @@ int nf_conntrack_helper_register(struct 
nf_conntrack_helper *me)
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_helper_register);
 
-void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
+static bool expect_iter_me(struct nf_conntrack_expect *exp, void *data)
 {
-   struct nf_conntrack_expect *exp;
-   const struct hlist_node *next;
-   unsigned int i;
+   struct nf_conn_help *help = nfct_help(exp->master);
+   const struct nf_conntrack_helper *me = data;
+   const struct nf_conntrack_helper *this;
+
+   if (exp->helper == me)
+   return true;
 
+   this = rcu_dereference_protected(help->helper,
+
lockdep_is_held(_conntrack_expect_lock));
+   return this == me;
+}
+
+void 

[PATCH 06/47] netfilter: nf_tables: add fib expression to the netdev family

2017-09-03 Thread Pablo Neira Ayuso
From: "Pablo M. Bermudo Garay" 

Add fib expression support for netdev family. Like inet family, netdev
delegates the actual decision to the corresponding backend, either ipv4
or ipv6.

This allows performing very early reverse path filtering, among other
things.

You can find more information about the fib expression in the commit
message of f6d0cbcf09c5 ("netfilter: nf_tables: add fib expression").

Signed-off-by: Pablo M. Bermudo Garay 
Signed-off-by: Pablo Neira Ayuso 
---
 net/netfilter/Kconfig  |  9 +
 net/netfilter/Makefile |  1 +
 net/netfilter/nft_fib_netdev.c | 87 ++
 3 files changed, 97 insertions(+)
 create mode 100644 net/netfilter/nft_fib_netdev.c

diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 9b28864cc36a..e4a13cc8a2e7 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -636,6 +636,15 @@ config NFT_FWD_NETDEV
help
  This option enables packet forwarding for the "netdev" family.
 
+config NFT_FIB_NETDEV
+   depends on NFT_FIB_IPV4
+   depends on NFT_FIB_IPV6
+   tristate "Netfilter nf_tables netdev fib lookups support"
+   help
+ This option allows using the FIB expression from the netdev table.
+ The lookup will be delegated to the IPv4 or IPv6 FIB depending
+ on the protocol of the packet.
+
 endif # NF_TABLES_NETDEV
 
 endif # NF_TABLES
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 913380919301..d3891c93edd6 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -100,6 +100,7 @@ obj-$(CONFIG_NFT_REDIR) += nft_redir.o
 obj-$(CONFIG_NFT_HASH) += nft_hash.o
 obj-$(CONFIG_NFT_FIB)  += nft_fib.o
 obj-$(CONFIG_NFT_FIB_INET) += nft_fib_inet.o
+obj-$(CONFIG_NFT_FIB_NETDEV)   += nft_fib_netdev.o
 
 # nf_tables netdev
 obj-$(CONFIG_NFT_DUP_NETDEV)   += nft_dup_netdev.o
diff --git a/net/netfilter/nft_fib_netdev.c b/net/netfilter/nft_fib_netdev.c
new file mode 100644
index ..3997ee36cfbd
--- /dev/null
+++ b/net/netfilter/nft_fib_netdev.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2017 Pablo M. Bermudo Garay 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This code is based on net/netfilter/nft_fib_inet.c, written by
+ * Florian Westphal .
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+static void nft_fib_netdev_eval(const struct nft_expr *expr,
+   struct nft_regs *regs,
+   const struct nft_pktinfo *pkt)
+{
+   const struct nft_fib *priv = nft_expr_priv(expr);
+
+   switch (ntohs(pkt->skb->protocol)) {
+   case ETH_P_IP:
+   switch (priv->result) {
+   case NFT_FIB_RESULT_OIF:
+   case NFT_FIB_RESULT_OIFNAME:
+   return nft_fib4_eval(expr, regs, pkt);
+   case NFT_FIB_RESULT_ADDRTYPE:
+   return nft_fib4_eval_type(expr, regs, pkt);
+   }
+   break;
+   case ETH_P_IPV6:
+   switch (priv->result) {
+   case NFT_FIB_RESULT_OIF:
+   case NFT_FIB_RESULT_OIFNAME:
+   return nft_fib6_eval(expr, regs, pkt);
+   case NFT_FIB_RESULT_ADDRTYPE:
+   return nft_fib6_eval_type(expr, regs, pkt);
+   }
+   break;
+   }
+
+   regs->verdict.code = NFT_BREAK;
+}
+
+static struct nft_expr_type nft_fib_netdev_type;
+static const struct nft_expr_ops nft_fib_netdev_ops = {
+   .type   = _fib_netdev_type,
+   .size   = NFT_EXPR_SIZE(sizeof(struct nft_fib)),
+   .eval   = nft_fib_netdev_eval,
+   .init   = nft_fib_init,
+   .dump   = nft_fib_dump,
+   .validate   = nft_fib_validate,
+};
+
+static struct nft_expr_type nft_fib_netdev_type __read_mostly = {
+   .family = NFPROTO_NETDEV,
+   .name   = "fib",
+   .ops= _fib_netdev_ops,
+   .policy = nft_fib_policy,
+   .maxattr= NFTA_FIB_MAX,
+   .owner  = THIS_MODULE,
+};
+
+static int __init nft_fib_netdev_module_init(void)
+{
+   return nft_register_expr(_fib_netdev_type);
+}
+
+static void __exit nft_fib_netdev_module_exit(void)
+{
+   nft_unregister_expr(_fib_netdev_type);
+}
+
+module_init(nft_fib_netdev_module_init);
+module_exit(nft_fib_netdev_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo M. Bermudo Garay ");
+MODULE_ALIAS_NFT_AF_EXPR(5, "fib");
-- 
2.1.4


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info 

[PATCH 10/47] netfilter: conntrack: destroy functions need to free queued packets

2017-09-03 Thread Pablo Neira Ayuso
From: Florian Westphal 

Queued skbs might be using conntrack extensions that are being removed,
such as timeout.  This happens for skbs that have an skb->nfct in the
unconfirmed state (i.e., not in the hash table yet).

This is destructive, but there are only two use cases:
 - module removal (rare)
 - netns cleanup (most likely no conntracks exist, and if they do,
   they are removed anyway later on).

Signed-off-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
---
 net/netfilter/nf_conntrack_core.c | 4 
 net/netfilter/nf_queue.c  | 1 +
 2 files changed, 5 insertions(+)

diff --git a/net/netfilter/nf_conntrack_core.c 
b/net/netfilter/nf_conntrack_core.c
index 80ab4e937765..2bc499186186 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -56,6 +56,8 @@
 #include 
 #include 
 
+#include "nf_internals.h"
+
 #define NF_CONNTRACK_VERSION   "0.5.0"
 
 int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct,
@@ -1692,6 +1694,7 @@ void nf_ct_unconfirmed_destroy(struct net *net)
 
if (atomic_read(>ct.count) > 0) {
__nf_ct_unconfirmed_destroy(net);
+   nf_queue_nf_hook_drop(net);
synchronize_net();
}
 }
@@ -1737,6 +1740,7 @@ nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void 
*data), void *data)
if (atomic_read(>ct.count) == 0)
continue;
__nf_ct_unconfirmed_destroy(net);
+   nf_queue_nf_hook_drop(net);
}
rtnl_unlock();
 
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 043850c9d154..4f4d80a58fb5 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -109,6 +109,7 @@ unsigned int nf_queue_nf_hook_drop(struct net *net)
 
return count;
 }
+EXPORT_SYMBOL_GPL(nf_queue_nf_hook_drop);
 
 static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
  struct nf_hook_entry *hook_entry, unsigned int queuenum)
-- 
2.1.4


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 05/47] netfilter: nf_tables: fib: use skb_header_pointer

2017-09-03 Thread Pablo Neira Ayuso
From: "Pablo M. Bermudo Garay" 

This is a preparatory patch for adding fib support to the netdev family.

The netdev family receives the packets from ingress hook. At this point
we have no guarantee that the ip header is linear. So this patch
replaces ip_hdr with skb_header_pointer in order to address that
possible situation.

Signed-off-by: Pablo M. Bermudo Garay 
Signed-off-by: Pablo Neira Ayuso 
---
 net/ipv4/netfilter/nft_fib_ipv4.c | 20 
 net/ipv6/netfilter/nft_fib_ipv6.c | 29 +++--
 2 files changed, 39 insertions(+), 10 deletions(-)

diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c 
b/net/ipv4/netfilter/nft_fib_ipv4.c
index de3681df2ce7..e50976e3c213 100644
--- a/net/ipv4/netfilter/nft_fib_ipv4.c
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -32,9 +32,10 @@ void nft_fib4_eval_type(const struct nft_expr *expr, struct 
nft_regs *regs,
const struct nft_pktinfo *pkt)
 {
const struct nft_fib *priv = nft_expr_priv(expr);
+   int noff = skb_network_offset(pkt->skb);
u32 *dst = >data[priv->dreg];
const struct net_device *dev = NULL;
-   const struct iphdr *iph;
+   struct iphdr *iph, _iph;
__be32 addr;
 
if (priv->flags & NFTA_FIB_F_IIF)
@@ -42,7 +43,12 @@ void nft_fib4_eval_type(const struct nft_expr *expr, struct 
nft_regs *regs,
else if (priv->flags & NFTA_FIB_F_OIF)
dev = nft_out(pkt);
 
-   iph = ip_hdr(pkt->skb);
+   iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph);
+   if (!iph) {
+   regs->verdict.code = NFT_BREAK;
+   return;
+   }
+
if (priv->flags & NFTA_FIB_F_DADDR)
addr = iph->daddr;
else
@@ -61,8 +67,9 @@ void nft_fib4_eval(const struct nft_expr *expr, struct 
nft_regs *regs,
   const struct nft_pktinfo *pkt)
 {
const struct nft_fib *priv = nft_expr_priv(expr);
+   int noff = skb_network_offset(pkt->skb);
u32 *dest = >data[priv->dreg];
-   const struct iphdr *iph;
+   struct iphdr *iph, _iph;
struct fib_result res;
struct flowi4 fl4 = {
.flowi4_scope = RT_SCOPE_UNIVERSE,
@@ -95,7 +102,12 @@ void nft_fib4_eval(const struct nft_expr *expr, struct 
nft_regs *regs,
return;
}
 
-   iph = ip_hdr(pkt->skb);
+   iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph);
+   if (!iph) {
+   regs->verdict.code = NFT_BREAK;
+   return;
+   }
+
if (ipv4_is_zeronet(iph->saddr)) {
if (ipv4_is_lbcast(iph->daddr) ||
ipv4_is_local_multicast(iph->daddr)) {
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c 
b/net/ipv6/netfilter/nft_fib_ipv6.c
index 43f91d9b086c..54b5899543ef 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -25,9 +25,9 @@ static int get_ifindex(const struct net_device *dev)
 
 static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv,
   const struct nft_pktinfo *pkt,
-  const struct net_device *dev)
+  const struct net_device *dev,
+  struct ipv6hdr *iph)
 {
-   const struct ipv6hdr *iph = ipv6_hdr(pkt->skb);
int lookup_flags = 0;
 
if (priv->flags & NFTA_FIB_F_DADDR) {
@@ -55,7 +55,8 @@ static int nft_fib6_flowi_init(struct flowi6 *fl6, const 
struct nft_fib *priv,
 }
 
 static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
-   const struct nft_pktinfo *pkt)
+   const struct nft_pktinfo *pkt,
+   struct ipv6hdr *iph)
 {
const struct net_device *dev = NULL;
const struct nf_ipv6_ops *v6ops;
@@ -77,7 +78,7 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
else if (priv->flags & NFTA_FIB_F_OIF)
dev = nft_out(pkt);
 
-   nft_fib6_flowi_init(, priv, pkt, dev);
+   nft_fib6_flowi_init(, priv, pkt, dev, iph);
 
v6ops = nf_get_ipv6_ops();
if (dev && v6ops && v6ops->chk_addr(nft_net(pkt), , dev, 
true))
@@ -131,9 +132,17 @@ void nft_fib6_eval_type(const struct nft_expr *expr, 
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
 {
const struct nft_fib *priv = nft_expr_priv(expr);
+   int noff = skb_network_offset(pkt->skb);
u32 *dest = >data[priv->dreg];
+   struct ipv6hdr *iph, _iph;
 
-   *dest = __nft_fib6_eval_type(priv, pkt);
+   iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph);
+   if (!iph) {
+   regs->verdict.code = NFT_BREAK;
+   return;
+   }
+
+   *dest = __nft_fib6_eval_type(priv, pkt, iph);
 }
 EXPORT_SYMBOL_GPL(nft_fib6_eval_type);
 
@@ -141,8 +150,10 @@ void 

[PATCH 07/47] netfilter: conntrack: Change to deferable work queue

2017-09-03 Thread Pablo Neira Ayuso
From: "subas...@codeaurora.org" 

A delayed workqueue causes wakeups on idle CPUs. This was
causing a power impact for devices. Use a deferrable work
queue instead so that gc_worker runs only when the CPU is active.

Signed-off-by: Subash Abhinov Kasiviswanathan 
Signed-off-by: Pablo Neira Ayuso 
---
 net/netfilter/nf_conntrack_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nf_conntrack_core.c 
b/net/netfilter/nf_conntrack_core.c
index 69746928cc0a..c6f1cf0bff56 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1081,7 +1081,7 @@ static void gc_worker(struct work_struct *work)
 
 static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
 {
-   INIT_DELAYED_WORK(_work->dwork, gc_worker);
+   INIT_DEFERRABLE_WORK(_work->dwork, gc_worker);
gc_work->next_gc_run = HZ;
gc_work->exiting = false;
 }
-- 
2.1.4


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 03/47] netfilter: Remove duplicated rcu_read_lock.

2017-09-03 Thread Pablo Neira Ayuso
From: Taehee Yoo 

This patch removes duplicate rcu_read_lock().

1. IPVS part:

According to Julian Anastasov's mention, contexts of ipvs are described
at: http://marc.info/?l=netfilter-devel&m=149562884514072&w=2, in summary:

 - packet RX/TX: does not need locks because packets come from hooks.
 - sync msg RX: backup server uses RCU locks while registering new
   connections.
 - ip_vs_ctl.c: configuration get/set, RCU locks needed.
 - xt_ipvs.c: It is a netfilter match, running from hook context.

As result, rcu_read_lock and rcu_read_unlock can be removed from:

 - ip_vs_core.c: all
 - ip_vs_ctl.c:
   - only from ip_vs_has_real_service
 - ip_vs_ftp.c: all
 - ip_vs_proto_sctp.c: all
 - ip_vs_proto_tcp.c: all
 - ip_vs_proto_udp.c: all
 - ip_vs_xmit.c: all (contains only packet processing)

2. Netfilter part:

There are three types of functions that are guaranteed to run under rcu_read_lock().
First, functions that are only called by nf_hook():

 - nf_conntrack_broadcast_help(), pptp_expectfn(), set_expected_rtp_rtcp().
 - tcpmss_reverse_mtu(), tproxy_laddr4(), tproxy_laddr6().
 - match_lookup_rt6(), check_hlist(), hashlimit_mt_common().
 - xt_osf_match_packet().

Second, functions whose caller already holds the rcu_read_lock().
 - destroy_conntrack(), ctnetlink_conntrack_event().
 - ctnl_timeout_find_get(), nfqnl_nf_hook_drop().

Third, functions that are mixed with type1 and type2.

These functions are called by nf_hook() and also by
ordinary functions that already hold the rcu_read_lock():

 - __ctnetlink_glue_build(), ctnetlink_expect_event().
 - ctnetlink_proto_size().

Applied files are below:

- nf_conntrack_broadcast.c, nf_conntrack_core.c, nf_conntrack_netlink.c.
- nf_conntrack_pptp.c, nf_conntrack_sip.c, nfnetlink_cttimeout.c.
- nfnetlink_queue.c, xt_TCPMSS.c, xt_TPROXY.c, xt_addrtype.c.
- xt_connlimit.c, xt_hashlimit.c, xt_osf.c

Detailed calltrace can be found at:
http://marc.info/?l=netfilter-devel&m=149667610710350&w=2

Signed-off-by: Taehee Yoo 
Acked-by: Julian Anastasov 
Signed-off-by: Pablo Neira Ayuso 
---
 net/netfilter/ipvs/ip_vs_core.c|  8 --
 net/netfilter/ipvs/ip_vs_ctl.c |  3 ---
 net/netfilter/ipvs/ip_vs_ftp.c |  2 --
 net/netfilter/ipvs/ip_vs_proto_sctp.c  | 11 ++--
 net/netfilter/ipvs/ip_vs_proto_tcp.c   | 10 +---
 net/netfilter/ipvs/ip_vs_proto_udp.c   | 10 +---
 net/netfilter/ipvs/ip_vs_xmit.c| 46 +++---
 net/netfilter/nf_conntrack_broadcast.c |  2 --
 net/netfilter/nf_conntrack_core.c  |  3 ---
 net/netfilter/nf_conntrack_netlink.c   | 12 -
 net/netfilter/nf_conntrack_pptp.c  |  2 --
 net/netfilter/nf_conntrack_sip.c   |  6 +
 net/netfilter/nfnetlink_cttimeout.c|  2 --
 net/netfilter/nfnetlink_queue.c|  2 --
 net/netfilter/xt_TCPMSS.c  |  2 --
 net/netfilter/xt_TPROXY.c  |  4 ---
 net/netfilter/xt_addrtype.c|  3 ---
 net/netfilter/xt_connlimit.c   |  3 ---
 net/netfilter/xt_hashlimit.c   |  8 +++---
 net/netfilter/xt_osf.c |  2 --
 20 files changed, 13 insertions(+), 128 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index e31956b58aba..2ff9d9070c95 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -125,14 +125,12 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
s->cnt.inbytes += skb->len;
u64_stats_update_end(>syncp);
 
-   rcu_read_lock();
svc = rcu_dereference(dest->svc);
s = this_cpu_ptr(svc->stats.cpustats);
u64_stats_update_begin(>syncp);
s->cnt.inpkts++;
s->cnt.inbytes += skb->len;
u64_stats_update_end(>syncp);
-   rcu_read_unlock();
 
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
u64_stats_update_begin(>syncp);
@@ -159,14 +157,12 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff 
*skb)
s->cnt.outbytes += skb->len;
u64_stats_update_end(>syncp);
 
-   rcu_read_lock();
svc = rcu_dereference(dest->svc);
s = this_cpu_ptr(svc->stats.cpustats);
u64_stats_update_begin(>syncp);
s->cnt.outpkts++;
s->cnt.outbytes += skb->len;
u64_stats_update_end(>syncp);
-   rcu_read_unlock();
 
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
u64_stats_update_begin(>syncp);
@@ -1222,7 +1218,6 @@ static struct ip_vs_conn *__ip_vs_rs_conn_out(unsigned 
int hooknum,
if (!pptr)
return NULL;
 
-   rcu_read_lock();
dest = ip_vs_find_real_service(ipvs, af, iph->protocol,
   >saddr, pptr[0]);
if (dest) {
@@ -1237,7 

[PATCH 04/47] netfilter: nf_tables: Attach process info to NFT_MSG_NEWGEN notifications

2017-09-03 Thread Pablo Neira Ayuso
From: Phil Sutter 

This is helpful for 'nft monitor' to track which process caused a given
change to the ruleset.

Signed-off-by: Phil Sutter 
Signed-off-by: Pablo Neira Ayuso 
---
 include/uapi/linux/netfilter/nf_tables.h | 2 ++
 net/netfilter/nf_tables_api.c| 5 -
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/netfilter/nf_tables.h 
b/include/uapi/linux/netfilter/nf_tables.h
index 683f6f88fcac..6f0a950e21c3 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1221,6 +1221,8 @@ enum nft_objref_attributes {
 enum nft_gen_attributes {
NFTA_GEN_UNSPEC,
NFTA_GEN_ID,
+   NFTA_GEN_PROC_PID,
+   NFTA_GEN_PROC_NAME,
__NFTA_GEN_MAX
 };
 #define NFTA_GEN_MAX   (__NFTA_GEN_MAX - 1)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 7fbf0070aba1..b77ad0813564 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4657,6 +4657,7 @@ static int nf_tables_fill_gen_info(struct sk_buff *skb, 
struct net *net,
 {
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
+   char buf[TASK_COMM_LEN];
int event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWGEN);
 
nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), 0);
@@ -4668,7 +4669,9 @@ static int nf_tables_fill_gen_info(struct sk_buff *skb, 
struct net *net,
nfmsg->version  = NFNETLINK_V0;
nfmsg->res_id   = htons(net->nft.base_seq & 0x);
 
-   if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq)))
+   if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq)) ||
+   nla_put_be32(skb, NFTA_GEN_PROC_PID, htonl(task_pid_nr(current))) ||
+   nla_put_string(skb, NFTA_GEN_PROC_NAME, get_task_comm(buf, 
current)))
goto nla_put_failure;
 
nlmsg_end(skb, nlh);
-- 
2.1.4


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 00/47] Netfilter updates for net-next

2017-09-03 Thread Pablo Neira Ayuso
Hi David,

The following patchset contains Netfilter updates for your net-next
tree. Basically, updates to the conntrack core, enhancements for
nf_tables, conversion of netfilter hooks from linked list to array to
improve memory locality and assorted improvements for the Netfilter
codebase. More specifically, they are:

1) Add expectation to hashes after timer initialization to prevent
   access from another CPU that walks on the hashes and calls
   del_timer(), from Florian Westphal.

2) Don't update nf_tables chain counters from hot path, this is only
   used by the x_tables compatibility layer.

3) Get rid of nested rcu_read_lock() calls from netfilter hook path.
   Hooks are always guaranteed to run from rcu read side, so remove
   nested rcu_read_lock() where possible. Patch from Taehee Yoo.

4) nf_tables new ruleset generation notifications include PID and name
   of the process that has updated the ruleset, from Phil Sutter.

5) Use skb_header_pointer() from nft_fib, so we can reuse this code from
   the nf_family netdev family. Patch from Pablo M. Bermudo.

6) Add support for nft_fib in nf_tables netdev family, also from Pablo.

7) Use deferrable workqueue for conntrack garbage collection, to reduce
   power consumption, patch from Subash Abhinov Kasiviswanathan.

8) Add nf_ct_expect_iterate_net() helper and use it. From Florian
   Westphal.

9) Call nf_ct_unconfirmed_destroy only from cttimeout, from Florian.

10) Drop references on conntrack removal path when skbuffs have escaped via
nfqueue, from Florian.

11) Don't queue packets to nfqueue with dying conntrack, from Florian.

12) Constify nf_hook_ops structure, from Florian.

13) Remove needless branch in nf_tables trace code, from Phil Sutter.

14) Add nla_strdup(), from Phil Sutter.

15) Raise nf_tables objects name size up to 255 chars, people want to use
DNS names, so increase this according to what RFC 1035 specifies.
Patch series from Phil Sutter.

16) Kill nf_conntrack_default_on, it's broken. Default on conntrack hook
registration on demand, suggested by Eric Dumazet, patch from Florian.

17) Remove unused variables in compat_copy_entry_from_user both in
ip_tables and arp_tables code. Patch from Taehee Yoo.

18) Constify struct nf_conntrack_l4proto, from Julia Lawall.

19) Constify nf_loginfo structure, also from Julia.

20) Use a single rb root in connlimit, from Taehee Yoo.

21) Remove unused netfilter_queue_init() prototype, from Taehee Yoo.

22) Use audit_log() instead of open-coding it, from Geliang Tang.

23) Allow to mangle tcp options via nft_exthdr, from Florian.

24) Allow to fetch TCP MSS from nft_rt, from Florian. This includes
a fix for a miscalculation of the minimal length.

25) Simplify branch logic in h323 helper, from Nick Desaulniers.

26) Calculate netlink attribute size for conntrack tuple at compile
time, from Florian.

27) Remove protocol name field from nf_conntrack_{l3,l4}proto structure.
From Florian.

28) Remove holes in nf_conntrack_l4proto structure, so it becomes
smaller. From Florian.

29) Get rid of print_tuple() indirection for /proc conntrack listing.
Place all the code in net/netfilter/nf_conntrack_standalone.c.
Patch from Florian.

30) Do not build in print_conntrack() if CONFIG_NF_CONNTRACK_PROCFS is
off. From Florian.

31) Constify most nf_conntrack_{l3,l4}proto helper functions, from
Florian.

32) Fix broken indentation in ebtables extensions, from Colin Ian King.

33) Fix several harmless sparse warnings, from Florian.

34) Convert netfilter hook infrastructure to use array for better memory
locality, joint work done by Florian and Aaron Conole. Moreover, add
some instrumentation to debug this.

35) Batch nf_unregister_net_hooks() calls, to call synchronize_net once
per batch, from Florian.

36) Get rid of noisy logging in ICMPv6 conntrack helper, from Florian.

37) Get rid of obsolete NFDEBUG() instrumentation, from Varsha Rao.

38) Remove unused code in the generic protocol tracker, from Davide
Caratti.

I think I will have material for a second Netfilter batch in my queue if
time allows to make it fit in this merge window.

You can pull these changes from:

  git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git

Thanks a lot!



The following changes since commit 7a68ada6ec7d88c68057d3a4c2a517eb94289976:

  Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net (2017-07-21 
03:38:43 +0100)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git HEAD

for you to fetch changes up to 1aff64715edb8565e99337b842d814d636641b50:

  netfilter: rt: account for tcp header size too (2017-08-28 18:14:30 +0200)


Aaron Conole (1):
  netfilter: convert hook list to an array

Colin Ian King (1):
  netfilter: ebtables: fix indent on if 

[PATCH 09/47] netfilter: add and use nf_ct_unconfirmed_destroy

2017-09-03 Thread Pablo Neira Ayuso
From: Florian Westphal 

This also removes __nf_ct_unconfirmed_destroy() call from
nf_ct_iterate_cleanup_net, so that function can be used only
when missing conntracks from unconfirmed list isn't a problem.

Signed-off-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
---
 include/net/netfilter/nf_conntrack.h |  3 +++
 net/netfilter/nf_conntrack_core.c| 15 +++
 net/netfilter/nfnetlink_cttimeout.c  |  1 +
 3 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack.h 
b/include/net/netfilter/nf_conntrack.h
index 48407569585d..6e6f678aaac7 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -224,6 +224,9 @@ extern s32 (*nf_ct_nat_offset)(const struct nf_conn *ct,
   enum ip_conntrack_dir dir,
   u32 seq);
 
+/* Set all unconfirmed conntrack as dying */
+void nf_ct_unconfirmed_destroy(struct net *);
+
 /* Iterate over all conntracks: if iter returns true, it's deleted. */
 void nf_ct_iterate_cleanup_net(struct net *net,
   int (*iter)(struct nf_conn *i, void *data),
diff --git a/net/netfilter/nf_conntrack_core.c 
b/net/netfilter/nf_conntrack_core.c
index c6f1cf0bff56..80ab4e937765 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1686,6 +1686,17 @@ __nf_ct_unconfirmed_destroy(struct net *net)
}
 }
 
+void nf_ct_unconfirmed_destroy(struct net *net)
+{
+   might_sleep();
+
+   if (atomic_read(>ct.count) > 0) {
+   __nf_ct_unconfirmed_destroy(net);
+   synchronize_net();
+   }
+}
+EXPORT_SYMBOL_GPL(nf_ct_unconfirmed_destroy);
+
 void nf_ct_iterate_cleanup_net(struct net *net,
   int (*iter)(struct nf_conn *i, void *data),
   void *data, u32 portid, int report)
@@ -1697,14 +1708,10 @@ void nf_ct_iterate_cleanup_net(struct net *net,
if (atomic_read(>ct.count) == 0)
return;
 
-   __nf_ct_unconfirmed_destroy(net);
-
d.iter = iter;
d.data = data;
d.net = net;
 
-   synchronize_net();
-
nf_ct_iterate_cleanup(iter_net_only, , portid, report);
 }
 EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup_net);
diff --git a/net/netfilter/nfnetlink_cttimeout.c 
b/net/netfilter/nfnetlink_cttimeout.c
index 7ce9e86d374c..f4fb6d4dd0b9 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -570,6 +570,7 @@ static void __net_exit cttimeout_net_exit(struct net *net)
 {
struct ctnl_timeout *cur, *tmp;
 
+   nf_ct_unconfirmed_destroy(net);
ctnl_untimeout(net, NULL);
 
list_for_each_entry_safe(cur, tmp, >nfct_timeout_list, head) {
-- 
2.1.4


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 01/47] netfilter: expect: add to hash table after expect init

2017-09-03 Thread Pablo Neira Ayuso
From: Florian Westphal 

assuming we have lockless readers we should make sure they can only
see expectations that have already been initialized.

hlist_add_head_rcu acts as memory barrier, move it after timer setup.

Theoretically we could crash due to a del_timer() on other cpu
seeing garbage data.

Signed-off-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
---
 net/netfilter/nf_conntrack_expect.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/netfilter/nf_conntrack_expect.c 
b/net/netfilter/nf_conntrack_expect.c
index 899c2c36da13..2c63808bea96 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -368,12 +368,6 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect 
*exp)
/* two references : one for hash insert, one for the timer */
refcount_add(2, >use);
 
-   hlist_add_head_rcu(>lnode, _help->expectations);
-   master_help->expecting[exp->class]++;
-
-   hlist_add_head_rcu(>hnode, _ct_expect_hash[h]);
-   net->ct.expect_count++;
-
setup_timer(>timeout, nf_ct_expectation_timed_out,
(unsigned long)exp);
helper = rcu_dereference_protected(master_help->helper,
@@ -384,6 +378,12 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect 
*exp)
}
add_timer(>timeout);
 
+   hlist_add_head_rcu(>lnode, _help->expectations);
+   master_help->expecting[exp->class]++;
+
+   hlist_add_head_rcu(>hnode, _ct_expect_hash[h]);
+   net->ct.expect_count++;
+
NF_CT_STAT_INC(net, expect_create);
 }
 
-- 
2.1.4


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [nft PATCH 1/2] src: add flags for nft_ctx_new

2017-09-03 Thread Pablo Neira Ayuso
On Mon, Sep 04, 2017 at 12:03:55AM +0200, Eric Leblond wrote:
> By adding flags to nft_ctx_new, we will have a minimum capabilities
> of changing the way the nft_ctx is created.
> 
> For now, this patch uses a simple value that allow the user to specify
> that he will handle netlink by himself.
> 
> Signed-off-by: Eric Leblond 
> ---
>  include/nftables.h |  4 
>  src/main.c | 20 +++-
>  2 files changed, 15 insertions(+), 9 deletions(-)
> 
> diff --git a/include/nftables.h b/include/nftables.h
> index 5035567..7c4e93f 100644
> --- a/include/nftables.h
> +++ b/include/nftables.h
> @@ -49,8 +49,12 @@ struct nft_ctx {
>   struct output_ctx   output;
>   boolcheck;
>   struct nft_cachecache;
> + uint32_tflags;
>  };
>  
> +#define NFT_CTX_CUSTOM_NETLINK   (1<<0)
> +#define NFT_CTX_DEFAULT  0

Better than flag, let's make this a type, ie. NFT_CTX_NETLINK is just 1.

OK?
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [nft PATCH 2/2] src: get rid of printf

2017-09-03 Thread Pablo Neira Ayuso
On Mon, Sep 04, 2017 at 12:03:56AM +0200, Eric Leblond wrote:
> This patch introduces the nft_print_to_output_ctx function that has
> to be used instead of printf to output information that where
> previously send to stdout. This function accumulate the output in
> a buffer that can be fetched by the user with the nft_ctx_get_output()
> function.
> 
> This modification will allow the libnftables library to provide an
> easy way to the users to get the output data and display them like
> they want.
> 
> Signed-off-by: Eric Leblond 
> ---
>  include/datatype.h   |   5 +-
>  include/expression.h |   2 +-
>  include/nftables.h   |   5 ++
>  src/cli.c|   1 +
>  src/ct.c |  21 ++---
>  src/datatype.c   |  66 ---
>  src/expression.c |  79 ++
>  src/exthdr.c |  16 ++--
>  src/fib.c|  23 +++---
>  src/hash.c   |  10 +--
>  src/main.c   |  45 ++
>  src/meta.c   |  32 +---
>  src/numgen.c |   8 +-
>  src/payload.c|   9 +-
>  src/rule.c   | 228 
> +--
>  src/statement.c  | 145 +---
>  16 files changed, 408 insertions(+), 287 deletions(-)
> 
> diff --git a/include/datatype.h b/include/datatype.h
> index 2e34591..e9f6079 100644
> --- a/include/datatype.h
> +++ b/include/datatype.h
> @@ -209,7 +209,8 @@ extern void symbolic_constant_print(const struct 
> symbol_table *tbl,
>   struct output_ctx *octx);
>  extern void symbol_table_print(const struct symbol_table *tbl,
>  const struct datatype *dtype,
> -enum byteorder byteorder);
> +enum byteorder byteorder,
> +struct output_ctx *octx);
>  
>  extern struct symbol_table *rt_symbol_table_init(const char *filename);
>  extern void rt_symbol_table_free(struct symbol_table *tbl);
> @@ -261,7 +262,7 @@ extern const struct datatype *
>  set_datatype_alloc(const struct datatype *orig_dtype, unsigned int 
> byteorder);
>  extern void set_datatype_destroy(const struct datatype *dtype);
>  
> -extern void time_print(uint64_t seconds);
> +extern void time_print(uint64_t seconds, struct output_ctx *octx);
>  extern struct error_record *time_parse(const struct location *loc,
>  const char *c, uint64_t *res);
>  
> diff --git a/include/expression.h b/include/expression.h
> index 32d4423..ce6b702 100644
> --- a/include/expression.h
> +++ b/include/expression.h
> @@ -334,7 +334,7 @@ extern struct expr *expr_get(struct expr *expr);
>  extern void expr_free(struct expr *expr);
>  extern void expr_print(const struct expr *expr, struct output_ctx *octx);
>  extern bool expr_cmp(const struct expr *e1, const struct expr *e2);
> -extern void expr_describe(const struct expr *expr);
> +extern void expr_describe(const struct expr *expr, struct output_ctx *octx);
>  
>  extern const struct datatype *expr_basetype(const struct expr *expr);
>  extern void expr_set_type(struct expr *expr, const struct datatype *dtype,
> diff --git a/include/nftables.h b/include/nftables.h
> index 7c4e93f..f4d5ce1 100644
> --- a/include/nftables.h
> +++ b/include/nftables.h
> @@ -30,6 +30,8 @@ struct output_ctx {
>   unsigned int ip2name;
>   unsigned int handle;
>   unsigned int echo;
> + char *output_buf;
> + size_t output_buf_len;
>  };
>  
>  struct nft_cache {
> @@ -149,4 +151,7 @@ void realm_table_meta_exit(void);
>  void devgroup_table_exit(void);
>  void realm_table_rt_exit(void);
>  
> +int nft_print_to_output_ctx(struct output_ctx *octx, const char *fmt, ...);
> +char *nft_ctx_get_output(struct nft_ctx *ctx);
> +
>  #endif /* NFTABLES_NFTABLES_H */
> diff --git a/src/cli.c b/src/cli.c
> index d923ff7..ca4418c 100644
> --- a/src/cli.c
> +++ b/src/cli.c
> @@ -138,6 +138,7 @@ static void cli_complete(char *line)
>   cli_nft->debug_mask);
>   scanner_push_buffer(scanner, _cli, line);
>   nft_run(cli_nft, cli_nf_sock, scanner, state, );
> + printf("%s", nft_ctx_get_output(cli_nft));
>   erec_print_list(stdout, , cli_nft->debug_mask);
>   xfree(line);
>   cache_release(_nft->cache);
> diff --git a/src/ct.c b/src/ct.c
> index d64f467..f19608a 100644
> --- a/src/ct.c
> +++ b/src/ct.c
> @@ -141,11 +141,12 @@ static void ct_label_type_print(const struct expr *expr,
>   for (s = ct_label_tbl->symbols; s->identifier != NULL; s++) {
>   if (bit != s->value)
>   continue;
> - printf("\"%s\"", s->identifier);
> + nft_print_to_output_ctx(octx, "\"%s\"", s->identifier);
>   return;
>   }
>   /* can happen when connlabel.conf is altered after rules were added */
> - printf("%ld\n", (long)mpz_scan1(expr->value, 0));
> + nft_print_to_output_ctx(octx, "%ld\n",

Let's call 

[PATCH 37/47] netfilter: conntrack: print_conntrack only needed if CONFIG_NF_CONNTRACK_PROCFS

2017-09-03 Thread Pablo Neira Ayuso
From: Florian Westphal 

Signed-off-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
---
 include/net/netfilter/nf_conntrack_l4proto.h | 7 ---
 net/netfilter/nf_conntrack_proto_dccp.c  | 6 ++
 net/netfilter/nf_conntrack_proto_gre.c   | 4 
 net/netfilter/nf_conntrack_proto_sctp.c  | 6 ++
 net/netfilter/nf_conntrack_proto_tcp.c   | 6 ++
 5 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h 
b/include/net/netfilter/nf_conntrack_l4proto.h
index 7e8da04a5eb6..4976ef92dc78 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -61,9 +61,6 @@ struct nf_conntrack_l4proto {
/* called by gc worker if table is full */
bool (*can_early_drop)(const struct nf_conn *ct);
 
-   /* Print out the private part of the conntrack. */
-   void (*print_conntrack)(struct seq_file *s, struct nf_conn *);
-
/* Return the array of timeouts for this protocol. */
unsigned int *(*get_timeouts)(struct net *net);
 
@@ -97,6 +94,10 @@ struct nf_conntrack_l4proto {
const struct nla_policy *nla_policy;
} ctnl_timeout;
 #endif
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
+   /* Print out the private part of the conntrack. */
+   void (*print_conntrack)(struct seq_file *s, struct nf_conn *);
+#endif
unsigned int*net_id;
/* Init l4proto pernet data */
int (*init_net)(struct net *net, u_int16_t proto);
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c 
b/net/netfilter/nf_conntrack_proto_dccp.c
index d2df49ac390a..188347571fc7 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -623,10 +623,12 @@ static bool dccp_can_early_drop(const struct nf_conn *ct)
return false;
 }
 
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
 static void dccp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
 {
seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]);
 }
+#endif
 
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
@@ -879,7 +881,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 
__read_mostly = {
.get_timeouts   = dccp_get_timeouts,
.error  = dccp_error,
.can_early_drop = dccp_can_early_drop,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack= dccp_print_conntrack,
+#endif
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.to_nlattr  = dccp_to_nlattr,
.nlattr_size= dccp_nlattr_size,
@@ -913,7 +917,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 
__read_mostly = {
.get_timeouts   = dccp_get_timeouts,
.error  = dccp_error,
.can_early_drop = dccp_can_early_drop,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack= dccp_print_conntrack,
+#endif
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.to_nlattr  = dccp_to_nlattr,
.nlattr_size= dccp_nlattr_size,
diff --git a/net/netfilter/nf_conntrack_proto_gre.c 
b/net/netfilter/nf_conntrack_proto_gre.c
index cd28095dd7a4..c0e3a23ac23a 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -224,6 +224,7 @@ static bool gre_pkt_to_tuple(const struct sk_buff *skb, 
unsigned int dataoff,
return true;
 }
 
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
 /* print private data for conntrack */
 static void gre_print_conntrack(struct seq_file *s, struct nf_conn *ct)
 {
@@ -231,6 +232,7 @@ static void gre_print_conntrack(struct seq_file *s, struct 
nf_conn *ct)
   (ct->proto.gre.timeout / HZ),
   (ct->proto.gre.stream_timeout / HZ));
 }
+#endif
 
 static unsigned int *gre_get_timeouts(struct net *net)
 {
@@ -357,7 +359,9 @@ static struct nf_conntrack_l4proto 
nf_conntrack_l4proto_gre4 __read_mostly = {
.l4proto = IPPROTO_GRE,
.pkt_to_tuple= gre_pkt_to_tuple,
.invert_tuple= gre_invert_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = gre_print_conntrack,
+#endif
.get_timeouts= gre_get_timeouts,
.packet  = gre_packet,
.new = gre_new,
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c 
b/net/netfilter/nf_conntrack_proto_sctp.c
index da83b401be17..890b5c73368d 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -174,11 +174,13 @@ static bool sctp_invert_tuple(struct nf_conntrack_tuple 
*tuple,
return true;
 }
 
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
 /* Print out the private part of the conntrack. */
 static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
 {
seq_printf(s, "%s ", sctp_conntrack_names[ct->proto.sctp.state]);
 }
+#endif
 
 

[PATCH 34/47] netfilter: conntrack: remove protocol name from l4proto struct

2017-09-03 Thread Pablo Neira Ayuso
From: Florian Westphal 

no need to waste storage for something that is only needed
in one place and can be deduced from protocol number.

Signed-off-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
---
 include/net/netfilter/nf_conntrack_l4proto.h   |  3 ---
 net/ipv4/netfilter/nf_conntrack_proto_icmp.c   |  1 -
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c |  1 -
 net/netfilter/nf_conntrack_proto.c |  8 
 net/netfilter/nf_conntrack_proto_dccp.c|  2 --
 net/netfilter/nf_conntrack_proto_generic.c |  1 -
 net/netfilter/nf_conntrack_proto_gre.c |  1 -
 net/netfilter/nf_conntrack_proto_sctp.c|  2 --
 net/netfilter/nf_conntrack_proto_tcp.c |  2 --
 net/netfilter/nf_conntrack_proto_udp.c |  4 
 net/netfilter/nf_conntrack_standalone.c| 17 -
 11 files changed, 20 insertions(+), 22 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h 
b/include/net/netfilter/nf_conntrack_l4proto.h
index b6e27cafb1d9..47c16bae5e00 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -108,9 +108,6 @@ struct nf_conntrack_l4proto {
/* Return the per-net protocol part. */
struct nf_proto_net *(*get_net_proto)(struct net *net);
 
-   /* Protocol name */
-   const char *name;
-
/* Module (if any) which this is connected to. */
struct module *me;
 };
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c 
b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 73c591d8a9a8..fdbeb03e4600 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -362,7 +362,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp 
__read_mostly =
 {
.l3proto= PF_INET,
.l4proto= IPPROTO_ICMP,
-   .name   = "icmp",
.pkt_to_tuple   = icmp_pkt_to_tuple,
.invert_tuple   = icmp_invert_tuple,
.print_tuple= icmp_print_tuple,
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c 
b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index d5f028e33f65..805ab122767a 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -367,7 +367,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 
__read_mostly =
 {
.l3proto= PF_INET6,
.l4proto= IPPROTO_ICMPV6,
-   .name   = "icmpv6",
.pkt_to_tuple   = icmpv6_pkt_to_tuple,
.invert_tuple   = icmpv6_invert_tuple,
.print_tuple= icmpv6_print_tuple,
diff --git a/net/netfilter/nf_conntrack_proto.c 
b/net/netfilter/nf_conntrack_proto.c
index 85104a27cc89..0ecab7163d62 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -437,8 +437,8 @@ int nf_ct_l4proto_register(struct nf_conntrack_l4proto 
*l4proto[],
}
if (i != num_proto) {
ver = l4proto[i]->l3proto == PF_INET6 ? 6 : 4;
-   pr_err("nf_conntrack_ipv%d: can't register %s%d proto.\n",
-  ver, l4proto[i]->name, ver);
+   pr_err("nf_conntrack_ipv%d: can't register l4 %d proto.\n",
+  ver, l4proto[i]->l4proto);
nf_ct_l4proto_unregister(l4proto, i);
}
return ret;
@@ -458,8 +458,8 @@ int nf_ct_l4proto_pernet_register(struct net *net,
break;
}
if (i != num_proto) {
-   pr_err("nf_conntrack_%s%d: pernet registration failed\n",
-  l4proto[i]->name,
+   pr_err("nf_conntrack_proto_%d %d: pernet registration failed\n",
+  l4proto[i]->l4proto,
   l4proto[i]->l3proto == PF_INET6 ? 6 : 4);
nf_ct_l4proto_pernet_unregister(net, l4proto, i);
}
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c 
b/net/netfilter/nf_conntrack_proto_dccp.c
index 4707d997558a..a0492184a0a8 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -880,7 +880,6 @@ static struct nf_proto_net *dccp_get_net_proto(struct net 
*net)
 struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = {
.l3proto= AF_INET,
.l4proto= IPPROTO_DCCP,
-   .name   = "dccp",
.pkt_to_tuple   = dccp_pkt_to_tuple,
.invert_tuple   = dccp_invert_tuple,
.new= dccp_new,
@@ -916,7 +915,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp4);
 struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = {
.l3proto= AF_INET6,
.l4proto= IPPROTO_DCCP,
-   .name   = 

[PATCH 33/47] netfilter: conntrack: remove protocol name from l3proto struct

2017-09-03 Thread Pablo Neira Ayuso
From: Florian Westphal 

no need to waste storage for something that is only needed
in one place and can be deduced from protocol number.

Signed-off-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
---
 include/net/netfilter/nf_conntrack_l3proto.h   |  3 ---
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c |  1 -
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c |  1 -
 net/netfilter/nf_conntrack_l3proto_generic.c   |  1 -
 net/netfilter/nf_conntrack_standalone.c| 12 +++-
 5 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_l3proto.h 
b/include/net/netfilter/nf_conntrack_l3proto.h
index 6a27ffea7480..e31861e4fa6a 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -23,9 +23,6 @@ struct nf_conntrack_l3proto {
/* size of tuple nlattr, fills a hole */
u16 nla_size;
 
-   /* Protocol name */
-   const char *name;
-
/*
 * Try to fill in the third arg: nhoff is offset of l3 proto
  * hdr.  Return true if possible.
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c 
b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 9fb8cb033d80..9f7ea862068c 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -353,7 +353,6 @@ static void ipv4_hooks_unregister(struct net *net)
 
 struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
.l3proto = PF_INET,
-   .name= "ipv4",
.pkt_to_tuple= ipv4_pkt_to_tuple,
.invert_tuple= ipv4_invert_tuple,
.print_tuple = ipv4_print_tuple,
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c 
b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 6b4d59fd0214..91d37fbe28de 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -348,7 +348,6 @@ static void ipv6_hooks_unregister(struct net *net)
 
 struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
.l3proto= PF_INET6,
-   .name   = "ipv6",
.pkt_to_tuple   = ipv6_pkt_to_tuple,
.invert_tuple   = ipv6_invert_tuple,
.print_tuple= ipv6_print_tuple,
diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c 
b/net/netfilter/nf_conntrack_l3proto_generic.c
index cf9ace70bece..0387971582bc 100644
--- a/net/netfilter/nf_conntrack_l3proto_generic.c
+++ b/net/netfilter/nf_conntrack_l3proto_generic.c
@@ -64,7 +64,6 @@ static int generic_get_l4proto(const struct sk_buff *skb, 
unsigned int nhoff,
 
 struct nf_conntrack_l3proto nf_conntrack_l3proto_generic __read_mostly = {
.l3proto = PF_UNSPEC,
-   .name= "unknown",
.pkt_to_tuple= generic_pkt_to_tuple,
.invert_tuple= generic_invert_tuple,
.print_tuple = generic_print_tuple,
diff --git a/net/netfilter/nf_conntrack_standalone.c 
b/net/netfilter/nf_conntrack_standalone.c
index 5b6c675d55b1..359d7e6a4503 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -198,6 +198,16 @@ ct_show_delta_time(struct seq_file *s, const struct 
nf_conn *ct)
 }
 #endif
 
+static const char* l3proto_name(u16 proto)
+{
+   switch (proto) {
+   case AF_INET: return "ipv4";
+   case AF_INET6: return "ipv6";
+   }
+
+   return "unknown";
+}
+
 /* return 0 on success, 1 in case of error */
 static int ct_seq_show(struct seq_file *s, void *v)
 {
@@ -231,7 +241,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
 
ret = -ENOSPC;
seq_printf(s, "%-8s %u %-8s %u %ld ",
-  l3proto->name, nf_ct_l3num(ct),
+  l3proto_name(l3proto->l3proto), nf_ct_l3num(ct),
   l4proto->name, nf_ct_protonum(ct),
   nf_ct_expires(ct)  / HZ);
 
-- 
2.1.4


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 35/47] netfilter: conntrack: reduce size of l4protocol trackers

2017-09-03 Thread Pablo Neira Ayuso
From: Florian Westphal 

can use u16 for both, shrinks size by another 8 bytes.

Signed-off-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
---
 include/net/netfilter/nf_conntrack_l4proto.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h 
b/include/net/netfilter/nf_conntrack_l4proto.h
index 47c16bae5e00..15c58dd3f701 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -92,12 +92,12 @@ struct nf_conntrack_l4proto {
 
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
struct {
-   size_t obj_size;
int (*nlattr_to_obj)(struct nlattr *tb[],
 struct net *net, void *data);
int (*obj_to_nlattr)(struct sk_buff *skb, const void *data);
 
-   unsigned int nlattr_max;
+   u16 obj_size;
+   u16 nlattr_max;
const struct nla_policy *nla_policy;
} ctnl_timeout;
 #endif
-- 
2.1.4


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 29/47] netfilter: exthdr: tcp option set support

2017-09-03 Thread Pablo Neira Ayuso
From: Florian Westphal 

This allows setting 2 and 4 byte quantities in the tcp option space.
Main purpose is to allow native replacement for xt_TCPMSS to
work around pmtu blackholes.

Writes to kind and len are not allowed at the moment; it does not seem
useful to do this, as it causes corruption of the tcp option space.

We can always lift this restriction later if a use-case appears.

Signed-off-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
---
 include/uapi/linux/netfilter/nf_tables.h |   4 +-
 net/netfilter/nft_exthdr.c   | 164 ++-
 2 files changed, 165 insertions(+), 3 deletions(-)

diff --git a/include/uapi/linux/netfilter/nf_tables.h 
b/include/uapi/linux/netfilter/nf_tables.h
index be25cf69295b..40fd199f7531 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -732,7 +732,8 @@ enum nft_exthdr_op {
  * @NFTA_EXTHDR_OFFSET: extension header offset (NLA_U32)
  * @NFTA_EXTHDR_LEN: extension header length (NLA_U32)
  * @NFTA_EXTHDR_FLAGS: extension header flags (NLA_U32)
- * @NFTA_EXTHDR_OP: option match type (NLA_U8)
+ * @NFTA_EXTHDR_OP: option match type (NLA_U32)
+ * @NFTA_EXTHDR_SREG: source register (NLA_U32: nft_registers)
  */
 enum nft_exthdr_attributes {
NFTA_EXTHDR_UNSPEC,
@@ -742,6 +743,7 @@ enum nft_exthdr_attributes {
NFTA_EXTHDR_LEN,
NFTA_EXTHDR_FLAGS,
NFTA_EXTHDR_OP,
+   NFTA_EXTHDR_SREG,
__NFTA_EXTHDR_MAX
 };
 #define NFTA_EXTHDR_MAX(__NFTA_EXTHDR_MAX - 1)
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index e3a6eebe7e0c..f5a0bf5e3bdd 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -8,6 +8,7 @@
  * Development of this code funded by Astaro AG (http://www.astaro.com/)
  */
 
+#include 
 #include 
 #include 
 #include 
@@ -23,6 +24,7 @@ struct nft_exthdr {
u8  len;
u8  op;
enum nft_registers  dreg:8;
+   enum nft_registers  sreg:8;
u8  flags;
 };
 
@@ -124,6 +126,88 @@ static void nft_exthdr_tcp_eval(const struct nft_expr 
*expr,
regs->verdict.code = NFT_BREAK;
 }
 
+static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
+   struct nft_regs *regs,
+   const struct nft_pktinfo *pkt)
+{
+   u8 buff[sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE];
+   struct nft_exthdr *priv = nft_expr_priv(expr);
+   unsigned int i, optl, tcphdr_len, offset;
+   struct tcphdr *tcph;
+   u8 *opt;
+   u32 src;
+
+   tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, _len);
+   if (!tcph)
+   return;
+
+   opt = (u8 *)tcph;
+   for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
+   union {
+   u8 octet;
+   __be16 v16;
+   __be32 v32;
+   } old, new;
+
+   optl = optlen(opt, i);
+
+   if (priv->type != opt[i])
+   continue;
+
+   if (i + optl > tcphdr_len || priv->len + priv->offset > optl)
+   return;
+
+   if (!skb_make_writable(pkt->skb, pkt->xt.thoff + i + priv->len))
+   return;
+
+   tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff,
+ _len);
+   if (!tcph)
+   return;
+
+   src = regs->data[priv->sreg];
+   offset = i + priv->offset;
+
+   switch (priv->len) {
+   case 2:
+   old.v16 = get_unaligned((u16 *)(opt + offset));
+   new.v16 = src;
+
+   switch (priv->type) {
+   case TCPOPT_MSS:
+   /* increase can cause connection to stall */
+   if (ntohs(old.v16) <= ntohs(new.v16))
+   return;
+   break;
+   }
+
+   if (old.v16 == new.v16)
+   return;
+
+   put_unaligned(new.v16, (u16*)(opt + offset));
+   inet_proto_csum_replace2(>check, pkt->skb,
+old.v16, new.v16, false);
+   break;
+   case 4:
+   new.v32 = src;
+   old.v32 = get_unaligned((u32 *)(opt + offset));
+
+   if (old.v32 == new.v32)
+   return;
+
+   put_unaligned(new.v32, (u32*)(opt + offset));
+   inet_proto_csum_replace4(>check, pkt->skb,
+old.v32, new.v32, false);
+

[PATCH 31/47] netfilter: nf_nat_h323: fix logical-not-parentheses warning

2017-09-03 Thread Pablo Neira Ayuso
From: Nick Desaulniers 

Clang produces the following warning:

net/ipv4/netfilter/nf_nat_h323.c:553:6: error:
logical not is only applied to the left hand side of this comparison
  [-Werror,-Wlogical-not-parentheses]
if (!set_h225_addr(skb, protoff, data, dataoff, taddr,
^
add parentheses after the '!' to evaluate the comparison first
add parentheses around left hand side expression to silence this warning

There's not necessarily a bug here, but it's cleaner to return early,
ex:

if (x)
  return
...

rather than:

if (x == 0)
  ...
else
  return

Also added a return code check that seemed to be missing in one
instance.

Signed-off-by: Nick Desaulniers 
Signed-off-by: Pablo Neira Ayuso 
---
 net/ipv4/netfilter/nf_nat_h323.c | 57 +---
 1 file changed, 30 insertions(+), 27 deletions(-)

diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 574f7ebba0b6..ac8342dcb55e 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -252,16 +252,16 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct 
nf_conn *ct,
if (set_h245_addr(skb, protoff, data, dataoff, taddr,
  >tuplehash[!dir].tuple.dst.u3,
  htons((port & htons(1)) ? nated_port + 1 :
-   nated_port)) == 0) {
-   /* Save ports */
-   info->rtp_port[i][dir] = rtp_port;
-   info->rtp_port[i][!dir] = htons(nated_port);
-   } else {
+   nated_port))) {
nf_ct_unexpect_related(rtp_exp);
nf_ct_unexpect_related(rtcp_exp);
return -1;
}
 
+   /* Save ports */
+   info->rtp_port[i][dir] = rtp_port;
+   info->rtp_port[i][!dir] = htons(nated_port);
+
/* Success */
pr_debug("nf_nat_h323: expect RTP %pI4:%hu->%pI4:%hu\n",
 _exp->tuple.src.u3.ip,
@@ -370,15 +370,15 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn 
*ct,
/* Modify signal */
if (set_h225_addr(skb, protoff, data, dataoff, taddr,
  >tuplehash[!dir].tuple.dst.u3,
- htons(nated_port)) == 0) {
-   /* Save ports */
-   info->sig_port[dir] = port;
-   info->sig_port[!dir] = htons(nated_port);
-   } else {
+ htons(nated_port))) {
nf_ct_unexpect_related(exp);
return -1;
}
 
+   /* Save ports */
+   info->sig_port[dir] = port;
+   info->sig_port[!dir] = htons(nated_port);
+
pr_debug("nf_nat_q931: expect H.245 %pI4:%hu->%pI4:%hu\n",
 >tuple.src.u3.ip,
 ntohs(exp->tuple.src.u.tcp.port),
@@ -462,24 +462,27 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn 
*ct,
/* Modify signal */
if (set_h225_addr(skb, protoff, data, 0, [idx],
  >tuplehash[!dir].tuple.dst.u3,
- htons(nated_port)) == 0) {
-   /* Save ports */
-   info->sig_port[dir] = port;
-   info->sig_port[!dir] = htons(nated_port);
-
-   /* Fix for Gnomemeeting */
-   if (idx > 0 &&
-   get_h225_addr(ct, *data, [0], , ) &&
-   (ntohl(addr.ip) & 0xff00) == 0x7f00) {
-   set_h225_addr(skb, protoff, data, 0, [0],
- >tuplehash[!dir].tuple.dst.u3,
- info->sig_port[!dir]);
-   }
-   } else {
+ htons(nated_port))) {
nf_ct_unexpect_related(exp);
return -1;
}
 
+   /* Save ports */
+   info->sig_port[dir] = port;
+   info->sig_port[!dir] = htons(nated_port);
+
+   /* Fix for Gnomemeeting */
+   if (idx > 0 &&
+   get_h225_addr(ct, *data, [0], , ) &&
+   (ntohl(addr.ip) & 0xff00) == 0x7f00) {
+   if (set_h225_addr(skb, protoff, data, 0, [0],
+ >tuplehash[!dir].tuple.dst.u3,
+ info->sig_port[!dir])) {
+   nf_ct_unexpect_related(exp);
+   return -1;
+   }
+   }
+
/* Success */
pr_debug("nf_nat_ras: expect Q.931 %pI4:%hu->%pI4:%hu\n",
 >tuple.src.u3.ip,
@@ -550,9 +553,9 @@ static int nat_callforwarding(struct sk_buff *skb, struct 
nf_conn *ct,
}
 
/* Modify signal */
-   if (!set_h225_addr(skb, protoff, data, dataoff, taddr,
-  >tuplehash[!dir].tuple.dst.u3,
-  htons(nated_port)) == 0) {
+   if (set_h225_addr(skb, protoff, data, dataoff, taddr,
+ 

[PATCH 26/47] netfilter: use audit_log()

2017-09-03 Thread Pablo Neira Ayuso
From: Geliang Tang 

Use audit_log() instead of open-coding it.

Signed-off-by: Geliang Tang 
Signed-off-by: Pablo Neira Ayuso 
---
 net/bridge/netfilter/ebtables.c | 13 -
 net/netfilter/x_tables.c| 14 --
 2 files changed, 8 insertions(+), 19 deletions(-)

diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 9c6e619f452b..54c7ef4e970e 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1069,15 +1069,10 @@ static int do_replace_finish(struct net *net, struct 
ebt_replace *repl,
 
 #ifdef CONFIG_AUDIT
if (audit_enabled) {
-   struct audit_buffer *ab;
-
-   ab = audit_log_start(current->audit_context, GFP_KERNEL,
-AUDIT_NETFILTER_CFG);
-   if (ab) {
-   audit_log_format(ab, "table=%s family=%u entries=%u",
-repl->name, AF_BRIDGE, repl->nentries);
-   audit_log_end(ab);
-   }
+   audit_log(current->audit_context, GFP_KERNEL,
+ AUDIT_NETFILTER_CFG,
+ "table=%s family=%u entries=%u",
+ repl->name, AF_BRIDGE, repl->nentries);
}
 #endif
return ret;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index e1648238a9c9..c83a3b5e1c6c 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1192,16 +1192,10 @@ xt_replace_table(struct xt_table *table,
 
 #ifdef CONFIG_AUDIT
if (audit_enabled) {
-   struct audit_buffer *ab;
-
-   ab = audit_log_start(current->audit_context, GFP_KERNEL,
-AUDIT_NETFILTER_CFG);
-   if (ab) {
-   audit_log_format(ab, "table=%s family=%u entries=%u",
-table->name, table->af,
-private->number);
-   audit_log_end(ab);
-   }
+   audit_log(current->audit_context, GFP_KERNEL,
+ AUDIT_NETFILTER_CFG,
+ "table=%s family=%u entries=%u",
+ table->name, table->af, private->number);
}
 #endif
 
-- 
2.1.4


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 24/47] netfilter: connlimit: merge root4 and root6.

2017-09-03 Thread Pablo Neira Ayuso
From: Taehee Yoo 

The root4 variable is used only when the connlimit extension module has been
stored by the iptables command, and the root6 variable is used only when the
connlimit extension module has been stored by the ip6tables command.
So the root4 and root6 variables are never used at the same time.

Signed-off-by: Taehee Yoo 
Signed-off-by: Pablo Neira Ayuso 
---
 net/netfilter/xt_connlimit.c | 23 ---
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 97589b8a2a40..ffa8eec980e9 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -58,8 +58,7 @@ struct xt_connlimit_rb {
 static spinlock_t xt_connlimit_locks[CONNLIMIT_LOCK_SLOTS] 
__cacheline_aligned_in_smp;
 
 struct xt_connlimit_data {
-   struct rb_root climit_root4[CONNLIMIT_SLOTS];
-   struct rb_root climit_root6[CONNLIMIT_SLOTS];
+   struct rb_root climit_root[CONNLIMIT_SLOTS];
 };
 
 static u_int32_t connlimit_rnd __read_mostly;
@@ -294,13 +293,11 @@ static int count_them(struct net *net,
int count;
u32 hash;
 
-   if (family == NFPROTO_IPV6) {
+   if (family == NFPROTO_IPV6)
hash = connlimit_iphash6(addr, mask);
-   root = >climit_root6[hash];
-   } else {
+   else
hash = connlimit_iphash(addr->ip & mask->ip);
-   root = >climit_root4[hash];
-   }
+   root = >climit_root[hash];
 
spin_lock_bh(_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]);
 
@@ -379,10 +376,8 @@ static int connlimit_mt_check(const struct xt_mtchk_param 
*par)
return -ENOMEM;
}
 
-   for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i)
-   info->data->climit_root4[i] = RB_ROOT;
-   for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i)
-   info->data->climit_root6[i] = RB_ROOT;
+   for (i = 0; i < ARRAY_SIZE(info->data->climit_root); ++i)
+   info->data->climit_root[i] = RB_ROOT;
 
return 0;
 }
@@ -413,10 +408,8 @@ static void connlimit_mt_destroy(const struct 
xt_mtdtor_param *par)
 
nf_ct_netns_put(par->net, par->family);
 
-   for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i)
-   destroy_tree(>data->climit_root4[i]);
-   for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i)
-   destroy_tree(>data->climit_root6[i]);
+   for (i = 0; i < ARRAY_SIZE(info->data->climit_root); ++i)
+   destroy_tree(>data->climit_root[i]);
 
kfree(info->data);
 }
-- 
2.1.4


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 32/47] netfilter: conntrack: compute l3proto nla size at compile time

2017-09-03 Thread Pablo Neira Ayuso
From: Florian Westphal 

avoids a pointer and allows struct to be const later on.

Signed-off-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
---
 include/net/netfilter/nf_conntrack_l3proto.h   | 19 ---
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 13 +++--
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 14 --
 net/netfilter/nf_conntrack_netlink.c   |  3 ++-
 net/netfilter/nf_conntrack_proto.c |  9 +++--
 5 files changed, 28 insertions(+), 30 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_l3proto.h 
b/include/net/netfilter/nf_conntrack_l3proto.h
index 1b8de164d744..6a27ffea7480 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -20,6 +20,9 @@ struct nf_conntrack_l3proto {
/* L3 Protocol Family number. ex) PF_INET */
u_int16_t l3proto;
 
+   /* size of tuple nlattr, fills a hole */
+   u16 nla_size;
+
/* Protocol name */
const char *name;
 
@@ -49,23 +52,17 @@ struct nf_conntrack_l3proto {
int (*get_l4proto)(const struct sk_buff *skb, unsigned int nhoff,
   unsigned int *dataoff, u_int8_t *protonum);
 
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
int (*tuple_to_nlattr)(struct sk_buff *skb,
   const struct nf_conntrack_tuple *t);
-
-   /* Called when netns wants to use connection tracking */
-   int (*net_ns_get)(struct net *);
-   void (*net_ns_put)(struct net *);
-
-   /*
-* Calculate size of tuple nlattr
-*/
-   int (*nlattr_tuple_size)(void);
-
int (*nlattr_to_tuple)(struct nlattr *tb[],
   struct nf_conntrack_tuple *t);
const struct nla_policy *nla_policy;
+#endif
 
-   size_t nla_size;
+   /* Called when netns wants to use connection tracking */
+   int (*net_ns_get)(struct net *);
+   void (*net_ns_put)(struct net *);
 
/* Module (if any) which this is connected to. */
struct module *me;
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c 
b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index de5f0e6ddd1b..9fb8cb033d80 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -303,11 +303,6 @@ static int ipv4_nlattr_to_tuple(struct nlattr *tb[],
 
return 0;
 }
-
-static int ipv4_nlattr_tuple_size(void)
-{
-   return nla_policy_len(ipv4_nla_policy, CTA_IP_MAX + 1);
-}
 #endif
 
 static struct nf_sockopt_ops so_getorigdst = {
@@ -365,9 +360,10 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 
__read_mostly = {
.get_l4proto = ipv4_get_l4proto,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = ipv4_tuple_to_nlattr,
-   .nlattr_tuple_size = ipv4_nlattr_tuple_size,
.nlattr_to_tuple = ipv4_nlattr_to_tuple,
.nla_policy  = ipv4_nla_policy,
+   .nla_size= NLA_ALIGN(NLA_HDRLEN + sizeof(u32)) + /* 
CTA_IP_V4_SRC */
+  NLA_ALIGN(NLA_HDRLEN + sizeof(u32)),  /* 
CTA_IP_V4_DST */
 #endif
.net_ns_get  = ipv4_hooks_register,
.net_ns_put  = ipv4_hooks_unregister,
@@ -421,6 +417,11 @@ static int __init nf_conntrack_l3proto_ipv4_init(void)
 
need_conntrack();
 
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+   if (WARN_ON(nla_policy_len(ipv4_nla_policy, CTA_IP_MAX + 1) !=
+   nf_conntrack_l3proto_ipv4.nla_size))
+   return -EINVAL;
+#endif
ret = nf_register_sockopt(_getorigdst);
if (ret < 0) {
pr_err("Unable to register netfilter socket option\n");
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c 
b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index ddef5ee9e0a8..6b4d59fd0214 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -308,11 +308,6 @@ static int ipv6_nlattr_to_tuple(struct nlattr *tb[],
 
return 0;
 }
-
-static int ipv6_nlattr_tuple_size(void)
-{
-   return nla_policy_len(ipv6_nla_policy, CTA_IP_MAX + 1);
-}
 #endif
 
 static int ipv6_hooks_register(struct net *net)
@@ -360,9 +355,10 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 
__read_mostly = {
.get_l4proto= ipv6_get_l4proto,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr= ipv6_tuple_to_nlattr,
-   .nlattr_tuple_size  = ipv6_nlattr_tuple_size,
.nlattr_to_tuple= ipv6_nlattr_to_tuple,
.nla_policy = ipv6_nla_policy,
+   .nla_size   = NLA_ALIGN(NLA_HDRLEN + sizeof(u32[4])) +
+ NLA_ALIGN(NLA_HDRLEN + sizeof(u32[4])),
 #endif
.net_ns_get = ipv6_hooks_register,
.net_ns_put = ipv6_hooks_unregister,
@@ -421,6 +417,12 @@ static int __init 

[PATCH 28/47] netfilter: exthdr: split netlink dump function

2017-09-03 Thread Pablo Neira Ayuso
From: Florian Westphal 

so eval and upcoming eval_set versions can reuse a common helper.

Signed-off-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
---
 net/netfilter/nft_exthdr.c | 16 +++-
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 921c95f2c583..e3a6eebe7e0c 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -180,12 +180,8 @@ static int nft_exthdr_init(const struct nft_ctx *ctx,
   NFT_DATA_VALUE, priv->len);
 }
 
-static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr)
+static int nft_exthdr_dump_common(struct sk_buff *skb, const struct nft_exthdr 
*priv)
 {
-   const struct nft_exthdr *priv = nft_expr_priv(expr);
-
-   if (nft_dump_register(skb, NFTA_EXTHDR_DREG, priv->dreg))
-   goto nla_put_failure;
if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_EXTHDR_OFFSET, htonl(priv->offset)))
@@ -202,6 +198,16 @@ static int nft_exthdr_dump(struct sk_buff *skb, const 
struct nft_expr *expr)
return -1;
 }
 
+static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+   const struct nft_exthdr *priv = nft_expr_priv(expr);
+
+   if (nft_dump_register(skb, NFTA_EXTHDR_DREG, priv->dreg))
+   return -1;
+
+   return nft_exthdr_dump_common(skb, priv);
+}
+
 static struct nft_expr_type nft_exthdr_type;
 static const struct nft_expr_ops nft_exthdr_ipv6_ops = {
.type   = _exthdr_type,
-- 
2.1.4


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 27/47] netfilter: exthdr: factor out tcp option access

2017-09-03 Thread Pablo Neira Ayuso
From: Florian Westphal 

Signed-off-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
---
 net/netfilter/nft_exthdr.c | 33 +
 1 file changed, 21 insertions(+), 12 deletions(-)

diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 1ec49fe5845f..921c95f2c583 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -61,6 +61,26 @@ static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
regs->verdict.code = NFT_BREAK;
 }
 
+static void *
+nft_tcp_header_pointer(const struct nft_pktinfo *pkt,
+  unsigned int len, void *buffer, unsigned int *tcphdr_len)
+{
+   struct tcphdr *tcph;
+
+   if (!pkt->tprot_set || pkt->tprot != IPPROTO_TCP)
+   return NULL;
+
+   tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, sizeof(*tcph), 
buffer);
+   if (!tcph)
+   return NULL;
+
+   *tcphdr_len = __tcp_hdrlen(tcph);
+   if (*tcphdr_len < sizeof(*tcph) || *tcphdr_len > len)
+   return NULL;
+
+   return skb_header_pointer(pkt->skb, pkt->xt.thoff, *tcphdr_len, buffer);
+}
+
 static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -72,18 +92,7 @@ static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
struct tcphdr *tcph;
u8 *opt;
 
-   if (!pkt->tprot_set || pkt->tprot != IPPROTO_TCP)
-   goto err;
-
-   tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, sizeof(*tcph), buff);
-   if (!tcph)
-   goto err;
-
-   tcphdr_len = __tcp_hdrlen(tcph);
-   if (tcphdr_len < sizeof(*tcph))
-   goto err;
-
-   tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, tcphdr_len, buff);
+   tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, _len);
if (!tcph)
goto err;
 
-- 
2.1.4


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 25/47] netfilter: remove prototype of netfilter_queue_init

2017-09-03 Thread Pablo Neira Ayuso
From: Taehee Yoo 

netfilter_queue_init() has been removed,
so we can remove its prototype as well.

Signed-off-by: Taehee Yoo 
Signed-off-by: Pablo Neira Ayuso 
---
 net/netfilter/nf_internals.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index bfa742da83af..19f00a47a710 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -15,7 +15,6 @@
 int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
 struct nf_hook_entry **entryp, unsigned int verdict);
 unsigned int nf_queue_nf_hook_drop(struct net *net);
-int __init netfilter_queue_init(void);
 
 /* nf_log.c */
 int __init netfilter_log_init(void);
-- 
2.1.4


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [nft PATCH 1/2] src: add flags fo nft_ctx_new

2017-09-03 Thread Pablo Neira Ayuso
On Mon, Sep 04, 2017 at 12:33:09AM +0200, Pablo Neira Ayuso wrote:
> On Mon, Sep 04, 2017 at 12:03:55AM +0200, Eric Leblond wrote:
> > By adding flags to nft_ctx_new, we will have a minimum capabilities
> > of changing the way the nft_ctx is created.
> > 
> > For now, this patch uses a simple value that allow the user to specify
> > that he will handle netlink by himself.
> > 
> > Signed-off-by: Eric Leblond 
> > ---
> >  include/nftables.h |  4 
> >  src/main.c | 20 +++-
> >  2 files changed, 15 insertions(+), 9 deletions(-)
> > 
> > diff --git a/include/nftables.h b/include/nftables.h
> > index 5035567..7c4e93f 100644
> > --- a/include/nftables.h
> > +++ b/include/nftables.h
> > @@ -49,8 +49,12 @@ struct nft_ctx {
> > struct output_ctx   output;
> > boolcheck;
> > struct nft_cachecache;
> > +   uint32_tflags;
> >  };
> >  
> > +#define NFT_CTX_CUSTOM_NETLINK (1<<0)
> > +#define NFT_CTX_DEFAULT0
> 
> Better than flag, let's make this a type, ie. NFT_CTX_NETLINK is just 1.
> 
> OK?

Actually, let's hold this back: we just keep NFT_CTX_DEFAULT for now,
and we don't introduce NFT_CTX_NETLINK until we have the advanced API in
place.

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH nf-next 3/5] netlink: add NLM_F_NONREC flag for deletion requests

2017-09-03 Thread Pablo Neira Ayuso
On Sun, Sep 03, 2017 at 05:14:18PM -0700, David Miller wrote:
> 
> I only see patches 3, 4, and 5 of this series.
> 
> If this is meant for net-next inclusion, you'll have to submit it such that
> I see the entire series on netdev and thus in patchwork.

I'm posting this new NLM_F_NONREC for acknowledgment, if possible. I
have a few more patches that follow up so I can take them through
nf-next in the next batch.

But I can just re-send this through your net-next tree, as you prefer.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH nf-next 3/5] netlink: add NLM_F_NONREC flag for deletion requests

2017-09-03 Thread David Miller

I only see patches 3, 4, and 5 of this series.

If this is meant for net-next inclusion, you'll have to submit it such that
I see the entire series on netdev and thus in patchwork.

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[no subject]

2017-09-03 Thread x1kn8fk


423567.doc
Description: MS-Word document


[PATCH nf-next 1/5] netfilter: nf_tables: add nf_tables_updchain()

2017-09-03 Thread Pablo Neira Ayuso
nf_tables_newchain() is too large, wrap the chain update path in a
function to make it more maintainable.

Signed-off-by: Pablo Neira Ayuso 
---
 net/netfilter/nf_tables_api.c | 170 +++---
 1 file changed, 92 insertions(+), 78 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 149785ff1c7b..14695062a925 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1335,6 +1335,97 @@ static void nft_chain_release_hook(struct nft_chain_hook 
*hook)
dev_put(hook->dev);
 }
 
+static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
+ bool create)
+{
+   const struct nlattr * const *nla = ctx->nla;
+   struct nft_table *table = ctx->table;
+   struct nft_chain *chain = ctx->chain;
+   struct nft_af_info *afi = ctx->afi;
+   struct nft_base_chain *basechain;
+   struct nft_stats *stats = NULL;
+   struct nft_chain_hook hook;
+   const struct nlattr *name;
+   struct nf_hook_ops *ops;
+   struct nft_trans *trans;
+   int err, i;
+
+   if (nla[NFTA_CHAIN_HOOK]) {
+   if (!nft_is_base_chain(chain))
+   return -EBUSY;
+
+   err = nft_chain_parse_hook(ctx->net, nla, ctx->afi, ,
+  create);
+   if (err < 0)
+   return err;
+
+   basechain = nft_base_chain(chain);
+   if (basechain->type != hook.type) {
+   nft_chain_release_hook();
+   return -EBUSY;
+   }
+
+   for (i = 0; i < afi->nops; i++) {
+   ops = >ops[i];
+   if (ops->hooknum != hook.num ||
+   ops->priority != hook.priority ||
+   ops->dev != hook.dev) {
+   nft_chain_release_hook();
+   return -EBUSY;
+   }
+   }
+   nft_chain_release_hook();
+   }
+
+   if (nla[NFTA_CHAIN_HANDLE] &&
+   nla[NFTA_CHAIN_NAME]) {
+   struct nft_chain *chain2;
+
+   chain2 = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME],
+   genmask);
+   if (IS_ERR(chain2))
+   return PTR_ERR(chain2);
+   }
+
+   if (nla[NFTA_CHAIN_COUNTERS]) {
+   if (!nft_is_base_chain(chain))
+   return -EOPNOTSUPP;
+
+   stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
+   if (IS_ERR(stats))
+   return PTR_ERR(stats);
+   }
+
+   trans = nft_trans_alloc(ctx, NFT_MSG_NEWCHAIN,
+   sizeof(struct nft_trans_chain));
+   if (trans == NULL) {
+   free_percpu(stats);
+   return -ENOMEM;
+   }
+
+   nft_trans_chain_stats(trans) = stats;
+   nft_trans_chain_update(trans) = true;
+
+   if (nla[NFTA_CHAIN_POLICY])
+   nft_trans_chain_policy(trans) = policy;
+   else
+   nft_trans_chain_policy(trans) = -1;
+
+   name = nla[NFTA_CHAIN_NAME];
+   if (nla[NFTA_CHAIN_HANDLE] && name) {
+   nft_trans_chain_name(trans) =
+   nla_strdup(name, GFP_KERNEL);
+   if (!nft_trans_chain_name(trans)) {
+   kfree(trans);
+   free_percpu(stats);
+   return -ENOMEM;
+   }
+   }
+   list_add_tail(>list, >net->nft.commit_list);
+
+   return 0;
+}
+
 static int nf_tables_newchain(struct net *net, struct sock *nlsk,
  struct sk_buff *skb, const struct nlmsghdr *nlh,
  const struct nlattr * const nla[],
@@ -1403,91 +1494,14 @@ static int nf_tables_newchain(struct net *net, struct 
sock *nlsk,
}
 
if (chain != NULL) {
-   struct nft_stats *stats = NULL;
-   struct nft_trans *trans;
-
if (nlh->nlmsg_flags & NLM_F_EXCL)
return -EEXIST;
if (nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
 
-   if (nla[NFTA_CHAIN_HOOK]) {
-   struct nft_base_chain *basechain;
-   struct nft_chain_hook hook;
-   struct nf_hook_ops *ops;
-
-   if (!nft_is_base_chain(chain))
-   return -EBUSY;
-
-   err = nft_chain_parse_hook(net, nla, afi, ,
-  create);
-   if (err < 0)
-   return err;
-
-   basechain = nft_base_chain(chain);
-   if (basechain->type != 

[PATCH nf-next 2/5] netfilter: nf_tables: add nf_tables_addchain()

2017-09-03 Thread Pablo Neira Ayuso
Wrap the chain addition path in a function to make it more maintainable.

Signed-off-by: Pablo Neira Ayuso 
---
 net/netfilter/nf_tables_api.c | 199 ++
 1 file changed, 106 insertions(+), 93 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 14695062a925..8b86acbb9770 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1335,6 +1335,106 @@ static void nft_chain_release_hook(struct 
nft_chain_hook *hook)
dev_put(hook->dev);
 }
 
+static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
+ u8 policy, bool create)
+{
+   const struct nlattr * const *nla = ctx->nla;
+   struct nft_table *table = ctx->table;
+   struct nft_af_info *afi = ctx->afi;
+   struct nft_base_chain *basechain;
+   struct nft_stats __percpu *stats;
+   struct net *net = ctx->net;
+   struct nft_chain *chain;
+   unsigned int i;
+   int err;
+
+   if (table->use == UINT_MAX)
+   return -EOVERFLOW;
+
+   if (nla[NFTA_CHAIN_HOOK]) {
+   struct nft_chain_hook hook;
+   struct nf_hook_ops *ops;
+   nf_hookfn *hookfn;
+
+   err = nft_chain_parse_hook(net, nla, afi, , create);
+   if (err < 0)
+   return err;
+
+   basechain = kzalloc(sizeof(*basechain), GFP_KERNEL);
+   if (basechain == NULL) {
+   nft_chain_release_hook();
+   return -ENOMEM;
+   }
+
+   if (hook.dev != NULL)
+   strncpy(basechain->dev_name, hook.dev->name, IFNAMSIZ);
+
+   if (nla[NFTA_CHAIN_COUNTERS]) {
+   stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
+   if (IS_ERR(stats)) {
+   nft_chain_release_hook();
+   kfree(basechain);
+   return PTR_ERR(stats);
+   }
+   basechain->stats = stats;
+   static_branch_inc(_counters_enabled);
+   }
+
+   hookfn = hook.type->hooks[hook.num];
+   basechain->type = hook.type;
+   chain = >chain;
+
+   for (i = 0; i < afi->nops; i++) {
+   ops = >ops[i];
+   ops->pf = family;
+   ops->hooknum= hook.num;
+   ops->priority   = hook.priority;
+   ops->priv   = chain;
+   ops->hook   = afi->hooks[ops->hooknum];
+   ops->dev= hook.dev;
+   if (hookfn)
+   ops->hook = hookfn;
+   if (afi->hook_ops_init)
+   afi->hook_ops_init(ops, i);
+   }
+
+   chain->flags |= NFT_BASE_CHAIN;
+   basechain->policy = policy;
+   } else {
+   chain = kzalloc(sizeof(*chain), GFP_KERNEL);
+   if (chain == NULL)
+   return -ENOMEM;
+   }
+   INIT_LIST_HEAD(>rules);
+   chain->handle = nf_tables_alloc_handle(table);
+   chain->table = table;
+   chain->name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL);
+   if (!chain->name) {
+   err = -ENOMEM;
+   goto err1;
+   }
+
+   err = nf_tables_register_hooks(net, table, chain, afi->nops);
+   if (err < 0)
+   goto err1;
+
+   ctx->chain = chain;
+   err = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN);
+   if (err < 0)
+   goto err2;
+
+   table->use++;
+   list_add_tail_rcu(>list, >chains);
+
+   return 0;
+err2:
+   nf_tables_unregister_hooks(net, table, chain, afi->nops);
+err1:
+   nf_tables_chain_destroy(chain);
+
+   return err;
+}
+
 static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
  bool create)
 {
@@ -1433,19 +1533,15 @@ static int nf_tables_newchain(struct net *net, struct 
sock *nlsk,
 {
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
const struct nlattr * uninitialized_var(name);
+   u8 genmask = nft_genmask_next(net);
+   int family = nfmsg->nfgen_family;
struct nft_af_info *afi;
struct nft_table *table;
struct nft_chain *chain;
-   struct nft_base_chain *basechain = NULL;
-   u8 genmask = nft_genmask_next(net);
-   int family = nfmsg->nfgen_family;
u8 policy = NF_ACCEPT;
+   struct nft_ctx ctx;
u64 handle = 0;
-   unsigned int i;
-   struct nft_stats __percpu *stats;
-   int err;
bool create;
-   struct nft_ctx ctx;
 
create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
 
@@ -1493,101 +1589,18 @@ 

[PATCH nf-next 3/5] netlink: add NLM_F_NONREC flag for deletion requests

2017-09-03 Thread Pablo Neira Ayuso
In the last NFWS in Faro, Portugal, we discussed that netlink is lacking
the semantics to request non-recursive deletions, i.e. do not delete an
object if it has child objects that hang from this parent object that
the user requests to be deleted.

We need this new flag to solve a problem for the iptables-compat
backward compatibility utility, that runs iptables commands using the
existing nf_tables netlink interface. Specifically, custom chains in
iptables cannot be deleted if there are rules in it, however, nf_tables
allows to remove any chain that is populated with content. To sort out
this asymmetry, iptables-compat userspace sets this new NLM_F_NONREC
flag to obtain the same semantics that iptables provides.

This new flag should only be used for deletion requests. Note this new
flag value overlaps with the existing:

* NLM_F_ROOT for get requests.
* NLM_F_REPLACE for new requests.

However, those flags should not ever be used in deletion requests.

Signed-off-by: Pablo Neira Ayuso 
---
@David: Please, acknowledge this if you think this is fine so I can
take this into the nf-next tree, given patches 4/5 and 5/5
depend on this. Thanks a lot!

 include/uapi/linux/netlink.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index f4fc9c9e123d..e8af60a7c56d 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -69,6 +69,9 @@ struct nlmsghdr {
 #define NLM_F_CREATE   0x400   /* Create, if it does not exist */
 #define NLM_F_APPEND   0x800   /* Add to end of list   */
 
+/* Modifiers to DELETE request */
+#define NLM_F_NONREC   0x100   /* Do not delete recursively*/
+
 /* Flags for ACK message */
 #define NLM_F_CAPPED   0x100   /* request was capped */
 #define NLM_F_ACK_TLVS 0x200   /* extended ACK TVLs were included */
-- 
2.1.4


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf-next 5/5] netfilter: nf_tables: support for recursive chain deletion

2017-09-03 Thread Pablo Neira Ayuso
This patch sorts out an asymmetry in deletions. Currently, table and set
deletion commands come with an implicit content flush on deletion.
However, chain deletion results in -EBUSY if there is content in this
chain, so no implicit flush happens. So you have to send a flush command
in the first place to delete chains; this is inconsistent and it can be
annoying in terms of user experience.

This patch uses the new NLM_F_NONREC flag to request non-recursive chain
deletion, ie. if the chain to be removed contains rules, then this
returns EBUSY. This problem was discussed during the NFWS'17 in Faro,
Portugal. In iptables, you hit -EBUSY if you try to delete a chain that
contains rules, so you have to flush first before you can remove
anything. Since iptables-compat uses the nf_tables netlink interface, it
has to use the NLM_F_NONREC flag from userspace to retain the original
iptables semantics, ie.  bail out on removing chains that contain rules.

Signed-off-by: Pablo Neira Ayuso 
---
 net/netfilter/nf_tables_api.c | 24 +++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 2ea043e5b344..5e2cfdbd51bd 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1617,8 +1617,11 @@ static int nf_tables_delchain(struct net *net, struct 
sock *nlsk,
struct nft_af_info *afi;
struct nft_table *table;
struct nft_chain *chain;
+   struct nft_rule *rule;
int family = nfmsg->nfgen_family;
struct nft_ctx ctx;
+   u32 use;
+   int err;
 
afi = nf_tables_afinfo_lookup(net, family, false);
if (IS_ERR(afi))
@@ -1631,11 +1634,30 @@ static int nf_tables_delchain(struct net *net, struct 
sock *nlsk,
chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
if (IS_ERR(chain))
return PTR_ERR(chain);
-   if (chain->use > 0)
+
+   if (nlh->nlmsg_flags & NLM_F_NONREC &&
+   chain->use > 0)
return -EBUSY;
 
nft_ctx_init(, net, skb, nlh, afi, table, chain, nla);
 
+   use = chain->use;
+   list_for_each_entry(rule, >rules, list) {
+   if (!nft_is_active_next(net, rule))
+   continue;
+   use--;
+
+   err = nft_delrule(, rule);
+   if (err < 0)
+   return err;
+   }
+
+   /* There are rules and elements that are still holding references to us,
+* we cannot do a recursive removal in this case.
+*/
+   if (use > 0)
+   return -EBUSY;
+
return nft_delchain();
 }
 
-- 
2.1.4


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf-next 4/5] netfilter: nf_tables: use NLM_F_NONREC for deletion requests

2017-09-03 Thread Pablo Neira Ayuso
Bail out if user requests non-recursive deletion for tables and sets.
This new flag tells the nf_tables netlink interface to reject deletions if
tables and sets have content.

Signed-off-by: Pablo Neira Ayuso 
---
 net/netfilter/nf_tables_api.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 8b86acbb9770..2ea043e5b344 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -860,6 +860,10 @@ static int nf_tables_deltable(struct net *net, struct sock 
*nlsk,
if (IS_ERR(table))
return PTR_ERR(table);
 
+   if (nlh->nlmsg_flags & NLM_F_NONREC &&
+   table->use > 0)
+   return -EBUSY;
+
ctx.afi = afi;
ctx.table = table;
 
@@ -3225,7 +3229,9 @@ static int nf_tables_delset(struct net *net, struct sock 
*nlsk,
set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask);
if (IS_ERR(set))
return PTR_ERR(set);
-   if (!list_empty(>bindings))
+
+   if (!list_empty(>bindings) ||
+   (nlh->nlmsg_flags & NLM_F_NONREC && atomic_read(>nelems) > 0))
return -EBUSY;
 
return nft_delset(, set);
-- 
2.1.4


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[no subject]

2017-09-03 Thread xb028930336


83163881723765.doc
Description: MS-Word document


[nft PATCH 0/2] libnftables preparation work

2017-09-03 Thread Eric Leblond

Hi,

Here are two patches related to the libnftables preparation work.

The first one changes the way an nft_ctx is created, to be able
to skip the netlink init function call and also to have some freedom
later.

The second one is getting the printf out. This is completely changed
from what was proposed before. It is now really simple on user side.

BR,
--
Eric
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[nft PATCH 2/2] src: get rid of printf

2017-09-03 Thread Eric Leblond
This patch introduces the nft_print_to_output_ctx function that has
to be used instead of printf to output information that was
previously sent to stdout. This function accumulates the output in
a buffer that can be fetched by the user with the nft_ctx_get_output()
function.

This modification will allow the libnftables library to provide an
easy way for users to get the output data and display it however
they want.

Signed-off-by: Eric Leblond 
---
 include/datatype.h   |   5 +-
 include/expression.h |   2 +-
 include/nftables.h   |   5 ++
 src/cli.c|   1 +
 src/ct.c |  21 ++---
 src/datatype.c   |  66 ---
 src/expression.c |  79 ++
 src/exthdr.c |  16 ++--
 src/fib.c|  23 +++---
 src/hash.c   |  10 +--
 src/main.c   |  45 ++
 src/meta.c   |  32 +---
 src/numgen.c |   8 +-
 src/payload.c|   9 +-
 src/rule.c   | 228 +--
 src/statement.c  | 145 +---
 16 files changed, 408 insertions(+), 287 deletions(-)

diff --git a/include/datatype.h b/include/datatype.h
index 2e34591..e9f6079 100644
--- a/include/datatype.h
+++ b/include/datatype.h
@@ -209,7 +209,8 @@ extern void symbolic_constant_print(const struct 
symbol_table *tbl,
struct output_ctx *octx);
 extern void symbol_table_print(const struct symbol_table *tbl,
   const struct datatype *dtype,
-  enum byteorder byteorder);
+  enum byteorder byteorder,
+  struct output_ctx *octx);
 
 extern struct symbol_table *rt_symbol_table_init(const char *filename);
 extern void rt_symbol_table_free(struct symbol_table *tbl);
@@ -261,7 +262,7 @@ extern const struct datatype *
 set_datatype_alloc(const struct datatype *orig_dtype, unsigned int byteorder);
 extern void set_datatype_destroy(const struct datatype *dtype);
 
-extern void time_print(uint64_t seconds);
+extern void time_print(uint64_t seconds, struct output_ctx *octx);
 extern struct error_record *time_parse(const struct location *loc,
   const char *c, uint64_t *res);
 
diff --git a/include/expression.h b/include/expression.h
index 32d4423..ce6b702 100644
--- a/include/expression.h
+++ b/include/expression.h
@@ -334,7 +334,7 @@ extern struct expr *expr_get(struct expr *expr);
 extern void expr_free(struct expr *expr);
 extern void expr_print(const struct expr *expr, struct output_ctx *octx);
 extern bool expr_cmp(const struct expr *e1, const struct expr *e2);
-extern void expr_describe(const struct expr *expr);
+extern void expr_describe(const struct expr *expr, struct output_ctx *octx);
 
 extern const struct datatype *expr_basetype(const struct expr *expr);
 extern void expr_set_type(struct expr *expr, const struct datatype *dtype,
diff --git a/include/nftables.h b/include/nftables.h
index 7c4e93f..f4d5ce1 100644
--- a/include/nftables.h
+++ b/include/nftables.h
@@ -30,6 +30,8 @@ struct output_ctx {
unsigned int ip2name;
unsigned int handle;
unsigned int echo;
+   char *output_buf;
+   size_t output_buf_len;
 };
 
 struct nft_cache {
@@ -149,4 +151,7 @@ void realm_table_meta_exit(void);
 void devgroup_table_exit(void);
 void realm_table_rt_exit(void);
 
+int nft_print_to_output_ctx(struct output_ctx *octx, const char *fmt, ...);
+char *nft_ctx_get_output(struct nft_ctx *ctx);
+
 #endif /* NFTABLES_NFTABLES_H */
diff --git a/src/cli.c b/src/cli.c
index d923ff7..ca4418c 100644
--- a/src/cli.c
+++ b/src/cli.c
@@ -138,6 +138,7 @@ static void cli_complete(char *line)
cli_nft->debug_mask);
scanner_push_buffer(scanner, _cli, line);
nft_run(cli_nft, cli_nf_sock, scanner, state, );
+   printf("%s", nft_ctx_get_output(cli_nft));
erec_print_list(stdout, , cli_nft->debug_mask);
xfree(line);
cache_release(_nft->cache);
diff --git a/src/ct.c b/src/ct.c
index d64f467..f19608a 100644
--- a/src/ct.c
+++ b/src/ct.c
@@ -141,11 +141,12 @@ static void ct_label_type_print(const struct expr *expr,
for (s = ct_label_tbl->symbols; s->identifier != NULL; s++) {
if (bit != s->value)
continue;
-   printf("\"%s\"", s->identifier);
+   nft_print_to_output_ctx(octx, "\"%s\"", s->identifier);
return;
}
/* can happen when connlabel.conf is altered after rules were added */
-   printf("%ld\n", (long)mpz_scan1(expr->value, 0));
+   nft_print_to_output_ctx(octx, "%ld\n",
+   (long)mpz_scan1(expr->value, 0));
 }
 
 static struct error_record *ct_label_type_parse(const struct expr *sym,
@@ -269,27 +270,27 @@ static const struct ct_template ct_templates[] = {
 

[nft PATCH 1/2] src: add flags fo nft_ctx_new

2017-09-03 Thread Eric Leblond
By adding flags to nft_ctx_new, we gain a minimal capability
to change the way the nft_ctx is created.

For now, this patch uses a simple value that allows the user to specify
that they will handle netlink themselves.

Signed-off-by: Eric Leblond 
---
 include/nftables.h |  4 
 src/main.c | 20 +++-
 2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/include/nftables.h b/include/nftables.h
index 5035567..7c4e93f 100644
--- a/include/nftables.h
+++ b/include/nftables.h
@@ -49,8 +49,12 @@ struct nft_ctx {
struct output_ctx   output;
boolcheck;
struct nft_cachecache;
+   uint32_tflags;
 };
 
+#define NFT_CTX_CUSTOM_NETLINK (1<<0)
+#define NFT_CTX_DEFAULT0
+
 enum nftables_exit_codes {
NFT_EXIT_SUCCESS= 0,
NFT_EXIT_FAILURE= 1,
diff --git a/src/main.c b/src/main.c
index fce9bfe..94f8a47 100644
--- a/src/main.c
+++ b/src/main.c
@@ -281,7 +281,12 @@ static void nft_exit(void)
mark_table_exit();
 }
 
-static struct nft_ctx *nft_ctx_new(void)
+static void nft_ctx_netlink_init(struct nft_ctx *ctx)
+{
+   ctx->nf_sock = netlink_open_sock();
+}
+
+static struct nft_ctx *nft_ctx_new(uint32_t flags)
 {
struct nft_ctx *ctx;
 
@@ -292,6 +297,10 @@ static struct nft_ctx *nft_ctx_new(void)
ctx->num_include_paths  = 1;
ctx->parser_max_errors  = 10;
init_list_head(>cache.list);
+   ctx->flags = flags;
+
+   if (! (flags & NFT_CTX_CUSTOM_NETLINK))
+   nft_ctx_netlink_init(ctx);
 
return ctx;
 }
@@ -307,11 +316,6 @@ static void nft_ctx_free(const struct nft_ctx *ctx)
nft_exit();
 }
 
-static void nft_ctx_netlink_init(struct nft_ctx *ctx)
-{
-   ctx->nf_sock = netlink_open_sock();
-}
-
 static int nft_run_cmd_from_buffer(struct nft_ctx *nft,
   char *buf, size_t buflen)
 {
@@ -367,9 +371,7 @@ int main(int argc, char * const *argv)
struct parser_state state;
int i, val, rc;
 
-   nft = nft_ctx_new();
-
-   nft_ctx_netlink_init(nft);
+   nft = nft_ctx_new(NFT_CTX_DEFAULT);
 
while (1) {
val = getopt_long(argc, argv, OPTSTRING, options, NULL);
-- 
2.14.1

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 13/47] netfilter: nf_tables: No need to check chain existence when tracing

2017-09-03 Thread Pablo Neira Ayuso
From: Phil Sutter 

nft_trace_notify() is called only from __nft_trace_packet(), which
assigns its parameter 'chain' to info->chain. __nft_trace_packet() in
turn later dereferences 'chain' unconditionally, which indicates that
it's never NULL. So does nft_do_chain(), the only user of the tracing
infrastructure. Hence it is safe to assume the check removed here is not
needed.

Signed-off-by: Phil Sutter 
Signed-off-by: Pablo Neira Ayuso 
---
 net/netfilter/nf_tables_trace.c | 13 +
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
index e1b15e7a5793..0c3a0049e4aa 100644
--- a/net/netfilter/nf_tables_trace.c
+++ b/net/netfilter/nf_tables_trace.c
@@ -217,14 +217,11 @@ void nft_trace_notify(struct nft_traceinfo *info)
if (trace_fill_id(skb, pkt->skb))
goto nla_put_failure;
 
-   if (info->chain) {
-   if (nla_put_string(skb, NFTA_TRACE_CHAIN,
-  info->chain->name))
-   goto nla_put_failure;
-   if (nla_put_string(skb, NFTA_TRACE_TABLE,
-  info->chain->table->name))
-   goto nla_put_failure;
-   }
+   if (nla_put_string(skb, NFTA_TRACE_CHAIN, info->chain->name))
+   goto nla_put_failure;
+
+   if (nla_put_string(skb, NFTA_TRACE_TABLE, info->chain->table->name))
+   goto nla_put_failure;
 
if (nf_trace_fill_rule_info(skb, info))
goto nla_put_failure;
-- 
2.1.4


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 16/47] netfilter: nf_tables: Allow chain name of up to 255 chars

2017-09-03 Thread Pablo Neira Ayuso
From: Phil Sutter 

Same conversion as for table names, use NFT_NAME_MAXLEN as upper
boundary as well.

Signed-off-by: Phil Sutter 
Signed-off-by: Pablo Neira Ayuso 
---
 include/net/netfilter/nf_tables.h|  4 ++--
 include/uapi/linux/netfilter/nf_tables.h |  2 +-
 net/netfilter/nf_tables_api.c| 34 
 net/netfilter/nf_tables_trace.c  | 27 +++--
 4 files changed, 54 insertions(+), 13 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h 
b/include/net/netfilter/nf_tables.h
index 05ecf78ec078..be1610162ee0 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -859,7 +859,7 @@ struct nft_chain {
u16 level;
u8  flags:6,
genmask:2;
-   charname[NFT_CHAIN_MAXNAMELEN];
+   char*name;
 };
 
 enum nft_chain_type {
@@ -1272,7 +1272,7 @@ struct nft_trans_set {
 
 struct nft_trans_chain {
boolupdate;
-   charname[NFT_CHAIN_MAXNAMELEN];
+   char*name;
struct nft_stats __percpu   *stats;
u8  policy;
 };
diff --git a/include/uapi/linux/netfilter/nf_tables.h 
b/include/uapi/linux/netfilter/nf_tables.h
index 0b94e572ef16..d9c03a8608ee 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -3,7 +3,7 @@
 
 #define NFT_NAME_MAXLEN256
 #define NFT_TABLE_MAXNAMELEN   NFT_NAME_MAXLEN
-#define NFT_CHAIN_MAXNAMELEN   32
+#define NFT_CHAIN_MAXNAMELEN   NFT_NAME_MAXLEN
 #define NFT_SET_MAXNAMELEN 32
 #define NFT_OBJ_MAXNAMELEN 32
 #define NFT_USERDATA_MAXLEN256
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index c2e392d5e512..747499039709 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1250,8 +1250,10 @@ static void nf_tables_chain_destroy(struct nft_chain 
*chain)
static_branch_dec(_counters_enabled);
if (basechain->ops[0].dev != NULL)
dev_put(basechain->ops[0].dev);
+   kfree(chain->name);
kfree(basechain);
} else {
+   kfree(chain->name);
kfree(chain);
}
 }
@@ -1476,8 +1478,13 @@ static int nf_tables_newchain(struct net *net, struct 
sock *nlsk,
nft_trans_chain_policy(trans) = -1;
 
if (nla[NFTA_CHAIN_HANDLE] && name) {
-   nla_strlcpy(nft_trans_chain_name(trans), name,
-   NFT_CHAIN_MAXNAMELEN);
+   nft_trans_chain_name(trans) =
+   nla_strdup(name, GFP_KERNEL);
+   if (!nft_trans_chain_name(trans)) {
+   kfree(trans);
+   free_percpu(stats);
+   return -ENOMEM;
+   }
}
list_add_tail(>list, >nft.commit_list);
return 0;
@@ -1544,7 +1551,11 @@ static int nf_tables_newchain(struct net *net, struct 
sock *nlsk,
INIT_LIST_HEAD(>rules);
chain->handle = nf_tables_alloc_handle(table);
chain->table = table;
-   nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
+   chain->name = nla_strdup(name, GFP_KERNEL);
+   if (!chain->name) {
+   err = -ENOMEM;
+   goto err1;
+   }
 
err = nf_tables_register_hooks(net, table, chain, afi->nops);
if (err < 0)
@@ -1979,7 +1990,7 @@ static void nf_tables_rule_notify(const struct nft_ctx 
*ctx,
 
 struct nft_rule_dump_ctx {
char *table;
-   char chain[NFT_CHAIN_MAXNAMELEN];
+   char *chain;
 };
 
 static int nf_tables_dump_rules(struct sk_buff *skb,
@@ -2047,6 +2058,7 @@ static int nf_tables_dump_rules_done(struct 
netlink_callback *cb)
 
if (ctx) {
kfree(ctx->table);
+   kfree(ctx->chain);
kfree(ctx);
}
return 0;
@@ -2088,9 +2100,15 @@ static int nf_tables_getrule(struct net *net, struct 
sock *nlsk,
return -ENOMEM;
}
}
-   if (nla[NFTA_RULE_CHAIN])
-   nla_strlcpy(ctx->chain, nla[NFTA_RULE_CHAIN],
-   sizeof(ctx->chain));
+   if (nla[NFTA_RULE_CHAIN]) {
+   ctx->chain = nla_strdup(nla[NFTA_RULE_CHAIN],
+   GFP_KERNEL);
+   if (!ctx->chain) {
+ 

[PATCH 15/47] netfilter: nf_tables: Allow table names of up to 255 chars

2017-09-03 Thread Pablo Neira Ayuso
From: Phil Sutter 

Allocate all table names dynamically to allow for arbitrary lengths but
introduce NFT_NAME_MAXLEN as an upper sanity boundary. Its value was
chosen to allow using a domain name as per RFC 1035.

Signed-off-by: Phil Sutter 
Signed-off-by: Pablo Neira Ayuso 
---
 include/net/netfilter/nf_tables.h|  2 +-
 include/uapi/linux/netfilter/nf_tables.h |  3 +-
 net/netfilter/nf_tables_api.c| 49 +++-
 net/netfilter/nf_tables_trace.c  |  2 +-
 4 files changed, 40 insertions(+), 16 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h 
b/include/net/netfilter/nf_tables.h
index bd5be0d691d5..05ecf78ec078 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -957,7 +957,7 @@ struct nft_table {
u32 use;
u16 flags:14,
genmask:2;
-   charname[NFT_TABLE_MAXNAMELEN];
+   char*name;
 };
 
 enum nft_af_flags {
diff --git a/include/uapi/linux/netfilter/nf_tables.h 
b/include/uapi/linux/netfilter/nf_tables.h
index 6f0a950e21c3..0b94e572ef16 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1,7 +1,8 @@
 #ifndef _LINUX_NF_TABLES_H
 #define _LINUX_NF_TABLES_H
 
-#define NFT_TABLE_MAXNAMELEN   32
+#define NFT_NAME_MAXLEN256
+#define NFT_TABLE_MAXNAMELEN   NFT_NAME_MAXLEN
 #define NFT_CHAIN_MAXNAMELEN   32
 #define NFT_SET_MAXNAMELEN 32
 #define NFT_OBJ_MAXNAMELEN 32
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index b77ad0813564..c2e392d5e512 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -726,7 +726,10 @@ static int nf_tables_newtable(struct net *net, struct sock 
*nlsk,
if (table == NULL)
goto err2;
 
-   nla_strlcpy(table->name, name, NFT_TABLE_MAXNAMELEN);
+   table->name = nla_strdup(name, GFP_KERNEL);
+   if (table->name == NULL)
+   goto err3;
+
INIT_LIST_HEAD(>chains);
INIT_LIST_HEAD(>sets);
INIT_LIST_HEAD(>objects);
@@ -735,10 +738,12 @@ static int nf_tables_newtable(struct net *net, struct 
sock *nlsk,
nft_ctx_init(, net, skb, nlh, afi, table, NULL, nla);
err = nft_trans_table_add(, NFT_MSG_NEWTABLE);
if (err < 0)
-   goto err3;
+   goto err4;
 
list_add_tail_rcu(>list, >tables);
return 0;
+err4:
+   kfree(table->name);
 err3:
kfree(table);
 err2:
@@ -865,6 +870,7 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx)
 {
BUG_ON(ctx->table->use > 0);
 
+   kfree(ctx->table->name);
kfree(ctx->table);
module_put(ctx->afi->owner);
 }
@@ -1972,7 +1978,7 @@ static void nf_tables_rule_notify(const struct nft_ctx 
*ctx,
 }
 
 struct nft_rule_dump_ctx {
-   char table[NFT_TABLE_MAXNAMELEN];
+   char *table;
char chain[NFT_CHAIN_MAXNAMELEN];
 };
 
@@ -1997,7 +2003,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
continue;
 
list_for_each_entry_rcu(table, >tables, list) {
-   if (ctx && ctx->table[0] &&
+   if (ctx && ctx->table &&
strcmp(ctx->table, table->name) != 0)
continue;
 
@@ -2037,7 +2043,12 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
 
 static int nf_tables_dump_rules_done(struct netlink_callback *cb)
 {
-   kfree(cb->data);
+   struct nft_rule_dump_ctx *ctx = cb->data;
+
+   if (ctx) {
+   kfree(ctx->table);
+   kfree(ctx);
+   }
return 0;
 }
 
@@ -2069,9 +2080,14 @@ static int nf_tables_getrule(struct net *net, struct 
sock *nlsk,
if (!ctx)
return -ENOMEM;
 
-   if (nla[NFTA_RULE_TABLE])
-   nla_strlcpy(ctx->table, nla[NFTA_RULE_TABLE],
-   sizeof(ctx->table));
+   if (nla[NFTA_RULE_TABLE]) {
+   ctx->table = nla_strdup(nla[NFTA_RULE_TABLE],
+   GFP_KERNEL);
+   if (!ctx->table) {
+   kfree(ctx);
+   return -ENOMEM;
+   }
+   }
if (nla[NFTA_RULE_CHAIN])
nla_strlcpy(ctx->chain, nla[NFTA_RULE_CHAIN],
sizeof(ctx->chain));
@@ -4410,7 +4426,7 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, 
struct net *net,
 }
 
 struct nft_obj_filter {
-   char  

[PATCH 18/47] netfilter: nf_tables: Allow object names of up to 255 chars

2017-09-03 Thread Pablo Neira Ayuso
From: Phil Sutter 

Same conversion as for table names, use NFT_NAME_MAXLEN as upper
boundary as well.

Signed-off-by: Phil Sutter 
Signed-off-by: Pablo Neira Ayuso 
---
 include/net/netfilter/nf_tables.h|  2 +-
 include/uapi/linux/netfilter/nf_tables.h |  2 +-
 net/netfilter/nf_tables_api.c| 11 +--
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h 
b/include/net/netfilter/nf_tables.h
index 66ba62fa7d90..f9795fe394f3 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1016,7 +1016,7 @@ int nft_verdict_dump(struct sk_buff *skb, int type,
  */
 struct nft_object {
struct list_headlist;
-   charname[NFT_OBJ_MAXNAMELEN];
+   char*name;
struct nft_table*table;
u32 genmask:2,
use:30;
diff --git a/include/uapi/linux/netfilter/nf_tables.h 
b/include/uapi/linux/netfilter/nf_tables.h
index b5e73e80b7b6..be25cf69295b 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -5,7 +5,7 @@
 #define NFT_TABLE_MAXNAMELEN   NFT_NAME_MAXLEN
 #define NFT_CHAIN_MAXNAMELEN   NFT_NAME_MAXLEN
 #define NFT_SET_MAXNAMELEN NFT_NAME_MAXLEN
-#define NFT_OBJ_MAXNAMELEN 32
+#define NFT_OBJ_MAXNAMELEN NFT_NAME_MAXLEN
 #define NFT_USERDATA_MAXLEN256
 
 /**
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index e6a07f27b1a3..149785ff1c7b 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4402,15 +4402,21 @@ static int nf_tables_newobj(struct net *net, struct 
sock *nlsk,
goto err1;
}
obj->table = table;
-   nla_strlcpy(obj->name, nla[NFTA_OBJ_NAME], NFT_OBJ_MAXNAMELEN);
+   obj->name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL);
+   if (!obj->name) {
+   err = -ENOMEM;
+   goto err2;
+   }
 
err = nft_trans_obj_add(, NFT_MSG_NEWOBJ, obj);
if (err < 0)
-   goto err2;
+   goto err3;
 
list_add_tail_rcu(>list, >objects);
table->use++;
return 0;
+err3:
+   kfree(obj->name);
 err2:
if (obj->type->destroy)
obj->type->destroy(obj);
@@ -4626,6 +4632,7 @@ static void nft_obj_destroy(struct nft_object *obj)
obj->type->destroy(obj);
 
module_put(obj->type->owner);
+   kfree(obj->name);
kfree(obj);
 }
 
-- 
2.1.4


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 19/47] netfilter: nft_set_rbtree: use seqcount to avoid lock in most cases

2017-09-03 Thread Pablo Neira Ayuso
From: Florian Westphal 

Switch to lockless lookup. Write side now also increments sequence
counter.  On lookup, sample counter value and only take the lock
if we did not find a match and the counter has changed.

This avoids the need to write to the private area in normal (lookup) cases.

In case we detect a writer (seqretry is true) we fall back to taking
the readlock.

The readlock is also used during dumps to ensure we get a consistent
tree walk.

Similar technique (rbtree+seqlock) was used by David Howells in rxrpc.

Signed-off-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
---
 net/netfilter/nft_set_rbtree.c | 49 +++---
 1 file changed, 37 insertions(+), 12 deletions(-)

diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index bce5382f1d49..d83a4ec5900d 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -19,8 +19,9 @@
 #include 
 
 struct nft_rbtree {
-   rwlock_tlock;
struct rb_root  root;
+   rwlock_tlock;
+   seqcount_t  count;
 };
 
 struct nft_rbtree_elem {
@@ -40,8 +41,9 @@ static bool nft_rbtree_equal(const struct nft_set *set, const 
void *this,
return memcmp(this, nft_set_ext_key(>ext), set->klen) == 0;
 }
 
-static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key, const struct nft_set_ext **ext)
+static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set 
*set,
+   const u32 *key, const struct nft_set_ext **ext,
+   unsigned int seq)
 {
struct nft_rbtree *priv = nft_set_priv(set);
const struct nft_rbtree_elem *rbe, *interval = NULL;
@@ -50,15 +52,17 @@ static bool nft_rbtree_lookup(const struct net *net, const 
struct nft_set *set,
const void *this;
int d;
 
-   read_lock_bh(>lock);
-   parent = priv->root.rb_node;
+   parent = rcu_dereference_raw(priv->root.rb_node);
while (parent != NULL) {
+   if (read_seqcount_retry(>count, seq))
+   return false;
+
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
 
this = nft_set_ext_key(>ext);
d = memcmp(this, key, set->klen);
if (d < 0) {
-   parent = parent->rb_left;
+   parent = rcu_dereference_raw(parent->rb_left);
if (interval &&
nft_rbtree_equal(set, this, interval) &&
nft_rbtree_interval_end(this) &&
@@ -66,15 +70,14 @@ static bool nft_rbtree_lookup(const struct net *net, const 
struct nft_set *set,
continue;
interval = rbe;
} else if (d > 0)
-   parent = parent->rb_right;
+   parent = rcu_dereference_raw(parent->rb_right);
else {
if (!nft_set_elem_active(>ext, genmask)) {
-   parent = parent->rb_left;
+   parent = rcu_dereference_raw(parent->rb_left);
continue;
}
if (nft_rbtree_interval_end(rbe))
goto out;
-   read_unlock_bh(>lock);
 
*ext = >ext;
return true;
@@ -84,15 +87,32 @@ static bool nft_rbtree_lookup(const struct net *net, const 
struct nft_set *set,
if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
nft_set_elem_active(>ext, genmask) &&
!nft_rbtree_interval_end(interval)) {
-   read_unlock_bh(>lock);
*ext = >ext;
return true;
}
 out:
-   read_unlock_bh(>lock);
return false;
 }
 
+static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext)
+{
+   struct nft_rbtree *priv = nft_set_priv(set);
+   unsigned int seq = read_seqcount_begin(>count);
+   bool ret;
+
+   ret = __nft_rbtree_lookup(net, set, key, ext, seq);
+   if (ret || !read_seqcount_retry(>count, seq))
+   return ret;
+
+   read_lock_bh(>lock);
+   seq = read_seqcount_begin(>count);
+   ret = __nft_rbtree_lookup(net, set, key, ext, seq);
+   read_unlock_bh(>lock);
+
+   return ret;
+}
+
 static int __nft_rbtree_insert(const struct net *net, const struct nft_set 
*set,
   struct nft_rbtree_elem *new,
   struct nft_set_ext **ext)
@@ -130,7 +150,7 @@ static int __nft_rbtree_insert(const struct net *net, const 
struct nft_set *set,
}

[PATCH 17/47] netfilter: nf_tables: Allow set names of up to 255 chars

2017-09-03 Thread Pablo Neira Ayuso
From: Phil Sutter 

Same conversion as for table names, use NFT_NAME_MAXLEN as upper
boundary as well.

Signed-off-by: Phil Sutter 
Signed-off-by: Pablo Neira Ayuso 
---
 include/net/netfilter/nf_tables.h|  2 +-
 include/uapi/linux/netfilter/nf_tables.h |  2 +-
 net/netfilter/nf_tables_api.c| 18 ++
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h 
b/include/net/netfilter/nf_tables.h
index be1610162ee0..66ba62fa7d90 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -396,7 +396,7 @@ void nft_unregister_set(struct nft_set_type *type);
 struct nft_set {
struct list_headlist;
struct list_headbindings;
-   charname[NFT_SET_MAXNAMELEN];
+   char*name;
u32 ktype;
u32 dtype;
u32 objtype;
diff --git a/include/uapi/linux/netfilter/nf_tables.h 
b/include/uapi/linux/netfilter/nf_tables.h
index d9c03a8608ee..b5e73e80b7b6 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -4,7 +4,7 @@
 #define NFT_NAME_MAXLEN256
 #define NFT_TABLE_MAXNAMELEN   NFT_NAME_MAXLEN
 #define NFT_CHAIN_MAXNAMELEN   NFT_NAME_MAXLEN
-#define NFT_SET_MAXNAMELEN 32
+#define NFT_SET_MAXNAMELEN NFT_NAME_MAXLEN
 #define NFT_OBJ_MAXNAMELEN 32
 #define NFT_USERDATA_MAXLEN256
 
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 747499039709..e6a07f27b1a3 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2650,7 +2650,7 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, 
struct nft_set *set,
unsigned long *inuse;
unsigned int n = 0, min = 0;
 
-   p = strnchr(name, NFT_SET_MAXNAMELEN, '%');
+   p = strchr(name, '%');
if (p != NULL) {
if (p[1] != 'd' || strchr(p + 2, '%'))
return -EINVAL;
@@ -2681,7 +2681,10 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, 
struct nft_set *set,
free_page((unsigned long)inuse);
}
 
-   snprintf(set->name, sizeof(set->name), name, min + n);
+   set->name = kasprintf(GFP_KERNEL, name, min + n);
+   if (!set->name)
+   return -ENOMEM;
+
list_for_each_entry(i, >table->sets, list) {
if (!nft_is_active_next(ctx->net, i))
continue;
@@ -2958,7 +2961,7 @@ static int nf_tables_newset(struct net *net, struct sock 
*nlsk,
struct nft_table *table;
struct nft_set *set;
struct nft_ctx ctx;
-   char name[NFT_SET_MAXNAMELEN];
+   char *name;
unsigned int size;
bool create;
u64 timeout;
@@ -3104,8 +3107,14 @@ static int nf_tables_newset(struct net *net, struct sock 
*nlsk,
goto err1;
}
 
-   nla_strlcpy(name, nla[NFTA_SET_NAME], sizeof(set->name));
+   name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL);
+   if (!name) {
+   err = -ENOMEM;
+   goto err2;
+   }
+
err = nf_tables_set_alloc_name(, set, name);
+   kfree(name);
if (err < 0)
goto err2;
 
@@ -3155,6 +3164,7 @@ static void nft_set_destroy(struct nft_set *set)
 {
set->ops->destroy(set);
module_put(set->ops->type->owner);
+   kfree(set->name);
kvfree(set);
 }
 
-- 
2.1.4


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 14/47] netlink: Introduce nla_strdup()

2017-09-03 Thread Pablo Neira Ayuso
From: Phil Sutter 

This is similar to strdup() for netlink string attributes.

Signed-off-by: Phil Sutter 
Signed-off-by: Pablo Neira Ayuso 
---
 include/net/netlink.h |  1 +
 lib/nlattr.c  | 24 
 2 files changed, 25 insertions(+)

diff --git a/include/net/netlink.h b/include/net/netlink.h
index ef8e6c3a80a6..c8c2eb5ae55e 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -247,6 +247,7 @@ int nla_parse(struct nlattr **tb, int maxtype, const struct 
nlattr *head,
 int nla_policy_len(const struct nla_policy *, int);
 struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype);
 size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize);
+char *nla_strdup(const struct nlattr *nla, gfp_t flags);
 int nla_memcpy(void *dest, const struct nlattr *src, int count);
 int nla_memcmp(const struct nlattr *nla, const void *data, size_t size);
 int nla_strcmp(const struct nlattr *nla, const char *str);
diff --git a/lib/nlattr.c b/lib/nlattr.c
index fb52435be42d..f13013f7e21a 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -272,6 +272,30 @@ size_t nla_strlcpy(char *dst, const struct nlattr *nla, 
size_t dstsize)
 EXPORT_SYMBOL(nla_strlcpy);
 
 /**
+ * nla_strdup - Copy string attribute payload into a newly allocated buffer
+ * @nla: attribute to copy the string from
+ * @flags: the type of memory to allocate (see kmalloc).
+ *
+ * Returns a pointer to the allocated buffer or NULL on error.
+ */
+char *nla_strdup(const struct nlattr *nla, gfp_t flags)
+{
+   size_t srclen = nla_len(nla);
+   char *src = nla_data(nla), *dst;
+
+   if (srclen > 0 && src[srclen - 1] == '\0')
+   srclen--;
+
+   dst = kmalloc(srclen + 1, flags);
+   if (dst != NULL) {
+   memcpy(dst, src, srclen);
+   dst[srclen] = '\0';
+   }
+   return dst;
+}
+EXPORT_SYMBOL(nla_strdup);
+
+/**
  * nla_memcpy - Copy a netlink attribute into another memory area
  * @dest: where to copy to memcpy
  * @src: netlink attribute to copy from
-- 
2.1.4


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 12/47] netfilter: nf_hook_ops structs can be const

2017-09-03 Thread Pablo Neira Ayuso
From: Florian Westphal 

We no longer place these on a list so they can be const.

Signed-off-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
---
 drivers/net/ipvlan/ipvlan_main.c   | 2 +-
 net/bridge/br_netfilter_hooks.c| 2 +-
 net/bridge/netfilter/ebtable_filter.c  | 2 +-
 net/bridge/netfilter/ebtable_nat.c | 2 +-
 net/decnet/netfilter/dn_rtmsg.c| 2 +-
 net/ipv4/netfilter/ipt_CLUSTERIP.c | 2 +-
 net/ipv4/netfilter/ipt_SYNPROXY.c  | 2 +-
 net/ipv4/netfilter/iptable_nat.c   | 2 +-
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +-
 net/ipv4/netfilter/nf_defrag_ipv4.c| 2 +-
 net/ipv6/ila/ila_xlat.c| 2 +-
 net/ipv6/netfilter/ip6t_SYNPROXY.c | 2 +-
 net/ipv6/netfilter/ip6table_nat.c  | 2 +-
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 2 +-
 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c  | 2 +-
 net/netfilter/ipvs/ip_vs_core.c| 2 +-
 security/selinux/hooks.c   | 2 +-
 security/smack/smack_netfilter.c   | 2 +-
 18 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index fdde20735416..943e6907dc19 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -15,7 +15,7 @@ struct ipvlan_netns {
unsigned int ipvl_nf_hook_refcnt;
 };
 
-static struct nf_hook_ops ipvl_nfops[] __read_mostly = {
+static const struct nf_hook_ops ipvl_nfops[] = {
{
.hook = ipvlan_nf_input,
.pf   = NFPROTO_IPV4,
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 2261e5194c82..626f4b2cef16 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -887,7 +887,7 @@ EXPORT_SYMBOL_GPL(br_netfilter_enable);
 
 /* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
  * br_dev_queue_push_xmit is called afterwards */
-static struct nf_hook_ops br_nf_ops[] __read_mostly = {
+static const struct nf_hook_ops br_nf_ops[] = {
{
.hook = br_nf_pre_routing,
.pf = NFPROTO_BRIDGE,
diff --git a/net/bridge/netfilter/ebtable_filter.c 
b/net/bridge/netfilter/ebtable_filter.c
index f22ef7c21913..45a00dbdbcad 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -70,7 +70,7 @@ ebt_out_hook(void *priv, struct sk_buff *skb,
return ebt_do_table(skb, state, state->net->xt.frame_filter);
 }
 
-static struct nf_hook_ops ebt_ops_filter[] __read_mostly = {
+static const struct nf_hook_ops ebt_ops_filter[] = {
{
.hook   = ebt_in_hook,
.pf = NFPROTO_BRIDGE,
diff --git a/net/bridge/netfilter/ebtable_nat.c 
b/net/bridge/netfilter/ebtable_nat.c
index 2f7a4f314406..4ecf50662b7d 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -70,7 +70,7 @@ ebt_nat_out(void *priv, struct sk_buff *skb,
return ebt_do_table(skb, state, state->net->xt.frame_nat);
 }
 
-static struct nf_hook_ops ebt_ops_nat[] __read_mostly = {
+static const struct nf_hook_ops ebt_ops_nat[] = {
{
.hook   = ebt_nat_out,
.pf = NFPROTO_BRIDGE,
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index aa8ffecc46a4..ab395e55cd78 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -115,7 +115,7 @@ static inline void dnrmg_receive_user_skb(struct sk_buff 
*skb)
RCV_SKB_FAIL(-EINVAL);
 }
 
-static struct nf_hook_ops dnrmg_ops __read_mostly = {
+static const struct nf_hook_ops dnrmg_ops = {
.hook   = dnrmg_hook,
.pf = NFPROTO_DECNET,
.hooknum= NF_DN_ROUTE,
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c 
b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 7d72decb80f9..6637e8b37ee2 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -624,7 +624,7 @@ arp_mangle(void *priv,
return NF_ACCEPT;
 }
 
-static struct nf_hook_ops cip_arp_ops __read_mostly = {
+static const struct nf_hook_ops cip_arp_ops = {
.hook = arp_mangle,
.pf = NFPROTO_ARP,
.hooknum = NF_ARP_OUT,
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c 
b/net/ipv4/netfilter/ipt_SYNPROXY.c
index f1528f7175a8..811689e523c3 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -416,7 +416,7 @@ static unsigned int ipv4_synproxy_hook(void *priv,
return NF_ACCEPT;
 }
 
-static struct nf_hook_ops ipv4_synproxy_ops[] __read_mostly = {
+static const struct nf_hook_ops ipv4_synproxy_ops[] = {
{
.hook   = ipv4_synproxy_hook,
.pf = 

[PATCH 11/47] netfilter: nfnetlink_queue: don't queue dying conntracks to userspace

2017-09-03 Thread Pablo Neira Ayuso
From: Florian Westphal 

When skb is queued to userspace it leaves softirq/rcu protection.
skb->nfct (via conntrack extensions such as helper) could then reference
modules that no longer exist if the conntrack was not yet confirmed.

nf_ct_iterate_destroy() will set the DYING bit for unconfirmed
conntracks, we therefore solve this race as follows:

1. take the queue spinlock.
2. check if the conntrack is unconfirmed and has dying bit set.
   In this case, we must discard skb while we're still inside
   rcu read-side section.
3. If nf_ct_iterate_destroy() is called right after the packet is queued
   to userspace, it will be removed from the queue via
   nf_ct_iterate_destroy -> nf_queue_nf_hook_drop.

When userspace sends the verdict (nfnetlink takes rcu read lock), there
are two cases to consider:

1. nf_ct_iterate_destroy() was called while packet was out.
   In this case, skb will have been removed from the queue already
   and no reinject takes place as we won't find a matching entry for the
   packet id.

2. nf_ct_iterate_destroy() gets called right after verdict callback
   found and removed the skb from queue list.

   In this case, skb->nfct is marked as dying but it is still valid.
   The skb will be dropped either in nf_conntrack_confirm (we don't
   insert DYING conntracks into hash table) or when we try to queue
   the skb again, but either events don't occur before the rcu read lock
   is dropped.

Signed-off-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
---
 net/netfilter/nfnetlink_queue.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 7c543bfbf624..c9796629858f 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -41,6 +41,10 @@
 #include "../bridge/br_private.h"
 #endif
 
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include 
+#endif
+
 #define NFQNL_QMAX_DEFAULT 1024
 
 /* We're using struct nlattr which has 16bit nla_len. Note that nla_len
@@ -612,6 +616,18 @@ nfqnl_build_packet_message(struct net *net, struct 
nfqnl_instance *queue,
return NULL;
 }
 
+static bool nf_ct_drop_unconfirmed(const struct nf_queue_entry *entry)
+{
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+   static const unsigned long flags = IPS_CONFIRMED | IPS_DYING;
+   const struct nf_conn *ct = (void *)skb_nfct(entry->skb);
+
+   if (ct && ((ct->status & flags) == IPS_DYING))
+   return true;
+#endif
+   return false;
+}
+
 static int
 __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
struct nf_queue_entry *entry)
@@ -628,6 +644,9 @@ __nfqnl_enqueue_packet(struct net *net, struct 
nfqnl_instance *queue,
}
spin_lock_bh(>lock);
 
+   if (nf_ct_drop_unconfirmed(entry))
+   goto err_out_free_nskb;
+
if (queue->queue_total >= queue->queue_maxlen) {
if (queue->flags & NFQA_CFG_F_FAIL_OPEN) {
failopen = 1;
-- 
2.1.4


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 20/47] netfilter: conntrack: do not enable connection tracking unless needed

2017-09-03 Thread Pablo Neira Ayuso
From: Florian Westphal 

Discussion during NFWS 2017 in Faro has shown that the current
conntrack behaviour is unreasonable.

Even if conntrack module is loaded on behalf of a single net namespace,
it's turned on for all namespaces, which is expensive.  Commit
481fa373476 ("netfilter: conntrack: add nf_conntrack_default_on sysctl")
attempted to provide an alternative to the 'default on' behaviour by
adding a sysctl to change it.

However, as Eric points out, the sysctl only becomes available
once the module is loaded, and then it's too late.

So we either have to move the sysctl to the core, or, alternatively,
change conntrack to become active only once the rule set requires this.

This does the latter, conntrack is only enabled when a rule needs it.

Reported-by: Eric Dumazet 
Signed-off-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
---
 Documentation/networking/nf_conntrack-sysctl.txt | 11 -
 include/net/netfilter/nf_conntrack_l3proto.h | 15 
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c   | 16 ++---
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c   | 17 ++
 net/netfilter/nf_conntrack_proto.c   | 29 
 net/netfilter/nf_conntrack_standalone.c  | 10 
 6 files changed, 4 insertions(+), 94 deletions(-)

diff --git a/Documentation/networking/nf_conntrack-sysctl.txt 
b/Documentation/networking/nf_conntrack-sysctl.txt
index 497d668288f9..433b6724797a 100644
--- a/Documentation/networking/nf_conntrack-sysctl.txt
+++ b/Documentation/networking/nf_conntrack-sysctl.txt
@@ -96,17 +96,6 @@ nf_conntrack_max - INTEGER
Size of connection tracking table.  Default value is
nf_conntrack_buckets value * 4.
 
-nf_conntrack_default_on - BOOLEAN
-   0 - don't register conntrack in new net namespaces
-   1 - register conntrack in new net namespaces (default)
-
-   This controls wheter newly created network namespaces have connection
-   tracking enabled by default.  It will be enabled automatically
-   regardless of this setting if the new net namespace requires
-   connection tracking, e.g. when NAT rules are created.
-   This setting is only visible in initial user namespace, it has no
-   effect on existing namespaces.
-
 nf_conntrack_tcp_be_liberal - BOOLEAN
0 - disabled (default)
not 0 - enabled
diff --git a/include/net/netfilter/nf_conntrack_l3proto.h 
b/include/net/netfilter/nf_conntrack_l3proto.h
index 6d14b36e3a49..1b8de164d744 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -73,21 +73,6 @@ struct nf_conntrack_l3proto {
 
 extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO];
 
-#ifdef CONFIG_SYSCTL
-/* Protocol pernet registration. */
-int nf_ct_l3proto_pernet_register(struct net *net,
- struct nf_conntrack_l3proto *proto);
-#else
-static inline int nf_ct_l3proto_pernet_register(struct net *n,
-   struct nf_conntrack_l3proto *p)
-{
-   return 0;
-}
-#endif
-
-void nf_ct_l3proto_pernet_unregister(struct net *net,
-struct nf_conntrack_l3proto *proto);
-
 /* Protocol global registration. */
 int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto);
 void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto);
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c 
b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 63e4ea0e01f8..de5f0e6ddd1b 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -398,24 +398,12 @@ static struct nf_conntrack_l4proto *builtin_l4proto4[] = {
 
 static int ipv4_net_init(struct net *net)
 {
-   int ret = 0;
-
-   ret = nf_ct_l4proto_pernet_register(net, builtin_l4proto4,
-   ARRAY_SIZE(builtin_l4proto4));
-   if (ret < 0)
-   return ret;
-   ret = nf_ct_l3proto_pernet_register(net, _conntrack_l3proto_ipv4);
-   if (ret < 0) {
-   pr_err("nf_conntrack_ipv4: pernet registration failed\n");
-   nf_ct_l4proto_pernet_unregister(net, builtin_l4proto4,
-   ARRAY_SIZE(builtin_l4proto4));
-   }
-   return ret;
+   return nf_ct_l4proto_pernet_register(net, builtin_l4proto4,
+ARRAY_SIZE(builtin_l4proto4));
 }
 
 static void ipv4_net_exit(struct net *net)
 {
-   nf_ct_l3proto_pernet_unregister(net, _conntrack_l3proto_ipv4);
nf_ct_l4proto_pernet_unregister(net, builtin_l4proto4,
ARRAY_SIZE(builtin_l4proto4));
 }
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c 
b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 

[PATCH 21/47] netfilter: xtables: Remove unused variable in compat_copy_entry_from_user()

2017-09-03 Thread Pablo Neira Ayuso
From: Taehee Yoo 

The target variable is not used in compat_copy_entry_from_user(),
so it can be removed.

Signed-off-by: Taehee Yoo 
Signed-off-by: Pablo Neira Ayuso 
---
 net/ipv4/netfilter/arp_tables.c | 2 --
 net/ipv4/netfilter/ip_tables.c  | 2 --
 2 files changed, 4 deletions(-)

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 0bc3c3d73e61..cf520d30cb94 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1117,7 +1117,6 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, 
void **dstptr,
struct xt_table_info *newinfo, unsigned char *base)
 {
struct xt_entry_target *t;
-   struct xt_target *target;
struct arpt_entry *de;
unsigned int origsize;
int h;
@@ -1132,7 +1131,6 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, 
void **dstptr,
 
de->target_offset = e->target_offset - (origsize - *size);
t = compat_arpt_get_target(e);
-   target = t->u.kernel.target;
xt_compat_target_from_user(t, dstptr, size);
 
de->next_offset = e->next_offset - (origsize - *size);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 2a55a40211cb..f47e8dad5e95 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1355,7 +1355,6 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, 
void **dstptr,
struct xt_table_info *newinfo, unsigned char *base)
 {
struct xt_entry_target *t;
-   struct xt_target *target;
struct ipt_entry *de;
unsigned int origsize;
int h;
@@ -1374,7 +1373,6 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, 
void **dstptr,
 
de->target_offset = e->target_offset - (origsize - *size);
t = compat_ipt_get_target(e);
-   target = t->u.kernel.target;
xt_compat_target_from_user(t, dstptr, size);
 
de->next_offset = e->next_offset - (origsize - *size);
-- 
2.1.4


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 22/47] netfilter: constify nf_conntrack_l3/4proto parameters

2017-09-03 Thread Pablo Neira Ayuso
From: Julia Lawall 

When a nf_conntrack_l3/4proto parameter is not on the left hand side
of an assignment, its address is not taken, and it is not passed to a
function that may modify its fields, then it can be declared as const.

This change is useful from a documentation point of view, and can
possibly facilitate making some nf_conntrack_l3/4proto structures const
subsequently.

Done with the help of Coccinelle.

Signed-off-by: Julia Lawall 
Signed-off-by: Pablo Neira Ayuso 
---
 include/net/netfilter/nf_conntrack_l4proto.h | 14 +++---
 include/net/netfilter/nf_conntrack_timeout.h |  2 +-
 net/netfilter/nf_conntrack_core.c|  8 
 net/netfilter/nf_conntrack_netlink.c |  6 +++---
 net/netfilter/nf_conntrack_proto.c   | 24 
 net/netfilter/nfnetlink_cttimeout.c  |  5 +++--
 6 files changed, 30 insertions(+), 29 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h 
b/include/net/netfilter/nf_conntrack_l4proto.h
index 7032e044bbe2..b6e27cafb1d9 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -125,23 +125,23 @@ struct nf_conntrack_l4proto 
*__nf_ct_l4proto_find(u_int16_t l3proto,
 
 struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u_int16_t l3proto,
u_int8_t l4proto);
-void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p);
+void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p);
 
 /* Protocol pernet registration. */
 int nf_ct_l4proto_pernet_register_one(struct net *net,
- struct nf_conntrack_l4proto *proto);
+   const struct nf_conntrack_l4proto *proto);
 void nf_ct_l4proto_pernet_unregister_one(struct net *net,
-struct nf_conntrack_l4proto *proto);
+   const struct nf_conntrack_l4proto *proto);
 int nf_ct_l4proto_pernet_register(struct net *net,
- struct nf_conntrack_l4proto *proto[],
+ struct nf_conntrack_l4proto *const proto[],
  unsigned int num_proto);
 void nf_ct_l4proto_pernet_unregister(struct net *net,
-struct nf_conntrack_l4proto *proto[],
-unsigned int num_proto);
+   struct nf_conntrack_l4proto *const proto[],
+   unsigned int num_proto);
 
 /* Protocol global registration. */
 int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *proto);
-void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *proto);
+void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *proto);
 int nf_ct_l4proto_register(struct nf_conntrack_l4proto *proto[],
   unsigned int num_proto);
 void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *proto[],
diff --git a/include/net/netfilter/nf_conntrack_timeout.h 
b/include/net/netfilter/nf_conntrack_timeout.h
index d40b89355fdd..b222957062b5 100644
--- a/include/net/netfilter/nf_conntrack_timeout.h
+++ b/include/net/netfilter/nf_conntrack_timeout.h
@@ -68,7 +68,7 @@ struct nf_conn_timeout *nf_ct_timeout_ext_add(struct nf_conn 
*ct,
 
 static inline unsigned int *
 nf_ct_timeout_lookup(struct net *net, struct nf_conn *ct,
-struct nf_conntrack_l4proto *l4proto)
+const struct nf_conntrack_l4proto *l4proto)
 {
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
struct nf_conn_timeout *timeout_ext;
diff --git a/net/netfilter/nf_conntrack_core.c 
b/net/netfilter/nf_conntrack_core.c
index 2bc499186186..f2f00eaf217d 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1176,8 +1176,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free);
 static noinline struct nf_conntrack_tuple_hash *
 init_conntrack(struct net *net, struct nf_conn *tmpl,
   const struct nf_conntrack_tuple *tuple,
-  struct nf_conntrack_l3proto *l3proto,
-  struct nf_conntrack_l4proto *l4proto,
+  const struct nf_conntrack_l3proto *l3proto,
+  const struct nf_conntrack_l4proto *l4proto,
   struct sk_buff *skb,
   unsigned int dataoff, u32 hash)
 {
@@ -1288,8 +1288,8 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
  unsigned int dataoff,
  u_int16_t l3num,
  u_int8_t protonum,
- struct nf_conntrack_l3proto *l3proto,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l3proto *l3proto,
+ const struct nf_conntrack_l4proto *l4proto)
 {
const struct nf_conntrack_zone *zone;
struct nf_conntrack_tuple tuple;
diff --git 

[PATCH 23/47] netfilter: constify nf_loginfo structures

2017-09-03 Thread Pablo Neira Ayuso
From: Julia Lawall 

The nf_loginfo structures are only passed as the seventh argument to
nf_log_trace, which is declared as const or stored in a local const
variable.  Thus the nf_loginfo structures themselves can be const.

Done with the help of Coccinelle.

// 
@r disable optional_qualifier@
identifier i;
position p;
@@
static struct nf_loginfo i@p = { ... };

@ok1@
identifier r.i;
expression list[6] es;
position p;
@@
 nf_log_trace(es,@p,...)

@ok2@
identifier r.i;
const struct nf_loginfo *e;
position p;
@@
 e = @p

@bad@
position p != {r.p,ok1.p,ok2.p};
identifier r.i;
struct nf_loginfo e;
@@
e@i@p

@depends on !bad disable optional_qualifier@
identifier r.i;
@@
static
+const
 struct nf_loginfo i = { ... };
// 

Signed-off-by: Julia Lawall 
Signed-off-by: Pablo Neira Ayuso 
---
 net/ipv4/netfilter/ip_tables.c   | 2 +-
 net/ipv4/netfilter/nf_log_arp.c  | 2 +-
 net/ipv4/netfilter/nf_log_ipv4.c | 2 +-
 net/ipv6/netfilter/ip6_tables.c  | 2 +-
 net/ipv6/netfilter/nf_log_ipv6.c | 2 +-
 net/netfilter/nf_tables_core.c   | 2 +-
 net/netfilter/nfnetlink_log.c| 2 +-
 7 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index f47e8dad5e95..2aea896f5708 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -151,7 +151,7 @@ static const char *const comments[] = {
[NF_IP_TRACE_COMMENT_POLICY]= "policy",
 };
 
-static struct nf_loginfo trace_loginfo = {
+static const struct nf_loginfo trace_loginfo = {
.type = NF_LOG_TYPE_LOG,
.u = {
.log = {
diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c
index 2f3895ddc275..df5c2a2061a4 100644
--- a/net/ipv4/netfilter/nf_log_arp.c
+++ b/net/ipv4/netfilter/nf_log_arp.c
@@ -25,7 +25,7 @@
 #include 
 #include 
 
-static struct nf_loginfo default_loginfo = {
+static const struct nf_loginfo default_loginfo = {
.type   = NF_LOG_TYPE_LOG,
.u = {
.log = {
diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c
index c83a9963269b..4388de0e5380 100644
--- a/net/ipv4/netfilter/nf_log_ipv4.c
+++ b/net/ipv4/netfilter/nf_log_ipv4.c
@@ -24,7 +24,7 @@
 #include 
 #include 
 
-static struct nf_loginfo default_loginfo = {
+static const struct nf_loginfo default_loginfo = {
.type   = NF_LOG_TYPE_LOG,
.u = {
.log = {
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 1f90644056ac..9f6644958e5e 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -176,7 +176,7 @@ static const char *const comments[] = {
[NF_IP6_TRACE_COMMENT_POLICY]   = "policy",
 };
 
-static struct nf_loginfo trace_loginfo = {
+static const struct nf_loginfo trace_loginfo = {
.type = NF_LOG_TYPE_LOG,
.u = {
.log = {
diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c
index 97c724224da7..b397a8fe88b9 100644
--- a/net/ipv6/netfilter/nf_log_ipv6.c
+++ b/net/ipv6/netfilter/nf_log_ipv6.c
@@ -25,7 +25,7 @@
 #include 
 #include 
 
-static struct nf_loginfo default_loginfo = {
+static const struct nf_loginfo default_loginfo = {
.type   = NF_LOG_TYPE_LOG,
.u = {
.log = {
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index c5bab08b0d73..dfd0bf3810d2 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -29,7 +29,7 @@ static const char *const comments[__NFT_TRACETYPE_MAX] = {
[NFT_TRACETYPE_RULE]= "rule",
 };
 
-static struct nf_loginfo trace_loginfo = {
+static const struct nf_loginfo trace_loginfo = {
.type = NF_LOG_TYPE_LOG,
.u = {
.log = {
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index c684ba95dbb4..cad6498f10b0 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -606,7 +606,7 @@ __build_packet_message(struct nfnl_log_net *log,
return -1;
 }
 
-static struct nf_loginfo default_loginfo = {
+static const struct nf_loginfo default_loginfo = {
.type = NF_LOG_TYPE_ULOG,
.u = {
.ulog = {
-- 
2.1.4


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 30/47] netfilter: rt: add support to fetch path mss

2017-09-03 Thread Pablo Neira Ayuso
From: Florian Westphal 

to be used in combination with tcp option set support to mimic
iptables TCPMSS --clamp-mss-to-pmtu.

v2: Eric Dumazet points out dst must be initialized.

Signed-off-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
---
 include/uapi/linux/netfilter/nf_tables.h |  2 +
 net/netfilter/nft_rt.c   | 66 
 2 files changed, 68 insertions(+)

diff --git a/include/uapi/linux/netfilter/nf_tables.h 
b/include/uapi/linux/netfilter/nf_tables.h
index 40fd199f7531..b49da72efa68 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -811,11 +811,13 @@ enum nft_meta_keys {
  * @NFT_RT_CLASSID: realm value of packet's route (skb->dst->tclassid)
  * @NFT_RT_NEXTHOP4: routing nexthop for IPv4
  * @NFT_RT_NEXTHOP6: routing nexthop for IPv6
+ * @NFT_RT_TCPMSS: fetch current path tcp mss
  */
 enum nft_rt_keys {
NFT_RT_CLASSID,
NFT_RT_NEXTHOP4,
NFT_RT_NEXTHOP6,
+   NFT_RT_TCPMSS,
 };
 
 /**
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index c7383d8f88d0..e142e65d3176 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -23,6 +23,42 @@ struct nft_rt {
enum nft_registers  dreg:8;
 };
 
+static u16 get_tcpmss(const struct nft_pktinfo *pkt, const struct dst_entry 
*skbdst)
+{
+   u32 minlen = sizeof(struct ipv6hdr), mtu = dst_mtu(skbdst);
+   const struct sk_buff *skb = pkt->skb;
+   const struct nf_afinfo *ai;
+   struct flowi fl;
+
+   memset(&fl, 0, sizeof(fl));
+
+   switch (nft_pf(pkt)) {
+   case NFPROTO_IPV4:
+   fl.u.ip4.daddr = ip_hdr(skb)->saddr;
+   minlen = sizeof(struct iphdr);
+   break;
+   case NFPROTO_IPV6:
+   fl.u.ip6.daddr = ipv6_hdr(skb)->saddr;
+   break;
+   }
+
+   ai = nf_get_afinfo(nft_pf(pkt));
+   if (ai) {
+   struct dst_entry *dst = NULL;
+
+   ai->route(nft_net(pkt), &dst, &fl, false);
+   if (dst) {
+   mtu = min(mtu, dst_mtu(dst));
+   dst_release(dst);
+   }
+   }
+
+   if (mtu <= minlen || mtu > 0xffff)
+   return TCP_MSS_DEFAULT;
+
+   return mtu - minlen;
+}
+
 static void nft_rt_get_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -57,6 +93,9 @@ static void nft_rt_get_eval(const struct nft_expr *expr,
 &ipv6_hdr(skb)->daddr),
   sizeof(struct in6_addr));
break;
+   case NFT_RT_TCPMSS:
+   nft_reg_store16(dest, get_tcpmss(pkt, dst));
+   break;
default:
WARN_ON(1);
goto err;
@@ -94,6 +133,9 @@ static int nft_rt_get_init(const struct nft_ctx *ctx,
case NFT_RT_NEXTHOP6:
len = sizeof(struct in6_addr);
break;
+   case NFT_RT_TCPMSS:
+   len = sizeof(u16);
+   break;
default:
return -EOPNOTSUPP;
}
@@ -118,6 +160,29 @@ static int nft_rt_get_dump(struct sk_buff *skb,
return -1;
 }
 
+static int nft_rt_validate(const struct nft_ctx *ctx, const struct nft_expr 
*expr,
+  const struct nft_data **data)
+{
+   const struct nft_rt *priv = nft_expr_priv(expr);
+   unsigned int hooks;
+
+   switch (priv->key) {
+   case NFT_RT_NEXTHOP4:
+   case NFT_RT_NEXTHOP6:
+   case NFT_RT_CLASSID:
+   return 0;
+   case NFT_RT_TCPMSS:
+   hooks = (1 << NF_INET_FORWARD) |
+   (1 << NF_INET_LOCAL_OUT) |
+   (1 << NF_INET_POST_ROUTING);
+   break;
+   default:
+   return -EINVAL;
+   }
+
+   return nft_chain_validate_hooks(ctx->chain, hooks);
+}
+
 static struct nft_expr_type nft_rt_type;
 static const struct nft_expr_ops nft_rt_get_ops = {
.type   = &nft_rt_type,
@@ -125,6 +190,7 @@ static const struct nft_expr_ops nft_rt_get_ops = {
.eval   = nft_rt_get_eval,
.init   = nft_rt_get_init,
.dump   = nft_rt_get_dump,
+   .validate   = nft_rt_validate,
 };
 
 static struct nft_expr_type nft_rt_type __read_mostly = {
-- 
2.1.4


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 38/47] netfilter: conntrack: make protocol tracker pointers const

2017-09-03 Thread Pablo Neira Ayuso
From: Florian Westphal 

Doesn't change generated code, but will make it easier to eventually
make the actual trackers themselves const.

Signed-off-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
---
 include/net/netfilter/nf_conntrack_l3proto.h |  6 +++---
 include/net/netfilter/nf_conntrack_l4proto.h |  4 ++--
 include/net/netfilter/nf_conntrack_timeout.h |  2 +-
 net/netfilter/nf_conntrack_core.c| 12 ++--
 net/netfilter/nf_conntrack_netlink.c | 22 +++---
 net/netfilter/nf_conntrack_proto.c   | 20 ++--
 net/netfilter/nfnetlink_cttimeout.c  | 14 +++---
 net/netfilter/xt_CT.c|  2 +-
 net/openvswitch/conntrack.c  |  4 ++--
 9 files changed, 43 insertions(+), 43 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_l3proto.h 
b/include/net/netfilter/nf_conntrack_l3proto.h
index dabb53b0913c..6269deecbee7 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -64,10 +64,10 @@ struct nf_conntrack_l3proto {
 extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO];
 
 /* Protocol global registration. */
-int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto);
-void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto);
+int nf_ct_l3proto_register(const struct nf_conntrack_l3proto *proto);
+void nf_ct_l3proto_unregister(const struct nf_conntrack_l3proto *proto);
 
-struct nf_conntrack_l3proto *nf_ct_l3proto_find_get(u_int16_t l3proto);
+const struct nf_conntrack_l3proto *nf_ct_l3proto_find_get(u_int16_t l3proto);
 
 /* Existing built-in protocols */
 extern struct nf_conntrack_l3proto nf_conntrack_l3proto_generic;
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h 
b/include/net/netfilter/nf_conntrack_l4proto.h
index 4976ef92dc78..d4933d56809d 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -114,10 +114,10 @@ extern struct nf_conntrack_l4proto 
nf_conntrack_l4proto_generic;
 
 #define MAX_NF_CT_PROTO 256
 
-struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u_int16_t l3proto,
+const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u_int16_t l3proto,
  u_int8_t l4proto);
 
-struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u_int16_t l3proto,
+const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u_int16_t l3proto,
u_int8_t l4proto);
 void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p);
 
diff --git a/include/net/netfilter/nf_conntrack_timeout.h 
b/include/net/netfilter/nf_conntrack_timeout.h
index b222957062b5..483d104fa254 100644
--- a/include/net/netfilter/nf_conntrack_timeout.h
+++ b/include/net/netfilter/nf_conntrack_timeout.h
@@ -16,7 +16,7 @@ struct ctnl_timeout {
refcount_t  refcnt;
charname[CTNL_TIMEOUT_NAME_MAX];
__u16   l3num;
-   struct nf_conntrack_l4proto *l4proto;
+   const struct nf_conntrack_l4proto *l4proto;
chardata[0];
 };
 
diff --git a/net/netfilter/nf_conntrack_core.c 
b/net/netfilter/nf_conntrack_core.c
index f2f00eaf217d..c23df7c9cd59 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -250,8 +250,8 @@ bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned 
int nhoff,
   u_int16_t l3num,
   struct net *net, struct nf_conntrack_tuple *tuple)
 {
-   struct nf_conntrack_l3proto *l3proto;
-   struct nf_conntrack_l4proto *l4proto;
+   const struct nf_conntrack_l3proto *l3proto;
+   const struct nf_conntrack_l4proto *l4proto;
unsigned int protoff;
u_int8_t protonum;
int ret;
@@ -400,7 +400,7 @@ static void
 destroy_conntrack(struct nf_conntrack *nfct)
 {
struct nf_conn *ct = (struct nf_conn *)nfct;
-   struct nf_conntrack_l4proto *l4proto;
+   const struct nf_conntrack_l4proto *l4proto;
 
pr_debug("destroy_conntrack(%p)\n", ct);
NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
@@ -694,7 +694,7 @@ static int nf_ct_resolve_clash(struct net *net, struct 
sk_buff *skb,
 {
/* This is the conntrack entry already in hashes that won race. */
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
-   struct nf_conntrack_l4proto *l4proto;
+   const struct nf_conntrack_l4proto *l4proto;
 
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
if (l4proto->allow_clash &&
@@ -1344,10 +1344,10 @@ unsigned int
 nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
struct sk_buff *skb)
 {
+   const struct nf_conntrack_l3proto *l3proto;
+   const struct nf_conntrack_l4proto *l4proto;
struct nf_conn *ct, 

[PATCH 45/47] netfilter: Remove NFDEBUG()

2017-09-03 Thread Pablo Neira Ayuso
From: Varsha Rao 

Remove the NFDEBUG() macro and use pr_debug() instead.

Signed-off-by: Varsha Rao 
Signed-off-by: Pablo Neira Ayuso 
---
 net/netfilter/nf_internals.h | 6 --
 net/netfilter/nf_sockopt.c   | 2 +-
 2 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index bacd6363946e..49f87ec093a3 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -5,12 +5,6 @@
 #include 
 #include 
 
-#ifdef CONFIG_NETFILTER_DEBUG
-#define NFDEBUG(format, args...)  printk(KERN_DEBUG format , ## args)
-#else
-#define NFDEBUG(format, args...)
-#endif
-
 /* nf_queue.c */
 int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
 const struct nf_hook_entries *entries, unsigned int index,
diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c
index c68c1e58b362..d2a9e6b5d01f 100644
--- a/net/netfilter/nf_sockopt.c
+++ b/net/netfilter/nf_sockopt.c
@@ -33,7 +33,7 @@ int nf_register_sockopt(struct nf_sockopt_ops *reg)
reg->set_optmin, reg->set_optmax)
|| overlap(ops->get_optmin, ops->get_optmax,
   reg->get_optmin, reg->get_optmax))) {
-   NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
+   pr_debug("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
ops->set_optmin, ops->set_optmax,
ops->get_optmin, ops->get_optmax,
reg->set_optmin, reg->set_optmax,
-- 
2.1.4


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 39/47] netfilter: ebtables: fix indent on if statements

2017-09-03 Thread Pablo Neira Ayuso
From: Colin Ian King 

The returns on some if statements are not indented correctly,
add in the missing tab.

Signed-off-by: Colin Ian King 
Signed-off-by: Pablo Neira Ayuso 
---
 net/bridge/netfilter/ebt_ip.c  | 4 ++--
 net/bridge/netfilter/ebt_ip6.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c
index d06968bdf5ec..2b46c50abce0 100644
--- a/net/bridge/netfilter/ebt_ip.c
+++ b/net/bridge/netfilter/ebt_ip.c
@@ -64,14 +64,14 @@ ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param 
*par)
if (NF_INVF(info, EBT_IP_DPORT,
dst < info->dport[0] ||
dst > info->dport[1]))
-   return false;
+   return false;
}
if (info->bitmask & EBT_IP_SPORT) {
u32 src = ntohs(pptr->src);
if (NF_INVF(info, EBT_IP_SPORT,
src < info->sport[0] ||
src > info->sport[1]))
-   return false;
+   return false;
}
}
return true;
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index 4617491be41e..2a5a52a53ec4 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -89,7 +89,7 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param 
*par)
if (NF_INVF(info, EBT_IP6_SPORT,
src < info->sport[0] ||
src > info->sport[1]))
-   return false;
+   return false;
}
if ((info->bitmask & EBT_IP6_ICMP6) &&
NF_INVF(info, EBT_IP6_ICMP6,
-- 
2.1.4


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 36/47] netfilter: conntrack: place print_tuple in procfs part

2017-09-03 Thread Pablo Neira Ayuso
From: Florian Westphal 

CONFIG_NF_CONNTRACK_PROCFS is deprecated, no need to use a function
pointer in the trackers for this. Place the printf formatting in
the one place that uses it.

Signed-off-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
---
 include/net/netfilter/nf_conntrack_l3proto.h   |  4 --
 include/net/netfilter/nf_conntrack_l4proto.h   |  4 --
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c |  8 
 net/ipv4/netfilter/nf_conntrack_proto_icmp.c   | 11 -
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c |  8 
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 11 -
 net/netfilter/nf_conntrack_l3proto_generic.c   |  6 ---
 net/netfilter/nf_conntrack_proto_dccp.c| 10 -
 net/netfilter/nf_conntrack_proto_generic.c |  7 
 net/netfilter/nf_conntrack_proto_gre.c | 10 -
 net/netfilter/nf_conntrack_proto_sctp.c| 11 -
 net/netfilter/nf_conntrack_proto_tcp.c | 11 -
 net/netfilter/nf_conntrack_proto_udp.c | 13 --
 net/netfilter/nf_conntrack_standalone.c| 58 +-
 14 files changed, 56 insertions(+), 116 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_l3proto.h 
b/include/net/netfilter/nf_conntrack_l3proto.h
index e31861e4fa6a..dabb53b0913c 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -37,10 +37,6 @@ struct nf_conntrack_l3proto {
bool (*invert_tuple)(struct nf_conntrack_tuple *inverse,
 const struct nf_conntrack_tuple *orig);
 
-   /* Print out the per-protocol part of the tuple. */
-   void (*print_tuple)(struct seq_file *s,
-   const struct nf_conntrack_tuple *);
-
/*
 * Called before tracking. 
 *  *dataoff: offset of protocol header (TCP, UDP,...) in skb
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h 
b/include/net/netfilter/nf_conntrack_l4proto.h
index 15c58dd3f701..7e8da04a5eb6 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -61,10 +61,6 @@ struct nf_conntrack_l4proto {
/* called by gc worker if table is full */
bool (*can_early_drop)(const struct nf_conn *ct);
 
-   /* Print out the per-protocol part of the tuple. Return like seq_* */
-   void (*print_tuple)(struct seq_file *s,
-   const struct nf_conntrack_tuple *);
-
/* Print out the private part of the conntrack. */
void (*print_conntrack)(struct seq_file *s, struct nf_conn *);
 
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c 
b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 9f7ea862068c..fe374da4bc13 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -63,13 +63,6 @@ static bool ipv4_invert_tuple(struct nf_conntrack_tuple 
*tuple,
return true;
 }
 
-static void ipv4_print_tuple(struct seq_file *s,
-   const struct nf_conntrack_tuple *tuple)
-{
-   seq_printf(s, "src=%pI4 dst=%pI4 ",
-  &tuple->src.u3.ip, &tuple->dst.u3.ip);
-}
-
 static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
unsigned int *dataoff, u_int8_t *protonum)
 {
@@ -355,7 +348,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 
__read_mostly = {
.l3proto = PF_INET,
.pkt_to_tuple= ipv4_pkt_to_tuple,
.invert_tuple= ipv4_invert_tuple,
-   .print_tuple = ipv4_print_tuple,
.get_l4proto = ipv4_get_l4proto,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = ipv4_tuple_to_nlattr,
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c 
b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index fdbeb03e4600..434b4e20f6db 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -71,16 +71,6 @@ static bool icmp_invert_tuple(struct nf_conntrack_tuple 
*tuple,
return true;
 }
 
-/* Print out the per-protocol part of the tuple. */
-static void icmp_print_tuple(struct seq_file *s,
-   const struct nf_conntrack_tuple *tuple)
-{
-   seq_printf(s, "type=%u code=%u id=%u ",
-  tuple->dst.u.icmp.type,
-  tuple->dst.u.icmp.code,
-  ntohs(tuple->src.u.icmp.id));
-}
-
 static unsigned int *icmp_get_timeouts(struct net *net)
 {
return &icmp_pernet(net)->timeout;
@@ -364,7 +354,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp 
__read_mostly =
.l4proto= IPPROTO_ICMP,
.pkt_to_tuple   = icmp_pkt_to_tuple,
.invert_tuple   = icmp_invert_tuple,
-   .print_tuple= icmp_print_tuple,
.packet = icmp_packet,
.get_timeouts   = 

[PATCH 41/47] netfilter: convert hook list to an array

2017-09-03 Thread Pablo Neira Ayuso
From: Aaron Conole 

This converts the storage and layout of netfilter hook entries from a
linked list to an array.  After this commit, hook entries will be
stored adjacent in memory.  The next pointer is no longer required.

The ops pointers are stored at the end of the array as they are only
used in the register/unregister path and in the legacy br_netfilter code.

nf_unregister_net_hooks() is slower than needed, as it just calls
nf_unregister_net_hook() in a loop (i.e. at least n synchronize_net()
calls); this will be addressed in a follow-up patch.

Test setup:
 - ixgbe 10gbit
 - netperf UDP_STREAM, 64 byte packets
 - 5 hooks: (raw + mangle prerouting, mangle+filter input, inet filter):
empty mangle and raw prerouting, mangle and filter input hooks:
353.9
this patch:
364.2

Signed-off-by: Aaron Conole 
Signed-off-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
---
 include/linux/netdevice.h |   2 +-
 include/linux/netfilter.h |  45 +++---
 include/linux/netfilter_ingress.h |   4 +-
 include/net/netfilter/nf_queue.h  |   2 +-
 include/net/netns/netfilter.h |   2 +-
 net/bridge/br_netfilter_hooks.c   |  19 ++-
 net/netfilter/core.c  | 297 --
 net/netfilter/nf_internals.h  |   3 +-
 net/netfilter/nf_queue.c  |  67 +
 9 files changed, 307 insertions(+), 134 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 614642eb7eb7..ca0a30127300 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1811,7 +1811,7 @@ struct net_device {
 #endif
struct netdev_queue __rcu *ingress_queue;
 #ifdef CONFIG_NETFILTER_INGRESS
-   struct nf_hook_entry __rcu *nf_hooks_ingress;
+   struct nf_hook_entries __rcu *nf_hooks_ingress;
 #endif
 
unsigned char   broadcast[MAX_ADDR_LEN];
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 22f081065d49..f84bca1703cd 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -72,25 +72,32 @@ struct nf_hook_ops {
 };
 
 struct nf_hook_entry {
-   struct nf_hook_entry __rcu  *next;
nf_hookfn   *hook;
void*priv;
-   const struct nf_hook_ops*orig_ops;
 };
 
-static inline void
-nf_hook_entry_init(struct nf_hook_entry *entry,const struct 
nf_hook_ops *ops)
-{
-   entry->next = NULL;
-   entry->hook = ops->hook;
-   entry->priv = ops->priv;
-   entry->orig_ops = ops;
-}
+struct nf_hook_entries {
+   u16 num_hook_entries;
+   /* padding */
+   struct nf_hook_entryhooks[];
+
+   /* trailer: pointers to original orig_ops of each hook.
+*
+* This is not part of struct nf_hook_entry since its only
+* needed in slow path (hook register/unregister).
+*
+* const struct nf_hook_ops *orig_ops[]
+*/
+};
 
-static inline int
-nf_hook_entry_priority(const struct nf_hook_entry *entry)
+static inline struct nf_hook_ops **nf_hook_entries_get_hook_ops(const struct 
nf_hook_entries *e)
 {
-   return entry->orig_ops->priority;
+   unsigned int n = e->num_hook_entries;
+   const void *hook_end;
+
+   hook_end = &e->hooks[n]; /* this is *past* ->hooks[]! */
+
+   return (struct nf_hook_ops **)hook_end;
 }
 
 static inline int
@@ -100,12 +107,6 @@ nf_hook_entry_hookfn(const struct nf_hook_entry *entry, 
struct sk_buff *skb,
return entry->hook(entry->priv, skb, state);
 }
 
-static inline const struct nf_hook_ops *
-nf_hook_entry_ops(const struct nf_hook_entry *entry)
-{
-   return entry->orig_ops;
-}
-
 static inline void nf_hook_state_init(struct nf_hook_state *p,
  unsigned int hook,
  u_int8_t pf,
@@ -168,7 +169,7 @@ extern struct static_key 
nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
 #endif
 
 int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
-struct nf_hook_entry *entry);
+const struct nf_hook_entries *e, unsigned int i);
 
 /**
  * nf_hook - call a netfilter hook
@@ -182,7 +183,7 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, 
struct net *net,
  struct net_device *indev, struct net_device *outdev,
  int (*okfn)(struct net *, struct sock *, struct 
sk_buff *))
 {
-   struct nf_hook_entry *hook_head;
+   struct nf_hook_entries *hook_head;
int ret = 1;
 
 #ifdef HAVE_JUMP_LABEL
@@ -200,7 +201,7 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, 
struct net *net,
nf_hook_state_init(&state, hook, pf, indev, outdev,
   sk, net, okfn);
 
-   ret = nf_hook_slow(skb, &state, hook_head);
+   ret = nf_hook_slow(skb, &state, 

[PATCH 43/47] netfilter: core: batch nf_unregister_net_hooks synchronize_net calls

2017-09-03 Thread Pablo Neira Ayuso
From: Florian Westphal 

Re-add batching in nf_unregister_net_hooks().

As before, store an array of the to-be-freed rule arrays on the
stack, then call synchronize_net() once per batch.

Signed-off-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
---
 net/netfilter/core.c | 59 +---
 1 file changed, 56 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 164ad20d0bd2..04fe25abc5f6 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -395,10 +395,63 @@ EXPORT_SYMBOL(nf_register_net_hooks);
 void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
 unsigned int hookcount)
 {
-   unsigned int i;
+   struct nf_hook_entries *to_free[16], *p;
+   struct nf_hook_entries __rcu **pp;
+   unsigned int i, j, n;
+
+   mutex_lock(&nf_hook_mutex);
+   for (i = 0; i < hookcount; i++) {
+   pp = nf_hook_entry_head(net, &reg[i]);
+   if (!pp)
+   continue;
+
+   p = nf_entry_dereference(*pp);
+   if (WARN_ON_ONCE(!p))
+   continue;
+   __nf_unregister_net_hook(p, &reg[i]);
+   }
+   mutex_unlock(&nf_hook_mutex);
+
+   do {
+   n = min_t(unsigned int, hookcount, ARRAY_SIZE(to_free));
+
+   mutex_lock(&nf_hook_mutex);
+
+   for (i = 0, j = 0; i < hookcount && j < n; i++) {
+   pp = nf_hook_entry_head(net, &reg[i]);
+   if (!pp)
+   continue;
+
+   p = nf_entry_dereference(*pp);
+   if (!p)
+   continue;
+
+   to_free[j] = __nf_hook_entries_try_shrink(pp);
+   if (to_free[j])
+   ++j;
+   }
+
+   mutex_unlock(&nf_hook_mutex);
+
+   if (j) {
+   unsigned int nfq;
+
+   synchronize_net();
+
+   /* need 2nd synchronize_net() if nfqueue is used, skb
+* can get reinjected right before nf_queue_hook_drop()
+*/
+   nfq = nf_queue_nf_hook_drop(net);
+   if (nfq)
+   synchronize_net();
+
+   for (i = 0; i < j; i++)
+   kvfree(to_free[i]);
+   }
 
-   for (i = 0; i < hookcount; i++)
-   nf_unregister_net_hook(net, &reg[i]);
+   reg += n;
+   hookcount -= n;
+   } while (hookcount > 0);
 }
 EXPORT_SYMBOL(nf_unregister_net_hooks);
 
-- 
2.1.4


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 47/47] netfilter: rt: account for tcp header size too

2017-09-03 Thread Pablo Neira Ayuso
From: Florian Westphal 

This needs to account for the ipv4/ipv6 header size and the tcp
header without options.

Fixes: 6b5dc98e8fac0 ("netfilter: rt: add support to fetch path mss")
Reported-by: Matteo Croce 
Signed-off-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
---
 net/netfilter/nft_rt.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index 61fd3acaa3c9..a6b7d05aeacf 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -35,10 +35,11 @@ static u16 get_tcpmss(const struct nft_pktinfo *pkt, const 
struct dst_entry *skb
switch (nft_pf(pkt)) {
case NFPROTO_IPV4:
fl.u.ip4.daddr = ip_hdr(skb)->saddr;
-   minlen = sizeof(struct iphdr);
+   minlen = sizeof(struct iphdr) + sizeof(struct tcphdr);
break;
case NFPROTO_IPV6:
fl.u.ip6.daddr = ipv6_hdr(skb)->saddr;
+   minlen = sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
break;
}
 
-- 
2.1.4


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 46/47] netfilter: conntrack: remove unused code in nf_conntrack_proto_generic.c

2017-09-03 Thread Pablo Neira Ayuso
From: Davide Caratti 

L4 protocol helpers for DCCP, SCTP and UDPlite can't be built as kernel
modules anymore, so we can remove code enclosed in
 #ifdef CONFIG_NF_CT_PROTO_{DCCP,SCTP,UDPLITE}_MODULE

Signed-off-by: Davide Caratti 
Signed-off-by: Pablo Neira Ayuso 
---
 net/netfilter/nf_conntrack_proto_generic.c | 12 
 1 file changed, 12 deletions(-)

diff --git a/net/netfilter/nf_conntrack_proto_generic.c 
b/net/netfilter/nf_conntrack_proto_generic.c
index 2bc3d0c1a5bf..2993995b690d 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -17,22 +17,10 @@ static unsigned int nf_ct_generic_timeout __read_mostly = 
600*HZ;
 static bool nf_generic_should_process(u8 proto)
 {
switch (proto) {
-#ifdef CONFIG_NF_CT_PROTO_SCTP_MODULE
-   case IPPROTO_SCTP:
-   return false;
-#endif
-#ifdef CONFIG_NF_CT_PROTO_DCCP_MODULE
-   case IPPROTO_DCCP:
-   return false;
-#endif
 #ifdef CONFIG_NF_CT_PROTO_GRE_MODULE
case IPPROTO_GRE:
return false;
 #endif
-#ifdef CONFIG_NF_CT_PROTO_UDPLITE_MODULE
-   case IPPROTO_UDPLITE:
-   return false;
-#endif
default:
return true;
}
-- 
2.1.4


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 40/47] netfilter: fix a few (harmless) sparse warnings

2017-09-03 Thread Pablo Neira Ayuso
From: Florian Westphal 

net/netfilter/nft_payload.c:187:18: warning: incorrect type in return 
expression (expected bool got restricted __sum16 [usertype] check)
net/netfilter/nft_exthdr.c:222:14: warning: cast to restricted __be32
net/netfilter/nft_rt.c:49:23: warning: incorrect type in assignment (different 
base types expected unsigned int got restricted __be32)
net/netfilter/nft_rt.c:70:25: warning: symbol 'nft_rt_policy' was not declared. 
Should it be static?

Signed-off-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
---
 net/netfilter/nft_exthdr.c  | 2 +-
 net/netfilter/nft_payload.c | 2 +-
 net/netfilter/nft_rt.c  | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index f5a0bf5e3bdd..a0a93d987a3b 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -388,7 +388,7 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx,
if (tb[NFTA_EXTHDR_SREG] && tb[NFTA_EXTHDR_DREG])
return ERR_PTR(-EOPNOTSUPP);
 
-   op = ntohl(nla_get_u32(tb[NFTA_EXTHDR_OP]));
+   op = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OP]));
switch (op) {
case NFT_EXTHDR_OP_TCPOPT:
if (tb[NFTA_EXTHDR_SREG])
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 7d699bbd45b0..e110b0ebbf58 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -184,7 +184,7 @@ static bool nft_payload_udp_checksum(struct sk_buff *skb, 
unsigned int thoff)
if (!uh)
return false;
 
-   return uh->check;
+   return (__force bool)uh->check;
 }
 
 static int nft_payload_l4csum_offset(const struct nft_pktinfo *pkt,
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index e142e65d3176..61fd3acaa3c9 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -82,8 +82,8 @@ static void nft_rt_get_eval(const struct nft_expr *expr,
if (nft_pf(pkt) != NFPROTO_IPV4)
goto err;
 
-   *dest = rt_nexthop((const struct rtable *)dst,
-  ip_hdr(skb)->daddr);
+   *dest = (__force u32)rt_nexthop((const struct rtable *)dst,
+   ip_hdr(skb)->daddr);
break;
case NFT_RT_NEXTHOP6:
if (nft_pf(pkt) != NFPROTO_IPV6)
@@ -106,7 +106,7 @@ static void nft_rt_get_eval(const struct nft_expr *expr,
regs->verdict.code = NFT_BREAK;
 }
 
-const struct nla_policy nft_rt_policy[NFTA_RT_MAX + 1] = {
+static const struct nla_policy nft_rt_policy[NFTA_RT_MAX + 1] = {
[NFTA_RT_DREG]  = { .type = NLA_U32 },
[NFTA_RT_KEY]   = { .type = NLA_U32 },
 };
-- 
2.1.4


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 44/47] netfilter: conntrack: don't log "invalid" icmpv6 connections

2017-09-03 Thread Pablo Neira Ayuso
From: Florian Westphal 

When enabling logging for invalid connections we currently also log most
icmpv6 types, which we don't track intentionally (e.g. neigh discovery).
"invalid" should really mean "invalid", i.e. short header or bad checksum.

We don't do any logging for icmp(v4) either, it's just useless noise.

Signed-off-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
---
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c 
b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 808f63e2e1ff..43544b975eae 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -121,11 +121,6 @@ static bool icmpv6_new(struct nf_conn *ct, const struct 
sk_buff *skb,
pr_debug("icmpv6: can't create new conn with type %u\n",
 type + 128);
nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple);
-   if (LOG_INVALID(nf_ct_net(ct), IPPROTO_ICMPV6))
-   nf_log_packet(nf_ct_net(ct), PF_INET6, 0, skb, NULL,
- NULL, NULL,
- "nf_ct_icmpv6: invalid new with type %d ",
- type + 128);
return false;
}
return true;
-- 
2.1.4


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 42/47] netfilter: debug: check for sorted array

2017-09-03 Thread Pablo Neira Ayuso
From: Florian Westphal 

Make sure our grow/shrink routine places them in the correct order.

Signed-off-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
---
 net/netfilter/core.c | 23 +++
 1 file changed, 23 insertions(+)

diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 1a9e23c9ab98..164ad20d0bd2 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -157,6 +157,27 @@ nf_hook_entries_grow(const struct nf_hook_entries *old,
return new;
 }
 
+static void hooks_validate(const struct nf_hook_entries *hooks)
+{
+#ifdef CONFIG_DEBUG_KERNEL
+   struct nf_hook_ops **orig_ops;
+   int prio = INT_MIN;
+   size_t i = 0;
+
+   orig_ops = nf_hook_entries_get_hook_ops(hooks);
+
+   for (i = 0; i < hooks->num_hook_entries; i++) {
+   if (orig_ops[i] == &dummy_ops)
+   continue;
+
+   WARN_ON(orig_ops[i]->priority < prio);
+
+   if (orig_ops[i]->priority > prio)
+   prio = orig_ops[i]->priority;
+   }
+#endif
+}
+
 /*
  * __nf_hook_entries_try_shrink - try to shrink hook array
  *
@@ -210,6 +231,7 @@ static void *__nf_hook_entries_try_shrink(struct 
nf_hook_entries __rcu **pp)
new_ops[j] = (void *)orig_ops[i];
j++;
}
+   hooks_validate(new);
 out_assign:
rcu_assign_pointer(*pp, new);
return old;
@@ -261,6 +283,7 @@ int nf_register_net_hook(struct net *net, const struct 
nf_hook_ops *reg)
if (IS_ERR(new_hooks))
return PTR_ERR(new_hooks);
 
+   hooks_validate(new_hooks);
 #ifdef CONFIG_NETFILTER_INGRESS
if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
net_inc_ingress_queue();
-- 
2.1.4


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 00/47] Netfilter updates for net-next

2017-09-03 Thread David Miller
From: Pablo Neira Ayuso 
Date: Mon,  4 Sep 2017 00:25:42 +0200

> The following patchset contains Netfilter updates for your net-next
> tree. Basically, updates to the conntrack core, enhancements for
> nf_tables, conversion of netfilter hooks from linked list to array to
> improve memory locality and assorted improvements for the Netfilter
> codebase. More specifically, they are:
 ...
> I think I will have material for a second Netfilter batch in my queue if
> time allow to make it fit in this merge window.

Ok, but please be swift about it.

> You can pull these changes from:
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git

Pulled, thanks.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html