[PATCH nft 3/3] mnl: use either name or handle to refer to objects

2018-10-23 Thread Pablo Neira Ayuso
We can only specify either name or handle to refer to objects.

Signed-off-by: Pablo Neira Ayuso 
---
 src/mnl.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/mnl.c b/src/mnl.c
index d3129fda2b89..2be8ca14e50d 100644
--- a/src/mnl.c
+++ b/src/mnl.c
@@ -660,7 +660,7 @@ int mnl_nft_table_del(struct netlink_ctx *ctx, const struct 
cmd *cmd)
nftnl_table_set_u32(nlt, NFTNL_TABLE_FAMILY, cmd->handle.family);
if (cmd->handle.table.name)
nftnl_table_set(nlt, NFTNL_TABLE_NAME, cmd->handle.table.name);
-   if (cmd->handle.handle.id)
+   else if (cmd->handle.handle.id)
nftnl_table_set_u64(nlt, NFTNL_TABLE_HANDLE,
cmd->handle.handle.id);
 
@@ -830,7 +830,7 @@ int mnl_nft_set_del(struct netlink_ctx *ctx, const struct 
cmd *cmd)
nftnl_set_set_str(nls, NFTNL_SET_TABLE, h->table.name);
if (h->set.name)
nftnl_set_set_str(nls, NFTNL_SET_NAME, h->set.name);
-   if (h->handle.id)
+   else if (h->handle.id)
nftnl_set_set_u64(nls, NFTNL_SET_HANDLE, h->handle.id);
 
nlh = nftnl_nlmsg_build_hdr(nftnl_batch_buffer(ctx->batch),
@@ -1189,8 +1189,9 @@ int mnl_nft_setelem_del(struct netlink_ctx *ctx, const 
struct cmd *cmd)
 
nftnl_set_set_u32(nls, NFTNL_SET_FAMILY, h->family);
nftnl_set_set_str(nls, NFTNL_SET_TABLE, h->table.name);
-   nftnl_set_set_str(nls, NFTNL_SET_NAME, h->set.name);
-   if (h->handle.id)
+   if (h->set.name)
+   nftnl_set_set_str(nls, NFTNL_SET_NAME, h->set.name);
+   else if (h->handle.id)
nftnl_set_set_u64(nls, NFTNL_SET_HANDLE, h->handle.id);
 
if (cmd->expr)
-- 
2.11.0



[PATCH nft 1/3] src: move socket open and reopen to mnl.c

2018-10-23 Thread Pablo Neira Ayuso
These functions are part of the mnl backend, move them there. Remove
netlink_close_sock(), use direct call to mnl_socket_close().

Signed-off-by: Pablo Neira Ayuso 
---
 include/mnl.h |  4 ++--
 include/netlink.h |  1 -
 src/libnftables.c |  4 ++--
 src/mnl.c | 22 ++
 src/netlink.c | 27 ---
 src/rule.c|  2 +-
 6 files changed, 27 insertions(+), 33 deletions(-)

diff --git a/include/mnl.h b/include/mnl.h
index 3ddc82a05cb0..676030e6c4c6 100644
--- a/include/mnl.h
+++ b/include/mnl.h
@@ -6,8 +6,8 @@
 #include 
 #include 
 
-struct mnl_socket *netlink_open_sock(void);
-void netlink_close_sock(struct mnl_socket *nf_sock);
+struct mnl_socket *nft_mnl_socket_open(void);
+struct mnl_socket *nft_mnl_socket_reopen(struct mnl_socket *nf_sock);
 
 uint32_t mnl_seqnum_alloc(uint32_t *seqnum);
 uint16_t mnl_genid_get(struct netlink_ctx *ctx);
diff --git a/include/netlink.h b/include/netlink.h
index 66e400d88f19..af9313d51453 100644
--- a/include/netlink.h
+++ b/include/netlink.h
@@ -157,7 +157,6 @@ extern void netlink_dump_obj(struct nftnl_obj *nlo, struct 
netlink_ctx *ctx);
 
 extern int netlink_batch_send(struct netlink_ctx *ctx, struct list_head 
*err_list);
 
-extern struct mnl_socket *netlink_restart(struct mnl_socket *nf_sock);
 #define netlink_abi_error()\
__netlink_abi_error(__FILE__, __LINE__, strerror(errno));
 extern void __noreturn __netlink_abi_error(const char *file, int line, const 
char *reason);
diff --git a/src/libnftables.c b/src/libnftables.c
index 44869602c875..0731c532a22a 100644
--- a/src/libnftables.c
+++ b/src/libnftables.c
@@ -129,7 +129,7 @@ void nft_ctx_clear_include_paths(struct nft_ctx *ctx)
 
 static void nft_ctx_netlink_init(struct nft_ctx *ctx)
 {
-   ctx->nf_sock = netlink_open_sock();
+   ctx->nf_sock = nft_mnl_socket_open();
 }
 
 struct nft_ctx *nft_ctx_new(uint32_t flags)
@@ -266,7 +266,7 @@ const char *nft_ctx_get_error_buffer(struct nft_ctx *ctx)
 void nft_ctx_free(struct nft_ctx *ctx)
 {
if (ctx->nf_sock)
-   netlink_close_sock(ctx->nf_sock);
+   mnl_socket_close(ctx->nf_sock);
 
exit_cookie(>output.output_cookie);
exit_cookie(>output.error_cookie);
diff --git a/src/mnl.c b/src/mnl.c
index 9a6248aa0ad9..84727094e27e 100644
--- a/src/mnl.c
+++ b/src/mnl.c
@@ -28,10 +28,32 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
 
+struct mnl_socket *nft_mnl_socket_open(void)
+{
+   struct mnl_socket *nf_sock;
+
+   nf_sock = mnl_socket_open(NETLINK_NETFILTER);
+   if (!nf_sock)
+   netlink_init_error();
+
+   if (fcntl(mnl_socket_get_fd(nf_sock), F_SETFL, O_NONBLOCK))
+   netlink_init_error();
+
+   return nf_sock;
+}
+
+struct mnl_socket *nft_mnl_socket_reopen(struct mnl_socket *nf_sock)
+{
+   mnl_socket_close(nf_sock);
+
+   return nft_mnl_socket_open();
+}
+
 uint32_t mnl_seqnum_alloc(unsigned int *seqnum)
 {
return (*seqnum)++;
diff --git a/src/netlink.c b/src/netlink.c
index 403780ffdefb..8eb2ccad2f8c 100644
--- a/src/netlink.c
+++ b/src/netlink.c
@@ -10,7 +10,6 @@
  */
 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -53,32 +52,6 @@ const struct location netlink_location = {
.indesc = _netlink,
 };
 
-struct mnl_socket *netlink_open_sock(void)
-{
-   struct mnl_socket *nf_sock;
-
-   nf_sock = mnl_socket_open(NETLINK_NETFILTER);
-   if (nf_sock == NULL)
-   netlink_init_error();
-
-   if (fcntl(mnl_socket_get_fd(nf_sock), F_SETFL, O_NONBLOCK))
-   netlink_init_error();
-
-   return nf_sock;
-}
-
-void netlink_close_sock(struct mnl_socket *nf_sock)
-{
-   if (nf_sock)
-   mnl_socket_close(nf_sock);
-}
-
-struct mnl_socket *netlink_restart(struct mnl_socket *nf_sock)
-{
-   netlink_close_sock(nf_sock);
-   return netlink_open_sock();
-}
-
 void __noreturn __netlink_abi_error(const char *file, int line,
const char *reason)
 {
diff --git a/src/rule.c b/src/rule.c
index 12ac1310034d..9087fd2bd193 100644
--- a/src/rule.c
+++ b/src/rule.c
@@ -243,7 +243,7 @@ replay:
if (ret < 0) {
cache_release(cache);
if (errno == EINTR) {
-   nft->nf_sock = netlink_restart(nft->nf_sock);
+   nft->nf_sock = nft_mnl_socket_reopen(nft->nf_sock);
goto replay;
}
return -1;
-- 
2.11.0



Re: [PATCH nft v3] src: osf: add ttl option support

2018-10-23 Thread Pablo Neira Ayuso
On Tue, Oct 23, 2018 at 05:06:22PM +0200, Fernando Fernandez Mancera wrote:
> Add support for ttl option in "osf" expression. Example:
> 
> table ip foo {
>   chain bar {
>   type filter hook input priority filter; policy accept;
>   osf ttl skip name "Linux"
>   }
> }

Applied, thanks Fernando.


Re: [iptables PATCH] xtables: Fix for spurious errors from iptables-translate

2018-10-23 Thread Pablo Neira Ayuso
On Tue, Oct 23, 2018 at 04:59:14PM +0200, Phil Sutter wrote:
> When aligning iptables-nft error messages with legacy ones, I missed
> that translate tools shouldn't check for missing or duplicated chains.
> 
> Introduce a boolean in struct nft_xt_cmd_parse indicating we're "just"
> translating and do_parse() should skip the checks.

Applied, thanks for restoring iptables-xlate.


Re: [nft PATCH] tests: shell: Extend get element test

2018-10-23 Thread Pablo Neira Ayuso
On Tue, Oct 23, 2018 at 12:33:28PM +0200, Phil Sutter wrote:
> On Tue, Oct 23, 2018 at 11:28:28AM +0200, Pablo Neira Ayuso wrote:
[...]
> > Using current nftables git HEAD plus kernel patch, I'm getting:
> > 
> > # nft get element ip t s '{ 25, 28 }'
> > table ip t {
> > set s {
> > type inet_service
> > flags interval
> > elements = { 20-30, 20-30 }
> > }
> > }
> 
> Oh, sorry. I did above test using sources which eliminate duplicates
> from the return set (something I was playing with).

No problem :-)

> > > Here, although I ask for two elements, a single range is returned. So I
> > > guess asking for two ranges belonging to the same range should return a
> > > single range as well. Maybe a "simple" fix would be to drop duplicates
> > > from the return set?
> > > 
> > > Anyway, from my point of view your solution is fine as well: If a
> > > humanoid parser evaluates the results, it can easily spot duplicates and
> > > interpret them right. If 'get element' command is used by a script to
> > > check for existence of given ranges, it all boils down to a boolean
> > > found or not found result, so duplicates shouldn't matter that much.
> > 
> > My idea was to provide list including exactly the same number of
> > elements that have been requested, and in strict order, so you can
> > quickly check what you request belongs to somewhere, eg.
> > 
> > # nft get element ip t s '{ 50, 20, 10 }'
> > table ip t {
> > set s {
> > type inet_service
> > flags interval
> > elements = { 50-60, 20-30, 10 }
> > }
> > }
> > 
> 
> Yes, that makes sense. Thanks for explaining your approach to how 'get
> element' command should work!

I should find time to document this in the manpage :-\

> > Not yet implemented, but we could add something like:
> > 
> > # nft get element ip t s '{ 50, 20, 10 }'
> > table ip t {
> > set s {
> > type inet_service
> > flags interval
> > elements = { 50 in 50-60, 20 in 20-30, 10 }
> > }
> > }
> > 
> > So there's a clear mapping between what we request and what we get.
> 
> This would allow to return partial data, i.e. if one requested element
> doesn't exist in the set to still show the remaining ones. But current
> behaviour is absolutely fine from my point of view.

The kernel side is not allowing this, it hits ENOENT in case we find
an element which does not exist. It should be possible to revisit this
and batch several inquiries in one go.

> > Still, at this stage, the existing behaviour allows humans - for a
> > small number of data - and robots, to do mappings between what they
> > request and what they find.
> 
> Yes, indeed.

Will leave this for future work, just for the record.

Thanks.


[PATCH nft v3] src: osf: add ttl option support

2018-10-23 Thread Fernando Fernandez Mancera
Add support for ttl option in "osf" expression. Example:

table ip foo {
chain bar {
type filter hook input priority filter; policy accept;
osf ttl skip name "Linux"
}
}

Signed-off-by: Fernando Fernandez Mancera 
---
v1:initial patch
v2:use "ttl-global, ttl-nocheck.." instead of "1, 2.."
v3:better names for ttl options, add json and tests/py support
All is working properly.
---
 include/expression.h|  4 ++
 include/linux/netfilter/nf_tables.h |  2 +
 include/osf.h   |  2 +-
 src/json.c  |  2 +-
 src/netlink_delinearize.c   |  5 ++-
 src/netlink_linearize.c |  1 +
 src/osf.c   | 26 ++-
 src/parser_bison.y  | 19 +++-
 src/parser_json.c   |  5 ++-
 tests/py/inet/osf.t |  3 ++
 tests/py/inet/osf.t.json|  3 ++
 tests/py/inet/osf.t.payload | 68 -
 12 files changed, 129 insertions(+), 11 deletions(-)

diff --git a/include/expression.h b/include/expression.h
index d6977c3..f018c95 100644
--- a/include/expression.h
+++ b/include/expression.h
@@ -345,6 +345,10 @@ struct expr {
uint8_t direction;
uint8_t spnum;
} xfrm;
+   struct {
+   /* EXPR_OSF */
+   uint8_t ttl;
+   } osf;
};
 };
 
diff --git a/include/linux/netfilter/nf_tables.h 
b/include/linux/netfilter/nf_tables.h
index 4e28598..1d13ad3 100644
--- a/include/linux/netfilter/nf_tables.h
+++ b/include/linux/netfilter/nf_tables.h
@@ -939,10 +939,12 @@ enum nft_socket_keys {
  * enum nft_osf_attributes - nf_tables osf expression netlink attributes
  *
  * @NFTA_OSF_DREG: destination register (NLA_U32: nft_registers)
+ * @NFTA_OSF_TTL: Value of the TTL osf option (NLA_U8)
  */
 enum nft_osf_attributes {
NFTA_OSF_UNSPEC,
NFTA_OSF_DREG,
+   NFTA_OSF_TTL,
__NFTA_OSF_MAX
 };
 #define NFT_OSF_MAX(__NFTA_OSF_MAX - 1)
diff --git a/include/osf.h b/include/osf.h
index 54cdd4a..23ea34d 100644
--- a/include/osf.h
+++ b/include/osf.h
@@ -1,7 +1,7 @@
 #ifndef NFTABLES_OSF_H
 #define NFTABLES_OSF_H
 
-struct expr *osf_expr_alloc(const struct location *loc);
+struct expr *osf_expr_alloc(const struct location *loc, const uint8_t ttl);
 
 extern int nfnl_osf_load_fingerprints(struct netlink_ctx *ctx, int del);
 
diff --git a/src/json.c b/src/json.c
index 1cde270..cea9f19 100644
--- a/src/json.c
+++ b/src/json.c
@@ -857,7 +857,7 @@ json_t *socket_expr_json(const struct expr *expr, struct 
output_ctx *octx)
 
 json_t *osf_expr_json(const struct expr *expr, struct output_ctx *octx)
 {
-   return json_pack("{s:{s:s}}", "osf", "key", "name");
+   return json_pack("{s:{s:i, s:s}}", "osf", "ttl", expr->osf.ttl, "key", 
"name");
 }
 
 json_t *xfrm_expr_json(const struct expr *expr, struct output_ctx *octx)
diff --git a/src/netlink_delinearize.c b/src/netlink_delinearize.c
index cd05885..84948db 100644
--- a/src/netlink_delinearize.c
+++ b/src/netlink_delinearize.c
@@ -655,8 +655,11 @@ static void netlink_parse_osf(struct netlink_parse_ctx 
*ctx,
 {
enum nft_registers dreg;
struct expr *expr;
+   uint8_t ttl;
+
+   ttl = nftnl_expr_get_u8(nle, NFTNL_EXPR_OSF_TTL);
+   expr = osf_expr_alloc(loc, ttl);
 
-   expr = osf_expr_alloc(loc);
dreg = netlink_parse_register(nle, NFTNL_EXPR_OSF_DREG);
netlink_set_register(ctx, dreg, expr);
 }
diff --git a/src/netlink_linearize.c b/src/netlink_linearize.c
index 0ac51bd..0c8f5fe 100644
--- a/src/netlink_linearize.c
+++ b/src/netlink_linearize.c
@@ -227,6 +227,7 @@ static void netlink_gen_osf(struct netlink_linearize_ctx 
*ctx,
 
nle = alloc_nft_expr("osf");
netlink_put_register(nle, NFTNL_EXPR_OSF_DREG, dreg);
+   nftnl_expr_set_u8(nle, NFTNL_EXPR_OSF_TTL, expr->osf.ttl);
nftnl_rule_add_expr(ctx->nlr, nle);
 }
 
diff --git a/src/osf.c b/src/osf.c
index 85c9573..436f34b 100644
--- a/src/osf.c
+++ b/src/osf.c
@@ -5,13 +5,32 @@
 #include 
 #include 
 
+static const char *osf_ttl_int_to_str(const uint8_t ttl)
+{
+   if (ttl == 1)
+   return "ttl loose ";
+   else if (ttl == 2)
+   return "ttl skip ";
+
+   return "";
+}
+
 static void osf_expr_print(const struct expr *expr, struct output_ctx *octx)
 {
-   nft_print(octx, "osf name");
+   const char *ttl_str;
+
+   ttl_str = osf_ttl_int_to_str(expr->osf.ttl);
+   nft_print(octx, "osf %sname", ttl_str);
 }
 
 static void osf_expr_clone(struct expr *new, const struct expr *expr)
 {
+   new->osf.ttl = expr->osf.ttl;
+}
+
+static bool osf_expr_cmp(const struct expr *e1, const struct expr *e2)
+{
+   return e1->osf.ttl == e2->osf.ttl;
 }
 
 static const struct expr_ops osf_expr_ops = {
@@ -19,10 +38,11 @@ 

[iptables PATCH] xtables: Fix for spurious errors from iptables-translate

2018-10-23 Thread Phil Sutter
When aligning iptables-nft error messages with legacy ones, I missed
that translate tools shouldn't check for missing or duplicated chains.

Introduce a boolean in struct nft_xt_cmd_parse indicating we're "just"
translating and do_parse() should skip the checks.

Fixes: b6a06c1a215f8 ("xtables: Align return codes with legacy iptables")
Signed-off-by: Phil Sutter 
---
 iptables/nft-shared.h| 1 +
 iptables/xtables-translate.c | 1 +
 iptables/xtables.c   | 6 +++---
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/iptables/nft-shared.h b/iptables/nft-shared.h
index 1281f080bc31d..e3ecdb4d23df3 100644
--- a/iptables/nft-shared.h
+++ b/iptables/nft-shared.h
@@ -233,6 +233,7 @@ struct nft_xt_cmd_parse {
const char  *policy;
boolrestore;
int verbose;
+   boolxlate;
 };
 
 void do_parse(struct nft_handle *h, int argc, char *argv[],
diff --git a/iptables/xtables-translate.c b/iptables/xtables-translate.c
index f4c0f9cf5a181..849c53f30e155 100644
--- a/iptables/xtables-translate.c
+++ b/iptables/xtables-translate.c
@@ -216,6 +216,7 @@ static int do_command_xlate(struct nft_handle *h, int argc, 
char *argv[],
struct nft_xt_cmd_parse p = {
.table  = *table,
.restore= restore,
+   .xlate  = true,
};
struct iptables_command_state cs;
struct xtables_args args = {
diff --git a/iptables/xtables.c b/iptables/xtables.c
index e0343dbabf2b3..0038804e288c6 100644
--- a/iptables/xtables.c
+++ b/iptables/xtables.c
@@ -1063,16 +1063,16 @@ void do_parse(struct nft_handle *h, int argc, char 
*argv[],
   p->chain);
}
 
-   if (!nft_chain_exists(h, p->table, p->chain))
+   if (!p->xlate && !nft_chain_exists(h, p->table, p->chain))
xtables_error(OTHER_PROBLEM,
  "Chain '%s' does not exist", cs->jumpto);
 
-   if (!cs->target && strlen(cs->jumpto) > 0 &&
+   if (!p->xlate && !cs->target && strlen(cs->jumpto) > 0 &&
!nft_chain_exists(h, p->table, cs->jumpto))
xtables_error(PARAMETER_PROBLEM,
  "Chain '%s' does not exist", cs->jumpto);
}
-   if (p->command == CMD_NEW_CHAIN &&
+   if (!p->xlate && p->command == CMD_NEW_CHAIN &&
nft_chain_exists(h, p->table, p->chain))
xtables_error(OTHER_PROBLEM, "Chain already exists");
 }
-- 
2.19.0



Re: [nft PATCH] tests: shell: Extend get element test

2018-10-23 Thread Phil Sutter
Hi,

On Tue, Oct 23, 2018 at 11:28:28AM +0200, Pablo Neira Ayuso wrote:
> On Mon, Oct 22, 2018 at 11:14:32PM +0200, Phil Sutter wrote:
> > Hi Pablo,
> > 
> > On Mon, Oct 22, 2018 at 09:45:02PM +0200, Pablo Neira Ayuso wrote:
> > [...]
> > > > A bit of context illustrating why I think the code needs more than just
> > > > "more fixes": AFAIU, for each input element (which may be part of a
> > > > range or not), code asks the kernel for whether the element exists, then
> > > > get_set_decompose() is called to find the corresponding range. This
> > > > approach has a critical problem though: Given a set with elements 10 and
> > > > 20-30, asking for 10 and 20 will return the same two elements as asking
> > > > for 10-20 does. Though in the first case, we should return 10 and 20-30
> > > > while in the latter case we should return nothing.
> > > 
> > > With this patch:
> > > 
> > > https://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git/commit/?id=3b18d5eba491b2328b31efa4235724a2354af010
> > > 
> > > I'm going to send a pull request for David now, I guess you are
> > > missing this kernel fix, so the _END interval flag is included when
> > > searching for the right hand side of a matching interval.
> > 
> > Ah! I wondered already why the test still fails although you had applied
> > a bunch of fixes. An additional kernel fix didn't come to mind. :)
> > 
> > > With this kernel patch plus your extended test reports I get this.
> > > 
> > > # ./run-tests.sh testcases/sets/0034get_element_0 
> > > I: using nft binary ./../../src/nft
> > > 
> > > W: [FAILED] testcases/sets/0034get_element_0: expected 0 but got 1
> > > ERROR: asked for '22-24, 26-28', expecting '20-30' but got '20-30, 20-30'
> > > 
> > > I: results: [OK] 0 [FAILED] 1 [TOTAL] 1
> > > 
> > > So, before applying your patch, I'm going to mangle it with this patch
> > > below. If we ask for 22-24 and 26-28, the result in 20-30 and 20-30.
> > > The command returns what is the interval container for 22-24 is 20-30,
> > > same thing for 26-28 which is 20-30.
> > 
> > Well, in theory, I'm fine with this. The expected outcome of 'get
> > element' command is a matter of definition, as long as it doesn't return
> > things which don't exist in the set. But I think the above is
> > inconsistent with regards to single element lookups:
> > 
> > | # nft list set ip t s
> > | table ip t {
> > |   set s {
> > |   type inet_service
> > |   flags interval
> > |   elements = { 10, 20-30, 40, 50-60 }
> > |   }
> > | }
> > | # nft get element ip t s '{ 25, 28 }'
> > | table ip t {
> > |   set s {
> > |   type inet_service
> > |   flags interval
> > |   elements = { 20-30 }
> > |   }
> > | }
> 
> Using current nftables git HEAD plus kernel patch, I'm getting:
> 
> # nft get element ip t s '{ 25, 28 }'
> table ip t {
> set s {
> type inet_service
> flags interval
> elements = { 20-30, 20-30 }
> }
> }

Oh, sorry. I did above test using sources which eliminate duplicates
from the return set (something I was playing with).

> > Here, although I ask for two elements, a single range is returned. So I
> > guess asking for two ranges belonging to the same range should return a
> > single range as well. Maybe a "simple" fix would be to drop duplicates
> > from the return set?
> > 
> > Anyway, from my point of view your solution is fine as well: If a
> > humanoid parser evaluates the results, it can easily spot duplicates and
> > interpret them right. If 'get element' command is used by a script to
> > check for existence of given ranges, it all boils down to a boolean
> > found or not found result, so duplicates shouldn't matter that much.
> 
> My idea was to provide list including exactly the same number of
> elements that have been requested, and in strict order, so you can
> quickly check what you request belongs to somewhere, eg.
> 
> # nft get element ip t s '{ 50, 20, 10 }'
> table ip t {
> set s {
> type inet_service
> flags interval
> elements = { 50-60, 20-30, 10 }
> }
> }
> 

Yes, that makes sense. Thanks for explaining your approach to how 'get
element' command should work!

> Not yet implemented, but we could add something like:
> 
> # nft get element ip t s '{ 50, 20, 10 }'
> table ip t {
> set s {
> type inet_service
> flags interval
> elements = { 50 in 50-60, 20 in 20-30, 10 }
> }
> }
> 
> So there's a clear mapping between what we request and what we get.

This would allow to return partial data, i.e. if one requested element
doesn't exist in the set to still show the remaining ones. But current
behaviour is absolutely fine from my point of view.

> Still, at this stage, the existing behaviour allows humans - for a
> small number of data - and robots, to do mappings between what they
> request and what they find.

Yes, 

[ANNOUNCE] iptables 1.8.1 release

2018-10-23 Thread Florian Westphal
Hi!

The Netfilter project proudly presents:

iptables 1.8.1

This release contains fixes and following new features:
   * add arp & ebtables-save/restore for nf_tables backend
   * new cgroup match revision with reduced memory footprint
   Noteable nft backend fixes:
  - don't print rule counters unless verbose
  - fix rule negation bug that caused valid rules to get rejected
   iptables-legacy fixes:
  fix for segfault when registering hashlimit extension
  fix for an iptables-restore failure with --table option

See ChangeLog that comes attached to this email for more details.

You can download it from:

http://www.netfilter.org/projects/iptables/downloads.html#iptables-1.8.1

To build the code, libnftnl 1.1.1 and libmnl >= 1.0.3 are required:

* http://netfilter.org/projects/libnftnl/index.html
* http://netfilter.org/projects/libmnl/index.html

In case of bugs and feature request, file them via:

* https://bugzilla.netfilter.org


iptables-1.8.1.txt.gz
Description: application/gzip


Re: [nft PATCH] tests: shell: Extend get element test

2018-10-23 Thread Pablo Neira Ayuso
On Mon, Oct 22, 2018 at 11:14:32PM +0200, Phil Sutter wrote:
> Hi Pablo,
> 
> On Mon, Oct 22, 2018 at 09:45:02PM +0200, Pablo Neira Ayuso wrote:
> [...]
> > > A bit of context illustrating why I think the code needs more than just
> > > "more fixes": AFAIU, for each input element (which may be part of a
> > > range or not), code asks the kernel for whether the element exists, then
> > > get_set_decompose() is called to find the corresponding range. This
> > > approach has a critical problem though: Given a set with elements 10 and
> > > 20-30, asking for 10 and 20 will return the same two elements as asking
> > > for 10-20 does. Though in the first case, we should return 10 and 20-30
> > > while in the latter case we should return nothing.
> > 
> > With this patch:
> > 
> > https://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git/commit/?id=3b18d5eba491b2328b31efa4235724a2354af010
> > 
> > I'm going to send a pull request for David now, I guess you are
> > missing this kernel fix, so the _END interval flag is included when
> > searching for the right hand side of a matching interval.
> 
> Ah! I wondered already why the test still fails although you had applied
> a bunch of fixes. An additional kernel fix didn't come to mind. :)
> 
> > With this kernel patch plus your extended test reports I get this.
> > 
> > # ./run-tests.sh testcases/sets/0034get_element_0 
> > I: using nft binary ./../../src/nft
> > 
> > W: [FAILED] testcases/sets/0034get_element_0: expected 0 but got 1
> > ERROR: asked for '22-24, 26-28', expecting '20-30' but got '20-30, 20-30'
> > 
> > I: results: [OK] 0 [FAILED] 1 [TOTAL] 1
> > 
> > So, before applying your patch, I'm going to mangle it with this patch
> > below. If we ask for 22-24 and 26-28, the result in 20-30 and 20-30.
> > The command returns what is the interval container for 22-24 is 20-30,
> > same thing for 26-28 which is 20-30.
> 
> Well, in theory, I'm fine with this. The expected outcome of 'get
> element' command is a matter of definition, as long as it doesn't return
> things which don't exist in the set. But I think the above is
> inconsistent with regards to single element lookups:
> 
> | # nft list set ip t s
> | table ip t {
> | set s {
> | type inet_service
> | flags interval
> | elements = { 10, 20-30, 40, 50-60 }
> | }
> | }
> | # nft get element ip t s '{ 25, 28 }'
> | table ip t {
> | set s {
> | type inet_service
> | flags interval
> | elements = { 20-30 }
> | }
> | }

Using current nftables git HEAD plus kernel patch, I'm getting:

# nft get element ip t s '{ 25, 28 }'
table ip t {
set s {
type inet_service
flags interval
elements = { 20-30, 20-30 }
}
}

> Here, although I ask for two elements, a single range is returned. So I
> guess asking for two ranges belonging to the same range should return a
> single range as well. Maybe a "simple" fix would be to drop duplicates
> from the return set?
> 
> Anyway, from my point of view your solution is fine as well: If a
> humanoid parser evaluates the results, it can easily spot duplicates and
> interpret them right. If 'get element' command is used by a script to
> check for existence of given ranges, it all boils down to a boolean
> found or not found result, so duplicates shouldn't matter that much.

My idea was to provide list including exactly the same number of
elements that have been requested, and in strict order, so you can
quickly check what you request belongs to somewhere, eg.

# nft get element ip t s '{ 50, 20, 10 }'
table ip t {
set s {
type inet_service
flags interval
elements = { 50-60, 20-30, 10 }
}
}

Not yet implemented, but we could add something like:

# nft get element ip t s '{ 50, 20, 10 }'
table ip t {
set s {
type inet_service
flags interval
elements = { 50 in 50-60, 20 in 20-30, 10 }
}
}

So there's a clear mapping between what we request and what we get.

Still, at this stage, the existing behaviour allows humans - for a
small number of data - and robots, to do mappings between what they
request and what they find.

Thanks!


Re: [PATCH 1/2 nft v3 preview] src: osf: add ttl option support

2018-10-23 Thread Pablo Neira Ayuso
On Mon, Oct 22, 2018 at 10:46:18PM +0200, Fernando Fernandez Mancera wrote:
> Add support for ttl option in "osf" expression. Example:
> 
> table ip foo {
>   chain bar {
>   type filter hook input priority filter; policy accept;
>   osf skip name "Linux"

osf ttl skip
osf ttl loose

Don't remove the 'ttl' token from there.

I just didn't want to have: osf ttl ttl-nocheck, which was sort of
redundant.

>   }
> }
> 
> Signed-off-by: Fernando Fernandez Mancera 
> ---
> v1:initial patch
> v2:use "ttl-global, ttl-nocheck.." instead of "1, 2.."
> v3:better names for ttl options, add json and tests/py support

Could you describe what is not yet working here? Thanks!

> ---
>  include/expression.h|  4 +++
>  include/linux/netfilter/nf_tables.h |  2 ++
>  include/osf.h   |  2 +-
>  src/json.c  |  2 +-
>  src/netlink_delinearize.c   |  5 +++-
>  src/netlink_linearize.c |  1 +
>  src/osf.c   | 26 --
>  src/parser_bison.y  | 19 +++--
>  src/parser_json.c   |  5 ++--
>  tests/py/inet/osf.t |  3 +++
>  tests/py/inet/osf.t.json|  3 +++
>  tests/py/inet/osf.t.payload | 41 +++--
>  12 files changed, 90 insertions(+), 23 deletions(-)
> 
> diff --git a/include/expression.h b/include/expression.h
> index d6977c3..f018c95 100644
> --- a/include/expression.h
> +++ b/include/expression.h
> @@ -345,6 +345,10 @@ struct expr {
>   uint8_t direction;
>   uint8_t spnum;
>   } xfrm;
> + struct {
> + /* EXPR_OSF */
> + uint8_t ttl;
> + } osf;
>   };
>  };
>  
> diff --git a/include/linux/netfilter/nf_tables.h 
> b/include/linux/netfilter/nf_tables.h
> index 4e28598..1d13ad3 100644
> --- a/include/linux/netfilter/nf_tables.h
> +++ b/include/linux/netfilter/nf_tables.h
> @@ -939,10 +939,12 @@ enum nft_socket_keys {
>   * enum nft_osf_attributes - nf_tables osf expression netlink attributes
>   *
>   * @NFTA_OSF_DREG: destination register (NLA_U32: nft_registers)
> + * @NFTA_OSF_TTL: Value of the TTL osf option (NLA_U8)
>   */
>  enum nft_osf_attributes {
>   NFTA_OSF_UNSPEC,
>   NFTA_OSF_DREG,
> + NFTA_OSF_TTL,
>   __NFTA_OSF_MAX
>  };
>  #define NFT_OSF_MAX  (__NFTA_OSF_MAX - 1)
> diff --git a/include/osf.h b/include/osf.h
> index 54cdd4a..23ea34d 100644
> --- a/include/osf.h
> +++ b/include/osf.h
> @@ -1,7 +1,7 @@
>  #ifndef NFTABLES_OSF_H
>  #define NFTABLES_OSF_H
>  
> -struct expr *osf_expr_alloc(const struct location *loc);
> +struct expr *osf_expr_alloc(const struct location *loc, const uint8_t ttl);
>  
>  extern int nfnl_osf_load_fingerprints(struct netlink_ctx *ctx, int del);
>  
> diff --git a/src/json.c b/src/json.c
> index 1cde270..cea9f19 100644
> --- a/src/json.c
> +++ b/src/json.c
> @@ -857,7 +857,7 @@ json_t *socket_expr_json(const struct expr *expr, struct 
> output_ctx *octx)
>  
>  json_t *osf_expr_json(const struct expr *expr, struct output_ctx *octx)
>  {
> - return json_pack("{s:{s:s}}", "osf", "key", "name");
> + return json_pack("{s:{s:i, s:s}}", "osf", "ttl", expr->osf.ttl, "key", 
> "name");
>  }
>  
>  json_t *xfrm_expr_json(const struct expr *expr, struct output_ctx *octx)
> diff --git a/src/netlink_delinearize.c b/src/netlink_delinearize.c
> index cd05885..84948db 100644
> --- a/src/netlink_delinearize.c
> +++ b/src/netlink_delinearize.c
> @@ -655,8 +655,11 @@ static void netlink_parse_osf(struct netlink_parse_ctx 
> *ctx,
>  {
>   enum nft_registers dreg;
>   struct expr *expr;
> + uint8_t ttl;
> +
> + ttl = nftnl_expr_get_u8(nle, NFTNL_EXPR_OSF_TTL);
> + expr = osf_expr_alloc(loc, ttl);
>  
> - expr = osf_expr_alloc(loc);
>   dreg = netlink_parse_register(nle, NFTNL_EXPR_OSF_DREG);
>   netlink_set_register(ctx, dreg, expr);
>  }
> diff --git a/src/netlink_linearize.c b/src/netlink_linearize.c
> index 0ac51bd..0c8f5fe 100644
> --- a/src/netlink_linearize.c
> +++ b/src/netlink_linearize.c
> @@ -227,6 +227,7 @@ static void netlink_gen_osf(struct netlink_linearize_ctx 
> *ctx,
>  
>   nle = alloc_nft_expr("osf");
>   netlink_put_register(nle, NFTNL_EXPR_OSF_DREG, dreg);
> + nftnl_expr_set_u8(nle, NFTNL_EXPR_OSF_TTL, expr->osf.ttl);
>   nftnl_rule_add_expr(ctx->nlr, nle);
>  }
>  
> diff --git a/src/osf.c b/src/osf.c
> index 85c9573..1a224fd 100644
> --- a/src/osf.c
> +++ b/src/osf.c
> @@ -5,13 +5,32 @@
>  #include 
>  #include 
>  
> +static const char *osf_ttl_int_to_str(const uint8_t ttl)
> +{
> + if (ttl == 1)
> + return "loose ";
> + else if (ttl == 2)
> + return "skip ";
> +
> + return "";
> +}
> +
>  static void osf_expr_print(const struct expr *expr, struct output_ctx *octx)
>  {
> - 

Re: [PATCH] netfilter: conntrack: fix cloned unconfirmed skb->_nfct race in __nf_conntrack_confirm

2018-10-23 Thread Chieh-Min Wang
Not sure if you have questions about this bug? I draw the broadcast
packet racing flow chart as following:

br_handle_frame
  BR_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, br_handle_frame_finish)
  // skb->_nfct (unconfirmed conntrack) is established at PRE_ROUTING
  br_handle_frame_finish
// check if this packet is broadcast
br_flood_forward
  br_flood
list_for_each_entry_rcu(p, >port_list, list) // iterate
through each port
  maybe_deliver
deliver_clone
  skb = skb_clone(skb)
  __br_forward
BR_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD,...)
// queue in our nfq and received by our userspace program
// goto __nf_conntrack_confirm in our userspace
process context on CPU 1
br_pass_frame_up
  BR_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN,...)
  // __nf_conntrack_confirm in softirq context on CPU 0

Because conntrack confirm can happen at both INPUT and POSTROUTING
stage. So with NFQUEUE running,
skb->_nfct with same unconfirmed conntrack could race on different core.

Please let me know if you have any doubts :)
On Thu, Oct 18, 2018 at 10:03 PM Chieh-Min Wang  wrote:
>
> From: Chieh-Min Wang 
>
> For bridge(br_flood) or broadcast/multicast packets, they could clone skb with
> unconfirmed conntrack which break the rule that unconfirmed skb->_nfct is 
> never shared.
> With nfqueue running on my system, the race can be easily reproduced with 
> following
> warning calltrace:
>
> [13257.707525] CPU: 0 PID: 12132 Comm: main Tainted: PW   4.4.60 
> #7744
> [13257.707568] Hardware name: Qualcomm (Flattened Device Tree)
> [13257.714700] [] (unwind_backtrace) from [] 
> (show_stack+0x10/0x14)
> [13257.720253] [] (show_stack) from [] 
> (dump_stack+0x94/0xa8)
> [13257.728240] [] (dump_stack) from [] 
> (warn_slowpath_common+0x94/0xb0)
> [13257.735268] [] (warn_slowpath_common) from [] 
> (warn_slowpath_null+0x1c/0x24)
> [13257.743519] [] (warn_slowpath_null) from [] 
> (__nf_conntrack_confirm+0xa8/0x618)
> [13257.752284] [] (__nf_conntrack_confirm) from [] 
> (ipv4_confirm+0xb8/0xfc)
> [13257.761049] [] (ipv4_confirm) from [] 
> (nf_iterate+0x48/0xa8)
> [13257.769725] [] (nf_iterate) from [] 
> (nf_hook_slow+0x30/0xb0)
> [13257.777108] [] (nf_hook_slow) from [] 
> (br_nf_post_routing+0x274/0x31c)
> [13257.784486] [] (br_nf_post_routing) from [] 
> (nf_iterate+0x48/0xa8)
> [13257.792556] [] (nf_iterate) from [] 
> (nf_hook_slow+0x30/0xb0)
> [13257.800458] [] (nf_hook_slow) from [] 
> (br_forward_finish+0x94/0xa4)
> [13257.808010] [] (br_forward_finish) from [] 
> (br_nf_forward_finish+0x150/0x1ac)
> [13257.815736] [] (br_nf_forward_finish) from [] 
> (nf_reinject+0x108/0x170)
> [13257.824762] [] (nf_reinject) from [] 
> (nfqnl_recv_verdict+0x3d8/0x420)
> [13257.832924] [] (nfqnl_recv_verdict) from [] 
> (nfnetlink_rcv_msg+0x158/0x248)
> [13257.841256] [] (nfnetlink_rcv_msg) from [] 
> (netlink_rcv_skb+0x54/0xb0)
> [13257.849762] [] (netlink_rcv_skb) from [] 
> (netlink_unicast+0x148/0x23c)
> [13257.858093] [] (netlink_unicast) from [] 
> (netlink_sendmsg+0x2ec/0x368)
> [13257.866348] [] (netlink_sendmsg) from [] 
> (sock_sendmsg+0x34/0x44)
> [13257.874590] [] (sock_sendmsg) from [] 
> (___sys_sendmsg+0x1ec/0x200)
> [13257.882489] [] (___sys_sendmsg) from [] 
> (__sys_sendmsg+0x3c/0x64)
> [13257.890300] [] (__sys_sendmsg) from [] 
> (ret_fast_syscall+0x0/0x34)
>
> The original code just triggered the warning but do nothing. It will caused 
> the shared
> conntrack moves to the dying list and the packet be droppped 
> (nf_ct_resolve_clash returns
> NF_DROP for dying conntrack).
>
> Reproduce steps:
>
> ++
> |  br0(bridge)   |
> ||
> +-+-+-+--+
>   | eth0|   | eth1|   | eth2|
>   | |   | |   | |
>   +--+--+   +--+--+   +---+-+
>  | |  |
>  | |  |
>   +--+-+ +-+--++--+-+
>   | PC1| | PC2|| PC3|
>   ++ ++++
>
> iptables -A FORWARD -m mark --mark 0x100/0x100 -j NFQUEUE --queue-num 
> 100 --queue-bypass
> ps: Our nfq userspace program will set mark on packets whose connection has 
> already been processed.
>
> PC1 sends broadcast packets simulated by hping3:
> hping3 --rand-source --udp 192.168.1.255 -i u100
> ---
>  net/netfilter/nf_conntrack_core.c | 13 ++---
>  1 file changed, 10 insertions(+), 3 deletions(-)
>
> diff --git a/net/netfilter/nf_conntrack_core.c 
> b/net/netfilter/nf_conntrack_core.c
> index ca1168d..1c16bd9 100644
> --- a/net/netfilter/nf_conntrack_core.c
> +++ b/net/netfilter/nf_conntrack_core.c
> @@ -901,10 +901,17 @@ __nf_conntrack_confirm(struct sk_buff *skb)
>  * REJECT will give spurious warnings here.
>  */
>
> -   /* No external references means no one else could have
> -* confirmed us.
> +   /* Another skb with the same unconfirmed conntrack may
> +* win 

Re: [nft PATCH] tests: shell: Extend get element test

2018-10-22 Thread Phil Sutter
Hi Pablo,

On Mon, Oct 22, 2018 at 09:45:02PM +0200, Pablo Neira Ayuso wrote:
[...]
> > A bit of context illustrating why I think the code needs more than just
> > "more fixes": AFAIU, for each input element (which may be part of a
> > range or not), code asks the kernel for whether the element exists, then
> > get_set_decompose() is called to find the corresponding range. This
> > approach has a critical problem though: Given a set with elements 10 and
> > 20-30, asking for 10 and 20 will return the same two elements as asking
> > for 10-20 does. Though in the first case, we should return 10 and 20-30
> > while in the latter case we should return nothing.
> 
> With this patch:
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git/commit/?id=3b18d5eba491b2328b31efa4235724a2354af010
> 
> I'm going to send a pull request for David now, I guess you are
> missing this kernel fix, so the _END interval flag is included when
> searching for the right hand side of a matching interval.

Ah! I wondered already why the test still fails although you had applied
a bunch of fixes. An additional kernel fix didn't come to mind. :)

> With this kernel patch plus your extended test reports I get this.
> 
> # ./run-tests.sh testcases/sets/0034get_element_0 
> I: using nft binary ./../../src/nft
> 
> W: [FAILED] testcases/sets/0034get_element_0: expected 0 but got 1
> ERROR: asked for '22-24, 26-28', expecting '20-30' but got '20-30, 20-30'
> 
> I: results: [OK] 0 [FAILED] 1 [TOTAL] 1
> 
> So, before applying your patch, I'm going to mangle it with this patch
> below. If we ask for 22-24 and 26-28, the result in 20-30 and 20-30.
> The command returns what is the interval container for 22-24 is 20-30,
> same thing for 26-28 which is 20-30.

Well, in theory, I'm fine with this. The expected outcome of 'get
element' command is a matter of definition, as long as it doesn't return
things which don't exist in the set. But I think the above is
inconsistent with regards to single element lookups:

| # nft list set ip t s
| table ip t {
|   set s {
|   type inet_service
|   flags interval
|   elements = { 10, 20-30, 40, 50-60 }
|   }
| }
| # nft get element ip t s '{ 25, 28 }'
| table ip t {
|   set s {
|   type inet_service
|   flags interval
|   elements = { 20-30 }
|   }
| }

Here, although I ask for two elements, a single range is returned. So I
guess asking for two ranges belonging to the same range should return a
single range as well. Maybe a "simple" fix would be to drop duplicates
from the return set?

Anyway, from my point of view your solution is fine as well: If a
humanoid parser evaluates the results, it can easily spot duplicates and
interpret them right. If 'get element' command is used by a script to
check for existence of given ranges, it all boils down to a boolean
found or not found result, so duplicates shouldn't matter that much.

Thanks, Phil


[PATCH 1/2 nft v3 preview] src: osf: add ttl option support

2018-10-22 Thread Fernando Fernandez Mancera
Add support for ttl option in "osf" expression. Example:

table ip foo {
chain bar {
type filter hook input priority filter; policy accept;
osf skip name "Linux"
}
}

Signed-off-by: Fernando Fernandez Mancera 
---
v1:initial patch
v2:use "ttl-global, ttl-nocheck.." instead of "1, 2.."
v3:better names for ttl options, add json and tests/py support
---
 include/expression.h|  4 +++
 include/linux/netfilter/nf_tables.h |  2 ++
 include/osf.h   |  2 +-
 src/json.c  |  2 +-
 src/netlink_delinearize.c   |  5 +++-
 src/netlink_linearize.c |  1 +
 src/osf.c   | 26 --
 src/parser_bison.y  | 19 +++--
 src/parser_json.c   |  5 ++--
 tests/py/inet/osf.t |  3 +++
 tests/py/inet/osf.t.json|  3 +++
 tests/py/inet/osf.t.payload | 41 +++--
 12 files changed, 90 insertions(+), 23 deletions(-)

diff --git a/include/expression.h b/include/expression.h
index d6977c3..f018c95 100644
--- a/include/expression.h
+++ b/include/expression.h
@@ -345,6 +345,10 @@ struct expr {
uint8_t direction;
uint8_t spnum;
} xfrm;
+   struct {
+   /* EXPR_OSF */
+   uint8_t ttl;
+   } osf;
};
 };
 
diff --git a/include/linux/netfilter/nf_tables.h 
b/include/linux/netfilter/nf_tables.h
index 4e28598..1d13ad3 100644
--- a/include/linux/netfilter/nf_tables.h
+++ b/include/linux/netfilter/nf_tables.h
@@ -939,10 +939,12 @@ enum nft_socket_keys {
  * enum nft_osf_attributes - nf_tables osf expression netlink attributes
  *
  * @NFTA_OSF_DREG: destination register (NLA_U32: nft_registers)
+ * @NFTA_OSF_TTL: Value of the TTL osf option (NLA_U8)
  */
 enum nft_osf_attributes {
NFTA_OSF_UNSPEC,
NFTA_OSF_DREG,
+   NFTA_OSF_TTL,
__NFTA_OSF_MAX
 };
 #define NFT_OSF_MAX(__NFTA_OSF_MAX - 1)
diff --git a/include/osf.h b/include/osf.h
index 54cdd4a..23ea34d 100644
--- a/include/osf.h
+++ b/include/osf.h
@@ -1,7 +1,7 @@
 #ifndef NFTABLES_OSF_H
 #define NFTABLES_OSF_H
 
-struct expr *osf_expr_alloc(const struct location *loc);
+struct expr *osf_expr_alloc(const struct location *loc, const uint8_t ttl);
 
 extern int nfnl_osf_load_fingerprints(struct netlink_ctx *ctx, int del);
 
diff --git a/src/json.c b/src/json.c
index 1cde270..cea9f19 100644
--- a/src/json.c
+++ b/src/json.c
@@ -857,7 +857,7 @@ json_t *socket_expr_json(const struct expr *expr, struct 
output_ctx *octx)
 
 json_t *osf_expr_json(const struct expr *expr, struct output_ctx *octx)
 {
-   return json_pack("{s:{s:s}}", "osf", "key", "name");
+   return json_pack("{s:{s:i, s:s}}", "osf", "ttl", expr->osf.ttl, "key", 
"name");
 }
 
 json_t *xfrm_expr_json(const struct expr *expr, struct output_ctx *octx)
diff --git a/src/netlink_delinearize.c b/src/netlink_delinearize.c
index cd05885..84948db 100644
--- a/src/netlink_delinearize.c
+++ b/src/netlink_delinearize.c
@@ -655,8 +655,11 @@ static void netlink_parse_osf(struct netlink_parse_ctx 
*ctx,
 {
enum nft_registers dreg;
struct expr *expr;
+   uint8_t ttl;
+
+   ttl = nftnl_expr_get_u8(nle, NFTNL_EXPR_OSF_TTL);
+   expr = osf_expr_alloc(loc, ttl);
 
-   expr = osf_expr_alloc(loc);
dreg = netlink_parse_register(nle, NFTNL_EXPR_OSF_DREG);
netlink_set_register(ctx, dreg, expr);
 }
diff --git a/src/netlink_linearize.c b/src/netlink_linearize.c
index 0ac51bd..0c8f5fe 100644
--- a/src/netlink_linearize.c
+++ b/src/netlink_linearize.c
@@ -227,6 +227,7 @@ static void netlink_gen_osf(struct netlink_linearize_ctx 
*ctx,
 
nle = alloc_nft_expr("osf");
netlink_put_register(nle, NFTNL_EXPR_OSF_DREG, dreg);
+   nftnl_expr_set_u8(nle, NFTNL_EXPR_OSF_TTL, expr->osf.ttl);
nftnl_rule_add_expr(ctx->nlr, nle);
 }
 
diff --git a/src/osf.c b/src/osf.c
index 85c9573..1a224fd 100644
--- a/src/osf.c
+++ b/src/osf.c
@@ -5,13 +5,32 @@
 #include 
 #include 
 
+static const char *osf_ttl_int_to_str(const uint8_t ttl)
+{
+   if (ttl == 1)
+   return "loose ";
+   else if (ttl == 2)
+   return "skip ";
+
+   return "";
+}
+
 static void osf_expr_print(const struct expr *expr, struct output_ctx *octx)
 {
-   nft_print(octx, "osf name");
+   const char *ttl_str;
+
+   ttl_str = osf_ttl_int_to_str(expr->osf.ttl);
+   nft_print(octx, "osf %sname", ttl_str);
 }
 
 static void osf_expr_clone(struct expr *new, const struct expr *expr)
 {
+   new->osf.ttl = expr->osf.ttl;
+}
+
+static bool osf_expr_cmp(const struct expr *e1, const struct expr *e2)
+{
+   return e1->osf.ttl == e2->osf.ttl;
 }
 
 static const struct expr_ops osf_expr_ops = {
@@ -19,10 +38,11 @@ static const struct 

[PATCH 2/2 nft v3] doc: osf: add ttl option to man page

2018-10-22 Thread Fernando Fernandez Mancera
---
 doc/primary-expression.txt | 20 
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/doc/primary-expression.txt b/doc/primary-expression.txt
index 0fda76d..0c02d9d 100644
--- a/doc/primary-expression.txt
+++ b/doc/primary-expression.txt
@@ -187,18 +187,30 @@ and others) from packets with the SYN bit set.
 [options="header"]
 |==
 |Name |Description| Type
+|ttl|
+Do TTL checks on the packet to determine the operating system.|
+string
 |name|
-Name of the OS signature to match. All signatures can be found at pf.os file.|
-Use "unknown" for OS signatures that the expression could not detect.
+Name of the OS signature to match. All signatures can be found at pf.os file.
+Use "unknown" for OS signatures that the expression could not detect.|
+string
 |==
 
+.Available ttl values
+-
+If no TTL attribute is passed, make a true IP header and fingerprint TTL true 
comparison. This generally works for LANs.
+
+* loose: Check if the IP header's TTL is less than the fingerprint one. Works 
for globally-routable addresses.
+* skip: Do not compare the TTL at all.
+-
+
 .Using osf expression
 -
-# Accept packets that match the "Linux" OS signature.
+# Accept packets that match the "Linux" OS genre signature without comparing 
TTL.
 table inet x {
 chain y {
type filter hook input priority 0; policy accept;
-osf "Linux"
+osf skip name "Linux"
 }
 }
 ---
-- 
2.19.1



Re: [PATCH iptables] configure: bump versions for 1.8.1 release

2018-10-22 Thread Pablo Neira Ayuso
On Mon, Oct 22, 2018 at 06:51:08PM +0200, Florian Westphal wrote:
> this release also adds xtables_getether* functions to libxtables, so
> current and age are incremented as well.
> 
> Signed-off-by: Florian Westphal 

Acked-by: Pablo Neira Ayuso 

Thanks Florian!


Re: [PATCH 1/2 nft v2] src: osf: add ttl option support

2018-10-22 Thread Pablo Neira Ayuso
On Mon, Oct 22, 2018 at 09:38:31PM +0200, Fernando Fernandez Mancera wrote:
> El 22 de octubre de 2018 20:38:13 CEST, Pablo Neira Ayuso 
>  escribió:
> >On Mon, Oct 22, 2018 at 05:35:42PM +0200, Fernando Fernandez Mancera
> >wrote:
> >> I am going to add the necessary NFT_OSF_* definitions in the
> >nf_tables.h
> >
> >Just add a copy of nf_osf.h to nftables tree. We cannot mangle
> >nf_tables.h, it's a copy from the original header to ensure sources
> >compile with any kernel version.
> >
> >> header. The options format is "ttl-*" because "global" or "loose"
> >could be
> >> confusing. Do you prefer the option without "ttl-"?
> >
> >Yes, the ttl- is not needed.
> >
> >Make sure what you list via 'nft list ruleset > file' can be loaded
> >via 'nft -f file'
> 
> The patch is already done but tests/py is giving me errors when
> using ct mark and maps with osf. Should I send it?

You can send a preview / RFC.


[PATCH nf-next] netfilter: nft_osf: check if attribute is present

2018-10-22 Thread Pablo Neira Ayuso
If the attribute is not sent, eg. old libnftnl binary, then
tb[NFTA_OSF_TTL] is NULL and kernel crashes from the _init path.

Fixes: a218dc82f0b5 ("netfilter: nft_osf: Add ttl option support")
Signed-off-by: Pablo Neira Ayuso 
---
 net/netfilter/nft_osf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c
index 0b452fd470c4..ee87f9b2df47 100644
--- a/net/netfilter/nft_osf.c
+++ b/net/netfilter/nft_osf.c
@@ -50,7 +50,7 @@ static int nft_osf_init(const struct nft_ctx *ctx,
int err;
u8 ttl;
 
-   if (nla_get_u8(tb[NFTA_OSF_TTL])) {
+   if (tb[NFTA_OSF_TTL]) {
ttl = nla_get_u8(tb[NFTA_OSF_TTL]);
if (ttl > 2)
return -EINVAL;
-- 
2.11.0



Re: [nft PATCH] tests: shell: Extend get element test

2018-10-22 Thread Pablo Neira Ayuso
Hi Phil,

On Mon, Oct 22, 2018 at 03:45:09PM +0200, Phil Sutter wrote:
> Despite the recent fixes, the test still fails. While trying to address
> the remaining issues, I found more potentially problematic inputs so
> extend the test by those.

Applied, thanks. More comments, see below.

> ---
> Hi,
> 
> A bit of context illustrating why I think the code needs more than just
> "more fixes": AFAIU, for each input element (which may be part of a
> range or not), code asks the kernel for whether the element exists, then
> get_set_decompose() is called to find the corresponding range. This
> approach has a critical problem though: Given a set with elements 10 and
> 20-30, asking for 10 and 20 will return the same two elements as asking
> for 10-20 does. Though in the first case, we should return 10 and 20-30
> while in the latter case we should return nothing.

With this patch:

https://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git/commit/?id=3b18d5eba491b2328b31efa4235724a2354af010

I'm going to send a pull request for David now, I guess you are
missing this kernel fix, so the _END interval flag is included when
searching for the right hand side of a matching interval.

With this kernel patch plus your extended test reports I get this.

# ./run-tests.sh testcases/sets/0034get_element_0 
I: using nft binary ./../../src/nft

W: [FAILED] testcases/sets/0034get_element_0: expected 0 but got 1
ERROR: asked for '22-24, 26-28', expecting '20-30' but got '20-30, 20-30'

I: results: [OK] 0 [FAILED] 1 [TOTAL] 1

So, before applying your patch, I'm going to mangle it with this patch
below. If we ask for 22-24 and 26-28, the result in 20-30 and 20-30.
The command returns what is the interval container for 22-24 is 20-30,
same thing for 26-28 which is 20-30.

diff --git a/tests/shell/testcases/sets/0034get_element_0
b/tests/shell/testcases/sets/0034get_element_0
index b1f14476d90d..c7e7298a4aac 100755
--- a/tests/shell/testcases/sets/0034get_element_0
+++ b/tests/shell/testcases/sets/0034get_element_0
@@ -27,7 +27,7 @@ check 15-18 ""
 
 # multiple single elements, ranges smaller than present
 check "10, 40" "10, 40"
-check "22-24, 26-28" 20-30
+check "22-24, 26-28" "20-30, 20-30"
 check 21-29 20-30
 
 # mixed single elements and ranges

Thanks!


Re: [PATCH 1/2 nft v2] src: osf: add ttl option support

2018-10-22 Thread Fernando Fernandez Mancera
El 22 de octubre de 2018 20:38:13 CEST, Pablo Neira Ayuso  
escribió:
>On Mon, Oct 22, 2018 at 05:35:42PM +0200, Fernando Fernandez Mancera
>wrote:
>> I am going to add the necessary NFT_OSF_* definitions in the
>nf_tables.h
>
>Just add a copy of nf_osf.h to nftables tree. We cannot mangle
>nf_tables.h, it's a copy from the original header to ensure sources
>compile with any kernel version.
>
>> header. The options format is "ttl-*" because "global" or "loose"
>could be
>> confusing. Do you prefer the option without "ttl-"?
>
>Yes, the ttl- is not needed.
>
>Make sure what you list via 'nft list ruleset > file' can be loaded
>via 'nft -f file'

The patch is already done but tests/py is giving me errors when using ct mark 
and maps with osf. Should I send it?

I am trying to solve this anyway.

Thanks :-)


[PATCH nft 3/3] netlink: reset mnl_socket field in struct nft_ctx on EINTR

2018-10-22 Thread Pablo Neira Ayuso
Otherwise we keep using the old netlink socket if we hit EINTR.

Signed-off-by: Pablo Neira Ayuso 
---
Requires patches 1/3 and 2/3.

 include/netlink.h | 2 +-
 src/netlink.c | 4 ++--
 src/rule.c| 3 +--
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/include/netlink.h b/include/netlink.h
index b26ef4598500..66e400d88f19 100644
--- a/include/netlink.h
+++ b/include/netlink.h
@@ -157,7 +157,7 @@ extern void netlink_dump_obj(struct nftnl_obj *nlo, struct 
netlink_ctx *ctx);
 
 extern int netlink_batch_send(struct netlink_ctx *ctx, struct list_head 
*err_list);
 
-extern void netlink_restart(struct mnl_socket *nf_sock);
+extern struct mnl_socket *netlink_restart(struct mnl_socket *nf_sock);
 #define netlink_abi_error()\
__netlink_abi_error(__FILE__, __LINE__, strerror(errno));
 extern void __noreturn __netlink_abi_error(const char *file, int line, const 
char *reason);
diff --git a/src/netlink.c b/src/netlink.c
index 755949c965c3..403780ffdefb 100644
--- a/src/netlink.c
+++ b/src/netlink.c
@@ -73,10 +73,10 @@ void netlink_close_sock(struct mnl_socket *nf_sock)
mnl_socket_close(nf_sock);
 }
 
-void netlink_restart(struct mnl_socket *nf_sock)
+struct mnl_socket *netlink_restart(struct mnl_socket *nf_sock)
 {
netlink_close_sock(nf_sock);
-   nf_sock = netlink_open_sock();
+   return netlink_open_sock();
 }
 
 void __noreturn __netlink_abi_error(const char *file, int line,
diff --git a/src/rule.c b/src/rule.c
index 9e24c35c85ae..12ac1310034d 100644
--- a/src/rule.c
+++ b/src/rule.c
@@ -229,7 +229,6 @@ int cache_update(struct nft_ctx *nft, enum cmd_ops cmd, 
struct list_head *msgs)
.msgs   = msgs,
.nft= nft,
};
-   struct mnl_socket *nf_sock = nft->nf_sock;
struct nft_cache *cache = >cache;
 
 replay:
@@ -244,7 +243,7 @@ replay:
if (ret < 0) {
cache_release(cache);
if (errno == EINTR) {
-   netlink_restart(nf_sock);
+   nft->nf_sock = netlink_restart(nft->nf_sock);
goto replay;
}
return -1;
-- 
2.11.0



[PATCH nft 1/3] src: pass struct nft_ctx through struct eval_ctx

2018-10-22 Thread Pablo Neira Ayuso
Signed-off-by: Pablo Neira Ayuso 
---
 include/rule.h |   5 +-
 src/evaluate.c | 134 ++---
 src/parser_bison.y |   5 +-
 3 files changed, 69 insertions(+), 75 deletions(-)

diff --git a/include/rule.h b/include/rule.h
index 9e029899513f..c6c5ca9805b7 100644
--- a/include/rule.h
+++ b/include/rule.h
@@ -600,16 +600,13 @@ extern void cmd_free(struct cmd *cmd);
  * @pctx:  payload context
  */
 struct eval_ctx {
-   struct mnl_socket   *nf_sock;
+   struct nft_ctx  *nft;
struct list_head*msgs;
struct cmd  *cmd;
struct table*table;
struct rule *rule;
struct set  *set;
struct stmt *stmt;
-   struct nft_cache*cache;
-   struct output_ctx   *octx;
-   unsigned intdebug_mask;
struct expr_ctx ectx;
struct proto_ctxpctx;
 };
diff --git a/src/evaluate.c b/src/evaluate.c
index 1880578b8738..15aa5980a164 100644
--- a/src/evaluate.c
+++ b/src/evaluate.c
@@ -159,7 +159,7 @@ static struct table *table_lookup_global(struct eval_ctx 
*ctx)
if (ctx->table != NULL)
return ctx->table;
 
-   table = table_lookup(>cmd->handle, ctx->cache);
+   table = table_lookup(>cmd->handle, >nft->cache);
if (table == NULL)
return NULL;
 
@@ -187,8 +187,8 @@ static int expr_evaluate_symbol(struct eval_ctx *ctx, 
struct expr **expr)
}
break;
case SYMBOL_SET:
-   ret = cache_update(ctx->nf_sock, ctx->cache, ctx->cmd->op,
-  ctx->msgs, ctx->debug_mask, ctx->octx);
+   ret = cache_update(ctx->nft->nf_sock, >nft->cache, 
ctx->cmd->op,
+  ctx->msgs, ctx->nft->debug_mask, 
>nft->output);
if (ret < 0)
return ret;
 
@@ -1730,9 +1730,9 @@ static int expr_evaluate_socket(struct eval_ctx *ctx, 
struct expr **expr)
 static int expr_evaluate_osf(struct eval_ctx *ctx, struct expr **expr)
 {
struct netlink_ctx nl_ctx = {
-   .nf_sock= ctx->nf_sock,
-   .debug_mask = ctx->debug_mask,
-   .octx   = ctx->octx,
+   .nf_sock= ctx->nft->nf_sock,
+   .debug_mask = ctx->nft->debug_mask,
+   .octx   = >nft->output,
.seqnum = time(NULL),
};
 
@@ -1770,13 +1770,13 @@ static int expr_evaluate_xfrm(struct eval_ctx *ctx, 
struct expr **exprp)
 
 static int expr_evaluate(struct eval_ctx *ctx, struct expr **expr)
 {
-   if (ctx->debug_mask & NFT_DEBUG_EVALUATION) {
+   if (ctx->nft->debug_mask & NFT_DEBUG_EVALUATION) {
struct error_record *erec;
erec = erec_create(EREC_INFORMATIONAL, &(*expr)->location,
   "Evaluate %s", (*expr)->ops->name);
-   erec_print(ctx->octx, erec, ctx->debug_mask);
-   expr_print(*expr, ctx->octx);
-   nft_print(ctx->octx, "\n\n");
+   erec_print(>nft->output, erec, ctx->nft->debug_mask);
+   expr_print(*expr, >nft->output);
+   nft_print(>nft->output, "\n\n");
erec_destroy(erec);
}
 
@@ -2885,13 +2885,13 @@ static int stmt_evaluate_objref(struct eval_ctx *ctx, 
struct stmt *stmt)
 
 int stmt_evaluate(struct eval_ctx *ctx, struct stmt *stmt)
 {
-   if (ctx->debug_mask & NFT_DEBUG_EVALUATION) {
+   if (ctx->nft->debug_mask & NFT_DEBUG_EVALUATION) {
struct error_record *erec;
erec = erec_create(EREC_INFORMATIONAL, >location,
   "Evaluate %s", stmt->ops->name);
-   erec_print(ctx->octx, erec, ctx->debug_mask);
-   stmt_print(stmt, ctx->octx);
-   nft_print(ctx->octx, "\n\n");
+   erec_print(>nft->output, erec, ctx->nft->debug_mask);
+   stmt_print(stmt, >nft->output);
+   nft_print(>nft->output, "\n\n");
erec_destroy(erec);
}
 
@@ -3086,12 +3086,12 @@ static int rule_translate_index(struct eval_ctx *ctx, 
struct rule *rule)
int ret;
 
/* update cache with CMD_LIST so that rules are fetched, too */
-   ret = cache_update(ctx->nf_sock, ctx->cache, CMD_LIST,
-   ctx->msgs, ctx->debug_mask, ctx->octx);
+   ret = cache_update(ctx->nft->nf_sock, >nft->cache, CMD_LIST,
+   ctx->msgs, ctx->nft->debug_mask, >nft->output);
if (ret < 0)
return ret;
 
-   table = table_lookup(>handle, ctx->cache);
+   table = table_lookup(>handle, >nft->cache);
if (!table)
return cmd_error(ctx, >handle.table.location,
"Could not process rule: %s",
@@ -3122,7 +3122,7 @@ 

[PATCH nft 2/3] src: pass struct nft_ctx through struct netlink_ctx

2018-10-22 Thread Pablo Neira Ayuso
Signed-off-by: Pablo Neira Ayuso 
---
 include/netlink.h |   9 +---
 include/rule.h|  10 ++--
 src/evaluate.c|  55 +++-
 src/libnftables.c |   8 +--
 src/mnl.c |  28 +-
 src/monitor.c |  52 +--
 src/netlink.c |  41 +++
 src/netlink_delinearize.c |   4 +-
 src/nfnl_osf.c|  16 +++---
 src/rule.c| 128 ++
 10 files changed, 156 insertions(+), 195 deletions(-)

diff --git a/include/netlink.h b/include/netlink.h
index b7e2232f4bd9..b26ef4598500 100644
--- a/include/netlink.h
+++ b/include/netlink.h
@@ -35,26 +35,21 @@ extern const struct location netlink_location;
 /** 
  * struct netlink_ctx
  *
+ * @nft:   nftables context
  * @msgs:  message queue
  * @list:  list of parsed rules/chains/tables
  * @set:   current set
  * @data:  pointer to pass data to callback
  * @seqnum:sequence number
- * @octx:  output context
- * @debug_mask:display debugging information
- * @cache: cache context
  */
 struct netlink_ctx {
-   struct mnl_socket   *nf_sock;
+   struct nft_ctx  *nft;
struct list_head*msgs;
struct list_headlist;
struct set  *set;
const void  *data;
uint32_tseqnum;
struct nftnl_batch  *batch;
-   unsigned intdebug_mask;
-   struct output_ctx   *octx;
-   struct nft_cache*cache;
 };
 
 extern struct nftnl_expr *alloc_nft_expr(const char *name);
diff --git a/include/rule.h b/include/rule.h
index c6c5ca9805b7..977f274842ef 100644
--- a/include/rule.h
+++ b/include/rule.h
@@ -618,12 +618,10 @@ extern struct error_record *rule_postprocess(struct rule 
*rule);
 struct netlink_ctx;
 extern int do_command(struct netlink_ctx *ctx, struct cmd *cmd);
 
-extern int cache_update(struct mnl_socket *nf_sock, struct nft_cache *cache,
-   enum cmd_ops cmd, struct list_head *msgs, unsigned int 
debug_flag,
-   struct output_ctx *octx);
-extern void cache_flush(struct mnl_socket *nf_sock, struct nft_cache *cache,
-   enum cmd_ops cmd, struct list_head *msgs,
-   unsigned int debug_mask, struct output_ctx *octx);
+extern int cache_update(struct nft_ctx *ctx, enum cmd_ops cmd,
+   struct list_head *msgs);
+extern void cache_flush(struct nft_ctx *ctx, enum cmd_ops cmd,
+   struct list_head *msgs);
 extern void cache_release(struct nft_cache *cache);
 
 enum udata_type {
diff --git a/src/evaluate.c b/src/evaluate.c
index 15aa5980a164..66e9293fd4ca 100644
--- a/src/evaluate.c
+++ b/src/evaluate.c
@@ -187,8 +187,7 @@ static int expr_evaluate_symbol(struct eval_ctx *ctx, 
struct expr **expr)
}
break;
case SYMBOL_SET:
-   ret = cache_update(ctx->nft->nf_sock, >nft->cache, 
ctx->cmd->op,
-  ctx->msgs, ctx->nft->debug_mask, 
>nft->output);
+   ret = cache_update(ctx->nft, ctx->cmd->op, ctx->msgs);
if (ret < 0)
return ret;
 
@@ -1730,9 +1729,7 @@ static int expr_evaluate_socket(struct eval_ctx *ctx, 
struct expr **expr)
 static int expr_evaluate_osf(struct eval_ctx *ctx, struct expr **expr)
 {
struct netlink_ctx nl_ctx = {
-   .nf_sock= ctx->nft->nf_sock,
-   .debug_mask = ctx->nft->debug_mask,
-   .octx   = >nft->output,
+   .nft= ctx->nft,
.seqnum = time(NULL),
};
 
@@ -3086,8 +3083,7 @@ static int rule_translate_index(struct eval_ctx *ctx, 
struct rule *rule)
int ret;
 
/* update cache with CMD_LIST so that rules are fetched, too */
-   ret = cache_update(ctx->nft->nf_sock, >nft->cache, CMD_LIST,
-   ctx->msgs, ctx->nft->debug_mask, >nft->output);
+   ret = cache_update(ctx->nft, CMD_LIST, ctx->msgs);
if (ret < 0)
return ret;
 
@@ -3315,15 +3311,13 @@ static int cmd_evaluate_add(struct eval_ctx *ctx, 
struct cmd *cmd)
 
switch (cmd->obj) {
case CMD_OBJ_SETELEM:
-   ret = cache_update(ctx->nft->nf_sock, >nft->cache, cmd->op,
-  ctx->msgs, ctx->nft->debug_mask, 
>nft->output);
+   ret = cache_update(ctx->nft, cmd->op, ctx->msgs);
if (ret < 0)
return ret;
 
return setelem_evaluate(ctx, >expr);
case CMD_OBJ_SET:
-   ret = cache_update(ctx->nft->nf_sock, >nft->cache, cmd->op,
-  ctx->msgs, ctx->nft->debug_mask, 
>nft->output);
+   ret = cache_update(ctx->nft, cmd->op, ctx->msgs);
if (ret < 0)
  

Re: [PATCH 1/2 nft v2] src: osf: add ttl option support

2018-10-22 Thread Pablo Neira Ayuso
On Mon, Oct 22, 2018 at 05:35:42PM +0200, Fernando Fernandez Mancera wrote:
> I am going to add the necessary NFT_OSF_* definitions in the nf_tables.h

Just add a copy of nf_osf.h to nftables tree. We cannot mangle
nf_tables.h, it's a copy from the original header to ensure sources
compile with any kernel version.

> header. The options format is "ttl-*" because "global" or "loose" could be
> confusing. Do you prefer the option without "ttl-"?

Yes, the ttl- is not needed.

Make sure what you list via 'nft list ruleset > file' can be loaded
via 'nft -f file'


[PATCH iptables] configure: bump versions for 1.8.1 release

2018-10-22 Thread Florian Westphal
this release also adds xtables_getether* functions to libxtables, so
current and age are incremented as well.

Signed-off-by: Florian Westphal 
---
 configure.ac | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/configure.ac b/configure.ac
index 07e32064489b..1da8555e65f3 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,9 +1,9 @@
 
-AC_INIT([iptables], [1.8.0])
+AC_INIT([iptables], [1.8.1])
 
 # See libtool.info "Libtool's versioning system"
-libxtables_vcurrent=12
-libxtables_vage=0
+libxtables_vcurrent=13
+libxtables_vage=1
 
 AC_CONFIG_AUX_DIR([build-aux])
 AC_CONFIG_HEADERS([config.h])
-- 
2.17.2



Re: [PATCH 1/2 nft v2] src: osf: add ttl option support

2018-10-22 Thread Fernando Fernandez Mancera

Comments below.

On 10/15/18 2:47 PM, Pablo Neira Ayuso wrote:

Please send a v3 including tests/py. More comments below.

On Sat, Sep 29, 2018 at 12:15:17PM +0200, Fernando Fernandez Mancera wrote:

Add support for ttl option in "osf" expression. Example:

table ip foo {
chain bar {
type filter hook input priority filter; policy accept;
osf ttl nocheck name "Linux"


Listing and output should match, ie. what you list should work with
nft -f, see below.


diff --git a/src/parser_bison.y b/src/parser_bison.y
index 831090b..a7ec858 100644
--- a/src/parser_bison.y
+++ b/src/parser_bison.y
@@ -739,6 +739,7 @@ int nft_lex(void *, void *, void *);
  %type  fib_tuple   fib_result  fib_flag
  
  %type 			osf_expr

+%type   osf_ttl
  %destructor { expr_free($$); }osf_expr
  
  %type 			markup_format

@@ -3112,9 +3113,21 @@ fib_tuple:   fib_flagDOT 
fib_tuple
|   fib_flag
;
  
-osf_expr		:	OSF	NAME

+osf_expr   :   OSF osf_ttl NAME
{
-   $$ = osf_expr_alloc(&@$);
+   $$ = osf_expr_alloc(&@$, $2);
+   }
+   ;
+
+osf_ttl:   /* empty */ { $$ = 0; }
+   |   STRING
+   {
+   if (!strcmp($1, "ttl-global"))


This should be "global". But I would suggest you rename this to "loose".


+   $$ = 1;


Can we use NFT_OSF_* definitions, instead of magic number?


+   else if (!strcmp($1, "ttl-nocheck"))


This should be "nocheck". But I'd suggest you rename this to "skip"


+   $$ = 2;


Same here, avoid magic number, use definition.


+   else
+   $$ = 3;


Same thing.



I am going to add the necessary NFT_OSF_* definitions in the nf_tables.h 
header. The options format is "ttl-*" because "global" or "loose" could 
be confusing. Do you prefer the option without "ttl-"?


Thanks.


[nft PATCH] tests: shell: Extend get element test

2018-10-22 Thread Phil Sutter
Despite the recent fixes, the test still fails. While trying to address
the remaining issues, I found more potentially problematic inputs so
extend the test by those.

Signed-off-by: Phil Sutter 
---
Hi,

A bit of context illustrating why I think the code needs more than just
"more fixes": AFAIU, for each input element (which may be part of a
range or not), code asks the kernel for whether the element exists, then
get_set_decompose() is called to find the corresponding range. This
approach has a critical problem though: Given a set with elements 10 and
20-30, asking for 10 and 20 will return the same two elements as asking
for 10-20 does. Though in the first case, we should return 10 and 20-30
while in the latter case we should return nothing.
---
 tests/shell/testcases/sets/0034get_element_0 | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/tests/shell/testcases/sets/0034get_element_0 
b/tests/shell/testcases/sets/0034get_element_0
index 2bfb527b8080f..b1f14476d90d6 100755
--- a/tests/shell/testcases/sets/0034get_element_0
+++ b/tests/shell/testcases/sets/0034get_element_0
@@ -27,10 +27,17 @@ check 15-18 ""
 
 # multiple single elements, ranges smaller than present
 check "10, 40" "10, 40"
+check "22-24, 26-28" 20-30
 check 21-29 20-30
 
+# mixed single elements and ranges
+check "10, 20" "10, 20-30"
+check "10, 22" "10, 20-30"
+check "10, 22-24" "10, 20-30"
+
 # non-existing ranges matching elements
 check 10-40 ""
+check 10-20 ""
 check 10-25 ""
 check 25-55 ""
 
-- 
2.19.0



I NEED YOUR HELP URGENTLY!!!

2018-10-21 Thread GEN KELVIN
Compliment of the day to you. I am Gen.Kelvin W Howard, I am sending this brief 
letter to solicit your partnership of Sixteen  Million Two Hundred Thousand 
United States Dollars ($16,200,000). I shall send you more information and 
procedures when I receive positive response from you.Best Regards,
CONTACT ME: kivenhow...@gmail.com
Gen.Kelvin W Howard

---
This email has been checked for viruses by Avast antivirus software.
https://www.avast.com/antivirus



[PATCH nf] netfilter: xt_IDLETIMER: add sysfs filename checking routine

2018-10-20 Thread Taehee Yoo
When IDLETIMER rule is added, sysfs file is created under
/sys/class/xt_idletimer/timers/
But some label name shouldn't be used.
".", "..", "power", "uevent", "subsystem", etc...
So that sysfs filename checking routine is needed.

test commands:
   %iptables -I INPUT -j IDLETIMER --timeout 1 --label "power"

splat looks like:
[95765.423132] sysfs: cannot create duplicate filename 
'/devices/virtual/xt_idletimer/timers/power'
[95765.433418] CPU: 0 PID: 8446 Comm: iptables Not tainted 4.19.0-rc6+ #20
[95765.449755] Call Trace:
[95765.449755]  dump_stack+0xc9/0x16b
[95765.449755]  ? show_regs_print_info+0x5/0x5
[95765.449755]  sysfs_warn_dup+0x74/0x90
[95765.449755]  sysfs_add_file_mode_ns+0x352/0x500
[95765.449755]  sysfs_create_file_ns+0x179/0x270
[95765.449755]  ? sysfs_add_file_mode_ns+0x500/0x500
[95765.449755]  ? idletimer_tg_checkentry+0x3e5/0xb1b [xt_IDLETIMER]
[95765.449755]  ? rcu_read_lock_sched_held+0x114/0x130
[95765.449755]  ? __kmalloc_track_caller+0x211/0x2b0
[95765.449755]  ? memcpy+0x34/0x50
[95765.449755]  idletimer_tg_checkentry+0x4e2/0xb1b [xt_IDLETIMER]
[ ... ]

Fixes: 0902b469bd25 ("netfilter: xtables: idletimer target implementation")
Signed-off-by: Taehee Yoo 
---
 net/netfilter/xt_IDLETIMER.c | 20 
 1 file changed, 20 insertions(+)

diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index 5ee859193783..25453a16385e 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -116,6 +116,22 @@ static void idletimer_tg_expired(struct timer_list *t)
schedule_work(>work);
 }
 
+static int idletimer_check_sysfs_name(const char *name, unsigned int size)
+{
+   int ret;
+
+   ret = xt_check_proc_name(name, size);
+   if (ret < 0)
+   return ret;
+
+   if (!strcmp(name, "power") ||
+   !strcmp(name, "subsystem") ||
+   !strcmp(name, "uevent"))
+   return -EINVAL;
+
+   return 0;
+}
+
 static int idletimer_tg_create(struct idletimer_tg_info *info)
 {
int ret;
@@ -126,6 +142,10 @@ static int idletimer_tg_create(struct idletimer_tg_info 
*info)
goto out;
}
 
+   ret = idletimer_check_sysfs_name(info->label, sizeof(info->label));
+   if (ret < 0)
+   goto out_free_timer;
+
sysfs_attr_init(>timer->attr.attr);
info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL);
if (!info->timer->attr.attr.name) {
-- 
2.17.1



Re: [nft PATCH] make cache persistent if local entries were added

2018-10-20 Thread Phil Sutter
On Sat, Oct 20, 2018 at 12:35:11PM +0200, Pablo Neira Ayuso wrote:
> On Sat, Oct 20, 2018 at 12:24:06PM +0200, Phil Sutter wrote:
> > JSON API as well as nft CLI allow to run multiple commands within the
> > same batch. Depending on the local cache state, a later command may
> > trigger a cache update which removes the local entry added by an earlier
> > command.
> > 
> > To overcome this, introduce a special genid value to set when local
> > entries are added to the cache which blocks all cache updates until the
> > batch has been committed to the kernel.
> 
> Probably we can make sure we issue a cache_update() by when we call
> chain_add_hash(), before adding the local object to the cache, then
> lock it? Or add assert() to _add_hash() functions to make sure cache
> is up to date? We need a valid cache before we can lock it, right?

The problem is that a batch commit outdates the local cache. An example
showing the problem is:

| % sudo nft -i
| nft> list ruleset
| nft> add table ip t
| nft> add table ip t2; add chain ip t2 c
| Error: Could not process rule: No such file or directory
| add table ip t2; add chain ip t2 c
|   ^^

With 'list ruleset', I just ensure cache->genid is not zero. The first
'add table' command increments kernel's genid. The 'add chain' command
triggers a cache update which removes table t2 from it.

> Do you have several examples that are triggering the problem that we
> can place in the test regression infrastructure?

I'll try to collect a few and will send a test case so we have something
to validate against.

Thanks, Phil


Re: [nft PATCH] make cache persistent if local entries were added

2018-10-20 Thread Pablo Neira Ayuso
On Sat, Oct 20, 2018 at 12:24:06PM +0200, Phil Sutter wrote:
> JSON API as well as nft CLI allow to run multiple commands within the
> same batch. Depending on the local cache state, a later command may
> trigger a cache update which removes the local entry added by an earlier
> command.
> 
> To overcome this, introduce a special genid value to set when local
> entries are added to the cache which blocks all cache updates until the
> batch has been committed to the kernel.

Probably we can make sure we issue a cache_update() by when we call
chain_add_hash(), before adding the local object to the cache, then
lock it? Or add assert() to _add_hash() functions to make sure cache
is up to date? We need a valid cache before we can lock it, right?

Do you have several examples that are triggering the problem that we
can place in the test regression infrastructure?

Thanks!

> Signed-off-by: Phil Sutter 
> ---
>  include/nftables.h |  2 ++
>  include/rule.h | 12 
>  src/evaluate.c |  6 +++---
>  src/libnftables.c  |  2 ++
>  src/monitor.c  |  4 ++--
>  src/rule.c | 17 +
>  6 files changed, 30 insertions(+), 13 deletions(-)
> 
> diff --git a/include/nftables.h b/include/nftables.h
> index 25e78c80df7e0..47d2c2bb036cc 100644
> --- a/include/nftables.h
> +++ b/include/nftables.h
> @@ -37,6 +37,8 @@ struct nft_cache {
>   struct list_headlist;
>   uint32_tseqnum;
>  };
> +#define CACHE_LOCK_GENID (uint16_t)-1

This one is a valid cache number in the kernel, right? IIRC, it's zero
the one that is never used.


[nft PATCH] make cache persistent if local entries were added

2018-10-20 Thread Phil Sutter
JSON API as well as nft CLI allow to run multiple commands within the
same batch. Depending on the local cache state, a later command may
trigger a cache update which removes the local entry added by an earlier
command.

To overcome this, introduce a special genid value to set when local
entries are added to the cache which blocks all cache updates until the
batch has been committed to the kernel.

Signed-off-by: Phil Sutter 
---
 include/nftables.h |  2 ++
 include/rule.h | 12 
 src/evaluate.c |  6 +++---
 src/libnftables.c  |  2 ++
 src/monitor.c  |  4 ++--
 src/rule.c | 17 +
 6 files changed, 30 insertions(+), 13 deletions(-)

diff --git a/include/nftables.h b/include/nftables.h
index 25e78c80df7e0..47d2c2bb036cc 100644
--- a/include/nftables.h
+++ b/include/nftables.h
@@ -37,6 +37,8 @@ struct nft_cache {
struct list_headlist;
uint32_tseqnum;
 };
+#define CACHE_LOCK_GENID   (uint16_t)-1
+#define CACHE_UNLOCK_GENID 1
 
 struct mnl_socket;
 struct parser_state;
diff --git a/include/rule.h b/include/rule.h
index 9e029899513fd..d5ddc78a7d3c4 100644
--- a/include/rule.h
+++ b/include/rule.h
@@ -216,7 +216,8 @@ extern const char *chain_hookname_lookup(const char *name);
 extern struct chain *chain_alloc(const char *name);
 extern struct chain *chain_get(struct chain *chain);
 extern void chain_free(struct chain *chain);
-extern void chain_add_hash(struct chain *chain, struct table *table);
+extern void chain_add_hash(struct chain *chain, struct table *table,
+  struct nft_cache *cache);
 extern struct chain *chain_lookup(const struct table *table,
  const struct handle *h);
 
@@ -299,7 +300,8 @@ extern struct set *set_alloc(const struct location *loc);
 extern struct set *set_get(struct set *set);
 extern void set_free(struct set *set);
 extern struct set *set_clone(const struct set *set);
-extern void set_add_hash(struct set *set, struct table *table);
+extern void set_add_hash(struct set *set, struct table *table,
+struct nft_cache *cache);
 extern struct set *set_lookup(const struct table *table, const char *name);
 extern struct set *set_lookup_global(uint32_t family, const char *table,
 const char *name, struct nft_cache *cache);
@@ -381,7 +383,8 @@ struct obj {
 struct obj *obj_alloc(const struct location *loc);
 extern struct obj *obj_get(struct obj *obj);
 void obj_free(struct obj *obj);
-void obj_add_hash(struct obj *obj, struct table *table);
+void obj_add_hash(struct obj *obj, struct table *table,
+ struct nft_cache *cache);
 struct obj *obj_lookup(const struct table *table, const char *name,
   uint32_t type);
 void obj_print(const struct obj *n, struct output_ctx *octx);
@@ -406,7 +409,8 @@ struct flowtable {
 extern struct flowtable *flowtable_alloc(const struct location *loc);
 extern struct flowtable *flowtable_get(struct flowtable *flowtable);
 extern void flowtable_free(struct flowtable *flowtable);
-extern void flowtable_add_hash(struct flowtable *flowtable, struct table 
*table);
+extern void flowtable_add_hash(struct flowtable *flowtable,
+  struct table *table, struct nft_cache *cache);
 
 void flowtable_print(const struct flowtable *n, struct output_ctx *octx);
 
diff --git a/src/evaluate.c b/src/evaluate.c
index db49a18d0150c..f2448296be0c0 100644
--- a/src/evaluate.c
+++ b/src/evaluate.c
@@ -3014,7 +3014,7 @@ static int set_evaluate(struct eval_ctx *ctx, struct set 
*set)
ctx->set = NULL;
 
if (set_lookup(table, set->handle.set.name) == NULL)
-   set_add_hash(set_get(set), table);
+   set_add_hash(set_get(set), table, ctx->cache);
 
/* Default timeout value implies timeout support */
if (set->timeout)
@@ -3198,12 +3198,12 @@ static int chain_evaluate(struct eval_ctx *ctx, struct 
chain *chain)
if (chain_lookup(table, >cmd->handle) == NULL) {
chain = chain_alloc(NULL);
handle_merge(>handle, >cmd->handle);
-   chain_add_hash(chain, table);
+   chain_add_hash(chain, table, ctx->cache);
}
return 0;
} else {
if (chain_lookup(table, >handle) == NULL)
-   chain_add_hash(chain_get(chain), table);
+   chain_add_hash(chain_get(chain), table, ctx->cache);
}
 
if (chain->flags & CHAIN_F_BASECHAIN) {
diff --git a/src/libnftables.c b/src/libnftables.c
index 977763793e768..1726b0ee19b54 100644
--- a/src/libnftables.c
+++ b/src/libnftables.c
@@ -464,6 +464,7 @@ err:
}
erec_print_list(>output, , nft->debug_mask);
iface_cache_release();
+   nft->cache.genid = CACHE_UNLOCK_GENID;
if (nft->scanner) {

Re: [PATCH iptables] iptables-test: add -N option to exercise netns removal path

2018-10-20 Thread Phil Sutter
Hi,

On Sat, Oct 20, 2018 at 11:21:42AM +0200, Pablo Neira Ayuso wrote:
> On Fri, Oct 19, 2018 at 03:38:58PM +0200, Phil Sutter wrote:
> > On Fri, Oct 19, 2018 at 11:55:07AM +0200, Pablo Neira Ayuso wrote:
> > > On Fri, Oct 19, 2018 at 11:04:42AM +0200, Phil Sutter wrote:
> > > > Hi,
> > > > 
> > > > On Thu, Oct 18, 2018 at 08:33:07PM +0200, Pablo Neira Ayuso wrote:
> > > > [...]
> > > > > @@ -108,8 +111,15 @@ def run_test(iptables, rule, rule_save, res, 
> > > > > filename, lineno):
> > > > >  command = IPTABLES_SAVE
> > > > >  elif splitted[0] == IP6TABLES:
> > > > >  command = IP6TABLES_SAVE
> > > > > +
> > > > > +if netns:
> > > > > +path = "/sbin/ip"
> > > > > +command = "netns exec vm-iptable-test " + 
> > > > > EXECUTEABLE + " " + command
> > > > > +else:
> > > > > +path = os.path.abspath(os.path.curdir) + "/iptables/" + 
> > > > > EXECUTEABLE
> > > > > +
> > > > 
> > > > In netns case, doesn't this lead to calling xtables-*-multi from $PATH
> > > > instead of the local one we want to test?
> > > 
> > > Hm, right, will fix this.
> > 
> > I had another look: In main(), PATH is extended to include $PWD/iptables
> > as first component. So actually this shouldn't matter, but maybe better
> > to have it explicit.
> 
> You mean, we could remove lines that are updating PATH and have them
> explicit everywhere, right? If so, that's fine with it.

Yes, that's what I meant.

> I can have a look in a follow up patch, or may this affect this patch
> in some way I'm overlooking?

No, no secret insights I didn't tell you about. :D

Thanks, Phil


Re: [PATCH iptables] iptables-test: add -N option to exercise netns removal path

2018-10-20 Thread Pablo Neira Ayuso
On Fri, Oct 19, 2018 at 03:38:58PM +0200, Phil Sutter wrote:
> On Fri, Oct 19, 2018 at 11:55:07AM +0200, Pablo Neira Ayuso wrote:
> > On Fri, Oct 19, 2018 at 11:04:42AM +0200, Phil Sutter wrote:
> > > Hi,
> > > 
> > > On Thu, Oct 18, 2018 at 08:33:07PM +0200, Pablo Neira Ayuso wrote:
> > > [...]
> > > > @@ -108,8 +111,15 @@ def run_test(iptables, rule, rule_save, res, 
> > > > filename, lineno):
> > > >  command = IPTABLES_SAVE
> > > >  elif splitted[0] == IP6TABLES:
> > > >  command = IP6TABLES_SAVE
> > > > +
> > > > +if netns:
> > > > +path = "/sbin/ip"
> > > > +command = "netns exec vm-iptable-test " + EXECUTEABLE 
> > > > + " " + command
> > > > +else:
> > > > +path = os.path.abspath(os.path.curdir) + "/iptables/" + 
> > > > EXECUTEABLE
> > > > +
> > > 
> > > In netns case, doesn't this lead to calling xtables-*-multi from $PATH
> > > instead of the local one we want to test?
> > 
> > Hm, right, will fix this.
> 
> I had another look: In main(), PATH is extended to include $PWD/iptables
> as first component. So actually this shouldn't matter, but maybe better
> to have it explicit.

You mean, we could remove lines that are updating PATH and have them
explicit everywhere, right? If so, that's fine with it.

I can have a look in a follow up patch, or may this affect this patch
in some way I'm overlooking?

Thanks.


Re: [PATCH libnftnl 4/4] src: Use memcpy() to handle potentially unaligned data

2018-10-19 Thread Matt Turner
On Fri, Oct 19, 2018 at 5:14 AM Pablo Neira Ayuso  wrote:
>
> On Wed, Oct 17, 2018 at 12:32:54PM -0700, Matt Turner wrote:
> > Rolf Eike Beer  reported that nft-expr_quota-test fails
> > with a SIGBUS on SPARC due to unaligned accesses. This patch resolves
> > that and fixes additional sources of unaligned accesses matching the
> > same pattern. Both nft-expr_quota-test and nft-expr_objref-test
> > generated unaligned accesses on DEC Alpha.
>
> Applied, thanks Matt.

Thanks a bunch.

If you wouldn't mind, now might be a good time to make a 1.1.2
release. In the four months since 1.1.1 there are some important fixes
for oddball systems (big endian, strict alignment) and now these
changes to make the test suite more robust.

Thanks again!


Re: [PATCH iptables] libxtables: expose new etherdb lookup function through libxtables API

2018-10-19 Thread Phil Sutter
On Fri, Oct 19, 2018 at 01:10:59PM +0200, Pablo Neira Ayuso wrote:
> This is used from extensions and included in libxtables, so we have to
> make them public.
> 
> Fixes: 31f1434dfe37 ("libxtables: Integrate getethertype.c from xtables core")
> Reported-by: Florian Westphal 
> Signed-off-by: Pablo Neira Ayuso 

Acked-by: Phil Sutter 

Thanks for fixing this up!


Re: [PATCH iptables] libxtables: prefix exported new functions for etherdb lookups

2018-10-19 Thread Phil Sutter
On Fri, Oct 19, 2018 at 12:57:36PM +0200, Pablo Neira Ayuso wrote:
> To avoid symbol pollution, place them under the xt_ and xtables_ prefix
> name.
> 
> Reported-by: Florian Westphal 
> Signed-off-by: Pablo Neira Ayuso 

Acked-by: Phil Sutter 


Re: [PATCH iptables] iptables-test: add -N option to exercise netns removal path

2018-10-19 Thread Phil Sutter
On Fri, Oct 19, 2018 at 11:55:07AM +0200, Pablo Neira Ayuso wrote:
> On Fri, Oct 19, 2018 at 11:04:42AM +0200, Phil Sutter wrote:
> > Hi,
> > 
> > On Thu, Oct 18, 2018 at 08:33:07PM +0200, Pablo Neira Ayuso wrote:
> > [...]
> > > @@ -108,8 +111,15 @@ def run_test(iptables, rule, rule_save, res, 
> > > filename, lineno):
> > >  command = IPTABLES_SAVE
> > >  elif splitted[0] == IP6TABLES:
> > >  command = IP6TABLES_SAVE
> > > +
> > > +if netns:
> > > +path = "/sbin/ip"
> > > +command = "netns exec vm-iptable-test " + EXECUTEABLE + 
> > > " " + command
> > > +else:
> > > +path = os.path.abspath(os.path.curdir) + "/iptables/" + 
> > > EXECUTEABLE
> > > +
> > 
> > In netns case, doesn't this lead to calling xtables-*-multi from $PATH
> > instead of the local one we want to test?
> 
> Hm, right, will fix this.

I had another look: In main(), PATH is extended to include $PWD/iptables
as first component. So actually this shouldn't matter, but maybe better
to have it explicit.

Cheers, Phil


Re: [PATCH libnftnl 4/4] src: Use memcpy() to handle potentially unaligned data

2018-10-19 Thread Pablo Neira Ayuso
On Wed, Oct 17, 2018 at 12:32:54PM -0700, Matt Turner wrote:
> Rolf Eike Beer  reported that nft-expr_quota-test fails
> with a SIGBUS on SPARC due to unaligned accesses. This patch resolves
> that and fixes additional sources of unaligned accesses matching the
> same pattern. Both nft-expr_quota-test and nft-expr_objref-test
> generated unaligned accesses on DEC Alpha.

Applied, thanks Matt.


Re: [PATCH nf-next] netfilter: remove unused udp.h header.

2018-10-19 Thread Pablo Neira Ayuso
On Wed, Oct 17, 2018 at 09:46:07PM +0900, Weongyo Jeong wrote:
> udp.h header isn't used at these files.  So it's safe to remove.
> 
> Signed-off-by: Weongyo Jeong 
> ---
>  net/ipv4/netfilter/ipt_CLUSTERIP.c | 1 -
>  net/ipv4/netfilter/ipt_REJECT.c| 1 -
>  2 files changed, 2 deletions(-)
> 
> diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c 
> b/net/ipv4/netfilter/ipt_CLUSTERIP.c
> index 2c8d313..35984f4 100644
> --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
> +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
> @@ -18,7 +18,6 @@
>  #include 
>  #include 
>  #include 

It seems we can also remove tcp.h?

> -#include 
>  #include 

And icmp.h?

It would be good to have a careful look and remove all headers that we
don't need in one go.

Probably we can even remove checksum.h, it seems this file is actually
including much more than it needs?

Let me know,
Thanks!

>  #include 
>  #include 
> diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
> index e8bed33..503ba09 100644
> --- a/net/ipv4/netfilter/ipt_REJECT.c
> +++ b/net/ipv4/netfilter/ipt_REJECT.c
> @@ -14,7 +14,6 @@
>  #include 
>  #include 
>  #include 
> -#include 
>  #include 
>  #include 

Probably same here with icmp.h header files, not sure we need them.

>  #include 
> -- 
> 2.7.4
> 


Re: [PATCH nf-next] netfilter: remove two unused variables.

2018-10-19 Thread Pablo Neira Ayuso
On Wed, Oct 17, 2018 at 09:45:17PM +0900, Weongyo Jeong wrote:
> nft_dup_netdev_ingress_ops and nft_fwd_netdev_ingress_ops variables are
> no longer used at the code.

Applied, thanks.


Re: [PATCH nf-next] netfilter: nfnetlink_log: remove empty nfnetlink_log.h header file

2018-10-19 Thread Pablo Neira Ayuso
On Thu, Oct 18, 2018 at 10:29:59PM +0900, Taehee Yoo wrote:
> /include/net/netfilter/nfnetlink_log.h file is empty.
> so that it can be removed.

Applied, thanks.


Re: [PATCH nf-next] netfilter: nf_flow_table: remove unnecessary parameter of nf_flow_table_cleanup()

2018-10-19 Thread Pablo Neira Ayuso
On Fri, Oct 12, 2018 at 03:01:54AM +0900, Taehee Yoo wrote:
> parameter net of nf_flow_table_cleanup() is not used.
> So that it can be removed.

Applied, thanks.


[PATCH iptables] libxtables: expose new etherdb lookup function through libxtables API

2018-10-19 Thread Pablo Neira Ayuso
This is used from extensions and included in libxtables, so we have to
make them public.

Fixes: 31f1434dfe37 ("libxtables: Integrate getethertype.c from xtables core")
Reported-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
---
 extensions/libebt_arp.c |  4 ++--
 extensions/libebt_vlan.c|  1 -
 include/ebtables/ethernetdb.h   | 45 -
 include/xtables.h   | 12 +++
 iptables/nft-bridge.c   |  1 -
 iptables/xtables-eb-translate.c |  3 +--
 iptables/xtables-eb.c   |  2 +-
 libxtables/getethertype.c   |  7 +++
 8 files changed, 19 insertions(+), 56 deletions(-)
 delete mode 100644 include/ebtables/ethernetdb.h

diff --git a/extensions/libebt_arp.c b/extensions/libebt_arp.c
index 3a4c29b5fd3e..522c57c0156d 100644
--- a/extensions/libebt_arp.c
+++ b/extensions/libebt_arp.c
@@ -14,7 +14,7 @@
 #include 
 #include 
 
-#include 
+#include 
 #include 
 #include 
 #include "iptables/nft.h"
@@ -75,7 +75,7 @@ static void brarp_print_help(void)
printf(" %d = %s\n", i + 1, opcodes[i]);
printf(
 " hardware type string: 1 = Ethernet\n"
-" protocol type string: see "_XT_PATH_ETHERTYPES"\n");
+" protocol type string: see "XT_PATH_ETHERTYPES"\n");
 }
 
 #define OPT_OPCODE 0x01
diff --git a/extensions/libebt_vlan.c b/extensions/libebt_vlan.c
index 57c4dd5bca5d..a2a9dcce531c 100644
--- a/extensions/libebt_vlan.c
+++ b/extensions/libebt_vlan.c
@@ -12,7 +12,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include "iptables/nft.h"
diff --git a/include/ebtables/ethernetdb.h b/include/ebtables/ethernetdb.h
deleted file mode 100644
index 08b433543c67..
--- a/include/ebtables/ethernetdb.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
-* This program is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License as published by
-* the Free Software Foundation; either version 2 of the License, or
-* (at your option) any later version.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-* GNU General Public License for more details.
-*
-* You should have received a copy of the GNU General Public License
-* along with this program; if not, write to the Free Software
-* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-*/
-
-/* All data returned by the network data base library are supplied in
-   host order and returned in network order (suitable for use in
-   system calls).  */
-
-#ifndef_ETHERNETDB_H
-#define_ETHERNETDB_H   1
-
-#include 
-#include 
-#include 
-
-/* Absolute file name for network data base files.  */
-#ifndef_XT_PATH_ETHERTYPES
-#define_XT_PATH_ETHERTYPES "/etc/ethertypes"
-#endif /* _PATH_ETHERTYPES */
-
-struct xt_ethertypeent {
-   char *e_name;   /* Official ethernet type name.  */
-   char **e_aliases;   /* Alias list.  */
-   int e_ethertype;/* Ethernet type number.  */
-};
-
-/* Return entry from ethertype data base for network with NAME.  */
-extern struct xt_ethertypeent *xtables_getethertypebyname(__const char 
*__name);
-
-/* Return entry from ethertype data base which number is PROTO.  */
-extern struct xt_ethertypeent *xtables_getethertypebynumber(int __ethertype);
-
-#endif /* ethernetdb.h */
diff --git a/include/xtables.h b/include/xtables.h
index bf169b08186f..8fb8843ac4f4 100644
--- a/include/xtables.h
+++ b/include/xtables.h
@@ -521,6 +521,18 @@ extern void xtables_ip6parse_any(const char *, struct 
in6_addr **,
 extern void xtables_ip6parse_multiple(const char *, struct in6_addr **,
struct in6_addr **, unsigned int *);
 
+/* Absolute file name for network data base files.  */
+#define XT_PATH_ETHERTYPES "/etc/ethertypes"
+
+struct xt_ethertypeent {
+   char *e_name;   /* Official ethernet type name.  */
+   char **e_aliases;   /* Alias list.  */
+   int e_ethertype;/* Ethernet type number.  */
+};
+
+extern struct xt_ethertypeent *xtables_getethertypebyname(const char *name);
+extern struct xt_ethertypeent *xtables_getethertypebynumber(int ethertype);
+
 /**
  * Print the specified value to standard output, quoting dangerous
  * characters if required.
diff --git a/iptables/nft-bridge.c b/iptables/nft-bridge.c
index 876981ac5862..35c862cfda81 100644
--- a/iptables/nft-bridge.c
+++ b/iptables/nft-bridge.c
@@ -16,7 +16,6 @@
 #include 
 #include 
 #include 
-#include 
 
 #include "nft-shared.h"
 #include "nft-bridge.h"
diff --git a/iptables/xtables-eb-translate.c b/iptables/xtables-eb-translate.c
index 44419751fc85..f98c38eb 100644
--- a/iptables/xtables-eb-translate.c
+++ b/iptables/xtables-eb-translate.c
@@ -14,7 +14,6 @@
 
 #include 
 #include 
-#include 
 #include 

[PATCH iptables] libxtables: prefix exported new functions for etherdb lookups

2018-10-19 Thread Pablo Neira Ayuso
To avoid symbol pollution, place them under the xt_ and xtables_ prefix
name.

Reported-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
---
 extensions/libebt_arp.c |  6 +++---
 extensions/libebt_vlan.c|  4 ++--
 include/ebtables/ethernetdb.h   | 22 +-
 iptables/nft-bridge.c   |  4 ++--
 iptables/xtables-eb-translate.c |  6 +++---
 iptables/xtables-eb.c   |  6 +++---
 libxtables/getethertype.c   | 22 +++---
 7 files changed, 29 insertions(+), 41 deletions(-)

diff --git a/extensions/libebt_arp.c b/extensions/libebt_arp.c
index dc8e306a2879..3a4c29b5fd3e 100644
--- a/extensions/libebt_arp.c
+++ b/extensions/libebt_arp.c
@@ -75,7 +75,7 @@ static void brarp_print_help(void)
printf(" %d = %s\n", i + 1, opcodes[i]);
printf(
 " hardware type string: 1 = Ethernet\n"
-" protocol type string: see "_PATH_ETHERTYPES"\n");
+" protocol type string: see "_XT_PATH_ETHERTYPES"\n");
 }
 
 #define OPT_OPCODE 0x01
@@ -262,9 +262,9 @@ brarp_parse(int c, char **argv, int invert, unsigned int 
*flags,
 
i = strtol(optarg, , 16);
if (i < 0 || i >= (0x1 << 16) || *end !='\0') {
-   struct ethertypeent *ent;
+   struct xt_ethertypeent *ent;
 
-   ent = getethertypebyname(argv[optind - 1]);
+   ent = xtables_getethertypebyname(argv[optind - 1]);
if (!ent)
xtables_error(PARAMETER_PROBLEM, "Problem with 
specified ARP "
 "protocol 
type");
diff --git a/extensions/libebt_vlan.c b/extensions/libebt_vlan.c
index 52cc99fa19c7..57c4dd5bca5d 100644
--- a/extensions/libebt_vlan.c
+++ b/extensions/libebt_vlan.c
@@ -55,7 +55,7 @@ brvlan_parse(int c, char **argv, int invert, unsigned int 
*flags,
   const void *entry, struct xt_entry_match **match)
 {
struct ebt_vlan_info *vlaninfo = (struct ebt_vlan_info *) 
(*match)->data;
-   struct ethertypeent *ethent;
+   struct xt_ethertypeent *ethent;
char *end;
struct ebt_vlan_info local;
 
@@ -86,7 +86,7 @@ brvlan_parse(int c, char **argv, int invert, unsigned int 
*flags,
vlaninfo->invflags |= EBT_VLAN_ENCAP;
local.encap = strtoul(optarg, , 16);
if (*end != '\0') {
-   ethent = getethertypebyname(optarg);
+   ethent = xtables_getethertypebyname(optarg);
if (ethent == NULL)
xtables_error(PARAMETER_PROBLEM, "Unknown 
--vlan-encap value ('%s')", optarg);
local.encap = ethent->e_ethertype;
diff --git a/include/ebtables/ethernetdb.h b/include/ebtables/ethernetdb.h
index 1683abe01987..08b433543c67 100644
--- a/include/ebtables/ethernetdb.h
+++ b/include/ebtables/ethernetdb.h
@@ -26,32 +26,20 @@
 #include 
 
 /* Absolute file name for network data base files.  */
-#ifndef_PATH_ETHERTYPES
-#define_PATH_ETHERTYPES"/etc/ethertypes"
+#ifndef_XT_PATH_ETHERTYPES
+#define_XT_PATH_ETHERTYPES "/etc/ethertypes"
 #endif /* _PATH_ETHERTYPES */
 
-struct ethertypeent {
+struct xt_ethertypeent {
char *e_name;   /* Official ethernet type name.  */
char **e_aliases;   /* Alias list.  */
int e_ethertype;/* Ethernet type number.  */
 };
 
-/* Open ethertype data base files and mark them as staying open even
-   after a later search if STAY_OPEN is non-zero.  */
-extern void setethertypeent(int __stay_open);
-
-/* Close ethertype data base files and clear `stay open' flag.  */
-extern void endethertypeent(void);
-
-/* Get next entry from ethertype data base file.  Open data base if
-   necessary.  */
-extern struct ethertypeent *getethertypeent(void);
-
 /* Return entry from ethertype data base for network with NAME.  */
-extern struct ethertypeent *getethertypebyname(__const char *__name);
+extern struct xt_ethertypeent *xtables_getethertypebyname(__const char 
*__name);
 
 /* Return entry from ethertype data base which number is PROTO.  */
-extern struct ethertypeent *getethertypebynumber(int __ethertype);
-
+extern struct xt_ethertypeent *xtables_getethertypebynumber(int __ethertype);
 
 #endif /* ethernetdb.h */
diff --git a/iptables/nft-bridge.c b/iptables/nft-bridge.c
index 7e659bb554a4..876981ac5862 100644
--- a/iptables/nft-bridge.c
+++ b/iptables/nft-bridge.c
@@ -421,7 +421,7 @@ static void print_mac(char option, const unsigned char *mac,
 
 static void print_protocol(uint16_t ethproto, bool invert, unsigned int 
bitmask)
 {
-   struct ethertypeent *ent;
+   struct xt_ethertypeent *ent;
 
/* Dont print anything about the protocol if no protocol was
 * specified, obviously this means any protocol will do. */
@@ 

Re: [PATCH nf-next] netfilter: nf_flow_table: remove flowtable hook flush routine in netns exit routine

2018-10-19 Thread Pablo Neira Ayuso
On Tue, Oct 02, 2018 at 02:17:14AM +0900, Taehee Yoo wrote:
> When device is unregistered, flowtable flush routine is called
> by notifier_call(nf_tables_flowtable_event). and exit callback of
> nftables pernet_operation(nf_tables_exit_net) also has flowtable flush
> routine. but when network namespace is destroyed, both notifier_call
> and pernet_operation are called. hence flowtable flush routine in
> pernet_operation is unnecessary.
> 
> test commands:
>%ip netns add vm1
>%ip netns exec vm1 nft add table ip filter
>%ip netns exec vm1 nft add flowtable ip filter w \
>   { hook ingress priority 0\; devices = { lo }\; }
>%ip netns del vm1

Applied, thanks for explaining.


Re: [PATCH nf-next] Revert "netfilter: xt_quota: fix the behavior of xt_quota module"

2018-10-19 Thread Maciej Żenczykowski
Signed-off-by: Maciej Żenczykowski 


[PATCH nf-next] Revert "netfilter: xt_quota: fix the behavior of xt_quota module"

2018-10-19 Thread Pablo Neira Ayuso
This reverts commit e9837e55b0200da544a095a1fca36efd7fd3ba30.

When talking to Maze and Chenbo, we agreed to keep this back by now
due to problems in the ruleset listing path with 32-bit arches.

Signed-off-by: Pablo Neira Ayuso 
---
 include/uapi/linux/netfilter/xt_quota.h |  8 ++---
 net/netfilter/xt_quota.c| 55 -
 2 files changed, 36 insertions(+), 27 deletions(-)

diff --git a/include/uapi/linux/netfilter/xt_quota.h 
b/include/uapi/linux/netfilter/xt_quota.h
index d72fd52adbba..f3ba5d9e58b6 100644
--- a/include/uapi/linux/netfilter/xt_quota.h
+++ b/include/uapi/linux/netfilter/xt_quota.h
@@ -15,11 +15,9 @@ struct xt_quota_info {
__u32 flags;
__u32 pad;
__aligned_u64 quota;
-#ifdef __KERNEL__
-   atomic64_t counter;
-#else
-   __aligned_u64 remain;
-#endif
+
+   /* Used internally by the kernel */
+   struct xt_quota_priv*master;
 };
 
 #endif /* _XT_QUOTA_H */
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index fceae245eb03..10d61a6eed71 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -11,6 +11,11 @@
 #include 
 #include 
 
+struct xt_quota_priv {
+   spinlock_t  lock;
+   uint64_tquota;
+};
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Sam Johnston ");
 MODULE_DESCRIPTION("Xtables: countdown quota match");
@@ -21,48 +26,54 @@ static bool
 quota_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
struct xt_quota_info *q = (void *)par->matchinfo;
-   u64 current_count = atomic64_read(>counter);
+   struct xt_quota_priv *priv = q->master;
bool ret = q->flags & XT_QUOTA_INVERT;
-   u64 old_count, new_count;
-
-   do {
-   if (current_count == 1)
-   return ret;
-   if (current_count <= skb->len) {
-   atomic64_set(>counter, 1);
-   return ret;
-   }
-   old_count = current_count;
-   new_count = current_count - skb->len;
-   current_count = atomic64_cmpxchg(>counter, old_count,
-new_count);
-   } while (current_count != old_count);
-   return !ret;
+
+   spin_lock_bh(>lock);
+   if (priv->quota >= skb->len) {
+   priv->quota -= skb->len;
+   ret = !ret;
+   } else {
+   /* we do not allow even small packets from now on */
+   priv->quota = 0;
+   }
+   spin_unlock_bh(>lock);
+
+   return ret;
 }
 
 static int quota_mt_check(const struct xt_mtchk_param *par)
 {
struct xt_quota_info *q = par->matchinfo;
 
-   BUILD_BUG_ON(sizeof(atomic64_t) != sizeof(__u64));
-
if (q->flags & ~XT_QUOTA_MASK)
return -EINVAL;
-   if (atomic64_read(>counter) > q->quota + 1)
-   return -ERANGE;
 
-   if (atomic64_read(>counter) == 0)
-   atomic64_set(>counter, q->quota + 1);
+   q->master = kmalloc(sizeof(*q->master), GFP_KERNEL);
+   if (q->master == NULL)
+   return -ENOMEM;
+
+   spin_lock_init(>master->lock);
+   q->master->quota = q->quota;
return 0;
 }
 
+static void quota_mt_destroy(const struct xt_mtdtor_param *par)
+{
+   const struct xt_quota_info *q = par->matchinfo;
+
+   kfree(q->master);
+}
+
 static struct xt_match quota_mt_reg __read_mostly = {
.name   = "quota",
.revision   = 0,
.family = NFPROTO_UNSPEC,
.match  = quota_mt,
.checkentry = quota_mt_check,
+   .destroy= quota_mt_destroy,
.matchsize  = sizeof(struct xt_quota_info),
+   .usersize   = offsetof(struct xt_quota_info, master),
.me = THIS_MODULE,
 };
 
-- 
2.11.0



[PATCH iptables,v2] iptables-test: add -N option to exercise netns removal path

2018-10-19 Thread Pablo Neira Ayuso
We are getting bug reports lately from the netns path, add a new option
to exercise this path.

Signed-off-by: Pablo Neira Ayuso 
---
v2: run local xtables-multi command, not the one installed in the system
as requested by Phil Sutter. Several cleanups too.

 iptables-test.py | 38 --
 1 file changed, 32 insertions(+), 6 deletions(-)

diff --git a/iptables-test.py b/iptables-test.py
index 9bfb8086aa0a..5e6bfb7e28a6 100755
--- a/iptables-test.py
+++ b/iptables-test.py
@@ -61,7 +61,7 @@ def delete_rule(iptables, rule, filename, lineno):
 return 0
 
 
-def run_test(iptables, rule, rule_save, res, filename, lineno):
+def run_test(iptables, rule, rule_save, res, filename, lineno, netns):
 '''
 Executes an unit test. Returns the output of delete_rule().
 
@@ -76,6 +76,9 @@ def run_test(iptables, rule, rule_save, res, filename, 
lineno):
 ret = 0
 
 cmd = iptables + " -A " + rule
+if netns:
+cmd = "ip netns exec iptables-container-test " + EXECUTEABLE + 
" " + cmd
+
 ret = execute_cmd(cmd, filename, lineno)
 
 #
@@ -108,8 +111,15 @@ def run_test(iptables, rule, rule_save, res, filename, 
lineno):
 command = IPTABLES_SAVE
 elif splitted[0] == IP6TABLES:
 command = IP6TABLES_SAVE
+
+path = os.path.abspath(os.path.curdir) + "/iptables/" + EXECUTEABLE
+command = path + " " + command
+
+if netns:
+command = "ip netns exec iptables-container-test " + command
+
 args = splitted[1:]
-proc = subprocess.Popen((os.path.abspath(os.path.curdir) + "/iptables/" + 
EXECUTEABLE, command),
+proc = subprocess.Popen(command, shell=True,
 stdin=subprocess.PIPE,
 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 out, err = proc.communicate()
@@ -131,8 +141,17 @@ def run_test(iptables, rule, rule_save, res, filename, 
lineno):
 delete_rule(iptables, rule, filename, lineno)
 return -1
 
+# Test "ip netns del NETNS" path with rules in place
+if netns:
+return 0
+
 return delete_rule(iptables, rule, filename, lineno)
 
+def run_test_netns(iptables, rule, rule_save, res, filename, lineno):
+execute_cmd("ip netns add iptables-container-test", filename, lineno)
+ret = run_test(iptables, rule, rule_save, res, filename, lineno, True)
+execute_cmd("ip netns del iptables-container-test", filename, lineno)
+return ret
 
 def execute_cmd(cmd, filename, lineno):
 '''
@@ -159,7 +178,7 @@ def execute_cmd(cmd, filename, lineno):
 return ret
 
 
-def run_test_file(filename):
+def run_test_file(filename, netns):
 '''
 Runs a test file
 
@@ -227,8 +246,13 @@ def run_test_file(filename):
 
 res = item[2].rstrip()
 
-ret = run_test(iptables, rule, rule_save,
-   res, filename, lineno + 1)
+if netns:
+ret = run_test_netns(iptables, rule, rule_save,
+ res, filename, lineno + 1)
+else:
+ret = run_test(iptables, rule, rule_save,
+   res, filename, lineno + 1, False)
+
 if ret < 0:
 test_passed = False
 total_test_passed = False
@@ -275,6 +299,8 @@ def main():
 help='Check for missing tests')
 parser.add_argument('-n', '--nftables', action='store_true',
 help='Test iptables-over-nftables')
+parser.add_argument('-N', '--netns', action='store_true',
+help='Test netnamespace path')
 args = parser.parse_args()
 
 #
@@ -313,7 +339,7 @@ def main():
 if args.filename:
 file_list = [args.filename]
 for filename in file_list:
-file_tests, file_passed = run_test_file(filename)
+file_tests, file_passed = run_test_file(filename, args.netns)
 if file_tests:
 tests += file_tests
 passed += file_passed
-- 
2.11.0



Re: [PATCH iptables] iptables-test: add -N option to exercise netns removal path

2018-10-19 Thread Pablo Neira Ayuso
On Fri, Oct 19, 2018 at 11:04:42AM +0200, Phil Sutter wrote:
> Hi,
> 
> On Thu, Oct 18, 2018 at 08:33:07PM +0200, Pablo Neira Ayuso wrote:
> [...]
> > @@ -108,8 +111,15 @@ def run_test(iptables, rule, rule_save, res, filename, 
> > lineno):
> >  command = IPTABLES_SAVE
> >  elif splitted[0] == IP6TABLES:
> >  command = IP6TABLES_SAVE
> > +
> > +if netns:
> > +path = "/sbin/ip"
> > +command = "netns exec vm-iptable-test " + EXECUTEABLE + " 
> > " + command
> > +else:
> > +path = os.path.abspath(os.path.curdir) + "/iptables/" + 
> > EXECUTEABLE
> > +
> 
> In netns case, doesn't this lead to calling xtables-*-multi from $PATH
> instead of the local one we want to test?

Hm, right, will fix this.


Re: [PATCH iptables] iptables-test: add -N option to exercise netns removal path

2018-10-19 Thread Phil Sutter
Hi,

On Thu, Oct 18, 2018 at 08:33:07PM +0200, Pablo Neira Ayuso wrote:
[...]
> @@ -108,8 +111,15 @@ def run_test(iptables, rule, rule_save, res, filename, 
> lineno):
>  command = IPTABLES_SAVE
>  elif splitted[0] == IP6TABLES:
>  command = IP6TABLES_SAVE
> +
> +if netns:
> +path = "/sbin/ip"
> +command = "netns exec vm-iptable-test " + EXECUTEABLE + " " 
> + command
> +else:
> +path = os.path.abspath(os.path.curdir) + "/iptables/" + 
> EXECUTEABLE
> +

In netns case, doesn't this lead to calling xtables-*-multi from $PATH
instead of the local one we want to test?

Cheers, Phil


Re: [PATCH libnftnl 4/4] src: Use memcpy() to handle potentially unaligned data

2018-10-18 Thread Matt Turner
On Thu, Oct 18, 2018 at 11:00 AM Pablo Neira Ayuso  wrote:
>
> Hi!
>
> On Wed, Oct 17, 2018 at 12:32:54PM -0700, Matt Turner wrote:
> > Rolf Eike Beer  reported that nft-expr_quota-test fails
> > with a SIGBUS on SPARC due to unaligned accesses. This patch resolves
> > that and fixes additional sources of unaligned accesses matching the
> > same pattern. Both nft-expr_quota-test and nft-expr_objref-test
> > generated unaligned accesses on DEC Alpha.
>
> Conversion to use memcpy() is just to stay safe, right? I thought it
> is only 64-bits case that can cause the unaligned access.

Unfortunately not. The case I found in nft-expr_objref-test was a uint32_t :(

But yes, memcpy makes these safe and should be completely inlined
since it's doing small fixed-size copies. `size` tells me that
libnftnl.so.7.3.0 has the same amount of .text on amd64 before and
after this patch.


[PATCH iptables] iptables-test: add -N option to exercise netns removal path

2018-10-18 Thread Pablo Neira Ayuso
We are getting bug reports lately from the netns path, add a new option
to exercise this path.

Signed-off-by: Pablo Neira Ayuso 
---
This is crashing the kernel in a few spots, will retest with recent fixes to
see if we are address all existing problems.

 iptables-test.py | 54 --
 1 file changed, 44 insertions(+), 10 deletions(-)

diff --git a/iptables-test.py b/iptables-test.py
index 9bfb8086aa0a..11b6c05a2b91 100755
--- a/iptables-test.py
+++ b/iptables-test.py
@@ -61,7 +61,7 @@ def delete_rule(iptables, rule, filename, lineno):
 return 0
 
 
-def run_test(iptables, rule, rule_save, res, filename, lineno):
+def run_test(iptables, rule, rule_save, res, filename, lineno, netns):
 '''
 Executes an unit test. Returns the output of delete_rule().
 
@@ -76,6 +76,9 @@ def run_test(iptables, rule, rule_save, res, filename, 
lineno):
 ret = 0
 
 cmd = iptables + " -A " + rule
+if netns:
+cmd = "ip netns exec vm-iptable-test " + EXECUTEABLE + " " + 
cmd
+
 ret = execute_cmd(cmd, filename, lineno)
 
 #
@@ -108,8 +111,15 @@ def run_test(iptables, rule, rule_save, res, filename, 
lineno):
 command = IPTABLES_SAVE
 elif splitted[0] == IP6TABLES:
 command = IP6TABLES_SAVE
+
+if netns:
+path = "/sbin/ip"
+command = "netns exec vm-iptable-test " + EXECUTEABLE + " " + 
command
+else:
+path = os.path.abspath(os.path.curdir) + "/iptables/" + EXECUTEABLE
+
 args = splitted[1:]
-proc = subprocess.Popen((os.path.abspath(os.path.curdir) + "/iptables/" + 
EXECUTEABLE, command),
+proc = subprocess.Popen(path + " " + command, shell=True,
 stdin=subprocess.PIPE,
 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 out, err = proc.communicate()
@@ -131,8 +141,17 @@ def run_test(iptables, rule, rule_save, res, filename, 
lineno):
 delete_rule(iptables, rule, filename, lineno)
 return -1
 
+# Test "ip netns del NETNS" path with rules in place
+if netns:
+return 0
+
 return delete_rule(iptables, rule, filename, lineno)
 
+def run_test_netns(iptables, rule, rule_save, res, filename, lineno):
+execute_cmd("ip netns add vm-iptable-test", filename, lineno)
+ret = run_test(iptables, rule, rule_save, res, filename, lineno, True)
+execute_cmd("ip netns del vm-iptable-test", filename, lineno)
+return ret
 
 def execute_cmd(cmd, filename, lineno):
 '''
@@ -159,7 +178,7 @@ def execute_cmd(cmd, filename, lineno):
 return ret
 
 
-def run_test_file(filename):
+def run_test_file(filename, netns):
 '''
 Runs a test file
 
@@ -227,12 +246,20 @@ def run_test_file(filename):
 
 res = item[2].rstrip()
 
-ret = run_test(iptables, rule, rule_save,
-   res, filename, lineno + 1)
-if ret < 0:
-test_passed = False
-total_test_passed = False
-break
+if netns:
+ret = run_test_netns(iptables, rule, rule_save,
+ res, filename, lineno + 1)
+if ret < 0:
+   test_passed = False
+   total_test_passed = False
+   break
+else:
+ret = run_test(iptables, rule, rule_save,
+   res, filename, lineno + 1, False)
+if ret < 0:
+test_passed = False
+total_test_passed = False
+break
 
 if test_passed:
 passed += 1
@@ -275,6 +302,8 @@ def main():
 help='Check for missing tests')
 parser.add_argument('-n', '--nftables', action='store_true',
 help='Test iptables-over-nftables')
+parser.add_argument('-N', '--netns', action='store_true',
+help='Test netnamespace path')
 args = parser.parse_args()
 
 #
@@ -289,6 +318,11 @@ def main():
 if args.nftables:
 EXECUTEABLE = "xtables-nft-multi"
 
+if args.netns:
+netns = True
+else:
+netns = False
+
 if os.getuid() != 0:
 print "You need to be root to run this, sorry"
 return
@@ -313,7 +347,7 @@ def main():
 if args.filename:
 file_list = [args.filename]
 for filename in file_list:
-file_tests, file_passed = run_test_file(filename)
+file_tests, file_passed = run_test_file(filename, netns)
 if file_tests:
 tests += file_tests
 passed += file_passed
-- 
2.11.0



Re: [PATCH nft v2] doc: Document ct timeout support

2018-10-18 Thread Pablo Neira Ayuso
On Thu, Oct 18, 2018 at 11:42:20PM +0530, Harsha Sharma wrote:
> Add documentation for creating ct timeout objects and assigning timeout
> policies via rules.

Applied, thanks Harsha.


[PATCH nft v2] doc: Document ct timeout support

2018-10-18 Thread Harsha Sharma
Add documentation for creating ct timeout objects and assigning timeout
policies via rules.

Signed-off-by: Harsha Sharma 
---
Changes in v2:
 - correct bold font for "ct timeout" title
 - update example script
 
 doc/libnftables-json.adoc | 52 ++---
 doc/stateful-objects.txt  | 59 +--
 2 files changed, 106 insertions(+), 5 deletions(-)

diff --git a/doc/libnftables-json.adoc b/doc/libnftables-json.adoc
index 59bac17..98303b3 100644
--- a/doc/libnftables-json.adoc
+++ b/doc/libnftables-json.adoc
@@ -23,7 +23,7 @@ libnftables-json - Supported JSON schema by libnftables
 
 'LIST_OBJECT' := 'TABLE' | 'CHAIN' | 'RULE' | 'SET' | 'MAP' | 'ELEMENT' |
 'FLOWTABLE' | 'COUNTER' | 'QUOTA' | 'CT_HELPER' | 'LIMIT' |
-'METAINFO_OBJECT'
+'METAINFO_OBJECT' | 'CT_TIMEOUT'
 
 == DESCRIPTION
 libnftables supports JSON formatted input and output. This is implemented as an
@@ -117,7 +117,8 @@ 
 *{ "add":* 'ADD_OBJECT' *}*
 
 'ADD_OBJECT' := 'TABLE' | 'CHAIN' | 'RULE' | 'SET' | 'MAP' | 'ELEMENT' |
-'FLOWTABLE' | 'COUNTER | QUOTA' | 'CT_HELPER' | 'LIMIT'
+'FLOWTABLE' | 'COUNTER | QUOTA' | 'CT_HELPER' | 'LIMIT' |
+   'CT_TIMEOUT'
 
 
 Add a new ruleset element to the kernel.
@@ -161,7 +162,7 @@ 
 'LIST_OBJECT' := 'TABLE' | 'TABLES' | 'CHAIN' | 'CHAINS' | 'SET' | 'SETS' |
  'MAP' | 'MAPS | COUNTER' | 'COUNTERS' | 'QUOTA' | 'QUOTAS' |
  'CT_HELPER' | 'CT_HELPERS' | 'LIMIT' | 'LIMITS | RULESET' |
- 'METER' | 'METERS' | 'FLOWTABLES'
+ 'METER' | 'METERS' | 'FLOWTABLES' | 'CT_TIMEOUT'
 
 
 List ruleset elements. The plural forms are used to list all objects of that
@@ -559,6 +560,42 @@ This object represents a named limit.
 *inv*::
If true, match if limit was exceeded. If omitted, defaults to *false*.
 
+=== CT TIMEOUT
+[verse]
+
+*{ "ct timeout": {
+   "family":* 'STRING'*,
+   "table":* 'STRING'*,
+   "name":* 'STRING'*,
+   "handle":* 'NUMBER'*,
+   "protocol":* 'CTH_PROTO'*,
+   "state":* 'STRING'*,
+   "value:* 'NUMBER'*,
+   "l3proto":* 'STRING'
+*}}*
+
+'CTH_PROTO' := *"tcp"* | *"udp"* | *"dccp"* | *"sctp"* | *"gre"* | *"icmpv6"* 
| *"icmp"* | *"generic"*
+
+
+This object represents a named conntrack timeout policy.
+
+*family*::
+   The table's family.
+*table*::
+   The table's name.
+*name*::
+   The ct timeout object's name.
+*handle*::
+   The ct timeout object's handle. In input, used for *delete* command 
only.
+*protocol*::
+   The ct timeout object's layer 4 protocol.
+*state*::
+   The connection state name, for which timeout value has to be updated, 
e.g. *"established"*, *"syn_sent"*, *"close"* or *"close_wait"*.
+*value*::
+   The updated timeout value for specified connection state.
+*l3proto*::
+   The ct timeout object's layer 3 protocol, e.g. *"ip"* or *"ip6"*.
+
 == STATEMENTS
 Statements are the building blocks for rules. Each rule consists of at least a
 single statement.
@@ -952,6 +989,15 @@ Limit number of connections using conntrack.
If *true*, match if *val* was exceeded. If omitted, defaults to
*false*.
 
+=== CT TIMEOUT
+[verse]
+*{ "ct timeout":* 'EXPRESSION' *}*
+
+Assign connection tracking timeout policy.
+
+*ct timeout*::
+   CT timeout reference.
+
 == EXPRESSIONS
 Expressions are the building blocks of (most) statements. In their most basic
 form, they are just immediate values represented as JSON string, integer or
diff --git a/doc/stateful-objects.txt b/doc/stateful-objects.txt
index 83a2575..3ce623f 100644
--- a/doc/stateful-objects.txt
+++ b/doc/stateful-objects.txt
@@ -1,5 +1,5 @@
-CT
-~~
+CT HELPER
+~
 [verse]
 *ct* helper 'helper' {type 'type' protocol 'protocol' ; [l3proto 'family' ;] }
 
@@ -40,6 +40,61 @@ table inet myhelpers {
 }
 --
 
+CT TIMEOUT
+~~
+[verse]
+*ct* timeout 'name' {protocol 'protocol' ; policy = {'state': 'value'} 
;[l3proto 'family' ;] }
+
+Ct timeout is used to update connection tracking timeout values.Timeout 
policies are assigned
+with the *ct timeout set* statement. 'protocol' and 'policy' are
+  mandatory, l3proto is derived from the table family by default.
+
+.conntrack timeout specifications
+[options="header"]
+|=
+|Keyword | Description | Type
+| protocol |
+layer 4 protocol of the timeout object |
+string (e.g. ip)
+|state |
+connection state name |
+string (e.g. "established")
+|value |
+timeout value for connection state |
+unsigned integer
+|l3proto |
+layer 3 protocol of the timeout object |
+address family (e.g. ip)
+|=
+
+.defining and assigning ct timeout policy
+--
+table ip filter {
+   ct timeout cttime {
+   protocol tcp;
+   l3proto ip
+   policy = 

Re: [PATCH libnftnl 3/4] tests: Remove test-script.sh

2018-10-18 Thread Pablo Neira Ayuso
On Wed, Oct 17, 2018 at 12:32:53PM -0700, Matt Turner wrote:
> All tests are now run with make check.

Applied, thanks.


Re: [PATCH libnftnl 2/4] tests: Run regression tests from make check

2018-10-18 Thread Pablo Neira Ayuso
On Wed, Oct 17, 2018 at 12:32:52PM -0700, Matt Turner wrote:
> The existing test-script.sh does not check the return values of the
> tests so it is not very good for automated testing.

Also applied, thanks.


[PATCH nf] netfilter: xt_RATEEST: remove netns exit routine

2018-10-18 Thread Taehee Yoo
xt_rateest_net_exit() was added to check whether rules are flushed
successfully. but ->net_exit() callback is called earlier than
->destroy() callback.
So that ->net_exit() callback can't check that.

test commands:
   %ip netns add vm1
   %ip netns exec vm1 iptables -t mangle -I PREROUTING -p udp \
   --dport  -j RATEEST --rateest-name ap \
   --rateest-interval 250ms --rateest-ewma 0.5s
   %ip netns del vm1

splat looks like:
[  668.813518] WARNING: CPU: 0 PID: 87 at net/netfilter/xt_RATEEST.c:210 
xt_rateest_net_exit+0x210/0x340 [xt_RATEEST]
[  668.813518] Modules linked in: xt_RATEEST xt_tcpudp iptable_mangle bpfilter 
ip_tables x_tables
[  668.813518] CPU: 0 PID: 87 Comm: kworker/u4:2 Not tainted 4.19.0-rc7+ #21
[  668.813518] Workqueue: netns cleanup_net
[  668.813518] RIP: 0010:xt_rateest_net_exit+0x210/0x340 [xt_RATEEST]
[  668.813518] Code: 00 48 8b 85 30 ff ff ff 4c 8b 23 80 38 00 0f 85 24 01 00 
00 48 8b 85 30 ff ff ff 4d 85 e4 4c 89 a5 58 ff ff ff c6 00 f8 74 b2 <0f> 0b 48 
83 c3 08 4c 39 f3 75 b0 48 b8 00 00 00 00 00 fc ff df 49
[  668.813518] RSP: 0018:8801156c73f8 EFLAGS: 00010282
[  668.813518] RAX: ed0022ad8e85 RBX: 880118928e98 RCX: 5db8012a
[  668.813518] RDX: 8801156c7428 RSI: cb1d185f RDI: 880115663b74
[  668.813518] RBP: 8801156c74d0 R08: 8801156633c0 R09: 1100236440be
[  668.813518] R10: 0001 R11: ed002367d852 R12: 880115142b08
[  668.813518] R13: 110022ad8e81 R14: 880118928ea8 R15: dc00
[  668.813518] FS:  () GS:88011b20() 
knlGS:
[  668.813518] CS:  0010 DS:  ES:  CR0: 80050033
[  668.813518] CR2: 563aa69f4f28 CR3: 000105a16000 CR4: 001006f0
[  668.813518] Call Trace:
[  668.813518]  ? unregister_netdevice_many+0xe0/0xe0
[  668.813518]  ? xt_rateest_net_init+0x2c0/0x2c0 [xt_RATEEST]
[  668.813518]  ? default_device_exit+0x1ca/0x270
[  668.813518]  ? remove_proc_entry+0x1cd/0x390
[  668.813518]  ? dev_change_net_namespace+0xd00/0xd00
[  668.813518]  ? __init_waitqueue_head+0x130/0x130
[  668.813518]  ops_exit_list.isra.10+0x94/0x140
[  668.813518]  cleanup_net+0x45b/0x900
[  668.813518]  ? net_drop_ns+0x110/0x110
[  668.813518]  ? swapgs_restore_regs_and_return_to_usermode+0x3c/0x80
[  668.813518]  ? save_trace+0x300/0x300
[  668.813518]  ? lock_acquire+0x196/0x470
[  668.813518]  ? lock_acquire+0x196/0x470
[  668.813518]  ? process_one_work+0xb60/0x1de0
[  668.813518]  ? _raw_spin_unlock_irq+0x29/0x40
[  668.813518]  ? _raw_spin_unlock_irq+0x29/0x40
[  668.813518]  ? __lock_acquire+0x4500/0x4500
[  668.813518]  ? __lock_is_held+0xb4/0x140
[  668.813518]  process_one_work+0xc13/0x1de0
[  668.813518]  ? pwq_dec_nr_in_flight+0x3c0/0x3c0
[  668.813518]  ? set_load_weight+0x270/0x270
[ ... ]

Fixes: 3427b2ab63fa ("netfilter: make xt_rateest hash table per net")
Signed-off-by: Taehee Yoo 
---
 net/netfilter/xt_RATEEST.c | 10 --
 1 file changed, 10 deletions(-)

diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index dec843cadf46..9e05c86ba5c4 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -201,18 +201,8 @@ static __net_init int xt_rateest_net_init(struct net *net)
return 0;
 }
 
-static void __net_exit xt_rateest_net_exit(struct net *net)
-{
-   struct xt_rateest_net *xn = net_generic(net, xt_rateest_id);
-   int i;
-
-   for (i = 0; i < ARRAY_SIZE(xn->hash); i++)
-   WARN_ON_ONCE(!hlist_empty(>hash[i]));
-}
-
 static struct pernet_operations xt_rateest_net_ops = {
.init = xt_rateest_net_init,
-   .exit = xt_rateest_net_exit,
.id   = _rateest_id,
.size = sizeof(struct xt_rateest_net),
 };
-- 
2.17.1



[PATCH] netfilter: conntrack: fix cloned unconfirmed skb->_nfct race in __nf_conntrack_confirm

2018-10-18 Thread Chieh-Min Wang
From: Chieh-Min Wang 

For bridge(br_flood) or broadcast/multicast packets, they could clone skb with
unconfirmed conntrack which break the rule that unconfirmed skb->_nfct is never 
shared.
With nfqueue running on my system, the race can be easily reproduced with 
following
warning calltrace:

[13257.707525] CPU: 0 PID: 12132 Comm: main Tainted: PW   4.4.60 
#7744
[13257.707568] Hardware name: Qualcomm (Flattened Device Tree)
[13257.714700] [] (unwind_backtrace) from [] 
(show_stack+0x10/0x14)
[13257.720253] [] (show_stack) from [] 
(dump_stack+0x94/0xa8)
[13257.728240] [] (dump_stack) from [] 
(warn_slowpath_common+0x94/0xb0)
[13257.735268] [] (warn_slowpath_common) from [] 
(warn_slowpath_null+0x1c/0x24)
[13257.743519] [] (warn_slowpath_null) from [] 
(__nf_conntrack_confirm+0xa8/0x618)
[13257.752284] [] (__nf_conntrack_confirm) from [] 
(ipv4_confirm+0xb8/0xfc)
[13257.761049] [] (ipv4_confirm) from [] 
(nf_iterate+0x48/0xa8)
[13257.769725] [] (nf_iterate) from [] 
(nf_hook_slow+0x30/0xb0)
[13257.777108] [] (nf_hook_slow) from [] 
(br_nf_post_routing+0x274/0x31c)
[13257.784486] [] (br_nf_post_routing) from [] 
(nf_iterate+0x48/0xa8)
[13257.792556] [] (nf_iterate) from [] 
(nf_hook_slow+0x30/0xb0)
[13257.800458] [] (nf_hook_slow) from [] 
(br_forward_finish+0x94/0xa4)
[13257.808010] [] (br_forward_finish) from [] 
(br_nf_forward_finish+0x150/0x1ac)
[13257.815736] [] (br_nf_forward_finish) from [] 
(nf_reinject+0x108/0x170)
[13257.824762] [] (nf_reinject) from [] 
(nfqnl_recv_verdict+0x3d8/0x420)
[13257.832924] [] (nfqnl_recv_verdict) from [] 
(nfnetlink_rcv_msg+0x158/0x248)
[13257.841256] [] (nfnetlink_rcv_msg) from [] 
(netlink_rcv_skb+0x54/0xb0)
[13257.849762] [] (netlink_rcv_skb) from [] 
(netlink_unicast+0x148/0x23c)
[13257.858093] [] (netlink_unicast) from [] 
(netlink_sendmsg+0x2ec/0x368)
[13257.866348] [] (netlink_sendmsg) from [] 
(sock_sendmsg+0x34/0x44)
[13257.874590] [] (sock_sendmsg) from [] 
(___sys_sendmsg+0x1ec/0x200)
[13257.882489] [] (___sys_sendmsg) from [] 
(__sys_sendmsg+0x3c/0x64)
[13257.890300] [] (__sys_sendmsg) from [] 
(ret_fast_syscall+0x0/0x34)

The original code just triggered the warning but do nothing. It will caused the 
shared
conntrack moves to the dying list and the packet be droppped 
(nf_ct_resolve_clash returns
NF_DROP for dying conntrack).

Reproduce steps:

++
|  br0(bridge)   |
||
+-+-+-+--+
  | eth0|   | eth1|   | eth2|
  | |   | |   | |
  +--+--+   +--+--+   +---+-+
 | |  |
 | |  |
  +--+-+ +-+--++--+-+
  | PC1| | PC2|| PC3|
  ++ ++++

iptables -A FORWARD -m mark --mark 0x100/0x100 -j NFQUEUE --queue-num 
100 --queue-bypass
ps: Our nfq userspace program will set mark on packets whose connection has 
already been processed.

PC1 sends broadcast packets simulated by hping3:
hping3 --rand-source --udp 192.168.1.255 -i u100
---
 net/netfilter/nf_conntrack_core.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/nf_conntrack_core.c 
b/net/netfilter/nf_conntrack_core.c
index ca1168d..1c16bd9 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -901,10 +901,17 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 * REJECT will give spurious warnings here.
 */
 
-   /* No external references means no one else could have
-* confirmed us.
+   /* Another skb with the same unconfirmed conntrack may
+* win the race. This may happen for bridge(br_flood)
+* or broadcast/multicast packets do skb_clone with
+* unconfirmed conntrack.
 */
-   WARN_ON(nf_ct_is_confirmed(ct));
+   if (unlikely(nf_ct_is_confirmed(ct))) {
+   nf_conntrack_double_unlock(hash, reply_hash);
+   local_bh_enable();
+   return NF_ACCEPT;
+   }
+
pr_debug("Confirming conntrack %p\n", ct);
/* We have to check the DYING flag after unlink to prevent
 * a race against nf_ct_get_next_corpse() possibly called from
-- 
2.7.4



[PATCH nf-next] netfilter: nfnetlink_log: remove empty nfnetlink_log.h header file

2018-10-18 Thread Taehee Yoo
/include/net/netfilter/nfnetlink_log.h file is empty.
so that it can be removed.

Signed-off-by: Taehee Yoo 
---
 include/net/netfilter/nfnetlink_log.h | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 include/net/netfilter/nfnetlink_log.h

diff --git a/include/net/netfilter/nfnetlink_log.h 
b/include/net/netfilter/nfnetlink_log.h
deleted file mode 100644
index ea32a7d3cf1b..
--- a/include/net/netfilter/nfnetlink_log.h
+++ /dev/null
@@ -1 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-- 
2.17.1



Re: netfilter request for -stable 4.9.x inclusion

2018-10-18 Thread Greg Kroah-Hartman
On Wed, Oct 17, 2018 at 06:34:22PM +0200, Pablo Neira Ayuso wrote:
> Hi Greg,
> 
> Could you enqueue the following patch for -stable 4.9.x?
> 
> commit ab6dd1beac7be3c17f8bf3d38bdf29ecb7293f1e
> Author: Xin Long 
> Date:   Thu Aug 10 10:22:24 2017 +0800
> 
> netfilter: check for seqadj ext existence before adding it in 
> nf_nat_setup_info
> 
> Cc'ing Laura, combining SNAT+DNAT+ftp helper is currently broken with
> 4.9.x. The patch above cures the issues.

Now queued up, thanks.

greg k-h


Re: [PATCH] netfilter: add grev6 conntrack support

2018-10-18 Thread Alin Năstac
Hi Pablo,

On Thu, Oct 18, 2018 at 1:53 PM Pablo Neira Ayuso  wrote:
>
> Hi Alin,
>
> On Thu, Oct 18, 2018 at 01:27:01PM +0200, Alin Nastac wrote:
> > From: Alin Nastac 
> >
> > nf_conntrack_proto_generic refuse to handle grev6 packets when
> > NF_CT_PROTO_GRE is enabled, resulting in grev6 packets being
> > categorized as INVALID.
>
> IIRC, this depends on the pptp helper, right? I'm telling this because
> the PPTP helper is IPv4 only and in that case this will not help?
>
> Recent updates in nf-next removed the l3proto field. See:
>
> commit dd2934a95701576203b2f61e8ded4e4a2f9183ea
> Author: Florian Westphal 
> Date:   Mon Sep 17 12:02:54 2018 +0200
>
> netfilter: conntrack: remove l3->l4 mapping information
>
> Which is part of what you want, right?

That's right, all I need is a conntrack protocol handler for GRE over IPv6.
Florian's solution looks more elegant.


Re: [PATCH] netfilter: add grev6 conntrack support

2018-10-18 Thread Pablo Neira Ayuso
Hi Alin,

On Thu, Oct 18, 2018 at 01:27:01PM +0200, Alin Nastac wrote:
> From: Alin Nastac 
> 
> nf_conntrack_proto_generic refuse to handle grev6 packets when
> NF_CT_PROTO_GRE is enabled, resulting in grev6 packets being
> categorized as INVALID.

IIRC, this depends on the pptp helper, right? I'm telling this because
the PPTP helper is IPv4 only and in that case this will not help?

Recent updates in nf-next removed the l3proto field. See:

commit dd2934a95701576203b2f61e8ded4e4a2f9183ea
Author: Florian Westphal 
Date:   Mon Sep 17 12:02:54 2018 +0200

netfilter: conntrack: remove l3->l4 mapping information

Which is part of what you want, right?

Thanks.


[PATCH] netfilter: conntrack: fix cloned skb __nf_conntrack_confirm race

2018-10-18 Thread chiehminw
From: Chieh-Min Wang 

For bridge or multicast packets, they could cloned skb with unconfirmed 
conntrack
 which break the rule unconfirmed skb->nfct is never shared. With nfqueue 
running
on my system, the race can be easily reproduced with following warning 
calltrace:

[13257.707525] CPU: 0 PID: 12132 Comm: main Tainted: PW   4.4.60 
#7744
[13257.707568] Hardware name: Qualcomm (Flattened Device Tree)
[13257.714700] [] (unwind_backtrace) from [] 
(show_stack+0x10/0x14)
[13257.720253] [] (show_stack) from [] 
(dump_stack+0x94/0xa8)
[13257.728240] [] (dump_stack) from [] 
(warn_slowpath_common+0x94/0xb0)
[13257.735268] [] (warn_slowpath_common) from [] 
(warn_slowpath_null+0x1c/0x24)
[13257.743519] [] (warn_slowpath_null) from [] 
(__nf_conntrack_confirm+0xa8/0x618)
[13257.752284] [] (__nf_conntrack_confirm) from [] 
(ipv4_confirm+0xb8/0xfc)
[13257.761049] [] (ipv4_confirm) from [] 
(nf_iterate+0x48/0xa8)
[13257.769725] [] (nf_iterate) from [] 
(nf_hook_slow+0x30/0xb0)
[13257.777108] [] (nf_hook_slow) from [] 
(br_nf_post_routing+0x274/0x31c)
[13257.784486] [] (br_nf_post_routing) from [] 
(nf_iterate+0x48/0xa8)
[13257.792556] [] (nf_iterate) from [] 
(nf_hook_slow+0x30/0xb0)
[13257.800458] [] (nf_hook_slow) from [] 
(br_forward_finish+0x94/0xa4)
[13257.808010] [] (br_forward_finish) from [] 
(br_nf_forward_finish+0x150/0x1ac)
[13257.815736] [] (br_nf_forward_finish) from [] 
(nf_reinject+0x108/0x170)
[13257.824762] [] (nf_reinject) from [] 
(nfqnl_recv_verdict+0x3d8/0x420)
[13257.832924] [] (nfqnl_recv_verdict) from [] 
(nfnetlink_rcv_msg+0x158/0x248)
[13257.841256] [] (nfnetlink_rcv_msg) from [] 
(netlink_rcv_skb+0x54/0xb0)
[13257.849762] [] (netlink_rcv_skb) from [] 
(netlink_unicast+0x148/0x23c)
[13257.858093] [] (netlink_unicast) from [] 
(netlink_sendmsg+0x2ec/0x368)
[13257.866348] [] (netlink_sendmsg) from [] 
(sock_sendmsg+0x34/0x44)
[13257.874590] [] (sock_sendmsg) from [] 
(___sys_sendmsg+0x1ec/0x200)
[13257.882489] [] (___sys_sendmsg) from [] 
(__sys_sendmsg+0x3c/0x64)
[13257.890300] [] (__sys_sendmsg) from [] 
(ret_fast_syscall+0x0/0x34)

The original code just triggered the warning but do nothing. It will caused the 
shared
conntrack moves to the dying list and the packet be droppped 
(nf_ct_resolve_clash returns
NF_DROP for dying conntrack).
---
 net/netfilter/nf_conntrack_core.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/nf_conntrack_core.c 
b/net/netfilter/nf_conntrack_core.c
index ca1168d..94e6c85 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -901,10 +901,16 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 * REJECT will give spurious warnings here.
 */
 
-   /* No external references means no one else could have
-* confirmed us.
+   /* Another skb with the same unconfirmed conntrack may
+* win the race. This may happen for bridge or multicast
+* packets that skb_clone skb with unconfirmed conntrack.
 */
-   WARN_ON(nf_ct_is_confirmed(ct));
+   if (nf_ct_is_confirmed(ct)) {
+   nf_conntrack_double_unlock(hash, reply_hash);
+   local_bh_enable();
+   return NF_ACCEPT;
+   }
+
pr_debug("Confirming conntrack %p\n", ct);
/* We have to check the DYING flag after unlink to prevent
 * a race against nf_ct_get_next_corpse() possibly called from
-- 
2.7.4



[PATCH] netfilter: add grev6 conntrack support

2018-10-18 Thread Alin Nastac
From: Alin Nastac 

nf_conntrack_proto_generic refuse to handle grev6 packets when
NF_CT_PROTO_GRE is enabled, resulting in grev6 packets being
categorized as INVALID.
---
 net/netfilter/nf_conntrack_proto_gre.c | 49 +-
 1 file changed, 48 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nf_conntrack_proto_gre.c 
b/net/netfilter/nf_conntrack_proto_gre.c
index 650eb4f..c6b249d 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -380,19 +380,60 @@ static const struct nf_conntrack_l4proto 
nf_conntrack_l4proto_gre4 = {
.init_net   = gre_init_net,
 };
 
+static const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre6 = {
+   .l3proto = AF_INET6,
+   .l4proto = IPPROTO_GRE,
+   .pkt_to_tuple= gre_pkt_to_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
+   .print_conntrack = gre_print_conntrack,
+#endif
+   .packet  = gre_packet,
+   .new = gre_new,
+   .destroy = gre_destroy,
+   .me  = THIS_MODULE,
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+   .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
+   .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
+   .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+   .nla_policy  = nf_ct_port_nla_policy,
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+   .ctnl_timeout= {
+   .nlattr_to_obj  = gre_timeout_nlattr_to_obj,
+   .obj_to_nlattr  = gre_timeout_obj_to_nlattr,
+   .nlattr_max = CTA_TIMEOUT_GRE_MAX,
+   .obj_size   = sizeof(unsigned int) * GRE_CT_MAX,
+   .nla_policy = gre_timeout_nla_policy,
+   },
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
+   .net_id = _gre_net_id,
+   .init_net   = gre_init_net,
+};
+
 static int proto_gre_net_init(struct net *net)
 {
int ret = 0;
 
ret = nf_ct_l4proto_pernet_register_one(net,
_conntrack_l4proto_gre4);
-   if (ret < 0)
+   if (ret < 0) {
pr_err("nf_conntrack_gre4: pernet registration failed.\n");
+   return ret;
+   }
+
+   ret = nf_ct_l4proto_pernet_register_one(net,
+   _conntrack_l4proto_gre6);
+   if (ret < 0) {
+   pr_err("nf_conntrack_gre6: pernet registration failed.\n");
+   nf_ct_l4proto_pernet_unregister_one(net, 
_conntrack_l4proto_gre4);
+   }
+
return ret;
 }
 
 static void proto_gre_net_exit(struct net *net)
 {
+   nf_ct_l4proto_pernet_unregister_one(net, _conntrack_l4proto_gre6);
nf_ct_l4proto_pernet_unregister_one(net, _conntrack_l4proto_gre4);
nf_ct_gre_keymap_flush(net);
 }
@@ -414,8 +455,13 @@ static int __init nf_ct_proto_gre_init(void)
ret = nf_ct_l4proto_register_one(_conntrack_l4proto_gre4);
if (ret < 0)
goto out_gre4;
+   ret = nf_ct_l4proto_register_one(_conntrack_l4proto_gre6);
+   if (ret < 0)
+   goto out_gre6;
 
return 0;
+out_gre6:
+   nf_ct_l4proto_unregister_one(_conntrack_l4proto_gre4);
 out_gre4:
unregister_pernet_subsys(_gre_net_ops);
 out_pernet:
@@ -424,6 +470,7 @@ static int __init nf_ct_proto_gre_init(void)
 
 static void __exit nf_ct_proto_gre_fini(void)
 {
+   nf_ct_l4proto_unregister_one(_conntrack_l4proto_gre6);
nf_ct_l4proto_unregister_one(_conntrack_l4proto_gre4);
unregister_pernet_subsys(_gre_net_ops);
 }
-- 
2.7.4



Re: iptables (nf_tables) error when negating an interface and using protocol port - works fine with classic iptables

2018-10-18 Thread Pedretti Fabio
Il giorno mar 9 ott 2018 alle ore 16:39 Florian Westphal
 ha scritto:
>
> Pedretti Fabio  wrote:
> > Hi, I tried iptables 1.8 with the new nf_tables back-end using the
> > Debian 1.8.0-1~exp1 package with my firewall script.
> >
> > It seems to properly load most rules, however I am getting an error
> > when negating an interface and using protocol ports, which works fine
> > with classic iptables.
> >
> > Specifically these work OK:
> > # iptables -A INPUT ! -i eth0 -p udp -j ACCEPT
> > # iptables -A INPUT -i eth0 -p udp --dport 5202 -j ACCEPT
> >
> > But when using an interface negation with --sport or --dport it
> > reports an error, here is an example:
> > # iptables -A INPUT ! -i eth0 -p udp --dport 5202 -j ACCEPT
> > iptables v1.8.0 (nf_tables):  RULE_APPEND failed (Invalid argument):
> > rule in chain INPUT
>
> Thanks for reporting, I think we should make a 1.81 release soon,
> this bug is fixed in iptables.git already.
>
> I'll prepare this, if there are objections please let me know.

I verified building iptables from git this issue is fixed.

It would be nice to have a release soon, so that the fix gets into
Linux distros (e.g. Debian).

Thanks.


[PATCH] netfilter: conntrack: fix cloned skb __nf_conntrack_confirm race

2018-10-18 Thread Chieh-Min Wang
From: Chieh-Min Wang 

For bridge or multicast packets, they could cloned skb with unconfirmed 
conntrack
 which break the rule unconfirmed skb->nfct is never shared. With nfqueue 
running
on my system, the race can be easily reproduced with following warning 
calltrace:

[13257.707525] CPU: 0 PID: 12132 Comm: main Tainted: PW   4.4.60 
#7744
[13257.707568] Hardware name: Qualcomm (Flattened Device Tree)
[13257.714700] [] (unwind_backtrace) from [] 
(show_stack+0x10/0x14)
[13257.720253] [] (show_stack) from [] 
(dump_stack+0x94/0xa8)
[13257.728240] [] (dump_stack) from [] 
(warn_slowpath_common+0x94/0xb0)
[13257.735268] [] (warn_slowpath_common) from [] 
(warn_slowpath_null+0x1c/0x24)
[13257.743519] [] (warn_slowpath_null) from [] 
(__nf_conntrack_confirm+0xa8/0x618)
[13257.752284] [] (__nf_conntrack_confirm) from [] 
(ipv4_confirm+0xb8/0xfc)
[13257.761049] [] (ipv4_confirm) from [] 
(nf_iterate+0x48/0xa8)
[13257.769725] [] (nf_iterate) from [] 
(nf_hook_slow+0x30/0xb0)
[13257.777108] [] (nf_hook_slow) from [] 
(br_nf_post_routing+0x274/0x31c)
[13257.784486] [] (br_nf_post_routing) from [] 
(nf_iterate+0x48/0xa8)
[13257.792556] [] (nf_iterate) from [] 
(nf_hook_slow+0x30/0xb0)
[13257.800458] [] (nf_hook_slow) from [] 
(br_forward_finish+0x94/0xa4)
[13257.808010] [] (br_forward_finish) from [] 
(br_nf_forward_finish+0x150/0x1ac)
[13257.815736] [] (br_nf_forward_finish) from [] 
(nf_reinject+0x108/0x170)
[13257.824762] [] (nf_reinject) from [] 
(nfqnl_recv_verdict+0x3d8/0x420)
[13257.832924] [] (nfqnl_recv_verdict) from [] 
(nfnetlink_rcv_msg+0x158/0x248)
[13257.841256] [] (nfnetlink_rcv_msg) from [] 
(netlink_rcv_skb+0x54/0xb0)
[13257.849762] [] (netlink_rcv_skb) from [] 
(netlink_unicast+0x148/0x23c)
[13257.858093] [] (netlink_unicast) from [] 
(netlink_sendmsg+0x2ec/0x368)
[13257.866348] [] (netlink_sendmsg) from [] 
(sock_sendmsg+0x34/0x44)
[13257.874590] [] (sock_sendmsg) from [] 
(___sys_sendmsg+0x1ec/0x200)
[13257.882489] [] (___sys_sendmsg) from [] 
(__sys_sendmsg+0x3c/0x64)
[13257.890300] [] (__sys_sendmsg) from [] 
(ret_fast_syscall+0x0/0x34)

The original code just triggered the warning but do nothing. It will caused the 
shared
conntrack moves to the dying list and the packet be droppped 
(nf_ct_resolve_clash returns
NF_DROP for dying conntrack).
---
 net/netfilter/nf_conntrack_core.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/nf_conntrack_core.c 
b/net/netfilter/nf_conntrack_core.c
index ca1168d..94e6c85 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -901,10 +901,16 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 * REJECT will give spurious warnings here.
 */
 
-   /* No external references means no one else could have
-* confirmed us.
+   /* Another skb with the same unconfirmed conntrack may
+* win the race. This may happen for bridge or multicast
+* packets that skb_clone skb with unconfirmed conntrack.
 */
-   WARN_ON(nf_ct_is_confirmed(ct));
+   if (nf_ct_is_confirmed(ct)) {
+   nf_conntrack_double_unlock(hash, reply_hash);
+   local_bh_enable();
+   return NF_ACCEPT;
+   }
+
pr_debug("Confirming conntrack %p\n", ct);
/* We have to check the DYING flag after unlink to prevent
 * a race against nf_ct_get_next_corpse() possibly called from
-- 
2.7.4



[PATCH libnftnl 4/4] src: Use memcpy() to handle potentially unaligned data

2018-10-17 Thread Matt Turner
Rolf Eike Beer  reported that nft-expr_quota-test fails
with a SIGBUS on SPARC due to unaligned accesses. This patch resolves
that and fixes additional sources of unaligned accesses matching the
same pattern. Both nft-expr_quota-test and nft-expr_objref-test
generated unaligned accesses on DEC Alpha.

Bug: https://bugs.gentoo.org/666448
Signed-off-by: Matt Turner 
---
 src/chain.c  | 12 ++--
 src/expr/bitwise.c   |  6 +++---
 src/expr/byteorder.c | 10 +-
 src/expr/cmp.c   |  4 ++--
 src/expr/connlimit.c |  4 ++--
 src/expr/counter.c   |  4 ++--
 src/expr/ct.c|  8 
 src/expr/dup.c   |  4 ++--
 src/expr/dynset.c| 10 +-
 src/expr/exthdr.c| 14 +++---
 src/expr/fib.c   |  6 +++---
 src/expr/fwd.c   |  6 +++---
 src/expr/hash.c  | 16 
 src/expr/immediate.c |  4 ++--
 src/expr/limit.c | 10 +-
 src/expr/log.c   | 10 +-
 src/expr/lookup.c|  8 
 src/expr/masq.c  |  6 +++---
 src/expr/match.c |  2 +-
 src/expr/meta.c  |  6 +++---
 src/expr/nat.c   | 14 +++---
 src/expr/numgen.c| 10 +-
 src/expr/objref.c|  6 +++---
 src/expr/osf.c   |  4 ++--
 src/expr/payload.c   | 16 
 src/expr/queue.c |  8 
 src/expr/quota.c |  6 +++---
 src/expr/range.c |  4 ++--
 src/expr/redir.c |  6 +++---
 src/expr/reject.c|  4 ++--
 src/expr/rt.c|  4 ++--
 src/expr/socket.c|  4 ++--
 src/expr/target.c|  2 +-
 src/expr/tproxy.c|  6 +++---
 src/expr/tunnel.c|  4 ++--
 src/expr/xfrm.c  |  8 
 src/gen.c|  2 +-
 src/obj/counter.c|  4 ++--
 src/obj/ct_helper.c  |  4 ++--
 src/obj/ct_timeout.c |  4 ++--
 src/obj/limit.c  | 10 +-
 src/obj/quota.c  |  6 +++---
 src/obj/tunnel.c | 28 ++--
 src/object.c |  6 +++---
 src/rule.c   | 12 ++--
 src/set.c| 26 +-
 src/set_elem.c   | 12 +++-
 src/table.c  |  8 
 src/utils.c  |  4 ++--
 49 files changed, 192 insertions(+), 190 deletions(-)

diff --git a/src/chain.c b/src/chain.c
index 06252b6..01d62c8 100644
--- a/src/chain.c
+++ b/src/chain.c
@@ -190,22 +190,22 @@ int nftnl_chain_set_data(struct nftnl_chain *c, uint16_t 
attr,
memcpy(>prio, data, sizeof(c->prio));
break;
case NFTNL_CHAIN_POLICY:
-   c->policy = *((uint32_t *)data);
+   memcpy(>policy, data, sizeof(c->policy));
break;
case NFTNL_CHAIN_USE:
-   c->use = *((uint32_t *)data);
+   memcpy(>use, data, sizeof(c->use));
break;
case NFTNL_CHAIN_BYTES:
-   c->bytes = *((uint64_t *)data);
+   memcpy(>bytes, data, sizeof(c->bytes));
break;
case NFTNL_CHAIN_PACKETS:
-   c->packets = *((uint64_t *)data);
+   memcpy(>packets, data, sizeof(c->packets));
break;
case NFTNL_CHAIN_HANDLE:
-   c->handle = *((uint64_t *)data);
+   memcpy(>handle, data, sizeof(c->handle));
break;
case NFTNL_CHAIN_FAMILY:
-   c->family = *((uint32_t *)data);
+   memcpy(>family, data, sizeof(c->family));
break;
case NFTNL_CHAIN_TYPE:
if (c->flags & (1 << NFTNL_CHAIN_TYPE))
diff --git a/src/expr/bitwise.c b/src/expr/bitwise.c
index ebfbcd0..79d6a51 100644
--- a/src/expr/bitwise.c
+++ b/src/expr/bitwise.c
@@ -37,13 +37,13 @@ nftnl_expr_bitwise_set(struct nftnl_expr *e, uint16_t type,
 
switch(type) {
case NFTNL_EXPR_BITWISE_SREG:
-   bitwise->sreg = *((uint32_t *)data);
+   memcpy(>sreg, data, sizeof(bitwise->sreg));
break;
case NFTNL_EXPR_BITWISE_DREG:
-   bitwise->dreg = *((uint32_t *)data);
+   memcpy(>dreg, data, sizeof(bitwise->dreg));
break;
case NFTNL_EXPR_BITWISE_LEN:
-   bitwise->len = *((unsigned int *)data);
+   memcpy(>len, data, sizeof(bitwise->len));
break;
case NFTNL_EXPR_BITWISE_MASK:
memcpy(>mask.val, data, data_len);
diff --git a/src/expr/byteorder.c b/src/expr/byteorder.c
index 4cddd4a..35c1450 100644
--- a/src/expr/byteorder.c
+++ b/src/expr/byteorder.c
@@ -37,19 +37,19 @@ nftnl_expr_byteorder_set(struct nftnl_expr *e, uint16_t 
type,
 
switch(type) {
case NFTNL_EXPR_BYTEORDER_SREG:
-   byteorder->sreg = *((uint32_t *)data);
+   memcpy(>sreg, data, sizeof(byteorder->sreg));
break;
case NFTNL_EXPR_BYTEORDER_DREG:
-   byteorder->dreg = *((uint32_t *)data);
+   memcpy(>dreg, data, sizeof(byteorder->dreg));
break;
case NFTNL_EXPR_BYTEORDER_OP:

[PATCH libnftnl 1/4] tests: Execute nft-flowtable-test in test-script.sh

2018-10-17 Thread Matt Turner
Seems to have been forgotten in commit 4d472c225ba0 ("tests: add
flowtable regression test")

Signed-off-by: Matt Turner 
---
 tests/test-script.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/test-script.sh b/tests/test-script.sh
index 83dbda2..72eebbc 100755
--- a/tests/test-script.sh
+++ b/tests/test-script.sh
@@ -25,6 +25,7 @@
 ./nft-expr_reject-test
 ./nft-expr_target-test
 ./nft-expr_hash-test
+./nft-flowtable-test
 ./nft-rule-test
 ./nft-set-test
 ./nft-table-test
-- 
2.16.4



[PATCH libnftnl 2/4] tests: Run regression tests from make check

2018-10-17 Thread Matt Turner
The existing test-script.sh does not check the return values of the
tests so it is not very good for automated testing.

Signed-off-by: Matt Turner 
---
 tests/Makefile.am | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/Makefile.am b/tests/Makefile.am
index 556575f..ad493b5 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -35,6 +35,8 @@ check_PROGRAMS =  nft-table-test  \
nft-expr_target-test\
nft-expr_hash-test
 
+TESTS = $(check_PROGRAMS)
+
 nft_table_test_SOURCES = nft-table-test.c
 nft_table_test_LDADD = ../src/libnftnl.la ${LIBMNL_LIBS}
 
-- 
2.16.4



netfilter request for -stable 4.9.x inclusion

2018-10-17 Thread Pablo Neira Ayuso
Hi Greg,

Could you enqueue the following patch for -stable 4.9.x?

commit ab6dd1beac7be3c17f8bf3d38bdf29ecb7293f1e
Author: Xin Long 
Date:   Thu Aug 10 10:22:24 2017 +0800

netfilter: check for seqadj ext existence before adding it in 
nf_nat_setup_info

Cc'ing Laura, combining SNAT+DNAT+ftp helper is currently broken with
4.9.x. The patch above cures the issues.

Thanks.


[PATCH nf-next] netfilter: remove unused udp.h header.

2018-10-17 Thread Weongyo Jeong
udp.h header isn't used at these files.  So it's safe to remove.

Signed-off-by: Weongyo Jeong 
---
 net/ipv4/netfilter/ipt_CLUSTERIP.c | 1 -
 net/ipv4/netfilter/ipt_REJECT.c| 1 -
 2 files changed, 2 deletions(-)

diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c 
b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 2c8d313..35984f4 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -18,7 +18,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index e8bed33..503ba09 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -14,7 +14,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
-- 
2.7.4



[PATCH nf-next] netfilter: remove two unused variables.

2018-10-17 Thread Weongyo Jeong
nft_dup_netdev_ingress_ops and nft_fwd_netdev_ingress_ops variables are
no longer used at the code.

Signed-off-by: Weongyo Jeong 
---
 net/netfilter/nft_dup_netdev.c | 2 --
 net/netfilter/nft_fwd_netdev.c | 4 
 2 files changed, 6 deletions(-)

diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c
index 2cc1e0ef..15cc62b 100644
--- a/net/netfilter/nft_dup_netdev.c
+++ b/net/netfilter/nft_dup_netdev.c
@@ -46,8 +46,6 @@ static int nft_dup_netdev_init(const struct nft_ctx *ctx,
return nft_validate_register_load(priv->sreg_dev, sizeof(int));
 }
 
-static const struct nft_expr_ops nft_dup_netdev_ingress_ops;
-
 static int nft_dup_netdev_dump(struct sk_buff *skb, const struct nft_expr 
*expr)
 {
struct nft_dup_netdev *priv = nft_expr_priv(expr);
diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c
index 8abb989..d7694e7 100644
--- a/net/netfilter/nft_fwd_netdev.c
+++ b/net/netfilter/nft_fwd_netdev.c
@@ -53,8 +53,6 @@ static int nft_fwd_netdev_init(const struct nft_ctx *ctx,
return nft_validate_register_load(priv->sreg_dev, sizeof(int));
 }
 
-static const struct nft_expr_ops nft_fwd_netdev_ingress_ops;
-
 static int nft_fwd_netdev_dump(struct sk_buff *skb, const struct nft_expr 
*expr)
 {
struct nft_fwd_netdev *priv = nft_expr_priv(expr);
@@ -169,8 +167,6 @@ static int nft_fwd_neigh_init(const struct nft_ctx *ctx,
return nft_validate_register_load(priv->sreg_addr, addr_len);
 }
 
-static const struct nft_expr_ops nft_fwd_netdev_ingress_ops;
-
 static int nft_fwd_neigh_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
struct nft_fwd_neigh *priv = nft_expr_priv(expr);
-- 
2.7.4



[PATCH nf-next] netfilter: x_tables: add missing comments

2018-10-17 Thread Hyejeong Jang
Comments about two member variables "family" and
"nft_compat" are missing. So I added them.

Signed-off-by: Hyejeong Jang 
---
 include/linux/netfilter/x_tables.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/linux/netfilter/x_tables.h 
b/include/linux/netfilter/x_tables.h
index 9077b3ebea08..4594d1fa3f5c 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -87,6 +87,8 @@ static inline u_int8_t xt_family(const struct xt_action_param 
*par)
  * @match: struct xt_match through which this function was invoked
  * @matchinfo: per-match data
  * @hook_mask: via which hooks the new rule is reachable
+ * @family:address family
+ * @nft_compat:true if extensions run from nft compat layer
  * Other fields as above.
  */
 struct xt_mtchk_param {
-- 
2.17.1



[PATCH nft,v2] evaluate: bogus bail out with raw expression from dynamic sets

2018-10-17 Thread Pablo Neira Ayuso
The following ruleset that uses raw expressions:

 table ip nftlb {
map persistency {
type inet_service : mark
size 65535
timeout 1h
elements = { 53 expires 59m55s864ms : 0x0064, 80 expires 
59m58s924ms : 0x0065, 443 expires 59m56s220ms : 0x0064 }
}

chain pre {
type filter hook prerouting priority filter; policy accept;
ip protocol { tcp, udp } update @persistencia { @th,0,16 : 
numgen inc mod 2 offset 100 }
}
 }

bogusly bails out with:

 /tmp/test:9:57-64: Error: datatype mismatch: expected internet network 
service, expression has type integer
 ip protocol { tcp, udp } update @persistencia { @th,0,16 : numgen inc 
mod 2 offset 100 }
  
~~~

Fix the problem by evaluating expression basetype and length in this case.

Reported-by: Laura Garcia 
Signed-off-by: Pablo Neira Ayuso 
---
v2: passes tests/py

 include/datatype.h |  6 ++
 src/evaluate.c | 12 +++-
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/include/datatype.h b/include/datatype.h
index eab505ba53f8..f7092f06a5ec 100644
--- a/include/datatype.h
+++ b/include/datatype.h
@@ -171,6 +171,12 @@ static inline bool datatype_equal(const struct datatype 
*d1,
return d1->type == d2->type;
 }
 
+static inline const struct datatype *
+datatype_basetype(const struct datatype *dtype)
+{
+   return dtype->basetype ? dtype->basetype : dtype;
+}
+
 /**
  * struct symbolic_constant - symbol <-> constant mapping
  *
diff --git a/src/evaluate.c b/src/evaluate.c
index db49a18d0150..1880578b8738 100644
--- a/src/evaluate.c
+++ b/src/evaluate.c
@@ -1854,7 +1854,17 @@ static int stmt_evaluate_arg(struct eval_ctx *ctx, 
struct stmt *stmt,
if (expr_evaluate(ctx, expr) < 0)
return -1;
 
-   if (!datatype_equal((*expr)->dtype, dtype))
+   if ((*expr)->ops->type == EXPR_PAYLOAD &&
+   (*expr)->dtype->type == TYPE_INTEGER &&
+   ((*expr)->dtype->type != datatype_basetype(dtype)->type ||
+(*expr)->len != len))
+   return stmt_binary_error(ctx, *expr, stmt,
+"datatype mismatch: expected %s, "
+"expression has type %s with length 
%d",
+dtype->desc, (*expr)->dtype->desc,
+(*expr)->len);
+   else if ((*expr)->dtype->type != TYPE_INTEGER &&
+!datatype_equal((*expr)->dtype, dtype))
return stmt_binary_error(ctx, *expr, stmt,
 "datatype mismatch: expected %s, "
 "expression has type %s",
-- 
2.11.0



[PATCH nft] evaluate: bogus bail out with raw expression from dynamic sets

2018-10-17 Thread Pablo Neira Ayuso
The following ruleset that uses raw expressions:

 table ip nftlb {
map persistency {
type inet_service : mark
size 65535
timeout 1h
elements = { 53 expires 59m55s864ms : 0x0064, 80 expires 
59m58s924ms : 0x0065, 443 expires 59m56s220ms : 0x0064 }
}

chain pre {
type filter hook prerouting priority filter; policy accept;
ip protocol { tcp, udp } update @persistencia { @th,0,16 : 
numgen inc mod 2 offset 100 }
}
 }

bogusly bails out with:

 /tmp/test:9:57-64: Error: datatype mismatch: expected internet network 
service, expression has type integer
 ip protocol { tcp, udp } update @persistencia { @th,0,16 : numgen inc 
mod 2 offset 100 }
  
~~~

Fix the problem by evaluating expression basetype and length in this case.

Reported-by: Laura Garcia 
Signed-off-by: Pablo Neira Ayuso 
---
 src/evaluate.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/evaluate.c b/src/evaluate.c
index db49a18d0150..19d4b65bfa78 100644
--- a/src/evaluate.c
+++ b/src/evaluate.c
@@ -1854,7 +1854,16 @@ static int stmt_evaluate_arg(struct eval_ctx *ctx, 
struct stmt *stmt,
if (expr_evaluate(ctx, expr) < 0)
return -1;
 
-   if (!datatype_equal((*expr)->dtype, dtype))
+   if ((*expr)->dtype->type == TYPE_INTEGER &&
+   ((*expr)->dtype->type != dtype->basetype->type ||
+(*expr)->len != len))
+   return stmt_binary_error(ctx, *expr, stmt,
+"datatype mismatch: expected %s, "
+"expression has type %s with length 
%d",
+dtype->desc, (*expr)->dtype->desc,
+(*expr)->len);
+   else if ((*expr)->dtype->type != TYPE_INTEGER &&
+!datatype_equal((*expr)->dtype, dtype))
return stmt_binary_error(ctx, *expr, stmt,
 "datatype mismatch: expected %s, "
 "expression has type %s",
-- 
2.11.0



Re: [PATCH nf-next] netfilter: nft_flow_offload: remove secpath check

2018-10-17 Thread Steffen Klassert
On Thu, Oct 11, 2018 at 11:45:40PM +0200, Pablo Neira Ayuso wrote:
> It is safe to place a flow that is coming from IPSec into the flowtable.
> So decapsulated can benefit from the flowtable fastpath.
> 
> Signed-off-by: Pablo Neira Ayuso 
> Signed-off-by: Steffen Klassert 
> ---
> I'm recovering this patch, this enables faster flowtable forwarding from
> ingress. Florian has been asking for a way to restore the xfrm cache,
> and I remember Steffen mentioned this two liner should be just enough to
> combine the flowtable infrastructure with ipsec.

Yes, it was this and we need to relax the requirement to see
traffic in both directions before offloading to the flowtable
(if I remember correct).


Re: [PATCH 1/2 nft v2] src: osf: add ttl option support

2018-10-17 Thread Fernando Fernandez Mancera




On 10/15/18 2:47 PM, Pablo Neira Ayuso wrote:

Please send a v3 including tests/py. More comments below.

On Sat, Sep 29, 2018 at 12:15:17PM +0200, Fernando Fernandez Mancera wrote:

Add support for ttl option in "osf" expression. Example:

table ip foo {
chain bar {
type filter hook input priority filter; policy accept;
osf ttl nocheck name "Linux"


Listing and output should match, ie. what you list should work with
nft -f, see below.


diff --git a/src/parser_bison.y b/src/parser_bison.y
index 831090b..a7ec858 100644
--- a/src/parser_bison.y
+++ b/src/parser_bison.y
@@ -739,6 +739,7 @@ int nft_lex(void *, void *, void *);
  %type  fib_tuple   fib_result  fib_flag
  
  %type 			osf_expr

+%type   osf_ttl
  %destructor { expr_free($$); }osf_expr
  
  %type 			markup_format

@@ -3112,9 +3113,21 @@ fib_tuple:   fib_flagDOT 
fib_tuple
|   fib_flag
;
  
-osf_expr		:	OSF	NAME

+osf_expr   :   OSF osf_ttl NAME
{
-   $$ = osf_expr_alloc(&@$);
+   $$ = osf_expr_alloc(&@$, $2);
+   }
+   ;
+
+osf_ttl:   /* empty */ { $$ = 0; }
+   |   STRING
+   {
+   if (!strcmp($1, "ttl-global"))


This should be "global". But I would suggest you rename this to "loose".


+   $$ = 1;


Can we use NFT_OSF_* definitions, instead of magic number?


+   else if (!strcmp($1, "ttl-nocheck"))


This should be "nocheck". But I'd suggest you rename this to "skip"


+   $$ = 2;


Same here, avoid magic number, use definition.


+   else
+   $$ = 3;


Same thing.


}
;
diff --git a/src/osf.c b/src/osf.c
index 85c9573..c7dd25f 100644
--- a/src/osf.c
+++ b/src/osf.c
@@ -5,13 +5,32 @@
  #include 
  #include 
  
+static const char *osf_ttl_int_to_str(const uint8_t ttl)

+{
+   if (ttl == 1)
+   return "ttl global";
+   else if (ttl == 2)
+   return "ttl nocheck";
+
+   return "";
+}
+
  static void osf_expr_print(const struct expr *expr, struct output_ctx *octx)
  {
-   nft_print(octx, "osf name");
+   const char *ttl_str;
+
+   ttl_str = osf_ttl_int_to_str(expr->osf.ttl);
+   nft_print(octx, "osf %s name", ttl_str);
  }
  
  static void osf_expr_clone(struct expr *new, const struct expr *expr)

  {
+   new->osf.ttl = expr->osf.ttl;
+}
+
+static bool osf_expr_cmp(const struct expr *e1, const struct expr *e2)
+{
+   return e1->osf.ttl == e2->osf.ttl;
  }
  
  static const struct expr_ops osf_expr_ops = {

@@ -19,10 +38,11 @@ static const struct expr_ops osf_expr_ops = {
.name   = "osf",
.print  = osf_expr_print,
.clone  = osf_expr_clone,
+   .cmp= osf_expr_cmp,
.json   = osf_expr_json,


BTW, could you extend json to support 'ttl' too?



Hi Pablo, thanks for this review.

All changes seem fine to me :-)


Re: [PATCH nft] src: remove opts field from struct xt_stmt

2018-10-16 Thread Phil Sutter
On Tue, Oct 16, 2018 at 08:58:20PM +0200, Pablo Neira Ayuso wrote:
> This is never used, ie. always NULL.
> 
> Reported-by: Phil Sutter 
> Signed-off-by: Pablo Neira Ayuso 

Acked-by: Phil Sutter 

Thanks for clearing this up!


[PATCH nft] src: remove opts field from struct xt_stmt

2018-10-16 Thread Pablo Neira Ayuso
This is never used, ie. always NULL.

Reported-by: Phil Sutter 
Signed-off-by: Pablo Neira Ayuso 
---
 include/statement.h | 1 -
 src/statement.c | 1 -
 src/xt.c| 8 ++--
 3 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/include/statement.h b/include/statement.h
index 48ba6673553b..442405505072 100644
--- a/include/statement.h
+++ b/include/statement.h
@@ -237,7 +237,6 @@ struct xt_stmt {
struct xtables_match*match;
struct xtables_target   *target;
};
-   const char  *opts;
void*entry;
 };
 
diff --git a/src/statement.c b/src/statement.c
index 909f04ca382e..e50ac706402d 100644
--- a/src/statement.c
+++ b/src/statement.c
@@ -832,7 +832,6 @@ static void xt_stmt_print(const struct stmt *stmt, struct 
output_ctx *octx)
 static void xt_stmt_destroy(struct stmt *stmt)
 {
xfree(stmt->xt.name);
-   xfree(stmt->xt.opts);
xt_stmt_release(stmt);
 }
 
diff --git a/src/xt.c b/src/xt.c
index 95d0c5f24c07..74763d58cafd 100644
--- a/src/xt.c
+++ b/src/xt.c
@@ -32,9 +32,7 @@ void xt_stmt_xlate(const struct stmt *stmt)
 
switch (stmt->xt.type) {
case NFT_XT_MATCH:
-   if (stmt->xt.match == NULL && stmt->xt.opts) {
-   printf("%s", stmt->xt.opts);
-   } else if (stmt->xt.match->xlate) {
+   if (stmt->xt.match->xlate) {
struct xt_xlate_mt_params params = {
.ip = stmt->xt.entry,
.match  = stmt->xt.match->m,
@@ -51,9 +49,7 @@ void xt_stmt_xlate(const struct stmt *stmt)
break;
case NFT_XT_WATCHER:
case NFT_XT_TARGET:
-   if (stmt->xt.target == NULL && stmt->xt.opts) {
-   printf("%s", stmt->xt.opts);
-   } else if (stmt->xt.target->xlate) {
+   if (stmt->xt.target->xlate) {
struct xt_xlate_tg_params params = {
.ip = stmt->xt.entry,
.target = stmt->xt.target->t,
-- 
2.11.0



[PATCH nf-next] netfilter: xt_quota: simplify quota logic, account for consumed bytes

2018-10-16 Thread Pablo Neira Ayuso
Store consumed bytes, instead of remaining bytes, this simplifies
logic quite a bit.

Cc: Chenbo Feng 
Cc: Maciej Żenczykowski 
Signed-off-by: Pablo Neira Ayuso 
---
Before merge window closes and it's too late to change semantics.

 include/uapi/linux/netfilter/xt_quota.h |  4 ++--
 net/netfilter/xt_quota.c| 27 ---
 2 files changed, 10 insertions(+), 21 deletions(-)

diff --git a/include/uapi/linux/netfilter/xt_quota.h 
b/include/uapi/linux/netfilter/xt_quota.h
index d72fd52adbba..8abe7e6261c9 100644
--- a/include/uapi/linux/netfilter/xt_quota.h
+++ b/include/uapi/linux/netfilter/xt_quota.h
@@ -16,9 +16,9 @@ struct xt_quota_info {
__u32 pad;
__aligned_u64 quota;
 #ifdef __KERNEL__
-   atomic64_t counter;
+   atomic64_t consumed;
 #else
-   __aligned_u64 remain;
+   __aligned_u64 consumed;
 #endif
 };
 
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index fceae245eb03..83ce440e4b6e 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -17,27 +17,18 @@ MODULE_DESCRIPTION("Xtables: countdown quota match");
 MODULE_ALIAS("ipt_quota");
 MODULE_ALIAS("ip6t_quota");
 
+static inline bool xt_overquota(struct xt_quota_info *q,
+   const struct sk_buff *skb)
+{
+   return atomic64_add_return(skb->len, >consumed) >= q->quota;
+}
+
 static bool
 quota_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
struct xt_quota_info *q = (void *)par->matchinfo;
-   u64 current_count = atomic64_read(>counter);
-   bool ret = q->flags & XT_QUOTA_INVERT;
-   u64 old_count, new_count;
 
-   do {
-   if (current_count == 1)
-   return ret;
-   if (current_count <= skb->len) {
-   atomic64_set(>counter, 1);
-   return ret;
-   }
-   old_count = current_count;
-   new_count = current_count - skb->len;
-   current_count = atomic64_cmpxchg(>counter, old_count,
-new_count);
-   } while (current_count != old_count);
-   return !ret;
+   return xt_overquota(q, skb) ^ (q->flags & XT_QUOTA_INVERT);
 }
 
 static int quota_mt_check(const struct xt_mtchk_param *par)
@@ -48,11 +39,9 @@ static int quota_mt_check(const struct xt_mtchk_param *par)
 
if (q->flags & ~XT_QUOTA_MASK)
return -EINVAL;
-   if (atomic64_read(>counter) > q->quota + 1)
+   if (atomic64_read(>consumed) > q->quota)
return -ERANGE;
 
-   if (atomic64_read(>counter) == 0)
-   atomic64_set(>counter, q->quota + 1);
return 0;
 }
 
-- 
2.11.0



[nf-next:master 1/7] net/ipv4/netfilter/ipt_ECN.c:58:28: error: 'IPT_ECN_OP_SET_ECE' undeclared; did you mean 'IPT_ECN_OP_MATCH_ECE'?

2018-10-16 Thread kbuild test robot
tree:   https://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git master
head:   60dd57bba519ab75277df610d5d245ed3af3c57c
commit: 25038aaf0cbf7639a18f80aeddb325811aff23c3 [1/7] UAPI: netfilter: Fix 
symbol collision issues [ver #2]
config: m68k-mvme16x_defconfig (attached as .config)
compiler: m68k-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
git checkout 25038aaf0cbf7639a18f80aeddb325811aff23c3
# save the attached .config to linux build tree
GCC_VERSION=7.2.0 make.cross ARCH=m68k 

All errors (new ones prefixed by >>):

   net/ipv4/netfilter/ipt_ECN.c: In function 'set_ect_tcp':
>> net/ipv4/netfilter/ipt_ECN.c:58:28: error: 'IPT_ECN_OP_SET_ECE' undeclared 
>> (first use in this function); did you mean 'IPT_ECN_OP_MATCH_ECE'?
 if ((!(einfo->operation & IPT_ECN_OP_SET_ECE) ||
   ^~
   IPT_ECN_OP_MATCH_ECE
   net/ipv4/netfilter/ipt_ECN.c:58:28: note: each undeclared identifier is 
reported only once for each function it appears in
>> net/ipv4/netfilter/ipt_ECN.c:60:28: error: 'IPT_ECN_OP_SET_CWR' undeclared 
>> (first use in this function); did you mean 'IPT_ECN_OP_SET_ECE'?
 (!(einfo->operation & IPT_ECN_OP_SET_CWR) ||
   ^~
   IPT_ECN_OP_SET_ECE
   net/ipv4/netfilter/ipt_ECN.c: In function 'ecn_tg':
>> net/ipv4/netfilter/ipt_ECN.c:84:25: error: 'IPT_ECN_OP_SET_IP' undeclared 
>> (first use in this function); did you mean 'IPT_ECN_OP_MATCH_IP'?
 if (einfo->operation & IPT_ECN_OP_SET_IP)
^
IPT_ECN_OP_MATCH_IP
>> net/ipv4/netfilter/ipt_ECN.c:88:26: error: 'IPT_ECN_OP_SET_ECE' undeclared 
>> (first use in this function); did you mean 'IPT_ECN_OP_SET_IP'?
 if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR) &&
 ^~
 IPT_ECN_OP_SET_IP
   net/ipv4/netfilter/ipt_ECN.c:88:47: error: 'IPT_ECN_OP_SET_CWR' undeclared 
(first use in this function); did you mean 'IPT_ECN_OP_SET_ECE'?
 if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR) &&
  ^~
  IPT_ECN_OP_SET_ECE
   net/ipv4/netfilter/ipt_ECN.c: In function 'ecn_tg_check':
>> net/ipv4/netfilter/ipt_ECN.c:101:25: error: 'IPT_ECN_OP_MASK' undeclared 
>> (first use in this function); did you mean 'IPT_ECN_IP_MASK'?
 if (einfo->operation & IPT_ECN_OP_MASK)
^~~
IPT_ECN_IP_MASK
   net/ipv4/netfilter/ipt_ECN.c:107:27: error: 'IPT_ECN_OP_SET_ECE' undeclared 
(first use in this function); did you mean 'IPT_ECN_OP_MATCH_ECE'?
 if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR)) &&
  ^~
  IPT_ECN_OP_MATCH_ECE
   net/ipv4/netfilter/ipt_ECN.c:107:46: error: 'IPT_ECN_OP_SET_CWR' undeclared 
(first use in this function); did you mean 'IPT_ECN_OP_SET_ECE'?
 if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR)) &&
 ^~
 IPT_ECN_OP_SET_ECE

vim +58 net/ipv4/netfilter/ipt_ECN.c

^1da177e4 Linus Torvalds 2005-04-16   45  
e1931b784 Jan Engelhardt 2007-07-07   46  /* Return false if there was an 
error. */
e1931b784 Jan Engelhardt 2007-07-07   47  static inline bool
3db05fea5 Herbert Xu 2007-10-15   48  set_ect_tcp(struct sk_buff *skb, 
const struct ipt_ECN_info *einfo)
^1da177e4 Linus Torvalds 2005-04-16   49  {
^1da177e4 Linus Torvalds 2005-04-16   50struct tcphdr _tcph, *tcph;
6a19d6147 Al Viro2006-09-28   51__be16 oldval;
^1da177e4 Linus Torvalds 2005-04-16   52  
af901ca18 André Goddard Rosa 2009-11-14   53/* Not enough header? */
3db05fea5 Herbert Xu 2007-10-15   54tcph = skb_header_pointer(skb, 
ip_hdrlen(skb), sizeof(_tcph), &_tcph);
^1da177e4 Linus Torvalds 2005-04-16   55if (!tcph)
e1931b784 Jan Engelhardt 2007-07-07   56return false;
^1da177e4 Linus Torvalds 2005-04-16   57  
fd841326d Patrick McHardy2005-08-20  @58if ((!(einfo->operation & 
IPT_ECN_OP_SET_ECE) ||
fd841326d Patrick McHardy2005-08-20   59 tcph->ece == 
einfo->proto.tcp.ece) &&
7c4e36bc1 Jan Engelhardt 2007-07-07  @60(!(einfo->operation & 
IPT_ECN_OP_SET_CWR) ||
7c4e36bc1 Jan Engelhardt 2007-07-07   61 tcph->cwr == 
einfo->proto.tcp.cwr))
e1931b784 Jan Engelhardt 2007-07-07   62return true;
^1da177e4 Linus Torvalds 2005-04-16   63  
3db05fea5 Herbert Xu

Working test 6

2018-10-15 Thread Judy

Did you get my email from last week?
Let me know if you have photos for cutting out or retouching?

We are an image team who can do editing for your the web store photos,
industry photos or portrait photos.

Send photos, we will do testing for you to check quality.
Waiting for your reply soon.

Thanks,
Judy



Re: [PATCH libnftables] src: remove json support

2018-10-15 Thread Pablo Neira Ayuso
On Mon, Oct 15, 2018 at 05:18:48PM +0200, Phil Sutter wrote:
> Hey,
> 
> On Mon, Oct 15, 2018 at 04:45:38PM +0200, Pablo Neira Ayuso wrote:
> > On Mon, Oct 15, 2018 at 02:34:21PM +0200, Pablo Neira Ayuso wrote:
> > > On Mon, Oct 15, 2018 at 02:08:07PM +0200, Phil Sutter wrote:
> > > > On Mon, Oct 15, 2018 at 01:29:52PM +0200, Pablo Neira Ayuso wrote:
> > > > > Subject: [PATCH libnftables] src: remove json support
> > > >   ~~~
> > > > 
> > > > This is libnftnl, right? :)
> > > > 
> > > > Apart from that:
> > > > 
> > > > Acked-by: Phil Sutter 
> > > 
> > > Right, thanks!
> > 
> > Oh, I pushed out this patch without including your Acked-by.
> > 
> > Sorry about that, it was not intentional.
> 
> No problem, this way people won't find me that easily if the change
> broke their scripts. ;)

We'll point them to adapt any existing infrastructure to your work, it
should be relatively easy to upgrade to your higher level json scheme.
Specifically if they we're dealing with matching bitfield and such,
whose arithmetics are not that obvious.

Thanks!


Re: [PATCH libnftables] src: remove json support

2018-10-15 Thread Phil Sutter
Hey,

On Mon, Oct 15, 2018 at 04:45:38PM +0200, Pablo Neira Ayuso wrote:
> On Mon, Oct 15, 2018 at 02:34:21PM +0200, Pablo Neira Ayuso wrote:
> > On Mon, Oct 15, 2018 at 02:08:07PM +0200, Phil Sutter wrote:
> > > On Mon, Oct 15, 2018 at 01:29:52PM +0200, Pablo Neira Ayuso wrote:
> > > > Subject: [PATCH libnftables] src: remove json support
> > >   ~~~
> > > 
> > > This is libnftnl, right? :)
> > > 
> > > Apart from that:
> > > 
> > > Acked-by: Phil Sutter 
> > 
> > Right, thanks!
> 
> Oh, I pushed out this patch without including your Acked-by.
> 
> Sorry about that, it was not intentional.

No problem, this way people won't find me that easily if the change
broke their scripts. ;)

Cheers, Phil


Re: [PATCH libnftables] src: remove json support

2018-10-15 Thread Pablo Neira Ayuso
On Mon, Oct 15, 2018 at 02:34:21PM +0200, Pablo Neira Ayuso wrote:
> On Mon, Oct 15, 2018 at 02:08:07PM +0200, Phil Sutter wrote:
> > On Mon, Oct 15, 2018 at 01:29:52PM +0200, Pablo Neira Ayuso wrote:
> > > Subject: [PATCH libnftables] src: remove json support
> >   ~~~
> > 
> > This is libnftnl, right? :)
> > 
> > Apart from that:
> > 
> > Acked-by: Phil Sutter 
> 
> Right, thanks!

Oh, I pushed out this patch without including your Acked-by.

Sorry about that, it was not intentional.

Thanks Phil !


Re: [PATCH 1/2 nft v2] src: osf: add ttl option support

2018-10-15 Thread Pablo Neira Ayuso
Please send a v3 including tests/py. More comments below.

On Sat, Sep 29, 2018 at 12:15:17PM +0200, Fernando Fernandez Mancera wrote:
> Add support for ttl option in "osf" expression. Example:
> 
> table ip foo {
>   chain bar {
>   type filter hook input priority filter; policy accept;
>   osf ttl nocheck name "Linux"

Listing and output should match, ie. what you list should work with
nft -f, see below.

> diff --git a/src/parser_bison.y b/src/parser_bison.y
> index 831090b..a7ec858 100644
> --- a/src/parser_bison.y
> +++ b/src/parser_bison.y
> @@ -739,6 +739,7 @@ int nft_lex(void *, void *, void *);
>  %type   fib_tuple   fib_result  fib_flag
>  
>  %type  osf_expr
> +%type   osf_ttl
>  %destructor { expr_free($$); }   osf_expr
>  
>  %type   markup_format
> @@ -3112,9 +3113,21 @@ fib_tuple  :   fib_flagDOT 
> fib_tuple
>   |   fib_flag
>   ;
>  
> -osf_expr :   OSF NAME
> +osf_expr :   OSF osf_ttl NAME
>   {
> - $$ = osf_expr_alloc(&@$);
> + $$ = osf_expr_alloc(&@$, $2);
> + }
> + ;
> +
> +osf_ttl  :   /* empty */ { $$ = 0; }
> + |   STRING
> + {
> + if (!strcmp($1, "ttl-global"))

This should be "global". But I would suggest you rename this to "loose".

> + $$ = 1;

Can we use NFT_OSF_* definitions, instead of magic number?

> + else if (!strcmp($1, "ttl-nocheck"))

This should be "nocheck". But I'd suggest you rename this to "skip"

> + $$ = 2;

Same here, avoid magic number, use definition.

> + else
> + $$ = 3;

Same thing.

>   }
>   ;
> diff --git a/src/osf.c b/src/osf.c
> index 85c9573..c7dd25f 100644
> --- a/src/osf.c
> +++ b/src/osf.c
> @@ -5,13 +5,32 @@
>  #include 
>  #include 
>  
> +static const char *osf_ttl_int_to_str(const uint8_t ttl)
> +{
> + if (ttl == 1)
> + return "ttl global";
> + else if (ttl == 2)
> + return "ttl nocheck";
> +
> + return "";
> +}
> +
>  static void osf_expr_print(const struct expr *expr, struct output_ctx *octx)
>  {
> - nft_print(octx, "osf name");
> + const char *ttl_str;
> +
> + ttl_str = osf_ttl_int_to_str(expr->osf.ttl);
> + nft_print(octx, "osf %s name", ttl_str);
>  }
>  
>  static void osf_expr_clone(struct expr *new, const struct expr *expr)
>  {
> + new->osf.ttl = expr->osf.ttl;
> +}
> +
> +static bool osf_expr_cmp(const struct expr *e1, const struct expr *e2)
> +{
> + return e1->osf.ttl == e2->osf.ttl;
>  }
>  
>  static const struct expr_ops osf_expr_ops = {
> @@ -19,10 +38,11 @@ static const struct expr_ops osf_expr_ops = {
>   .name   = "osf",
>   .print  = osf_expr_print,
>   .clone  = osf_expr_clone,
> + .cmp= osf_expr_cmp,
>   .json   = osf_expr_json,

BTW, could you extend json to support 'ttl' too?


Re: [PATCH nf-next] netfilter: nf_nat_snmp_basic: add missing helper alias name

2018-10-15 Thread Pablo Neira Ayuso
On Sun, Oct 07, 2018 at 12:17:07AM +0900, Taehee Yoo wrote:
> In order to upload helper module automatically, helper alias name
> is needed. so that MODULE_ALIAS_NFCT_HELPER() should be added.
> And unlike other nat helper modules, the nf_nat_snmp_basic can be
> used independently.
> helper name is "snmp_trap" so that alias name will be
> "nfct-helper-snmp_trap" by MODULE_ALIAS_NFCT_HELPER(snmp_trap)
> 
> test command:
>%iptables -t raw -I PREROUTING -p udp -j CT --helper snmp_trap
>%lsmod | grep nf_nat_snmp_basic
> 
> We can see nf_nat_snmp_basic module is uploaded automatically.

Applied, thanks.


Re: [PATCH libnftables] src: remove json support

2018-10-15 Thread Pablo Neira Ayuso
On Mon, Oct 15, 2018 at 02:08:07PM +0200, Phil Sutter wrote:
> On Mon, Oct 15, 2018 at 01:29:52PM +0200, Pablo Neira Ayuso wrote:
> > Subject: [PATCH libnftables] src: remove json support
>   ~~~
> 
> This is libnftnl, right? :)
> 
> Apart from that:
> 
> Acked-by: Phil Sutter 

Right, thanks!


Re: [PATCH] nftables: add support for setting secmark

2018-10-15 Thread Pablo Neira Ayuso
On Thu, Sep 20, 2018 at 09:43:22AM +0200, Christian Göttsche wrote:
> Add support for new nft object secmark holding security context strings.
> 
> The following should demonstrate its usage (based on SELinux context):
> 
> # define a tag containing a context string
> nft add secmark inet filter sshtag 
> \"system_u:object_r:ssh_server_packet_t:s0\"
> nft list secmarks
> 
> # set the secmark
> nft add rule inet filter input tcp dport 22 meta secmark set sshtag
> 
> # map usage
> nft add map inet filter secmapping { type inet_service : secmark \; }
> nft add element inet filter secmapping { 22 : sshtag }
> nft list maps
> nft list map inet filter secmapping
> nft add rule inet filter input meta secmark set tcp dport map @secmapping
> 
> Based on v0.9.0

I made the rebase myself and pushed it out.

I'd appreciate if you can review what we have in nft.git and test it.

Thanks.


Re: [nft PATCH] json: Fix memleak in dup_stmt_json()

2018-10-15 Thread Pablo Neira Ayuso
On Fri, Oct 12, 2018 at 05:50:15PM +0200, Phil Sutter wrote:
> The variable 'root' is always assigned to after initialization, so there
> is no point in initializing it upon declaration.

Applied, thanks.


Re: [nft PATCH] parser_json: Fix for ineffective family value checks

2018-10-15 Thread Pablo Neira Ayuso
On Fri, Oct 12, 2018 at 05:23:24PM +0200, Phil Sutter wrote:
> Since handle->family is unsigned, checking for value < 0 never yields
> true. Overcome this by changing parse_family() to return an error code
> and write the parsed family value into a pointer passed as parameter.
> 
> The above change required a bit more cleanup to avoid passing pointers
> to signed variables to the function. Also leverage json_parse_family() a
> bit more to reduce code side.

Also applied, thanks.


Re: [nft PATCH] Fix memleak in netlink_parse_fwd() error path

2018-10-15 Thread Pablo Neira Ayuso
On Fri, Oct 12, 2018 at 12:54:09PM +0200, Phil Sutter wrote:
> Make sure allocated 'stmt' is freed before returning to caller.

Applied, thanks.


Re: [nft PATCH] libnftables: Fix memleak in nft_parse_bison_filename()

2018-10-15 Thread Pablo Neira Ayuso
On Fri, Oct 12, 2018 at 01:22:55PM +0200, Phil Sutter wrote:
> Allocated scanner object leaks when returning to caller. For some odd
> reason, this was missed by the commit referenced below.

Applied, thanks.


Re: [nft PATCH 0/8] monitor: Use libnftables for JSON output

2018-10-15 Thread Pablo Neira Ayuso
On Thu, Oct 11, 2018 at 05:48:53PM +0200, Phil Sutter wrote:
> This series essentially moves nft monitor JSON output to libnftables (in
> patch 7). Patch 8 enhances tests/monitor to get that tested as well (via
> passing '-j' parameter to run-tests.sh). The leading six patches are
> more or less prerequisites for the later ones.

Series applied, thanks Phil.


Re: [PATCH libnftables] src: remove json support

2018-10-15 Thread Phil Sutter
On Mon, Oct 15, 2018 at 01:29:52PM +0200, Pablo Neira Ayuso wrote:
> Subject: [PATCH libnftables] src: remove json support
  ~~~

This is libnftnl, right? :)

Apart from that:

Acked-by: Phil Sutter 

Cheers, Phil


Re: [PATCH libnftnl] expr: osf: add ttl option support

2018-10-15 Thread Pablo Neira Ayuso
Applied, thanks.


Re: [PATCH nf-next] netfilter: nf_tables: xfrm: use state family, not hook one

2018-10-15 Thread Pablo Neira Ayuso
On Wed, Oct 10, 2018 at 05:25:47PM +0200, Florian Westphal wrote:
> Eyal says:
>   doesn't the use of nft_pf(pkt) in this context limit the matching of
>   encapsulated packets to the same family?
> 
>   IIUC when an e.g. IPv6-in-IPv4 packet is matched, the nft_pf(pkt) will
>   be the decapsulated packet family - IPv6 - whereas the state may be
>   IPv4. So this check would not allow matching the 'underlay' address in
>   such cases.
> 
>   I know this was a limitation in xt_policy. but is this intentional in
>   this matcher? or is it possible to use state->props.family when
>   validating the match instead of nft_pf(pkt)?
> 
> Userspace already tells us which address family it expects to match, so
> we can just use the real state family rather than the hook family.
> so change it as suggested above.

Applied, thanks.


Re: [iptables PATCH] xtables: Remove target_maxnamelen field

2018-10-15 Thread Pablo Neira Ayuso
On Thu, Oct 11, 2018 at 01:30:38PM +0200, Phil Sutter wrote:
> This is a partial revert of commit 9f075031a1973 ("Combine
> parse_target() and command_jump() implementations"): Upstream prefers to
> reduce max chain name length of arptables by two characters instead of
> the introduced struct xtables_globals field which requires to bump
> library API version.

Applied, thanks.


[PATCH nf v2 3/3] netfilter: ipt_CLUSTERIP: fix sleep-in-atomic bug in clusterip_config_entry_put()

2018-10-14 Thread Taehee Yoo
A proc_remove() can sleep. so that it can't be inside of spin_lock.
Hence proc_remove() is moved to outside of spin_lock. and it also
adds mutex to sync create and remove of proc entry(config->pde).

test commands:
SHELL#1
   %while :; do iptables -A INPUT -p udp -i enp2s0 -d 192.168.1.100 \
   --dport 9000  -j CLUSTERIP --new --hashmode sourceip \
   --clustermac 01:00:5e:00:00:21 --total-nodes 3 --local-node 3; \
   iptables -F; done

SHELL#2
   %while :; do echo +1 > /proc/net/ipt_CLUSTERIP/192.168.1.100; \
   echo -1 > /proc/net/ipt_CLUSTERIP/192.168.1.100; done

[ 2949.569864] BUG: sleeping function called from invalid context at 
kernel/sched/completion.c:99
[ 2949.579944] in_atomic(): 1, irqs_disabled(): 0, pid: 5472, name: iptables
[ 2949.587920] 1 lock held by iptables/5472:
[ 2949.592711]  #0: 8f0ebcf2 (&(>lock)->rlock){+...}, at: 
refcount_dec_and_lock+0x24/0x50
[ 2949.603307] CPU: 1 PID: 5472 Comm: iptables Tainted: GW 
4.19.0-rc5+ #16
[ 2949.604212] Hardware name: To be filled by O.E.M. To be filled by 
O.E.M./Aptio CRB, BIOS 5.6.5 07/08/2015
[ 2949.604212] Call Trace:
[ 2949.604212]  dump_stack+0xc9/0x16b
[ 2949.604212]  ? show_regs_print_info+0x5/0x5
[ 2949.604212]  ___might_sleep+0x2eb/0x420
[ 2949.604212]  ? set_rq_offline.part.87+0x140/0x140
[ 2949.604212]  ? _rcu_barrier_trace+0x400/0x400
[ 2949.604212]  wait_for_completion+0x94/0x710
[ 2949.604212]  ? wait_for_completion_interruptible+0x780/0x780
[ 2949.604212]  ? __kernel_text_address+0xe/0x30
[ 2949.604212]  ? __lockdep_init_map+0x10e/0x5c0
[ 2949.604212]  ? __lockdep_init_map+0x10e/0x5c0
[ 2949.604212]  ? __init_waitqueue_head+0x86/0x130
[ 2949.604212]  ? init_wait_entry+0x1a0/0x1a0
[ 2949.604212]  proc_entry_rundown+0x208/0x270
[ 2949.604212]  ? proc_reg_get_unmapped_area+0x370/0x370
[ 2949.604212]  ? __lock_acquire+0x4500/0x4500
[ 2949.604212]  ? complete+0x18/0x70
[ 2949.604212]  remove_proc_subtree+0x143/0x2a0
[ 2949.708655]  ? remove_proc_entry+0x390/0x390
[ 2949.708655]  clusterip_tg_destroy+0x27a/0x630 [ipt_CLUSTERIP]
[ ... ]

v3: add Third patch.
v2:
 - use spin_lock_bh() instead of spin_lock() (Pablo Neira Ayuso)
 - add missing dev_mc_add() and dev_mc_del().
v1: Initial patch

Fixes: b3e456fce9f5 ("netfilter: ipt_CLUSTERIP: fix a race condition of proc 
file creation")
Signed-off-by: Taehee Yoo 
---
 net/ipv4/netfilter/ipt_CLUSTERIP.c | 19 ++-
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c 
b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index e734a57cd9f1..7fd399751c2e 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -56,7 +56,7 @@ struct clusterip_config {
 #endif
enum clusterip_hashmode hash_mode;  /* which hashing mode */
u_int32_t hash_initval; /* hash initialization */
-   struct rcu_head rcu;
+   struct rcu_head rcu;/* for call_rcu_bh */
struct net *net;/* netns for pernet list */
char ifname[IFNAMSIZ];  /* device ifname */
 };
@@ -72,6 +72,8 @@ struct clusterip_net {
 
 #ifdef CONFIG_PROC_FS
struct proc_dir_entry *procdir;
+   /* mutex protects the config->pde*/
+   struct mutex mutex;
 #endif
 };
 
@@ -118,17 +120,18 @@ clusterip_config_entry_put(struct clusterip_config *c)
 
local_bh_disable();
if (refcount_dec_and_lock(>entries, >lock)) {
+   list_del_rcu(>list);
+   spin_unlock(>lock);
+   local_bh_enable();
/* In case anyone still accesses the file, the open/close
 * functions are also incrementing the refcount on their own,
 * so it's safe to remove the entry even if it's in use. */
 #ifdef CONFIG_PROC_FS
+   mutex_lock(>mutex);
if (cn->procdir)
proc_remove(c->pde);
+   mutex_unlock(>mutex);
 #endif
-   list_del_rcu(>list);
-   spin_unlock(>lock);
-   local_bh_enable();
-
return;
}
local_bh_enable();
@@ -278,9 +281,11 @@ clusterip_config_init(struct net *net, const struct 
ipt_clusterip_tgt_info *i,
 
/* create proc dir entry */
sprintf(buffer, "%pI4", );
+   mutex_lock(>mutex);
c->pde = proc_create_data(buffer, 0600,
  cn->procdir,
  _proc_fops, c);
+   mutex_unlock(>mutex);
if (!c->pde) {
err = -ENOMEM;
goto err;
@@ -832,6 +837,7 @@ static int clusterip_net_init(struct net *net)
pr_err("Unable to proc dir entry\n");
return -ENOMEM;
}
+   mutex_init(>mutex);
 #endif /* CONFIG_PROC_FS */
 
return 0;
@@ -840,9 +846,12 @@ static int 

<    1   2   3   4   5   6   7   8   9   10   >