[PATCH nf-next] netfilter: remove unused headers.

2018-10-23 Thread Weongyo Jeong
Some headers aren't used at these files.  So it's safe to remove.

Signed-off-by: Weongyo Jeong 
---
 net/ipv4/netfilter/ipt_CLUSTERIP.c | 5 -
 net/ipv4/netfilter/ipt_REJECT.c| 3 ---
 2 files changed, 8 deletions(-)

diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c 
b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 2c8d313..f275460 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -17,9 +17,6 @@
 #include 
 #include 
 #include 
-#include 
-#include 
-#include 
 #include 
 #include 
 #include 
@@ -30,8 +27,6 @@
 #include 
 #include 
 #include 
-#include 
-#include 
 
 #define CLUSTERIP_VERSION "0.8"
 
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index e8bed33..d12360a 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -14,9 +14,6 @@
 #include 
 #include 
 #include 
-#include 
-#include 
-#include 
 #include 
 #include 
 #include 
-- 
2.7.4



[PATCH nft 3/3] mnl: use either name or handle to refer to objects

2018-10-23 Thread Pablo Neira Ayuso
We can only specify either name or handle to refer to objects.

Signed-off-by: Pablo Neira Ayuso 
---
 src/mnl.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/mnl.c b/src/mnl.c
index d3129fda2b89..2be8ca14e50d 100644
--- a/src/mnl.c
+++ b/src/mnl.c
@@ -660,7 +660,7 @@ int mnl_nft_table_del(struct netlink_ctx *ctx, const struct 
cmd *cmd)
nftnl_table_set_u32(nlt, NFTNL_TABLE_FAMILY, cmd->handle.family);
if (cmd->handle.table.name)
nftnl_table_set(nlt, NFTNL_TABLE_NAME, cmd->handle.table.name);
-   if (cmd->handle.handle.id)
+   else if (cmd->handle.handle.id)
nftnl_table_set_u64(nlt, NFTNL_TABLE_HANDLE,
cmd->handle.handle.id);
 
@@ -830,7 +830,7 @@ int mnl_nft_set_del(struct netlink_ctx *ctx, const struct 
cmd *cmd)
nftnl_set_set_str(nls, NFTNL_SET_TABLE, h->table.name);
if (h->set.name)
nftnl_set_set_str(nls, NFTNL_SET_NAME, h->set.name);
-   if (h->handle.id)
+   else if (h->handle.id)
nftnl_set_set_u64(nls, NFTNL_SET_HANDLE, h->handle.id);
 
nlh = nftnl_nlmsg_build_hdr(nftnl_batch_buffer(ctx->batch),
@@ -1189,8 +1189,9 @@ int mnl_nft_setelem_del(struct netlink_ctx *ctx, const 
struct cmd *cmd)
 
nftnl_set_set_u32(nls, NFTNL_SET_FAMILY, h->family);
nftnl_set_set_str(nls, NFTNL_SET_TABLE, h->table.name);
-   nftnl_set_set_str(nls, NFTNL_SET_NAME, h->set.name);
-   if (h->handle.id)
+   if (h->set.name)
+   nftnl_set_set_str(nls, NFTNL_SET_NAME, h->set.name);
+   else if (h->handle.id)
nftnl_set_set_u64(nls, NFTNL_SET_HANDLE, h->handle.id);
 
if (cmd->expr)
-- 
2.11.0



[PATCH nft 1/3] src: move socket open and reopen to mnl.c

2018-10-23 Thread Pablo Neira Ayuso
These functions are part of the mnl backend, move them there. Remove
netlink_close_sock(), use direct call to mnl_socket_close().

Signed-off-by: Pablo Neira Ayuso 
---
 include/mnl.h |  4 ++--
 include/netlink.h |  1 -
 src/libnftables.c |  4 ++--
 src/mnl.c | 22 ++
 src/netlink.c | 27 ---
 src/rule.c|  2 +-
 6 files changed, 27 insertions(+), 33 deletions(-)

diff --git a/include/mnl.h b/include/mnl.h
index 3ddc82a05cb0..676030e6c4c6 100644
--- a/include/mnl.h
+++ b/include/mnl.h
@@ -6,8 +6,8 @@
 #include 
 #include 
 
-struct mnl_socket *netlink_open_sock(void);
-void netlink_close_sock(struct mnl_socket *nf_sock);
+struct mnl_socket *nft_mnl_socket_open(void);
+struct mnl_socket *nft_mnl_socket_reopen(struct mnl_socket *nf_sock);
 
 uint32_t mnl_seqnum_alloc(uint32_t *seqnum);
 uint16_t mnl_genid_get(struct netlink_ctx *ctx);
diff --git a/include/netlink.h b/include/netlink.h
index 66e400d88f19..af9313d51453 100644
--- a/include/netlink.h
+++ b/include/netlink.h
@@ -157,7 +157,6 @@ extern void netlink_dump_obj(struct nftnl_obj *nlo, struct 
netlink_ctx *ctx);
 
 extern int netlink_batch_send(struct netlink_ctx *ctx, struct list_head 
*err_list);
 
-extern struct mnl_socket *netlink_restart(struct mnl_socket *nf_sock);
 #define netlink_abi_error()\
__netlink_abi_error(__FILE__, __LINE__, strerror(errno));
 extern void __noreturn __netlink_abi_error(const char *file, int line, const 
char *reason);
diff --git a/src/libnftables.c b/src/libnftables.c
index 44869602c875..0731c532a22a 100644
--- a/src/libnftables.c
+++ b/src/libnftables.c
@@ -129,7 +129,7 @@ void nft_ctx_clear_include_paths(struct nft_ctx *ctx)
 
 static void nft_ctx_netlink_init(struct nft_ctx *ctx)
 {
-   ctx->nf_sock = netlink_open_sock();
+   ctx->nf_sock = nft_mnl_socket_open();
 }
 
 struct nft_ctx *nft_ctx_new(uint32_t flags)
@@ -266,7 +266,7 @@ const char *nft_ctx_get_error_buffer(struct nft_ctx *ctx)
 void nft_ctx_free(struct nft_ctx *ctx)
 {
if (ctx->nf_sock)
-   netlink_close_sock(ctx->nf_sock);
+   mnl_socket_close(ctx->nf_sock);
 
exit_cookie(>output.output_cookie);
exit_cookie(>output.error_cookie);
diff --git a/src/mnl.c b/src/mnl.c
index 9a6248aa0ad9..84727094e27e 100644
--- a/src/mnl.c
+++ b/src/mnl.c
@@ -28,10 +28,32 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
 
+struct mnl_socket *nft_mnl_socket_open(void)
+{
+   struct mnl_socket *nf_sock;
+
+   nf_sock = mnl_socket_open(NETLINK_NETFILTER);
+   if (!nf_sock)
+   netlink_init_error();
+
+   if (fcntl(mnl_socket_get_fd(nf_sock), F_SETFL, O_NONBLOCK))
+   netlink_init_error();
+
+   return nf_sock;
+}
+
+struct mnl_socket *nft_mnl_socket_reopen(struct mnl_socket *nf_sock)
+{
+   mnl_socket_close(nf_sock);
+
+   return nft_mnl_socket_open();
+}
+
 uint32_t mnl_seqnum_alloc(unsigned int *seqnum)
 {
return (*seqnum)++;
diff --git a/src/netlink.c b/src/netlink.c
index 403780ffdefb..8eb2ccad2f8c 100644
--- a/src/netlink.c
+++ b/src/netlink.c
@@ -10,7 +10,6 @@
  */
 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -53,32 +52,6 @@ const struct location netlink_location = {
.indesc = _netlink,
 };
 
-struct mnl_socket *netlink_open_sock(void)
-{
-   struct mnl_socket *nf_sock;
-
-   nf_sock = mnl_socket_open(NETLINK_NETFILTER);
-   if (nf_sock == NULL)
-   netlink_init_error();
-
-   if (fcntl(mnl_socket_get_fd(nf_sock), F_SETFL, O_NONBLOCK))
-   netlink_init_error();
-
-   return nf_sock;
-}
-
-void netlink_close_sock(struct mnl_socket *nf_sock)
-{
-   if (nf_sock)
-   mnl_socket_close(nf_sock);
-}
-
-struct mnl_socket *netlink_restart(struct mnl_socket *nf_sock)
-{
-   netlink_close_sock(nf_sock);
-   return netlink_open_sock();
-}
-
 void __noreturn __netlink_abi_error(const char *file, int line,
const char *reason)
 {
diff --git a/src/rule.c b/src/rule.c
index 12ac1310034d..9087fd2bd193 100644
--- a/src/rule.c
+++ b/src/rule.c
@@ -243,7 +243,7 @@ replay:
if (ret < 0) {
cache_release(cache);
if (errno == EINTR) {
-   nft->nf_sock = netlink_restart(nft->nf_sock);
+   nft->nf_sock = nft_mnl_socket_reopen(nft->nf_sock);
goto replay;
}
return -1;
-- 
2.11.0



Re: [PATCH nft v3] src: osf: add ttl option support

2018-10-23 Thread Pablo Neira Ayuso
On Tue, Oct 23, 2018 at 05:06:22PM +0200, Fernando Fernandez Mancera wrote:
> Add support for ttl option in "osf" expression. Example:
> 
> table ip foo {
>   chain bar {
>   type filter hook input priority filter; policy accept;
>   osf ttl skip name "Linux"
>   }
> }

Applied, thanks Fernando.


Re: [iptables PATCH] xtables: Fix for spurious errors from iptables-translate

2018-10-23 Thread Pablo Neira Ayuso
On Tue, Oct 23, 2018 at 04:59:14PM +0200, Phil Sutter wrote:
> When aligning iptables-nft error messages with legacy ones, I missed
> that translate tools shouldn't check for missing or duplicated chains.
> 
> Introduce a boolean in struct nft_xt_cmd_parse indicating we're "just"
> translating and do_parse() should skip the checks.

Applied, thanks for restoring iptables-xlate.


Re: [nft PATCH] tests: shell: Extend get element test

2018-10-23 Thread Pablo Neira Ayuso
On Tue, Oct 23, 2018 at 12:33:28PM +0200, Phil Sutter wrote:
> On Tue, Oct 23, 2018 at 11:28:28AM +0200, Pablo Neira Ayuso wrote:
[...]
> > Using current nftables git HEAD plus kernel patch, I'm getting:
> > 
> > # nft get element ip t s '{ 25, 28 }'
> > table ip t {
> > set s {
> > type inet_service
> > flags interval
> > elements = { 20-30, 20-30 }
> > }
> > }
> 
> Oh, sorry. I did above test using sources which eliminate duplicates
> from the return set (something I was playing with).

No problem :-)

> > > Here, although I ask for two elements, a single range is returned. So I
> > > guess asking for two ranges belonging to the same range should return a
> > > single range as well. Maybe a "simple" fix would be to drop duplicates
> > > from the return set?
> > > 
> > > Anyway, from my point of view your solution is fine as well: If a
> > > humanoid parser evaluates the results, it can easily spot duplicates and
> > > interpret them right. If 'get element' command is used by a script to
> > > check for existence of given ranges, it all boils down to a boolean
> > > found or not found result, so duplicates shouldn't matter that much.
> > 
> > My idea was to provide list including exactly the same number of
> > elements that have been requested, and in strict order, so you can
> > quickly check what you request belongs to somewhere, eg.
> > 
> > # nft get element ip t s '{ 50, 20, 10 }'
> > table ip t {
> > set s {
> > type inet_service
> > flags interval
> > elements = { 50-60, 20-30, 10 }
> > }
> > }
> > 
> 
> Yes, that makes sense. Thanks for explaining your approach to how 'get
> element' command should work!

I should find time to document this in the manpage :-\

> > Not yet implemented, but we could add something like:
> > 
> > # nft get element ip t s '{ 50, 20, 10 }'
> > table ip t {
> > set s {
> > type inet_service
> > flags interval
> > elements = { 50 in 50-60, 20 in 20-30, 10 }
> > }
> > }
> > 
> > So there's a clear mapping between what we request and what we get.
> 
> This would allow to return partial data, i.e. if one requested element
> doesn't exist in the set to still show the remaining ones. But current
> behaviour is absolutely fine from my point of view.

The kernel side is not allowing this, it hits ENOENT in case we find
an element which does not exist. It should be possible to revisit this
and batch several inquiries in one go.

> > Still, at this stage, the existing behaviour allows humans - for a
> > small number of data - and robots, to do mappings between what they
> > request and what they find.
> 
> Yes, indeed.

Will leave this for future work, just for the record.

Thanks.


[PATCH nft v3] src: osf: add ttl option support

2018-10-23 Thread Fernando Fernandez Mancera
Add support for ttl option in "osf" expression. Example:

table ip foo {
chain bar {
type filter hook input priority filter; policy accept;
osf ttl skip name "Linux"
}
}

Signed-off-by: Fernando Fernandez Mancera 
---
v1:initial patch
v2:use "ttl-global, ttl-nocheck.." instead of "1, 2.."
v3:better names for ttl options, add json and tests/py support
All is working properly.
---
 include/expression.h|  4 ++
 include/linux/netfilter/nf_tables.h |  2 +
 include/osf.h   |  2 +-
 src/json.c  |  2 +-
 src/netlink_delinearize.c   |  5 ++-
 src/netlink_linearize.c |  1 +
 src/osf.c   | 26 ++-
 src/parser_bison.y  | 19 +++-
 src/parser_json.c   |  5 ++-
 tests/py/inet/osf.t |  3 ++
 tests/py/inet/osf.t.json|  3 ++
 tests/py/inet/osf.t.payload | 68 -
 12 files changed, 129 insertions(+), 11 deletions(-)

diff --git a/include/expression.h b/include/expression.h
index d6977c3..f018c95 100644
--- a/include/expression.h
+++ b/include/expression.h
@@ -345,6 +345,10 @@ struct expr {
uint8_t direction;
uint8_t spnum;
} xfrm;
+   struct {
+   /* EXPR_OSF */
+   uint8_t ttl;
+   } osf;
};
 };
 
diff --git a/include/linux/netfilter/nf_tables.h 
b/include/linux/netfilter/nf_tables.h
index 4e28598..1d13ad3 100644
--- a/include/linux/netfilter/nf_tables.h
+++ b/include/linux/netfilter/nf_tables.h
@@ -939,10 +939,12 @@ enum nft_socket_keys {
  * enum nft_osf_attributes - nf_tables osf expression netlink attributes
  *
  * @NFTA_OSF_DREG: destination register (NLA_U32: nft_registers)
+ * @NFTA_OSF_TTL: Value of the TTL osf option (NLA_U8)
  */
 enum nft_osf_attributes {
NFTA_OSF_UNSPEC,
NFTA_OSF_DREG,
+   NFTA_OSF_TTL,
__NFTA_OSF_MAX
 };
 #define NFT_OSF_MAX(__NFTA_OSF_MAX - 1)
diff --git a/include/osf.h b/include/osf.h
index 54cdd4a..23ea34d 100644
--- a/include/osf.h
+++ b/include/osf.h
@@ -1,7 +1,7 @@
 #ifndef NFTABLES_OSF_H
 #define NFTABLES_OSF_H
 
-struct expr *osf_expr_alloc(const struct location *loc);
+struct expr *osf_expr_alloc(const struct location *loc, const uint8_t ttl);
 
 extern int nfnl_osf_load_fingerprints(struct netlink_ctx *ctx, int del);
 
diff --git a/src/json.c b/src/json.c
index 1cde270..cea9f19 100644
--- a/src/json.c
+++ b/src/json.c
@@ -857,7 +857,7 @@ json_t *socket_expr_json(const struct expr *expr, struct 
output_ctx *octx)
 
 json_t *osf_expr_json(const struct expr *expr, struct output_ctx *octx)
 {
-   return json_pack("{s:{s:s}}", "osf", "key", "name");
+   return json_pack("{s:{s:i, s:s}}", "osf", "ttl", expr->osf.ttl, "key", 
"name");
 }
 
 json_t *xfrm_expr_json(const struct expr *expr, struct output_ctx *octx)
diff --git a/src/netlink_delinearize.c b/src/netlink_delinearize.c
index cd05885..84948db 100644
--- a/src/netlink_delinearize.c
+++ b/src/netlink_delinearize.c
@@ -655,8 +655,11 @@ static void netlink_parse_osf(struct netlink_parse_ctx 
*ctx,
 {
enum nft_registers dreg;
struct expr *expr;
+   uint8_t ttl;
+
+   ttl = nftnl_expr_get_u8(nle, NFTNL_EXPR_OSF_TTL);
+   expr = osf_expr_alloc(loc, ttl);
 
-   expr = osf_expr_alloc(loc);
dreg = netlink_parse_register(nle, NFTNL_EXPR_OSF_DREG);
netlink_set_register(ctx, dreg, expr);
 }
diff --git a/src/netlink_linearize.c b/src/netlink_linearize.c
index 0ac51bd..0c8f5fe 100644
--- a/src/netlink_linearize.c
+++ b/src/netlink_linearize.c
@@ -227,6 +227,7 @@ static void netlink_gen_osf(struct netlink_linearize_ctx 
*ctx,
 
nle = alloc_nft_expr("osf");
netlink_put_register(nle, NFTNL_EXPR_OSF_DREG, dreg);
+   nftnl_expr_set_u8(nle, NFTNL_EXPR_OSF_TTL, expr->osf.ttl);
nftnl_rule_add_expr(ctx->nlr, nle);
 }
 
diff --git a/src/osf.c b/src/osf.c
index 85c9573..436f34b 100644
--- a/src/osf.c
+++ b/src/osf.c
@@ -5,13 +5,32 @@
 #include 
 #include 
 
+static const char *osf_ttl_int_to_str(const uint8_t ttl)
+{
+   if (ttl == 1)
+   return "ttl loose ";
+   else if (ttl == 2)
+   return "ttl skip ";
+
+   return "";
+}
+
 static void osf_expr_print(const struct expr *expr, struct output_ctx *octx)
 {
-   nft_print(octx, "osf name");
+   const char *ttl_str;
+
+   ttl_str = osf_ttl_int_to_str(expr->osf.ttl);
+   nft_print(octx, "osf %sname", ttl_str);
 }
 
 static void osf_expr_clone(struct expr *new, const struct expr *expr)
 {
+   new->osf.ttl = expr->osf.ttl;
+}
+
+static bool osf_expr_cmp(const struct expr *e1, const struct expr *e2)
+{
+   return e1->osf.ttl == e2->osf.ttl;
 }
 
 static const struct expr_ops osf_expr_ops = {
@@ -19,10 +38,11 @@ 

[iptables PATCH] xtables: Fix for spurious errors from iptables-translate

2018-10-23 Thread Phil Sutter
When aligning iptables-nft error messages with legacy ones, I missed
that translate tools shouldn't check for missing or duplicated chains.

Introduce a boolean in struct nft_xt_cmd_parse indicating we're "just"
translating and do_parse() should skip the checks.

Fixes: b6a06c1a215f8 ("xtables: Align return codes with legacy iptables")
Signed-off-by: Phil Sutter 
---
 iptables/nft-shared.h| 1 +
 iptables/xtables-translate.c | 1 +
 iptables/xtables.c   | 6 +++---
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/iptables/nft-shared.h b/iptables/nft-shared.h
index 1281f080bc31d..e3ecdb4d23df3 100644
--- a/iptables/nft-shared.h
+++ b/iptables/nft-shared.h
@@ -233,6 +233,7 @@ struct nft_xt_cmd_parse {
const char  *policy;
boolrestore;
int verbose;
+   boolxlate;
 };
 
 void do_parse(struct nft_handle *h, int argc, char *argv[],
diff --git a/iptables/xtables-translate.c b/iptables/xtables-translate.c
index f4c0f9cf5a181..849c53f30e155 100644
--- a/iptables/xtables-translate.c
+++ b/iptables/xtables-translate.c
@@ -216,6 +216,7 @@ static int do_command_xlate(struct nft_handle *h, int argc, 
char *argv[],
struct nft_xt_cmd_parse p = {
.table  = *table,
.restore= restore,
+   .xlate  = true,
};
struct iptables_command_state cs;
struct xtables_args args = {
diff --git a/iptables/xtables.c b/iptables/xtables.c
index e0343dbabf2b3..0038804e288c6 100644
--- a/iptables/xtables.c
+++ b/iptables/xtables.c
@@ -1063,16 +1063,16 @@ void do_parse(struct nft_handle *h, int argc, char 
*argv[],
   p->chain);
}
 
-   if (!nft_chain_exists(h, p->table, p->chain))
+   if (!p->xlate && !nft_chain_exists(h, p->table, p->chain))
xtables_error(OTHER_PROBLEM,
  "Chain '%s' does not exist", cs->jumpto);
 
-   if (!cs->target && strlen(cs->jumpto) > 0 &&
+   if (!p->xlate && !cs->target && strlen(cs->jumpto) > 0 &&
!nft_chain_exists(h, p->table, cs->jumpto))
xtables_error(PARAMETER_PROBLEM,
  "Chain '%s' does not exist", cs->jumpto);
}
-   if (p->command == CMD_NEW_CHAIN &&
+   if (!p->xlate && p->command == CMD_NEW_CHAIN &&
nft_chain_exists(h, p->table, p->chain))
xtables_error(OTHER_PROBLEM, "Chain already exists");
 }
-- 
2.19.0



Re: [nft PATCH] tests: shell: Extend get element test

2018-10-23 Thread Phil Sutter
Hi,

On Tue, Oct 23, 2018 at 11:28:28AM +0200, Pablo Neira Ayuso wrote:
> On Mon, Oct 22, 2018 at 11:14:32PM +0200, Phil Sutter wrote:
> > Hi Pablo,
> > 
> > On Mon, Oct 22, 2018 at 09:45:02PM +0200, Pablo Neira Ayuso wrote:
> > [...]
> > > > A bit of context illustrating why I think the code needs more than just
> > > > "more fixes": AFAIU, for each input element (which may be part of a
> > > > range or not), code asks the kernel for whether the element exists, then
> > > > get_set_decompose() is called to find the corresponding range. This
> > > > approach has a critical problem though: Given a set with elements 10 and
> > > > 20-30, asking for 10 and 20 will return the same two elements as asking
> > > > for 10-20 does. Though in the first case, we should return 10 and 20-30
> > > > while in the latter case we should return nothing.
> > > 
> > > With this patch:
> > > 
> > > https://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git/commit/?id=3b18d5eba491b2328b31efa4235724a2354af010
> > > 
> > > I'm going to send a pull request for David now, I guess you are
> > > missing this kernel fix, so the _END interval flag is included when
> > > searching for the right hand side of a matching interval.
> > 
> > Ah! I wondered already why the test still fails although you had applied
> > a bunch of fixes. An additional kernel fix didn't come to mind. :)
> > 
> > > With this kernel patch plus your extended test reports I get this.
> > > 
> > > # ./run-tests.sh testcases/sets/0034get_element_0 
> > > I: using nft binary ./../../src/nft
> > > 
> > > W: [FAILED] testcases/sets/0034get_element_0: expected 0 but got 1
> > > ERROR: asked for '22-24, 26-28', expecting '20-30' but got '20-30, 20-30'
> > > 
> > > I: results: [OK] 0 [FAILED] 1 [TOTAL] 1
> > > 
> > > So, before applying your patch, I'm going to mangle it with this patch
> > > below. If we ask for 22-24 and 26-28, the result in 20-30 and 20-30.
> > > The command returns what is the interval container for 22-24 is 20-30,
> > > same thing for 26-28 which is 20-30.
> > 
> > Well, in theory, I'm fine with this. The expected outcome of 'get
> > element' command is a matter of definition, as long as it doesn't return
> > things which don't exist in the set. But I think the above is
> > inconsistent with regards to single element lookups:
> > 
> > | # nft list set ip t s
> > | table ip t {
> > |   set s {
> > |   type inet_service
> > |   flags interval
> > |   elements = { 10, 20-30, 40, 50-60 }
> > |   }
> > | }
> > | # nft get element ip t s '{ 25, 28 }'
> > | table ip t {
> > |   set s {
> > |   type inet_service
> > |   flags interval
> > |   elements = { 20-30 }
> > |   }
> > | }
> 
> Using current nftables git HEAD plus kernel patch, I'm getting:
> 
> # nft get element ip t s '{ 25, 28 }'
> table ip t {
> set s {
> type inet_service
> flags interval
> elements = { 20-30, 20-30 }
> }
> }

Oh, sorry. I did above test using sources which eliminate duplicates
from the return set (something I was playing with).

> > Here, although I ask for two elements, a single range is returned. So I
> > guess asking for two ranges belonging to the same range should return a
> > single range as well. Maybe a "simple" fix would be to drop duplicates
> > from the return set?
> > 
> > Anyway, from my point of view your solution is fine as well: If a
> > humanoid parser evaluates the results, it can easily spot duplicates and
> > interpret them right. If 'get element' command is used by a script to
> > check for existence of given ranges, it all boils down to a boolean
> > found or not found result, so duplicates shouldn't matter that much.
> 
> My idea was to provide list including exactly the same number of
> elements that have been requested, and in strict order, so you can
> quickly check what you request belongs to somewhere, eg.
> 
> # nft get element ip t s '{ 50, 20, 10 }'
> table ip t {
> set s {
> type inet_service
> flags interval
> elements = { 50-60, 20-30, 10 }
> }
> }
> 

Yes, that makes sense. Thanks for explaining your approach to how 'get
element' command should work!

> Not yet implemented, but we could add something like:
> 
> # nft get element ip t s '{ 50, 20, 10 }'
> table ip t {
> set s {
> type inet_service
> flags interval
> elements = { 50 in 50-60, 20 in 20-30, 10 }
> }
> }
> 
> So there's a clear mapping between what we request and what we get.

This would allow to return partial data, i.e. if one requested element
doesn't exist in the set to still show the remaining ones. But current
behaviour is absolutely fine from my point of view.

> Still, at this stage, the existing behaviour allows humans - for a
> small number of data - and robots, to do mappings between what they
> request and what they find.

Yes, 

[ANNOUNCE] iptables 1.8.1 release

2018-10-23 Thread Florian Westphal
Hi!

The Netfilter project proudly presents:

iptables 1.8.1

This release contains fixes and following new features:
   * add arp & ebtables-save/restore for nf_tables backend
   * new cgroup match revision with reduced memory footprint
   Noteable nft backend fixes:
  - don't print rule counters unless verbose
  - fix rule negation bug that caused valid rules to get rejected
   iptables-legacy fixes:
  fix for segfault when registering hashlimit extension
  fix for an iptables-restore failure with --table option

See ChangeLog that comes attached to this email for more details.

You can download it from:

http://www.netfilter.org/projects/iptables/downloads.html#iptables-1.8.1

To build the code, libnftnl 1.1.1 and libmnl >= 1.0.3 are required:

* http://netfilter.org/projects/libnftnl/index.html
* http://netfilter.org/projects/libmnl/index.html

In case of bugs and feature request, file them via:

* https://bugzilla.netfilter.org


iptables-1.8.1.txt.gz
Description: application/gzip


Re: [nft PATCH] tests: shell: Extend get element test

2018-10-23 Thread Pablo Neira Ayuso
On Mon, Oct 22, 2018 at 11:14:32PM +0200, Phil Sutter wrote:
> Hi Pablo,
> 
> On Mon, Oct 22, 2018 at 09:45:02PM +0200, Pablo Neira Ayuso wrote:
> [...]
> > > A bit of context illustrating why I think the code needs more than just
> > > "more fixes": AFAIU, for each input element (which may be part of a
> > > range or not), code asks the kernel for whether the element exists, then
> > > get_set_decompose() is called to find the corresponding range. This
> > > approach has a critical problem though: Given a set with elements 10 and
> > > 20-30, asking for 10 and 20 will return the same two elements as asking
> > > for 10-20 does. Though in the first case, we should return 10 and 20-30
> > > while in the latter case we should return nothing.
> > 
> > With this patch:
> > 
> > https://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git/commit/?id=3b18d5eba491b2328b31efa4235724a2354af010
> > 
> > I'm going to send a pull request for David now, I guess you are
> > missing this kernel fix, so the _END interval flag is included when
> > searching for the right hand side of a matching interval.
> 
> Ah! I wondered already why the test still fails although you had applied
> a bunch of fixes. An additional kernel fix didn't come to mind. :)
> 
> > With this kernel patch plus your extended test reports I get this.
> > 
> > # ./run-tests.sh testcases/sets/0034get_element_0 
> > I: using nft binary ./../../src/nft
> > 
> > W: [FAILED] testcases/sets/0034get_element_0: expected 0 but got 1
> > ERROR: asked for '22-24, 26-28', expecting '20-30' but got '20-30, 20-30'
> > 
> > I: results: [OK] 0 [FAILED] 1 [TOTAL] 1
> > 
> > So, before applying your patch, I'm going to mangle it with this patch
> > below. If we ask for 22-24 and 26-28, the result in 20-30 and 20-30.
> > The command returns what is the interval container for 22-24 is 20-30,
> > same thing for 26-28 which is 20-30.
> 
> Well, in theory, I'm fine with this. The expected outcome of 'get
> element' command is a matter of definition, as long as it doesn't return
> things which don't exist in the set. But I think the above is
> inconsistent with regards to single element lookups:
> 
> | # nft list set ip t s
> | table ip t {
> | set s {
> | type inet_service
> | flags interval
> | elements = { 10, 20-30, 40, 50-60 }
> | }
> | }
> | # nft get element ip t s '{ 25, 28 }'
> | table ip t {
> | set s {
> | type inet_service
> | flags interval
> | elements = { 20-30 }
> | }
> | }

Using current nftables git HEAD plus kernel patch, I'm getting:

# nft get element ip t s '{ 25, 28 }'
table ip t {
set s {
type inet_service
flags interval
elements = { 20-30, 20-30 }
}
}

> Here, although I ask for two elements, a single range is returned. So I
> guess asking for two ranges belonging to the same range should return a
> single range as well. Maybe a "simple" fix would be to drop duplicates
> from the return set?
> 
> Anyway, from my point of view your solution is fine as well: If a
> humanoid parser evaluates the results, it can easily spot duplicates and
> interpret them right. If 'get element' command is used by a script to
> check for existence of given ranges, it all boils down to a boolean
> found or not found result, so duplicates shouldn't matter that much.

My idea was to provide list including exactly the same number of
elements that have been requested, and in strict order, so you can
quickly check what you request belongs to somewhere, eg.

# nft get element ip t s '{ 50, 20, 10 }'
table ip t {
set s {
type inet_service
flags interval
elements = { 50-60, 20-30, 10 }
}
}

Not yet implemented, but we could add something like:

# nft get element ip t s '{ 50, 20, 10 }'
table ip t {
set s {
type inet_service
flags interval
elements = { 50 in 50-60, 20 in 20-30, 10 }
}
}

So there's a clear mapping between what we request and what we get.

Still, at this stage, the existing behaviour allows humans - for a
small number of data - and robots, to do mappings between what they
request and what they find.

Thanks!


Re: [PATCH 1/2 nft v3 preview] src: osf: add ttl option support

2018-10-23 Thread Pablo Neira Ayuso
On Mon, Oct 22, 2018 at 10:46:18PM +0200, Fernando Fernandez Mancera wrote:
> Add support for ttl option in "osf" expression. Example:
> 
> table ip foo {
>   chain bar {
>   type filter hook input priority filter; policy accept;
>   osf skip name "Linux"

osf ttl skip
osf ttl loose

Don't remove the 'ttl' token from there.

I just didn't want to have: osf ttl ttl-nocheck, which was sort of
redundant.

>   }
> }
> 
> Signed-off-by: Fernando Fernandez Mancera 
> ---
> v1:initial patch
> v2:use "ttl-global, ttl-nocheck.." instead of "1, 2.."
> v3:better names for ttl options, add json and tests/py support

Could you describe what is not yet working here? Thanks!

> ---
>  include/expression.h|  4 +++
>  include/linux/netfilter/nf_tables.h |  2 ++
>  include/osf.h   |  2 +-
>  src/json.c  |  2 +-
>  src/netlink_delinearize.c   |  5 +++-
>  src/netlink_linearize.c |  1 +
>  src/osf.c   | 26 --
>  src/parser_bison.y  | 19 +++--
>  src/parser_json.c   |  5 ++--
>  tests/py/inet/osf.t |  3 +++
>  tests/py/inet/osf.t.json|  3 +++
>  tests/py/inet/osf.t.payload | 41 +++--
>  12 files changed, 90 insertions(+), 23 deletions(-)
> 
> diff --git a/include/expression.h b/include/expression.h
> index d6977c3..f018c95 100644
> --- a/include/expression.h
> +++ b/include/expression.h
> @@ -345,6 +345,10 @@ struct expr {
>   uint8_t direction;
>   uint8_t spnum;
>   } xfrm;
> + struct {
> + /* EXPR_OSF */
> + uint8_t ttl;
> + } osf;
>   };
>  };
>  
> diff --git a/include/linux/netfilter/nf_tables.h 
> b/include/linux/netfilter/nf_tables.h
> index 4e28598..1d13ad3 100644
> --- a/include/linux/netfilter/nf_tables.h
> +++ b/include/linux/netfilter/nf_tables.h
> @@ -939,10 +939,12 @@ enum nft_socket_keys {
>   * enum nft_osf_attributes - nf_tables osf expression netlink attributes
>   *
>   * @NFTA_OSF_DREG: destination register (NLA_U32: nft_registers)
> + * @NFTA_OSF_TTL: Value of the TTL osf option (NLA_U8)
>   */
>  enum nft_osf_attributes {
>   NFTA_OSF_UNSPEC,
>   NFTA_OSF_DREG,
> + NFTA_OSF_TTL,
>   __NFTA_OSF_MAX
>  };
>  #define NFT_OSF_MAX  (__NFTA_OSF_MAX - 1)
> diff --git a/include/osf.h b/include/osf.h
> index 54cdd4a..23ea34d 100644
> --- a/include/osf.h
> +++ b/include/osf.h
> @@ -1,7 +1,7 @@
>  #ifndef NFTABLES_OSF_H
>  #define NFTABLES_OSF_H
>  
> -struct expr *osf_expr_alloc(const struct location *loc);
> +struct expr *osf_expr_alloc(const struct location *loc, const uint8_t ttl);
>  
>  extern int nfnl_osf_load_fingerprints(struct netlink_ctx *ctx, int del);
>  
> diff --git a/src/json.c b/src/json.c
> index 1cde270..cea9f19 100644
> --- a/src/json.c
> +++ b/src/json.c
> @@ -857,7 +857,7 @@ json_t *socket_expr_json(const struct expr *expr, struct 
> output_ctx *octx)
>  
>  json_t *osf_expr_json(const struct expr *expr, struct output_ctx *octx)
>  {
> - return json_pack("{s:{s:s}}", "osf", "key", "name");
> + return json_pack("{s:{s:i, s:s}}", "osf", "ttl", expr->osf.ttl, "key", 
> "name");
>  }
>  
>  json_t *xfrm_expr_json(const struct expr *expr, struct output_ctx *octx)
> diff --git a/src/netlink_delinearize.c b/src/netlink_delinearize.c
> index cd05885..84948db 100644
> --- a/src/netlink_delinearize.c
> +++ b/src/netlink_delinearize.c
> @@ -655,8 +655,11 @@ static void netlink_parse_osf(struct netlink_parse_ctx 
> *ctx,
>  {
>   enum nft_registers dreg;
>   struct expr *expr;
> + uint8_t ttl;
> +
> + ttl = nftnl_expr_get_u8(nle, NFTNL_EXPR_OSF_TTL);
> + expr = osf_expr_alloc(loc, ttl);
>  
> - expr = osf_expr_alloc(loc);
>   dreg = netlink_parse_register(nle, NFTNL_EXPR_OSF_DREG);
>   netlink_set_register(ctx, dreg, expr);
>  }
> diff --git a/src/netlink_linearize.c b/src/netlink_linearize.c
> index 0ac51bd..0c8f5fe 100644
> --- a/src/netlink_linearize.c
> +++ b/src/netlink_linearize.c
> @@ -227,6 +227,7 @@ static void netlink_gen_osf(struct netlink_linearize_ctx 
> *ctx,
>  
>   nle = alloc_nft_expr("osf");
>   netlink_put_register(nle, NFTNL_EXPR_OSF_DREG, dreg);
> + nftnl_expr_set_u8(nle, NFTNL_EXPR_OSF_TTL, expr->osf.ttl);
>   nftnl_rule_add_expr(ctx->nlr, nle);
>  }
>  
> diff --git a/src/osf.c b/src/osf.c
> index 85c9573..1a224fd 100644
> --- a/src/osf.c
> +++ b/src/osf.c
> @@ -5,13 +5,32 @@
>  #include 
>  #include 
>  
> +static const char *osf_ttl_int_to_str(const uint8_t ttl)
> +{
> + if (ttl == 1)
> + return "loose ";
> + else if (ttl == 2)
> + return "skip ";
> +
> + return "";
> +}
> +
>  static void osf_expr_print(const struct expr *expr, struct output_ctx *octx)
>  {
> - 

Re: [PATCH] netfilter: conntrack: fix cloned unconfirmed skb->_nfct race in __nf_conntrack_confirm

2018-10-23 Thread Chieh-Min Wang
Not sure if you have questions about this bug? I draw the broadcast
packet racing flow chart as following:

br_handle_frame
  BR_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, br_handle_frame_finish)
  // skb->_nfct (unconfirmed conntrack) is established at PRE_ROUTING
  br_handle_frame_finish
// check if this packet is broadcast
br_flood_forward
  br_flood
list_for_each_entry_rcu(p, >port_list, list) // iterate
through each port
  maybe_deliver
deliver_clone
  skb = skb_clone(skb)
  __br_forward
BR_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD,...)
// queue in our nfq and received by our userspace program
// goto __nf_conntrack_confirm in our userspace
process context on CPU 1
br_pass_frame_up
  BR_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN,...)
  // __nf_conntrack_confirm in softirq context on CPU 0

Because conntrack confirm can happen at both INPUT and POSTROUTING
stage. So with NFQUEUE running,
skb->_nfct with same unconfirmed conntrack could race on different core.

Please let me know if you have any doubts :)
On Thu, Oct 18, 2018 at 10:03 PM Chieh-Min Wang  wrote:
>
> From: Chieh-Min Wang 
>
> For bridge(br_flood) or broadcast/multicast packets, they could clone skb with
> unconfirmed conntrack which break the rule that unconfirmed skb->_nfct is 
> never shared.
> With nfqueue running on my system, the race can be easily reproduced with 
> following
> warning calltrace:
>
> [13257.707525] CPU: 0 PID: 12132 Comm: main Tainted: PW   4.4.60 
> #7744
> [13257.707568] Hardware name: Qualcomm (Flattened Device Tree)
> [13257.714700] [] (unwind_backtrace) from [] 
> (show_stack+0x10/0x14)
> [13257.720253] [] (show_stack) from [] 
> (dump_stack+0x94/0xa8)
> [13257.728240] [] (dump_stack) from [] 
> (warn_slowpath_common+0x94/0xb0)
> [13257.735268] [] (warn_slowpath_common) from [] 
> (warn_slowpath_null+0x1c/0x24)
> [13257.743519] [] (warn_slowpath_null) from [] 
> (__nf_conntrack_confirm+0xa8/0x618)
> [13257.752284] [] (__nf_conntrack_confirm) from [] 
> (ipv4_confirm+0xb8/0xfc)
> [13257.761049] [] (ipv4_confirm) from [] 
> (nf_iterate+0x48/0xa8)
> [13257.769725] [] (nf_iterate) from [] 
> (nf_hook_slow+0x30/0xb0)
> [13257.777108] [] (nf_hook_slow) from [] 
> (br_nf_post_routing+0x274/0x31c)
> [13257.784486] [] (br_nf_post_routing) from [] 
> (nf_iterate+0x48/0xa8)
> [13257.792556] [] (nf_iterate) from [] 
> (nf_hook_slow+0x30/0xb0)
> [13257.800458] [] (nf_hook_slow) from [] 
> (br_forward_finish+0x94/0xa4)
> [13257.808010] [] (br_forward_finish) from [] 
> (br_nf_forward_finish+0x150/0x1ac)
> [13257.815736] [] (br_nf_forward_finish) from [] 
> (nf_reinject+0x108/0x170)
> [13257.824762] [] (nf_reinject) from [] 
> (nfqnl_recv_verdict+0x3d8/0x420)
> [13257.832924] [] (nfqnl_recv_verdict) from [] 
> (nfnetlink_rcv_msg+0x158/0x248)
> [13257.841256] [] (nfnetlink_rcv_msg) from [] 
> (netlink_rcv_skb+0x54/0xb0)
> [13257.849762] [] (netlink_rcv_skb) from [] 
> (netlink_unicast+0x148/0x23c)
> [13257.858093] [] (netlink_unicast) from [] 
> (netlink_sendmsg+0x2ec/0x368)
> [13257.866348] [] (netlink_sendmsg) from [] 
> (sock_sendmsg+0x34/0x44)
> [13257.874590] [] (sock_sendmsg) from [] 
> (___sys_sendmsg+0x1ec/0x200)
> [13257.882489] [] (___sys_sendmsg) from [] 
> (__sys_sendmsg+0x3c/0x64)
> [13257.890300] [] (__sys_sendmsg) from [] 
> (ret_fast_syscall+0x0/0x34)
>
> The original code just triggered the warning but do nothing. It will caused 
> the shared
> conntrack moves to the dying list and the packet be droppped 
> (nf_ct_resolve_clash returns
> NF_DROP for dying conntrack).
>
> Reproduce steps:
>
> ++
> |  br0(bridge)   |
> ||
> +-+-+-+--+
>   | eth0|   | eth1|   | eth2|
>   | |   | |   | |
>   +--+--+   +--+--+   +---+-+
>  | |  |
>  | |  |
>   +--+-+ +-+--++--+-+
>   | PC1| | PC2|| PC3|
>   ++ ++++
>
> iptables -A FORWARD -m mark --mark 0x100/0x100 -j NFQUEUE --queue-num 
> 100 --queue-bypass
> ps: Our nfq userspace program will set mark on packets whose connection has 
> already been processed.
>
> PC1 sends broadcast packets simulated by hping3:
> hping3 --rand-source --udp 192.168.1.255 -i u100
> ---
>  net/netfilter/nf_conntrack_core.c | 13 ++---
>  1 file changed, 10 insertions(+), 3 deletions(-)
>
> diff --git a/net/netfilter/nf_conntrack_core.c 
> b/net/netfilter/nf_conntrack_core.c
> index ca1168d..1c16bd9 100644
> --- a/net/netfilter/nf_conntrack_core.c
> +++ b/net/netfilter/nf_conntrack_core.c
> @@ -901,10 +901,17 @@ __nf_conntrack_confirm(struct sk_buff *skb)
>  * REJECT will give spurious warnings here.
>  */
>
> -   /* No external references means no one else could have
> -* confirmed us.
> +   /* Another skb with the same unconfirmed conntrack may
> +* win