[PATCH net] bridge: ebtables: fix reception of frames DNAT-ed to bridge device
When trying to redirect bridged frames to the bridge device itself via the ebtables nat-prerouting chain and the dnat target then this currently fails: The ethernet destination of the frame is dnat'ed to the MAC address of the bridge itself just fine and the correctly altered frame can even be captured via a tcpdump on br0 (with or without promisc mode). However, the IP code drops it in the beginning of ip_input.c/ip_rcv() as the dnat target did not update the skb->pkt_type. If after dnat'ing the packet is now destined to us then the skb->pkt_type needs to be updated from PACKET_OTHERHOST to PACKET_HOST, too. Signed-off-by: Linus Lüssing--- net/bridge/br_input.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 013f2290b..ec83175 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -198,8 +198,12 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb if (dst) { unsigned long now = jiffies; - if (dst->is_local) + if (dst->is_local) { + /* fix up potential DNAT mess */ + skb->pkt_type = PACKET_HOST; + return br_pass_frame_up(skb); + } if (now != dst->used) dst->used = now; -- 2.1.4 -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH nf-next 1/1] netfilter: ctlink: Remove duplicated condition check in nfnl_cthelper_create
From: Gao FengBecause its caller nfnl_cthelper_new has already checked the tb[NFCTH_TUPLE], so it is unnecessary to check it again in nfnl_cthelper_create. Signed-off-by: Gao Feng --- net/netfilter/nfnetlink_cthelper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index de87823..6649578 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -205,7 +205,7 @@ struct nf_conntrack_helper *helper; int ret; - if (!tb[NFCTH_TUPLE] || !tb[NFCTH_POLICY] || !tb[NFCTH_PRIV_DATA_LEN]) + if (!tb[NFCTH_POLICY] || !tb[NFCTH_PRIV_DATA_LEN]) return -EINVAL; helper = kzalloc(sizeof(struct nf_conntrack_helper), GFP_KERNEL); -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH nft 5/9] ct: add conntrack event mask support
Florian Westphalwrote: [ nft .. ct eventmask set ...] ahem. I did not mean to submit this yet as the kernel patch assumes that the untracked object doesn't exist anymore (which is why I haven't submitted it yet). I will not push this patch until the kernel patch is in nf-next. -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH nft 8/9] doc: ct zone set support
Signed-off-by: Florian Westphal--- doc/nft.xml | 28 +++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/doc/nft.xml b/doc/nft.xml index de86d2a18258..8ea280417742 100644 --- a/doc/nft.xml +++ b/doc/nft.xml @@ -3347,6 +3347,7 @@ ip6 filter output log flags all mark label + zone set value @@ -3354,10 +3355,14 @@ ip6 filter output log flags all The ct statement sets meta data associated with a connection. + The zone id has to be assigned before a conntrack lookup takes place, + i.e. this has to be done in prerouting and possibly output (if locally + generated packets need to be placed in a distinct zone), with a hook + priority of -300. - Meta statement types + Conntrack statement types @@ -3380,6 +3385,12 @@ ip6 filter output log flags all Connection tracking label label + + zone + conntrack zone + integer (16 bit) + + @@ -3391,6 +3402,21 @@ ip6 filter output log flags all ct set mark meta mark + + set zone mapped via interface + +table inet raw { + chain prerouting { + type filter hook prerouting priority -300; + ct zone set iif map { "eth1" : 1, "veth1" : 2 } + } + chain output { + type filter hook output priority -300; + ct zone set oif map { "eth1" : 1, "veth1" : 2 } + } +} + + -- 2.10.2 -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH nft 4/9] src: implement add/create/delete for ct helper objects
Signed-off-by: Florian Westphal--- include/rule.h | 4 src/evaluate.c | 4 src/parser_bison.y | 63 -- src/rule.c | 22 +++ 4 files changed, 91 insertions(+), 2 deletions(-) diff --git a/include/rule.h b/include/rule.h index b791cc0a497c..fb4606406a94 100644 --- a/include/rule.h +++ b/include/rule.h @@ -370,6 +370,7 @@ enum cmd_obj { CMD_OBJ_COUNTERS, CMD_OBJ_QUOTA, CMD_OBJ_QUOTAS, + CMD_OBJ_CT_HELPER, CMD_OBJ_CT_HELPERS, }; @@ -438,6 +439,9 @@ struct cmd { extern struct cmd *cmd_alloc(enum cmd_ops op, enum cmd_obj obj, const struct handle *h, const struct location *loc, void *data); +extern struct cmd *cmd_alloc_obj_ct(enum cmd_ops op, int type, + const struct handle *h, + const struct location *loc, void *data); extern void cmd_free(struct cmd *cmd); #include diff --git a/src/evaluate.c b/src/evaluate.c index 20f67ee784dd..8fb716c06244 100644 --- a/src/evaluate.c +++ b/src/evaluate.c @@ -2911,6 +2911,7 @@ static int cmd_evaluate_add(struct eval_ctx *ctx, struct cmd *cmd) return table_evaluate(ctx, cmd->table); case CMD_OBJ_COUNTER: case CMD_OBJ_QUOTA: + case CMD_OBJ_CT_HELPER: return 0; default: BUG("invalid command object type %u\n", cmd->obj); @@ -2934,6 +2935,7 @@ static int cmd_evaluate_delete(struct eval_ctx *ctx, struct cmd *cmd) case CMD_OBJ_TABLE: case CMD_OBJ_COUNTER: case CMD_OBJ_QUOTA: + case CMD_OBJ_CT_HELPER: return 0; default: BUG("invalid command object type %u\n", cmd->obj); @@ -3021,6 +3023,8 @@ static int cmd_evaluate_list(struct eval_ctx *ctx, struct cmd *cmd) return cmd_evaluate_list_obj(ctx, cmd, NFT_OBJECT_QUOTA); case CMD_OBJ_COUNTER: return cmd_evaluate_list_obj(ctx, cmd, NFT_OBJECT_COUNTER); + case CMD_OBJ_CT_HELPER: + return cmd_evaluate_list_obj(ctx, cmd, NFT_OBJECT_CT_HELPER); case CMD_OBJ_COUNTERS: case CMD_OBJ_QUOTAS: case CMD_OBJ_CT_HELPERS: diff --git a/src/parser_bison.y b/src/parser_bison.y index 4d2b62438eeb..d6f095ef9f64 100644 --- a/src/parser_bison.y +++ b/src/parser_bison.y @@ -583,8 +583,8 @@ static void location_update(struct location *loc, struct location *rhs, int n) %typeand_rhs_expr exclusive_or_rhs_expr inclusive_or_rhs_expr %destructor { expr_free($$); } and_rhs_expr exclusive_or_rhs_expr inclusive_or_rhs_expr -%type counter_obj quota_obj -%destructor { obj_free($$); } counter_obj quota_obj +%type counter_obj quota_obj ct_obj_alloc +%destructor { obj_free($$); } counter_obj quota_obj ct_obj_alloc %typerelational_expr %destructor { expr_free($$); } relational_expr @@ -840,6 +840,19 @@ add_cmd: TABLE table_spec { $$ = cmd_alloc(CMD_ADD, CMD_OBJ_QUOTA, &$2, &@$, $3); } + | CT STRING obj_specct_obj_alloc '{' ct_block '}'stmt_seperator + { + struct error_record *erec; + int type; + + erec = ct_objtype_parse(&@$, $2, ); + if (erec != NULL) { + erec_queue(erec, state->msgs); + YYERROR; + } + + $$ = cmd_alloc_obj_ct(CMD_ADD, type, &$3, &@$, $4); + } ; replace_cmd: RULEruleid_spec rule @@ -906,6 +919,19 @@ create_cmd : TABLE table_spec { $$ = cmd_alloc(CMD_CREATE, CMD_OBJ_QUOTA, &$2, &@$, $3); } + | CT STRING obj_specct_obj_alloc '{' ct_block '}'stmt_seperator + { + struct error_record *erec; + int type; + + erec = ct_objtype_parse(&@$, $2, ); + if (erec != NULL) { + erec_queue(erec, state->msgs); + YYERROR; + } + + $$ = cmd_alloc_obj_ct(CMD_CREATE, type, &$3, &@$, $4); + } ; insert_cmd : RULErule_position rule @@ -946,6 +972,19
[PATCH nft 7/9] files: provide 'raw' table equivalent
useful for the 'ct zone set' statement, it has to be done before the conntrack lookup but preferrably after the defragmention hook. In iptables, the functionality resides in the CT target which is restricted to the raw table. This provides the skeleton for nft. Signed-off-by: Florian Westphal--- files/nftables/Makefile.am | 4 +++- files/nftables/ipv4-raw| 6 ++ files/nftables/ipv6-raw| 6 ++ 3 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 files/nftables/ipv4-raw create mode 100644 files/nftables/ipv6-raw diff --git a/files/nftables/Makefile.am b/files/nftables/Makefile.am index 1378e2b684f1..a4c7ac7c980b 100644 --- a/files/nftables/Makefile.am +++ b/files/nftables/Makefile.am @@ -5,9 +5,11 @@ dist_pkgsysconf_DATA = bridge-filter \ ipv4-filter \ ipv4-mangle \ ipv4-nat\ + ipv4-raw\ ipv6-filter \ ipv6-mangle \ - ipv6-nat + ipv6-nat\ + ipv6-raw install-data-hook: ${SED} -i 's|@sbindir[@]|${sbindir}/|g' ${DESTDIR}${pkgsysconfdir}/* diff --git a/files/nftables/ipv4-raw b/files/nftables/ipv4-raw new file mode 100644 index ..19773ee8bc3b --- /dev/null +++ b/files/nftables/ipv4-raw @@ -0,0 +1,6 @@ +#! @sbindir@nft -f + +table raw { + chain prerouting{ type filter hook prerouting priority -300; } + chain output{ type filter hook output priority -300; } +} diff --git a/files/nftables/ipv6-raw b/files/nftables/ipv6-raw new file mode 100644 index ..5ee56a83987e --- /dev/null +++ b/files/nftables/ipv6-raw @@ -0,0 +1,6 @@ +#! @sbindir@nft -f + +table ip6 raw { + chain prerouting{ type filter hook prerouting priority -300; } + chain output{ type filter hook output priority -300; } +} -- 2.10.2 -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH nft 6/9] tests: py: add ct helper tests
needs minor tweak to nft-test.py so we don't zap the ';' within the {} before attempting to add the rule/ct helper object. Signed-off-by: Florian Westphal--- tests/py/ip/objects.t | 4 tests/py/ip/objects.t.payload | 14 ++ tests/py/nft-test.py | 11 ++- 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/tests/py/ip/objects.t b/tests/py/ip/objects.t index 8109402da8ba..4c98f5533050 100644 --- a/tests/py/ip/objects.t +++ b/tests/py/ip/objects.t @@ -13,3 +13,7 @@ counter name tcp dport map {443 : "cnt1", 80 : "cnt2", 22 : "cnt1"};ok ip saddr 192.168.1.3 quota name "qt1";ok ip saddr 192.168.1.3 quota name "qt3";fail quota name tcp dport map {443 : "qt1", 80 : "qt2", 22 : "qt1"};ok + +%cthelp1 type ct helper { type \"ftp\"\; protocol tcp\; };ok +ct helper set "cthelp1";ok +ct helper set tcp dport map {21 : "cthelp1", 2121 : "cthelp1" };ok diff --git a/tests/py/ip/objects.t.payload b/tests/py/ip/objects.t.payload index b5cad4d1e3fc..6499d36348fe 100644 --- a/tests/py/ip/objects.t.payload +++ b/tests/py/ip/objects.t.payload @@ -29,3 +29,17 @@ ip test-ip4 output [ cmp eq reg 1 0x0006 ] [ payload load 2b @ transport header + 2 => reg 1 ] [ objref sreg 1 set __objmap%d id 1 ] + +# ct helper set "cthelp1" +ip test-ip4 output + [ objref type 3 name cthelp1 ] + +# ct helper set tcp dport map {21 : "cthelp1", 2121 : "cthelp1" } +__objmap%d test-ip4 43 +__objmap%d test-ip4 0 +element 1500 : 0 [end] element 4908 : 0 [end] +ip test-ip4 output + [ payload load 1b @ network header + 9 => reg 1 ] + [ cmp eq reg 1 0x0006 ] + [ payload load 2b @ transport header + 2 => reg 1 ] + [ objref sreg 1 set __objmap%d id 1 ] diff --git a/tests/py/nft-test.py b/tests/py/nft-test.py index 25009217e51d..b22404076edd 100755 --- a/tests/py/nft-test.py +++ b/tests/py/nft-test.py @@ -885,6 +885,10 @@ def obj_process(obj_line, filename, lineno): obj_type = tokens[2] obj_spcf = "" +if obj_type == "ct" and tokens[3] == "helper": + obj_type = "ct helper" + tokens[3] = "" + if len(tokens) > 3: obj_spcf = " ".join(tokens[3:]) @@ -985,7 +989,12 @@ def run_test_file(filename, force_all_family_option, specific_file): continue if line[0] == "%": # Adds this object -obj_line = line.rstrip()[1:].split(";") +brace = line.rfind("}") +if brace < 0: +obj_line = line.rstrip()[1:].split(";") +else: +obj_line = (line[1:brace+1], line[brace+2:].rstrip()) + ret = obj_process(obj_line, filename, lineno) tests += 1 if ret == -1: -- 2.10.2 -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH nft 5/9] ct: add conntrack event mask support
Signed-off-by: Florian Westphal--- include/datatype.h| 1 + include/linux/netfilter/nf_conntrack_common.h | 80 ++- include/linux/netfilter/nf_tables.h | 2 + src/ct.c | 30 ++ 4 files changed, 49 insertions(+), 64 deletions(-) diff --git a/include/datatype.h b/include/datatype.h index e614b96e880b..04b7d8808cea 100644 --- a/include/datatype.h +++ b/include/datatype.h @@ -83,6 +83,7 @@ enum datatypes { TYPE_ECN, TYPE_FIB_ADDR, TYPE_BOOLEAN, + TYPE_CT_EVENTBIT, __TYPE_MAX }; #define TYPE_MAX (__TYPE_MAX - 1) diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index 27a1895218db..768ff251308b 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -79,73 +79,25 @@ enum ip_conntrack_status { IPS_FIXED_TIMEOUT = (1 << IPS_FIXED_TIMEOUT_BIT), }; -/* Connection tracking event bits */ -enum ip_conntrack_events -{ - /* New conntrack */ - IPCT_NEW_BIT = 0, - IPCT_NEW = (1 << IPCT_NEW_BIT), - - /* Expected connection */ - IPCT_RELATED_BIT = 1, - IPCT_RELATED = (1 << IPCT_RELATED_BIT), - - /* Destroyed conntrack */ - IPCT_DESTROY_BIT = 2, - IPCT_DESTROY = (1 << IPCT_DESTROY_BIT), - - /* Timer has been refreshed */ - IPCT_REFRESH_BIT = 3, - IPCT_REFRESH = (1 << IPCT_REFRESH_BIT), - - /* Status has changed */ - IPCT_STATUS_BIT = 4, - IPCT_STATUS = (1 << IPCT_STATUS_BIT), - - /* Update of protocol info */ - IPCT_PROTOINFO_BIT = 5, - IPCT_PROTOINFO = (1 << IPCT_PROTOINFO_BIT), - - /* Volatile protocol info */ - IPCT_PROTOINFO_VOLATILE_BIT = 6, - IPCT_PROTOINFO_VOLATILE = (1 << IPCT_PROTOINFO_VOLATILE_BIT), - - /* New helper for conntrack */ - IPCT_HELPER_BIT = 7, - IPCT_HELPER = (1 << IPCT_HELPER_BIT), - - /* Update of helper info */ - IPCT_HELPINFO_BIT = 8, - IPCT_HELPINFO = (1 << IPCT_HELPINFO_BIT), - - /* Volatile helper info */ - IPCT_HELPINFO_VOLATILE_BIT = 9, - IPCT_HELPINFO_VOLATILE = (1 << IPCT_HELPINFO_VOLATILE_BIT), - - /* NAT info */ - IPCT_NATINFO_BIT = 10, - IPCT_NATINFO = (1 << IPCT_NATINFO_BIT), - - /* Counter highest bit has been set, unused */ - IPCT_COUNTER_FILLING_BIT = 11, - IPCT_COUNTER_FILLING = (1 << IPCT_COUNTER_FILLING_BIT), - - /* Mark is set */ - IPCT_MARK_BIT = 12, - IPCT_MARK = (1 << IPCT_MARK_BIT), - - /* NAT sequence adjustment */ - IPCT_NATSEQADJ_BIT = 13, - IPCT_NATSEQADJ = (1 << IPCT_NATSEQADJ_BIT), - - /* Secmark is set */ - IPCT_SECMARK_BIT = 14, - IPCT_SECMARK = (1 << IPCT_SECMARK_BIT), +/* Connection tracking event types */ +enum ip_conntrack_events { + IPCT_NEW, /* new conntrack */ + IPCT_RELATED, /* related conntrack */ + IPCT_DESTROY, /* destroyed conntrack */ + IPCT_REPLY, /* connection has seen two-way traffic */ + IPCT_ASSURED, /* connection status has changed to assured */ + IPCT_PROTOINFO, /* protocol information has changed */ + IPCT_HELPER,/* new helper has been set */ + IPCT_MARK, /* new mark has been set */ + IPCT_SEQADJ,/* sequence adjustment has changed */ + IPCT_NATSEQADJ = IPCT_SEQADJ, + IPCT_SECMARK, /* new security mark has been set */ + IPCT_LABEL, /* new connlabel has been set */ }; enum ip_conntrack_expect_events { - IPEXP_NEW_BIT = 0, - IPEXP_NEW = (1 << IPEXP_NEW_BIT), + IPEXP_NEW, /* new expectation */ + IPEXP_DESTROY, /* destroyed expectation */ }; diff --git a/include/linux/netfilter/nf_tables.h b/include/linux/netfilter/nf_tables.h index 400f5049a022..9cc39b4458ca 100644 --- a/include/linux/netfilter/nf_tables.h +++ b/include/linux/netfilter/nf_tables.h @@ -901,6 +901,7 @@ enum nft_rt_attributes { * @NFT_CT_BYTES: conntrack bytes * @NFT_CT_AVGPKT: conntrack average bytes per packet * @NFT_CT_ZONE: conntrack zone + * @NFT_CT_EVENTMASK: ctnetlink events to be generated for this conntrack */ enum nft_ct_keys { NFT_CT_STATE, @@ -921,6 +922,7 @@ enum nft_ct_keys { NFT_CT_BYTES, NFT_CT_AVGPKT, NFT_CT_ZONE, + NFT_CT_EVENTMASK, }; /** diff --git a/src/ct.c b/src/ct.c index fd8ca87a21fb..5014265a3427 100644 --- a/src/ct.c +++ b/src/ct.c @@ -100,6 +100,34 @@ static const struct datatype ct_status_type = { .sym_tbl= _status_tbl, }; +static const struct symbol_table ct_events_tbl = { + .base = BASE_HEXADECIMAL, + .symbols= { + SYMBOL("new",
[PATCH nft 9/9] doc: helper assignement
Signed-off-by: Florian Westphal--- doc/nft.xml | 71 + 1 file changed, 71 insertions(+) diff --git a/doc/nft.xml b/doc/nft.xml index 8ea280417742..ffca6cc9322e 100644 --- a/doc/nft.xml +++ b/doc/nft.xml @@ -950,6 +950,72 @@ filter input iif $int_ifs accept + Ct + + + ct + helper + + + + Ct helper is used to define connection tracking helpers that can then be used in combination with the "ct helper set" statement. + type and protocol are mandatory, l3proto is derived from the table family by default, i.e. in the inet table the kernel will + try to load both the ipv4 and ipv6 helper backends, if they are supported by the kernel. + + + conntrack helper specifications + + + + + + + Keyword + Description + Type + + + + + type + name of helper type + quoted string (e.g. "ftp") + + + protocol + layer 4 protocol of the helper + string (e.g. tcp) + + + l3proto + layer 3 protocol of the helper + string (e.g. ip) + + + + + + defining and assigning ftp helper + + Unlike iptables, helper assignment needs to be performed after the conntrack lookup has completed, for example + with the default 0 hook priority. + + +table inet myhelpers { + ct helper ftp-standard { + type "ftp" + protocol tcp + } + chain prerouting { + type filter hook prerouting priority 0; + tcp dport 21 ct helper set "ftp-standard" + } +} + + + + + Counter @@ -3376,6 +3442,11 @@ ip6 filter output log flags all + helper + name of ct helper object to assign to the connection + quoted string + + mark Connection tracking mark mark -- 2.10.2 -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH nft 3/9] src: allow listing all ct helpers
this implements nft list ct helpers table filter table ip filter { ct helper ftp-standard { .. Signed-off-by: Florian Westphal--- include/rule.h | 1 + src/evaluate.c | 1 + src/parser_bison.y | 19 +++ src/rule.c | 2 ++ 4 files changed, 23 insertions(+) diff --git a/include/rule.h b/include/rule.h index d89a963dfd05..b791cc0a497c 100644 --- a/include/rule.h +++ b/include/rule.h @@ -370,6 +370,7 @@ enum cmd_obj { CMD_OBJ_COUNTERS, CMD_OBJ_QUOTA, CMD_OBJ_QUOTAS, + CMD_OBJ_CT_HELPERS, }; struct export { diff --git a/src/evaluate.c b/src/evaluate.c index ae30bc9bb3b9..20f67ee784dd 100644 --- a/src/evaluate.c +++ b/src/evaluate.c @@ -3023,6 +3023,7 @@ static int cmd_evaluate_list(struct eval_ctx *ctx, struct cmd *cmd) return cmd_evaluate_list_obj(ctx, cmd, NFT_OBJECT_COUNTER); case CMD_OBJ_COUNTERS: case CMD_OBJ_QUOTAS: + case CMD_OBJ_CT_HELPERS: if (cmd->handle.table == NULL) return 0; if (table_lookup(>handle) == NULL) diff --git a/src/parser_bison.y b/src/parser_bison.y index 664f38ee6a4b..4d2b62438eeb 100644 --- a/src/parser_bison.y +++ b/src/parser_bison.y @@ -1016,6 +1016,25 @@ list_cmd : TABLE table_spec { $$ = cmd_alloc(CMD_LIST, CMD_OBJ_MAP, &$2, &@$, NULL); } + | CT STRING TABLE table_spec + { + int cmd; + + if (strcmp($2, "helpers") == 0) { + cmd = CMD_OBJ_CT_HELPERS; + } else { + struct error_record *erec; + + erec = error(&@$, "unknown ct class '%s', want 'helpers'", $2); + + if (erec != NULL) { + erec_queue(erec, state->msgs); + YYERROR; + } + } + + $$ = cmd_alloc(CMD_LIST, cmd, &$4, &@$, NULL); + } ; reset_cmd : COUNTERSruleset_spec diff --git a/src/rule.c b/src/rule.c index abb6e1466441..6bffaa3eb63b 100644 --- a/src/rule.c +++ b/src/rule.c @@ -1469,6 +1469,8 @@ static int do_command_list(struct netlink_ctx *ctx, struct cmd *cmd) case CMD_OBJ_QUOTA: case CMD_OBJ_QUOTAS: return do_list_obj(ctx, cmd, NFT_OBJECT_QUOTA); + case CMD_OBJ_CT_HELPERS: + return do_list_obj(ctx, cmd, NFT_OBJECT_CT_HELPER); default: BUG("invalid command object type %u\n", cmd->obj); } -- 2.10.2 -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH nft 2/9] evaluate: refactor CMD_OBJ_QUOTA/COUNTER handling
... to make adding CMD_OBJ_CT_HELPER support easier. Signed-off-by: Florian Westphal--- src/evaluate.c | 32 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/src/evaluate.c b/src/evaluate.c index 7ddbb658f96f..ae30bc9bb3b9 100644 --- a/src/evaluate.c +++ b/src/evaluate.c @@ -2940,12 +2940,29 @@ static int cmd_evaluate_delete(struct eval_ctx *ctx, struct cmd *cmd) } } +static int cmd_evaluate_list_obj(struct eval_ctx *ctx, const struct cmd *cmd, +uint32_t obj_type) +{ + const struct table *table; + + if (obj_type == NFT_OBJECT_UNSPEC) + obj_type = NFT_OBJECT_COUNTER; + + table = table_lookup(>handle); + if (table == NULL) + return cmd_error(ctx, "Could not process rule: Table '%s' does not exist", +cmd->handle.table); + if (obj_lookup(table, cmd->handle.obj, obj_type) == NULL) + return cmd_error(ctx, "Could not process rule: Object '%s' does not exist", +cmd->handle.obj); + return 0; +} + static int cmd_evaluate_list(struct eval_ctx *ctx, struct cmd *cmd) { struct table *table; struct set *set; int ret; - uint32_t obj_type = NFT_OBJECT_UNSPEC; ret = cache_update(cmd->op, ctx->msgs); if (ret < 0) @@ -3001,18 +3018,9 @@ static int cmd_evaluate_list(struct eval_ctx *ctx, struct cmd *cmd) cmd->handle.chain); return 0; case CMD_OBJ_QUOTA: - obj_type = NFT_OBJECT_QUOTA; + return cmd_evaluate_list_obj(ctx, cmd, NFT_OBJECT_QUOTA); case CMD_OBJ_COUNTER: - if (obj_type == NFT_OBJECT_UNSPEC) - obj_type = NFT_OBJECT_COUNTER; - table = table_lookup(>handle); - if (table == NULL) - return cmd_error(ctx, "Could not process rule: Table '%s' does not exist", -cmd->handle.table); - if (obj_lookup(table, cmd->handle.obj, obj_type) == NULL) - return cmd_error(ctx, "Could not process rule: Object '%s' does not exist", -cmd->handle.obj); - return 0; + return cmd_evaluate_list_obj(ctx, cmd, NFT_OBJECT_COUNTER); case CMD_OBJ_COUNTERS: case CMD_OBJ_QUOTAS: if (cmd->handle.table == NULL) -- 2.10.2 -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH nft 0/9] ct helper set support
This series adds the frontend/nft support to define and assign connection tracking helpers. Example: table inet myhelpers { ct helper ftp-standard { type "ftp" protocol tcp } chain prerouting { type filter hook prerouting priority 0; tcp dport 21 ct helper set "ftp-standard" } } A future extension could also allow to define/set knobs that can only be set via module parameters at this time, for instance the ftp 'loose mode' or the number of allowed expectations. doc/nft.xml | 99 +++ files/nftables/Makefile.am|4 files/nftables/ipv4-raw |6 files/nftables/ipv6-raw |6 include/ct.h |1 include/datatype.h|1 include/linux/netfilter/nf_conntrack_common.h | 80 ++-- include/linux/netfilter/nf_tables.h |5 include/rule.h| 12 + src/ct.c | 40 ++ src/evaluate.c| 37 - src/netlink.c | 16 ++ src/parser_bison.y| 162 +- src/rule.c| 59 + src/statement.c | 10 + tests/py/ip/objects.t |4 tests/py/ip/objects.t.payload | 14 ++ tests/py/nft-test.py | 11 + 18 files changed, 481 insertions(+), 86 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH libnftnl 2/2] src: ct helper support
add support for ct helper objects, these are used to assign helpers to connections, similar to iptables -j CT --set-helper target. Signed-off-by: Florian Westphal--- include/libnftnl/object.h | 6 ++ include/linux/netfilter/nf_tables.h | 12 ++- include/obj.h | 6 ++ src/Makefile.am | 1 + src/obj/ct_helper.c | 210 src/object.c| 3 +- 6 files changed, 236 insertions(+), 2 deletions(-) create mode 100644 src/obj/ct_helper.c diff --git a/include/libnftnl/object.h b/include/libnftnl/object.h index ca3abeae66cc..ccd9d19b9364 100644 --- a/include/libnftnl/object.h +++ b/include/libnftnl/object.h @@ -34,6 +34,12 @@ enum { NFTNL_OBJ_QUOTA_FLAGS, }; +enum { + NFTNL_OBJ_CT_HELPER_NAME = NFTNL_OBJ_BASE, + NFTNL_OBJ_CT_HELPER_L3PROTO, + NFTNL_OBJ_CT_HELPER_L4PROTO, +}; + struct nftnl_obj; struct nftnl_obj *nftnl_obj_alloc(void); diff --git a/include/linux/netfilter/nf_tables.h b/include/linux/netfilter/nf_tables.h index a9280a6541ac..8f3842690d17 100644 --- a/include/linux/netfilter/nf_tables.h +++ b/include/linux/netfilter/nf_tables.h @@ -1260,10 +1260,20 @@ enum nft_fib_flags { NFTA_FIB_F_PRESENT = 1 << 5, /* check existence only */ }; +enum nft_ct_helper_attributes { + NFTA_CT_HELPER_UNSPEC, + NFTA_CT_HELPER_NAME, + NFTA_CT_HELPER_L3PROTO, + NFTA_CT_HELPER_L4PROTO, + __NFTA_CT_HELPER_MAX, +}; +#define NFTA_CT_HELPER_MAX (__NFTA_CT_HELPER_MAX - 1) + #define NFT_OBJECT_UNSPEC 0 #define NFT_OBJECT_COUNTER 1 #define NFT_OBJECT_QUOTA 2 -#define __NFT_OBJECT_MAX 3 +#define NFT_OBJECT_CT_HELPER 3 +#define __NFT_OBJECT_MAX 4 #define NFT_OBJECT_MAX (__NFT_OBJECT_MAX - 1) /** diff --git a/include/obj.h b/include/obj.h index edbf023f5cdd..d90919f2d86b 100644 --- a/include/obj.h +++ b/include/obj.h @@ -30,6 +30,11 @@ struct nftnl_obj { uint64_tconsumed; uint32_tflags; } quota; + struct nftnl_obj_ct_helper { + uint16_tl3proto; + uint8_t l4proto; + charname[16]; + } ct_helper; } data; }; @@ -49,6 +54,7 @@ struct obj_ops { extern struct obj_ops obj_ops_counter; extern struct obj_ops obj_ops_quota; +extern struct obj_ops obj_ops_ct_helper; #define nftnl_obj_data(obj) (void *)>data diff --git a/src/Makefile.am b/src/Makefile.am index 485a8c4acbef..77b67b267672 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -53,5 +53,6 @@ libnftnl_la_SOURCES = utils.c \ expr/redir.c \ expr/hash.c \ obj/counter.c \ + obj/ct_helper.c \ obj/quota.c \ libnftnl.map diff --git a/src/obj/ct_helper.c b/src/obj/ct_helper.c new file mode 100644 index ..d6d3111ecce8 --- /dev/null +++ b/src/obj/ct_helper.c @@ -0,0 +1,210 @@ +/* + * (C) 2017 Red Hat GmbH + * Author: Florian Westphal + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include +#include + +#include + +#include "internal.h" +#include +#include + +#include "obj.h" + +static int nftnl_obj_ct_helper_set(struct nftnl_obj *e, uint16_t type, + const void *data, uint32_t data_len) +{ + struct nftnl_obj_ct_helper *helper = nftnl_obj_data(e); + + switch (type) { + case NFTNL_OBJ_CT_HELPER_NAME: + snprintf(helper->name, sizeof(helper->name), "%s", (const char *)data); + break; + case NFTNL_OBJ_CT_HELPER_L3PROTO: + helper->l3proto = *((uint16_t *)data); + break; + case NFTNL_OBJ_CT_HELPER_L4PROTO: + helper->l4proto = *((uint8_t *)data); + break; + default: + return -1; + } + return 0; +} + +static const void *nftnl_obj_ct_helper_get(const struct nftnl_obj *e, + uint16_t type, uint32_t *data_len) +{ + struct nftnl_obj_ct_helper *helper = nftnl_obj_data(e); + + switch (type) { + case NFTNL_OBJ_CT_HELPER_NAME: + *data_len = strlen(helper->name); + return helper->name; + case NFTNL_OBJ_CT_HELPER_L3PROTO: + *data_len = sizeof(helper->l3proto); + return >l3proto; + case NFTNL_OBJ_CT_HELPER_L4PROTO: + *data_len = sizeof(helper->l4proto); +
[PATCH libnftnl 1/2] object: extend set/get api for u8/u16 types
Signed-off-by: Florian Westphal--- include/libnftnl/object.h | 4 src/libnftnl.map | 4 src/object.c | 26 ++ 3 files changed, 34 insertions(+) diff --git a/include/libnftnl/object.h b/include/libnftnl/object.h index 074a37789734..ca3abeae66cc 100644 --- a/include/libnftnl/object.h +++ b/include/libnftnl/object.h @@ -44,12 +44,16 @@ void nftnl_obj_unset(struct nftnl_obj *ne, uint16_t attr); void nftnl_obj_set_data(struct nftnl_obj *ne, uint16_t attr, const void *data, uint32_t data_len); void nftnl_obj_set(struct nftnl_obj *ne, uint16_t attr, const void *data); +void nftnl_obj_set_u8(struct nftnl_obj *ne, uint16_t attr, uint8_t val); +void nftnl_obj_set_u16(struct nftnl_obj *ne, uint16_t attr, uint16_t val); void nftnl_obj_set_u32(struct nftnl_obj *ne, uint16_t attr, uint32_t val); void nftnl_obj_set_u64(struct nftnl_obj *obj, uint16_t attr, uint64_t val); void nftnl_obj_set_str(struct nftnl_obj *ne, uint16_t attr, const char *str); const void *nftnl_obj_get_data(struct nftnl_obj *ne, uint16_t attr, uint32_t *data_len); const void *nftnl_obj_get(struct nftnl_obj *ne, uint16_t attr); +uint8_t nftnl_obj_get_u8(struct nftnl_obj *ne, uint16_t attr); +uint16_t nftnl_obj_get_u16(struct nftnl_obj *obj, uint16_t attr); uint32_t nftnl_obj_get_u32(struct nftnl_obj *ne, uint16_t attr); uint64_t nftnl_obj_get_u64(struct nftnl_obj *obj, uint16_t attr); const char *nftnl_obj_get_str(struct nftnl_obj *ne, uint16_t attr); diff --git a/src/libnftnl.map b/src/libnftnl.map index 4c082102aa29..1892c983eb50 100644 --- a/src/libnftnl.map +++ b/src/libnftnl.map @@ -278,9 +278,13 @@ global: nftnl_obj_unset; nftnl_obj_set; nftnl_obj_get; + nftnl_obj_set_u8; + nftnl_obj_set_u16; nftnl_obj_set_u32; nftnl_obj_set_u64; nftnl_obj_set_str; + nftnl_obj_get_u8; + nftnl_obj_get_u16; nftnl_obj_get_u32; nftnl_obj_get_str; nftnl_obj_get_u64; diff --git a/src/object.c b/src/object.c index 773eff6a5a18..e635f6a8ff0e 100644 --- a/src/object.c +++ b/src/object.c @@ -107,6 +107,18 @@ void nftnl_obj_set(struct nftnl_obj *obj, uint16_t attr, const void *data) } EXPORT_SYMBOL(nftnl_obj_set); +void nftnl_obj_set_u8(struct nftnl_obj *obj, uint16_t attr, uint8_t val) +{ + nftnl_obj_set_data(obj, attr, , sizeof(uint8_t)); +} +EXPORT_SYMBOL(nftnl_obj_set_u8); + +void nftnl_obj_set_u16(struct nftnl_obj *obj, uint16_t attr, uint16_t val) +{ + nftnl_obj_set_data(obj, attr, , sizeof(uint16_t)); +} +EXPORT_SYMBOL(nftnl_obj_set_u16); + void nftnl_obj_set_u32(struct nftnl_obj *obj, uint16_t attr, uint32_t val) { nftnl_obj_set_data(obj, attr, , sizeof(uint32_t)); @@ -164,6 +176,20 @@ const void *nftnl_obj_get(struct nftnl_obj *obj, uint16_t attr) } EXPORT_SYMBOL(nftnl_obj_get); +uint8_t nftnl_obj_get_u8(struct nftnl_obj *obj, uint16_t attr) +{ + const void *ret = nftnl_obj_get(obj, attr); + return ret == NULL ? 0 : *((uint8_t *)ret); +} +EXPORT_SYMBOL(nftnl_obj_get_u8); + +uint16_t nftnl_obj_get_u16(struct nftnl_obj *obj, uint16_t attr) +{ + const void *ret = nftnl_obj_get(obj, attr); + return ret == NULL ? 0 : *((uint16_t *)ret); +} +EXPORT_SYMBOL(nftnl_obj_get_u16); + uint32_t nftnl_obj_get_u32(struct nftnl_obj *obj, uint16_t attr) { const void *ret = nftnl_obj_get(obj, attr); -- 2.10.2 -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH libnftnl 0/2] add backend support to define ct helpers
This adds libnftnl support to define connection tracking helpers. Frontend (nft) support will follow soon. include/libnftnl/object.h | 10 + include/linux/netfilter/nf_tables.h | 12 +- include/obj.h |6 + src/Makefile.am |1 src/libnftnl.map|4 src/obj/ct_helper.c | 210 src/object.c| 29 7 files changed, 270 insertions(+), 2 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2 nf] netfilter: nft_set_bitmap: keep a list of dummy elements
On Tue, Mar 14, 2017 at 10:44:43PM +0800, Liping Zhang wrote: > Hi Pablo, > 2017-03-14 20:19 GMT+08:00 Pablo Neira Ayuso: > [...] > > Another possibility is to simply regard desc->size over the memory > > scalability notation when provided. I think this just needs an update > > from nft userspace. Look, bitmap and hashtable are both described as > > O(1) in terms of performance. If the user provides the set size (this > > is known in anonymous sets) we can select the one that takes less > > memory. When no size is specified, we rely on the set policy that is > > specified. > > > > Still, for anonymous sets we will select hashtable instead, this is > > going to be slower in systems that have plenty of memory. I think we > > cannot escape the new per-table global knob to select > > memory/performance for anononymous sets after all. > > After we implement more and more sets types, I think just based on > POL_PERFORMANCE or POL_MEMORY to select a suitable set will > become a more and more difficult task. So how about this method: > 1. For compatibility, POL_PERFORMANCE means hash set, and > POL_MEMORY means rbtree set.(I know this maybe incorrect when > the set->size is 0) > 2. When the user create the set, he(she) can specify a new settype to > select the set type, such as hash, rbtree, bitmap... a little similar to > ipset. > > I know this method is not perfect, but this will provide big > flexibility to the user. Then, we cannot deprecate sets like the rbtree, that I'm very much in favour to find a replacement, as it would be exposed to userspace and anyone could be using it, and we cannot break existing user setups. Moreover, if we, the developers, don't know exactly what is a good choice, how can users just know what is best for them? I would prefer developers come to us to tune the set backend selection so we get it better. We can enhance this model incrementally. Leaking details to userspace is easy, just a matter of exposing all these knobs to userspace via netlink. If this turns out to be the way, we'll do it at a given time, but I'm still willing to spend time on this set backend selection routine. > > I'm curious, what kind of device are you thinking of with such memory > > restrictions that cannot take 320 kB? I would expect such embedded > > device that cannot afford such memory consumption will come also with > > a smallish cpu. > > We had a small router with 32MB memory in my previous company. > On such an embedded device, occupy 320KB is also no problem of > course. > > But I guess the user will not happy to know the fact, inputting such a > nft rule "nft add x y tcp dport {21, 22} drop" will consume more than > 16KB memory :) For such small usecase, we can expose something like: table x { policy memory; < chain y { type filter hook output priority 0; policy drop; tcp dport {22, 80} ct state established,related accept } } So the kernel knows memory is more important that performance, and this policy exposes what the user needs. If not specified, the performance representation is selected. -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2 nf] netfilter: nft_set_bitmap: keep a list of dummy elements
Hi Pablo, 2017-03-14 20:19 GMT+08:00 Pablo Neira Ayuso: [...] > Another possibility is to simply regard desc->size over the memory > scalability notation when provided. I think this just needs an update > from nft userspace. Look, bitmap and hashtable are both described as > O(1) in terms of performance. If the user provides the set size (this > is known in anonymous sets) we can select the one that takes less > memory. When no size is specified, we rely on the set policy that is > specified. > > Still, for anonymous sets we will select hashtable instead, this is > going to be slower in systems that have plenty of memory. I think we > cannot escape the new per-table global knob to select > memory/performance for anononymous sets after all. After we implement more and more sets types, I think just based on POL_PERFORMANCE or POL_MEMORY to select a suitable set will become a more and more difficult task. So how about this method: 1. For compatibility, POL_PERFORMANCE means hash set, and POL_MEMORY means rbtree set.(I know this maybe incorrect when the set->size is 0) 2. When the user create the set, he(she) can specify a new settype to select the set type, such as hash, rbtree, bitmap... a little similar to ipset. I know this method is not perfect, but this will provide big flexibility to the user. > I'm curious, what kind of device are you thinking of with such memory > restrictions that cannot take 320 kB? I would expect such embedded > device that cannot afford such memory consumption will come also with > a smallish cpu. We had a small router with 32MB memory in my previous company. On such an embedded device, occupy 320KB is also no problem of course. But I guess the user will not happy to know the fact, inputting such a nft rule "nft add x y tcp dport {21, 22} drop" will consume more than 16KB memory :) -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [iptables PATCH] extensions: libxt_statistic: Complete nft translator
On Mon, Mar 13, 2017 at 05:53:53PM +0100, Pablo Neira Ayuso wrote: > On Mon, Mar 13, 2017 at 05:01:53PM +0100, Phil Sutter wrote: > [...] > > The nftables numgen expression works differently: > > Phil, if you think we need a 1:1 mapping so iptables users moving to > nftables don't get confused, I'll be fine to take an update to > nft_numgen so we accomodate a new NFT_NG_PROBABILISTIC mode or so. Well, implementing the translator wasn't exactly trivial, but in general I don't think numgen is particularly hard to use. Of course an explicit probability mode might make things easier, but then I guess it wouldn't fit into the LHS/RHS scheme anymore. So I personally don't care, but if you see use in implementing it just let me know and I'll adjust the converter to make use of it. :) Cheers, Phil -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2 nf] netfilter: nft_set_bitmap: keep a list of dummy elements
On Tue, Mar 14, 2017 at 11:21:31AM +0100, Pablo Neira Ayuso wrote: > On Tue, Mar 14, 2017 at 05:04:17PM +0800, Liping Zhang wrote: > > 2017-03-14 1:23 GMT+08:00 Pablo Neira Ayuso: > > [...] > > > Anyway, I'll be fine if this triggers some discussions on the set > > > backend selection at some point, as well as more detailed performance > > > evaluation. Note this big O notation just tell about the scalability. > > > Size provides an accurate way to measure how much memory this consumes > > > but only if userspace tells us how many elements we're going to add > > > beforehand. On the CPU side, we have no such specific metric as the > > > memory size. Probably we can introduce some generic cycle metric that > > > represents the cost of the lookup path, this won't be simple as this > > > number is not deterministic since there are many things to consider, > > > so we have to agree on how we calculate this pseudo-metric. > > > > Hmm... I think a better selection algorithm is necessary now. I find an > > obvious drawback for the time being, for example: > > > > When set->klen is 2, the bitmap_set's memory consumption is much > > higer than others. Only one single set with few elements will occupy at > > least 16kB, so only 20 rules using sets will consume roughly 320kB, it will > > become a high burden for these embedded systems which with low memory. > > > > It's worse that we cannot ignore the bitmap_set, even if we select the the > > NFT_SET_POL_MEMORY policy without the set size specified, we will still > > choose the bitmap_set, since it claims that it's space complexity is > > NFT_SET_CLASS_O_1. > > Make sense. Please, submit patches for nf-next to revisit POL_MEMORY > selection explaining the new criteria. I guess we will need more > iterations on this set selection as we get more set backends. I wanted > to dedicate some time on this during the Netfilter Workshop (to be > announce, by Q2 2017). > > Note that an anonymous sets default to POL_PERFORMANCE, so 20 rules > with anonymous sets would still consumed those 320 kB. So probably we > need a per-table global policy switch that we can set when the table > is created. I think updating such knob would result in EOPNOTSUPP at > this stage, as we would need a way to transfer set representations > from one backend to another if the policy update results in a > different set backend configuration in order to support > performance/memory policy updates. Another possibility is to simply regard desc->size over the memory scalability notation when provided. I think this just needs an update from nft userspace. Look, bitmap and hashtable are both described as O(1) in terms of performance. If the user provides the set size (this is known in anonymous sets) we can select the one that takes less memory. When no size is specified, we rely on the set policy that is specified. Still, for anonymous sets we will select hashtable instead, this is going to be slower in systems that have plenty of memory. I think we cannot escape the new per-table global knob to select memory/performance for anononymous sets after all. I'm curious, what kind of device are you thinking of with such memory restrictions that cannot take 320 kB? I would expect such embedded device that cannot afford such memory consumption will come also with a smallish cpu. -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
ANNOUNCE: New talk accepted on Netesto tool suite
The tech committee would like to announce a new accepted talk. Lawrence Brakmo will talk about Netesto tool suite Details are as follows: - Netesto (NEtwork TESting TOolkit) is a suite of tools for running multi-host network experiments that supports the collection and display of relevant data and statistics. It currently supports TCP RPC and STREAM transfers through netperf. Although the framework itself can easily be expanded to support other protocols and transfer programs. It has some similarities to Flent (The Flexible Network Tester) in that it is based on Python, but unlike Flent, Netesto supports: Multiple senders and receivers Detailed flow information: RTTs, cwnds, etc. Per host statistics and state Creation of csv file with results Creation of html file with results in table plus detailed results including graphs of cwnds, RTTs, goodput Support for creating complex scenarios through macros Support for adding delays with netem Netesto can run complex scenarios consisting of multiple individual tests with just a few commands due to the use of macros and libraries. The end result is an HTML page consisting of a table with one or more lines per test, plus links to pages for individual results which contain graphs showing goodputs, RTTs and cwnds. Netesto has proved very valuable for comparing the performance of different TCP variants (such as Cubic, Reno, BBR, NV). For example, it was used to evaluate BBR and compare its performance to Cubic (https://drive.google.com/file/d/0B4YZ_0yTgbJEa21CbUVLWFdrX2c/view) cheers, jamal -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: conntrack (possibly) hangs on our ARM CPU in case we delete 5k+ connections as fast as possible
Peter Marcziswrote: > I mean we destroy the sockets, we used two very basic python script to > open and close TCP sockets between the WAN and LAN interface. Then my guess wrt. nf_ct_iterate_cleanup is certainly wrong. -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: conntrack (possibly) hangs on our ARM CPU in case we delete 5k+ connections as fast as possible
Hi, I mean we destroy the sockets, we used two very basic python script to open and close TCP sockets between the WAN and LAN interface. Thanks for the hint, I will try those ! Br, Peter On Tue, Mar 14, 2017 at 11:33 AM, Florian Westphalwrote: > Peter Marczis wrote: >> Hello developers, >> I'm seeking some help to debug and solve one of my issues. >> >> We observed that if we create 30k connections, everything works as >> expected, but when we start to disconnect them, >> conntrack (well not confirmed yet fully) makes the kernel side busy, >> and looks like no scheduling happens. > > What do you mean by 'disconnect'? conntrack -F ? > > My wild guss is you need to backport > > commit d93c6258ee4255749c10012c50a31c08f4e9fb16 > netfilter: conntrack: resched in nf_ct_iterate_cleanup > >> The whole thing works as expected, the only problem it makes our >> processes and well everything on user side hanging for a couple of >> seconds 10-30s, >> which of course triggers our HW Watchdog, and we end up in a reboot. > > You could try > > CONFIG_LOCKUP_DETECTOR=y > CONFIG_HARDLOCKUP_DETECTOR=y > -- Br, Peter G. Marczis SW. Developer +45 28 12 92 10 -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: conntrack (possibly) hangs on our ARM CPU in case we delete 5k+ connections as fast as possible
Peter Marcziswrote: > Hello developers, > I'm seeking some help to debug and solve one of my issues. > > We observed that if we create 30k connections, everything works as > expected, but when we start to disconnect them, > conntrack (well not confirmed yet fully) makes the kernel side busy, > and looks like no scheduling happens. What do you mean by 'disconnect'? conntrack -F ? My wild guss is you need to backport commit d93c6258ee4255749c10012c50a31c08f4e9fb16 netfilter: conntrack: resched in nf_ct_iterate_cleanup > The whole thing works as expected, the only problem it makes our > processes and well everything on user side hanging for a couple of > seconds 10-30s, > which of course triggers our HW Watchdog, and we end up in a reboot. You could try CONFIG_LOCKUP_DETECTOR=y CONFIG_HARDLOCKUP_DETECTOR=y -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] iptables: set the path of the lock file via a configure option.
On Tue, Mar 14, 2017 at 05:55:50PM +0900, Lorenzo Colitti wrote: > Currently the iptables lock is hardcoded as "/run/xtables.lock". > Allow users to change this path using the --with-xt-lock-name > option to ./configure option. This is useful on systems like > Android which do not have /run. > > Tested on Ubuntu, as follows: > > 1. By default, the lock is placed in /run/xtables.lock: > > $ make distclean-recursive && ./autogen.sh && > ./configure --disable-nftables --prefix /tmp/iptables && > make -j64 && > make install && > sudo strace -e open,flock /tmp/iptables/sbin/iptables -L foo > ... > open("/run/xtables.lock", O_RDONLY|O_CREAT, 0600) = 3 > flock(3, LOCK_EX|LOCK_NB) = 0 > iptables: No chain/target/match by that name. > > 2. Specifying the lock results in the expected location being >used: > > $ make distclean-recursive && ./autogen.sh && \ > ./configure --disable-nftables --prefix /tmp/iptables \ > --with-xt-lock-name=/tmp/iptables/run/xtables.lock && > make -j64 && > make install && > sudo strace -e open,flock /tmp/iptables/sbin/iptables -L foo > ... > open("/tmp/iptables/run/xtables.lock", O_RDONLY|O_CREAT, 0600) = 3 > flock(3, LOCK_EX|LOCK_NB) = 0 > iptables: No chain/target/match by that name. Applied, thanks Lorenzo! -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2 nf] netfilter: nft_set_bitmap: keep a list of dummy elements
On Tue, Mar 14, 2017 at 05:04:17PM +0800, Liping Zhang wrote: > 2017-03-14 1:23 GMT+08:00 Pablo Neira Ayuso: > [...] > > Anyway, I'll be fine if this triggers some discussions on the set > > backend selection at some point, as well as more detailed performance > > evaluation. Note this big O notation just tell about the scalability. > > Size provides an accurate way to measure how much memory this consumes > > but only if userspace tells us how many elements we're going to add > > beforehand. On the CPU side, we have no such specific metric as the > > memory size. Probably we can introduce some generic cycle metric that > > represents the cost of the lookup path, this won't be simple as this > > number is not deterministic since there are many things to consider, > > so we have to agree on how we calculate this pseudo-metric. > > Hmm... I think a better selection algorithm is necessary now. I find an > obvious drawback for the time being, for example: > > When set->klen is 2, the bitmap_set's memory consumption is much > higer than others. Only one single set with few elements will occupy at > least 16kB, so only 20 rules using sets will consume roughly 320kB, it will > become a high burden for these embedded systems which with low memory. > > It's worse that we cannot ignore the bitmap_set, even if we select the the > NFT_SET_POL_MEMORY policy without the set size specified, we will still > choose the bitmap_set, since it claims that it's space complexity is > NFT_SET_CLASS_O_1. Make sense. Please, submit patches for nf-next to revisit POL_MEMORY selection explaining the new criteria. I guess we will need more iterations on this set selection as we get more set backends. I wanted to dedicate some time on this during the Netfilter Workshop (to be announce, by Q2 2017). Note that an anonymous sets default to POL_PERFORMANCE, so 20 rules with anonymous sets would still consumed those 320 kB. So probably we need a per-table global policy switch that we can set when the table is created. I think updating such knob would result in EOPNOTSUPP at this stage, as we would need a way to transfer set representations from one backend to another if the policy update results in a different set backend configuration in order to support performance/memory policy updates. -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
conntrack (possibly) hangs on our ARM CPU in case we delete 5k+ connections as fast as possible
Hello developers, I'm seeking some help to debug and solve one of my issues. At GreenWave I'm working on a "SOHO" Router product, and of course we use linux / netfilter. We observed that if we create 30k connections, everything works as expected, but when we start to disconnect them, conntrack (well not confirmed yet fully) makes the kernel side busy, and looks like no scheduling happens. The whole thing works as expected, the only problem it makes our processes and well everything on user side hanging for a couple of seconds 10-30s, which of course triggers our HW Watchdog, and we end up in a reboot. This is of course a corner case, I just would like to understand what's happening. I dig as deep in the code I can, looks like there is a hash table protected by RCU in the code. What I really seek is some help on how could I debug the kernel side things better, to see if it locks on this table long, or why the scheduling is not happening. Some extra info: Kernel: 3.4.108 - armv7l - conntrack compiled into it. ~ # cat /proc/cpuinfo Processor : ARMv7 Processor rev 1 (v7l) processor : 0 BogoMIPS: 1594.16 processor : 1 BogoMIPS: 1594.16 Features: swp half thumb fastmult vfp edsp thumbee neon vfpv3 tls CPU implementer : 0x41 CPU architecture: 7 CPU variant : 0x2 CPU part: 0xc09 CPU revision: 1 Hardware: CORTINA-G2 EB Revision: 7542a1 Serial : Idle value: ~ # cat /proc/meminfo MemTotal: 436936 kB MemFree: 298336 kB Buffers: 16 kB Cached:30504 kB SwapCached:0 kB Thanks a lot in advance ! I really appreciate your work ! -- Br, Peter G. Marczis SW. Developer +45 28 12 92 10 -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2 nf] netfilter: nft_set_bitmap: keep a list of dummy elements
Hi Pablo, 2017-03-14 1:23 GMT+08:00 Pablo Neira Ayuso: [...] > I would like this only describes the representation that is exposed to > the packet path, not the real memory consumption of it. I know I'm > kind of cheating, but with this I'm also giving this bitmap an > advantage over the hashtable representation, the performance numbers I > gathered here when evaluating it are better. Yes, I agree, bitmap_set is faster than hash_set, so for POL_PERFORMANCE policy, we should prefer to use bitmap_set. > Anyway, I'll be fine if this triggers some discussions on the set > backend selection at some point, as well as more detailed performance > evaluation. Note this big O notation just tell about the scalability. > Size provides an accurate way to measure how much memory this consumes > but only if userspace tells us how many elements we're going to add > beforehand. On the CPU side, we have no such specific metric as the > memory size. Probably we can introduce some generic cycle metric that > represents the cost of the lookup path, this won't be simple as this > number is not deterministic since there are many things to consider, > so we have to agree on how we calculate this pseudo-metric. Hmm... I think a better selection algorithm is necessary now. I find an obvious drawback for the time being, for example: When set->klen is 2, the bitmap_set's memory consumption is much higer than others. Only one single set with few elements will occupy at least 16kB, so only 20 rules using sets will consume roughly 320kB, it will become a high burden for these embedded systems which with low memory. It's worse that we cannot ignore the bitmap_set, even if we select the the NFT_SET_POL_MEMORY policy without the set size specified, we will still choose the bitmap_set, since it claims that it's space complexity is NFT_SET_CLASS_O_1. -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] iptables: set the path of the lock file via a configure option.
Currently the iptables lock is hardcoded as "/run/xtables.lock". Allow users to change this path using the --with-xt-lock-name option to ./configure option. This is useful on systems like Android which do not have /run. Tested on Ubuntu, as follows: 1. By default, the lock is placed in /run/xtables.lock: $ make distclean-recursive && ./autogen.sh && ./configure --disable-nftables --prefix /tmp/iptables && make -j64 && make install && sudo strace -e open,flock /tmp/iptables/sbin/iptables -L foo ... open("/run/xtables.lock", O_RDONLY|O_CREAT, 0600) = 3 flock(3, LOCK_EX|LOCK_NB) = 0 iptables: No chain/target/match by that name. 2. Specifying the lock results in the expected location being used: $ make distclean-recursive && ./autogen.sh && \ ./configure --disable-nftables --prefix /tmp/iptables \ --with-xt-lock-name=/tmp/iptables/run/xtables.lock && make -j64 && make install && sudo strace -e open,flock /tmp/iptables/sbin/iptables -L foo ... open("/tmp/iptables/run/xtables.lock", O_RDONLY|O_CREAT, 0600) = 3 flock(3, LOCK_EX|LOCK_NB) = 0 iptables: No chain/target/match by that name. Signed-off-by: Lorenzo Colitti--- configure.ac | 10 -- iptables/xshared.c | 2 -- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/configure.ac b/configure.ac index eda7871405..b27502667c 100644 --- a/configure.ac +++ b/configure.ac @@ -67,6 +67,10 @@ AC_ARG_ENABLE([connlabel], AS_HELP_STRING([--disable-connlabel], [Do not build libnetfilter_conntrack]), [enable_connlabel="$enableval"], [enable_connlabel="yes"]) +AC_ARG_WITH([xt-lock-name], AS_HELP_STRING([--with-xt-lock-name=PATH], + [Path to the xtables lock [[/run/xtables.lock]]]), + [xt_lock_name="$withval"], + [xt_lock_name="/run/xtables.lock"]) libiptc_LDFLAGS2=""; AX_CHECK_LINKER_FLAGS([-Wl,--no-as-needed], @@ -193,7 +197,7 @@ AC_SUBST([blacklist_6_modules]) regular_CFLAGS="-Wall -Waggregate-return -Wmissing-declarations \ -Wmissing-prototypes -Wredundant-decls -Wshadow -Wstrict-prototypes \ -Winline -pipe"; -regular_CPPFLAGS="${largefile_cppflags} -D_REENTRANT \ +regular_CPPFLAGS="${largefile_cppflags} -DXT_LOCK_NAME=\\\"\${xt_lock_name}\\\" -D_REENTRANT \ -DXTABLES_LIBDIR=\\\"\${xtlibdir}\\\" -DXTABLES_INTERNAL"; kinclude_CPPFLAGS=""; if [[ -n "$kbuilddir" ]]; then @@ -231,6 +235,7 @@ AC_SUBST([libxtables_vcurrent]) AC_SUBST([libxtables_vage]) libxtables_vmajor=$(($libxtables_vcurrent - $libxtables_vage)); AC_SUBST([libxtables_vmajor]) +AC_SUBST([xt_lock_name]) AC_CONFIG_FILES([Makefile extensions/GNUmakefile include/Makefile iptables/Makefile iptables/xtables.pc @@ -265,7 +270,8 @@ Build parameters: Support plugins via dlopen (shared): ${enable_shared} Installation prefix (--prefix): ${prefix} Xtables extension directory: ${e_xtlibdir} - Pkg-config directory:${e_pkgconfigdir}" + Pkg-config directory:${e_pkgconfigdir} + Xtables lock file: ${xt_lock_name}" if [[ -n "$ksourcedir" ]]; then echo " Kernel source directory:${ksourcedir}" diff --git a/iptables/xshared.c b/iptables/xshared.c index f0a5ddd0de..383ecf2cf2 100644 --- a/iptables/xshared.c +++ b/iptables/xshared.c @@ -17,8 +17,6 @@ #include #include "xshared.h" -#define XT_LOCK_NAME "/run/xtables.lock" - /* * Print out any special helps. A user might like to be able to add a --help * to the commandline, and see expected results. So we call help for all -- 2.12.0.246.ga2ecc84866-goog -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net-next 1/1] netfilter: helper: Remove the rcu lock in nf_ct_helper_expectfn_find_by_name and nf_ct_helper_expectfn_find_by_symbol.
Hi Palbo, On Tue, Mar 14, 2017 at 4:29 PM,wrote: > From: Gao Feng > > Because these two functions return the nf_ct_helper_expectfn pointer > which should be protected by rcu lock. So it should makes sure the > caller should hold the rcu lock, not inside these functions. > > Signed-off-by: Gao Feng > --- > net/netfilter/nf_conntrack_helper.c | 6 ++ > 1 file changed, 2 insertions(+), 4 deletions(-) > > diff --git a/net/netfilter/nf_conntrack_helper.c > b/net/netfilter/nf_conntrack_helper.c > index 6dc44d9..bce3d1f 100644 > --- a/net/netfilter/nf_conntrack_helper.c > +++ b/net/netfilter/nf_conntrack_helper.c > @@ -311,38 +311,36 @@ void nf_ct_helper_expectfn_unregister(struct > nf_ct_helper_expectfn *n) > } > EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_unregister); > > +/* Caller should hold the rcu lock */ > struct nf_ct_helper_expectfn * > nf_ct_helper_expectfn_find_by_name(const char *name) > { > struct nf_ct_helper_expectfn *cur; > bool found = false; > > - rcu_read_lock(); > list_for_each_entry_rcu(cur, _ct_helper_expectfn_list, head) { > if (!strcmp(cur->name, name)) { > found = true; > break; > } > } > - rcu_read_unlock(); > return found ? cur : NULL; > } > EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_find_by_name); > > +/* Caller should hold the rcu lock */ > struct nf_ct_helper_expectfn * > nf_ct_helper_expectfn_find_by_symbol(const void *symbol) > { > struct nf_ct_helper_expectfn *cur; > bool found = false; > > - rcu_read_lock(); > list_for_each_entry_rcu(cur, _ct_helper_expectfn_list, head) { > if (cur->expectfn == symbol) { > found = true; > break; > } > } > - rcu_read_unlock(); > return found ? cur : NULL; > } > EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_find_by_symbol); > -- > 1.9.1 > > Sorry, I misspelled the title. It should be "nf-next", not "net-next". Regards Feng -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH nf 1/1] netfilter: helper: Fix possible panic caused by invoking expectfn unloaded
From: Gao FengThe helper module permits the helper modules register expectfn, and it could be hold by external caller. But when the module is unloaded, there may be some pending expect nodes which still hold the function reference. It may cause unexpected behavior, even panic. Now it would delete the expect nodes which uses the expectfn when unregister expectfn. And it must use the rcu_read_lock to protect the expectfn until insert it or doesn't access it ever. There is only one caller which doesn't hold the rcu lock now. It is ctnetlink_create_expect. BTW, nf_ct_helper_expectfn_find_by_name/symbol invokes the rcu lock to protect the lookup. But it is not enough, because it returns the nf_ct_helper_expectfn pointer which should be protected by rcu lock. Actually the caller should hold the rcu lock instead of these funcs. I will refine it in the cleanup patch. Signed-off-by: Gao Feng --- net/netfilter/nf_conntrack_helper.c | 23 +++ net/netfilter/nf_conntrack_netlink.c | 3 +++ 2 files changed, 26 insertions(+) diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 6dc44d9..2be820b 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -305,9 +305,32 @@ void nf_ct_helper_expectfn_register(struct nf_ct_helper_expectfn *n) void nf_ct_helper_expectfn_unregister(struct nf_ct_helper_expectfn *n) { + struct nf_conntrack_expect *exp; + const struct hlist_node *next; + u32 i; + spin_lock_bh(_conntrack_expect_lock); list_del_rcu(>head); spin_unlock_bh(_conntrack_expect_lock); + + /* Make sure everyone who holds the expect func already +* has inserted it +*/ + synchronize_rcu(); + + /* Get rid of expectations used the dying expectfn */ + spin_lock_bh(_conntrack_expect_lock); + for (i = 0; i < nf_ct_expect_hsize; i++) { + hlist_for_each_entry_safe(exp, next, + _ct_expect_hash[i], hnode) { + if (exp->expectfn == n->expectfn && + del_timer(>timeout)) { + nf_ct_unlink_expect(exp); + nf_ct_expect_put(exp); + } + } + } + spin_unlock_bh(_conntrack_expect_lock); } EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_unregister); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 6806b5e..37784e2 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -3156,14 +3156,17 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl, } } + rcu_read_lock(); exp = ctnetlink_alloc_expect(cda, ct, helper, , ); if (IS_ERR(exp)) { + rcu_read_unlock(); err = PTR_ERR(exp); goto err_ct; } err = nf_ct_expect_related_report(exp, portid, report); nf_ct_expect_put(exp); + rcu_read_unlock(); err_ct: nf_ct_put(ct); return err; -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html