On 16/01/2023 13:45, Ales Musil wrote:
> Currently, the CT can be flushed by dpctl only by specifying
> the whole 5-tuple. This is not very convenient when there are
> only some fields known to the user of CT flush. Add new struct
> ofputil_ct_match which represents the generic filtering that can
> be done for CT flush. The match is done only on fields that are
> non-zero with exception to the icmp fields.
>
> This allows the filtering just within dpctl, however
> it is a preparation for OpenFlow extension.
>
> Reported-at: https://bugzilla.redhat.com/2120546
> Signed-off-by: Ales Musil <[email protected]>
> ---
> v7: Rebase on top of current master.
> Address comments from Ilya:
> * Move the structs and function to public header.
> * Address some style related comments.
> * Document the ICMP patial match limitation.
>
> v6: Rebase on top of current master.
> Address comments from Ilya.
> v5: Add ack from Paolo.
> v4: Fix a flush all scenario.
> v3: Rebase on top of master.
> Address the C99 comment and missing dpif_close call.
> v2: Rebase on top of master.
> Address comments from Paolo.
> ---
> NEWS | 3 +
> include/openvswitch/automake.mk | 1 +
> include/openvswitch/ofp-ct.h | 70 ++++++++
> lib/automake.mk | 1 +
> lib/ct-dpif.c | 298 +++++++++++++++++++-------------
> lib/ct-dpif.h | 5 +-
> lib/dpctl.c | 43 +++--
> lib/dpctl.man | 24 ++-
> lib/ofp-ct.c | 214 +++++++++++++++++++++++
> tests/system-traffic.at | 102 ++++++++++-
> 10 files changed, 620 insertions(+), 141 deletions(-)
> create mode 100644 include/openvswitch/ofp-ct.h
> create mode 100644 lib/ofp-ct.c
>
> diff --git a/NEWS b/NEWS
> index 4f9291bf1..035fcb0ee 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -33,6 +33,9 @@ Post-v3.0.0
> * Add new experimental PMD load based sleeping feature. PMD threads can
> request to sleep up to a user configured 'pmd-maxsleep' value under
> low load conditions.
> + - ovs-dpctl and 'ovs-appctl dpctl/' commands:
> + * "flush-conntrack" is now capable of handling partial 5-tuple,
> + with additional optional parameter to specify the reply direction.
>
>
> v3.0.0 - 15 Aug 2022
> diff --git a/include/openvswitch/automake.mk b/include/openvswitch/automake.mk
> index 84670d80a..0cc1f569e 100644
> --- a/include/openvswitch/automake.mk
> +++ b/include/openvswitch/automake.mk
> @@ -15,6 +15,7 @@ openvswitchinclude_HEADERS = \
> include/openvswitch/ofp-actions.h \
> include/openvswitch/ofp-bundle.h \
> include/openvswitch/ofp-connection.h \
> + include/openvswitch/ofp-ct.h \
> include/openvswitch/ofp-ed-props.h \
> include/openvswitch/ofp-errors.h \
> include/openvswitch/ofp-flow.h \
> diff --git a/include/openvswitch/ofp-ct.h b/include/openvswitch/ofp-ct.h
> new file mode 100644
> index 000000000..e7af45337
> --- /dev/null
> +++ b/include/openvswitch/ofp-ct.h
> @@ -0,0 +1,70 @@
> +/* Copyright (c) 2022, Red Hat, Inc.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + * http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#ifndef OVS_OFP_CT_H
> +#define OVS_OFP_CT_H
> +
> +#include <stdbool.h>
> +#include <stdint.h>
> +#include <sys/types.h>
> +#include <netinet/in.h>
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +struct ofp_ct_tuple {
> + struct in6_addr src;
> + struct in6_addr dst;
> +
> + union {
> + ovs_be16 src_port;
> + ovs_be16 icmp_id;
> + };
> + union {
> + ovs_be16 dst_port;
> + struct {
> + uint8_t icmp_code;
> + uint8_t icmp_type;
> + };
> + };
> +};
> +
> +struct ofp_ct_match {
> + uint8_t ip_proto;
> + uint16_t l3_type;
> +
> + struct ofp_ct_tuple tuple_orig;
> + struct ofp_ct_tuple tuple_reply;
> +};
> +
> +bool ofp_ct_tuple_is_zero(const struct ofp_ct_tuple *tuple, uint8_t
> ip_proto);
> +
> +bool ofp_ct_tuple_is_five_tuple(const struct ofp_ct_tuple *tuple,
> + uint8_t ip_proto);
> +
> +void ofp_ct_match_format(struct ds *ds, const struct ofp_ct_match *match);
> +
> +bool ofp_ct_tuple_parse(struct ofp_ct_tuple *tuple, const char *s,
> + struct ds *ds, uint8_t *ip_proto,
> + uint16_t *l3_type);
> +
> +bool ofp_ct_is_match_zero(const struct ofp_ct_match *match);
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* include/openvswitch/ofp-ct.h */
> diff --git a/lib/automake.mk b/lib/automake.mk
> index 61bdc308f..e64ee76ce 100644
> --- a/lib/automake.mk
> +++ b/lib/automake.mk
> @@ -226,6 +226,7 @@ lib_libopenvswitch_la_SOURCES = \
> lib/ofp-actions.c \
> lib/ofp-bundle.c \
> lib/ofp-connection.c \
> + lib/ofp-ct.c \
> lib/ofp-ed-props.c \
> lib/ofp-errors.c \
> lib/ofp-flow.c \
> diff --git a/lib/ct-dpif.c b/lib/ct-dpif.c
> index 6f17a26b5..5ae925d19 100644
> --- a/lib/ct-dpif.c
> +++ b/lib/ct-dpif.c
> @@ -20,6 +20,7 @@
> #include <errno.h>
>
> #include "ct-dpif.h"
> +#include "openvswitch/ofp-ct.h"
> #include "openvswitch/ofp-parse.h"
> #include "openvswitch/vlog.h"
>
> @@ -71,6 +72,116 @@ ct_dpif_dump_start(struct dpif *dpif, struct
> ct_dpif_dump_state **dump,
> return err;
> }
>
> +static void
> +ct_dpif_tuple_from_ofputil_ct_tuple(const struct ofp_ct_tuple *ofp_tuple,
> + struct ct_dpif_tuple *tuple,
> + uint16_t l3_type, uint8_t ip_proto)
> +{
> + if (l3_type == AF_INET) {
> + tuple->src.ip = in6_addr_get_mapped_ipv4(&ofp_tuple->src);
> + tuple->dst.ip = in6_addr_get_mapped_ipv4(&ofp_tuple->dst);
> + } else {
> + tuple->src.in6 = ofp_tuple->src;
> + tuple->dst.in6 = ofp_tuple->dst;
> + }
> +
> + tuple->l3_type = l3_type;
> + tuple->ip_proto = ip_proto;
> + tuple->src_port = ofp_tuple->src_port;
> +
> + if (ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6) {
> + tuple->icmp_code = ofp_tuple->icmp_code;
> + tuple->icmp_type = ofp_tuple->icmp_type;
> + } else {
> + tuple->dst_port = ofp_tuple->dst_port;
> + }
> +}
> +
> +static inline bool
> +ct_dpif_inet_addr_cmp_partial(const union ct_dpif_inet_addr *addr,
> + const struct in6_addr *partial,
> + const uint16_t l3_type)
> +{
> + if (ipv6_is_zero(partial)) {
> + return true;
> + }
> +
> + if (l3_type == AF_INET && in6_addr_get_mapped_ipv4(partial) != addr->ip)
> {
> + return false;
> + }
> +
> + if (l3_type == AF_INET6 && !ipv6_addr_equals(partial, &addr->in6)) {
> + return false;
> + }
> +
> + return true;
> +}
> +
> +static inline bool
> +ct_dpif_tuple_ip_cmp_partial(const struct ct_dpif_tuple *tuple,
> + const struct ofp_ct_tuple *partial,
> + const uint16_t l3_type, const uint8_t ip_proto)
> +{
> + if (!ct_dpif_inet_addr_cmp_partial(&tuple->src, &partial->src, l3_type))
> {
> + return false;
> + }
> +
> + if (!ct_dpif_inet_addr_cmp_partial(&tuple->dst, &partial->dst, l3_type))
> {
> + return false;
> + }
> +
> + if (ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6) {
> + if (partial->icmp_id != tuple->icmp_id) {
> + return false;
> + }
> +
> + if (partial->icmp_type != tuple->icmp_type) {
> + return false;
> + }
> +
> + if (partial->icmp_code != tuple->icmp_code) {
> + return false;
> + }
> + } else {
> + if (partial->src_port && partial->src_port != tuple->src_port) {
> + return false;
> + }
> +
> + if (partial->dst_port && partial->dst_port != tuple->dst_port) {
> + return false;
> + }
> + }
> +
> + return true;
> +}
> +
> +/* Returns 'true' if all non-zero members of 'match' equal to corresponding
> + * members of 'entry'. */
> +static bool
> +ct_dpif_entry_cmp(const struct ct_dpif_entry *entry,
> + const struct ofp_ct_match *match)
> +{
> + if (match->l3_type && match->l3_type != entry->tuple_orig.l3_type) {
> + return false;
> + }
> +
> + if (match->ip_proto && match->ip_proto != entry->tuple_orig.ip_proto) {
> + return false;
> + }
> +
> + if (!ct_dpif_tuple_ip_cmp_partial(&entry->tuple_orig, &match->tuple_orig,
> + match->l3_type, match->ip_proto)) {
> + return false;
> + }
> +
> + if (!ct_dpif_tuple_ip_cmp_partial(&entry->tuple_reply,
> &match->tuple_reply,
> + match->l3_type, match->ip_proto)) {
> + return false;
> + }
> +
> + return true;
> +}
> +
> /* Dump one connection from a tracker, and put it in 'entry'.
> *
> * 'dump' should have been initialized by ct_dpif_dump_start().
> @@ -101,24 +212,81 @@ ct_dpif_dump_done(struct ct_dpif_dump_state *dump)
> : EOPNOTSUPP);
> }
>
> +
> +static int
> +ct_dpif_flush_tuple(struct dpif *dpif, const uint16_t *zone,
> + const struct ofp_ct_match *match)
> +{
> + struct ct_dpif_dump_state *dump;
> + struct ct_dpif_entry cte;
> + int error;
> + int tot_bkts;
> +
> + if (!dpif->dpif_class->ct_flush) {
> + return EOPNOTSUPP;
> + }
> +
> + if (VLOG_IS_DBG_ENABLED()) {
> + struct ds ds = DS_EMPTY_INITIALIZER;
> + ofp_ct_match_format(&ds, match);
> + VLOG_DBG("%s: ct_flush: zone=%d %s", dpif_name(dpif), zone ? *zone :
> 0,
> + ds_cstr(&ds));
> + ds_destroy(&ds);
> + }
> +
> + /* If we have full five tuple in original and empty reply tuple just
> + * do the flush over original tuple directly. */
> + if (ofp_ct_tuple_is_five_tuple(&match->tuple_orig, match->ip_proto) &&
> + ofp_ct_tuple_is_zero(&match->tuple_reply, match->ip_proto)) {
> + struct ct_dpif_tuple tuple;
> +
> + ct_dpif_tuple_from_ofputil_ct_tuple(&match->tuple_orig, &tuple,
> + match->l3_type, match->ip_proto);
> + return dpif->dpif_class->ct_flush(dpif, zone, &tuple);
> + }
> +
> + error = ct_dpif_dump_start(dpif, &dump, zone, &tot_bkts);
> + if (error) {
> + return error;
> + }
> +
> + while (!(error = ct_dpif_dump_next(dump, &cte))) {
> + if (zone && *zone != cte.zone) {
> + continue;
> + }
> +
> + if (ct_dpif_entry_cmp(&cte, match)) {
> + error = dpif->dpif_class->ct_flush(dpif, &cte.zone,
> + &cte.tuple_orig);
> + if (error) {
> + break;
> + }
> + }
> + }
> + if (error == EOF) {
> + error = 0;
> + }
> +
> + ct_dpif_dump_done(dump);
> + return error;
> +}
> +
> /* Flush the entries in the connection tracker used by 'dpif'. The
> * arguments have the following behavior:
> *
> - * - If both 'zone' and 'tuple' are NULL, flush all the conntrack entries.
> - * - If 'zone' is not NULL, and 'tuple' is NULL, flush all the conntrack
> + * - If both 'zone' is NULL and 'match' is NULL or zero,
> + * flush all the conntrack entries.
> + * - If 'zone' is not NULL, and 'match' is NULL, flush all the conntrack
> * entries in '*zone'.
> - * - If 'tuple' is not NULL, flush the conntrack entry specified by 'tuple'
> - * in '*zone'. If 'zone' is NULL, use the default zone (zone 0). */
> + * - If 'match' is not NULL or zero, flush the conntrack entry specified
> + * by 'match' in '*zone'. If 'zone' is NULL, use the default
> + * zone (zone 0). */
> int
> ct_dpif_flush(struct dpif *dpif, const uint16_t *zone,
> - const struct ct_dpif_tuple *tuple)
> + const struct ofp_ct_match *match)
> {
> - if (tuple) {
> - struct ds ds = DS_EMPTY_INITIALIZER;
> - ct_dpif_format_tuple(&ds, tuple);
> - VLOG_DBG("%s: ct_flush: %s in zone %d", dpif_name(dpif),
> ds_cstr(&ds),
> - zone ? *zone : 0);
> - ds_destroy(&ds);
> + if (match && !ofp_ct_is_match_zero(match)) {
> + return ct_dpif_flush_tuple(dpif, zone, match);
> } else if (zone) {
> VLOG_DBG("%s: ct_flush: zone %"PRIu16, dpif_name(dpif), *zone);
> } else {
> @@ -126,7 +294,7 @@ ct_dpif_flush(struct dpif *dpif, const uint16_t *zone,
> }
>
> return (dpif->dpif_class->ct_flush
> - ? dpif->dpif_class->ct_flush(dpif, zone, tuple)
> + ? dpif->dpif_class->ct_flush(dpif, zone, NULL)
> : EOPNOTSUPP);
> }
>
> @@ -583,112 +751,6 @@ ct_dpif_format_tcp_stat(struct ds * ds, int tcp_state,
> int conn_per_state)
> ds_put_format(ds, "=%u", conn_per_state);
> }
>
> -/* Parses a specification of a conntrack 5-tuple from 's' into 'tuple'.
> - * Returns true on success. Otherwise, returns false and puts the error
> - * message in 'ds'. */
> -bool
> -ct_dpif_parse_tuple(struct ct_dpif_tuple *tuple, const char *s, struct ds
> *ds)
> -{
> - char *pos, *key, *value, *copy;
> - memset(tuple, 0, sizeof *tuple);
> -
> - pos = copy = xstrdup(s);
> - while (ofputil_parse_key_value(&pos, &key, &value)) {
> - if (!*value) {
> - ds_put_format(ds, "field %s missing value", key);
> - goto error;
> - }
> -
> - if (!strcmp(key, "ct_nw_src") || !strcmp(key, "ct_nw_dst")) {
> - if (tuple->l3_type && tuple->l3_type != AF_INET) {
> - ds_put_cstr(ds, "L3 type set multiple times");
> - goto error;
> - } else {
> - tuple->l3_type = AF_INET;
> - }
> - if (!ip_parse(value, key[6] == 's' ? &tuple->src.ip :
> - &tuple->dst.ip)) {
> - goto error_with_msg;
> - }
> - } else if (!strcmp(key, "ct_ipv6_src") ||
> - !strcmp(key, "ct_ipv6_dst")) {
> - if (tuple->l3_type && tuple->l3_type != AF_INET6) {
> - ds_put_cstr(ds, "L3 type set multiple times");
> - goto error;
> - } else {
> - tuple->l3_type = AF_INET6;
> - }
> - if (!ipv6_parse(value, key[8] == 's' ? &tuple->src.in6 :
> - &tuple->dst.in6)) {
> - goto error_with_msg;
> - }
> - } else if (!strcmp(key, "ct_nw_proto")) {
> - char *err = str_to_u8(value, key, &tuple->ip_proto);
> - if (err) {
> - free(err);
> - goto error_with_msg;
> - }
> - } else if (!strcmp(key, "ct_tp_src") || !strcmp(key,"ct_tp_dst")) {
> - uint16_t port;
> - char *err = str_to_u16(value, key, &port);
> - if (err) {
> - free(err);
> - goto error_with_msg;
> - }
> - if (key[6] == 's') {
> - tuple->src_port = htons(port);
> - } else {
> - tuple->dst_port = htons(port);
> - }
> - } else if (!strcmp(key, "icmp_type") || !strcmp(key, "icmp_code") ||
> - !strcmp(key, "icmp_id") ) {
> - if (tuple->ip_proto != IPPROTO_ICMP &&
> - tuple->ip_proto != IPPROTO_ICMPV6) {
> - ds_put_cstr(ds, "invalid L4 fields");
> - goto error;
> - }
> - uint16_t icmp_id;
> - char *err;
> - if (key[5] == 't') {
> - err = str_to_u8(value, key, &tuple->icmp_type);
> - } else if (key[5] == 'c') {
> - err = str_to_u8(value, key, &tuple->icmp_code);
> - } else {
> - err = str_to_u16(value, key, &icmp_id);
> - tuple->icmp_id = htons(icmp_id);
> - }
> - if (err) {
> - free(err);
> - goto error_with_msg;
> - }
> - } else {
> - ds_put_format(ds, "invalid conntrack tuple field: %s", key);
> - goto error;
> - }
> - }
> -
> - if (ipv6_is_zero(&tuple->src.in6) || ipv6_is_zero(&tuple->dst.in6) ||
> - !tuple->ip_proto) {
> - /* icmp_type, icmp_code, and icmp_id can be 0. */
> - if (tuple->ip_proto != IPPROTO_ICMP &&
> - tuple->ip_proto != IPPROTO_ICMPV6) {
> - if (!tuple->src_port || !tuple->dst_port) {
> - ds_put_cstr(ds, "at least one of the conntrack 5-tuple
> fields "
> - "is missing.");
> - goto error;
> - }
> - }
> - }
> -
> - free(copy);
> - return true;
> -
> -error_with_msg:
> - ds_put_format(ds, "failed to parse field %s", key);
> -error:
> - free(copy);
> - return false;
> -}
>
> void
> ct_dpif_push_zone_limit(struct ovs_list *zone_limits, uint16_t zone,
> diff --git a/lib/ct-dpif.h b/lib/ct-dpif.h
> index 2848549b0..5edbbfd3b 100644
> --- a/lib/ct-dpif.h
> +++ b/lib/ct-dpif.h
> @@ -20,6 +20,8 @@
> #include "openvswitch/types.h"
> #include "packets.h"
>
> +struct ofp_ct_match;
> +
> union ct_dpif_inet_addr {
> ovs_be32 ip;
> ovs_be32 ip6[4];
> @@ -285,7 +287,7 @@ int ct_dpif_dump_start(struct dpif *, struct
> ct_dpif_dump_state **,
> int ct_dpif_dump_next(struct ct_dpif_dump_state *, struct ct_dpif_entry *);
> int ct_dpif_dump_done(struct ct_dpif_dump_state *);
> int ct_dpif_flush(struct dpif *, const uint16_t *zone,
> - const struct ct_dpif_tuple *);
> + const struct ofp_ct_match *);
> int ct_dpif_set_maxconns(struct dpif *dpif, uint32_t maxconns);
> int ct_dpif_get_maxconns(struct dpif *dpif, uint32_t *maxconns);
> int ct_dpif_get_nconns(struct dpif *dpif, uint32_t *nconns);
> @@ -311,7 +313,6 @@ void ct_dpif_format_ipproto(struct ds *ds, uint16_t
> ipproto);
> void ct_dpif_format_tuple(struct ds *, const struct ct_dpif_tuple *);
> uint8_t ct_dpif_coalesce_tcp_state(uint8_t state);
> void ct_dpif_format_tcp_stat(struct ds *, int, int);
> -bool ct_dpif_parse_tuple(struct ct_dpif_tuple *, const char *s, struct ds *);
> void ct_dpif_push_zone_limit(struct ovs_list *, uint16_t zone, uint32_t
> limit,
> uint32_t count);
> void ct_dpif_free_zone_limits(struct ovs_list *);
> diff --git a/lib/dpctl.c b/lib/dpctl.c
> index 29041fa3e..e233a09c6 100644
> --- a/lib/dpctl.c
> +++ b/lib/dpctl.c
> @@ -41,6 +41,7 @@
> #include "netlink.h"
> #include "odp-util.h"
> #include "openvswitch/ofpbuf.h"
> +#include "openvswitch/ofp-ct.h"
> #include "packets.h"
> #include "openvswitch/shash.h"
> #include "simap.h"
> @@ -1707,37 +1708,55 @@ dpctl_flush_conntrack(int argc, const char *argv[],
> struct dpctl_params *dpctl_p)
> {
> struct dpif *dpif = NULL;
> - struct ct_dpif_tuple tuple, *ptuple = NULL;
> + struct ofp_ct_match match = {0};
> struct ds ds = DS_EMPTY_INITIALIZER;
> uint16_t zone, *pzone = NULL;
> int error;
> int args = argc - 1;
>
> - /* Parse ct tuple */
> - if (args && ct_dpif_parse_tuple(&tuple, argv[args], &ds)) {
> - ptuple = &tuple;
> + /* Parse zone. */
> + if (args && !strncmp(argv[1], "zone=", 5)) {
> + if (!ovs_scan(argv[1], "zone=%"SCNu16, &zone)) {
> + ds_put_cstr(&ds, "failed to parse zone");
> + error = EINVAL;
> + goto error;
> + }
> + pzone = &zone;
> args--;
> }
>
> - /* Parse zone */
> - if (args && ovs_scan(argv[args], "zone=%"SCNu16, &zone)) {
> - pzone = &zone;
> + /* Parse ct tuples. */
> + for (int i = 0; i < 2; i++) {
> + if (!args) {
> + break;
> + }
> +
> + struct ofp_ct_tuple *tuple =
> + i ? &match.tuple_reply : &match.tuple_orig;
> + const char *arg = argv[argc - args];
> +
> + if (arg[0] && !ofp_ct_tuple_parse(tuple, arg, &ds, &match.ip_proto,
> + &match.l3_type)) {
> + error = EINVAL;
> + goto error;
> + }
> args--;
> }
Hi,
There is a problem with the change here that you cannot specify now dp.
# ovs-appctl dpctl/flush-conntrack system@ovs-system
ovs-dpctl: field system@ovs-system missing value (Invalid argument)
As I understand it the old logic was parsing from the end and if we left
with 1 arg it's considered the dp.
The new logic starts from first argument and fails on first failure
so we actually never reach the following check if args > 1.
A quick fix I tried is to goto error only if args > 1.
but this leaves an opening that specifying wrong key fails on the dp arg.
# ovs-dpctl flush-conntrack system@ovs-system key=val
ovs-dpctl: field system@ovs-system missing value (Invalid argument)
I ended up with parsing backwards and checking if the error is on last
arg or not.
I'll send a fixup for review.
Thanks,
Roi
>
> - /* Report error if there are more than one unparsed argument. */
> + /* Report error if there is more than one unparsed argument. */
> if (args > 1) {
> ds_put_cstr(&ds, "invalid arguments");
> error = EINVAL;
> goto error;
> }
>
> - error = opt_dpif_open(argc, argv, dpctl_p, 4, &dpif);
> + error = opt_dpif_open(argc, argv, dpctl_p, 5, &dpif);
> if (error) {
> + dpctl_error(dpctl_p, error, "Cannot open dpif");
> return error;
> }
>
> - error = ct_dpif_flush(dpif, pzone, ptuple);
> + error = ct_dpif_flush(dpif, pzone, &match);
> if (!error) {
> dpif_close(dpif);
> return 0;
> @@ -2862,8 +2881,8 @@ static const struct dpctl_command all_commands[] = {
> 0, 1, dpctl_offload_stats_show, DP_RO },
> { "dump-conntrack", "[-m] [-s] [dp] [zone=N]",
> 0, 4, dpctl_dump_conntrack, DP_RO },
> - { "flush-conntrack", "[dp] [zone=N] [ct-tuple]", 0, 3,
> - dpctl_flush_conntrack, DP_RW },
> + { "flush-conntrack", "[dp] [zone=N] [ct-origin-tuple] [ct-reply-tuple]",
> + 0, 4, dpctl_flush_conntrack, DP_RW },
> { "cache-get-size", "[dp]", 0, 1, dpctl_cache_get_size, DP_RO },
> { "cache-set-size", "dp cache <size>", 3, 3, dpctl_cache_set_size, DP_RW
> },
> { "ct-stats-show", "[dp] [zone=N]",
> diff --git a/lib/dpctl.man b/lib/dpctl.man
> index 87ea8087b..5a0f9f4e3 100644
> --- a/lib/dpctl.man
> +++ b/lib/dpctl.man
> @@ -302,22 +302,30 @@ are included. With \fB\-\-statistics\fR timeouts and
> timestamps are
> added to the output.
> .
> .TP
> -\*(DX\fBflush\-conntrack\fR [\fIdp\fR] [\fBzone=\fIzone\fR] [\fIct-tuple\fR]
> +\*(DX\fBflush\-conntrack\fR [\fIdp\fR] [\fBzone=\fIzone\fR]
> [\fIct-origin-tuple\fR [\fIct-reply-tuple\fR]]
> Flushes the connection entries in the tracker used by \fIdp\fR based on
> -\fIzone\fR and connection tracking tuple \fIct-tuple\fR.
> +\fIzone\fR and connection tracking tuple \fIct-origin-tuple\fR.
> If \fIct-tuple\fR is not provided, flushes all the connection entries.
> If \fBzone\fR=\fIzone\fR is specified, only flushes the connections in
> \fIzone\fR.
> .IP
> -If \fIct-tuple\fR is provided, flushes the connection entry specified by
> -\fIct-tuple\fR in \fIzone\fR. The zone defaults to 0 if it is not provided.
> -The userspace connection tracker requires flushing with the original
> pre-NATed
> -tuple and a warning log will be otherwise generated.
> -An example of an IPv4 ICMP \fIct-tuple\fR:
> +If \fIct-[origin|reply]-tuple\fR is provided, flushes the connection entry
> +specified by \fIct-[origin|reply]-tuple\fR in \fIzone\fR. The zone defaults
> +to 0 if it is not provided. The userspace connection tracker requires
> flushing
> +with the original pre-NATed tuple and a warning log will be otherwise
> +generated. The tuple can be partial and will remove all connections that are
> +matching on the specified fields. In order to specify only
> +\fIct-reply-tuple\fR provide empty string as \fIct-origin-tuple\fR.
> +.IP
> +Note: Currently there is limitation for matching on ICMP, in order to
> partially
> +match on ICMP parameters the \fIct-[origin|reply]-tuple\fR has to include
> +either source or destination IP.
> +.IP
> +An example of an IPv4 ICMP \fIct-[origin|reply]-tuple\fR:
> .IP
>
> "ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=1,icmp_type=8,icmp_code=0,icmp_id=10"
> .IP
> -An example of an IPv6 TCP \fIct-tuple\fR:
> +An example of an IPv6 TCP \fIct-[origin|reply]-tuple\fR:
> .IP
>
> "ct_ipv6_src=fc00::1,ct_ipv6_dst=fc00::2,ct_nw_proto=6,ct_tp_src=1,ct_tp_dst=2"
> .
> diff --git a/lib/ofp-ct.c b/lib/ofp-ct.c
> new file mode 100644
> index 000000000..61f0be953
> --- /dev/null
> +++ b/lib/ofp-ct.c
> @@ -0,0 +1,214 @@
> +
> +/* Copyright (c) 2022, Red Hat, Inc.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + * http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#include <config.h>
> +#include <stdbool.h>
> +#include <stdint.h>
> +#include <sys/types.h>
> +#include <netinet/in.h>
> +#include <netinet/icmp6.h>
> +
> +#include "ct-dpif.h"
> +#include "openvswitch/ofp-ct.h"
> +#include "openvswitch/dynamic-string.h"
> +#include "openvswitch/ofp-parse.h"
> +#include "openvswitch/ofp-util.h"
> +#include "openvswitch/packets.h"
> +
> +static void
> +ofputil_ct_tuple_format(struct ds *ds, const struct ofp_ct_tuple *tuple,
> + uint8_t ip_proto, uint16_t l3_type)
> +{
> + ds_put_cstr(ds, l3_type == AF_INET ? "ct_nw_src=": "ct_ipv6_src=");
> + ipv6_format_mapped(&tuple->src, ds);
> + ds_put_cstr(ds, l3_type == AF_INET ? ",ct_nw_dst=": ",ct_ipv6_dst=");
> + ipv6_format_mapped(&tuple->dst, ds);
> + if (ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6) {
> + ds_put_format(ds, ",icmp_id=%u,icmp_type=%u,icmp_code=%u",
> + ntohs(tuple->icmp_id), tuple->icmp_type,
> + tuple->icmp_code);
> +
> + } else {
> + ds_put_format(ds, ",ct_tp_src=%u,ct_tp_dst=%u",
> ntohs(tuple->src_port),
> + ntohs(tuple->dst_port));
> + }
> +}
> +
> +bool
> +ofp_ct_tuple_is_zero(const struct ofp_ct_tuple *tuple, uint8_t ip_proto)
> +{
> + bool is_zero = ipv6_is_zero(&tuple->src) && ipv6_is_zero(&tuple->dst);
> +
> + if (!(ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6)) {
> + is_zero = is_zero && !tuple->src_port && !tuple->dst_port;
> + }
> +
> + return is_zero;
> +}
> +
> +bool
> +ofp_ct_tuple_is_five_tuple(const struct ofp_ct_tuple *tuple, uint8_t
> ip_proto)
> +{
> + /* First check if we have address. */
> + bool five_tuple = !ipv6_is_zero(&tuple->src) &&
> !ipv6_is_zero(&tuple->dst);
> +
> + if (!(ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6)) {
> + five_tuple = five_tuple && tuple->src_port && tuple->dst_port;
> + }
> +
> + return five_tuple;
> +}
> +
> +bool
> +ofp_ct_is_match_zero(const struct ofp_ct_match *match)
> +{
> + return !match->ip_proto && !match->l3_type &&
> + ofp_ct_tuple_is_zero(&match->tuple_orig, match->ip_proto) &&
> + ofp_ct_tuple_is_zero(&match->tuple_reply, match->ip_proto);
> +}
> +
> +void
> +ofp_ct_match_format(struct ds *ds, const struct ofp_ct_match *match)
> +{
> + ds_put_cstr(ds, "'");
> + ofputil_ct_tuple_format(ds, &match->tuple_orig, match->ip_proto,
> + match->l3_type);
> + ds_put_format(ds, ",ct_nw_proto=%u' '", match->ip_proto);
> + ofputil_ct_tuple_format(ds, &match->tuple_reply, match->ip_proto,
> + match->l3_type);
> + ds_put_cstr(ds, "'");
> +}
> +
> +/* Parses a specification of a conntrack 5-tuple from 's' into 'tuple'.
> + * Returns true on success. Otherwise, returns false and puts the error
> + * message in 'ds'. */
> +bool
> +ofp_ct_tuple_parse(struct ofp_ct_tuple *tuple, const char *s,
> + struct ds *ds, uint8_t *ip_proto, uint16_t *l3_type)
> +{
> + char *pos, *key, *value, *copy;
> +
> + pos = copy = xstrdup(s);
> + while (ofputil_parse_key_value(&pos, &key, &value)) {
> + if (!*value) {
> + ds_put_format(ds, "field %s missing value", key);
> + goto error;
> + }
> +
> + if (!strcmp(key, "ct_nw_src") || !strcmp(key, "ct_nw_dst")) {
> + struct in6_addr *addr = key[6] == 's' ? &tuple->src :
> &tuple->dst;
> +
> + if (*l3_type && *l3_type != AF_INET) {
> + ds_put_format(ds ,"the L3 protocol does not match %s",
> value);
> + goto error;
> + }
> +
> + if (!ipv6_is_zero(addr)) {
> + ds_put_format(ds, "%s is set multiple times", key);
> + goto error;
> + }
> +
> + ovs_be32 ip = 0;
> + if (!ip_parse(value, &ip)) {
> + goto error_with_msg;
> + }
> +
> + *l3_type = AF_INET;
> + *addr = in6_addr_mapped_ipv4(ip);
> + } else if (!strcmp(key, "ct_ipv6_src") ||
> + !strcmp(key, "ct_ipv6_dst")) {
> + struct in6_addr *addr = key[8] == 's' ? &tuple->src :
> &tuple->dst;
> +
> + if (*l3_type && *l3_type != AF_INET6) {
> + ds_put_format(ds, "the L3 protocol does not match %s",
> value);
> + goto error;
> + }
> +
> + if (!ipv6_is_zero(addr)) {
> + ds_put_format(ds, "%s is set multiple times", key);
> + goto error;
> + }
> +
> +
> + if (!ipv6_parse(value, addr)) {
> + goto error_with_msg;
> + }
> +
> + *l3_type = AF_INET6;
> + } else if (!strcmp(key, "ct_nw_proto")) {
> + if (*ip_proto) {
> + ds_put_format(ds, "%s is set multiple times", key);
> + }
> + char *err = str_to_u8(value, key, ip_proto);
> +
> + if (err) {
> + free(err);
> + goto error_with_msg;
> + }
> + } else if (!strcmp(key, "ct_tp_src") || !strcmp(key, "ct_tp_dst")) {
> + uint16_t port;
> + char *err = str_to_u16(value, key, &port);
> +
> + if (err) {
> + free(err);
> + goto error_with_msg;
> + }
> + if (key[6] == 's') {
> + tuple->src_port = htons(port);
> + } else {
> + tuple->dst_port = htons(port);
> + }
> + } else if (!strcmp(key, "icmp_type") || !strcmp(key, "icmp_code") ||
> + !strcmp(key, "icmp_id")) {
> + if (*ip_proto != IPPROTO_ICMP && *ip_proto != IPPROTO_ICMPV6) {
> + ds_put_cstr(ds, "invalid L4 fields");
> + goto error;
> + }
> + uint16_t icmp_id;
> + char *err;
> +
> + if (key[5] == 't') {
> + err = str_to_u8(value, key, &tuple->icmp_type);
> + } else if (key[5] == 'c') {
> + err = str_to_u8(value, key, &tuple->icmp_code);
> + } else {
> + err = str_to_u16(value, key, &icmp_id);
> + tuple->icmp_id = htons(icmp_id);
> + }
> + if (err) {
> + free(err);
> + goto error_with_msg;
> + }
> + } else {
> + ds_put_format(ds, "invalid conntrack tuple field: %s", key);
> + goto error;
> + }
> + }
> +
> + if (!*ip_proto && (tuple->src_port || tuple->dst_port)) {
> + ds_put_cstr(ds, "port is set without protocol");
> + goto error;
> + }
> +
> + free(copy);
> + return true;
> +
> +error_with_msg:
> + ds_put_format(ds, "failed to parse field %s", key);
> +error:
> + free(copy);
> + return false;
> +}
> diff --git a/tests/system-traffic.at b/tests/system-traffic.at
> index 08c78ff57..e7ec1d96b 100644
> --- a/tests/system-traffic.at
> +++ b/tests/system-traffic.at
> @@ -2278,7 +2278,7 @@
> udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.
> OVS_TRAFFIC_VSWITCHD_STOP
> AT_CLEANUP
>
> -AT_SETUP([conntrack - ct flush by 5-tuple])
> +AT_SETUP([conntrack - ct flush])
> CHECK_CONNTRACK()
> OVS_TRAFFIC_VSWITCHD_START()
>
> @@ -2339,6 +2339,106 @@ AT_CHECK([ovs-appctl dpctl/flush-conntrack zone=5
> $ICMP_TUPLE])
> AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "orig=.src=10\.1\.1\.2,"],
> [1], [dnl
> ])
>
> +dnl Test UDP from port 1 and 2, partial flush by src port
> +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1
> packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000
> actions=resubmit(,0)"])
> +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2
> packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000
> actions=resubmit(,0)"])
> +
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1" | sort], [0],
> [dnl
> +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1)
> +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_proto=17,ct_tp_src=1'])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], [dnl
> +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_proto=17,ct_tp_src=2'])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1])
> +
> +dnl Test UDP from port 1 and 2, partial flush by dst port
> +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1
> packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000
> actions=resubmit(,0)"])
> +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2
> packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000
> actions=resubmit(,0)"])
> +
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1" | sort], [0],
> [dnl
> +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1)
> +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_proto=17,ct_tp_dst=2'])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], [dnl
> +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_proto=17,ct_tp_dst=1'])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1])
> +
> +dnl Test UDP from port 1 and 2, partial flush by src address
> +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1
> packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000
> actions=resubmit(,0)"])
> +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2
> packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000
> actions=resubmit(,0)"])
> +
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1" | sort], [0],
> [dnl
> +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1)
> +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_src=10.1.1.1'])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], [dnl
> +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_src=10.1.1.2'])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1])
> +
> +dnl Test UDP from port 1 and 2, partial flush by dst address
> +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1
> packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000
> actions=resubmit(,0)"])
> +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2
> packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000
> actions=resubmit(,0)"])
> +
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1" | sort], [0],
> [dnl
> +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1)
> +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_dst=10.1.1.2'])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], [dnl
> +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_dst=10.1.1.1'])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1])
> +
> +dnl Test UDP from port 1 and 2, partial flush by src address in reply
> direction
> +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1
> packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000
> actions=resubmit(,0)"])
> +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2
> packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000
> actions=resubmit(,0)"])
> +
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1" | sort], [0],
> [dnl
> +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1)
> +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack '' 'ct_nw_src=10.1.1.2'])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], [dnl
> +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack zone=5 '' 'ct_nw_src=10.1.1.1'])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1])
> +
> OVS_TRAFFIC_VSWITCHD_STOP
> AT_CLEANUP
>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev