Thanks for the patch few more comments
On Mon, Aug 12, 2019 at 5:57 PM Yi-Hung Wei <yihung....@gmail.com> wrote: > This patch derives the timeout policy based on ct zone from the > internal data structure that we maintain on dpif layer. > > It also adds a system traffic test to verify the zone-based conntrack > timeout feature. The test uses ovs-vsctl commands to configure > the customized ICMP and UDP timeout on zone 5 to a shorter period. > It then injects ICMP and UDP traffic to conntrack, and checks if the > corresponding conntrack entry expires after the predefined timeout. > > Signed-off-by: Yi-Hung Wei <yihung....@gmail.com> > --- > NEWS | 1 + > lib/ct-dpif.c | 11 +++++++ > lib/ct-dpif.h | 3 ++ > lib/dpif-netdev.c | 1 + > lib/dpif-netlink.c | 12 ++++++++ > lib/dpif-provider.h | 10 ++++++ > ofproto/ofproto-dpif-xlate.c | 23 ++++++++++++++ > ofproto/ofproto-dpif.c | 27 ++++++++++++++++ > ofproto/ofproto-dpif.h | 4 +++ > tests/system-kmod-macros.at | 27 ++++++++++++++++ > tests/system-traffic.at | 66 > ++++++++++++++++++++++++++++++++++++++++ > tests/system-userspace-macros.at | 26 ++++++++++++++++ > 12 files changed, 211 insertions(+) > > diff --git a/NEWS b/NEWS > index c5caa13d6374..9f7fbb852e08 100644 > --- a/NEWS > +++ b/NEWS > @@ -69,6 +69,7 @@ v2.12.0 - xx xxx xxxx > - Linux datapath: > * Support for the kernel versions 4.19.x and 4.20.x. > * Support for the kernel version 5.0.x. > + * Add support for conntrack zone-based timeout policy. > - 'ovs-dpctl dump-flows' is no longer suitable for dumping offloaded > flows. > 'ovs-appctl dpctl/dump-flows' should be used instead. > - Add L2 GRE tunnel over IPv6 support. > diff --git a/lib/ct-dpif.c b/lib/ct-dpif.c > index 7f9ce0a561f7..f3bd71b5769d 100644 > --- a/lib/ct-dpif.c > +++ b/lib/ct-dpif.c > @@ -864,3 +864,14 @@ ct_dpif_timeout_policy_dump_done(struct dpif *dpif, > void *state) > ? dpif->dpif_class->ct_timeout_policy_dump_done(dpif, state) > : EOPNOTSUPP); > } > + > +int > +ct_dpif_get_timeout_policy_name(struct dpif *dpif, uint32_t tp_id, > + uint16_t dl_type, uint8_t nw_proto, > + struct ds *tp_name, bool *unwildcard) > +{ > + return (dpif->dpif_class->ct_get_timeout_policy_name > + ? dpif->dpif_class->ct_get_timeout_policy_name( > + dpif, tp_id, dl_type, nw_proto, tp_name, unwildcard) > + : EOPNOTSUPP); > +} > diff --git a/lib/ct-dpif.h b/lib/ct-dpif.h > index aabd6962f2c0..786dc6d2c474 100644 > --- a/lib/ct-dpif.h > +++ b/lib/ct-dpif.h > @@ -318,5 +318,8 @@ int ct_dpif_timeout_policy_dump_start(struct dpif > *dpif, void **statep); > int ct_dpif_timeout_policy_dump_next(struct dpif *dpif, void *state, > struct ct_dpif_timeout_policy *tp); > int ct_dpif_timeout_policy_dump_done(struct dpif *dpif, void *state); > +int ct_dpif_get_timeout_policy_name(struct dpif *dpif, uint32_t tp_id, > + uint16_t dl_type, uint8_t nw_proto, > + struct ds *tp_name, bool *unwildcard); > > #endif /* CT_DPIF_H */ > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > index 7240a3e6f3c8..36637052e598 100644 > --- a/lib/dpif-netdev.c > +++ b/lib/dpif-netdev.c > @@ -7539,6 +7539,7 @@ const struct dpif_class dpif_netdev_class = { > NULL, /* ct_timeout_policy_dump_start */ > NULL, /* ct_timeout_policy_dump_next */ > NULL, /* ct_timeout_policy_dump_done */ > + NULL, /* ct_get_timeout_policy_name */ > dpif_netdev_ipf_set_enabled, > dpif_netdev_ipf_set_min_frag, > dpif_netdev_ipf_set_max_nfrags, > diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c > index c2ac19dff887..c306242984ae 100644 > --- a/lib/dpif-netlink.c > +++ b/lib/dpif-netlink.c > @@ -3072,6 +3072,17 @@ dpif_netlink_format_tp_name(uint32_t id, uint16_t > l3num, uint8_t l4num, > ovs_assert(tp_name->length < CTNL_TIMEOUT_NAME_MAX); > } > > +static int > +dpif_netlink_ct_get_timeout_policy_name(struct dpif *dpif OVS_UNUSED, > + uint32_t tp_id, uint16_t dl_type, uint8_t nw_proto, struct ds > *tp_name, > + bool *unwildcard) > +{ > + dpif_netlink_format_tp_name(tp_id, > + dl_type == ETH_TYPE_IP ? AF_INET : AF_INET6, nw_proto, tp_name); > + *unwildcard = true; > + return 0; > +} > + > #define CT_DPIF_NL_TP_TCP_MAPPINGS \ > CT_DPIF_NL_TP_MAPPING(TCP, TCP, SYN_SENT, SYN_SENT) \ > CT_DPIF_NL_TP_MAPPING(TCP, TCP, SYN_RECV, SYN_RECV) \ > @@ -3898,6 +3909,7 @@ const struct dpif_class dpif_netlink_class = { > dpif_netlink_ct_timeout_policy_dump_start, > dpif_netlink_ct_timeout_policy_dump_next, > dpif_netlink_ct_timeout_policy_dump_done, > + dpif_netlink_ct_get_timeout_policy_name, > NULL, /* ipf_set_enabled */ > NULL, /* ipf_set_min_frag */ > NULL, /* ipf_set_max_nfrags */ > diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h > index e988626ea05b..d12b5a91c2eb 100644 > --- a/lib/dpif-provider.h > +++ b/lib/dpif-provider.h > @@ -542,6 +542,16 @@ struct dpif_class { > struct ct_dpif_timeout_policy *tp); > int (*ct_timeout_policy_dump_done)(struct dpif *, void *state); > > + /* Gets timeout policy name based on 'tp_id', 'dl_type' and > 'nw_proto'. > + * On success, returns 0, stores the timeout policy name in 'tp_name', > + * and sets 'unwildcard'. 'unwildcard' is true if the timeout > + * policy in 'dpif' is 'dl_type' and 'nw_proto' specific, .i.e. in > + * kernel datapath. Sets 'unwildcard' to false if the timeout policy > + * is generic to all supported 'dl_type' and 'nw_proto'. */ > + int (*ct_get_timeout_policy_name)(struct dpif *, uint32_t tp_id, > + uint16_t dl_type, uint8_t nw_proto, > + struct ds *tp_name, bool > *unwildcard); > I think adding the 'unwildcard' parameter to this particular API is not intuitive; I would create a simple dedicated API for it. > + > /* IP Fragmentation. */ > > /* Disables or enables conntrack fragment reassembly. The default > diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c > index 28a7fdd842a6..0b5c56f443e6 100644 > --- a/ofproto/ofproto-dpif-xlate.c > +++ b/ofproto/ofproto-dpif-xlate.c > @@ -5977,6 +5977,25 @@ put_ct_helper(struct xlate_ctx *ctx, > } > > static void > +put_ct_timeout(struct ofpbuf *odp_actions, const struct dpif_backer > *backer, > + const struct flow *flow, struct flow_wildcards *wc, > + uint16_t zone_id) > +{ > + struct ds tp_name = DS_EMPTY_INITIALIZER; > + bool unwildcard; > + > + if (ofproto_dpif_ct_zone_timeout_policy_get_name(backer, zone_id, > + ntohs(flow->dl_type), flow->nw_proto, &tp_name, &unwildcard)) > { > + nl_msg_put_string(odp_actions, OVS_CT_ATTR_TIMEOUT, > ds_cstr(&tp_name)); > + > + if (unwildcard) { > + memset(&wc->masks.nw_proto, 0xff, sizeof > wc->masks.nw_proto); > + } > + } > + ds_destroy(&tp_name); > +} > + > +static void > put_ct_nat(struct xlate_ctx *ctx) > { > struct ofpact_nat *ofn = ctx->ct_nat_action; > @@ -6071,6 +6090,10 @@ compose_conntrack_action(struct xlate_ctx *ctx, > struct ofpact_conntrack *ofc, > put_ct_mark(&ctx->xin->flow, ctx->odp_actions, ctx->wc); > put_ct_label(&ctx->xin->flow, ctx->odp_actions, ctx->wc); > put_ct_helper(ctx, ctx->odp_actions, ofc); > + if (ofc->flags & NX_CT_F_COMMIT) { > + put_ct_timeout(ctx->odp_actions, ctx->xbridge->ofproto->backer, > + &ctx->xin->flow, ctx->wc, zone); > + } > put_ct_nat(ctx); > ctx->ct_nat_action = NULL; > nl_msg_end_nested(ctx->odp_actions, ct_offset); > diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c > index 3013d83e96a0..8bbc596e2ce0 100644 > --- a/ofproto/ofproto-dpif.c > +++ b/ofproto/ofproto-dpif.c > @@ -5377,6 +5377,33 @@ ct_del_zone_timeout_policy(const char > *datapath_type, uint16_t zone) > } > } > > +/* Gets timeout policy name in 'backer' based on 'zone', 'dl_type' and > + * 'nw_proto'. Returns true if the zoned-based timeout policy is > configured. > + * On success, stores the timeout policy name in 'tp_name', and sets > + * 'unwildcard' based on the dpif implementation. Sets 'unwildcard' to > true > + * if the timeout policy is 'dl_type' and 'nw_proto' specific. */ > +bool > +ofproto_dpif_ct_zone_timeout_policy_get_name( > + const struct dpif_backer *backer, uint16_t zone, uint16_t dl_type, > + uint8_t nw_proto, struct ds *tp_name, bool *unwildcard) > +{ > + struct ct_zone *ct_zone; > + > + if (!ct_dpif_timeout_policy_support_ipproto(nw_proto)) { > + return false; > + } > + > + ct_zone = ct_zone_lookup(&backer->ct_zones, zone); > struct ct_zone *ct_zone = ct_zone_lookup(&backer->ct_zones, zone); > + if (!ct_zone) { > + return false; > + } > + > + return (!ct_dpif_get_timeout_policy_name(backer->dpif, > + ct_zone->ct_tp->tp_id, > dl_type, > + nw_proto, tp_name, > unwildcard) > + ? true : false); > +} > + > static bool > set_frag_handling(struct ofproto *ofproto_, > enum ofputil_frag_handling frag_handling) > diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h > index 0dd7a45fe550..cce6bdbc842d 100644 > --- a/ofproto/ofproto-dpif.h > +++ b/ofproto/ofproto-dpif.h > @@ -374,4 +374,8 @@ int ofproto_dpif_delete_internal_flow(struct > ofproto_dpif *, struct match *, > > bool ovs_native_tunneling_is_on(struct ofproto_dpif *); > > +bool ofproto_dpif_ct_zone_timeout_policy_get_name( > + const struct dpif_backer *backer, uint16_t zone, uint16_t dl_type, > + uint8_t nw_proto, struct ds *tp_name, bool *unwildcard); > + > #endif /* ofproto-dpif.h */ > diff --git a/tests/system-kmod-macros.at b/tests/system-kmod-macros.at > index 554a61e9bd95..ace0aeae03e7 100644 > --- a/tests/system-kmod-macros.at > +++ b/tests/system-kmod-macros.at > @@ -100,6 +100,17 @@ m4_define([CHECK_CONNTRACK_FRAG_OVERLAP], > # > m4_define([CHECK_CONNTRACK_NAT]) > > +# CHECK_CONNTRACK_TIMEOUT() > +# > +# Perform requirements checks for running conntrack customized timeout > tests. > +# > +m4_define([CHECK_CONNTRACK_TIMEOUT], > +[ > + AT_SKIP_IF([! cat /boot/config-$(uname -r) | grep > NF_CONNTRACK_TIMEOUT | grep '=y' > /dev/null]) > + modprobe nfnetlink_cttimeout > + on_exit 'modprobe -r nfnetlink_cttimeout' +]) > + > # CHECK_CT_DPIF_PER_ZONE_LIMIT() > # > # Perform requirements checks for running ovs-dpctl > ct-[set|get|del]-limits per > @@ -185,3 +196,19 @@ m4_define([OVS_CHECK_KERNEL_EXCL], > sublevel=$(uname -r | sed -e 's/\./ /g' | awk '{print $ 2}') > AT_SKIP_IF([ ! ( test $version -lt $1 || ( test $version -eq $1 && > test $sublevel -lt $2 ) || test $version -gt $3 || ( test $version -eq $3 > && test $sublevel -gt $4 ) ) ]) > ]) > + > +# VSCTL_ADD_DATAPATH_TABLE() > +# > +# Create system datapath table "system" for kernel tests in ovsdb > +m4_define([VSCTL_ADD_DATAPATH_TABLE], > +[ > + AT_CHECK([ovs-vsctl -- --id=@m create Datapath datapath_version=0 -- > set Open_vSwitch . datapaths:"system"=@m], [0], [stdout]) > +]) > + > +# VSCTL_ADD_ZONE_TIMEOUT_POLICY([parameters]) > +# > +# Add zone based timeout policy to kernel datapath > +m4_define([VSCTL_ADD_ZONE_TIMEOUT_POLICY], > +[ > + AT_CHECK([ovs-vsctl add-zone-tp system $1], [0], [stdout]) > +]) > diff --git a/tests/system-traffic.at b/tests/system-traffic.at > index 1a04199dcfe9..f4ac8a8f2c06 100644 > --- a/tests/system-traffic.at > +++ b/tests/system-traffic.at > @@ -3179,6 +3179,72 @@ NXST_FLOW reply: > OVS_TRAFFIC_VSWITCHD_STOP > AT_CLEANUP > > +AT_SETUP([conntrack - zone-based timeout policy]) > +CHECK_CONNTRACK() > +CHECK_CONNTRACK_TIMEOUT() > +OVS_TRAFFIC_VSWITCHD_START() > + > +ADD_NAMESPACES(at_ns0, at_ns1) > + > +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24") > +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24") > + > +AT_DATA([flows.txt], [dnl > +priority=1,action=drop > +priority=10,arp,action=normal > +priority=100,in_port=1,ip,action=ct(zone=5, table=1) > +priority=100,in_port=2,ip,action=ct(zone=5, table=1) > +table=1,in_port=2,ip,ct_state=+trk+est,action=1 > +table=1,in_port=1,ip,ct_state=+trk+new,action=ct(commit,zone=5),2 > +table=1,in_port=1,ip,ct_state=+trk+est,action=2 > +]) > + > +AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt]) > + > +dnl Test with default timeout > +dnl The default udp_single and icmp_first timeouts are 30 seconds in > +dnl kernel DP, and 60 seconds in userspace DP. > + > +dnl Send ICMP and UDP traffic > +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | > FORMAT_PING], [0], [dnl > +3 packets transmitted, 3 received, 0% packet loss, time 0ms > +]) > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 > packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 > actions=resubmit(,0)"]) > + > +sleep 4 > + > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2) | sort], > [0], [dnl > > +icmp,orig=(src=10.1.1.1,dst=10.1.1.2,id=<cleared>,type=8,code=0),reply=(src=10.1.1.2,dst=10.1.1.1,id=<cleared>,type=0,code=0),zone=5 > > +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=10.1.1.2,dst=10.1.1.1,sport=<cleared>,dport=<cleared>),zone=5 > +]) > + > +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) > + > +dnl Shorten the udp_single and icmp_first timeout in zone 5 > +VSCTL_ADD_DATAPATH_TABLE() > +VSCTL_ADD_ZONE_TIMEOUT_POLICY([zone=5 udp_single=3 icmp_first=3]) > + > +dnl Send ICMP and UDP traffic > +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | > FORMAT_PING], [0], [dnl > +3 packets transmitted, 3 received, 0% packet loss, time 0ms > +]) > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 > packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 > actions=resubmit(,0)"]) > + > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2) | sort], > [0], [dnl > > +icmp,orig=(src=10.1.1.1,dst=10.1.1.2,id=<cleared>,type=8,code=0),reply=(src=10.1.1.2,dst=10.1.1.1,id=<cleared>,type=0,code=0),zone=5 > > +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=10.1.1.2,dst=10.1.1.1,sport=<cleared>,dport=<cleared>),zone=5 > +]) > + > +dnl Wait until the timeout expire. > +dnl We intend to wait a bit longer, because conntrack does not recycle > the entry right after it is expired. > +sleep 4 > Once the issue with sending additional packets after the first timeout expiry is fixed, we should enhance the test to resend and re-timeout to make sure it works. > + > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2)], [0], > [dnl > +]) > + > +OVS_TRAFFIC_VSWITCHD_STOP > +AT_CLEANUP > + > AT_BANNER([conntrack - L7]) > > AT_SETUP([conntrack - IPv4 HTTP]) > diff --git a/tests/system-userspace-macros.at b/tests/ > system-userspace-macros.at > index 9d5f3bf419d3..8950a4de7287 100644 > --- a/tests/system-userspace-macros.at > +++ b/tests/system-userspace-macros.at > @@ -98,6 +98,16 @@ m4_define([CHECK_CONNTRACK_FRAG_OVERLAP]) > # > m4_define([CHECK_CONNTRACK_NAT]) > > +# CHECK_CONNTRACK_TIMEOUT() > +# > +# Perform requirements checks for running conntrack customized timeout > tests. > +* The userspace datapath does not support this feature yet. > +# > +m4_define([CHECK_CONNTRACK_TIMEOUT], > +[ > + AT_SKIP_IF([:]) > +]) > + > # CHECK_CT_DPIF_PER_ZONE_LIMIT() > # > # Perform requirements checks for running ovs-dpctl > ct-[set|get|del]-limits per > @@ -295,3 +305,19 @@ m4_define([OVS_CHECK_KERNEL_EXCL], > [ > AT_SKIP_IF([:]) > ]) > + > +# VSCTL_ADD_DATAPATH_TABLE() > +# > +# Create datapath table "netdev" for userspace tests in ovsdb > +m4_define([VSCTL_ADD_DATAPATH_TABLE], > +[ > + AT_CHECK([ovs-vsctl -- --id=@m create Datapath datapath_version=0 -- > set Open_vSwitch . datapaths:"netdev"=@m], [0], [stdout]) > +]) > + > +# VSCTL_ADD_ZONE_TIMEOUT_POLICY([parameters]) > +# > +# Add zone based timeout policy to userspace datapath > +m4_define([VSCTL_ADD_ZONE_TIMEOUT_POLICY], > TBH, does not seems useful; just hides the what is happening > +[ > + AT_CHECK([ovs-vsctl add-zone-tp netdev $1], [0], [stdout]) > +]) > -- > 2.7.4 > > _______________________________________________ > dev mailing list > d...@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev > _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev