Acked-by: Alin-Gabriel Serdean <[email protected]> On Thu, 2021-06-10 at 11:24 +0200, Eelco Chaudron wrote: > Currently, conntrack in the kernel has an undocumented feature > referred > to as all-zero IP address SNAT. Basically, when a source port > collision is detected during the commit, the source port will be > translated to an ephemeral port. If there is no collision, no SNAT is > performed. > > This patchset documents this behavior and adds a self-test to verify > it's not changing. In addition, a datapath feature flag is added for > the all-zero IP SNAT case. This will help applications on top of OVS, > like OVN, to determine this feature can be used. > > Signed-off-by: Eelco Chaudron <[email protected]> > Acked-by: Aaron Conole <[email protected]> > Acked-by: Dumitru Ceara <[email protected]> > --- > > v5: Windows datapath does not support all-zero SNAT, add checks. > v4: Added datapath support flag for all-zero SNAT. > v3: Renamed NULL SNAT to all-zero IP SNAT. > v2: Fixed NULL SNAT to only work in the -rpl state to be inline with > OpenShift-SDN's behavior. > > lib/ct-dpif.c | 8 +++++++ > lib/ct-dpif.h | 6 +++++ > lib/dpif-netdev.c | 1 + > lib/dpif-netlink.c | 15 ++++++++++++ > lib/dpif-provider.h | 5 ++++ > lib/ovs-actions.xml | 10 ++++++++ > ofproto/ofproto-dpif.c | 22 +++++++++++++++++- > ofproto/ofproto-dpif.h | 5 +++- > tests/system-kmod-macros.at | 11 +++++++++ > tests/system-traffic.at | 46 > ++++++++++++++++++++++++++++++++++++++ > tests/system-userspace-macros.at | 10 ++++++++ > vswitchd/vswitch.xml | 9 +++++++ > 12 files changed, 146 insertions(+), 2 deletions(-) > > diff --git a/lib/ct-dpif.c b/lib/ct-dpif.c > index 6a5ba052d..cfc2315e3 100644 > --- a/lib/ct-dpif.c > +++ b/lib/ct-dpif.c > @@ -889,3 +889,11 @@ ct_dpif_get_timeout_policy_name(struct dpif > *dpif, uint32_t tp_id, > dpif, tp_id, dl_type, nw_proto, tp_name, is_generic) > : EOPNOTSUPP); > } > + > +int > +ct_dpif_get_features(struct dpif *dpif, enum ct_features *features) > +{ > + return (dpif->dpif_class->ct_get_features > + ? dpif->dpif_class->ct_get_features(dpif, features) > + : EOPNOTSUPP); > +} > diff --git a/lib/ct-dpif.h b/lib/ct-dpif.h > index 88f4c7e28..b59cba962 100644 > --- a/lib/ct-dpif.h > +++ b/lib/ct-dpif.h > @@ -271,6 +271,11 @@ struct ct_dpif_timeout_policy { > * timeout attribute > values */ > }; > > +/* Conntrack Features. */ > +enum ct_features { > + CONNTRACK_F_ZERO_SNAT = 1 << 0, /* All-zero SNAT support. */ > +}; > + > int ct_dpif_dump_start(struct dpif *, struct ct_dpif_dump_state **, > const uint16_t *zone, int *); > int ct_dpif_dump_next(struct ct_dpif_dump_state *, struct > ct_dpif_entry *); > @@ -325,5 +330,6 @@ int ct_dpif_timeout_policy_dump_done(struct dpif > *dpif, void *state); > int ct_dpif_get_timeout_policy_name(struct dpif *dpif, uint32_t > tp_id, > uint16_t dl_type, uint8_t > nw_proto, > char **tp_name, bool > *is_generic); > +int ct_dpif_get_features(struct dpif *dpif, enum ct_features > *features); > > #endif /* CT_DPIF_H */ > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > index 816945375..49afdcc3c 100644 > --- a/lib/dpif-netdev.c > +++ b/lib/dpif-netdev.c > @@ -8502,6 +8502,7 @@ const struct dpif_class dpif_netdev_class = { > NULL, /* ct_timeout_policy_dump_next */ > NULL, /* ct_timeout_policy_dump_done */ > dpif_netdev_ct_get_timeout_policy_name, > + NULL, /* ct_get_features */ > dpif_netdev_ipf_set_enabled, > dpif_netdev_ipf_set_min_frag, > dpif_netdev_ipf_set_max_nfrags, > diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c > index ceb56c685..18069eb70 100644 > --- a/lib/dpif-netlink.c > +++ b/lib/dpif-netlink.c > @@ -3161,6 +3161,20 @@ dpif_netlink_ct_get_timeout_policy_name(struct > dpif *dpif OVS_UNUSED, > return 0; > } > > +static int > +dpif_netlink_ct_get_features(struct dpif *dpif OVS_UNUSED, > + enum ct_features *features) > +{ > + if (features != NULL) { > +#ifndef _WIN32 > + *features = CONNTRACK_F_ZERO_SNAT; > +#else > + *features = 0; > +#endif > + } > + return 0; > +} > + > #define CT_DPIF_NL_TP_TCP_MAPPINGS \ > CT_DPIF_NL_TP_MAPPING(TCP, TCP, SYN_SENT, SYN_SENT) \ > CT_DPIF_NL_TP_MAPPING(TCP, TCP, SYN_RECV, SYN_RECV) \ > @@ -4003,6 +4017,7 @@ const struct dpif_class dpif_netlink_class = { > dpif_netlink_ct_timeout_policy_dump_next, > dpif_netlink_ct_timeout_policy_dump_done, > dpif_netlink_ct_get_timeout_policy_name, > + dpif_netlink_ct_get_features, > NULL, /* ipf_set_enabled */ > NULL, /* ipf_set_min_frag */ > NULL, /* ipf_set_max_nfrags */ > diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h > index b817fceac..59e0a3a9d 100644 > --- a/lib/dpif-provider.h > +++ b/lib/dpif-provider.h > @@ -81,6 +81,7 @@ struct ct_dpif_dump_state; > struct ct_dpif_entry; > struct ct_dpif_tuple; > struct ct_dpif_timeout_policy; > +enum ct_features; > > /* 'dpif_ipf_proto_status' and 'dpif_ipf_status' are presently in > * sync with 'ipf_proto_status' and 'ipf_status', but more > @@ -562,6 +563,10 @@ struct dpif_class { > uint16_t dl_type, uint8_t > nw_proto, > char **tp_name, bool > *is_generic); > > + /* Stores the conntrack features supported by 'dpif' into > features. > + * The value is a bitmap of CONNTRACK_F_* bits. */ > + int (*ct_get_features)(struct dpif *, enum ct_features > *features); > + > /* IP Fragmentation. */ > > /* Disables or enables conntrack fragment reassembly. The > default > diff --git a/lib/ovs-actions.xml b/lib/ovs-actions.xml > index a2778de4b..a0070e6c6 100644 > --- a/lib/ovs-actions.xml > +++ b/lib/ovs-actions.xml > @@ -1833,6 +1833,16 @@ for <var>i</var> in [1,<var>n_members</var>]: > connection, will behave the same as a bare > <code>nat</code>. > </p> > > + <p> > + For SNAT, there is a special case when the > <code>src</code> IP > + address is configured as all 0's, i.e., > + <code>nat(src=0.0.0.0)</code>. In this case, when a > source port > + collision is detected during the commit, the source port > will be > + translated to an ephemeral port. If there is no > collision, no SNAT > + is performed. Note that this is currently only > implemented in the > + Linux kernel datapath. > + </p> > + > <p> > Open vSwitch 2.6 introduced <code>nat</code>. Linux 4.6 > was the > earliest upstream kernel that implemented > <code>ct</code> support for > diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c > index fd0b2fdea..5ef711a7c 100644 > --- a/ofproto/ofproto-dpif.c > +++ b/ofproto/ofproto-dpif.c > @@ -1389,6 +1389,24 @@ check_ct_timeout_policy(struct dpif_backer > *backer) > return !error; > } > > +/* Tests whether 'backer''s datapath supports the all-zero SNAT > case. */ > +static bool > +dpif_supports_ct_zero_snat(struct dpif_backer *backer) > +{ > + enum ct_features features; > + bool supported = false; > + > + if (!ct_dpif_get_features(backer->dpif, &features)) { > + if (features & CONNTRACK_F_ZERO_SNAT) { > + supported = true; > + } > + } > + VLOG_INFO("%s: Datapath %s ct_zero_snat", > + dpif_name(backer->dpif), (supported) ? "supports" > + : "does not > support"); > + return supported; > +} > + > /* Tests whether 'backer''s datapath supports the > * OVS_ACTION_ATTR_CHECK_PKT_LEN action. */ > static bool > @@ -1588,8 +1606,9 @@ check_support(struct dpif_backer *backer) > backer->rt_support.ct_timeout = check_ct_timeout_policy(backer); > backer->rt_support.explicit_drop_action = > dpif_supports_explicit_drop_action(backer->dpif); > - backer->rt_support.lb_output_action= > + backer->rt_support.lb_output_action = > dpif_supports_lb_output_action(backer->dpif); > + backer->rt_support.ct_zero_snat = > dpif_supports_ct_zero_snat(backer); > > /* Flow fields. */ > backer->rt_support.odp.ct_state = check_ct_state(backer); > @@ -5603,6 +5622,7 @@ get_datapath_cap(const char *datapath_type, > struct smap *cap) > smap_add(cap, "explicit_drop_action", > s.explicit_drop_action ? "true" :"false"); > smap_add(cap, "lb_output_action", s.lb_output_action ? "true" : > "false"); > + smap_add(cap, "ct_zero_snat", s.ct_zero_snat ? "true" : > "false"); > } > > /* Gets timeout policy name in 'backer' based on 'zone', 'dl_type' > and > diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h > index b41c3d82a..191cfcb0d 100644 > --- a/ofproto/ofproto-dpif.h > +++ b/ofproto/ofproto-dpif.h > @@ -204,7 +204,10 @@ struct group_dpif *group_dpif_lookup(struct > ofproto_dpif *, > DPIF_SUPPORT_FIELD(bool, explicit_drop_action, "Explicit Drop > action") \ > > \ > /* True if the datapath supports balance_tcp optimization > */ \ > - DPIF_SUPPORT_FIELD(bool, lb_output_action, "Optimized Balance > TCP mode") > + DPIF_SUPPORT_FIELD(bool, lb_output_action, "Optimized Balance > TCP mode")\ > + > \ > + /* True if the datapath supports all-zero IP SNAT. > */ \ > + DPIF_SUPPORT_FIELD(bool, ct_zero_snat, "Conntrack all-zero IP > SNAT") > > > /* Stores the various features which the corresponding backer > supports. */ > diff --git a/tests/system-kmod-macros.at b/tests/system-kmod- > macros.at > index 15628a7c6..86d633ac4 100644 > --- a/tests/system-kmod-macros.at > +++ b/tests/system-kmod-macros.at > @@ -99,6 +99,17 @@ m4_define([CHECK_CONNTRACK_FRAG_OVERLAP], > # > m4_define([CHECK_CONNTRACK_NAT]) > > +# CHECK_CONNTRACK_ZEROIP_SNAT() > +# > +# Perform requirements checks for running conntrack all-zero IP SNAT > tests. > +# The kernel always supports all-zero IP SNAT, so no check is > needed. > +# However, the Windows datapath using the same netlink interface > does not. > +# > +m4_define([CHECK_CONNTRACK_ZEROIP_SNAT], > +[ > + AT_SKIP_IF([test "$IS_WIN32" = "yes"]) > +]) > + > # CHECK_CONNTRACK_TIMEOUT() > # > # Perform requirements checks for running conntrack customized > timeout tests. > diff --git a/tests/system-traffic.at b/tests/system-traffic.at > index fb5b9a36d..3d7f4751e 100644 > --- a/tests/system-traffic.at > +++ b/tests/system-traffic.at > @@ -4433,6 +4433,52 @@ > tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>), > reply=(src= > OVS_TRAFFIC_VSWITCHD_STOP > AT_CLEANUP > > + > +AT_SETUP([conntrack - all-zero IP SNAT]) > +AT_SKIP_IF([test $HAVE_NC = no]) > +CHECK_CONNTRACK() > +CHECK_CONNTRACK_ZEROIP_SNAT() > +OVS_TRAFFIC_VSWITCHD_START() > + > +ADD_NAMESPACES(at_ns0, at_ns1) > +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24") > +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24") > +NS_CHECK_EXEC([at_ns0], [ip route add 172.1.1.0/24 via 10.1.1.2]) > + > +OVS_START_L7([at_ns1], [http]) > + > +AT_DATA([flows.txt], [dnl > +table=0,priority=30,ct_state=-trk,ip,action=ct(table=0) > +table=0,priority=20,ct_state=- > rpl,ip,nw_dst=10.1.1.0/24,actions=ct(commit,nat(src=0.0.0.0),table=10 > ) > +table=0,priority=20,ct_state=+rpl,ip,nw_dst=10.1.1.0/24,actions=resu > bmit(,10) > +table=0,priority=20,ip,nw_dst=172.1.1.2,actions=ct(commit,nat(dst=10 > .1.1.2),table=10) > +table=0,priority=10,arp,action=normal > +table=0,priority=1,action=drop > +table=10,priority=20,ct_state=+rpl,ip,nw_dst=10.1.1.0/24 > actions=ct(table=20,nat) > +table=10,priority=10,ip,nw_dst=10.1.1.0/24 actions=resubmit(,20) > +table=20,priority=10,ip,nw_dst=10.1.1.1,action=1 > +table=20,priority=10,ip,nw_dst=10.1.1.2,action=2 > +]) > +AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt]) > + > +dnl - Test to make sure src nat is NOT done when not needed > +NS_CHECK_EXEC([at_ns0], [echo "TEST" | nc -p 30000 10.1.1.2 80 > nc- > 1.log]) > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep > "orig=.src=10\.1\.1\.1,"], [0], [dnl > +tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=30000,dport=80),reply=(src > =10.1.1.2,dst=10.1.1.1,sport=80,dport=30000),protoinfo=(state=TIME_WA > IT) > +]) > + > +dnl - Test to make sure src nat is done when needed > +NS_CHECK_EXEC([at_ns0], [echo "TEST2" | nc -p 30001 172.1.1.2 80 > > nc-2.log]) > +NS_CHECK_EXEC([at_ns0], [echo "TEST3" | nc -p 30001 10.1.1.2 80 > > nc-3.log]) > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 30001 | grep > "orig=.src=10\.1\.1\.1," | sed -e 's/port=30001/port=<clnt_s_port>/g' > -e 's/sport=80,dport=[[0-9]]\+/sport=80,dport=<rnd_port>/g' | sort], > [0], [dnl > +tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<clnt_s_port>,dport=80),re > ply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=<rnd_port>),protoinfo=( > state=TIME_WAIT) > +tcp,orig=(src=10.1.1.1,dst=172.1.1.2,sport=<clnt_s_port>,dport=80),r > eply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=<clnt_s_port>),protoin > fo=(state=TIME_WAIT) > +]) > + > +OVS_TRAFFIC_VSWITCHD_STOP > +AT_CLEANUP > + > + > AT_SETUP([conntrack - simple DNAT]) > CHECK_CONNTRACK() > CHECK_CONNTRACK_NAT() > diff --git a/tests/system-userspace-macros.at b/tests/system- > userspace-macros.at > index 34f82cee3..9f0d38dfb 100644 > --- a/tests/system-userspace-macros.at > +++ b/tests/system-userspace-macros.at > @@ -96,6 +96,16 @@ m4_define([CHECK_CONNTRACK_FRAG_OVERLAP]) > # > m4_define([CHECK_CONNTRACK_NAT]) > > +# CHECK_CONNTRACK_ZEROIP_SNAT() > +# > +# Perform requirements checks for running conntrack all-zero IP SNAT > tests. > +# The userspace datapath does not support all-zero IP SNAT. > +# > +m4_define([CHECK_CONNTRACK_ZEROIP_SNAT], > +[ > + AT_SKIP_IF([:]) > +]) > + > # CHECK_CONNTRACK_TIMEOUT() > # > # Perform requirements checks for running conntrack customized > timeout tests. > diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml > index 4597a215d..d8ea287d5 100644 > --- a/vswitchd/vswitch.xml > +++ b/vswitchd/vswitch.xml > @@ -6126,6 +6126,15 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 > type=patch options:peer=p1 \ > True if the datapath supports OVS_ACTION_ATTR_DROP. If > false, > explicit drop action will not be sent to the datapath. > </column> > + <column name="capabilities" key="ct_zero_snat" > + type='{"type": "boolean"}'> > + True if the datapath supports all-zero SNAT. This is a > special case > + if the <code>src</code> IP address is configured as all 0's, > i.e., > + <code>nat(src=0.0.0.0)</code>. In this case, when a source > port > + collision is detected during the commit, the source port > will be > + translated to an ephemeral port. If there is no collision, > no SNAT > + is performed. > + </column> > </group> > > <group title="Common Columns"> >
_______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
