On 03/06/2021 16:06, Dumitru Ceara wrote:
> Assuming a load balancer, LB1, with:
> - VIP: 42.42.42.42:4242
> - backend: 42.42.42.1:2121
>
> A client might connect to the backend either directly or through the
> VIP. If the first connection is via the VIP and the second connection
> is direct but happens to use the same source L4 port, OVN should make
> sure that the second connection is SNATed (source port translation) in
> order to avoid a tuple collision at commit time.
>
> For example:
> 1. Session via the VIP:
>    - original packet: src=42.42.42.2:2000, dst=42.42.42.42:4242
>    - after DNAT: src=42.42.42.2:2000, dst=42.42.42.1:2121
> 2. Session directly connected to the backend:
>    - original packet: src=42.42.42.2:2000, dst=42.42.42.1:2121
>    - in the ACL stage, committing this connection would fail.
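To spell the collision out for other readers (this is just my reading of
the example above in conntrack-tuple notation, not output from a real
run):

    session 1 (via the VIP, after DNAT):
        orig:  src=42.42.42.2:2000, dst=42.42.42.42:4242
        reply: src=42.42.42.1:2121, dst=42.42.42.2:2000

    session 2 (direct to the backend):
        orig:  src=42.42.42.2:2000, dst=42.42.42.1:2121
        reply: src=42.42.42.1:2121, dst=42.42.42.2:2000

Both sessions end up needing the same reply tuple, so committing the
second one would clash unless its source port is translated first.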
> In order to avoid this we now use the all-zero-ip NAT OVS feature when
> committing connections in the ACL stage. This translates to a no-op
> SNAT when there's no tuple collision, and performs source port
> translation when a tuple collision would happen.
>
> We program flows to perform all-zero-ip NAT conditionally, only if the
> datapath being used supports it.
>
> Reported-at: https://bugzilla.redhat.com/1939676
> Signed-off-by: Dumitru Ceara <[email protected]>
> ---
>  controller/lflow.c            |   1
>  include/ovn/actions.h         |   4 +
>  lib/actions.c                 |  34 ++++
>  tests/ovn.at                  |   2 -
>  tests/system-common-macros.at |   4 +
>  tests/system-ovn.at           | 138 +++++++++++++++++++++++++++++++++++++++++
>  6 files changed, 180 insertions(+), 3 deletions(-)
>
> diff --git a/controller/lflow.c b/controller/lflow.c
> index 680b8cca1..af6a237d1 100644
> --- a/controller/lflow.c
> +++ b/controller/lflow.c
> @@ -582,6 +582,7 @@ add_matches_to_flow_table(const struct sbrec_logical_flow *lflow,
>          .is_switch = datapath_is_switch(dp),
>          .group_table = l_ctx_out->group_table,
>          .meter_table = l_ctx_out->meter_table,
> +        .dp_features = ovs_feature_support_get(),
>          .lflow_uuid = lflow->header_.uuid,
>
>          .pipeline = ingress ? OVNACT_P_INGRESS : OVNACT_P_EGRESS,
> diff --git a/include/ovn/actions.h b/include/ovn/actions.h
> index 040213177..35983498b 100644
> --- a/include/ovn/actions.h
> +++ b/include/ovn/actions.h
> @@ -25,6 +25,7 @@
>  #include "openvswitch/hmap.h"
>  #include "openvswitch/uuid.h"
>  #include "util.h"
> +#include "ovn/features.h"
>
>  struct expr;
>  struct lexer;
> @@ -756,6 +757,9 @@ struct ovnact_encode_params {
>      /* A struct to figure out the meter_id for meter actions. */
>      struct ovn_extend_table *meter_table;
>
> +    /* OVS datapath supported features. */
> +    enum ovs_feature_support dp_features;
> +
>      /* The logical flow uuid that drove this action. */
>      struct uuid lflow_uuid;
>
> diff --git a/lib/actions.c b/lib/actions.c
> index b3433f49e..c8c2443ce 100644
> --- a/lib/actions.c
> +++ b/lib/actions.c
> @@ -732,7 +732,7 @@ format_CT_COMMIT_V1(const struct ovnact_ct_commit_v1 *cc, struct ds *s)
>
>  static void
>  encode_CT_COMMIT_V1(const struct ovnact_ct_commit_v1 *cc,
> -                    const struct ovnact_encode_params *ep OVS_UNUSED,
> +                    const struct ovnact_encode_params *ep,
>                      struct ofpbuf *ofpacts)
>  {
>      struct ofpact_conntrack *ct = ofpact_put_CT(ofpacts);
> @@ -742,6 +742,21 @@ encode_CT_COMMIT_V1(const struct ovnact_ct_commit_v1 *cc,
>      ct->zone_src.ofs = 0;
>      ct->zone_src.n_bits = 16;
>
> +    /* If the datapath supports all-zero SNAT then use it to avoid tuple
> +     * collisions at commit time between NATed and firewalled-only sessions.
> +     */
> +    if (ep->dp_features & OVS_CT_ZERO_SNAT_SUPPORT) {
> +        size_t nat_offset = ofpacts->size;
> +        ofpbuf_pull(ofpacts, nat_offset);
> +
> +        struct ofpact_nat *nat = ofpact_put_NAT(ofpacts);
> +        nat->flags = 0;
> +        nat->range_af = AF_UNSPEC;
> +        nat->flags |= NX_NAT_F_SRC;
> +        ofpacts->header = ofpbuf_push_uninit(ofpacts, nat_offset);
> +        ct = ofpacts->header;
> +    }
> +
>      size_t set_field_offset = ofpacts->size;
>      ofpbuf_pull(ofpacts, set_field_offset);
>
> @@ -780,7 +795,7 @@ format_CT_COMMIT_V2(const struct ovnact_nest *on, struct ds *s)
>
>  static void
>  encode_CT_COMMIT_V2(const struct ovnact_nest *on,
> -                    const struct ovnact_encode_params *ep OVS_UNUSED,
> +                    const struct ovnact_encode_params *ep,
>                      struct ofpbuf *ofpacts)
>  {
>      struct ofpact_conntrack *ct = ofpact_put_CT(ofpacts);
> @@ -792,6 +807,21 @@ encode_CT_COMMIT_V2(const struct ovnact_nest *on,
>      ct->zone_src.ofs = 0;
>      ct->zone_src.n_bits = 16;
>
> +    /* If the datapath supports all-zero SNAT then use it to avoid tuple
> +     * collisions at commit time between NATed and firewalled-only sessions.
> +     */
> +    if (ep->dp_features & OVS_CT_ZERO_SNAT_SUPPORT) {
> +        size_t nat_offset = ofpacts->size;
> +        ofpbuf_pull(ofpacts, nat_offset);
> +
> +        struct ofpact_nat *nat = ofpact_put_NAT(ofpacts);
> +        nat->flags = 0;
> +        nat->range_af = AF_UNSPEC;
> +        nat->flags |= NX_NAT_F_SRC;
> +        ofpacts->header = ofpbuf_push_uninit(ofpacts, nat_offset);
> +        ct = ofpacts->header;
> +    }
> +
>      size_t set_field_offset = ofpacts->size;
>      ofpbuf_pull(ofpacts, set_field_offset);
>
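Nit: this new block is duplicated verbatim in encode_CT_COMMIT_V1() and
encode_CT_COMMIT_V2(). How about factoring it out into a small helper,
e.g. (untested sketch, the helper name is mine):

    /* Splice a "nat(src)" action with no address range (all-zero-ip SNAT)
     * into the not-yet-finished ct() action at the base of 'ofpacts'.
     * Pulling everything that is already encoded makes ofpact_put_NAT()
     * land inside the open ct() action; push restores the buffer. */
    static void
    encode_ct_commit_all_zero_snat(struct ofpbuf *ofpacts)
    {
        size_t nat_offset = ofpacts->size;
        ofpbuf_pull(ofpacts, nat_offset);

        struct ofpact_nat *nat = ofpact_put_NAT(ofpacts);
        nat->flags = NX_NAT_F_SRC;
        nat->range_af = AF_UNSPEC;

        ofpacts->header = ofpbuf_push_uninit(ofpacts, nat_offset);
    }

with both callers reduced to:

    if (ep->dp_features & OVS_CT_ZERO_SNAT_SUPPORT) {
        encode_ct_commit_all_zero_snat(ofpacts);
        ct = ofpacts->header;
    }

Not a blocker, just less code to keep in sync.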
> diff --git a/tests/ovn.at b/tests/ovn.at
> index f26894ce4..638aa5010 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -23109,7 +23109,7 @@ AT_CHECK([
>  for hv in 1 2; do
>      grep table=15 hv${hv}flows | \
>      grep "priority=100" | \
> -    grep -c "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_LABEL\\[[80..95\\]]))"
> +    grep -c "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],.*exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_LABEL\\[[80..95\\]]))"
>
>      grep table=22 hv${hv}flows | \
>      grep "priority=200" | \
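For anyone else reading along: the new ".*" is there because, on datapaths
with ct_zero_snat support, the committed flow now carries a nat(src)
action between the zone and the exec(), i.e. roughly (abbreviated, my
reconstruction from the regex and the encoding change above):

    ct(commit,zone=NXM_NX_REG11[0..15],nat(src),exec(move:NXM_OF_ETH_SRC[]->NXM_NX_CT_LABEL[32..79],...))

while it stays unchanged on datapaths without the feature, so the regex
has to accept both forms.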
> diff --git a/tests/system-common-macros.at b/tests/system-common-macros.at
> index c8fa6f03f..b742a2cb9 100644
> --- a/tests/system-common-macros.at
> +++ b/tests/system-common-macros.at
> @@ -330,3 +330,7 @@ m4_define([OVS_CHECK_IPROUTE_ENCAP],
>  # OVS_CHECK_CT_CLEAR()
>  m4_define([OVS_CHECK_CT_CLEAR],
>      [AT_SKIP_IF([! grep -q "Datapath supports ct_clear action" ovs-vswitchd.log])])
> +
> +# OVS_CHECK_CT_ZERO_SNAT()
> +m4_define([OVS_CHECK_CT_ZERO_SNAT],
> +    [AT_SKIP_IF([! grep -q "Datapath supports ct_zero_snat" ovs-vswitchd.log])])
> diff --git a/tests/system-ovn.at b/tests/system-ovn.at
> index 310bd3d5a..61d6bfc86 100644
> --- a/tests/system-ovn.at
> +++ b/tests/system-ovn.at
> @@ -5296,6 +5296,144 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
>  AT_CLEANUP
>  ])
>
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([ovn -- load-balancer and firewall tuple conflict IPv4])
> +AT_KEYWORDS([ovnlb])
> +
> +CHECK_CONNTRACK()
> +CHECK_CONNTRACK_NAT()
> +ovn_start
> +OVS_TRAFFIC_VSWITCHD_START()
> +OVS_CHECK_CT_ZERO_SNAT()
> +ADD_BR([br-int])
> +
> +# Set external-ids in br-int needed for ovn-controller
> +ovs-vsctl \
> +    -- set Open_vSwitch . external-ids:system-id=hv1 \
> +    -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
> +    -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
> +    -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
> +    -- set bridge br-int fail-mode=secure other-config:disable-in-band=true
> +
> +# Start ovn-controller
> +start_daemon ovn-controller
> +
> +# Logical network:
> +# 1 logical switch connected to one logical router.
> +# 2 VMs, one used as backend for a load balancer.
> +
> +check ovn-nbctl \
> +    -- lr-add rtr \
> +    -- lrp-add rtr rtr-ls 00:00:00:00:01:00 42.42.42.1/24 \
> +    -- ls-add ls \
> +    -- lsp-add ls ls-rtr \
> +    -- lsp-set-addresses ls-rtr 00:00:00:00:01:00 \
> +    -- lsp-set-type ls-rtr router \
> +    -- lsp-set-options ls-rtr router-port=rtr-ls \
> +    -- lsp-add ls vm1 -- lsp-set-addresses vm1 00:00:00:00:00:01 \
> +    -- lsp-add ls vm2 -- lsp-set-addresses vm2 00:00:00:00:00:02 \
> +    -- lb-add lb-test 66.66.66.66:666 42.42.42.2:4242 tcp \
> +    -- ls-lb-add ls lb-test

Will this work for a DNAT entry instead of a load-balancer? As you know,
the flows are slightly different. Will this work when combined with force
SNAT? Do we need to check this?

> +
> +ADD_NAMESPACES(vm1)
> +ADD_VETH(vm1, vm1, br-int, "42.42.42.2/24", "00:00:00:00:00:01", "42.42.42.1")
> +
> +ADD_NAMESPACES(vm2)
> +ADD_VETH(vm2, vm2, br-int, "42.42.42.3/24", "00:00:00:00:00:02", "42.42.42.1")
> +
> +# Wait for ovn-controller to catch up.
> +wait_for_ports_up
> +ovn-nbctl --wait=hv sync
> +
> +# Start IPv4 TCP server on vm1.
> +NETNS_DAEMONIZE([vm1], [nc -k -l 42.42.42.2 4242], [nc-vm1.pid])
> +
> +# Make sure connecting to the VIP works.
> +NS_CHECK_EXEC([vm2], [nc 66.66.66.66 666 -p 2000 -z])
> +
> +# Start IPv4 TCP connection to VIP from vm2.
> +NETNS_DAEMONIZE([vm2], [nc 66.66.66.66 666 -p 2001], [nc1-vm2.pid])

I feel like there should also be some kind of ping test to check that the
return path works as expected. Is there a reason why you did not do that?

> +
> +# Check conntrack.
> +OVS_WAIT_UNTIL([ovs-appctl dpctl/dump-conntrack | grep ESTABLISHED | FORMAT_CT(42.42.42.2)])
> +
> +# Start IPv4 TCP connection to backend IP from vm2 which would require
> +# additional source port translation to avoid a tuple conflict.
> +NS_CHECK_EXEC([vm2], [nc 42.42.42.2 4242 -p 2001 -z])

Should there be another check to ensure that the conntrack entries are
set up as expected?
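For example, something along these lines (untested sketch; the exact
dump-conntrack fields may need tweaking):

    # The directly-connected session must have been committed with its
    # source port translated away from 2001, because reply dport 2001 is
    # already taken by the session established via the VIP.
    AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "dport=4242" | \
              grep -q "dport=2001"], [1])

Same comment applies to the IPv6 test below.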
> +
> +AT_CLEANUP
> +])
> +
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([ovn -- load-balancer and firewall tuple conflict IPv6])
> +AT_KEYWORDS([ovnlb])
> +
> +CHECK_CONNTRACK()
> +CHECK_CONNTRACK_NAT()
> +ovn_start
> +OVS_TRAFFIC_VSWITCHD_START()
> +OVS_CHECK_CT_ZERO_SNAT()
> +ADD_BR([br-int])
> +
> +# Set external-ids in br-int needed for ovn-controller
> +ovs-vsctl \
> +    -- set Open_vSwitch . external-ids:system-id=hv1 \
> +    -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
> +    -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
> +    -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
> +    -- set bridge br-int fail-mode=secure other-config:disable-in-band=true
> +
> +# Start ovn-controller
> +start_daemon ovn-controller
> +
> +# Logical network:
> +# 1 logical switch connected to one logical router.
> +# 2 VMs, one used as backend for a load balancer.
> +
> +check ovn-nbctl \
> +    -- lr-add rtr \
> +    -- lrp-add rtr rtr-ls 00:00:00:00:01:00 4242::1/64 \
> +    -- ls-add ls \
> +    -- lsp-add ls ls-rtr \
> +    -- lsp-set-addresses ls-rtr 00:00:00:00:01:00 \
> +    -- lsp-set-type ls-rtr router \
> +    -- lsp-set-options ls-rtr router-port=rtr-ls \
> +    -- lsp-add ls vm1 -- lsp-set-addresses vm1 00:00:00:00:00:01 \
> +    -- lsp-add ls vm2 -- lsp-set-addresses vm2 00:00:00:00:00:02 \
> +    -- lb-add lb-test [[6666::1]]:666 [[4242::2]]:4242 tcp \
> +    -- ls-lb-add ls lb-test
> +
> +ADD_NAMESPACES(vm1)
> +ADD_VETH(vm1, vm1, br-int, "4242::2/64", "00:00:00:00:00:01", "4242::1")
> +OVS_WAIT_UNTIL([test "$(ip netns exec vm1 ip a | grep 4242::2 | grep tentative)" = ""])
> +
> +ADD_NAMESPACES(vm2)
> +ADD_VETH(vm2, vm2, br-int, "4242::3/64", "00:00:00:00:00:02", "4242::1")
> +OVS_WAIT_UNTIL([test "$(ip netns exec vm2 ip a | grep 4242::3 | grep tentative)" = ""])
> +
> +# Wait for ovn-controller to catch up.
> +wait_for_ports_up
> +ovn-nbctl --wait=hv sync
> +
> +# Start IPv6 TCP server on vm1.
> +NETNS_DAEMONIZE([vm1], [nc -k -l 4242::2 4242], [nc-vm1.pid])
> +
> +# Make sure connecting to the VIP works.
> +NS_CHECK_EXEC([vm2], [nc 6666::1 666 -p 2000 -z])
> +
> +# Start IPv6 TCP connection to VIP from vm2.
> +NETNS_DAEMONIZE([vm2], [nc 6666::1 666 -p 2001], [nc1-vm2.pid])
> +
> +# Check conntrack.
> +OVS_WAIT_UNTIL([ovs-appctl dpctl/dump-conntrack | grep ESTABLISHED | FORMAT_CT(4242::2)])
> +
> +# Start IPv6 TCP connection to backend IP from vm2 which would require
> +# additional source port translation to avoid a tuple conflict.
> +NS_CHECK_EXEC([vm2], [nc 4242::2 4242 -p 2001 -z])
> +
> +AT_CLEANUP
> +])
> +
>  # When a lport is released on a chassis, ovn-controller was
>  # not clearing some of the flows in table 33 leading
>  # to packet drops if ct() is hit.
