On 6/4/21 6:11 PM, Mark Gray wrote:
> On 04/06/2021 16:11, Dumitru Ceara wrote:
>> On 6/4/21 4:51 PM, Mark Gray wrote:
>>> On 03/06/2021 16:06, Dumitru Ceara wrote:
>>>> Assuming a load balancer, LB1, with:
>>>> - VIP: 42.42.42.42:4242
>>>> - backend: 42.42.42.1:2121
>>>>
>>>> A client might connect to the backend either directly or through the
>>>> VIP. If the first connection is via the VIP and the second connection
>>>> is direct but happens to use the same source L4 port, OVN should make
>>>> sure that the second connection is SNATed (source port translation) in
>>>> order to avoid a tuple collision at commit time.
>>>>
>>>> For example:
>>>> 1. Session via the VIP:
>>>>    - original packet: src=42.42.42.2:2000, dst=42.42.42.42:4242
>>>>    - after DNAT: src=42.42.42.2:2000, dst=42.42.42.1:2121
>>>> 2. Session directly connected to the backend:
>>>>    - original packet: src=42.42.42.2:2000, dst=42.42.42.1:2121
>>>>    - in the ACL stage, committing this connection would fail.
>>>>
>>>> In order to avoid this we now use the all-zero-ip NAT OVS feature when
>>>> committing connections in the ACL stage. This translates to a no-op
>>>> SNAT when there's no tuple collision, and performs source port
>>>> translation when a tuple collision would happen.
>>>>
>>>> We program flows to perform all-zero-ip NAT conditionally, only if the
>>>> datapath being used supports it.
>>>>
>>>> Reported-at: https://bugzilla.redhat.com/1939676
>>>> Signed-off-by: Dumitru Ceara <[email protected]>
>>>> ---
>>>>  controller/lflow.c            |    1
>>>>  include/ovn/actions.h         |    4 +
>>>>  lib/actions.c                 |   34 ++++++++++
>>>>  tests/ovn.at                  |    2 -
>>>>  tests/system-common-macros.at |    4 +
>>>>  tests/system-ovn.at           |  138 +++++++++++++++++++++++++++++++++++++++++
>>>>  6 files changed, 180 insertions(+), 3 deletions(-)
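
(Context for readers following along: the visible effect is an extra
nat(src), with no address range, inside the ct() commit action.
Illustratively, based on the grep pattern updated in tests/ovn.at below:

  before: ct(commit,zone=NXM_NX_REG11[0..15],exec(...))
  after:  ct(commit,zone=NXM_NX_REG11[0..15],nat(src),exec(...))

A source NAT with an unspecified address range asks conntrack to keep the
original tuple if it is free, and to rewrite only the source port if it
would collide with an existing entry.)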
>>>>
>>>> diff --git a/controller/lflow.c b/controller/lflow.c
>>>> index 680b8cca1..af6a237d1 100644
>>>> --- a/controller/lflow.c
>>>> +++ b/controller/lflow.c
>>>> @@ -582,6 +582,7 @@ add_matches_to_flow_table(const struct sbrec_logical_flow *lflow,
>>>>          .is_switch = datapath_is_switch(dp),
>>>>          .group_table = l_ctx_out->group_table,
>>>>          .meter_table = l_ctx_out->meter_table,
>>>> +        .dp_features = ovs_feature_support_get(),
>>>>          .lflow_uuid = lflow->header_.uuid,
>>>>
>>>>          .pipeline = ingress ? OVNACT_P_INGRESS : OVNACT_P_EGRESS,
>>>> diff --git a/include/ovn/actions.h b/include/ovn/actions.h
>>>> index 040213177..35983498b 100644
>>>> --- a/include/ovn/actions.h
>>>> +++ b/include/ovn/actions.h
>>>> @@ -25,6 +25,7 @@
>>>>  #include "openvswitch/hmap.h"
>>>>  #include "openvswitch/uuid.h"
>>>>  #include "util.h"
>>>> +#include "ovn/features.h"
>>>>
>>>>  struct expr;
>>>>  struct lexer;
>>>> @@ -756,6 +757,9 @@ struct ovnact_encode_params {
>>>>      /* A struct to figure out the meter_id for meter actions. */
>>>>      struct ovn_extend_table *meter_table;
>>>>
>>>> +    /* OVS datapath supported features. */
>>>> +    enum ovs_feature_support dp_features;
>>>> +
>>>>      /* The logical flow uuid that drove this action. */
>>>>      struct uuid lflow_uuid;
>>>>
>>>> diff --git a/lib/actions.c b/lib/actions.c
>>>> index b3433f49e..c8c2443ce 100644
>>>> --- a/lib/actions.c
>>>> +++ b/lib/actions.c
>>>> @@ -732,7 +732,7 @@ format_CT_COMMIT_V1(const struct ovnact_ct_commit_v1 *cc, struct ds *s)
>>>>
>>>>  static void
>>>>  encode_CT_COMMIT_V1(const struct ovnact_ct_commit_v1 *cc,
>>>> -                    const struct ovnact_encode_params *ep OVS_UNUSED,
>>>> +                    const struct ovnact_encode_params *ep,
>>>>                      struct ofpbuf *ofpacts)
>>>>  {
>>>>      struct ofpact_conntrack *ct = ofpact_put_CT(ofpacts);
>>>> @@ -742,6 +742,21 @@ encode_CT_COMMIT_V1(const struct ovnact_ct_commit_v1 *cc,
>>>>      ct->zone_src.ofs = 0;
>>>>      ct->zone_src.n_bits = 16;
>>>>
>>>> +    /* If the datapath supports all-zero SNAT then use it to avoid tuple
>>>> +     * collisions at commit time between NATed and firewalled-only sessions.
>>>> +     */
>>>> +    if (ep->dp_features & OVS_CT_ZERO_SNAT_SUPPORT) {
>>>> +        size_t nat_offset = ofpacts->size;
>>>> +        ofpbuf_pull(ofpacts, nat_offset);
>>>> +
>>>> +        struct ofpact_nat *nat = ofpact_put_NAT(ofpacts);
>>>> +        nat->flags = 0;
>>>> +        nat->range_af = AF_UNSPEC;
>>>> +        nat->flags |= NX_NAT_F_SRC;
>>>> +        ofpacts->header = ofpbuf_push_uninit(ofpacts, nat_offset);
>>>> +        ct = ofpacts->header;
>>>> +    }
>>>> +
>>>>      size_t set_field_offset = ofpacts->size;
>>>>      ofpbuf_pull(ofpacts, set_field_offset);
>>>>
>>>> @@ -780,7 +795,7 @@ format_CT_COMMIT_V2(const struct ovnact_nest *on, struct ds *s)
>>>>
>>>>  static void
>>>>  encode_CT_COMMIT_V2(const struct ovnact_nest *on,
>>>> -                    const struct ovnact_encode_params *ep OVS_UNUSED,
>>>> +                    const struct ovnact_encode_params *ep,
>>>>                      struct ofpbuf *ofpacts)
>>>>  {
>>>>      struct ofpact_conntrack *ct = ofpact_put_CT(ofpacts);
>>>> @@ -792,6 +807,21 @@ encode_CT_COMMIT_V2(const struct ovnact_nest *on,
>>>>      ct->zone_src.ofs = 0;
>>>>      ct->zone_src.n_bits = 16;
>>>>
>>>> +    /* If the datapath supports all-zero SNAT then use it to avoid tuple
>>>> +     * collisions at commit time between NATed and firewalled-only sessions.
>>>> +     */
>>>> +    if (ep->dp_features & OVS_CT_ZERO_SNAT_SUPPORT) {
>>>> +        size_t nat_offset = ofpacts->size;
>>>> +        ofpbuf_pull(ofpacts, nat_offset);
>>>> +
>>>> +        struct ofpact_nat *nat = ofpact_put_NAT(ofpacts);
>>>> +        nat->flags = 0;
>>>> +        nat->range_af = AF_UNSPEC;
>>>> +        nat->flags |= NX_NAT_F_SRC;
>>>> +        ofpacts->header = ofpbuf_push_uninit(ofpacts, nat_offset);
>>>> +        ct = ofpacts->header;
>>>> +    }
>>>> +
>>>>      size_t set_field_offset = ofpacts->size;
>>>>      ofpbuf_pull(ofpacts, set_field_offset);
>>>>
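(A note on the ofpbuf_pull()/ofpbuf_push_uninit() pair above, since it is
easy to misread: as I read it, pulling everything encoded so far makes the
following ofpact_put_NAT() land inside the ct action's list of nested
actions rather than after it; the push then restores the buffer, and "ct"
is re-fetched from ofpacts->header because the put may have reallocated
the buffer.)
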
>>>> diff --git a/tests/ovn.at b/tests/ovn.at
>>>> index f26894ce4..638aa5010 100644
>>>> --- a/tests/ovn.at
>>>> +++ b/tests/ovn.at
>>>> @@ -23109,7 +23109,7 @@ AT_CHECK([
>>>>  for hv in 1 2; do
>>>>      grep table=15 hv${hv}flows | \
>>>>      grep "priority=100" | \
>>>> -    grep -c "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_LABEL\\[[80..95\\]]))"
>>>> +    grep -c "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],.*exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_LABEL\\[[80..95\\]]))"
>>>>
>>>>      grep table=22 hv${hv}flows | \
>>>>      grep "priority=200" | \
>>>> diff --git a/tests/system-common-macros.at b/tests/system-common-macros.at
>>>> index c8fa6f03f..b742a2cb9 100644
>>>> --- a/tests/system-common-macros.at
>>>> +++ b/tests/system-common-macros.at
>>>> @@ -330,3 +330,7 @@ m4_define([OVS_CHECK_IPROUTE_ENCAP],
>>>>  # OVS_CHECK_CT_CLEAR()
>>>>  m4_define([OVS_CHECK_CT_CLEAR],
>>>>      [AT_SKIP_IF([! grep -q "Datapath supports ct_clear action" ovs-vswitchd.log])])
>>>> +
>>>> +# OVS_CHECK_CT_ZERO_SNAT()
>>>> +m4_define([OVS_CHECK_CT_ZERO_SNAT],
>>>> +    [AT_SKIP_IF([! grep -q "Datapath supports ct_zero_snat" ovs-vswitchd.log])])
>>>> diff --git a/tests/system-ovn.at b/tests/system-ovn.at
>>>> index 310bd3d5a..61d6bfc86 100644
>>>> --- a/tests/system-ovn.at
>>>> +++ b/tests/system-ovn.at
>>>> @@ -5296,6 +5296,144 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
>>>>  AT_CLEANUP
>>>>  ])
>>>>
>>>> +OVN_FOR_EACH_NORTHD([
>>>> +AT_SETUP([ovn -- load-balancer and firewall tuple conflict IPv4])
>>>> +AT_KEYWORDS([ovnlb])
>>>> +
>>>> +CHECK_CONNTRACK()
>>>> +CHECK_CONNTRACK_NAT()
>>>> +ovn_start
>>>> +OVS_TRAFFIC_VSWITCHD_START()
>>>> +OVS_CHECK_CT_ZERO_SNAT()
>>>> +ADD_BR([br-int])
>>>> +
>>>> +# Set external-ids in br-int needed for ovn-controller
>>>> +ovs-vsctl \
>>>> +    -- set Open_vSwitch . external-ids:system-id=hv1 \
>>>> +    -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
>>>> +    -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
>>>> +    -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
>>>> +    -- set bridge br-int fail-mode=secure other-config:disable-in-band=true
>>>> +
>>>> +# Start ovn-controller
>>>> +start_daemon ovn-controller
>>>> +
>>>> +# Logical network:
>>>> +# 1 logical switch connected to one logical router.
>>>> +# 2 VMs, one used as backend for a load balancer.
>>>> + >>>> +check ovn-nbctl \ >>>> + -- lr-add rtr \ >>>> + -- lrp-add rtr rtr-ls 00:00:00:00:01:00 42.42.42.1/24 \ >>>> + -- ls-add ls \ >>>> + -- lsp-add ls ls-rtr \ >>>> + -- lsp-set-addresses ls-rtr 00:00:00:00:01:00 \ >>>> + -- lsp-set-type ls-rtr router \ >>>> + -- lsp-set-options ls-rtr router-port=rtr-ls \ >>>> + -- lsp-add ls vm1 -- lsp-set-addresses vm1 00:00:00:00:00:01 \ >>>> + -- lsp-add ls vm2 -- lsp-set-addresses vm2 00:00:00:00:00:02 \ >>>> + -- lb-add lb-test 66.66.66.66:666 42.42.42.2:4242 tcp \ >>>> + -- ls-lb-add ls lb-test >>> >>> Will this work for a DNAT entry instead of a load-balancer? As you know, >>> the flows are slightly different. Will this work when combined with >>> forcing SNAT? Do we need to check this? >> >> We already have system tests that test DNAT and load balancer force >> SNAT. I thought we're already covered by those. > > In those cases we don't have tests in which we are reusing the source > port as we are in this test. What I am wondering is that when we have > DNAT entries or force SNAT, this will generate different Openflow rules > which may cause the OVS datapath flows to look slightly different which > may cause differences in behaviour not covered by this test. > > Please correct me if I am wrong or if you think it is unnecessary. I am > a little paranoid about the OVN DNAT/SNAT/LB router code as I was > recently working on a bug in this area and I think more tests in this > area may be useful.
>
>>
>>>
>>>> +
>>>> +ADD_NAMESPACES(vm1)
>>>> +ADD_VETH(vm1, vm1, br-int, "42.42.42.2/24", "00:00:00:00:00:01", "42.42.42.1")
>>>> +
>>>> +ADD_NAMESPACES(vm2)
>>>> +ADD_VETH(vm2, vm2, br-int, "42.42.42.3/24", "00:00:00:00:00:02", "42.42.42.1")
>>>> +
>>>> +# Wait for ovn-controller to catch up.
>>>> +wait_for_ports_up
>>>> +ovn-nbctl --wait=hv sync
>>>> +
>>>> +# Start IPv4 TCP server on vm1.
>>>> +NETNS_DAEMONIZE([vm1], [nc -k -l 42.42.42.2 4242], [nc-vm1.pid])
>>>> +
>>>> +# Make sure connecting to the VIP works.
>>>> +NS_CHECK_EXEC([vm2], [nc 66.66.66.66 666 -p 2000 -z])
>>>> +
>>>> +# Start IPv4 TCP connection to VIP from vm2.
>>>> +NETNS_DAEMONIZE([vm2], [nc 66.66.66.66 666 -p 2001], [nc1-vm2.pid])
>>>
>>> I feel like there should also be some kind of ping test to check that
>>> the return path works as expected. Is there a reason why you did not do
>>> that?
>>
>> The goal of the test is to make sure that the direct connection (towards
>> 42.42.42.2:4242) below is established successfully.
>>
>> For the connection via the VIP there's the part above that performs the
>> full TCP 3-way handshake:
>>
>> NS_CHECK_EXEC([vm2], [nc 66.66.66.66 666 -p 2000 -z])
>>
>>>
>>>> +
>>>> +# Check conntrack.
>>>> +OVS_WAIT_UNTIL([ovs-appctl dpctl/dump-conntrack | grep ESTABLISHED | FORMAT_CT(42.42.42.2)])
>>>> +
>>>> +# Start IPv4 TCP connection to backend IP from vm2 which would require
>>>> +# additional source port translation to avoid a tuple conflict.
>>>> +NS_CHECK_EXEC([vm2], [nc 42.42.42.2 4242 -p 2001 -z])
>>>
>>> Should there be another check to ensure that the conntrack entries are
>>> set up as expected?
>>>
>>
>> I initially wanted to add one but there are differences in the way the
>> TCP state machine is implemented in userspace conntrack vs kernel
>> conntrack and I couldn't come up with something that would pass on both.
>>
>> I'll try some more though.
>
> I think it would be useful. As I mentioned above I am a bit paranoid
> about OVN DNAT/LB/SNAT :)

:) Thanks!
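
(Illustratively, such a check would be looking for two ESTABLISHED entries
for the same client in "ovs-appctl dpctl/dump-conntrack", with the direct
connection source-port-translated, along the lines of:

  tcp,orig=(src=42.42.42.3,dst=66.66.66.66,sport=2001,dport=666),reply=(src=42.42.42.2,dst=42.42.42.3,sport=4242,dport=2001),protoinfo=(state=ESTABLISHED)
  tcp,orig=(src=42.42.42.3,dst=42.42.42.2,sport=2001,dport=4242),reply=(src=42.42.42.2,dst=42.42.42.3,sport=4242,dport=<rewritten>),protoinfo=(state=ESTABLISHED)

The field layout is dpctl/dump-conntrack's, but the exact TCP state names
and zone fields differ between the kernel and userspace datapaths, which is
the portability problem mentioned above.)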
