On 10/06/2021 20:37, Dumitru Ceara wrote: > On 6/10/21 6:26 PM, Mark Gray wrote: >> On 09/06/2021 13:12, Dumitru Ceara wrote: >>> Assuming a load balancer, LB1, with: >>> - VIP: 42.42.42.42:4242 >>> - backend: 42.42.42.1:2121 >>> >>> A client might connect to the backend either directly or through the >>> VIP. If the first connection is via the VIP and the second connection >>> is direct but happens to use the same source L4 port, OVN should make >>> sure that the second connection is SNATed (source port translation) in >>> order to avoid a tuple collision at commit time. >>> >>> For example: >>> 1. Session via the VIP: >>> - original packet: src=42.42.42.2:2000, dst=42.42.42.42:4242 >>> - after DNAT: src=42.42.42.2:2000, dst=42.42.42.1:2121 >>> 2. Session directly connected to the backend: >>> - original packet: src=42.42.42.2:2000, dst=42.42.42.1:2121 >>> - in acl stage committing this connection would fail. >>> >>> In order to avoid this we now use the all-zero-ip NAT OVS feature when >>> committing conneections in the ACL stage. This translates to a no-op >>> SNAT when there's no tuple collision, and performs source port >>> translation when a tuple collision would happen. >>> >>> We program flows to perform all-zero-ip NAT conditionally, only if the >>> datapath being used supports it. >>> >>> Reported-at: https://bugzilla.redhat.com/1939676 >>> Signed-off-by: Dumitru Ceara <[email protected]> >>> --- >>> include/ovn/actions.h | 1 >>> lib/actions.c | 31 ++++++++ >>> tests/ovn.at | 2 - >>> tests/system-common-macros.at | 4 + >>> tests/system-ovn.at | 156 >>> +++++++++++++++++++++++++++++++++++++++++ >>> 5 files changed, 193 insertions(+), 1 deletion(-) >>> >>> diff --git a/include/ovn/actions.h b/include/ovn/actions.h >>> index 040213177..f5eb01eb7 100644 >>> --- a/include/ovn/actions.h >>> +++ b/include/ovn/actions.h >>> @@ -25,6 +25,7 @@ >>> #include "openvswitch/hmap.h" >>> #include "openvswitch/uuid.h" >>> #include "util.h" >>> +#include "ovn/features.h" >>> >>> struct expr; >>> struct lexer; >>> diff --git a/lib/actions.c b/lib/actions.c >>> index b3433f49e..7010fab2b 100644 >>> --- a/lib/actions.c >>> +++ b/lib/actions.c >>> @@ -742,6 +742,22 @@ encode_CT_COMMIT_V1(const struct ovnact_ct_commit_v1 >>> *cc, >>> ct->zone_src.ofs = 0; >>> ct->zone_src.n_bits = 16; >>> >>> + /* If the datapath supports all-zero SNAT then use it to avoid tuple >>> + * collisions at commit time between NATed and firewalled-only >>> sessions. >>> + */ >>> + >>> + if (ovs_feature_is_supported(OVS_CT_ZERO_SNAT_SUPPORT)) { >>> + size_t nat_offset = ofpacts->size; >>> + ofpbuf_pull(ofpacts, nat_offset); >>> + >>> + struct ofpact_nat *nat = ofpact_put_NAT(ofpacts); >>> + nat->flags = 0; >>> + nat->range_af = AF_UNSPEC; >>> + nat->flags |= NX_NAT_F_SRC; >>> + ofpacts->header = ofpbuf_push_uninit(ofpacts, nat_offset); >>> + ct = ofpacts->header; >>> + } >>> + >>> size_t set_field_offset = ofpacts->size; >>> ofpbuf_pull(ofpacts, set_field_offset); >>> >>> @@ -792,6 +808,21 @@ encode_CT_COMMIT_V2(const struct ovnact_nest *on, >>> ct->zone_src.ofs = 0; >>> ct->zone_src.n_bits = 16; >>> >>> + /* If the datapath supports all-zero SNAT then use it to avoid tuple >>> + * collisions at commit time between NATed and firewalled-only >>> sessions. >>> + */ >>> + if (ovs_feature_is_supported(OVS_CT_ZERO_SNAT_SUPPORT)) { >>> + size_t nat_offset = ofpacts->size; >>> + ofpbuf_pull(ofpacts, nat_offset); >>> + >>> + struct ofpact_nat *nat = ofpact_put_NAT(ofpacts); >>> + nat->flags = 0; >>> + nat->range_af = AF_UNSPEC; >>> + nat->flags |= NX_NAT_F_SRC; >>> + ofpacts->header = ofpbuf_push_uninit(ofpacts, nat_offset); >>> + ct = ofpacts->header; >>> + } >>> + >>> size_t set_field_offset = ofpacts->size; >>> ofpbuf_pull(ofpacts, set_field_offset); >>> >>> diff --git a/tests/ovn.at b/tests/ovn.at >>> index f26894ce4..638aa5010 100644 >>> --- a/tests/ovn.at >>> +++ b/tests/ovn.at >>> @@ -23109,7 +23109,7 @@ AT_CHECK([ >>> for hv in 1 2; do >>> grep table=15 hv${hv}flows | \ >>> grep "priority=100" | \ >>> - grep -c >>> "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_LABEL\\[[80..95\\]]))" >>> + grep -c >>> "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],.*exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_LABEL\\[[80..95\\]]))" >>> >>> grep table=22 hv${hv}flows | \ >>> grep "priority=200" | \ >>> diff --git a/tests/system-common-macros.at b/tests/system-common-macros.at >>> index c8fa6f03f..b742a2cb9 100644 >>> --- a/tests/system-common-macros.at >>> +++ b/tests/system-common-macros.at >>> @@ -330,3 +330,7 @@ m4_define([OVS_CHECK_IPROUTE_ENCAP], >>> # OVS_CHECK_CT_CLEAR() >>> m4_define([OVS_CHECK_CT_CLEAR], >>> [AT_SKIP_IF([! grep -q "Datapath supports ct_clear action" >>> ovs-vswitchd.log])]) >>> + >>> +# OVS_CHECK_CT_ZERO_SNAT() >>> +m4_define([OVS_CHECK_CT_ZERO_SNAT], >>> + [AT_SKIP_IF([! grep -q "Datapath supports ct_zero_snat" >>> ovs-vswitchd.log])])) >>> diff --git a/tests/system-ovn.at b/tests/system-ovn.at >>> index 310bd3d5a..1c11f9bab 100644 >>> --- a/tests/system-ovn.at >>> +++ b/tests/system-ovn.at >>> @@ -5296,6 +5296,162 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port >>> patch-.*/d >>> AT_CLEANUP >>> ]) >>> >>> +OVN_FOR_EACH_NORTHD([ >>> +AT_SETUP([ovn -- load-balancer and firewall tuple conflict IPv4]) >>> +AT_SKIP_IF([test $HAVE_NC = no]) >>> +AT_KEYWORDS([ovnlb]) >>> + >>> +CHECK_CONNTRACK() >>> +CHECK_CONNTRACK_NAT() >>> +ovn_start >>> +OVS_TRAFFIC_VSWITCHD_START() >>> +OVS_CHECK_CT_ZERO_SNAT() >>> +ADD_BR([br-int]) >>> + >>> +# Set external-ids in br-int needed for ovn-controller >>> +ovs-vsctl \ >>> + -- set Open_vSwitch . external-ids:system-id=hv1 \ >>> + -- set Open_vSwitch . >>> external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ >>> + -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ >>> + -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \ >>> + -- set bridge br-int fail-mode=secure >>> other-config:disable-in-band=true >>> + >>> +# Start ovn-controller >>> +start_daemon ovn-controller >>> + >>> +# Logical network: >>> +# 1 logical switch connetected to one logical router. >>> +# 2 VMs, one used as backend for a load balancer. >>> + >>> +check ovn-nbctl \ >>> + -- lr-add rtr \ >>> + -- lrp-add rtr rtr-ls 00:00:00:00:01:00 42.42.42.1/24 \ >>> + -- ls-add ls \ >>> + -- lsp-add ls ls-rtr \ >>> + -- lsp-set-addresses ls-rtr 00:00:00:00:01:00 \ >>> + -- lsp-set-type ls-rtr router \ >>> + -- lsp-set-options ls-rtr router-port=rtr-ls \ >>> + -- lsp-add ls vm1 -- lsp-set-addresses vm1 00:00:00:00:00:01 \ >>> + -- lsp-add ls vm2 -- lsp-set-addresses vm2 00:00:00:00:00:02 \ >>> + -- lb-add lb-test 66.66.66.66:666 42.42.42.2:4242 tcp \ >>> + -- ls-lb-add ls lb-test >>> + >>> +ADD_NAMESPACES(vm1) >>> +ADD_VETH(vm1, vm1, br-int, "42.42.42.2/24", "00:00:00:00:00:01", >>> "42.42.42.1") >>> + >>> +ADD_NAMESPACES(vm2) >>> +ADD_VETH(vm2, vm2, br-int, "42.42.42.3/24", "00:00:00:00:00:02", >>> "42.42.42.1") >>> + >>> +# Wait for ovn-controller to catch up. >>> +wait_for_ports_up >>> +check ovn-nbctl --wait=hv sync >>> + >>> +# Start IPv4 TCP server on vm1. >>> +NETNS_DAEMONIZE([vm1], [nc -k -l 42.42.42.2 4242], [nc-vm1.pid]) >>> + >>> +# Make sure connecting to the VIP works. >>> +NS_CHECK_EXEC([vm2], [nc 66.66.66.66 666 -p 2000 -z]) >>> + >>> +# Start IPv4 TCP connection to VIP from vm2. >>> +NETNS_DAEMONIZE([vm2], [nc 66.66.66.66 666 -p 2001], [nc1-vm2.pid]) >>> + >>> +# Check conntrack. >>> +OVS_WAIT_UNTIL([test `ovs-appctl dpctl/dump-conntrack | \ >>> +FORMAT_CT(42.42.42.2) | wc -l` = 1]) >>> + >>> +# Start IPv4 TCP connection to backend IP from vm2 which would require >>> +# additional source port translation to avoid a tuple conflict. >>> +NS_CHECK_EXEC([vm2], [nc 42.42.42.2 4242 -p 2001 -z]) >>> + >>> +# Check conntrack. >>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(42.42.42.2) | \ >>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl >>> +tcp,orig=(src=42.42.42.3,dst=42.42.42.2,sport=<cleared>,dport=<cleared>),reply=(src=42.42.42.2,dst=42.42.42.3,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) >>> +tcp,orig=(src=42.42.42.3,dst=42.42.42.2,sport=<cleared>,dport=<cleared>),reply=(src=42.42.42.2,dst=42.42.42.3,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) >>> +]) >>> + >>> +AT_CLEANUP >>> +]) >>> + >>> +OVN_FOR_EACH_NORTHD([ >>> +AT_SETUP([ovn -- load-balancer and firewall tuple conflict IPv6]) >>> +AT_SKIP_IF([test $HAVE_NC = no]) >>> +AT_KEYWORDS([ovnlb]) >>> + >>> +CHECK_CONNTRACK() >>> +CHECK_CONNTRACK_NAT() >>> +ovn_start >>> +OVS_TRAFFIC_VSWITCHD_START() >>> +OVS_CHECK_CT_ZERO_SNAT() >>> +ADD_BR([br-int]) >>> + >>> +# Set external-ids in br-int needed for ovn-controller >>> +ovs-vsctl \ >>> + -- set Open_vSwitch . external-ids:system-id=hv1 \ >>> + -- set Open_vSwitch . >>> external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ >>> + -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ >>> + -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \ >>> + -- set bridge br-int fail-mode=secure >>> other-config:disable-in-band=true >>> + >>> +# Start ovn-controller >>> +start_daemon ovn-controller >>> + >>> +# Logical network: >>> +# 1 logical switch connetected to one logical router. >>> +# 2 VMs, one used as backend for a load balancer. >>> + >>> +check ovn-nbctl \ >>> + -- lr-add rtr \ >>> + -- lrp-add rtr rtr-ls 00:00:00:00:01:00 4242::1/64 \ >>> + -- ls-add ls \ >>> + -- lsp-add ls ls-rtr \ >>> + -- lsp-set-addresses ls-rtr 00:00:00:00:01:00 \ >>> + -- lsp-set-type ls-rtr router \ >>> + -- lsp-set-options ls-rtr router-port=rtr-ls \ >>> + -- lsp-add ls vm1 -- lsp-set-addresses vm1 00:00:00:00:00:01 \ >>> + -- lsp-add ls vm2 -- lsp-set-addresses vm2 00:00:00:00:00:02 \ >>> + -- lb-add lb-test [[6666::1]]:666 [[4242::2]]:4242 tcp \ >>> + -- ls-lb-add ls lb-test >>> + >>> +ADD_NAMESPACES(vm1) >>> +ADD_VETH(vm1, vm1, br-int, "4242::2/64", "00:00:00:00:00:01", "4242::1") >>> +OVS_WAIT_UNTIL([test "$(ip netns exec vm1 ip a | grep 4242::2 | grep >>> tentative)" = ""]) >>> + >>> +ADD_NAMESPACES(vm2) >>> +ADD_VETH(vm2, vm2, br-int, "4242::3/64", "00:00:00:00:00:02", "4242::1") >>> +OVS_WAIT_UNTIL([test "$(ip netns exec vm2 ip a | grep 4242::3 | grep >>> tentative)" = ""]) >>> + >>> +# Wait for ovn-controller to catch up. >>> +wait_for_ports_up >>> +check ovn-nbctl --wait=hv sync >>> + >>> +# Start IPv6 TCP server on vm1. >>> +NETNS_DAEMONIZE([vm1], [nc -k -l 4242::2 4242], [nc-vm1.pid]) >>> + >>> +# Make sure connecting to the VIP works. >>> +NS_CHECK_EXEC([vm2], [nc 6666::1 666 -p 2000 -z]) >>> + >>> +# Start IPv6 TCP connection to VIP from vm2. >>> +NETNS_DAEMONIZE([vm2], [nc 6666::1 666 -p 2001], [nc1-vm2.pid]) >>> + >>> +# Check conntrack. >>> +OVS_WAIT_UNTIL([test `ovs-appctl dpctl/dump-conntrack | \ >>> +FORMAT_CT(4242::2) | wc -l` = 1]) >>> + >>> +# Start IPv6 TCP connection to backend IP from vm2 which would require >>> +# additional source port translation to avoid a tuple conflict. >>> +NS_CHECK_EXEC([vm2], [nc 4242::2 4242 -p 2001 -z]) >>> + >>> +# Check conntrack. >>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(4242::2) | \ >>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl >>> +tcp,orig=(src=4242::3,dst=4242::2,sport=<cleared>,dport=<cleared>),reply=(src=4242::2,dst=4242::3,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) >>> +tcp,orig=(src=4242::3,dst=4242::2,sport=<cleared>,dport=<cleared>),reply=(src=4242::2,dst=4242::3,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) >> >> FORMAT_CT() obfuscates the ports. Is there anyway to confirm that they >> are being translated as expected? >> > > Thanks for the review! Would the following incremental work for you? >
This looks fine. Could you add a comment above the check specifying what it is checking for and that the two entries are in different zones? Thanks > diff --git a/tests/system-ovn.at b/tests/system-ovn.at > index 1c11f9bab..849a11d1b 100644 > --- a/tests/system-ovn.at > +++ b/tests/system-ovn.at > @@ -5364,10 +5364,14 @@ FORMAT_CT(42.42.42.2) | wc -l` = 1]) > NS_CHECK_EXEC([vm2], [nc 42.42.42.2 4242 -p 2001 -z]) > > # Check conntrack. > -AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(42.42.42.2) | \ > -sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl > -tcp,orig=(src=42.42.42.3,dst=42.42.42.2,sport=<cleared>,dport=<cleared>),reply=(src=42.42.42.2,dst=42.42.42.3,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) > -tcp,orig=(src=42.42.42.3,dst=42.42.42.2,sport=<cleared>,dport=<cleared>),reply=(src=42.42.42.2,dst=42.42.42.3,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 2001 | \ > +grep "orig=.src=42\.42\.42\.3,dst=42\.42\.42\.2," | \ > +sed -e 's/port=2001/port=<clnt_s_port>/g' \ > + -e 's/sport=4242,dport=[[0-9]]\+/sport=4242,dport=<rnd_port>/g' \ > + -e 's/state=[[0-9_A-Z]]*/state=<cleared>/g' \ > + -e 's/zone=[[0-9]]*/zone=<cleared>/' | sort], [0], [dnl > +tcp,orig=(src=42.42.42.3,dst=42.42.42.2,sport=<clnt_s_port>,dport=4242),reply=(src=42.42.42.2,dst=42.42.42.3,sport=4242,dport=<clnt_s_port>),zone=<cleared>,protoinfo=(state=<cleared>) > +tcp,orig=(src=42.42.42.3,dst=42.42.42.2,sport=<clnt_s_port>,dport=4242),reply=(src=42.42.42.2,dst=42.42.42.3,sport=4242,dport=<rnd_port>),zone=<cleared>,protoinfo=(state=<cleared>) > ]) > > AT_CLEANUP > @@ -5443,10 +5447,14 @@ FORMAT_CT(4242::2) | wc -l` = 1]) > NS_CHECK_EXEC([vm2], [nc 4242::2 4242 -p 2001 -z]) > > # Check conntrack. > -AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(4242::2) | \ > -sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl > -tcp,orig=(src=4242::3,dst=4242::2,sport=<cleared>,dport=<cleared>),reply=(src=4242::2,dst=4242::3,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) > -tcp,orig=(src=4242::3,dst=4242::2,sport=<cleared>,dport=<cleared>),reply=(src=4242::2,dst=4242::3,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 2001 | \ > +grep "orig=.src=4242::3,dst=4242::2," | \ > +sed -e 's/port=2001/port=<clnt_s_port>/g' \ > + -e 's/sport=4242,dport=[[0-9]]\+/sport=4242,dport=<rnd_port>/g' \ > + -e 's/state=[[0-9_A-Z]]*/state=<cleared>/g' \ > + -e 's/zone=[[0-9]]*/zone=<cleared>/' | sort], [0], [dnl > +tcp,orig=(src=4242::3,dst=4242::2,sport=<clnt_s_port>,dport=4242),reply=(src=4242::2,dst=4242::3,sport=4242,dport=<clnt_s_port>),zone=<cleared>,protoinfo=(state=<cleared>) > +tcp,orig=(src=4242::3,dst=4242::2,sport=<clnt_s_port>,dport=4242),reply=(src=4242::2,dst=4242::3,sport=4242,dport=<rnd_port>),zone=<cleared>,protoinfo=(state=<cleared>) > ]) > > AT_CLEANUP > _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
