From: Numan Siddique <[email protected]>
When there are ECMP symmetric static routes configured, OVN selects
one of the next hop for the traffic originated from within the
cluster. For the subsequent packets to the same destination,
OVN may select a different next hop (which is fine). But there can
be certain usecases, where the next hop entity can be stateful and
selecting the same next hop is desirable.
This patch address this usecase in the following way
1. For the first packet originating from the OVN logical port
VIF, OVN selects a next hop 'A' and forwards the traffic to
it.
2. When the reply traffic is received (either from next hop 'A'
or any other next hop), it commits the connection in the
DNAT zone of the logical router and saves the state in
ct_label.ecmp_reply_eth and ct_label.ecmp_reply_port.
Note that we already support this for the traffic
originating from an ECMP route [1]. We are now extending
the same for the traffic originating from the cluster towards
the ECMP route.
3. For the subsequent packets from the cluster, we select the
next hop eth address and the port from the saved conntrack
state. This is straightforward as we anyway send the packet
to the DNAT zone of the logical router.
Example: If a logical router lr0 is configured with the below
EMCP static routes
ovn-nbctl lr-route-list lr0
IPv4 Routes
Route Table <main>:
0.0.0.0/0 172.20.0.1 dst-ip ecmp
0.0.0.0/0 172.20.0.2 dst-ip ecmp
ecmp-symmetric-reply
Before this patch, we were adding the below logical flows in the router
pipeline for the ECMP route handling:
-------------
table=10(lr_in_ecmp_stateful), priority=100 , match=(inport == "lr0-public" &&
ip4.src == 0.0.0.0/0 && !ct.rpl && (ct.new || ct.est)), action=(ct_commit {
ct_label.ecmp_reply_eth = eth.src; ct_mark.ecmp_reply_port = 3;}; next;)
table=14(lr_in_ip_routing ), priority=10300, match=(ct.rpl &&
ct_mark.ecmp_reply_port == 3 && reg7 == 0 && ip4.dst == 0.0.0.0/0),
action=(ip.ttl--; flags.loopback = 1; eth.src = 00:11:22:00:ff:01; reg1 =
172.20.0.100; outport = "lr0-public"; next;)
table=16(lr_in_policy ), priority=65535, match=(ct.rpl &&
ct_mark.ecmp_reply_port == 3), action=(next;)
table=20(lr_in_arp_resolve ), priority=200 , match=(ct.rpl &&
ct_mark.ecmp_reply_port == 3), action=(push(xxreg1); xxreg1 = ct_label; eth.dst
= xxreg1[32..79]; pop(xxreg1); next;)
-------------
After this patch, we add the below logical flows:
--------------
table=10(lr_in_ecmp_stateful), priority=100 , match=(inport == "lr0-public" &&
ip4.src == 0.0.0.0/0 && (ct.new || ct.est)), action=(ct_commit {
ct_label.ecmp_reply_eth = eth.src; ct_mark.ecmp_reply_port = 3;}; next;)
table=14(lr_in_ip_routing ), priority=10300, match=(ct_mark.ecmp_reply_port
== 3 && reg7 == 0 && ip4.dst == 0.0.0.0/0), action=(ip.ttl--; flags.loopback =
1; eth.src = 00:11:22:00:ff:01; reg1 = 172.20.0.100; outport = "lr0-public";
next;)
table=16(lr_in_policy ), priority=65535, match=(ct_mark.ecmp_reply_port
== 3), action=(next;)
table=20(lr_in_arp_resolve ), priority=200 , match=(ct_mark.ecmp_reply_port
== 3), action=(push(xxreg1); xxreg1 = ct_label; eth.dst = xxreg1[32..79];
pop(xxreg1); next;)
--------------
[1] - 4fdca656857d ("Add ECMP symmetric replies.")
Reported-at: https://issues.redhat.com/browse/FDP-628
Signed-off-by: Numan Siddique <[email protected]>
---
northd/northd.c | 4 ++--
tests/ovn-northd.at | 26 +++++++++++++++++++++-----
2 files changed, 23 insertions(+), 7 deletions(-)
diff --git a/northd/northd.c b/northd/northd.c
index 6898daa00d..3227c093cb 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -10706,7 +10706,7 @@ add_ecmp_symmetric_reply_flows(struct lflow_table
*lflows,
* NOTE: we purposely are not clearing match before this
* ds_put_cstr() call. The previous contents are needed.
*/
- ds_put_cstr(&match, " && !ct.rpl && (ct.new || ct.est)");
+ ds_put_cstr(&match, " && (ct.new || ct.est)");
ds_put_format(&actions,
"ct_commit { ct_label.ecmp_reply_eth = eth.src; "
" %s = %" PRId64 ";}; "
@@ -10721,7 +10721,7 @@ add_ecmp_symmetric_reply_flows(struct lflow_table
*lflows,
* for where to route the packet.
*/
ds_put_format(&ecmp_reply,
- "ct.rpl && %s == %"PRId64,
+ "%s == %"PRId64,
ct_ecmp_reply_port_match, out_port->sb->tunnel_key);
ds_clear(&match);
ds_put_format(&match, "%s && %s", ds_cstr(&ecmp_reply),
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index a389d19886..98a5006cb5 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -6722,8 +6722,14 @@ check ovn-nbctl --wait=sb --ecmp-symmetric-reply
lr-route-add lr0 1.0.0.1 192.16
ovn-sbctl dump-flows lr0 > lr0flows
-AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows |sort], [0], [dnl
+AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl
+ table=??(lr_in_ip_routing ), priority=0 , match=(1), action=(drop;)
+ table=??(lr_in_ip_routing ), priority=10550, match=(nd_rs || nd_ra),
action=(drop;)
+ table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lr0-public"
&& ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 =
ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13;
outport = "lr0-public"; flags.loopback = 1; next;)
+ table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst ==
192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 =
192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public";
flags.loopback = 1; next;)
+ table=??(lr_in_ip_routing ), priority=97 , match=(reg7 == 0 && ip4.dst
== 1.0.0.1/32), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1
= 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public";
flags.loopback = 1; next;)
])
+
AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | ovn_strip_lflows], [0],
[dnl
table=??(lr_in_ip_routing_ecmp), priority=0 , match=(1), action=(drop;)
table=??(lr_in_ip_routing_ecmp), priority=150 , match=(reg8[[0..15]] == 0),
action=(next;)
@@ -6732,7 +6738,12 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows |
ovn_strip_lflows], [0], [dn
check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1
192.168.0.20
ovn-sbctl dump-flows lr0 > lr0flows
-AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | ovn_strip_lflows],
[0], [dnl
+AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl
+ table=??(lr_in_ip_routing ), priority=0 , match=(1), action=(drop;)
+ table=??(lr_in_ip_routing ), priority=10300,
match=(ct_label.ecmp_reply_port == 1 && reg7 == 0 && ip4.dst == 1.0.0.1/32),
action=(ip.ttl--; flags.loopback = 1; eth.src = 00:00:20:20:12:13; reg1 =
192.168.0.1; outport = "lr0-public"; next;)
+ table=??(lr_in_ip_routing ), priority=10550, match=(nd_rs || nd_ra),
action=(drop;)
+ table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lr0-public"
&& ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 =
ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13;
outport = "lr0-public"; flags.loopback = 1; next;)
+ table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst ==
192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 =
192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public";
flags.loopback = 1; next;)
table=??(lr_in_ip_routing ), priority=97 , match=(reg7 == 0 && ip4.dst
== 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1;
reg8[[16..31]] = select(1, 2);)
])
AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed
's/192\.168\.0\..0/192.168.0.??/' | ovn_strip_lflows], [0], [dnl
@@ -6751,7 +6762,7 @@ AT_CHECK([grep -e "lr_in_defrag" lr0flows |
ovn_strip_lflows], [0], [dnl
dnl The chassis was created with other_config:ct-no-masked-label=false, the
flows
dnl should be using ct_label.ecmp_reply_port.
AT_CHECK([grep -e "lr_in_arp_resolve.*ecmp" lr0flows | ovn_strip_lflows], [0],
[dnl
- table=??(lr_in_arp_resolve ), priority=200 , match=(ct.rpl &&
ct_label.ecmp_reply_port == 1), action=(push(xxreg1); xxreg1 = ct_label;
eth.dst = xxreg1[[32..79]]; pop(xxreg1); next;)
+ table=??(lr_in_arp_resolve ), priority=200 ,
match=(ct_label.ecmp_reply_port == 1), action=(push(xxreg1); xxreg1 = ct_label;
eth.dst = xxreg1[[32..79]]; pop(xxreg1); next;)
])
dnl Simulate an ovn-controller upgrade to a version that supports
@@ -6761,14 +6772,19 @@ check ovn-sbctl set chassis ch1
other_config:ct-no-masked-label=true
check ovn-nbctl --wait=sb sync
ovn-sbctl dump-flows lr0 > lr0flows
AT_CHECK([grep -e "lr_in_arp_resolve.*ecmp" lr0flows | ovn_strip_lflows], [0],
[dnl
- table=??(lr_in_arp_resolve ), priority=200 , match=(ct.rpl &&
ct_mark.ecmp_reply_port == 1), action=(push(xxreg1); xxreg1 = ct_label; eth.dst
= xxreg1[[32..79]]; pop(xxreg1); next;)
+ table=??(lr_in_arp_resolve ), priority=200 ,
match=(ct_mark.ecmp_reply_port == 1), action=(push(xxreg1); xxreg1 = ct_label;
eth.dst = xxreg1[[32..79]]; pop(xxreg1); next;)
])
# add ecmp route with wrong nexthop
check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1
192.168.1.20
ovn-sbctl dump-flows lr0 > lr0flows
-AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | ovn_strip_lflows],
[0], [dnl
+AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl
+ table=??(lr_in_ip_routing ), priority=0 , match=(1), action=(drop;)
+ table=??(lr_in_ip_routing ), priority=10300,
match=(ct_mark.ecmp_reply_port == 1 && reg7 == 0 && ip4.dst == 1.0.0.1/32),
action=(ip.ttl--; flags.loopback = 1; eth.src = 00:00:20:20:12:13; reg1 =
192.168.0.1; outport = "lr0-public"; next;)
+ table=??(lr_in_ip_routing ), priority=10550, match=(nd_rs || nd_ra),
action=(drop;)
+ table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lr0-public"
&& ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 =
ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13;
outport = "lr0-public"; flags.loopback = 1; next;)
+ table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst ==
192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 =
192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public";
flags.loopback = 1; next;)
table=??(lr_in_ip_routing ), priority=97 , match=(reg7 == 0 && ip4.dst
== 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1;
reg8[[16..31]] = select(1, 2);)
])
AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed
's/192\.168\.0\..0/192.168.0.??/' | ovn_strip_lflows], [0], [dnl
--
2.45.2
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev