Rely on the following new actions for ecmp-symmetric routing: - chk_ecmp_nh_mac - chk_ecmp_nh - commit_ecmp_nh
In this way OVN is able to keep up if for any reason the ECMP traffic source changes L2 address and keeps old L3 addresses. Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2096233 Signed-off-by: Lorenzo Bianconi <[email protected]> --- northd/northd.c | 46 +++++++++++++++++++++++++++--------- northd/ovn-northd.8.xml | 25 ++++++++++++-------- tests/ovn-northd.at | 4 ++-- tests/ovn.at | 4 ++-- tests/system-ovn.at | 52 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 106 insertions(+), 25 deletions(-) diff --git a/northd/northd.c b/northd/northd.c index d31cb1688..f271b4882 100644 --- a/northd/northd.c +++ b/northd/northd.c @@ -227,6 +227,7 @@ enum ovn_stage { #define REGBIT_LOOKUP_NEIGHBOR_RESULT "reg9[2]" #define REGBIT_LOOKUP_NEIGHBOR_IP_RESULT "reg9[3]" #define REGBIT_DST_NAT_IP_LOCAL "reg9[4]" +#define REGBIT_KNOWN_ECMP_NH "reg9[5]" /* Register to store the eth address associated to a router port for packets * received in S_ROUTER_IN_ADMISSION. @@ -327,7 +328,8 @@ enum ovn_stage { * | | EGRESS_LOOPBACK/ | G | UNUSED | * | R9 | PKT_LARGER/ | 4 | | * | | LOOKUP_NEIGHBOR_RESULT/| | | - * | | SKIP_LOOKUP_NEIGHBOR} | | | + * | | SKIP_LOOKUP_NEIGHBOR/ | | | + * | | KNOWN_ECMP_NH} | | | * | | | | | * | | REG_ORIG_TP_DPORT_ROUTER | | | * | | | | | @@ -9437,6 +9439,7 @@ add_ecmp_symmetric_reply_flows(struct hmap *lflows, struct ds *route_match) { const struct nbrec_logical_router_static_route *st_route = route->route; + struct ds base_match = DS_EMPTY_INITIALIZER; struct ds match = DS_EMPTY_INITIALIZER; struct ds actions = DS_EMPTY_INITIALIZER; struct ds ecmp_reply = DS_EMPTY_INITIALIZER; @@ -9448,20 +9451,22 @@ add_ecmp_symmetric_reply_flows(struct hmap *lflows, /* If symmetric ECMP replies are enabled, then packets that arrive over * an ECMP route need to go through conntrack. */ - ds_put_format(&match, "inport == %s && ip%s.%s == %s", + ds_put_format(&base_match, "inport == %s && ip%s.%s == %s", out_port->json_key, IN6_IS_ADDR_V4MAPPED(&route->prefix) ? "4" : "6", route->is_src_route ? "dst" : "src", cidr); free(cidr); ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DEFRAG, 100, - ds_cstr(&match), "ct_next;", - &st_route->header_); + ds_cstr(&base_match), + REGBIT_KNOWN_ECMP_NH" = chk_ecmp_nh_mac(); ct_next;", + &st_route->header_); /* And packets that go out over an ECMP route need conntrack */ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DEFRAG, 100, - ds_cstr(route_match), "ct_next;", - &st_route->header_); + ds_cstr(route_match), + REGBIT_KNOWN_ECMP_NH" = chk_ecmp_nh(); ct_next;", + &st_route->header_); /* Save src eth and inport in ct_label for packets that arrive over * an ECMP route. @@ -9469,11 +9474,28 @@ add_ecmp_symmetric_reply_flows(struct hmap *lflows, * NOTE: we purposely are not clearing match before this * ds_put_cstr() call. The previous contents are needed. */ - ds_put_cstr(&match, " && (ct.new && !ct.est)"); + ds_put_format(&match, "%s && (ct.new && !ct.est)", ds_cstr(&base_match)); + ds_put_format(&actions, + "ct_commit { ct_label.ecmp_reply_eth = eth.src; " + " %s = %" PRId64 ";}; " + "commit_ecmp_nh(ipv6 = %s;); next;", + ct_ecmp_reply_port_match, out_port->sb->tunnel_key, + IN6_IS_ADDR_V4MAPPED(&route->prefix) ? "false" : "true"); + ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ECMP_STATEFUL, 100, + ds_cstr(&match), ds_cstr(&actions), + &st_route->header_); - ds_put_format(&actions, "ct_commit { ct_label.ecmp_reply_eth = eth.src;" - " %s = %" PRId64 ";}; next;", - ct_ecmp_reply_port_match, out_port->sb->tunnel_key); + ds_clear(&match); + ds_put_format(&match, + "%s && (!ct.rpl && ct.est) && "REGBIT_KNOWN_ECMP_NH" == 0", + ds_cstr(&base_match)); + ds_clear(&actions); + ds_put_format(&actions, + "ct_commit { ct_label.ecmp_reply_eth = eth.src; " + " %s = %" PRId64 ";}; " + "commit_ecmp_nh(ipv6 = %s;); next;", + ct_ecmp_reply_port_match, out_port->sb->tunnel_key, + IN6_IS_ADDR_V4MAPPED(&route->prefix) ? "false" : "true"); ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ECMP_STATEFUL, 100, ds_cstr(&match), ds_cstr(&actions), &st_route->header_); @@ -9481,7 +9503,8 @@ add_ecmp_symmetric_reply_flows(struct hmap *lflows, /* Bypass ECMP selection if we already have ct_label information * for where to route the packet. */ - ds_put_format(&ecmp_reply, "ct.rpl && %s == %"PRId64, + ds_put_format(&ecmp_reply, + "ct.rpl && "REGBIT_KNOWN_ECMP_NH" == 1 && %s == %"PRId64, ct_ecmp_reply_port_match, out_port->sb->tunnel_key); ds_clear(&match); ds_put_format(&match, "%s && %s", ds_cstr(&ecmp_reply), @@ -9517,6 +9540,7 @@ add_ecmp_symmetric_reply_flows(struct hmap *lflows, 200, ds_cstr(&ecmp_reply), action, &st_route->header_); + ds_destroy(&base_match); ds_destroy(&match); ds_destroy(&actions); ds_destroy(&ecmp_reply); diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml index a87df24bd..aa7196a92 100644 --- a/northd/ovn-northd.8.xml +++ b/northd/ovn-northd.8.xml @@ -3166,8 +3166,12 @@ icmp6 { configured. The matching logic for these ports essentially reverses the configured logic of the ECMP route. So for instance, a route with a destination routing policy will instead match if the source IP address - matches the static route's prefix. The flow uses the action - <code>ct_next</code> to send IP packets to the connection tracker for + matches the static route's prefix. The flow uses the actions + <code>chk_ecmp_nh_mac(); ct_next</code> or + <code>chk_ecmp_nh(); ct_next</code> to send IP packets to table + <code>OFTABLE_ECMP_NH_MAC</code> or to table + <code>OFTABLE_ECMP_NH</code> in order to check if source info are already + stored by OVN and then to the connection tracker for packet de-fragmentation and tracking before sending it to the next table. </p> @@ -3426,10 +3430,11 @@ icmp6 { route with a destination routing policy will instead match if the source IP address matches the static route's prefix. The flow uses the action <code>ct_commit { ct_label.ecmp_reply_eth = eth.src;" - " ct_mark.ecmp_reply_port = <var>K</var>;}; next; </code> to commit - the connection and storing <code>eth.src</code> and the ECMP - reply port binding tunnel key <var>K</var> in the - <code>ct_label</code>. + " ct_mark.ecmp_reply_port = <var>K</var>;}; commit_ecmp_nh(); next; + </code> to commit the connection and storing <code>eth.src</code> and + the ECMP reply port binding tunnel key <var>K</var> in the + <code>ct_label</code> and the traffic pattern to table + <code>OFTABLE_ECMP_NH_MAC</code> or <code>OFTABLE_ECMP_NH</code>. </li> </ul> @@ -3568,10 +3573,10 @@ output; which the packet should be sent. The <code>ct_mark.ecmp_reply_port</code> tells the logical router port on which the packet should be sent. These values saved to the conntrack fields when the initial ingress traffic is - received over the ECMP route and committed to conntrack. The - priority-10300 flows in this stage set the <code>outport</code>, - while the <code>eth.dst</code> is set by flows at the ARP/ND Resolution - stage. + received over the ECMP route and committed to conntrack. + If <code>REGBIT_KNOWN_ECMP_NH</code> is set, the priority-10300 + flows in this stage set the <code>outport</code>, while the + <code>eth.dst</code> is set by flows at the ARP/ND Resolution stage. </p> <p> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at index 610a5ce12..c153a338c 100644 --- a/tests/ovn-northd.at +++ b/tests/ovn-northd.at @@ -5647,7 +5647,7 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192. dnl The chassis was created with other_config:ct-no-masked-label=false, the flows dnl should be using ct_label.ecmp_reply_port. AT_CHECK([grep -e "lr_in_arp_resolve.*ecmp" lr0flows | sed 's/table=../table=??/'], [0], [dnl - table=??(lr_in_arp_resolve ), priority=200 , match=(ct.rpl && ct_label.ecmp_reply_port == 1), action=(push(xxreg1); xxreg1 = ct_label; eth.dst = xxreg1[[32..79]]; pop(xxreg1); next;) + table=??(lr_in_arp_resolve ), priority=200 , match=(ct.rpl && reg9[[5]] == 1 && ct_label.ecmp_reply_port == 1), action=(push(xxreg1); xxreg1 = ct_label; eth.dst = xxreg1[[32..79]]; pop(xxreg1); next;) ]) dnl Simulate an ovn-controller upgrade to a version that supports @@ -5657,7 +5657,7 @@ check ovn-sbctl set chassis ch1 other_config:ct-no-masked-label=true check ovn-nbctl --wait=sb sync ovn-sbctl dump-flows lr0 > lr0flows AT_CHECK([grep -e "lr_in_arp_resolve.*ecmp" lr0flows | sed 's/table=../table=??/'], [0], [dnl - table=??(lr_in_arp_resolve ), priority=200 , match=(ct.rpl && ct_mark.ecmp_reply_port == 1), action=(push(xxreg1); xxreg1 = ct_label; eth.dst = xxreg1[[32..79]]; pop(xxreg1); next;) + table=??(lr_in_arp_resolve ), priority=200 , match=(ct.rpl && reg9[[5]] == 1 && ct_mark.ecmp_reply_port == 1), action=(push(xxreg1); xxreg1 = ct_label; eth.dst = xxreg1[[32..79]]; pop(xxreg1); next;) ]) # add ecmp route with wrong nexthop diff --git a/tests/ovn.at b/tests/ovn.at index 6e35bb5b8..f180b1814 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -27113,7 +27113,7 @@ AT_CHECK([ grep "priority=200" | \ grep -c "move:NXM_NX_CT_LABEL\\[[\\]]->NXM_NX_XXREG1\\[[\\]],move:NXM_NX_XXREG1\\[[32..79\\]]->NXM_OF_ETH_DST" done; :], [0], [dnl -1 +2 1 0 0 @@ -27238,7 +27238,7 @@ AT_CHECK([ grep "priority=200" | \ grep -c "move:NXM_NX_CT_LABEL\\[[\\]]->NXM_NX_XXREG1\\[[\\]],move:NXM_NX_XXREG1\\[[32..79\\]]->NXM_OF_ETH_DST" done; :], [0], [dnl -1 +2 1 0 0 diff --git a/tests/system-ovn.at b/tests/system-ovn.at index 1cabf1f31..7c7063a38 100644 --- a/tests/system-ovn.at +++ b/tests/system-ovn.at @@ -5981,6 +5981,32 @@ AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(-new+est+rpl+trk).*ct_lab 1 ]) +# Flush conntrack entries for easier output parsing of next test. +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) + +# Change bob1 L2 address anche check the reply is properly updated. +ovn-nbctl set Logical_Router_Port R2_ext mac='"00:00:10:01:02:04"' +ovn-nbctl set Logical_Switch_Port r2-ext \ + type=router options:router-port=R2_ext addresses='"00:00:10:01:02:04"' + +NS_CHECK_EXEC([bob1], [ping -q -c 20 -i 0.3 -w 15 10.0.0.2 | FORMAT_PING], \ +[0], [dnl +20 packets transmitted, 20 received, 0% packet loss, time 0ms +]) + +AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(+new-est-rpl+trk).*ct(.*label=0x1001020400000000/.*)' -c], [0], [dnl +1 +]) +AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(-new+est+rpl+trk).*ct_label(0x1001020400000000)' -c], [0], [dnl +1 +]) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 0x1001020400000000 | FORMAT_CT(172.16.0.1) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | +sed -e 's/mark=[[0-9]]*/mark=<cleared>/'], [0], [dnl +icmp,orig=(src=172.16.0.1,dst=10.0.0.2,id=<cleared>,type=8,code=0),reply=(src=10.0.0.2,dst=172.16.0.1,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0x1001020400000000 +]) + ovs-ofctl dump-flows br-int OVS_APP_EXIT_AND_WAIT([ovn-controller]) @@ -6148,6 +6174,32 @@ sed -e 's/mark=[[0-9]]*/mark=<cleared>/'], [0], [dnl icmpv6,orig=(src=fd07::1,dst=fd01::2,id=<cleared>,type=128,code=0),reply=(src=fd01::2,dst=fd07::1,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0x401020400000000 ]) +# Flush conntrack entries for easier output parsing of next test. +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) + +# Change bob1 L2 address anche check the reply is properly updated. +ovn-nbctl set Logical_Router_Port R2_ext mac='"00:00:10:01:02:04"' +ovn-nbctl set Logical_Switch_Port r2-ext \ + type=router options:router-port=R2_ext addresses='"00:00:10:01:02:04"' + +NS_CHECK_EXEC([bob1], [ping -q -c 20 -i 0.3 -w 15 fd01::2 | FORMAT_PING], \ +[0], [dnl +20 packets transmitted, 20 received, 0% packet loss, time 0ms +]) + +AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(+new-est-rpl+trk).*ct(.*label=0x1001020400000000/.*)' -c], [0], [dnl +1 +]) +AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(-new+est+rpl+trk).*ct_label(0x1001020400000000)' -c], [0], [dnl +1 +]) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 0x1001020400000000 | FORMAT_CT(fd01::2) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | +sed -e 's/mark=[[0-9]]*/mark=<cleared>/'], [0], [dnl +icmpv6,orig=(src=fd07::1,dst=fd01::2,id=<cleared>,type=128,code=0),reply=(src=fd01::2,dst=fd07::1,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0x1001020400000000 +]) + ovs-ofctl dump-flows br-int OVS_APP_EXIT_AND_WAIT([ovn-controller]) -- 2.36.1 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
