The mechanism for refreshing mac binding entries will send out an ARP/ND
request using the logical port's mac address, regardless of whether the
port is local to a given chassis.
For gateway ports this will make a given LRP incorrectly appear to be
claimed on the chassis that sends out the request, thereby confusing
physical switches and other gateway chassis, which ultimately results in
broken connectivity for that LRP.
In particular, when the chassis that has actually claimed the LRP, sees
the request from the other chassis with the mac address from its LRP,
the OVS bridge that provides the external connectivity will incorrectly
relearn the LRP's mac address on the port that is connected to the
physical network. This causes it to drop all further traffic destined
to the LRP until egress traffic coming from the LRP causes it to move
the mac back to the correct port or until the FDB entry times out.
Add a check to verify that a logical port from a mac binding is local to
a given chassis before trying to refresh it.
Fixes: 1e4d4409f391 ("controller: Send ARP/ND for stale mac_bindings entries.")
Signed-off-by: Felix Moebius <[email protected]>
---
v2:
- Removed handler specific parameters in statctrl
- Avoided double lookup for port binding + moved the locality check to
a later point to avoid unnecessary lookups
v3:
- Added test
- Split refactor into separate commit
---
controller/lport.c | 19 ++++++---
controller/lport.h | 3 ++
controller/mac-cache.c | 8 ++++
controller/mac-cache.h | 1 +
controller/ovn-controller.c | 2 +-
controller/statctrl.c | 2 +
controller/statctrl.h | 1 +
tests/ovn.at | 83 +++++++++++++++++++++++++++++++++++++
8 files changed, 112 insertions(+), 7 deletions(-)
diff --git a/controller/lport.c b/controller/lport.c
index b30bcd398..9cb7308ce 100644
--- a/controller/lport.c
+++ b/controller/lport.c
@@ -89,13 +89,10 @@ lport_is_chassis_resident(struct ovsdb_idl_index
*sbrec_port_binding_by_name,
}
bool
-lport_is_local(struct ovsdb_idl_index *sbrec_port_binding_by_name,
- const struct sbrec_chassis *chassis,
- const char *port_name)
+lport_pb_is_local(struct ovsdb_idl_index *sbrec_port_binding_by_name,
+ const struct sbrec_chassis *chassis,
+ const struct sbrec_port_binding *pb)
{
- const struct sbrec_port_binding *pb = lport_lookup_by_name(
- sbrec_port_binding_by_name, port_name);
-
if (!pb) {
return false;
}
@@ -115,6 +112,16 @@ lport_is_local(struct ovsdb_idl_index
*sbrec_port_binding_by_name,
return lport_pb_is_chassis_resident(chassis, cr_pb);
}
+bool
+lport_is_local(struct ovsdb_idl_index *sbrec_port_binding_by_name,
+ const struct sbrec_chassis *chassis,
+ const char *port_name)
+{
+ const struct sbrec_port_binding *pb = lport_lookup_by_name(
+ sbrec_port_binding_by_name, port_name);
+ return lport_pb_is_local(sbrec_port_binding_by_name, chassis, pb);
+}
+
const struct sbrec_port_binding *
lport_get_peer(const struct sbrec_port_binding *pb,
struct ovsdb_idl_index *sbrec_port_binding_by_name)
diff --git a/controller/lport.h b/controller/lport.h
index 6d48301d2..4465c4242 100644
--- a/controller/lport.h
+++ b/controller/lport.h
@@ -69,6 +69,9 @@ bool lport_pb_is_chassis_resident(const struct sbrec_chassis
*chassis,
bool lport_is_local(struct ovsdb_idl_index *sbrec_port_binding_by_name,
const struct sbrec_chassis *chassis,
const char *port_name);
+bool lport_pb_is_local(struct ovsdb_idl_index *sbrec_port_binding_by_name,
+ const struct sbrec_chassis *chassis,
+ const struct sbrec_port_binding *pb);
const struct sbrec_port_binding *lport_get_peer(
const struct sbrec_port_binding *,
struct ovsdb_idl_index *sbrec_port_binding_by_name);
diff --git a/controller/mac-cache.c b/controller/mac-cache.c
index a03581b3f..bdf35eeb7 100644
--- a/controller/mac-cache.c
+++ b/controller/mac-cache.c
@@ -907,6 +907,14 @@ mac_binding_probe_stats_run(struct vector *stats_vec,
uint64_t *req_delay,
continue;
}
+ if (!lport_pb_is_local(probe_data->sbrec_port_binding_by_name,
+ probe_data->chassis, pb)) {
+ mac_binding_update_log("Not sending ARP/ND request for non-local",
+ &mb->data, true, threshold,
+ stats->idle_age_ms, since_updated_ms);
+ continue;
+ }
+
struct lport_addresses laddr;
if (!extract_lsp_addresses(pb->mac[0], &laddr)) {
continue;
diff --git a/controller/mac-cache.h b/controller/mac-cache.h
index e75c49820..7edb129d7 100644
--- a/controller/mac-cache.h
+++ b/controller/mac-cache.h
@@ -67,6 +67,7 @@ struct mac_binding_probe_data {
struct mac_cache_data *cache_data;
struct rconn *swconn;
struct ovsdb_idl_index *sbrec_port_binding_by_name;
+ const struct sbrec_chassis *chassis;
};
struct mac_binding {
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 4353f6094..f57618273 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -7843,7 +7843,7 @@ main(int argc, char *argv[])
mac_cache_data = engine_get_data(&en_mac_cache);
if (mac_cache_data) {
statctrl_run(ovnsb_idl_txn, sbrec_port_binding_by_name,
- mac_cache_data);
+ chassis, mac_cache_data);
}
ofctrl_seqno_update_create(
diff --git a/controller/statctrl.c b/controller/statctrl.c
index 3ad135120..f9f0a30f1 100644
--- a/controller/statctrl.c
+++ b/controller/statctrl.c
@@ -172,6 +172,7 @@ statctrl_init(void)
void
statctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
struct ovsdb_idl_index *sbrec_port_binding_by_name,
+ const struct sbrec_chassis *chassis,
struct mac_cache_data *mac_cache_data)
{
if (!ovnsb_idl_txn) {
@@ -182,6 +183,7 @@ statctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
.cache_data = mac_cache_data,
.sbrec_port_binding_by_name = sbrec_port_binding_by_name,
.swconn = statctrl_ctx.swconn,
+ .chassis = chassis,
};
void *node_data[STATS_MAX] = {
diff --git a/controller/statctrl.h b/controller/statctrl.h
index 69a0b6f35..3e56b4a54 100644
--- a/controller/statctrl.h
+++ b/controller/statctrl.h
@@ -21,6 +21,7 @@
void statctrl_init(void);
void statctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
struct ovsdb_idl_index *sbrec_port_binding_by_name,
+ const struct sbrec_chassis *chassis,
struct mac_cache_data *mac_cache_data);
void statctrl_update_swconn(const char *target, int probe_interval);
diff --git a/tests/ovn.at b/tests/ovn.at
index 9082bba82..6c8c5fd56 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -37090,6 +37090,89 @@ OVN_CLEANUP([hv1])
AT_CLEANUP
])
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([MAC binding aging - probing distributed GW router])
+AT_SKIP_IF([test $HAVE_SCAPY = no])
+ovn_start
+
+# Check that during mac probing only the chassis that has currently claimed
+# a given port tries to refresh the mac bindings associated with that port.
+# Create a router connected to a switch with a localnet port on br-phys.
+# Create two hypervisors hv1 and hv2 and bind the lrp on hv1. hv1 should
+# then try to refresh mac bindings for the lrp whereas hv2 should not.
+
+net_add n1
+for i in 1 2; do
+ sim_add hv$i
+ as hv$i
+ ovn-appctl -t ovn-controller vlog/set mac_cache:file:dbg pinctrl:file:dbg
+ check ovs-vsctl set open . external-ids:ovn-bridge-mappings=physnet:br-phys
+ check ovs-vsctl add-br br-phys
+ check ovs-vsctl add-port br-phys snoopvif -- \
+ set Interface snoopvif \
+ options:tx_pcap=hv$i/snoopvif-tx.pcap \
+ options:rxq_pcap=hv$i/snoopvif-rx.pcap
+ check ovs-vsctl add-br br-underlay
+ ovn_attach n1 br-underlay 192.168.0.$i
+done
+
+lrp_mac=00:00:00:00:00:0a
+lrp_ip=192.168.1.10
+aging_th=10
+
+check ovn-nbctl lr-add lr
+check ovn-nbctl set logical_router lr
options:mac_binding_age_threshold=$aging_th
+check ovn-nbctl lrp-add lr lr-ls $lrp_mac $lrp_ip/24
+check ovn-nbctl ls-add ls
+check ovn-nbctl lsp-add-router-port ls ls-lr lr-ls
+check ovn-nbctl lsp-add-localnet-port ls ls-phys physnet
+check ovn-nbctl lrp-set-gateway-chassis lr-ls hv1 20
+check ovn-nbctl lrp-set-gateway-chassis lr-ls hv2 10
+
+check ovn-nbctl --wait=hv sync
+wait_for_ports_up
+
+# Wait for hv1 to claim the lrp
+hv1_chassis=$(fetch_column Chassis _uuid name=hv1)
+wait_row_count Port_Binding 1 logical_port=cr-lr-ls chassis=$hv1_chassis
+
+make_garp() {
+ local mac=$1 ip=$2
+ local pkt=$(fmt_pkt \
+ "Ether(dst='ff:ff:ff:ff:ff:ff', src='${mac}')/ \
+ ARP(op=1, hwsrc='${mac}', hwdst='00:00:00:00:00:00', psrc='${ip}',
pdst='${ip}')")
+ echo "$pkt"
+}
+
+# hv1 sends out 5 garps after claiming the lrp
+make_garp $lrp_mac $lrp_ip >> hv1_snoopvif.expected
+
+# Let hv1 receive an external garp to create a mac binding on the lrp
+ext_mac=00:00:00:00:00:14
+ext_ip=192.168.1.20
+check as hv1 ovs-appctl netdev-dummy/receive snoopvif $(make_garp $ext_mac
$ext_ip)
+
+# Wait for mac binding to be created
+wait_row_count mac_binding 1 ip="$ext_ip" logical_port="lr-ls"
+
+# Wait for mac binding to be removed
+wait_row_count mac_binding 0 ip="$ext_ip" logical_port="lr-ls"
+
+# hv1 should have sent out arps for mac binding refresh
+arp_req=$(fmt_pkt \
+ "Ether(dst='ff:ff:ff:ff:ff:ff', src='$lrp_mac')/ \
+ ARP(hwsrc='$lrp_mac', hwdst='00:00:00:00:00:00', psrc='$lrp_ip',
pdst='$ext_ip')")
+echo $arp_req >> hv1_snoopvif.expected
+OVN_CHECK_PACKETS_UNIQ([hv1/snoopvif-tx.pcap], [hv1_snoopvif.expected])
+
+# hv2 should have sent nothing
+touch hv2_snoopvif.expected
+OVN_CHECK_PACKETS([hv2/snoopvif-tx.pcap], [hv2_snoopvif.expected])
+
+OVN_CLEANUP([hv1], [hv2])
+AT_CLEANUP
+])
+
OVN_FOR_EACH_NORTHD([
AT_SETUP([MAC binding aging - probing GW router Dynamic Neigh])
AT_SKIP_IF([test $HAVE_SCAPY = no])
--
2.52.0
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev