I pushed these patches to main, 25.03, 24.09, and 24.03.

On 4/11/25 12:03, Mark Michelson wrote:
Thanks Ales.

Acked-by: Mark Michelson <mmich...@redhat.com>

On 4/11/25 06:50, Ales Musil via dev wrote:
Limit the number of claim for virtual ports to 1/s with backoff
period of 10 seconds. This should help with cluster stabilization
during fail-overs. It can happen that during fail-over there will be
a lot of attempts to claim the virtual ports. With the backoff we can
ensure that the cluster had enough time to try to stabilize the
fail-overs.

Reported-at: https://issues.redhat.com/browse/FDP-443
Signed-off-by: Ales Musil <amu...@redhat.com>
---
  controller/pinctrl.c |  59 ++++++++++++++++++++----
  northd/northd.c      |  15 ++++++
  tests/ovn.at         | 106 +++++++++++++++++++++++++++++++++++++++++++
  3 files changed, 171 insertions(+), 9 deletions(-)

diff --git a/controller/pinctrl.c b/controller/pinctrl.c
index 436e5d487..a269e5375 100644
--- a/controller/pinctrl.c
+++ b/controller/pinctrl.c
@@ -7336,6 +7336,9 @@ pinctrl_find_put_vport_binding(uint32_t dp_key, uint32_t vport_key,
      return NULL;
  }
+#define VPORT_BACKOFF_INTERVAL 1000ll
+#define VPORT_MAX_POSTPONE 10000ll
+
  static void
  run_put_vport_binding(struct ovsdb_idl_txn *ovnsb_idl_txn OVS_UNUSED,
                        struct ovsdb_idl_index *sbrec_datapath_binding_by_key, @@ -7356,18 +7359,56 @@ run_put_vport_binding(struct ovsdb_idl_txn *ovnsb_idl_txn OVS_UNUSED,
      }
      /* pinctrl module updates the port binding only for type 'virtual'. */
-    if (!strcmp(pb->type, "virtual")) {
-        const struct sbrec_port_binding *parent = lport_lookup_by_key(
+    if (strcmp(pb->type, "virtual")) {
+        return;
+    }
+
+    const struct sbrec_port_binding *parent = lport_lookup_by_key(
          sbrec_datapath_binding_by_key, sbrec_port_binding_by_key,
          vpb->dp_key, vpb->vport_parent_key);
-        if (parent) {
-            VLOG_INFO("Claiming virtual lport %s for this chassis "
-                       "with the virtual parent %s",
-                       pb->logical_port, parent->logical_port);
-            sbrec_port_binding_set_chassis(pb, chassis);
-            sbrec_port_binding_set_virtual_parent(pb, parent->logical_port);
-        }
+    if (!parent) {
+        return;
      }
+
+    if (pb->chassis == chassis && !strcmp(pb->virtual_parent,
+                                          parent->logical_port)) {
+        VLOG_DBG("Virtual port %s is already claimed by this chassis.",
+                 pb->logical_port);
+        return;
+    }
+
+    long long timewall_now = time_wall_msec();
+    long long last_claim = ovn_smap_get_llong(&pb->options,
+                                              "vport-last-claim", 0);
+    long long backoff = ovn_smap_get_llong(&pb->options, "vport-backoff", 0);
+
+    struct smap options;
+    smap_clone(&options, &pb->options);
+
+    long long next_backoff;
+    if (timewall_now >= last_claim + backoff) {
+        sbrec_port_binding_set_chassis(pb, chassis);
+        sbrec_port_binding_set_virtual_parent(pb, parent->logical_port);
+
+        next_backoff = VPORT_BACKOFF_INTERVAL;
+        smap_remove(&options, "vport-last-claim");
+        smap_add_format(&options, "vport-last-claim", "%lld", timewall_now);
+
+        VLOG_INFO("Claiming virtual lport %s for this chassis "
+                  "with the virtual parent %s",
+                  pb->logical_port, parent->logical_port);
+    } else {
+        next_backoff = MIN(backoff + VPORT_BACKOFF_INTERVAL,
+                           VPORT_MAX_POSTPONE);
+        VLOG_INFO("Postponing virtual lport %s claim by %lld ms",
+                  pb->logical_port, next_backoff);
+    }
+
+    smap_remove(&options, "vport-backoff");
+    smap_add_format(&options, "vport-backoff", "%lld", next_backoff);
+
+    sbrec_port_binding_set_options(pb, &options);
+    smap_destroy(&options);
  }
  /* Called by pinctrl_run(). Runs with in the main ovn-controller
diff --git a/northd/northd.c b/northd/northd.c
index 762e4e057..9bca4dcb5 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -3332,6 +3332,21 @@ ovn_port_update_sbrec(struct ovsdb_idl_txn *ovnsb_txn,
                  }
              }
+            /* Preserve virtual port options. */
+            if (!strcmp(op->nbsp->type, "virtual")) {
+                const char *last_claim = smap_get(&op->sb->options,
+                                                  "vport-last-claim");
+                if (last_claim) {
+                    smap_add(&options, "vport-last-claim", last_claim);
+                }
+
+                const char *backoff = smap_get(&op->sb->options,
+                                               "vport-backoff");
+                if (backoff) {
+                    smap_add(&options, "vport-backoff", backoff);
+                }
+            }
+
              if (lsp_is_remote(op->nbsp) ||
                  op->sb->requested_additional_chassis) {
                  /* ovn-northd is supposed to set port_binding for remote ports
diff --git a/tests/ovn.at b/tests/ovn.at
index bbaa653a8..6c4a1db7d 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -23023,6 +23023,8 @@ check_virtual_offlows_not_present hv1
  # hv2 should not have the flow for ACL.
  check_virtual_offlows_not_present hv2
+# Remove the backoff period, to not prolong the test.
+ovn-sbctl remove Port_Binding sw0-vir options vport-backoff
  # From sw0-p0 resend GARP for 10.0.0.10. hv1 should reclaim sw0-vir
  # and sw0-p1 should be its virtual_parent.
  send_garp hv1 hv1-vif1 1 $(hex_to_mac $eth_src) $(hex_to_mac $eth_dst) $spa $tpa
@@ -23067,6 +23069,8 @@ check_virtual_offlows_not_present hv1
  # hv2 should not have the above flows.
  check_virtual_offlows_not_present hv2
+# Wait for the backoff period to pass.
+sleep 1.2
  # From sw0-p0 send GARP for 10.0.0.10. hv1 should claim sw0-vir
  # and sw0-p1 should be its virtual_parent.
  eth_src="50:54:00:00:00:03"
@@ -23087,6 +23091,8 @@ check_virtual_offlows_present hv1
  # hv2 should not have the above flows.
  check_virtual_offlows_not_present hv2
+# Remove the backoff period, to not prolong the test.
+ovn-sbctl remove Port_Binding sw0-vir options vport-backoff
  # From sw0-p3 send GARP for 10.0.0.10. hv1 should claim sw0-vir
  # and sw0-p3 should be its virtual_parent.
  eth_src="50:54:00:00:00:05"
@@ -23115,6 +23121,8 @@ check_virtual_offlows_present hv1
  # hv2 should not have the above flows.
  check_virtual_offlows_not_present hv2
+# Remove the backoff period, to not prolong the test.
+ovn-sbctl remove Port_Binding sw0-vir options vport-backoff
  # send the garp from sw0-p2 (in hv2). hv2 should claim sw0-vir
  # and sw0-p2 shpuld be its virtual_parent.
  eth_src="50:54:00:00:00:04"
@@ -23142,6 +23150,8 @@ check_virtual_offlows_present hv2
  # hv1 should not have the above flows.
  check_virtual_offlows_not_present hv1
+# Remove the backoff period, to not prolong the test.
+ovn-sbctl remove Port_Binding sw0-vir options vport-backoff
  # Now send arp reply from sw0-p1. hv1 should claim sw0-vir
  # and sw0-p1 shpuld be its virtual_parent.
  eth_src=505400000003
@@ -42661,3 +42671,99 @@ echo $expected > expected
  OVN_CHECK_PACKETS([hv1/server-tx.pcap], [expected])
  AT_CLEANUP
  ])
+
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([virtual port claim postpone])
+AT_KEYWORDS([virtual ports])
+ovn_start
+
+net_add n1
+
+sim_add hv1
+as hv1
+check ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.1
+check ovs-vsctl -- add-port br-int hv1-vif1 -- \
+    set interface hv1-vif1 external-ids:iface-id=sw0-p1 \
+    options:tx_pcap=hv1/vif1-tx.pcap \
+    options:rxq_pcap=hv1/vif1-rx.pcap
+
+sim_add hv2
+as hv2
+check ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.2
+check ovs-vsctl -- add-port br-int hv2-vif1 -- \
+    set interface hv2-vif1 external-ids:iface-id=sw0-p2 \
+    options:tx_pcap=hv2/vif1-tx.pcap \
+    options:rxq_pcap=hv2/vif1-rx.pcap
+
+sim_add hv3
+as hv3
+check ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.3
+check ovs-vsctl -- add-port br-int hv3-vif1 -- \
+    set interface hv3-vif1 external-ids:iface-id=sw0-p3 \
+    options:tx_pcap=hv3/vif1-tx.pcap \
+    options:rxq_pcap=hv3/vif1-rx.pcap
+
+check ovn-nbctl ls-add sw0
+
+check ovn-nbctl lsp-add sw0 sw0-p1
+check ovn-nbctl lsp-set-addresses sw0-p1 "50:54:00:00:00:01 10.0.0.1"
+check ovn-nbctl lsp-set-port-security sw0-p1 "50:54:00:00:00:01 10.0.0.1 10.0.0.10"
+
+check ovn-nbctl lsp-add sw0 sw0-p2
+check ovn-nbctl lsp-set-addresses sw0-p2 "50:54:00:00:00:02 10.0.0.2"
+check ovn-nbctl lsp-set-port-security sw0-p2 "50:54:00:00:00:02 10.0.0.2 10.0.0.10"
+
+check ovn-nbctl lsp-add sw0 sw0-p3
+check ovn-nbctl lsp-set-addresses sw0-p3 "50:54:00:00:00:03 10.0.0.3"
+check ovn-nbctl lsp-set-port-security sw0-p3 "50:54:00:00:00:03 10.0.0.3 10.0.0.10"
+
+check ovn-nbctl lsp-add sw0 sw0-vir
+check ovn-nbctl lsp-set-addresses sw0-vir "50:54:00:00:00:10 10.0.0.10"
+check ovn-nbctl lsp-set-type sw0-vir virtual
+check ovn-nbctl set logical_switch_port sw0-vir options:virtual-ip=10.0.0.10 +check ovn-nbctl set logical_switch_port sw0-vir options:virtual-parents="sw0-p1,sw0-p2,sw0-p3"
+
+wait_for_ports_up
+check ovn-nbctl --wait=hv sync
+
+# Claim virtual port on hv1.
+send_garp hv1 hv1-vif1 1 "50:54:00:00:00:01" "ff:ff:ff:ff:ff:ff" 10.0.0.10 10.0.0.10 +wait_row_count Port_Binding 1 logical_port=sw0-vir chassis=$(fetch_column Chassis _uuid name=hv1)
+wait_row_count Port_Binding 1 logical_port=sw0-vir virtual_parent=sw0-p1
+wait_for_ports_up sw0-vir
+
+# Check that the backoff interval was set.
+AT_CHECK([fetch_column Port_Binding options logical_port=sw0-vir | grep -q "vport-last-claim"]) +check_row_count Port_Binding 1 logical_port=sw0-vir options:vport-backoff=1000
+
+# Pass the backoff timer
+sleep 1.2
+
+# Try to claim on all chassis in quick succession.
+for i in $(seq 1 10); do
+    send_garp hv3 hv3-vif1 1 "50:54:00:00:00:03" "ff:ff:ff:ff:ff:ff" 10.0.0.10 10.0.0.10 +    send_garp hv2 hv2-vif1 1 "50:54:00:00:00:02" "ff:ff:ff:ff:ff:ff" 10.0.0.10 10.0.0.10 +    send_garp hv1 hv1-vif1 1 "50:54:00:00:00:01" "ff:ff:ff:ff:ff:ff" 10.0.0.10 10.0.0.10
+    sleep 0.2
+done
+
+wait_row_count Port_Binding 1 logical_port=sw0-vir chassis=$(fetch_column Chassis _uuid name=hv3)
+wait_row_count Port_Binding 1 logical_port=sw0-vir virtual_parent=sw0-p3
+wait_for_ports_up sw0-vir
+
+# Check that the backoff interval reached the maximum.
+AT_CHECK([fetch_column Port_Binding options logical_port=sw0-vir | grep -q "vport-last-claim"]) +check_row_count Port_Binding 1 logical_port=sw0-vir options:vport-backoff=10000
+
+AT_CHECK([grep -q "Postponing virtual lport sw0-vir" hv1/ovn-controller.log ]) +AT_CHECK([grep -q "Postponing virtual lport sw0-vir" hv2/ovn-controller.log ])
+
+# hv3 has the claim already, it doesn't bump the backoff interval.
+AT_CHECK([grep -q "Postponing virtual lport sw0-vir" hv3/ovn-controller.log], [1])
+
+OVN_CLEANUP([hv1])
+AT_CLEANUP
+])


_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to