Hi Ales,
I have a few comments in-line below.
On 6/14/22 09:49, Ales Musil wrote:
Add MAC binding aging mechanism that utilizes
the ownership of MAC binding row. The controller
that "owns" the MAC binding will track idle_age
statistics for related OpenFlows (table 66 and 67).
If the idle_age exceeds the threshold value for aging
the MAC binding row is removed. The threshold is set to
60 secs. The removal won't happen exactly at the 60
second mark, but can happen any time later on, depending
on timing of the ovn-controller loop.
Reported-at: https://bugzilla.redhat.com/2084668
Signed-off-by: Ales Musil <[email protected]>
---
controller/automake.mk | 4 +-
controller/mac-binding-aging.c | 230 +++++++++++++++++++++++++++++++++
controller/mac-binding-aging.h | 32 +++++
controller/ovn-controller.c | 12 ++
4 files changed, 277 insertions(+), 1 deletion(-)
create mode 100644 controller/mac-binding-aging.c
create mode 100644 controller/mac-binding-aging.h
diff --git a/controller/automake.mk b/controller/automake.mk
index c2ab1bbe6..01546e335 100644
--- a/controller/automake.mk
+++ b/controller/automake.mk
@@ -41,7 +41,9 @@ controller_ovn_controller_SOURCES = \
controller/ovsport.h \
controller/ovsport.c \
controller/vif-plug.h \
- controller/vif-plug.c
+ controller/vif-plug.c \
+ controller/mac-binding-aging.h \
+ controller/mac-binding-aging.c
controller_ovn_controller_LDADD = lib/libovn.la $(OVS_LIBDIR)/libopenvswitch.la
man_MANS += controller/ovn-controller.8
diff --git a/controller/mac-binding-aging.c b/controller/mac-binding-aging.c
new file mode 100644
index 000000000..7ab01c4ab
--- /dev/null
+++ b/controller/mac-binding-aging.c
@@ -0,0 +1,230 @@
+
+/* Copyright (c) 2022, Red Hat, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include "byte-order.h"
+#include "dirs.h"
+#include "mac-binding-aging.h"
+#include "openvswitch/hmap.h"
+#include "openvswitch/ofp-flow.h"
+#include "openvswitch/vconn.h"
+#include "openvswitch/vlog.h"
+#include "ovn-sb-idl.h"
+#include "timeval.h"
+#include "util.h"
+#include "uuid.h"
+
+VLOG_DEFINE_THIS_MODULE(mac_binding_aging);
+
+/* Contains "struct mac_binding_aging"s. */
+static struct hmap mb_aging_hmap;
+
+struct mac_binding_aging {
+ struct hmap_node hmap_node;
+
+ /* Idle time from last statistic check in ms. */
+ long long idle_age;
+ /* Time when the statistics were updated in ms. */
+ long long last_check;
+
+ const struct sbrec_mac_binding *mb;
Storing an IDL row like this in a long-term data structure is not a good
idea. You are not in control of the lifetime of this object, and it is
possible for it to be freed by the IDL before you are done with it.
Since the only thing you need is the UUID of the MAC_Binding, the best
thing to do would be to store a copy of the UUID instead.
+};
+
+void
+mac_binding_aging_init(void)
+{
+ hmap_init(&mb_aging_hmap);
+}
+
+void
+mac_binding_aging_destroy(void)
+{
+ struct mac_binding_aging *mb_aging;
+
+ HMAP_FOR_EACH_POP(mb_aging, hmap_node, &mb_aging_hmap) {
+ free(mb_aging);
+ }
+ hmap_destroy(&mb_aging_hmap);
+}
+
+static struct mac_binding_aging *
+mac_binding_aging_find(const struct uuid *uuid)
+{
+ struct mac_binding_aging *mb_aging;
+
+ size_t hash = uuid_hash(uuid);
+
+ HMAP_FOR_EACH_WITH_HASH(mb_aging, hmap_node, hash, &mb_aging_hmap) {
+ if (uuid_equals(&mb_aging->mb->header_.uuid, uuid)) {
+ return mb_aging;
+ }
+ }
+
+ return NULL;
+}
+
+static void
+mac_binding_aging_add(const struct sbrec_mac_binding *mb)
+{
+ struct mac_binding_aging *mb_aging;
+ size_t hash = uuid_hash(&mb->header_.uuid);
+
+ mb_aging = xmalloc(sizeof *mb_aging);
+ mb_aging->mb = mb;
+ mb_aging->last_check = time_msec();
+ mb_aging->idle_age = 0;
+ hmap_insert(&mb_aging_hmap, &mb_aging->hmap_node, hash);
+}
+
+static bool
+mac_binding_aging_needs_update(struct mac_binding_aging *mb_aging,
+ long long now, unsigned long long threshold)
+{
+ return (now - mb_aging->last_check + mb_aging->idle_age) >= threshold;
+}
+
+static void
+mac_binding_aging_update_statistics(struct vconn *vconn,
+ struct mac_binding_aging *mb_aging,
+ long long now)
+{
+ uint32_t cookie = mb_aging->mb->header_.uuid.parts[0];
+
+ struct ofputil_flow_stats_request fsr = {
+ .cookie = htonll(cookie),
+ .cookie_mask = OVS_BE64_MAX,
+ .out_port = OFPP_ANY,
+ .out_group = OFPG_ANY,
+ .table_id = OFPTT_ALL,
+ };
If I've read the code correctly, it appears that we send one of these
flow stats requests messages for each MAC binding for which we want to
get updated stats. The cookie is the element that changes with each
successive flow stats request.
What if instead of doing that, we did a single flow stats request and
specify the table we are interested in? Then we could iterate over the
returned flows and find the corresponding mac_binding_aging structure to
determine what action to take.
My thought here is that with a large number of MAC bindings, it may be
faster to perform a single request to OVS instead of many.
Since you add a stopwatch in patch 3 of this series, we can use that as
a way of testing the two implementations to see which is quickest.
+
+ struct ofputil_flow_stats *fses;
+ size_t n_fses;
+
+ int error =
+ vconn_dump_flows(vconn, &fsr, OFPUTIL_P_OF15_OXM, &fses, &n_fses);
+ if (error) {
+ VLOG_WARN("%s: error obtaining flow stats (%s)",
+ vconn_get_name(vconn), ovs_strerror(error));
+ goto free;
+ }
+
+ if (n_fses != 2) {
+ VLOG_DBG("Unexpected statistics count (%" PRIuSIZE "), "
+ "the flows might not be installed yet or they "
+ "are already removed.", n_fses);
+ goto free;
+ }
+
+ mb_aging->idle_age = MIN(fses[0].idle_age, fses[1].idle_age) * 1000;
+ mb_aging->last_check = now;
+
+free:
+ for (size_t i = 0; i < n_fses; i++) {
+ free(CONST_CAST(struct ofpact *, fses[i].ofpacts));
+ }
+ free(fses);
+}
+
+static void
+mac_binding_aging_update_monitored(struct ovsdb_idl_index *mb_by_chassis_index,
+ const struct sbrec_mac_binding_table
+ *mac_binding_table,
+ const struct sbrec_chassis *chassis)
+{
+ const struct sbrec_mac_binding *mb;
+ SBREC_MAC_BINDING_TABLE_FOR_EACH_TRACKED(mb, mac_binding_table) {
+ if (sbrec_mac_binding_is_deleted(mb) ||
+ (sbrec_mac_binding_is_updated(mb, SBREC_MAC_BINDING_COL_CHASSIS) &&
+ mb->chassis != chassis)) {
+ struct mac_binding_aging *mb_aging =
+ mac_binding_aging_find(&mb->header_.uuid);
+ if (mb_aging) {
+ hmap_remove(&mb_aging_hmap, &mb_aging->hmap_node);
+ free(mb_aging);
+ }
+ }
+ }
Using this mechanism is a problem. OVN has an incremental engine built
into it that you can use to process changes from the southbound database
incrementally. The reason why an incremental engine exists is,
partially, so that we can detect scenarios where trying to do an
incremental change will not be possible and that a full recompute is
required instead. A simple example of this would be if we disconnect
from the southbound database and then reconnect. In such a scenario, we
can't rely on the tracked differences in the database.
As such, using the _TRACKED variants of table traversals should only be
done within the confines of the incremental engine. This way, you can
be sure that you are performing incremental processing in a context
where it will work properly.
Your best bet would be to re-write this function to be stateless. If we
deem it necessary, we can potentially add incremental processing at a
later time.
What I mean by stateless in this case is that you will need to traverse
through all MAC bindings in the southbound table, find the corresponding
mac_binding_aging structure, and perform whatever processing is
necessary. If you find that a mac_binding_aging structure went unvisited
during the traversal, then that means the corresponding southbound
MAC_Binding was deleted and we can delete our local mac_binding_aging
struct.
+
+ struct sbrec_mac_binding *mb_index_row =
+ sbrec_mac_binding_index_init_row(mb_by_chassis_index);
+ sbrec_mac_binding_index_set_chassis(mb_index_row, chassis);
+ SBREC_MAC_BINDING_FOR_EACH_EQUAL(mb, mb_index_row, mb_by_chassis_index) {
+ if (!mac_binding_aging_find(&mb->header_.uuid)) {
+ mac_binding_aging_add(mb);
+ }
+ }
+ sbrec_mac_binding_index_destroy_row(mb_index_row);
+}
+
+static struct vconn *
+create_ovs_connection(const char *br_int_name)
+{
+ struct vconn *vconn;
+ char *target = xasprintf("unix:%s/%s.mgmt", ovs_rundir(), br_int_name);
+ int retval = vconn_open_block(target, 1 << OFP15_VERSION, 0, -1, &vconn);
+
+ if (retval) {
+ VLOG_WARN("%s: connection failed (%s)", target, ovs_strerror(retval));
+ }
+ free(target);
+
+ return vconn;
+}
+
+void
+mac_binding_aging_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
+ const char *br_int_name,
+ const struct sbrec_chassis *chassis,
+ const struct sbrec_mac_binding_table *mac_binding_table,
+ struct ovsdb_idl_index *mb_by_chassis_index,
+ unsigned long long threshold)
+{
+ if (!ovnsb_idl_txn) {
+ return;
+ }
+
+ struct vconn *vconn = create_ovs_connection(br_int_name);
+
+ if (!vconn) {
+ return;
+ }
+
+ mac_binding_aging_update_monitored(mb_by_chassis_index, mac_binding_table,
+ chassis);
+
+ long long now = time_msec();
+
+ struct mac_binding_aging *mb_aging;
+
+ HMAP_FOR_EACH_SAFE(mb_aging, hmap_node, &mb_aging_hmap) {
+ if (mac_binding_aging_needs_update(mb_aging, now, threshold)) {
+ mac_binding_aging_update_statistics(vconn, mb_aging, now);
+ }
+ if (mb_aging->idle_age >= threshold) {
+ VLOG_DBG("MAC binding exceeded threshold uuid=" UUID_FMT ", "
+ "idle_age=%llu ms",
+ UUID_ARGS(&mb_aging->mb->header_.uuid),
+ mb_aging->idle_age);
+ sbrec_mac_binding_delete(mb_aging->mb);
+ hmap_remove(&mb_aging_hmap, &mb_aging->hmap_node);
+ free(mb_aging);
+ }
+ }
+
+ vconn_close(vconn);
+}
diff --git a/controller/mac-binding-aging.h b/controller/mac-binding-aging.h
new file mode 100644
index 000000000..2228d0ca1
--- /dev/null
+++ b/controller/mac-binding-aging.h
@@ -0,0 +1,32 @@
+
+/* Copyright (c) 2022, Red Hat, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef OVN_MAC_BINDING_AGING_H
+#define OVN_MAC_BINDING_AGING_H 1
+
+#include "ovn-sb-idl.h"
+
+void mac_binding_aging_init(void);
+void mac_binding_aging_destroy(void);
+void mac_binding_aging_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
+ const char *br_int_name,
+ const struct sbrec_chassis *chassis,
+ const struct sbrec_mac_binding_table
+ *mac_binding_table,
+ struct ovsdb_idl_index *mb_by_chassis_index,
+ unsigned long long threshold);
+
+#endif /* controller/mac-binding-aging.h */
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 2793c8687..f33eb43d4 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -76,6 +76,7 @@
#include "stopwatch.h"
#include "lib/inc-proc-eng.h"
#include "hmapx.h"
+#include "mac-binding-aging.h"
VLOG_DEFINE_THIS_MODULE(main);
@@ -3306,6 +3307,7 @@ main(int argc, char *argv[])
pinctrl_init();
lflow_init();
vif_plug_provider_initialize();
+ mac_binding_aging_init();
/* Connect to OVS OVSDB instance. */
struct ovsdb_idl_loop ovs_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
@@ -3378,6 +3380,9 @@ main(int argc, char *argv[])
struct ovsdb_idl_index *sbrec_static_mac_binding_by_datapath
= ovsdb_idl_index_create1(ovnsb_idl_loop.idl,
&sbrec_static_mac_binding_col_datapath);
+ struct ovsdb_idl_index *sbrec_mac_biding_by_chassis
+ = ovsdb_idl_index_create1(ovnsb_idl_loop.idl,
+ &sbrec_mac_binding_col_chassis);
ovsdb_idl_track_add_all(ovnsb_idl_loop.idl);
ovsdb_idl_omit_alert(ovnsb_idl_loop.idl,
@@ -3951,6 +3956,12 @@ main(int argc, char *argv[])
stopwatch_stop(VIF_PLUG_RUN_STOPWATCH_NAME,
time_msec());
}
+ mac_binding_aging_run(ovnsb_idl_txn ,br_int->name,
+ chassis,
+ sbrec_mac_binding_table_get
+ (ovnsb_idl_loop.idl),
+ sbrec_mac_biding_by_chassis,
+ 60000);
stopwatch_start(PINCTRL_RUN_STOPWATCH_NAME,
time_msec());
pinctrl_run(ovnsb_idl_txn,
@@ -4218,6 +4229,7 @@ loop_done:
shash_destroy(&vif_plug_deleted_iface_ids);
shash_destroy(&vif_plug_changed_iface_ids);
vif_plug_provider_destroy_all();
+ mac_binding_aging_destroy();
ovsdb_idl_loop_destroy(&ovs_idl_loop);
ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev