On 8/11/22 10:20, Ales Musil wrote:
> Add MAC binding aging mechanism, that utilizes
> the timestamp column of MAC_Binding table.
> When the MAC binding exceeds the threshold it is
> removed from SB DB, this is postponed only in case
> we receive update ARP with update to MAC address.
>
> The threshold is configurable via option
> "mac_binding_age_threshold" that can be specified
> for each logical router. The option is defaulting to
> 0 which means that by default the aging is disabled
> and the MAC binding rows will be persisted the same
> way as before.
>
> Reported-at: https://bugzilla.redhat.com/2084668
<https://bugzilla.redhat.com/2084668>
> Signed-off-by: Ales Musil <[email protected]
<mailto:[email protected]>>
> ---
> v4: Rebase on top of current main.
> Skip one row instead of all if time elapsed is negative.
> Add note to NEWS.
> v5: Rebase on top of current main.
> Address comments from Han, Numan and Dumitru.
> ---
> NEWS | 3 +
> northd/automake.mk <http://automake.mk> | 2 +
> northd/inc-proc-northd.c | 15 ++++
> northd/mac-binding-aging.c | 161
+++++++++++++++++++++++++++++++++++++
> northd/mac-binding-aging.h | 33 ++++++++
> ovn-nb.xml | 7 ++
> tests/ovn.at <http://ovn.at> | 111
+++++++++++++++++++++++++
> 7 files changed, 332 insertions(+)
> create mode 100644 northd/mac-binding-aging.c
> create mode 100644 northd/mac-binding-aging.h
>
> diff --git a/NEWS b/NEWS
> index 20cea579e..eff8e472a 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -14,6 +14,9 @@ Post v22.06.0
> NAT-T UDP encapsulation. Requires OVS support for IPsec
custom tunnel
> options (which will be available in OVS 2.18).
> - Removed possibility of disabling logical datapath groups.
> + - Added MAC binding aging mechanism, that is disabled by default.
> + It can be enabled per logical router with option
> + "mac_binding_age_threshold".
>
> OVN v22.06.0 - 03 Jun 2022
> --------------------------
> diff --git a/northd/automake.mk <http://automake.mk>
b/northd/automake.mk <http://automake.mk>
> index 4862ec7b7..81582867d 100644
> --- a/northd/automake.mk <http://automake.mk>
> +++ b/northd/automake.mk <http://automake.mk>
> @@ -1,6 +1,8 @@
> # ovn-northd
> bin_PROGRAMS += northd/ovn-northd
> northd_ovn_northd_SOURCES = \
> + northd/mac-binding-aging.c \
> + northd/mac-binding-aging.h \
> northd/northd.c \
> northd/northd.h \
> northd/ovn-northd.c \
> diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c
> index 43093cb5a..fc0d9e670 100644
> --- a/northd/inc-proc-northd.c
> +++ b/northd/inc-proc-northd.c
> @@ -22,9 +22,11 @@
> #include "ip-mcast-index.h"
> #include "static-mac-binding-index.h"
> #include "lib/inc-proc-eng.h"
> +#include "lib/mac-binding-index.h"
> #include "lib/ovn-nb-idl.h"
> #include "lib/ovn-sb-idl.h"
> #include "mcast-group-index.h"
> +#include "northd/mac-binding-aging.h"
> #include "openvswitch/poll-loop.h"
> #include "openvswitch/vlog.h"
> #include "inc-proc-northd.h"
> @@ -149,6 +151,8 @@ enum sb_engine_node {
> * avoid sparse errors. */
> static ENGINE_NODE(northd, "northd");
> static ENGINE_NODE(lflow, "lflow");
> +static ENGINE_NODE(mac_binding_aging, "mac_binding_aging");
> +static ENGINE_NODE(mac_binding_aging_waker,
"mac_binding_aging_waker");
>
> void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
> struct ovsdb_idl_loop *sb)
> @@ -211,12 +215,18 @@ void inc_proc_northd_init(struct
ovsdb_idl_loop *nb,
> engine_add_input(&en_northd, &en_sb_load_balancer, NULL);
> engine_add_input(&en_northd, &en_sb_fdb, NULL);
> engine_add_input(&en_northd, &en_sb_static_mac_binding, NULL);
> + engine_add_input(&en_mac_binding_aging, &en_sb_mac_binding,
NULL);
> + engine_add_input(&en_mac_binding_aging, &en_northd, NULL);
> + engine_add_input(&en_mac_binding_aging,
&en_mac_binding_aging_waker, NULL);
> engine_add_input(&en_lflow, &en_nb_bfd, NULL);
> engine_add_input(&en_lflow, &en_sb_bfd, NULL);
> engine_add_input(&en_lflow, &en_sb_logical_flow, NULL);
> engine_add_input(&en_lflow, &en_sb_multicast_group, NULL);
> engine_add_input(&en_lflow, &en_sb_igmp_group, NULL);
> engine_add_input(&en_lflow, &en_northd, NULL);
> + /* XXX: The "en_mac_binding_aging" should be separate "root"
node
> + * once I-P engine allows multiple root nodes. */
> + engine_add_input(&en_lflow, &en_mac_binding_aging, NULL);
>
> struct engine_arg engine_arg = {
> .nb_idl = nb->idl,
> @@ -235,6 +245,8 @@ void inc_proc_northd_init(struct
ovsdb_idl_loop *nb,
> chassis_hostname_index_create(sb->idl);
> struct ovsdb_idl_index *sbrec_static_mac_binding_by_lport_ip
> = static_mac_binding_index_create(sb->idl);
> + struct ovsdb_idl_index *sbrec_mac_binding_by_datapath
> + = mac_binding_by_datapath_index_create(sb->idl);
>
> engine_init(&en_lflow, &engine_arg);
>
> @@ -256,6 +268,9 @@ void inc_proc_northd_init(struct
ovsdb_idl_loop *nb,
> engine_ovsdb_node_add_index(&en_sb_static_mac_binding,
>
"sbrec_static_mac_binding_by_lport_ip",
>
sbrec_static_mac_binding_by_lport_ip);
> + engine_ovsdb_node_add_index(&en_sb_mac_binding,
> + "sbrec_mac_binding_by_datapath",
> + sbrec_mac_binding_by_datapath);
> }
>
> void inc_proc_northd_run(struct ovsdb_idl_txn *ovnnb_txn,
> diff --git a/northd/mac-binding-aging.c b/northd/mac-binding-aging.c
> new file mode 100644
> index 000000000..3859c050b
> --- /dev/null
> +++ b/northd/mac-binding-aging.c
> @@ -0,0 +1,161 @@
> +/* Copyright (c) 2022, Red Hat, Inc.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + * http://www.apache.org/licenses/LICENSE-2.0
<http://www.apache.org/licenses/LICENSE-2.0>
> + *
> + * Unless required by applicable law or agreed to in writing,
software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
or implied.
> + * See the License for the specific language governing
permissions and
> + * limitations under the License.
> + */
> +
> +#include <config.h>
> +
> +#include "lib/inc-proc-eng.h"
> +#include "lib/ovn-nb-idl.h"
> +#include "lib/ovn-sb-idl.h"
> +#include "lib/timeval.h"
> +#include "northd/mac-binding-aging.h"
> +#include "northd/northd.h"
> +#include "openvswitch/hmap.h"
> +#include "openvswitch/poll-loop.h"
> +#include "openvswitch/util.h"
> +#include "openvswitch/vlog.h"
> +
> +VLOG_DEFINE_THIS_MODULE(mac_binding_aging);
> +
> +struct mac_binding_waker {
> + bool should_schedule;
> + long long next_wake_msec;
> +};
> +
> +static void
> +mac_binding_aging_run_for_datapath(const struct
sbrec_datapath_binding *dp,
> + const struct
nbrec_logical_router *nbr,
> + struct ovsdb_idl_index
*mb_by_datapath,
> + int64_t now, int64_t *wake_delay)
> +{
> + uint64_t threshold = smap_get_uint(&nbr->options,
> + "mac_binding_age_threshold",
> + 0) * 1000;
> + if (!threshold) {
> + return;
> + }
> +
> + struct sbrec_mac_binding *mb_index_row =
> + sbrec_mac_binding_index_init_row(mb_by_datapath);
> + sbrec_mac_binding_index_set_datapath(mb_index_row, dp);
> +
> + const struct sbrec_mac_binding *mb;
> + SBREC_MAC_BINDING_FOR_EACH_EQUAL (mb, mb_index_row,
mb_by_datapath) {
> + int64_t elapsed = now - mb->timestamp;
> +
> + if (elapsed < 0) {
> + continue;
> + } else if (elapsed >= threshold) {
> + sbrec_mac_binding_delete(mb);
> + } else {
> + *wake_delay = MIN(*wake_delay, threshold - elapsed);
> + }
> + }
> + sbrec_mac_binding_index_destroy_row(mb_index_row);
> +}
> +
> +void
> +en_mac_binding_aging_run(struct engine_node *node, void *data
OVS_UNUSED)
> +{
> + const struct engine_context *eng_ctx = engine_get_context();
> +
> + if (!eng_ctx->ovnsb_idl_txn) {
> + return;
> + }
> +
> + int64_t next_expire_msec = INT64_MAX;
> + int64_t now = time_wall_msec();
> + struct northd_data *northd_data =
engine_get_input_data("northd", node);
> + struct mac_binding_waker *waker =
> + engine_get_input_data("mac_binding_aging_waker", node);
> + struct ovsdb_idl_index *sbrec_mac_binding_by_datapath =
> +
engine_ovsdb_node_get_index(engine_get_input("SB_mac_binding", node),
> +
"sbrec_mac_binding_by_datapath");
> +
> + struct ovn_datapath *od;
> + HMAP_FOR_EACH (od, key_node, &northd_data->datapaths) {
> + if (od->sb && od->nbr) {
> + mac_binding_aging_run_for_datapath(od->sb, od->nbr,
> +
sbrec_mac_binding_by_datapath,
> + now,
&next_expire_msec);
> + }
> + }
> +
> + if (next_expire_msec < INT64_MAX) {
> + waker->should_schedule = true;
> + waker->next_wake_msec = time_msec() + next_expire_msec;
> + poll_timer_wait_until(waker->next_wake_msec);
> + } else {
> + waker->should_schedule = false;
> + }
> +
> + /* This node is part of lflow, but lflow does not depend on
it. Setting
> + * state as unchanged does not trigger lflow node when it is
not needed. */
> + engine_set_node_state(node, EN_UNCHANGED);
> +}
> +
> +void *
> +en_mac_binding_aging_init(struct engine_node *node OVS_UNUSED,
> + struct engine_arg *arg OVS_UNUSED)
> +{
> + return NULL;
> +}
> +
> +void
> +en_mac_binding_aging_cleanup(void *data OVS_UNUSED)
> +{
> +}
> +
> +/* The waker node is an input node, but the data about when to
wake up
> + * the aging node are populated by the aging node.
> + * The reason being that engine periodically runs input nodes to
check
> + * if we there are updates, so it could process the other nodes,
however
> + * the waker cannot be dependent on other node because it
wouldn't be
> + * input node anymore. */
> +void
> +en_mac_binding_aging_waker_run(struct engine_node *node, void *data)
> +{
> + struct mac_binding_waker *waker = data;
> +
> + engine_set_node_state(node, EN_UNCHANGED);
> +
> + if (!waker->should_schedule) {
> + return;
> + }
> +
> + if (time_msec() >= waker->next_wake_msec) {
> + waker->should_schedule = false;
> + engine_set_node_state(node, EN_UPDATED);
> + return;
> + }
> +
> + poll_timer_wait_until(waker->next_wake_msec);
> +}
> +
> +void *
> +en_mac_binding_aging_waker_init(struct engine_node *node OVS_UNUSED,
> + struct engine_arg *arg OVS_UNUSED)
> +{
> + struct mac_binding_waker *waker = xmalloc(sizeof *waker);
> +
> + waker->should_schedule = false;
> + waker->next_wake_msec = 0;
> +
> + return waker;
> +}
> +
> +void
> +en_mac_binding_aging_waker_cleanup(void *data OVS_UNUSED)
> +{
> +}
> diff --git a/northd/mac-binding-aging.h b/northd/mac-binding-aging.h
> new file mode 100644
> index 000000000..296a7ab38
> --- /dev/null
> +++ b/northd/mac-binding-aging.h
> @@ -0,0 +1,33 @@
> +/* Copyright (c) 2022, Red Hat, Inc.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + * http://www.apache.org/licenses/LICENSE-2.0
<http://www.apache.org/licenses/LICENSE-2.0>
> + *
> + * Unless required by applicable law or agreed to in writing,
software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
or implied.
> + * See the License for the specific language governing
permissions and
> + * limitations under the License.
> + */
> +
> +#ifndef MAC_BINDING_AGING_H
> +#define MAC_BINDING_AGING_H 1
> +
> +#include "lib/inc-proc-eng.h"
> +
> +/* The MAC binding aging node functions. */
> +void en_mac_binding_aging_run(struct engine_node *node, void *data);
> +void *en_mac_binding_aging_init(struct engine_node *node,
> + struct engine_arg *arg);
> +void en_mac_binding_aging_cleanup(void *data);
> +
> +/* The MAC binding aging waker node functions. */
> +void en_mac_binding_aging_waker_run(struct engine_node *node,
void *data);
> +void *en_mac_binding_aging_waker_init(struct engine_node *node,
> + struct engine_arg *arg);
> +void en_mac_binding_aging_waker_cleanup(void *data);
> +
> +#endif /* northd/mac-binding-aging.h */
> diff --git a/ovn-nb.xml b/ovn-nb.xml
> index e26afd83c..57c7d6174 100644
> --- a/ovn-nb.xml
> +++ b/ovn-nb.xml
> @@ -2392,6 +2392,13 @@
> and other sources. This way, OVN and the other sources
can make use of
> the same conntrack zone.
> </column>
> +
> + <column name="options" key="mac_binding_age_threshold"
> + type='{"type": "integer", "minInteger": 0,
"maxInteger": 4294967295}'>
> + MAC binding aging <code>threshold</code> value in
seconds. MAC binding
> + exceeding this timeout will be automatically removed.
The value
> + defaults to 0, which means disabled.
> + </column>
> </group>
>
> <group title="Common Columns">
> diff --git a/tests/ovn.at <http://ovn.at> b/tests/ovn.at
<http://ovn.at>
> index c8cc8cde4..11094eff5 100644
> --- a/tests/ovn.at <http://ovn.at>
> +++ b/tests/ovn.at <http://ovn.at>
> @@ -32404,3 +32404,114 @@ AT_CHECK([test $(ovn-sbctl list fdb |
grep -c "00:00:00:00:10:30") = 0])
> OVN_CLEANUP([hv1])
> AT_CLEANUP
> ])
> +
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([MAC binding aging])
> +ovn_start
> +
> +net_add n1
> +
> +AT_CHECK([ovn-nbctl ls-add public])
> +AT_CHECK([ovn-nbctl ls-add internal])
> +
> +AT_CHECK([ovn-nbctl lsp-add public ln_port])
> +AT_CHECK([ovn-nbctl lsp-set-addresses ln_port unknown])
> +AT_CHECK([ovn-nbctl lsp-set-type ln_port localnet])
> +AT_CHECK([ovn-nbctl lsp-set-options ln_port network_name=physnet1])
> +
> +AT_CHECK([ovn-nbctl lsp-add public public-gw])
> +AT_CHECK([ovn-nbctl lsp-set-type public-gw router])
> +AT_CHECK([ovn-nbctl lsp-set-addresses public-gw
00:00:00:00:10:00 router])
> +AT_CHECK([ovn-nbctl lsp-set-options public-gw
router-port=gw-public])
> +
> +AT_CHECK([ovn-nbctl lsp-add internal internal-gw])
> +AT_CHECK([ovn-nbctl lsp-set-type internal-gw router])
> +AT_CHECK([ovn-nbctl lsp-set-addresses internal-gw
00:00:00:00:20:00 router])
> +AT_CHECK([ovn-nbctl lsp-set-options internal-gw
router-port=gw-internal])
> +
> +AT_CHECK([ovn-nbctl lsp-add internal vif1])
> +AT_CHECK([ovn-nbctl lsp-set-addresses vif1 "00:00:00:00:20:10
192.168.20.10"])
> +
> +AT_CHECK([ovn-nbctl lsp-add internal vif2])
> +AT_CHECK([ovn-nbctl lsp-set-addresses vif2 "00:00:00:00:20:20
192.168.20.20"])
> +
> +AT_CHECK([ovn-nbctl lr-add gw])
> +AT_CHECK([ovn-nbctl lrp-add gw gw-public 00:00:00:00:10:00
192.168.10.1/24 <http://192.168.10.1/24>])
> +AT_CHECK([ovn-nbctl lrp-add gw gw-internal 00:00:00:00:20:00
192.168.20.1/24 <http://192.168.20.1/24>])
> +
> +sim_add hv1
> +as hv1
> +ovs-vsctl add-br br-underlay
> +ovn_attach n1 br-underlay 192.168.0.1
> +ovs-vsctl add-br br-phys
> +ovs-vsctl -- add-port br-int vif1 -- \
> + set interface vif1 external-ids:iface-id=vif1 \
> + options:tx_pcap=hv1/vif1-tx.pcap \
> + options:rxq_pcap=hv1/vif1-rx.pcap \
> + ofport-request=1
> +ovs-vsctl -- add-port br-phys ext1 -- \
> + set interface ext1 \
> + options:tx_pcap=hv1/ext1-tx.pcap \
> + options:rxq_pcap=hv1/ext1-rx.pcap \
> + ofport-request=2
> +ovs-vsctl set open .
external_ids:ovn-bridge-mappings=physnet1:br-phys
> +
> +sim_add hv2
> +as hv2
> +ovs-vsctl add-br br-underlay
> +ovn_attach n1 br-underlay 192.168.0.2
> +ovs-vsctl add-br br-phys
> +ovs-vsctl -- add-port br-int vif2 -- \
> + set interface vif2 external-ids:iface-id=vif2 \
> + options:tx_pcap=hv2/vif2-tx.pcap \
> + options:rxq_pcap=hv2/vif2-rx.pcap \
> + ofport-request=1
> +ovs-vsctl -- add-port br-phys ext2 -- \
> + set interface ext2 \
> + options:tx_pcap=hv2/ext2-tx.pcap \
> + options:rxq_pcap=hv2/ext2-rx.pcap \
> + ofport-request=2
> +ovs-vsctl set open .
external_ids:ovn-bridge-mappings=physnet1:br-phys
> +
> +OVN_POPULATE_ARP
> +
> +send_garp() {
> + hv=$1
> + dev=$2
> + mac_byte=$3
> + ip_byte=${4-$3}
> +
> + mac="0000000010$mac_byte"
> + ip=`ip_to_hex 192 168 10 $ip_byte`
> +
packet=ffffffffffff${mac}08060001080006040002${mac}${ip}${mac}${ip}
> + as $hv ovs-appctl netdev-dummy/receive $dev $packet
> +}
> +
> +# Check if the option is not present by default
> +AT_CHECK([fetch_column nb:logical_router options name="gw" |
grep -q mac_binding_age_threshold], [1])
> +
> +# Send GARP to populate MAC binding table records
> +send_garp hv1 ext1 10
> +send_garp hv2 ext2 20
> +
> +OVS_WAIT_UNTIL([ovn-sbctl list mac_binding | grep -q
"192.168.10.10"])
> +OVS_WAIT_UNTIL([ovn-sbctl list mac_binding | grep -q
"192.168.10.20"])
> +
> +# Set the MAC binding aging threshold
> +AT_CHECK([ovn-nbctl set logical_router gw
options:mac_binding_age_threshold=1])
> +AT_CHECK([fetch_column nb:logical_router options | grep -q
mac_binding_age_threshold=1])
> +AT_CHECK([ovn-nbctl --wait=sb sync])
> +
> +# Set the timeout for OVS_WAIT* functions to 5 seconds
> +OVS_CTL_TIMEOUT=5
> +# Check if the records are removed after some inactivity
> +OVS_WAIT_UNTIL([
> + test "0" = "$(ovn-sbctl list mac_binding | grep -c
'192.168.10.10')"
> +])
> +OVS_WAIT_UNTIL([
> + test "0" = "$(ovn-sbctl list mac_binding | grep -c
'192.168.10.20')"
> +])
> +
> +OVN_CLEANUP([hv1], [hv2])
> +AT_CLEANUP
> +])
--
Ales Musil
Senior Software Engineer - OVN Core
Red Hat EMEA <https://www.redhat.com>
[email protected] <mailto:[email protected]> IM: amusil
<https://red.ht/sig>