On 8/2/24 17:07, Shibir Basak wrote:
> When a VIF port goes down, all FDB entries associated
> with it are deleted.
> This helps is reducing traffic outage due to stale FDB
> entries present until the lsp is removed.
>
> Acked-by: Naveen Yerramneni <[email protected]>
> Signed-off-by: Shibir Basak <[email protected]>
> ---
Hi Shibir,
Thanks for the patch!
As this is a bug fix it probably needs a:
Fixes: dd94f1266ca4 ("northd: MAC learning: Add logical flows for fdb.")
I have a few more comments below.
> controller/ovn-controller.c | 5 ++++
> controller/pinctrl.c | 40 ++++++++++++++++++++------
> controller/pinctrl.h | 1 +
> lib/automake.mk | 4 ++-
> lib/fdb.c | 33 +++++++++++++++++++++
> lib/fdb.h | 26 +++++++++++++++++
> tests/ovn.at | 57 +++++++++++++++++++++++++++++++++++++
> 7 files changed, 157 insertions(+), 9 deletions(-)
> create mode 100644 lib/fdb.c
> create mode 100644 lib/fdb.h
>
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index 6a7cca673..2acdddd09 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -4936,6 +4936,10 @@ main(int argc, char *argv[])
> = ovsdb_idl_index_create2(ovnsb_idl_loop.idl,
> &sbrec_fdb_col_mac,
> &sbrec_fdb_col_dp_key);
> + struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port
> + = ovsdb_idl_index_create2(ovnsb_idl_loop.idl,
> + &sbrec_fdb_col_port_key,
> + &sbrec_fdb_col_dp_key);
> struct ovsdb_idl_index *sbrec_mac_binding_by_datapath
> = mac_binding_by_datapath_index_create(ovnsb_idl_loop.idl);
> struct ovsdb_idl_index *sbrec_static_mac_binding_by_datapath
> @@ -5645,6 +5649,7 @@ main(int argc, char *argv[])
> sbrec_igmp_group,
> sbrec_ip_multicast,
> sbrec_fdb_by_dp_key_mac,
> + sbrec_fdb_by_dp_and_port,
> sbrec_dns_table_get(ovnsb_idl_loop.idl),
> sbrec_controller_event_table_get(
> ovnsb_idl_loop.idl),
> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> index 7cbb0cf81..9e37e1693 100644
> --- a/controller/pinctrl.c
> +++ b/controller/pinctrl.c
> @@ -62,6 +62,7 @@
> #include "lflow.h"
> #include "ip-mcast.h"
> #include "ovn-sb-idl.h"
> +#include "lib/fdb.h"
>
> VLOG_DEFINE_THIS_MODULE(pinctrl);
>
> @@ -236,6 +237,7 @@ static void send_garp_rarp_prepare(
> struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
> struct ovsdb_idl_index *sbrec_port_binding_by_name,
> struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
> + struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
> const struct ovsrec_bridge *,
> const struct sbrec_chassis *,
> const struct hmap *local_datapaths,
> @@ -4145,6 +4147,7 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> struct ovsdb_idl_index *sbrec_igmp_groups,
> struct ovsdb_idl_index *sbrec_ip_multicast_opts,
> struct ovsdb_idl_index *sbrec_fdb_by_dp_key_mac,
> + struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
> const struct sbrec_dns_table *dns_table,
> const struct sbrec_controller_event_table *ce_table,
> const struct sbrec_service_monitor_table *svc_mon_table,
> @@ -4166,7 +4169,8 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> sbrec_port_binding_by_key, chassis);
> send_garp_rarp_prepare(ovnsb_idl_txn, sbrec_port_binding_by_datapath,
> sbrec_port_binding_by_name,
> - sbrec_mac_binding_by_lport_ip, br_int, chassis,
> + sbrec_mac_binding_by_lport_ip,
> + sbrec_fdb_by_dp_and_port, br_int, chassis,
> local_datapaths, active_tunnels, ovs_table);
> prepare_ipv6_ras(local_active_ports_ras, sbrec_port_binding_by_name);
> prepare_ipv6_prefixd(ovnsb_idl_txn, sbrec_port_binding_by_name,
> @@ -5017,7 +5021,6 @@ wait_put_mac_bindings(struct ovsdb_idl_txn
> *ovnsb_idl_txn)
> }
> }
>
> -
Unrelated change. This line feed should not be removed.
> /*
> * Send gratuitous/reverse ARP for vif on localnet.
> *
> @@ -5034,6 +5037,7 @@ struct garp_rarp_data {
> * announcement (in msecs). */
> uint32_t dp_key; /* Datapath used to output this GARP. */
> uint32_t port_key; /* Port to inject the GARP into. */
> + bool enabled; /* is garp/rarp announcement enabled */
> };
>
> /* Contains GARPs/RARPs to be sent. Protected by pinctrl_mutex*/
> @@ -5054,7 +5058,7 @@ destroy_send_garps_rarps(void)
> /* Runs with in the main ovn-controller thread context. */
> static void
> add_garp_rarp(const char *name, const struct eth_addr ea, ovs_be32 ip,
> - uint32_t dp_key, uint32_t port_key)
> + uint32_t dp_key, uint32_t port_key, bool enabled)
> {
> struct garp_rarp_data *garp_rarp = xmalloc(sizeof *garp_rarp);
> garp_rarp->ea = ea;
> @@ -5063,6 +5067,7 @@ add_garp_rarp(const char *name, const struct eth_addr
> ea, ovs_be32 ip,
> garp_rarp->backoff = 1000; /* msec. */
> garp_rarp->dp_key = dp_key;
> garp_rarp->port_key = port_key;
> + garp_rarp->enabled = enabled;
> shash_add(&send_garp_rarp_data, name, garp_rarp);
>
> /* Notify pinctrl_handler so that it can wakeup and process
> @@ -5081,6 +5086,8 @@ send_garp_rarp_update(struct ovsdb_idl_txn
> *ovnsb_idl_txn,
> bool garp_continuous)
> {
> volatile struct garp_rarp_data *garp_rarp = NULL;
> + bool is_garp_rarp_enabled = !smap_get_bool(&binding_rec->options,
> + "disable_garp_rarp", false);
>
> /* Skip localports as they don't need to be announced */
> if (!strcmp(binding_rec->type, "localport")) {
> @@ -5104,6 +5111,7 @@ send_garp_rarp_update(struct ovsdb_idl_txn
> *ovnsb_idl_txn,
> if (garp_rarp) {
> garp_rarp->dp_key = binding_rec->datapath->tunnel_key;
> garp_rarp->port_key = binding_rec->tunnel_key;
> + garp_rarp->enabled = is_garp_rarp_enabled;
> if (garp_max_timeout != garp_rarp_max_timeout ||
> garp_continuous != garp_rarp_continuous) {
> /* reset backoff */
> @@ -5114,7 +5122,8 @@ send_garp_rarp_update(struct ovsdb_idl_txn
> *ovnsb_idl_txn,
> add_garp_rarp(name, laddrs->ea,
> laddrs->ipv4_addrs[i].addr,
> binding_rec->datapath->tunnel_key,
> - binding_rec->tunnel_key);
> + binding_rec->tunnel_key,
> + is_garp_rarp_enabled);
> send_garp_locally(ovnsb_idl_txn,
> sbrec_mac_binding_by_lport_ip,
> local_datapaths, binding_rec,
> laddrs->ea,
> @@ -5134,6 +5143,7 @@ send_garp_rarp_update(struct ovsdb_idl_txn
> *ovnsb_idl_txn,
> if (garp_rarp) {
> garp_rarp->dp_key =
> binding_rec->datapath->tunnel_key;
> garp_rarp->port_key = binding_rec->tunnel_key;
> + garp_rarp->enabled = is_garp_rarp_enabled;
> if (garp_max_timeout != garp_rarp_max_timeout ||
> garp_continuous != garp_rarp_continuous) {
> /* reset backoff */
> @@ -5143,7 +5153,8 @@ send_garp_rarp_update(struct ovsdb_idl_txn
> *ovnsb_idl_txn,
> } else {
> add_garp_rarp(name, laddrs->ea,
> 0, binding_rec->datapath->tunnel_key,
> - binding_rec->tunnel_key);
> + binding_rec->tunnel_key,
> + is_garp_rarp_enabled);
> }
> free(name);
> }
> @@ -5159,6 +5170,7 @@ send_garp_rarp_update(struct ovsdb_idl_txn
> *ovnsb_idl_txn,
> if (garp_rarp) {
> garp_rarp->dp_key = binding_rec->datapath->tunnel_key;
> garp_rarp->port_key = binding_rec->tunnel_key;
> + garp_rarp->enabled = is_garp_rarp_enabled;
> if (garp_max_timeout != garp_rarp_max_timeout ||
> garp_continuous != garp_rarp_continuous) {
> /* reset backoff */
> @@ -5184,7 +5196,8 @@ send_garp_rarp_update(struct ovsdb_idl_txn
> *ovnsb_idl_txn,
> add_garp_rarp(binding_rec->logical_port,
> laddrs.ea, ip,
> binding_rec->datapath->tunnel_key,
> - binding_rec->tunnel_key);
> + binding_rec->tunnel_key,
> + is_garp_rarp_enabled);
> if (ip) {
> send_garp_locally(ovnsb_idl_txn, sbrec_mac_binding_by_lport_ip,
> local_datapaths, binding_rec, laddrs.ea, ip);
> @@ -6547,6 +6560,10 @@ send_garp_rarp_run(struct rconn *swconn, long long int
> *send_garp_rarp_time)
> long long int current_time = time_msec();
> *send_garp_rarp_time = LLONG_MAX;
> SHASH_FOR_EACH (iter, &send_garp_rarp_data) {
> + struct garp_rarp_data *garp_rarp = iter->data;
> + if (!garp_rarp->enabled) {
> + continue;
> + }
> long long int next_announce = send_garp_rarp(swconn, iter->data,
> current_time);
> if (*send_garp_rarp_time > next_announce) {
> @@ -6562,6 +6579,7 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn
> *ovnsb_idl_txn,
> struct ovsdb_idl_index
> *sbrec_port_binding_by_datapath,
> struct ovsdb_idl_index *sbrec_port_binding_by_name,
> struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
> + struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
> const struct ovsrec_bridge *br_int,
> const struct sbrec_chassis *chassis,
> const struct hmap *local_datapaths,
> @@ -6597,12 +6615,18 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn
> *ovnsb_idl_txn,
> &nat_ip_keys, &local_l3gw_ports,
> chassis, active_tunnels,
> &nat_addresses);
> +
> /* For deleted ports and deleted nat ips, remove from
> * send_garp_rarp_data. */
> struct shash_node *iter;
> SHASH_FOR_EACH_SAFE (iter, &send_garp_rarp_data) {
> if (!sset_contains(&localnet_vifs, iter->name) &&
> !sset_contains(&nat_ip_keys, iter->name)) {
> + struct garp_rarp_data *data = iter->data;
> + /* Cleanup FDB entries for inactive ports */
> + delete_fdb_entries(sbrec_fdb_by_dp_and_port,
> + data->dp_key,
> + data->port_key);
It feels wrong to use the garp/rarp mechanism to cleanup FDB on port
deletion. It also forces the addition of the 'is_garp_rarp_enabled'
flag and creating 'garp_rarp_data' structures for cases in which we
don't really need them (we send no garps/rarps).
What if we do this in a different way?
In binding.c we set port_binding.up to "false" whenever ports are
removed. Should we cleanup stale FDB entries there instead?
There are two places where we do that:
binding_lport_set_down()
port_binding_set_down()
What do you think?
> send_garp_rarp_delete(iter->name);
> }
> }
> @@ -6612,7 +6636,7 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn
> *ovnsb_idl_txn,
> SSET_FOR_EACH (iface_id, &localnet_vifs) {
> const struct sbrec_port_binding *pb = lport_lookup_by_name(
> sbrec_port_binding_by_name, iface_id);
> - if (pb && !smap_get_bool(&pb->options, "disable_garp_rarp", false)) {
> + if (pb) {
> send_garp_rarp_update(ovnsb_idl_txn,
> sbrec_mac_binding_by_lport_ip,
> local_datapaths, pb, &nat_addresses,
> @@ -6625,7 +6649,7 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn
> *ovnsb_idl_txn,
> SSET_FOR_EACH (gw_port, &local_l3gw_ports) {
> const struct sbrec_port_binding *pb
> = lport_lookup_by_name(sbrec_port_binding_by_name, gw_port);
> - if (pb && !smap_get_bool(&pb->options, "disable_garp_rarp", false)) {
> + if (pb) {
> send_garp_rarp_update(ovnsb_idl_txn,
> sbrec_mac_binding_by_lport_ip,
> local_datapaths, pb, &nat_addresses,
> garp_max_timeout, garp_continuous);
> diff --git a/controller/pinctrl.h b/controller/pinctrl.h
> index 3462b670c..6613c87e4 100644
> --- a/controller/pinctrl.h
> +++ b/controller/pinctrl.h
> @@ -49,6 +49,7 @@ void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> struct ovsdb_idl_index *sbrec_igmp_groups,
> struct ovsdb_idl_index *sbrec_ip_multicast_opts,
> struct ovsdb_idl_index *sbrec_fdb_by_dp_key_mac,
> + struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
> const struct sbrec_dns_table *,
> const struct sbrec_controller_event_table *,
> const struct sbrec_service_monitor_table *,
> diff --git a/lib/automake.mk b/lib/automake.mk
> index b69e854b0..3ff098331 100644
> --- a/lib/automake.mk
> +++ b/lib/automake.mk
> @@ -49,7 +49,9 @@ lib_libovn_la_SOURCES = \
> lib/stopwatch-names.h \
> lib/vif-plug-provider.h \
> lib/vif-plug-provider.c \
> - lib/vif-plug-providers/dummy/vif-plug-dummy.c
> + lib/vif-plug-providers/dummy/vif-plug-dummy.c \
> + lib/fdb.c \
> + lib/fdb.h
> nodist_lib_libovn_la_SOURCES = \
> lib/ovn-dirs.c \
> lib/ovn-nb-idl.c \
> diff --git a/lib/fdb.c b/lib/fdb.c
> new file mode 100644
> index 000000000..00e3ac902
> --- /dev/null
> +++ b/lib/fdb.c
> @@ -0,0 +1,33 @@
> +/* Copyright (c) 2024, Red Hat, Inc.
This is probably a copy/paste error. Should it say Nutanix?
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + * http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#include <config.h>
> +#include "lib/fdb.h"
> +
> +void
> +delete_fdb_entries(struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
> + uint32_t dp_key, uint32_t port_key)
> +{
> + struct sbrec_fdb *target =
> + sbrec_fdb_index_init_row(sbrec_fdb_by_dp_and_port);
> + sbrec_fdb_index_set_dp_key(target, dp_key);
> + sbrec_fdb_index_set_port_key(target, port_key);
> +
> + struct sbrec_fdb *fdb_e;
> + SBREC_FDB_FOR_EACH_EQUAL (fdb_e, target, sbrec_fdb_by_dp_and_port) {
> + sbrec_fdb_delete(fdb_e);
> + }
> + sbrec_fdb_index_destroy_row(target);
> +}
> diff --git a/lib/fdb.h b/lib/fdb.h
> new file mode 100644
> index 000000000..7b64bc7f2
> --- /dev/null
> +++ b/lib/fdb.h
> @@ -0,0 +1,26 @@
> +/* Copyright (c) 2024, Red Hat, Inc.
This is probably a copy/paste error. Should it say Nutanix?
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + * http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#ifndef OVN_FDB_H
> +#define OVN_FDB_H 1
> +
> +#include "ovsdb-idl.h"
> +#include "ovn-sb-idl.h"
> +
> +void
> +delete_fdb_entries(struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
> + uint32_t dp_key, uint32_t port_key);
> +
> +#endif
> diff --git a/tests/ovn.at b/tests/ovn.at
> index b31afbfb3..a0df87196 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -31706,6 +31706,63 @@ OVN_CLEANUP([hv1], [hv2], [hv3])
> AT_CLEANUP
> ])
>
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([OVN FDB cleanup when vif goes down])
No real need for "OVN" in the test case name. When running
it looks like this:
OVN end-to-end tests
405: OVN FDB cleanup when vif goes down -- parallelization=yes --
ovn_monitor_all=yes ok
> +ovn_start
> +
> +net_add n1
> +
> +AT_CHECK([ovn-nbctl ls-add ls0])
> +
> +AT_CHECK([ovn-nbctl lsp-add ls0 ln_port])
> +AT_CHECK([ovn-nbctl lsp-set-addresses ln_port unknown])
> +AT_CHECK([ovn-nbctl lsp-set-type ln_port localnet])
> +AT_CHECK([ovn-nbctl lsp-set-options ln_port network_name=physnet1])
> +AT_CHECK([ovn-nbctl set logical_switch_port ln_port
> options:localnet_learn_fdb=true])
> +
> +AT_CHECK([ovn-nbctl lsp-add ls0 vif0])
> +AT_CHECK([ovn-nbctl lsp-set-addresses vif0 "00:00:00:00:10:10 192.168.10.10"
> unknown])
All these "AT_CHECK" can be replaced with "check".
> +
> +sim_add hv1
> +as hv1
> +ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.1
> +ovs-vsctl -- add-port br-int vif0 -- \
> + set interface vif0 external-ids:iface-id=vif0 \
> + options:tx_pcap=hv1/vif0-tx.pcap \
> + options:rxq_pcap=hv1/vif0-rx.pcap \
> + ofport-request=1
> +ovs-vsctl -- add-port br-phys ext0 -- \
> + set interface ext0 \
> + options:tx_pcap=hv1/ext0-tx.pcap \
> + options:rxq_pcap=hv1/ext0-rx.pcap \
> + ofport-request=2
> +ovs-vsctl set open . external_ids:ovn-bridge-mappings=physnet1:br-phys
> +
> +
> +wait_for_ports_up
You should probably add a sync here:
check ovn-nbctl --wait=hv sync
> +ls0_key=$(fetch_column datapath_binding tunnel_key external_ids:name=ls0)
> +vif0_key=$(fetch_column port_binding tunnel_key logical_port=vif0)
> +
> +ovn-sbctl create FDB mac="00\:00\:00\:00\:10\:10" dp_key=$ls0_key
> port_key=$vif0_key
> +ovn-sbctl create FDB mac="00\:00\:00\:00\:20\:20" dp_key=$ls0_key
> port_key=$vif0_key
> +
> +ovn-sbctl list fdb
This is not really needed for the test, it can probably be removed.
> +# vif going down should purge all relevant FDB entries
> +ovs-vsctl del-port br-int vif0
> +OVS_WAIT_UNTIL([test x`ovn-nbctl lsp-get-up vif0` = xdown])
> +AT_CHECK([ovn-nbctl --wait=hv sync])
Please replace AT_CHECK with "check".
> +ovn-sbctl list fdb
This one can be removed.
> +OVS_WAIT_UNTIL([test $(ovn-sbctl list fdb | grep -c "00:00:00:00:10:10") =
> 0])
> +OVS_WAIT_UNTIL([test $(ovn-sbctl list fdb | grep -c "00:00:00:00:20:20") =
> 0])> +
> +# vif0 should not be a local datapath in hv1 as vif0 is down
> +# Hence ln_port should not be a related port.
> +OVN_CLEANUP([hv1
> +ln_port])> +AT_CLEANUP
> +])
> +
> OVN_FOR_EACH_NORTHD([
> AT_SETUP([container port changed to normal port and then deleted])
> ovn_start
Regards,
Dumitru
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev