Add node that will hold data about recently added MAC bindings. This node will be used later on to prevent race condition in packet buffering. the MAC binding is kept in the node longer than the timeout of the buffered packet to make sure that we don't miss it.
Signed-off-by: Ales Musil <[email protected]> --- v3: Split into two patches. --- controller/mac-learn.c | 22 ++-- controller/mac-learn.h | 10 +- controller/ovn-controller.c | 201 ++++++++++++++++++++++++++++++++++++ controller/pinctrl.c | 13 ++- 4 files changed, 226 insertions(+), 20 deletions(-) diff --git a/controller/mac-learn.c b/controller/mac-learn.c index a27607016..29c8b1fbb 100644 --- a/controller/mac-learn.c +++ b/controller/mac-learn.c @@ -29,9 +29,8 @@ VLOG_DEFINE_THIS_MODULE(mac_learn); #define MAX_MAC_BINDINGS 1000 #define MAX_FDB_ENTRIES 1000 -#define MAX_MAC_BINDING_DELAY_MSEC 50 -static size_t mac_binding_hash(uint32_t dp_key, uint32_t port_key, +static size_t keys_ip_hash(uint32_t dp_key, uint32_t port_key, struct in6_addr *); static struct mac_binding *mac_binding_find(struct hmap *mac_bindings, uint32_t dp_key, @@ -62,24 +61,23 @@ ovn_mac_bindings_destroy(struct hmap *mac_bindings) struct mac_binding * ovn_mac_binding_add(struct hmap *mac_bindings, uint32_t dp_key, uint32_t port_key, struct in6_addr *ip, - struct eth_addr mac, bool is_unicast) + struct eth_addr mac, uint32_t timestamp_offset, + bool limited_capacity) { - uint32_t hash = mac_binding_hash(dp_key, port_key, ip); + uint32_t hash = keys_ip_hash(dp_key, port_key, ip); struct mac_binding *mb = mac_binding_find(mac_bindings, dp_key, port_key, ip, hash); if (!mb) { - if (hmap_count(mac_bindings) >= MAX_MAC_BINDINGS) { + if (limited_capacity && hmap_count(mac_bindings) >= MAX_MAC_BINDINGS) { return NULL; } - uint32_t delay = is_unicast - ? 0 : random_range(MAX_MAC_BINDING_DELAY_MSEC) + 1; mb = xmalloc(sizeof *mb); mb->dp_key = dp_key; mb->port_key = port_key; mb->ip = *ip; - mb->commit_at_ms = time_msec() + delay; + mb->expire = time_msec() + timestamp_offset; hmap_insert(mac_bindings, &mb->hmap_node, hash); } mb->mac = mac; @@ -94,7 +92,7 @@ ovn_mac_binding_wait(struct hmap *mac_bindings) struct mac_binding *mb; HMAP_FOR_EACH (mb, hmap_node, mac_bindings) { - poll_timer_wait_until(mb->commit_at_ms); + poll_timer_wait_until(mb->expire); } } @@ -106,9 +104,9 @@ ovn_mac_binding_remove(struct mac_binding *mb, struct hmap *mac_bindings) } bool -ovn_mac_binding_can_commit(const struct mac_binding *mb, long long now) +ovn_mac_binding_is_expired(const struct mac_binding *mb, long long now) { - return now >= mb->commit_at_ms; + return now >= mb->expire; } /* fdb functions. */ @@ -161,7 +159,7 @@ ovn_fdb_add(struct hmap *fdbs, uint32_t dp_key, struct eth_addr mac, /* mac_binding related static functions. */ static size_t -mac_binding_hash(uint32_t dp_key, uint32_t port_key, struct in6_addr *ip) +keys_ip_hash(uint32_t dp_key, uint32_t port_key, struct in6_addr *ip) { return hash_bytes(ip, sizeof *ip, hash_2words(dp_key, port_key)); } diff --git a/controller/mac-learn.h b/controller/mac-learn.h index 57c50c58b..20b6e3b63 100644 --- a/controller/mac-learn.h +++ b/controller/mac-learn.h @@ -33,19 +33,21 @@ struct mac_binding { struct eth_addr mac; /* Timestamp when to commit to SB. */ - long long commit_at_ms; + long long expire; }; void ovn_mac_bindings_init(struct hmap *mac_bindings); void ovn_mac_bindings_destroy(struct hmap *mac_bindings); void ovn_mac_binding_wait(struct hmap *mac_bindings); void ovn_mac_binding_remove(struct mac_binding *mb, struct hmap *mac_bindings); -bool ovn_mac_binding_can_commit(const struct mac_binding *mb, long long now); +bool ovn_mac_binding_is_expired(const struct mac_binding *mb, long long now); struct mac_binding *ovn_mac_binding_add(struct hmap *mac_bindings, uint32_t dp_key, uint32_t port_key, struct in6_addr *ip, - struct eth_addr mac, bool is_unicast); + struct eth_addr mac, + uint32_t timestamp_offset, + bool limited_capacity); @@ -68,4 +70,6 @@ struct fdb_entry *ovn_fdb_add(struct hmap *fdbs, uint32_t dp_key, struct eth_addr mac, uint32_t port_key); +#define OVN_BUFFERED_PACKETS_TIMEOUT 10000 + #endif /* OVN_MAC_LEARN_H */ diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c index 7dcbfd252..a42c1affe 100644 --- a/controller/ovn-controller.c +++ b/controller/ovn-controller.c @@ -45,6 +45,7 @@ #include "lib/vswitch-idl.h" #include "local_data.h" #include "lport.h" +#include "mac-learn.h" #include "memory.h" #include "ofctrl.h" #include "ofctrl-seqno.h" @@ -4305,6 +4306,185 @@ pflow_output_debug_handler(struct engine_node *node, void *data) return true; } +static void +recent_mac_bindings_add(struct hmap *recent_mbs, + const struct sbrec_mac_binding *smb, + const struct hmap *local_datapaths, + struct ovsdb_idl_index *sbrec_port_binding_by_name) +{ + ovs_be32 ip4; + struct in6_addr ip; + struct eth_addr mac; + const struct sbrec_port_binding *pb; + + if (!get_local_datapath(local_datapaths, smb->datapath->tunnel_key)) { + return; + } + + pb = lport_lookup_by_name(sbrec_port_binding_by_name, smb->logical_port); + if (!pb) { + return; + } + + if (!eth_addr_from_string(smb->mac, &mac)) { + return; + } + + if (strchr(smb->ip, '.')) { + if (!ip_parse(smb->ip, &ip4)) { + return; + } + ip = in6_addr_mapped_ipv4(ip4); + } else { + if (!ipv6_parse(smb->ip, &ip)) { + return; + } + } + + /* Give the recent mac_binding entry bigger timeout than the buffered + * packet in case the MAC binding is created earlier. */ + ovn_mac_binding_add(recent_mbs, smb->datapath->tunnel_key, pb->tunnel_key, + &ip, mac, 2 * OVN_BUFFERED_PACKETS_TIMEOUT, false); + + const char *redirect_port = + smap_get(&pb->options, "chassis-redirect-port"); + if (!redirect_port) { + return; + } + + pb = lport_lookup_by_name(sbrec_port_binding_by_name, redirect_port); + if (!pb || pb->datapath->tunnel_key != smb->datapath->tunnel_key || + strcmp(pb->type, "chassisredirect")) { + return; + } + + /* Add the same entry also for chassisredirect port as the buffered traffic + * might be buffered on the cr port. */ + ovn_mac_binding_add(recent_mbs, smb->datapath->tunnel_key, pb->tunnel_key, + &ip, mac, 2 * OVN_BUFFERED_PACKETS_TIMEOUT, false); +} + +static void * +en_recent_mac_bindings_init(struct engine_node *node OVS_UNUSED, + struct engine_arg *arg OVS_UNUSED) +{ + struct hmap *recent_mbs = xmalloc(sizeof *recent_mbs); + hmap_init(recent_mbs); + + return recent_mbs; +} + +static void +en_recent_mac_bindings_cleanup(void *data) +{ + struct hmap *recent_mbs = data; + ovn_mac_bindings_destroy(recent_mbs); +} + +static void +en_recent_mac_bindings_clear_tracked_data(void *data) +{ + long long now = time_msec(); + struct hmap *recent_mbs = data; + + struct mac_binding *mb; + HMAP_FOR_EACH_SAFE (mb, hmap_node, recent_mbs) { + if (ovn_mac_binding_is_expired(mb, now)) { + ovn_mac_binding_remove(mb, recent_mbs); + } + } +} + +static void +en_recent_mac_bindings_run(struct engine_node *node, void *data) +{ + struct hmap *recent_mbs = data; + struct ed_type_runtime_data *rt_data = + engine_get_input_data("runtime_data", node); + const struct sbrec_mac_binding_table *mac_binding_table = + EN_OVSDB_GET(engine_get_input("SB_mac_binding", node)); + struct ovsdb_idl_index *sbrec_port_binding_by_name = + engine_ovsdb_node_get_index( + engine_get_input("SB_port_binding", node), + "name"); + + const struct sbrec_mac_binding *smb; + SBREC_MAC_BINDING_TABLE_FOR_EACH (smb, mac_binding_table) { + recent_mac_bindings_add(recent_mbs, smb, &rt_data->local_datapaths, + sbrec_port_binding_by_name); + } +} + +static bool +recent_mac_bindings_mac_binding_handler(struct engine_node *node, void *data) +{ + struct hmap *recent_mbs = data; + struct ed_type_runtime_data *rt_data = + engine_get_input_data("runtime_data", node); + const struct sbrec_mac_binding_table *mac_binding_table = + EN_OVSDB_GET(engine_get_input("SB_mac_binding", node)); + struct ovsdb_idl_index *sbrec_port_binding_by_name = + engine_ovsdb_node_get_index( + engine_get_input("SB_port_binding", node), + "name"); + + const struct sbrec_mac_binding *smb; + SBREC_MAC_BINDING_TABLE_FOR_EACH_TRACKED (smb, mac_binding_table) { + /* The deleted entry will expire eventually. */ + if (sbrec_mac_binding_is_deleted(smb)) { + continue; + } + + recent_mac_bindings_add(recent_mbs, smb, &rt_data->local_datapaths, + sbrec_port_binding_by_name); + } + + engine_set_node_state(node, EN_UPDATED); + return true; +} + +static bool +recent_mac_bindings_runtime_data_handler(struct engine_node *node, void *data) +{ + struct hmap *recent_mbs = data; + struct ed_type_runtime_data *rt_data = + engine_get_input_data("runtime_data", node); + struct ovsdb_idl_index *smb_by_datapath = + engine_ovsdb_node_get_index( + engine_get_input("SB_mac_binding", node), + "datapath"); + struct ovsdb_idl_index *sbrec_port_binding_by_name = + engine_ovsdb_node_get_index( + engine_get_input("SB_port_binding", node), + "name"); + + if (!rt_data->tracked) { + return false; + } + + struct tracked_datapath *tdp; + HMAP_FOR_EACH (tdp, node, &rt_data->tracked_dp_bindings) { + if (tdp->tracked_type != TRACKED_RESOURCE_NEW) { + continue; + } + + struct sbrec_mac_binding *mb_index_row = + sbrec_mac_binding_index_init_row(smb_by_datapath); + sbrec_mac_binding_index_set_datapath(mb_index_row, tdp->dp); + + const struct sbrec_mac_binding *smb; + SBREC_MAC_BINDING_FOR_EACH_EQUAL (smb, mb_index_row, smb_by_datapath) { + recent_mac_bindings_add(recent_mbs, smb, &rt_data->local_datapaths, + sbrec_port_binding_by_name); + } + + sbrec_mac_binding_index_destroy_row(mb_index_row); + } + + engine_set_node_state(node, EN_UPDATED); + return true; +} + static void * en_controller_output_init(struct engine_node *node OVS_UNUSED, struct engine_arg *arg OVS_UNUSED) @@ -4341,6 +4521,14 @@ controller_output_lflow_output_handler(struct engine_node *node, return true; } +static bool +flow_output_recent_mac_bindings_handler(struct engine_node *node, + void *data OVS_UNUSED) +{ + engine_set_node_state(node, EN_UPDATED); + return true; +} + struct ovn_controller_exit_args { bool *exiting; bool *restart; @@ -4613,6 +4801,8 @@ main(int argc, char *argv[]) ENGINE_NODE(northd_options, "northd_options"); ENGINE_NODE(dhcp_options, "dhcp_options"); ENGINE_NODE_WITH_CLEAR_TRACK_DATA(lb_data, "lb_data"); + ENGINE_NODE_WITH_CLEAR_TRACK_DATA(recent_mac_bindings, + "recent_mac_bindings"); #define SB_NODE(NAME, NAME_STR) ENGINE_NODE_SB(NAME, NAME_STR); SB_NODES @@ -4785,10 +4975,21 @@ main(int argc, char *argv[]) engine_add_input(&en_runtime_data, &en_ovs_interface_shadow, runtime_data_ovs_interface_shadow_handler); + engine_add_input(&en_recent_mac_bindings, &en_sb_mac_binding, + recent_mac_bindings_mac_binding_handler); + engine_add_input(&en_recent_mac_bindings, &en_runtime_data, + recent_mac_bindings_runtime_data_handler); + /* Using a noop handler since we really need just access to the index + * which allows us to filter port binding by name. */ + engine_add_input(&en_recent_mac_bindings, &en_sb_port_binding, + engine_noop_handler); + engine_add_input(&en_controller_output, &en_lflow_output, controller_output_lflow_output_handler); engine_add_input(&en_controller_output, &en_pflow_output, controller_output_pflow_output_handler); + engine_add_input(&en_controller_output, &en_recent_mac_bindings, + flow_output_recent_mac_bindings_handler); struct engine_arg engine_arg = { .sb_idl = ovnsb_idl_loop.idl, diff --git a/controller/pinctrl.c b/controller/pinctrl.c index 795847729..611c19568 100644 --- a/controller/pinctrl.c +++ b/controller/pinctrl.c @@ -1541,7 +1541,6 @@ buffered_send_packets(struct rconn *swconn, struct buffered_packets *bp, } } -#define BUFFER_MAP_TIMEOUT 10000 static void buffered_packets_map_gc(void) { @@ -1549,7 +1548,7 @@ buffered_packets_map_gc(void) long long int now = time_msec(); HMAP_FOR_EACH_SAFE (cur_qp, hmap_node, &buffered_packets_map) { - if (now > cur_qp->timestamp + BUFFER_MAP_TIMEOUT) { + if (now > cur_qp->timestamp + OVN_BUFFERED_PACKETS_TIMEOUT) { destroy_buffered_packets(cur_qp); hmap_remove(&buffered_packets_map, &cur_qp->hmap_node); free(cur_qp); @@ -4196,6 +4195,8 @@ destroy_put_mac_bindings(void) ovn_mac_bindings_destroy(&put_mac_bindings); } +#define MAX_MAC_BINDING_DELAY_MSEC 50 + /* Called with in the pinctrl_handler thread context. */ static void pinctrl_handle_put_mac_binding(const struct flow *md, @@ -4216,11 +4217,13 @@ pinctrl_handle_put_mac_binding(const struct flow *md, /* If the ARP reply was unicast we should not delay it, * there won't be any race. */ - bool is_unicast = !eth_addr_is_multicast(headers->dl_dst); + uint32_t delay = eth_addr_is_multicast(headers->dl_dst) + ? random_range(MAX_MAC_BINDING_DELAY_MSEC) + 1 + : 0; struct mac_binding *mb = ovn_mac_binding_add(&put_mac_bindings, dp_key, port_key, &ip_key, headers->dl_src, - is_unicast); + delay, true); if (!mb) { COVERAGE_INC(pinctrl_drop_put_mac_binding); return; @@ -4391,7 +4394,7 @@ run_put_mac_bindings(struct ovsdb_idl_txn *ovnsb_idl_txn, struct mac_binding *mb; HMAP_FOR_EACH_SAFE (mb, hmap_node, &put_mac_bindings) { - if (ovn_mac_binding_can_commit(mb, now)) { + if (ovn_mac_binding_is_expired(mb, now)) { run_put_mac_binding(ovnsb_idl_txn, sbrec_datapath_binding_by_key, sbrec_port_binding_by_key, -- 2.39.2 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
