There was a race within packet buffering that could result in first packt being dropped. It could happen under following conditions and topology: S1 == R1 == public == R2 == S2 SNAT on R1 and DGP on port connecting R1 with public.
1) The GARP is sent for the GDP SNAT 2) The GARP is delayed on R2 because it's multicast 3) Some traffic that get's buffered on S2 4) An ARP is sent as consequence of the buffering 5) The delayed MAC binding is added to SB 6) Response for the ARP is ignored because the MAC binding already exists 7) The buffered packet is never sent out and times out In order to prevent this behavior add new node that will keep track of all recently changed MAC bindings. Those recently changed MAC bindings are kept around for a longer time than the buffered packets which should ensure that we can find them even if they are created before the packet is actually buffered. At the same time simplify the packet buffering process and move it to mac-learn module. Signed-off-by: Ales Musil <[email protected]> --- v2: Rebase on top of current main. --- controller/mac-learn.c | 162 +++++++++++++++-- controller/mac-learn.h | 48 +++++- controller/ovn-controller.c | 208 +++++++++++++++++++++- controller/pinctrl.c | 336 ++++++++---------------------------- controller/pinctrl.h | 4 +- 5 files changed, 472 insertions(+), 286 deletions(-) diff --git a/controller/mac-learn.c b/controller/mac-learn.c index a27607016..7cb8cd21d 100644 --- a/controller/mac-learn.c +++ b/controller/mac-learn.c @@ -18,10 +18,10 @@ #include "mac-learn.h" /* OpenvSwitch lib includes. */ +#include "dp-packet.h" #include "openvswitch/poll-loop.h" #include "openvswitch/vlog.h" #include "lib/packets.h" -#include "lib/random.h" #include "lib/smap.h" #include "lib/timeval.h" @@ -29,11 +29,11 @@ VLOG_DEFINE_THIS_MODULE(mac_learn); #define MAX_MAC_BINDINGS 1000 #define MAX_FDB_ENTRIES 1000 -#define MAX_MAC_BINDING_DELAY_MSEC 50 +#define BUFFER_QUEUE_DEPTH 4 -static size_t mac_binding_hash(uint32_t dp_key, uint32_t port_key, - struct in6_addr *); -static struct mac_binding *mac_binding_find(struct hmap *mac_bindings, +static size_t keys_ip_hash(uint32_t dp_key, uint32_t port_key, + struct in6_addr *); +static struct mac_binding *mac_binding_find(const struct hmap *mac_bindings, uint32_t dp_key, uint32_t port_key, struct in6_addr *ip, size_t hash); @@ -62,24 +62,23 @@ ovn_mac_bindings_destroy(struct hmap *mac_bindings) struct mac_binding * ovn_mac_binding_add(struct hmap *mac_bindings, uint32_t dp_key, uint32_t port_key, struct in6_addr *ip, - struct eth_addr mac, bool is_unicast) + struct eth_addr mac, uint32_t timestamp_offset, + bool limited_capacity) { - uint32_t hash = mac_binding_hash(dp_key, port_key, ip); + uint32_t hash = keys_ip_hash(dp_key, port_key, ip); struct mac_binding *mb = mac_binding_find(mac_bindings, dp_key, port_key, ip, hash); if (!mb) { - if (hmap_count(mac_bindings) >= MAX_MAC_BINDINGS) { + if (limited_capacity && hmap_count(mac_bindings) >= MAX_MAC_BINDINGS) { return NULL; } - uint32_t delay = is_unicast - ? 0 : random_range(MAX_MAC_BINDING_DELAY_MSEC) + 1; mb = xmalloc(sizeof *mb); mb->dp_key = dp_key; mb->port_key = port_key; mb->ip = *ip; - mb->commit_at_ms = time_msec() + delay; + mb->expire = time_msec() + timestamp_offset; hmap_insert(mac_bindings, &mb->hmap_node, hash); } mb->mac = mac; @@ -94,7 +93,7 @@ ovn_mac_binding_wait(struct hmap *mac_bindings) struct mac_binding *mb; HMAP_FOR_EACH (mb, hmap_node, mac_bindings) { - poll_timer_wait_until(mb->commit_at_ms); + poll_timer_wait_until(mb->expire); } } @@ -106,9 +105,9 @@ ovn_mac_binding_remove(struct mac_binding *mb, struct hmap *mac_bindings) } bool -ovn_mac_binding_can_commit(const struct mac_binding *mb, long long now) +ovn_mac_binding_is_expired(const struct mac_binding *mb, long long now) { - return now >= mb->commit_at_ms; + return now >= mb->expire; } /* fdb functions. */ @@ -161,14 +160,14 @@ ovn_fdb_add(struct hmap *fdbs, uint32_t dp_key, struct eth_addr mac, /* mac_binding related static functions. */ static size_t -mac_binding_hash(uint32_t dp_key, uint32_t port_key, struct in6_addr *ip) +keys_ip_hash(uint32_t dp_key, uint32_t port_key, struct in6_addr *ip) { return hash_bytes(ip, sizeof *ip, hash_2words(dp_key, port_key)); } static struct mac_binding * -mac_binding_find(struct hmap *mac_bindings, uint32_t dp_key, - uint32_t port_key, struct in6_addr *ip, size_t hash) +mac_binding_find(const struct hmap *mac_bindings, uint32_t dp_key, + uint32_t port_key, struct in6_addr *ip, size_t hash) { struct mac_binding *mb; HMAP_FOR_EACH_WITH_HASH (mb, hmap_node, hash, mac_bindings) { @@ -203,3 +202,132 @@ fdb_entry_find(struct hmap *fdbs, uint32_t dp_key, return NULL; } + +static struct buffered_packets * +buffered_packets_find(struct hmap *hmap, uint64_t dp_key, uint64_t port_key, + struct in6_addr *ip, uint32_t hash) +{ + struct buffered_packets *mb; + HMAP_FOR_EACH_WITH_HASH (mb, hmap_node, hash, hmap) { + if (mb->dp_key == dp_key && mb->port_key == port_key && + IN6_ARE_ADDR_EQUAL(&mb->ip, ip)) { + return mb; + } + } + + return NULL; +} + +static void +buffered_packets_destroy(struct buffered_packets *bp) +{ + ovn_packet_data_list_destroy(&bp->queue); + free(bp); +} + +struct buffered_packets * +ovn_buffered_packets_add(struct hmap *hmap, uint64_t dp_key, uint64_t port_key, + struct in6_addr ip) +{ + struct buffered_packets *bp; + + uint32_t hash = keys_ip_hash(dp_key, port_key, &ip); + bp = buffered_packets_find(hmap, dp_key, port_key, &ip, hash); + if (!bp) { + if (hmap_count(hmap) >= 1000) { + return NULL; + } + + bp = xmalloc(sizeof *bp); + hmap_insert(hmap, &bp->hmap_node, hash); + bp->ip = ip; + bp->dp_key = dp_key; + bp->port_key = port_key; + ovs_list_init(&bp->queue); + } + + bp->expire = time_msec() + OVN_BUFFERED_PACKETS_TIMEOUT; + + return bp; +} + +void +ovn_buffered_packets_add_packet_data(struct buffered_packets *bp, + struct ofpbuf ofpacts, + struct dp_packet *packet) +{ + struct packet_data *pd = xmalloc(sizeof *pd); + + pd->ofpacts = ofpacts; + pd->p = packet; + + if (ovs_list_size(&bp->queue) == BUFFER_QUEUE_DEPTH) { + struct packet_data *p = CONTAINER_OF(ovs_list_pop_front(&bp->queue), + struct packet_data, node); + ovn_packet_data_destroy(p); + } + + ovs_list_push_back(&bp->queue, &pd->node); +} + +void +ovn_buffured_packets_prepare_ready(struct hmap *bp_hmap, + const struct hmap *recent_mac_bindings, + struct ovs_list *ready_packet_data) +{ + long long now = time_msec(); + + struct buffered_packets *bp; + HMAP_FOR_EACH_SAFE (bp, hmap_node, bp_hmap) { + if (now > bp->expire) { + hmap_remove(bp_hmap, &bp->hmap_node); + buffered_packets_destroy(bp); + continue; + } + + uint32_t hash = keys_ip_hash(bp->dp_key, bp->port_key, &bp->ip); + struct mac_binding *mb = mac_binding_find(recent_mac_bindings, + bp->dp_key, bp->port_key, + &bp->ip, hash); + if (!mb) { + continue; + } + + struct packet_data *pd; + LIST_FOR_EACH_POP (pd, node, &bp->queue) { + struct eth_header *eth = dp_packet_data(pd->p); + eth->eth_dst = mb->mac; + + ovs_list_push_back(ready_packet_data, &pd->node); + } + + hmap_remove(bp_hmap, &bp->hmap_node); + buffered_packets_destroy(bp); + } +} + +void +ovn_packet_data_destroy(struct packet_data *pd) +{ + dp_packet_delete(pd->p); + ofpbuf_uninit(&pd->ofpacts); + free(pd); +} + +void +ovn_packet_data_list_destroy(struct ovs_list *list) +{ + struct packet_data *pd; + LIST_FOR_EACH_POP (pd, node, list) { + ovn_packet_data_destroy(pd); + } +} + +void +ovn_buffered_packets_hmap_destroy(struct hmap *hmap) +{ + struct buffered_packets *bp; + HMAP_FOR_EACH_POP (bp, hmap_node, hmap) { + buffered_packets_destroy(bp); + } +} diff --git a/controller/mac-learn.h b/controller/mac-learn.h index 57c50c58b..7b342d339 100644 --- a/controller/mac-learn.h +++ b/controller/mac-learn.h @@ -19,7 +19,11 @@ #include <sys/types.h> #include <netinet/in.h> + +#include "dp-packet.h" #include "openvswitch/hmap.h" +#include "openvswitch/list.h" +#include "openvswitch/ofpbuf.h" struct mac_binding { struct hmap_node hmap_node; /* In a hmap. */ @@ -32,20 +36,21 @@ struct mac_binding { /* Value. */ struct eth_addr mac; - /* Timestamp when to commit to SB. */ - long long commit_at_ms; + long long expire; }; void ovn_mac_bindings_init(struct hmap *mac_bindings); void ovn_mac_bindings_destroy(struct hmap *mac_bindings); void ovn_mac_binding_wait(struct hmap *mac_bindings); void ovn_mac_binding_remove(struct mac_binding *mb, struct hmap *mac_bindings); -bool ovn_mac_binding_can_commit(const struct mac_binding *mb, long long now); +bool ovn_mac_binding_is_expired(const struct mac_binding *mb, long long now); struct mac_binding *ovn_mac_binding_add(struct hmap *mac_bindings, uint32_t dp_key, uint32_t port_key, struct in6_addr *ip, - struct eth_addr mac, bool is_unicast); + struct eth_addr mac, + uint32_t timestamp_offset, + bool limited_capacity); @@ -68,4 +73,39 @@ struct fdb_entry *ovn_fdb_add(struct hmap *fdbs, uint32_t dp_key, struct eth_addr mac, uint32_t port_key); +struct packet_data { + struct ovs_list node; + + struct ofpbuf ofpacts; + struct dp_packet *p; +}; + +struct buffered_packets { + struct hmap_node hmap_node; + + struct in6_addr ip; + uint64_t dp_key; + uint64_t port_key; + + struct ovs_list queue; + + long long int expire; +}; + +#define OVN_BUFFERED_PACKETS_TIMEOUT 10000 + +struct buffered_packets *ovn_buffered_packets_add(struct hmap *hmap, + uint64_t dp_key, + uint64_t port_key, + struct in6_addr ip); +void ovn_buffered_packets_add_packet_data(struct buffered_packets *bp, + struct ofpbuf ofpacts, + struct dp_packet *packet); +void ovn_buffured_packets_prepare_ready(struct hmap *bp_hmap, + const struct hmap *recent_mac_bindings, + struct ovs_list *ready_packet_data); +void ovn_packet_data_destroy(struct packet_data *pd); +void ovn_packet_data_list_destroy(struct ovs_list *list); +void ovn_buffered_packets_hmap_destroy(struct hmap *hmap); + #endif /* OVN_MAC_LEARN_H */ diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c index 7dcbfd252..c03b2af4c 100644 --- a/controller/ovn-controller.c +++ b/controller/ovn-controller.c @@ -45,6 +45,7 @@ #include "lib/vswitch-idl.h" #include "local_data.h" #include "lport.h" +#include "mac-learn.h" #include "memory.h" #include "ofctrl.h" #include "ofctrl-seqno.h" @@ -4305,6 +4306,185 @@ pflow_output_debug_handler(struct engine_node *node, void *data) return true; } +static void +recent_mac_bindings_add(struct hmap *recent_mbs, + const struct sbrec_mac_binding *smb, + const struct hmap *local_datapaths, + struct ovsdb_idl_index *sbrec_port_binding_by_name) +{ + ovs_be32 ip4; + struct in6_addr ip; + struct eth_addr mac; + const struct sbrec_port_binding *pb; + + if (!get_local_datapath(local_datapaths, smb->datapath->tunnel_key)) { + return; + } + + pb = lport_lookup_by_name(sbrec_port_binding_by_name, smb->logical_port); + if (!pb) { + return; + } + + if (!eth_addr_from_string(smb->mac, &mac)) { + return; + } + + if (strchr(smb->ip, '.')) { + if (!ip_parse(smb->ip, &ip4)) { + return; + } + ip = in6_addr_mapped_ipv4(ip4); + } else { + if (!ipv6_parse(smb->ip, &ip)) { + return; + } + } + + /* Give the recent mac_binding entry bigger timeout than the buffered + * packet in case the MAC binding is created earlier. */ + ovn_mac_binding_add(recent_mbs, smb->datapath->tunnel_key, pb->tunnel_key, + &ip, mac, 2 * OVN_BUFFERED_PACKETS_TIMEOUT, false); + + const char *redirect_port = + smap_get(&pb->options, "chassis-redirect-port"); + if (!redirect_port) { + return; + } + + pb = lport_lookup_by_name(sbrec_port_binding_by_name, redirect_port); + if (!pb || pb->datapath->tunnel_key != smb->datapath->tunnel_key || + strcmp(pb->type, "chassisredirect")) { + return; + } + + /* Add the same entry also for chassisredirect port as the buffered traffic + * might be buffered on the cr port. */ + ovn_mac_binding_add(recent_mbs, smb->datapath->tunnel_key, pb->tunnel_key, + &ip, mac, 2 * OVN_BUFFERED_PACKETS_TIMEOUT, false); +} + +static void * +en_recent_mac_bindings_init(struct engine_node *node OVS_UNUSED, + struct engine_arg *arg OVS_UNUSED) +{ + struct hmap *recent_mbs = xmalloc(sizeof *recent_mbs); + hmap_init(recent_mbs); + + return recent_mbs; +} + +static void +en_recent_mac_bindings_cleanup(void *data) +{ + struct hmap *recent_mbs = data; + ovn_mac_bindings_destroy(recent_mbs); +} + +static void +en_recent_mac_bindings_clear_tracked_data(void *data) +{ + long long now = time_msec(); + struct hmap *recent_mbs = data; + + struct mac_binding *mb; + HMAP_FOR_EACH_SAFE (mb, hmap_node, recent_mbs) { + if (ovn_mac_binding_is_expired(mb, now)) { + ovn_mac_binding_remove(mb, recent_mbs); + } + } +} + +static void +en_recent_mac_bindings_run(struct engine_node *node, void *data) +{ + struct hmap *recent_mbs = data; + struct ed_type_runtime_data *rt_data = + engine_get_input_data("runtime_data", node); + const struct sbrec_mac_binding_table *mac_binding_table = + EN_OVSDB_GET(engine_get_input("SB_mac_binding", node)); + struct ovsdb_idl_index *sbrec_port_binding_by_name = + engine_ovsdb_node_get_index( + engine_get_input("SB_port_binding", node), + "name"); + + const struct sbrec_mac_binding *smb; + SBREC_MAC_BINDING_TABLE_FOR_EACH (smb, mac_binding_table) { + recent_mac_bindings_add(recent_mbs, smb, &rt_data->local_datapaths, + sbrec_port_binding_by_name); + } +} + +static bool +recent_mac_bindings_mac_binding_handler(struct engine_node *node, void *data) +{ + struct hmap *recent_mbs = data; + struct ed_type_runtime_data *rt_data = + engine_get_input_data("runtime_data", node); + const struct sbrec_mac_binding_table *mac_binding_table = + EN_OVSDB_GET(engine_get_input("SB_mac_binding", node)); + struct ovsdb_idl_index *sbrec_port_binding_by_name = + engine_ovsdb_node_get_index( + engine_get_input("SB_port_binding", node), + "name"); + + const struct sbrec_mac_binding *smb; + SBREC_MAC_BINDING_TABLE_FOR_EACH_TRACKED (smb, mac_binding_table) { + /* The deleted entry will expire eventually. */ + if (sbrec_mac_binding_is_deleted(smb)) { + continue; + } + + recent_mac_bindings_add(recent_mbs, smb, &rt_data->local_datapaths, + sbrec_port_binding_by_name); + } + + engine_set_node_state(node, EN_UPDATED); + return true; +} + +static bool +recent_mac_bindings_runtime_data_handler(struct engine_node *node, void *data) +{ + struct hmap *recent_mbs = data; + struct ed_type_runtime_data *rt_data = + engine_get_input_data("runtime_data", node); + struct ovsdb_idl_index *smb_by_datapath = + engine_ovsdb_node_get_index( + engine_get_input("SB_mac_binding", node), + "datapath"); + struct ovsdb_idl_index *sbrec_port_binding_by_name = + engine_ovsdb_node_get_index( + engine_get_input("SB_port_binding", node), + "name"); + + if (!rt_data->tracked) { + return false; + } + + struct tracked_datapath *tdp; + HMAP_FOR_EACH (tdp, node, &rt_data->tracked_dp_bindings) { + if (tdp->tracked_type != TRACKED_RESOURCE_NEW) { + continue; + } + + struct sbrec_mac_binding *mb_index_row = + sbrec_mac_binding_index_init_row(smb_by_datapath); + sbrec_mac_binding_index_set_datapath(mb_index_row, tdp->dp); + + const struct sbrec_mac_binding *smb; + SBREC_MAC_BINDING_FOR_EACH_EQUAL (smb, mb_index_row, smb_by_datapath) { + recent_mac_bindings_add(recent_mbs, smb, &rt_data->local_datapaths, + sbrec_port_binding_by_name); + } + + sbrec_mac_binding_index_destroy_row(mb_index_row); + } + + engine_set_node_state(node, EN_UPDATED); + return true; +} + static void * en_controller_output_init(struct engine_node *node OVS_UNUSED, struct engine_arg *arg OVS_UNUSED) @@ -4341,6 +4521,14 @@ controller_output_lflow_output_handler(struct engine_node *node, return true; } +static bool +flow_output_recent_mac_bindings_handler(struct engine_node *node, + void *data OVS_UNUSED) +{ + engine_set_node_state(node, EN_UPDATED); + return true; +} + struct ovn_controller_exit_args { bool *exiting; bool *restart; @@ -4613,6 +4801,8 @@ main(int argc, char *argv[]) ENGINE_NODE(northd_options, "northd_options"); ENGINE_NODE(dhcp_options, "dhcp_options"); ENGINE_NODE_WITH_CLEAR_TRACK_DATA(lb_data, "lb_data"); + ENGINE_NODE_WITH_CLEAR_TRACK_DATA(recent_mac_bindings, + "recent_mac_bindings"); #define SB_NODE(NAME, NAME_STR) ENGINE_NODE_SB(NAME, NAME_STR); SB_NODES @@ -4785,10 +4975,21 @@ main(int argc, char *argv[]) engine_add_input(&en_runtime_data, &en_ovs_interface_shadow, runtime_data_ovs_interface_shadow_handler); + engine_add_input(&en_recent_mac_bindings, &en_sb_mac_binding, + recent_mac_bindings_mac_binding_handler); + engine_add_input(&en_recent_mac_bindings, &en_runtime_data, + recent_mac_bindings_runtime_data_handler); + /* Using a noop handler since we really need just access to the index + * which allows us to filter port binding by name. */ + engine_add_input(&en_recent_mac_bindings, &en_sb_port_binding, + engine_noop_handler); + engine_add_input(&en_controller_output, &en_lflow_output, controller_output_lflow_output_handler); engine_add_input(&en_controller_output, &en_pflow_output, controller_output_pflow_output_handler); + engine_add_input(&en_controller_output, &en_recent_mac_bindings, + flow_output_recent_mac_bindings_handler); struct engine_arg engine_arg = { .sb_idl = ovnsb_idl_loop.idl, @@ -4834,6 +5035,8 @@ main(int argc, char *argv[]) engine_get_internal_data(&en_template_vars); struct ed_type_lb_data *lb_data = engine_get_internal_data(&en_lb_data); + struct hmap *recent_mac_bindings = + engine_get_internal_data(&en_recent_mac_bindings); ofctrl_init(&lflow_output_data->group_table, &lflow_output_data->meter_table, @@ -5186,14 +5389,13 @@ main(int argc, char *argv[]) ovnsb_idl_loop.idl), sbrec_service_monitor_table_get( ovnsb_idl_loop.idl), - sbrec_mac_binding_table_get( - ovnsb_idl_loop.idl), sbrec_bfd_table_get(ovnsb_idl_loop.idl), br_int, chassis, &runtime_data->local_datapaths, &runtime_data->active_tunnels, &runtime_data->local_active_ports_ipv6_pd, - &runtime_data->local_active_ports_ras); + &runtime_data->local_active_ports_ras, + recent_mac_bindings); stopwatch_stop(PINCTRL_RUN_STOPWATCH_NAME, time_msec()); mirror_run(ovs_idl_txn, diff --git a/controller/pinctrl.c b/controller/pinctrl.c index 795847729..e57fd4da1 100644 --- a/controller/pinctrl.c +++ b/controller/pinctrl.c @@ -178,11 +178,9 @@ struct pinctrl { static struct pinctrl pinctrl; -static void init_buffered_packets_map(void); -static void destroy_buffered_packets_map(void); -static void -run_buffered_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name, - const struct sbrec_mac_binding_table *mac_binding_table) +static void init_buffered_packets_data(void); +static void destroy_buffered_packets_data(void); +static void run_buffered_binding(const struct hmap *recent_mac_bidnings) OVS_REQUIRES(pinctrl_mutex); static void pinctrl_handle_put_mac_binding(const struct flow *md, @@ -535,7 +533,7 @@ pinctrl_init(void) init_send_garps_rarps(); init_ipv6_ras(); init_ipv6_prefixd(); - init_buffered_packets_map(); + init_buffered_packets_data(); init_activated_ports(); init_event_table(); ip_mcast_snoop_init(); @@ -1416,222 +1414,72 @@ prepare_ipv6_prefixd(struct ovsdb_idl_txn *ovnsb_idl_txn, } } -struct buffer_info { - struct ofpbuf ofpacts; - struct dp_packet *p; -}; - -#define BUFFER_QUEUE_DEPTH 4 -struct buffered_packets { - struct hmap_node hmap_node; - struct ovs_list list; - - /* key */ - struct in6_addr ip; - struct eth_addr ea; - - uint64_t dp_key; - uint64_t port_key; - - long long int timestamp; - - struct buffer_info data[BUFFER_QUEUE_DEPTH]; - uint32_t head, tail; -}; - static struct hmap buffered_packets_map; -static struct ovs_list buffered_mac_bindings; +/* List of 'struct packet_data' that is ready to be sent. */ +static struct ovs_list ready_packets_data; static void -init_buffered_packets_map(void) +init_buffered_packets_data(void) { hmap_init(&buffered_packets_map); - ovs_list_init(&buffered_mac_bindings); -} - -static void -destroy_buffered_packets(struct buffered_packets *bp) -{ - struct buffer_info *bi; - - while (bp->head != bp->tail) { - bi = &bp->data[bp->head]; - dp_packet_delete(bi->p); - ofpbuf_uninit(&bi->ofpacts); - - bp->head = (bp->head + 1) % BUFFER_QUEUE_DEPTH; - } -} - -static void -destroy_buffered_packets_map(void) -{ - struct buffered_packets *bp; - HMAP_FOR_EACH_SAFE (bp, hmap_node, &buffered_packets_map) { - destroy_buffered_packets(bp); - hmap_remove(&buffered_packets_map, &bp->hmap_node); - free(bp); - } - hmap_destroy(&buffered_packets_map); - - LIST_FOR_EACH_POP (bp, list, &buffered_mac_bindings) { - destroy_buffered_packets(bp); - free(bp); - } -} - -static void -buffered_push_packet(struct buffered_packets *bp, - struct dp_packet *packet, - const struct match *md) -{ - uint32_t next = (bp->tail + 1) % BUFFER_QUEUE_DEPTH; - struct buffer_info *bi = &bp->data[bp->tail]; - - ofpbuf_init(&bi->ofpacts, 4096); - - reload_metadata(&bi->ofpacts, md); - /* reload pkt_mark field */ - const struct mf_field *pkt_mark_field = mf_from_id(MFF_PKT_MARK); - union mf_value pkt_mark_value; - mf_get_value(pkt_mark_field, &md->flow, &pkt_mark_value); - ofpact_put_set_field(&bi->ofpacts, pkt_mark_field, &pkt_mark_value, NULL); - - struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&bi->ofpacts); - resubmit->in_port = OFPP_CONTROLLER; - resubmit->table_id = OFTABLE_REMOTE_OUTPUT; - - bi->p = packet; - - if (next == bp->head) { - bi = &bp->data[bp->head]; - dp_packet_delete(bi->p); - ofpbuf_uninit(&bi->ofpacts); - bp->head = (bp->head + 1) % BUFFER_QUEUE_DEPTH; - } - bp->tail = next; -} - -static void -buffered_send_packets(struct rconn *swconn, struct buffered_packets *bp, - struct eth_addr *addr) -{ - enum ofp_version version = rconn_get_version(swconn); - enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version); - - while (bp->head != bp->tail) { - struct buffer_info *bi = &bp->data[bp->head]; - struct eth_header *eth = dp_packet_data(bi->p); - - eth->eth_dst = *addr; - struct ofputil_packet_out po = { - .packet = dp_packet_data(bi->p), - .packet_len = dp_packet_size(bi->p), - .buffer_id = UINT32_MAX, - .ofpacts = bi->ofpacts.data, - .ofpacts_len = bi->ofpacts.size, - }; - match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER); - queue_msg(swconn, ofputil_encode_packet_out(&po, proto)); - - ofpbuf_uninit(&bi->ofpacts); - dp_packet_delete(bi->p); - - bp->head = (bp->head + 1) % BUFFER_QUEUE_DEPTH; - } + ovs_list_init(&ready_packets_data); } -#define BUFFER_MAP_TIMEOUT 10000 static void -buffered_packets_map_gc(void) -{ - struct buffered_packets *cur_qp; - long long int now = time_msec(); - - HMAP_FOR_EACH_SAFE (cur_qp, hmap_node, &buffered_packets_map) { - if (now > cur_qp->timestamp + BUFFER_MAP_TIMEOUT) { - destroy_buffered_packets(cur_qp); - hmap_remove(&buffered_packets_map, &cur_qp->hmap_node); - free(cur_qp); - } - } -} - -static uint32_t -pinctrl_buffer_packet_hash(uint64_t dp_key, uint64_t port_key, - const struct in6_addr *addr) +destroy_buffered_packets_data(void) { - uint32_t hash = 0; - hash = hash_add64(hash, port_key); - hash = hash_add64(hash, dp_key); - hash = hash_bytes(addr, sizeof addr, hash); - return hash_finish(hash, 16); -} - -static struct buffered_packets * -pinctrl_find_buffered_packets(uint64_t dp_key, uint64_t port_key, - const struct in6_addr *ip, uint32_t hash) -{ - struct buffered_packets *qp; - HMAP_FOR_EACH_WITH_HASH (qp, hmap_node, hash, &buffered_packets_map) { - if (qp->dp_key == dp_key && qp->port_key == port_key && - IN6_ARE_ADDR_EQUAL(&qp->ip, ip)) { - return qp; - } - } - return NULL; -} - -static struct buffered_packets * -pinctrl_find_buffered_packets_with_hash(uint64_t dp_key, uint64_t port_key, - const struct in6_addr *ip) -{ - uint32_t hash = pinctrl_buffer_packet_hash(dp_key, port_key, ip); - - return pinctrl_find_buffered_packets(dp_key, port_key, ip, hash); + ovn_buffered_packets_hmap_destroy(&buffered_packets_map); + ovn_packet_data_list_destroy(&ready_packets_data); } /* Called with in the pinctrl_handler thread context. */ -static int +static void pinctrl_handle_buffered_packets(struct dp_packet *pkt_in, const struct match *md, bool is_arp) OVS_REQUIRES(pinctrl_mutex) { - struct buffered_packets *bp; + struct in6_addr ip; + struct ofpbuf ofpacts; struct dp_packet *clone; - struct in6_addr addr; + struct buffered_packets *bp; uint64_t dp_key = ntohll(md->flow.metadata); uint64_t oport_key = md->flow.regs[MFF_LOG_OUTPORT - MFF_REG0]; if (is_arp) { - addr = in6_addr_mapped_ipv4(htonl(md->flow.regs[0])); + ip = in6_addr_mapped_ipv4(htonl(md->flow.regs[0])); } else { ovs_be128 ip6 = hton128(flow_get_xxreg(&md->flow, 0)); - memcpy(&addr, &ip6, sizeof addr); + memcpy(&ip, &ip6, sizeof ip); } - uint32_t hash = pinctrl_buffer_packet_hash(dp_key, oport_key, &addr); - bp = pinctrl_find_buffered_packets(dp_key, oport_key, &addr, hash); + bp = ovn_buffered_packets_add(&buffered_packets_map, dp_key, + oport_key, ip); if (!bp) { - if (hmap_count(&buffered_packets_map) >= 1000) { - COVERAGE_INC(pinctrl_drop_buffered_packets_map); - return -ENOMEM; - } - - bp = xmalloc(sizeof *bp); - hmap_insert(&buffered_packets_map, &bp->hmap_node, hash); - bp->head = bp->tail = 0; - bp->ip = addr; - bp->dp_key = dp_key; - bp->port_key = oport_key; + COVERAGE_INC(pinctrl_drop_buffered_packets_map); + return; } - bp->timestamp = time_msec(); + /* clone the packet to send it later with correct L2 address */ clone = dp_packet_clone_data(dp_packet_data(pkt_in), dp_packet_size(pkt_in)); - buffered_push_packet(bp, clone, md); - return 0; + + ofpbuf_init(&ofpacts, 4096); + reload_metadata(&ofpacts, md); + /* reload pkt_mark field */ + const struct mf_field *pkt_mark_field = mf_from_id(MFF_PKT_MARK); + union mf_value pkt_mark_value; + mf_get_value(pkt_mark_field, &md->flow, &pkt_mark_value); + ofpact_put_set_field(&ofpacts, pkt_mark_field, &pkt_mark_value, NULL); + + struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts); + resubmit->in_port = OFPP_CONTROLLER; + resubmit->table_id = OFTABLE_REMOTE_OUTPUT; + + ovn_buffered_packets_add_packet_data(bp, ofpacts, clone); + + /* There is a chance that the MAC binding was already created. */ + notify_pinctrl_main(); } /* Called with in the pinctrl_handler thread context. */ @@ -3598,14 +3446,14 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, const struct sbrec_dns_table *dns_table, const struct sbrec_controller_event_table *ce_table, const struct sbrec_service_monitor_table *svc_mon_table, - const struct sbrec_mac_binding_table *mac_binding_table, const struct sbrec_bfd_table *bfd_table, const struct ovsrec_bridge *br_int, const struct sbrec_chassis *chassis, const struct hmap *local_datapaths, const struct sset *active_tunnels, const struct shash *local_active_ports_ipv6_pd, - const struct shash *local_active_ports_ras) + const struct shash *local_active_ports_ras, + const struct hmap *recent_mac_bindings) { ovs_mutex_lock(&pinctrl_mutex); run_put_mac_bindings(ovnsb_idl_txn, sbrec_datapath_binding_by_key, @@ -3628,7 +3476,7 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, sbrec_port_binding_by_key, sbrec_igmp_groups, sbrec_ip_multicast_opts); - run_buffered_binding(sbrec_port_binding_by_name, mac_binding_table); + run_buffered_binding(recent_mac_bindings); sync_svc_monitors(ovnsb_idl_txn, svc_mon_table, sbrec_port_binding_by_name, chassis); bfd_monitor_run(ovnsb_idl_txn, bfd_table, sbrec_port_binding_by_name, @@ -4154,7 +4002,7 @@ pinctrl_destroy(void) destroy_send_garps_rarps(); destroy_ipv6_ras(); destroy_ipv6_prefixd(); - destroy_buffered_packets_map(); + destroy_buffered_packets_data(); destroy_activated_ports(); event_table_destroy(); destroy_put_mac_bindings(); @@ -4196,6 +4044,8 @@ destroy_put_mac_bindings(void) ovn_mac_bindings_destroy(&put_mac_bindings); } +#define MAX_MAC_BINDING_DELAY_MSEC 50 + /* Called with in the pinctrl_handler thread context. */ static void pinctrl_handle_put_mac_binding(const struct flow *md, @@ -4216,11 +4066,13 @@ pinctrl_handle_put_mac_binding(const struct flow *md, /* If the ARP reply was unicast we should not delay it, * there won't be any race. */ - bool is_unicast = !eth_addr_is_multicast(headers->dl_dst); + uint32_t delay = eth_addr_is_multicast(headers->dl_dst) + ? random_range(MAX_MAC_BINDING_DELAY_MSEC) + 1 + : 0; struct mac_binding *mb = ovn_mac_binding_add(&put_mac_bindings, dp_key, port_key, &ip_key, headers->dl_src, - is_unicast); + delay, true); if (!mb) { COVERAGE_INC(pinctrl_drop_put_mac_binding); return; @@ -4237,12 +4089,25 @@ static void send_mac_binding_buffered_pkts(struct rconn *swconn) OVS_REQUIRES(pinctrl_mutex) { - struct buffered_packets *bp; - LIST_FOR_EACH_POP (bp, list, &buffered_mac_bindings) { - buffered_send_packets(swconn, bp, &bp->ea); - free(bp); + enum ofp_version version = rconn_get_version(swconn); + enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version); + + struct packet_data *pd; + LIST_FOR_EACH_POP (pd, node, &ready_packets_data) { + struct ofputil_packet_out po = { + .packet = dp_packet_data(pd->p), + .packet_len = dp_packet_size(pd->p), + .buffer_id = UINT32_MAX, + .ofpacts = pd->ofpacts.data, + .ofpacts_len = pd->ofpacts.size, + }; + match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER); + queue_msg(swconn, ofputil_encode_packet_out(&po, proto)); + + ovn_packet_data_destroy(pd); } - ovs_list_init(&buffered_mac_bindings); + + ovs_list_init(&ready_packets_data); } static const struct sbrec_mac_binding * @@ -4391,7 +4256,7 @@ run_put_mac_bindings(struct ovsdb_idl_txn *ovnsb_idl_txn, struct mac_binding *mb; HMAP_FOR_EACH_SAFE (mb, hmap_node, &put_mac_bindings) { - if (ovn_mac_binding_can_commit(mb, now)) { + if (ovn_mac_binding_is_expired(mb, now)) { run_put_mac_binding(ovnsb_idl_txn, sbrec_datapath_binding_by_key, sbrec_port_binding_by_key, @@ -4402,68 +4267,19 @@ run_put_mac_bindings(struct ovsdb_idl_txn *ovnsb_idl_txn, } static void -run_buffered_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name, - const struct sbrec_mac_binding_table *mac_binding_table) +run_buffered_binding(const struct hmap *recent_mac_bidnings) OVS_REQUIRES(pinctrl_mutex) { - bool notify = false; - - if (!hmap_count(&buffered_packets_map)) { + if (!hmap_count(&buffered_packets_map) || + !hmap_count(recent_mac_bidnings)) { return; } - const struct sbrec_mac_binding *mb; - SBREC_MAC_BINDING_TABLE_FOR_EACH_TRACKED (mb, mac_binding_table) { - const struct sbrec_port_binding *pb = lport_lookup_by_name( - sbrec_port_binding_by_name, mb->logical_port); - if (!pb || !pb->datapath) { - continue; - } + ovn_buffured_packets_prepare_ready(&buffered_packets_map, + recent_mac_bidnings, + &ready_packets_data); - struct in6_addr ip; - ovs_be32 ip4; - if (ip_parse(mb->ip, &ip4)) { - ip = in6_addr_mapped_ipv4(ip4); - } else if (!ipv6_parse(mb->ip, &ip)) { - continue; - } - - struct buffered_packets *bp = pinctrl_find_buffered_packets_with_hash( - pb->datapath->tunnel_key, pb->tunnel_key, &ip); - if (!bp) { - if (!smap_get(&pb->options, "chassis-redirect-port")) { - continue; - } - - char *redirect_name = xasprintf("cr-%s", pb->logical_port); - pb = lport_lookup_by_name(sbrec_port_binding_by_name, - redirect_name); - free(redirect_name); - - if (!pb || !pb->datapath || strcmp(pb->type, "chassisredirect")) { - continue; - } - - bp = pinctrl_find_buffered_packets_with_hash( - pb->datapath->tunnel_key, pb->tunnel_key, &ip); - } - - if (!bp) { - continue; - } - - if (!ovs_scan(mb->mac, ETH_ADDR_SCAN_FMT, - ETH_ADDR_SCAN_ARGS(bp->ea))) { - continue; - } - - hmap_remove(&buffered_packets_map, &bp->hmap_node); - ovs_list_push_back(&buffered_mac_bindings, &bp->list); - notify = true; - } - buffered_packets_map_gc(); - - if (notify) { + if (!ovs_list_is_empty(&ready_packets_data)) { notify_pinctrl_handler(); } } @@ -6070,7 +5886,7 @@ may_inject_pkts(void) !shash_is_empty(&send_garp_rarp_data) || ipv6_prefixd_should_inject() || !ovs_list_is_empty(&mcast_query_list) || - !ovs_list_is_empty(&buffered_mac_bindings) || + !ovs_list_is_empty(&ready_packets_data) || bfd_monitor_should_inject()); } diff --git a/controller/pinctrl.h b/controller/pinctrl.h index 279a49fbc..cf4071952 100644 --- a/controller/pinctrl.h +++ b/controller/pinctrl.h @@ -51,13 +51,13 @@ void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, const struct sbrec_dns_table *, const struct sbrec_controller_event_table *, const struct sbrec_service_monitor_table *, - const struct sbrec_mac_binding_table *, const struct sbrec_bfd_table *, const struct ovsrec_bridge *, const struct sbrec_chassis *, const struct hmap *local_datapaths, const struct sset *active_tunnels, const struct shash *local_active_ports_ipv6_pd, - const struct shash *local_active_ports_ras); + const struct shash *local_active_ports_ras, + const struct hmap *recent_mac_bindings); void pinctrl_wait(struct ovsdb_idl_txn *ovnsb_idl_txn); void pinctrl_destroy(void); void pinctrl_set_br_int_name(const char *br_int_name); -- 2.39.2 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
