On Wed, Apr 26, 2023 at 12:55 PM Dumitru Ceara <[email protected]> wrote:
> On 3/28/23 10:12, Ales Musil wrote: > > Simplify the logic behind packet buffering > > and house those simplified helpers > > in mac-learn.c. > > > > Signed-off-by: Ales Musil <[email protected]> > > --- > > v4: Rebase on top of current main. > > Split into three patches. > > --- > > Hi, Ales, > > Thanks for working on this and sorry for not reviewing it earlier. > > I think it's indeed good that you split this in 3 patches but it > would've probably been better to further separate this first patch into > - first simplify the code without moving it to a different file > - then, in a second patch, move the code to the new file. > > Functionally your change seems to be OK but I left a few more comments > inline. > > > controller/mac-learn.c | 171 ++++++++++++++++++--- > > controller/mac-learn.h | 50 ++++++- > > controller/pinctrl.c | 329 ++++++++++++----------------------------- > > 3 files changed, 298 insertions(+), 252 deletions(-) > > > > diff --git a/controller/mac-learn.c b/controller/mac-learn.c > > index a27607016..9d51693a7 100644 > > --- a/controller/mac-learn.c > > +++ b/controller/mac-learn.c > > @@ -18,10 +18,10 @@ > > #include "mac-learn.h" > > > > /* OpenvSwitch lib includes. */ > > +#include "dp-packet.h" > > No need for this, it's already included in mac-learn.h. > > > #include "openvswitch/poll-loop.h" > > #include "openvswitch/vlog.h" > > #include "lib/packets.h" > > -#include "lib/random.h" > > #include "lib/smap.h" > > #include "lib/timeval.h" > > > > @@ -29,11 +29,11 @@ VLOG_DEFINE_THIS_MODULE(mac_learn); > > > > #define MAX_MAC_BINDINGS 1000 > > #define MAX_FDB_ENTRIES 1000 > > -#define MAX_MAC_BINDING_DELAY_MSEC 50 > > +#define BUFFER_QUEUE_DEPTH 4 > > Super nit: I would've aligned the values, e.g.: > > #define MAX_MAC_BINDINGS 1000 > #define MAX_FDB_ENTRIES 1000 > #define BUFFER_QUEUE_DEPTH 4 > > > > > -static size_t mac_binding_hash(uint32_t dp_key, uint32_t port_key, > > - struct in6_addr *); > > -static struct mac_binding *mac_binding_find(struct hmap *mac_bindings, > > +static size_t keys_ip_hash(uint32_t dp_key, uint32_t port_key, > > + struct in6_addr *); > > +static struct mac_binding *mac_binding_find(const struct hmap > *mac_bindings, > > uint32_t dp_key, > > uint32_t port_key, > > struct in6_addr *ip, size_t > hash); > > @@ -41,6 +41,12 @@ static size_t fdb_entry_hash(uint32_t dp_key, struct > eth_addr *); > > > > static struct fdb_entry *fdb_entry_find(struct hmap *fdbs, uint32_t > dp_key, > > struct eth_addr *mac, size_t > hash); > > +static struct buffered_packets * buffered_packets_find(struct hmap > *hmap, > > Nit: extra space after '*'. > > > + uint64_t dp_key, > > + uint64_t > port_key, > > + struct in6_addr > *ip, > > + uint32_t hash); > > +static void buffered_packets_destroy(struct buffered_packets *bp); > > > > /* mac_binding functions. */ > > void > > @@ -62,24 +68,23 @@ ovn_mac_bindings_destroy(struct hmap *mac_bindings) > > struct mac_binding * > > ovn_mac_binding_add(struct hmap *mac_bindings, uint32_t dp_key, > > uint32_t port_key, struct in6_addr *ip, > > - struct eth_addr mac, bool is_unicast) > > + struct eth_addr mac, uint32_t timestamp_offset, > > + bool limited_capacity) > > This API really needs a comment now. Without looking at the > implementation it's really hard to guess 'limited_capacity' might mean. > > Thinking more about it, this API is actually not really OK. Reading > more into the patch I think what you actually need is a > "mac_binding_map" that may have a maximum size. I'd define a type like > that and specify the max_size in its _create() function. Then you can > transparently take care of a potential limited capacity inside the > ovn_mac_binding_add() API without cluttering the caller. > > > { > > - uint32_t hash = mac_binding_hash(dp_key, port_key, ip); > > + uint32_t hash = keys_ip_hash(dp_key, port_key, ip); > > > > struct mac_binding *mb = > > mac_binding_find(mac_bindings, dp_key, port_key, ip, hash); > > if (!mb) { > > - if (hmap_count(mac_bindings) >= MAX_MAC_BINDINGS) { > > + if (limited_capacity && hmap_count(mac_bindings) >= > MAX_MAC_BINDINGS) { > > return NULL; > > } > > > > - uint32_t delay = is_unicast > > - ? 0 : random_range(MAX_MAC_BINDING_DELAY_MSEC) + 1; > > mb = xmalloc(sizeof *mb); > > mb->dp_key = dp_key; > > mb->port_key = port_key; > > mb->ip = *ip; > > - mb->commit_at_ms = time_msec() + delay; > > + mb->expire = time_msec() + timestamp_offset; > > hmap_insert(mac_bindings, &mb->hmap_node, hash); > > } > > mb->mac = mac; > > @@ -94,7 +99,7 @@ ovn_mac_binding_wait(struct hmap *mac_bindings) > > struct mac_binding *mb; > > > > HMAP_FOR_EACH (mb, hmap_node, mac_bindings) { > > - poll_timer_wait_until(mb->commit_at_ms); > > + poll_timer_wait_until(mb->expire); > > } > > } > > > > @@ -106,9 +111,9 @@ ovn_mac_binding_remove(struct mac_binding *mb, > struct hmap *mac_bindings) > > } > > > > bool > > -ovn_mac_binding_can_commit(const struct mac_binding *mb, long long now) > > +ovn_mac_binding_is_expired(const struct mac_binding *mb, long long now) > > { > > - return now >= mb->commit_at_ms; > > + return now >= mb->expire; > > } > > > > /* fdb functions. */ > > @@ -158,17 +163,126 @@ ovn_fdb_add(struct hmap *fdbs, uint32_t dp_key, > struct eth_addr mac, > > > > } > > > > +/* packet buffering functions */ > > +struct buffered_packets * > > +ovn_buffered_packets_add(struct hmap *hmap, uint64_t dp_key, uint64_t > port_key, > > + struct in6_addr ip) > > +{ > > + struct buffered_packets *bp; > > + > > + uint32_t hash = keys_ip_hash(dp_key, port_key, &ip); > > + bp = buffered_packets_find(hmap, dp_key, port_key, &ip, hash); > > + if (!bp) { > > + if (hmap_count(hmap) >= 1000) { > > + return NULL; > > + } > > + > > + bp = xmalloc(sizeof *bp); > > + hmap_insert(hmap, &bp->hmap_node, hash); > > + bp->ip = ip; > > + bp->dp_key = dp_key; > > + bp->port_key = port_key; > > + ovs_list_init(&bp->queue); > > + } > > + > > + bp->expire = time_msec() + OVN_BUFFERED_PACKETS_TIMEOUT; > > + > > + return bp; > > +} > > + > > +void > > +ovn_buffered_packets_add_packet_data(struct buffered_packets *bp, > > + struct ofpbuf ofpacts, > > + struct dp_packet *packet) > > Please add a comment for this public API. > > > +{ > > + struct packet_data *pd = xmalloc(sizeof *pd); > > + > > + pd->ofpacts = ofpacts; > > + pd->p = packet; > > + > > + if (ovs_list_size(&bp->queue) == BUFFER_QUEUE_DEPTH) { > > + struct packet_data *p = > CONTAINER_OF(ovs_list_pop_front(&bp->queue), > > + struct packet_data, node); > > + ovn_packet_data_destroy(p); > > + } > > + > > + ovs_list_push_back(&bp->queue, &pd->node); > > +} > > + > > +void > > +ovn_buffured_packets_prepare_ready(struct hmap *bp_hmap, > > Typo: 'buffured'. > > > + const struct hmap > *recent_mac_bindings, > > + struct ovs_list *ready_packet_data) > > Please add a comment for this public API. > > > +{ > > + long long now = time_msec(); > > + > > + struct buffered_packets *bp; > > + HMAP_FOR_EACH_SAFE (bp, hmap_node, bp_hmap) { > > + if (now > bp->expire) { > > + hmap_remove(bp_hmap, &bp->hmap_node); > > + buffered_packets_destroy(bp); > > + continue; > > + } > > + > > + uint32_t hash = keys_ip_hash(bp->dp_key, bp->port_key, &bp->ip); > > + struct mac_binding *mb = mac_binding_find(recent_mac_bindings, > > A better name would be 'bindings_to_skip' instead of > 'recent_mac_bindings'. It's irrelevant here why we need to skip them. > > OTOH, I wonder why you didn't already remove these in the caller, in > run_buffered_binding(). I'd prefer if we didn't have to define APIs of > the form "prepare <something> for all entries EXCEPT <entries>". > > Mentioning this change in the commit log would've been nice too. > > > + bp->dp_key, > bp->port_key, > > + &bp->ip, hash); > > + if (!mb) { > > + continue; > > + } > > + > > + struct packet_data *pd; > > + LIST_FOR_EACH_POP (pd, node, &bp->queue) { > > + struct eth_header *eth = dp_packet_data(pd->p); > > + eth->eth_dst = mb->mac; > > + > > + ovs_list_push_back(ready_packet_data, &pd->node); > > + } > > + > > + hmap_remove(bp_hmap, &bp->hmap_node); > > + buffered_packets_destroy(bp); > > + } > > +} > > + > > +void > > +ovn_packet_data_destroy(struct packet_data *pd) > > +{ > > + dp_packet_delete(pd->p); > > + ofpbuf_uninit(&pd->ofpacts); > > + free(pd); > > +} > > + > > +void > > +ovn_packet_data_list_destroy(struct ovs_list *list) > > +{ > > + struct packet_data *pd; > > + LIST_FOR_EACH_POP (pd, node, list) { > > + ovn_packet_data_destroy(pd); > > + } > > +} > > + > > +void > > +ovn_buffered_packets_hmap_destroy(struct hmap *hmap) > > +{ > > + struct buffered_packets *bp; > > + HMAP_FOR_EACH_POP (bp, hmap_node, hmap) { > > + buffered_packets_destroy(bp); > > + } > > +} > > + > > + > > /* mac_binding related static functions. */ > > > > static size_t > > -mac_binding_hash(uint32_t dp_key, uint32_t port_key, struct in6_addr > *ip) > > +keys_ip_hash(uint32_t dp_key, uint32_t port_key, struct in6_addr *ip) > > { > > return hash_bytes(ip, sizeof *ip, hash_2words(dp_key, port_key)); > > } > > > > static struct mac_binding * > > -mac_binding_find(struct hmap *mac_bindings, uint32_t dp_key, > > - uint32_t port_key, struct in6_addr *ip, size_t hash) > > +mac_binding_find(const struct hmap *mac_bindings, uint32_t dp_key, > > + uint32_t port_key, struct in6_addr *ip, size_t hash) > > { > > struct mac_binding *mb; > > HMAP_FOR_EACH_WITH_HASH (mb, hmap_node, hash, mac_bindings) { > > @@ -203,3 +317,26 @@ fdb_entry_find(struct hmap *fdbs, uint32_t dp_key, > > > > return NULL; > > } > > + > > +/* packet buffering static functions. */ > > +static struct buffered_packets * > > +buffered_packets_find(struct hmap *hmap, uint64_t dp_key, uint64_t > port_key, > > + struct in6_addr *ip, uint32_t hash) > > +{ > > + struct buffered_packets *mb; > > + HMAP_FOR_EACH_WITH_HASH (mb, hmap_node, hash, hmap) { > > + if (mb->dp_key == dp_key && mb->port_key == port_key && > > + IN6_ARE_ADDR_EQUAL(&mb->ip, ip)) { > > + return mb; > > + } > > + } > > + > > + return NULL; > > +} > > + > > +static void > > +buffered_packets_destroy(struct buffered_packets *bp) > > +{ > > + ovn_packet_data_list_destroy(&bp->queue); > > + free(bp); > > +} > > diff --git a/controller/mac-learn.h b/controller/mac-learn.h > > index 57c50c58b..b6ff28650 100644 > > --- a/controller/mac-learn.h > > +++ b/controller/mac-learn.h > > @@ -19,7 +19,11 @@ > > > > #include <sys/types.h> > > #include <netinet/in.h> > > + > > +#include "dp-packet.h" > > #include "openvswitch/hmap.h" > > +#include "openvswitch/list.h" > > +#include "openvswitch/ofpbuf.h" > > > > struct mac_binding { > > struct hmap_node hmap_node; /* In a hmap. */ > > @@ -32,20 +36,22 @@ struct mac_binding { > > /* Value. */ > > struct eth_addr mac; > > > > - /* Timestamp when to commit to SB. */ > > - long long commit_at_ms; > > + /* Timestamp in ms indicating when the MAC binding should expire. */ > > + long long expire; > > }; > > > > void ovn_mac_bindings_init(struct hmap *mac_bindings); > > void ovn_mac_bindings_destroy(struct hmap *mac_bindings); > > void ovn_mac_binding_wait(struct hmap *mac_bindings); > > void ovn_mac_binding_remove(struct mac_binding *mb, struct hmap > *mac_bindings); > > -bool ovn_mac_binding_can_commit(const struct mac_binding *mb, long long > now); > > +bool ovn_mac_binding_is_expired(const struct mac_binding *mb, long long > now); > > > > struct mac_binding *ovn_mac_binding_add(struct hmap *mac_bindings, > > uint32_t dp_key, uint32_t > port_key, > > struct in6_addr *ip, > > - struct eth_addr mac, bool > is_unicast); > > + struct eth_addr mac, > > + uint32_t timestamp_offset, > > + bool limited_capacity); > > > > > > > > @@ -68,4 +74,40 @@ struct fdb_entry *ovn_fdb_add(struct hmap *fdbs, > > uint32_t dp_key, struct eth_addr mac, > > uint32_t port_key); > > > > + > > +struct packet_data { > > + struct ovs_list node; > > + > > + struct ofpbuf ofpacts; > > + struct dp_packet *p; > > +}; > > + > > +struct buffered_packets { > > + struct hmap_node hmap_node; > > + > > + struct in6_addr ip; > > + uint64_t dp_key; > > + uint64_t port_key; > > + > > + struct ovs_list queue; > > + > > + long long int expire; > > This used to be 'timestamp', i.e., the time when we buffered the packet. > It's not obvious to me why we need to change it. In any case it should > be mentioned in the commit log. > > > +}; > > + > > +#define OVN_BUFFERED_PACKETS_TIMEOUT 10000 > > I know the previous definition didn't do this but now that we're > changing it I'd also include the unit in the name, e.g.: > OVN_BUFFERED_PACKETS_TIMEOUT_MS. > > > + > > +struct buffered_packets *ovn_buffered_packets_add(struct hmap *hmap, > > + uint64_t dp_key, > > + uint64_t port_key, > > + struct in6_addr ip); > > +void ovn_buffered_packets_add_packet_data(struct buffered_packets *bp, > > + struct ofpbuf ofpacts, > > + struct dp_packet *packet); > > +void ovn_buffured_packets_prepare_ready(struct hmap *bp_hmap, > > + const struct hmap > *recent_mac_bindings, > > + struct ovs_list > *ready_packet_data); > > +void ovn_packet_data_destroy(struct packet_data *pd); > > +void ovn_packet_data_list_destroy(struct ovs_list *list); > > +void ovn_buffered_packets_hmap_destroy(struct hmap *hmap); > > + > > There are a few things I don't really like about this change: > > There's no ovn_packet_data_create() API but there is a > ovn_packet_data_destroy() API. In general, it's more readable IMO if we > have both *_create() and *_destroy() APIs for a custom data type. > > It seems to me ovn_packet_data_list_destroy() is not really needed. > What I think we should do is do all the "buffered_packets" and > "packet_data" processing in a single place, probably in mac-learn.c. > > It's not great to use raw types like ovs_list and hmap. It makes it > hard to know what the real type is without looking at the > implementation. I think I'd just wrap the list and the map in some > explicit structures, defined in mac-learn.h. We then can rely on the > compiler type checking to ensure we don't pass the wrong structure as > argument. > > > #endif /* OVN_MAC_LEARN_H */ > > diff --git a/controller/pinctrl.c b/controller/pinctrl.c > > index 795847729..fcc278c8c 100644 > > --- a/controller/pinctrl.c > > +++ b/controller/pinctrl.c > > @@ -178,8 +178,8 @@ struct pinctrl { > > > > static struct pinctrl pinctrl; > > > > -static void init_buffered_packets_map(void); > > -static void destroy_buffered_packets_map(void); > > +static void init_buffered_packets_data(void); > > +static void destroy_buffered_packets_data(void); > > static void > > run_buffered_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name, > > const struct sbrec_mac_binding_table > *mac_binding_table) > > @@ -535,7 +535,7 @@ pinctrl_init(void) > > init_send_garps_rarps(); > > init_ipv6_ras(); > > init_ipv6_prefixd(); > > - init_buffered_packets_map(); > > + init_buffered_packets_data(); > > init_activated_ports(); > > init_event_table(); > > ip_mcast_snoop_init(); > > @@ -1416,222 +1416,72 @@ prepare_ipv6_prefixd(struct ovsdb_idl_txn > *ovnsb_idl_txn, > > } > > } > > > > -struct buffer_info { > > - struct ofpbuf ofpacts; > > - struct dp_packet *p; > > -}; > > - > > -#define BUFFER_QUEUE_DEPTH 4 > > -struct buffered_packets { > > - struct hmap_node hmap_node; > > - struct ovs_list list; > > - > > - /* key */ > > - struct in6_addr ip; > > - struct eth_addr ea; > > - > > - uint64_t dp_key; > > - uint64_t port_key; > > - > > - long long int timestamp; > > - > > - struct buffer_info data[BUFFER_QUEUE_DEPTH]; > > - uint32_t head, tail; > > Please mention this change in the commit log. We're not using the > circular buffer anymore. Instead we're poping the front of the list if > needed. In theory this memory access pattern change can influence > performance. In practice, given that BUFFER_QUEUE_DEPTH is 4, I don't > think it matters much though. > > > -}; > > - > > static struct hmap buffered_packets_map; > > -static struct ovs_list buffered_mac_bindings; > > +/* List of 'struct packet_data' that is ready to be sent. */ > > I would add a comment for both 'buffered_packets_map' and > 'ready_packets_data'. Alternatively I'd add a newline before this comment. > > > +static struct ovs_list ready_packets_data; > > > > static void > > -init_buffered_packets_map(void) > > +init_buffered_packets_data(void) > > { > > hmap_init(&buffered_packets_map); > > - ovs_list_init(&buffered_mac_bindings); > > -} > > - > > -static void > > -destroy_buffered_packets(struct buffered_packets *bp) > > -{ > > - struct buffer_info *bi; > > - > > - while (bp->head != bp->tail) { > > - bi = &bp->data[bp->head]; > > - dp_packet_delete(bi->p); > > - ofpbuf_uninit(&bi->ofpacts); > > - > > - bp->head = (bp->head + 1) % BUFFER_QUEUE_DEPTH; > > - } > > -} > > - > > -static void > > -destroy_buffered_packets_map(void) > > -{ > > - struct buffered_packets *bp; > > - HMAP_FOR_EACH_SAFE (bp, hmap_node, &buffered_packets_map) { > > - destroy_buffered_packets(bp); > > - hmap_remove(&buffered_packets_map, &bp->hmap_node); > > - free(bp); > > - } > > - hmap_destroy(&buffered_packets_map); > > - > > - LIST_FOR_EACH_POP (bp, list, &buffered_mac_bindings) { > > - destroy_buffered_packets(bp); > > - free(bp); > > - } > > -} > > - > > -static void > > -buffered_push_packet(struct buffered_packets *bp, > > - struct dp_packet *packet, > > - const struct match *md) > > -{ > > - uint32_t next = (bp->tail + 1) % BUFFER_QUEUE_DEPTH; > > - struct buffer_info *bi = &bp->data[bp->tail]; > > - > > - ofpbuf_init(&bi->ofpacts, 4096); > > - > > - reload_metadata(&bi->ofpacts, md); > > - /* reload pkt_mark field */ > > - const struct mf_field *pkt_mark_field = mf_from_id(MFF_PKT_MARK); > > - union mf_value pkt_mark_value; > > - mf_get_value(pkt_mark_field, &md->flow, &pkt_mark_value); > > - ofpact_put_set_field(&bi->ofpacts, pkt_mark_field, &pkt_mark_value, > NULL); > > - > > - struct ofpact_resubmit *resubmit = > ofpact_put_RESUBMIT(&bi->ofpacts); > > - resubmit->in_port = OFPP_CONTROLLER; > > - resubmit->table_id = OFTABLE_REMOTE_OUTPUT; > > - > > - bi->p = packet; > > - > > - if (next == bp->head) { > > - bi = &bp->data[bp->head]; > > - dp_packet_delete(bi->p); > > - ofpbuf_uninit(&bi->ofpacts); > > - bp->head = (bp->head + 1) % BUFFER_QUEUE_DEPTH; > > - } > > - bp->tail = next; > > + ovs_list_init(&ready_packets_data); > > } > > > > static void > > -buffered_send_packets(struct rconn *swconn, struct buffered_packets *bp, > > - struct eth_addr *addr) > > -{ > > - enum ofp_version version = rconn_get_version(swconn); > > - enum ofputil_protocol proto = > ofputil_protocol_from_ofp_version(version); > > - > > - while (bp->head != bp->tail) { > > - struct buffer_info *bi = &bp->data[bp->head]; > > - struct eth_header *eth = dp_packet_data(bi->p); > > - > > - eth->eth_dst = *addr; > > - struct ofputil_packet_out po = { > > - .packet = dp_packet_data(bi->p), > > - .packet_len = dp_packet_size(bi->p), > > - .buffer_id = UINT32_MAX, > > - .ofpacts = bi->ofpacts.data, > > - .ofpacts_len = bi->ofpacts.size, > > - }; > > - match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER); > > - queue_msg(swconn, ofputil_encode_packet_out(&po, proto)); > > - > > - ofpbuf_uninit(&bi->ofpacts); > > - dp_packet_delete(bi->p); > > - > > - bp->head = (bp->head + 1) % BUFFER_QUEUE_DEPTH; > > - } > > -} > > - > > -#define BUFFER_MAP_TIMEOUT 10000 > > -static void > > -buffered_packets_map_gc(void) > > -{ > > - struct buffered_packets *cur_qp; > > - long long int now = time_msec(); > > - > > - HMAP_FOR_EACH_SAFE (cur_qp, hmap_node, &buffered_packets_map) { > > - if (now > cur_qp->timestamp + BUFFER_MAP_TIMEOUT) { > > - destroy_buffered_packets(cur_qp); > > - hmap_remove(&buffered_packets_map, &cur_qp->hmap_node); > > - free(cur_qp); > > - } > > - } > > -} > > - > > -static uint32_t > > -pinctrl_buffer_packet_hash(uint64_t dp_key, uint64_t port_key, > > - const struct in6_addr *addr) > > +destroy_buffered_packets_data(void) > > { > > - uint32_t hash = 0; > > - hash = hash_add64(hash, port_key); > > - hash = hash_add64(hash, dp_key); > > - hash = hash_bytes(addr, sizeof addr, hash); > > - return hash_finish(hash, 16); > > -} > > - > > -static struct buffered_packets * > > -pinctrl_find_buffered_packets(uint64_t dp_key, uint64_t port_key, > > - const struct in6_addr *ip, uint32_t hash) > > -{ > > - struct buffered_packets *qp; > > - HMAP_FOR_EACH_WITH_HASH (qp, hmap_node, hash, > &buffered_packets_map) { > > - if (qp->dp_key == dp_key && qp->port_key == port_key && > > - IN6_ARE_ADDR_EQUAL(&qp->ip, ip)) { > > - return qp; > > - } > > - } > > - return NULL; > > -} > > - > > -static struct buffered_packets * > > -pinctrl_find_buffered_packets_with_hash(uint64_t dp_key, uint64_t > port_key, > > - const struct in6_addr *ip) > > -{ > > - uint32_t hash = pinctrl_buffer_packet_hash(dp_key, port_key, ip); > > - > > - return pinctrl_find_buffered_packets(dp_key, port_key, ip, hash); > > + ovn_buffered_packets_hmap_destroy(&buffered_packets_map); > > + ovn_packet_data_list_destroy(&ready_packets_data); > > } > > > > /* Called with in the pinctrl_handler thread context. */ > > -static int > > +static void > > pinctrl_handle_buffered_packets(struct dp_packet *pkt_in, > > const struct match *md, bool is_arp) > > OVS_REQUIRES(pinctrl_mutex) > > { > > - struct buffered_packets *bp; > > + struct in6_addr ip; > > + struct ofpbuf ofpacts; > > struct dp_packet *clone; > > - struct in6_addr addr; > > + struct buffered_packets *bp; > > uint64_t dp_key = ntohll(md->flow.metadata); > > uint64_t oport_key = md->flow.regs[MFF_LOG_OUTPORT - MFF_REG0]; > > > > if (is_arp) { > > - addr = in6_addr_mapped_ipv4(htonl(md->flow.regs[0])); > > + ip = in6_addr_mapped_ipv4(htonl(md->flow.regs[0])); > > } else { > > ovs_be128 ip6 = hton128(flow_get_xxreg(&md->flow, 0)); > > - memcpy(&addr, &ip6, sizeof addr); > > + memcpy(&ip, &ip6, sizeof ip); > > } > > > > - uint32_t hash = pinctrl_buffer_packet_hash(dp_key, oport_key, > &addr); > > - bp = pinctrl_find_buffered_packets(dp_key, oport_key, &addr, hash); > > + bp = ovn_buffered_packets_add(&buffered_packets_map, dp_key, > > + oport_key, ip); > > if (!bp) { > > - if (hmap_count(&buffered_packets_map) >= 1000) { > > - COVERAGE_INC(pinctrl_drop_buffered_packets_map); > > - return -ENOMEM; > > - } > > - > > - bp = xmalloc(sizeof *bp); > > - hmap_insert(&buffered_packets_map, &bp->hmap_node, hash); > > - bp->head = bp->tail = 0; > > - bp->ip = addr; > > - bp->dp_key = dp_key; > > - bp->port_key = oport_key; > > + COVERAGE_INC(pinctrl_drop_buffered_packets_map); > > + return; > > } > > - bp->timestamp = time_msec(); > > + > > /* clone the packet to send it later with correct L2 address */ > > clone = dp_packet_clone_data(dp_packet_data(pkt_in), > > dp_packet_size(pkt_in)); > > - buffered_push_packet(bp, clone, md); > > > > - return 0; > > + > > + ofpbuf_init(&ofpacts, 4096); > > + reload_metadata(&ofpacts, md); > > + /* reload pkt_mark field */ > > + const struct mf_field *pkt_mark_field = mf_from_id(MFF_PKT_MARK); > > + union mf_value pkt_mark_value; > > + mf_get_value(pkt_mark_field, &md->flow, &pkt_mark_value); > > + ofpact_put_set_field(&ofpacts, pkt_mark_field, &pkt_mark_value, > NULL); > > + > > + struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts); > > + resubmit->in_port = OFPP_CONTROLLER; > > + resubmit->table_id = OFTABLE_REMOTE_OUTPUT; > > + > > + ovn_buffered_packets_add_packet_data(bp, ofpacts, clone); > > + > > + /* There is a chance that the MAC binding was already created. */ > > + notify_pinctrl_main(); > > } > > > > /* Called with in the pinctrl_handler thread context. */ > > @@ -4154,7 +4004,7 @@ pinctrl_destroy(void) > > destroy_send_garps_rarps(); > > destroy_ipv6_ras(); > > destroy_ipv6_prefixd(); > > - destroy_buffered_packets_map(); > > + destroy_buffered_packets_data(); > > destroy_activated_ports(); > > event_table_destroy(); > > destroy_put_mac_bindings(); > > @@ -4196,6 +4046,8 @@ destroy_put_mac_bindings(void) > > ovn_mac_bindings_destroy(&put_mac_bindings); > > } > > > > +#define MAX_MAC_BINDING_DELAY_MSEC 50 > > + > > /* Called with in the pinctrl_handler thread context. */ > > static void > > pinctrl_handle_put_mac_binding(const struct flow *md, > > @@ -4216,11 +4068,13 @@ pinctrl_handle_put_mac_binding(const struct flow > *md, > > > > /* If the ARP reply was unicast we should not delay it, > > * there won't be any race. */ > > - bool is_unicast = !eth_addr_is_multicast(headers->dl_dst); > > + uint32_t delay = eth_addr_is_multicast(headers->dl_dst) > > + ? random_range(MAX_MAC_BINDING_DELAY_MSEC) + 1 > > + : 0; > > struct mac_binding *mb = ovn_mac_binding_add(&put_mac_bindings, > dp_key, > > port_key, &ip_key, > > headers->dl_src, > > - is_unicast); > > + delay, true); > > if (!mb) { > > COVERAGE_INC(pinctrl_drop_put_mac_binding); > > return; > > @@ -4237,12 +4091,25 @@ static void > > send_mac_binding_buffered_pkts(struct rconn *swconn) > > OVS_REQUIRES(pinctrl_mutex) > > { > > - struct buffered_packets *bp; > > - LIST_FOR_EACH_POP (bp, list, &buffered_mac_bindings) { > > - buffered_send_packets(swconn, bp, &bp->ea); > > - free(bp); > > + enum ofp_version version = rconn_get_version(swconn); > > + enum ofputil_protocol proto = > ofputil_protocol_from_ofp_version(version); > > + > > + struct packet_data *pd; > > + LIST_FOR_EACH_POP (pd, node, &ready_packets_data) { > > + struct ofputil_packet_out po = { > > + .packet = dp_packet_data(pd->p), > > + .packet_len = dp_packet_size(pd->p), > > + .buffer_id = UINT32_MAX, > > + .ofpacts = pd->ofpacts.data, > > + .ofpacts_len = pd->ofpacts.size, > > + }; > > + match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER); > > + queue_msg(swconn, ofputil_encode_packet_out(&po, proto)); > > + > > + ovn_packet_data_destroy(pd); > > } > > - ovs_list_init(&buffered_mac_bindings); > > + > > + ovs_list_init(&ready_packets_data); > > } > > > > static const struct sbrec_mac_binding * > > @@ -4391,7 +4258,7 @@ run_put_mac_bindings(struct ovsdb_idl_txn > *ovnsb_idl_txn, > > > > struct mac_binding *mb; > > HMAP_FOR_EACH_SAFE (mb, hmap_node, &put_mac_bindings) { > > - if (ovn_mac_binding_can_commit(mb, now)) { > > + if (ovn_mac_binding_is_expired(mb, now)) { > > This is really confusing. I think _is_expired() is not a good name > choice. Without looking at what run_put_mac_binding() my first reaction > is to assume the mac binding is just flushed. What actually happens is > that the mac binding is committed to the SB. Please revert back to the > old name. That also means we should probably rename the > 'struct mac_binding' field back to 'commit_at_ms'. > > > run_put_mac_binding(ovnsb_idl_txn, > > sbrec_datapath_binding_by_key, > > sbrec_port_binding_by_key, > > @@ -4406,64 +4273,64 @@ run_buffered_binding(struct ovsdb_idl_index > *sbrec_port_binding_by_name, > > const struct sbrec_mac_binding_table > *mac_binding_table) > > OVS_REQUIRES(pinctrl_mutex) > > { > > - bool notify = false; > > - > > if (!hmap_count(&buffered_packets_map)) { > > return; > > } > > > > - const struct sbrec_mac_binding *mb; > > - SBREC_MAC_BINDING_TABLE_FOR_EACH_TRACKED (mb, mac_binding_table) { > > + struct hmap recent_mbs; > > + > > + hmap_init(&recent_mbs); > > + > > + const struct sbrec_mac_binding *smb; > > + SBREC_MAC_BINDING_TABLE_FOR_EACH_TRACKED (smb, mac_binding_table) { > > const struct sbrec_port_binding *pb = lport_lookup_by_name( > > - sbrec_port_binding_by_name, mb->logical_port); > > + sbrec_port_binding_by_name, smb->logical_port); > > if (!pb || !pb->datapath) { > > continue; > > } > > > > + struct eth_addr mac; > > struct in6_addr ip; > > ovs_be32 ip4; > > - if (ip_parse(mb->ip, &ip4)) { > > + > > + if (ip_parse(smb->ip, &ip4)) { > > ip = in6_addr_mapped_ipv4(ip4); > > - } else if (!ipv6_parse(mb->ip, &ip)) { > > + } else if (!ipv6_parse(smb->ip, &ip)) { > > continue; > > } > > > > - struct buffered_packets *bp = > pinctrl_find_buffered_packets_with_hash( > > - pb->datapath->tunnel_key, pb->tunnel_key, &ip); > > - if (!bp) { > > - if (!smap_get(&pb->options, "chassis-redirect-port")) { > > - continue; > > - } > > - > > - char *redirect_name = xasprintf("cr-%s", pb->logical_port); > > - pb = lport_lookup_by_name(sbrec_port_binding_by_name, > > - redirect_name); > > - free(redirect_name); > > - > > - if (!pb || !pb->datapath || strcmp(pb->type, > "chassisredirect")) { > > - continue; > > - } > > - > > - bp = pinctrl_find_buffered_packets_with_hash( > > - pb->datapath->tunnel_key, pb->tunnel_key, &ip); > > + if (!eth_addr_from_string(smb->mac, &mac)) { > > + continue; > > } > > > > - if (!bp) { > > + ovn_mac_binding_add(&recent_mbs, smb->datapath->tunnel_key, > > + pb->tunnel_key, &ip, mac, 0, false); > > + > > + const char *redirect_port = > > + smap_get(&pb->options, "chassis-redirect-port"); > > + if (!redirect_port) { > > continue; > > } > > > > - if (!ovs_scan(mb->mac, ETH_ADDR_SCAN_FMT, > > - ETH_ADDR_SCAN_ARGS(bp->ea))) { > > + pb = lport_lookup_by_name(sbrec_port_binding_by_name, > redirect_port); > > + if (!pb || pb->datapath->tunnel_key != > smb->datapath->tunnel_key || > > + strcmp(pb->type, "chassisredirect")) { > > continue; > > } > > > > - hmap_remove(&buffered_packets_map, &bp->hmap_node); > > - ovs_list_push_back(&buffered_mac_bindings, &bp->list); > > - notify = true; > > + /* Add the same entry also for chassisredirect port as the > buffered > > + * traffic might be buffered on the cr port. */ > > + ovn_mac_binding_add(&recent_mbs, smb->datapath->tunnel_key, > > + pb->tunnel_key, &ip, mac, 0, false); > > } > > - buffered_packets_map_gc(); > > > > - if (notify) { > > + ovn_buffured_packets_prepare_ready(&buffered_packets_map, > > + &recent_mbs, > > + &ready_packets_data); > > + > > + ovn_mac_bindings_destroy(&recent_mbs); > > + > > + if (!ovs_list_is_empty(&ready_packets_data)) { > > notify_pinctrl_handler(); > > } > > } > > @@ -6070,7 +5937,7 @@ may_inject_pkts(void) > > !shash_is_empty(&send_garp_rarp_data) || > > ipv6_prefixd_should_inject() || > > !ovs_list_is_empty(&mcast_query_list) || > > - !ovs_list_is_empty(&buffered_mac_bindings) || > > + !ovs_list_is_empty(&ready_packets_data) || > > bfd_monitor_should_inject()); > > } > > > > After going through the whole diff I wonder if it wouldn't be possible > to split this refactor/optimization into even smaller patches making it > easier to review and ensure no regressions happen. E.g., the > mac_binding changes could probably be separated from the > buffered_packets changes. > > Thanks, > Dumitru > > > Hi Dumitru, thank you for the review. Hopefully I was able to incorporate all comments for this series in v5. Thanks, Ales -- Ales Musil Senior Software Engineer - OVN Core Red Hat EMEA <https://www.redhat.com> [email protected] IM: amusil <https://red.ht/sig> _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
