On 3/28/23 10:12, Ales Musil wrote:
> Simplify the logic behind packet buffering
> and house those simplified helpers
> in mac-learn.c.
>
> Signed-off-by: Ales Musil <[email protected]>
> ---
> v4: Rebase on top of current main.
> Split into three patches.
> ---
Hi, Ales,
Thanks for working on this and sorry for not reviewing it earlier.
I think it's indeed good that you split this into 3 patches, but it
would've probably been better to further split this first patch in two:
- first simplify the code without moving it to a different file,
- then, in a second patch, move the code to the new file.
Functionally your change seems to be OK but I left a few more comments
inline.
> controller/mac-learn.c | 171 ++++++++++++++++++---
> controller/mac-learn.h | 50 ++++++-
> controller/pinctrl.c | 329 ++++++++++++-----------------------------
> 3 files changed, 298 insertions(+), 252 deletions(-)
>
> diff --git a/controller/mac-learn.c b/controller/mac-learn.c
> index a27607016..9d51693a7 100644
> --- a/controller/mac-learn.c
> +++ b/controller/mac-learn.c
> @@ -18,10 +18,10 @@
> #include "mac-learn.h"
>
> /* OpenvSwitch lib includes. */
> +#include "dp-packet.h"
No need for this, it's already included in mac-learn.h.
> #include "openvswitch/poll-loop.h"
> #include "openvswitch/vlog.h"
> #include "lib/packets.h"
> -#include "lib/random.h"
> #include "lib/smap.h"
> #include "lib/timeval.h"
>
> @@ -29,11 +29,11 @@ VLOG_DEFINE_THIS_MODULE(mac_learn);
>
> #define MAX_MAC_BINDINGS 1000
> #define MAX_FDB_ENTRIES 1000
> -#define MAX_MAC_BINDING_DELAY_MSEC 50
> +#define BUFFER_QUEUE_DEPTH 4
Super nit: I would've aligned the values, e.g.:
#define MAX_MAC_BINDINGS 1000
#define MAX_FDB_ENTRIES 1000
#define BUFFER_QUEUE_DEPTH 4
>
> -static size_t mac_binding_hash(uint32_t dp_key, uint32_t port_key,
> - struct in6_addr *);
> -static struct mac_binding *mac_binding_find(struct hmap *mac_bindings,
> +static size_t keys_ip_hash(uint32_t dp_key, uint32_t port_key,
> + struct in6_addr *);
> +static struct mac_binding *mac_binding_find(const struct hmap *mac_bindings,
> uint32_t dp_key,
> uint32_t port_key,
> struct in6_addr *ip, size_t hash);
> @@ -41,6 +41,12 @@ static size_t fdb_entry_hash(uint32_t dp_key, struct eth_addr *);
>
> static struct fdb_entry *fdb_entry_find(struct hmap *fdbs, uint32_t dp_key,
> struct eth_addr *mac, size_t hash);
> +static struct buffered_packets * buffered_packets_find(struct hmap *hmap,
Nit: extra space after '*'.
> + uint64_t dp_key,
> + uint64_t port_key,
> + struct in6_addr *ip,
> + uint32_t hash);
> +static void buffered_packets_destroy(struct buffered_packets *bp);
>
> /* mac_binding functions. */
> void
> @@ -62,24 +68,23 @@ ovn_mac_bindings_destroy(struct hmap *mac_bindings)
> struct mac_binding *
> ovn_mac_binding_add(struct hmap *mac_bindings, uint32_t dp_key,
> uint32_t port_key, struct in6_addr *ip,
> - struct eth_addr mac, bool is_unicast)
> + struct eth_addr mac, uint32_t timestamp_offset,
> + bool limited_capacity)
This API really needs a comment now. Without looking at the
implementation, it's really hard to guess what 'limited_capacity' might mean.
Thinking more about it, this API is actually not really OK. Reading
more into the patch I think what you actually need is a
"mac_binding_map" that may have a maximum size. I'd define a type like
that and specify the max_size in its _create() function. Then you can
transparently take care of a potential limited capacity inside the
ovn_mac_binding_add() API without cluttering the caller.
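Just to illustrate what I mean, something along these lines (untested
sketch, names are only suggestions):

struct mac_bindings_map {
    struct hmap map;
    /* Maximum number of entries in the map; 0 means "no limit". */
    size_t max_size;
};

static inline void
ovn_mac_bindings_map_init(struct mac_bindings_map *mac_bindings,
                          size_t max_size)
{
    mac_bindings->max_size = max_size;
    hmap_init(&mac_bindings->map);
}

static inline bool
ovn_mac_bindings_map_is_full(const struct mac_bindings_map *mac_bindings)
{
    return mac_bindings->max_size
           && hmap_count(&mac_bindings->map) >= mac_bindings->max_size;
}

ovn_mac_binding_add() would then take a 'struct mac_bindings_map *' and
call ovn_mac_bindings_map_is_full() internally, so callers don't have to
pass 'limited_capacity' around.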
> {
> - uint32_t hash = mac_binding_hash(dp_key, port_key, ip);
> + uint32_t hash = keys_ip_hash(dp_key, port_key, ip);
>
> struct mac_binding *mb =
> mac_binding_find(mac_bindings, dp_key, port_key, ip, hash);
> if (!mb) {
> - if (hmap_count(mac_bindings) >= MAX_MAC_BINDINGS) {
> + if (limited_capacity && hmap_count(mac_bindings) >= MAX_MAC_BINDINGS) {
> return NULL;
> }
>
> - uint32_t delay = is_unicast
> - ? 0 : random_range(MAX_MAC_BINDING_DELAY_MSEC) + 1;
> mb = xmalloc(sizeof *mb);
> mb->dp_key = dp_key;
> mb->port_key = port_key;
> mb->ip = *ip;
> - mb->commit_at_ms = time_msec() + delay;
> + mb->expire = time_msec() + timestamp_offset;
> hmap_insert(mac_bindings, &mb->hmap_node, hash);
> }
> mb->mac = mac;
> @@ -94,7 +99,7 @@ ovn_mac_binding_wait(struct hmap *mac_bindings)
> struct mac_binding *mb;
>
> HMAP_FOR_EACH (mb, hmap_node, mac_bindings) {
> - poll_timer_wait_until(mb->commit_at_ms);
> + poll_timer_wait_until(mb->expire);
> }
> }
>
> @@ -106,9 +111,9 @@ ovn_mac_binding_remove(struct mac_binding *mb, struct hmap *mac_bindings)
> }
>
> bool
> -ovn_mac_binding_can_commit(const struct mac_binding *mb, long long now)
> +ovn_mac_binding_is_expired(const struct mac_binding *mb, long long now)
> {
> - return now >= mb->commit_at_ms;
> + return now >= mb->expire;
> }
>
> /* fdb functions. */
> @@ -158,17 +163,126 @@ ovn_fdb_add(struct hmap *fdbs, uint32_t dp_key, struct eth_addr mac,
>
> }
>
> +/* packet buffering functions */
> +struct buffered_packets *
> +ovn_buffered_packets_add(struct hmap *hmap, uint64_t dp_key, uint64_t port_key,
> + struct in6_addr ip)
> +{
> + struct buffered_packets *bp;
> +
> + uint32_t hash = keys_ip_hash(dp_key, port_key, &ip);
> + bp = buffered_packets_find(hmap, dp_key, port_key, &ip, hash);
> + if (!bp) {
> + if (hmap_count(hmap) >= 1000) {
> + return NULL;
> + }
> +
> + bp = xmalloc(sizeof *bp);
> + hmap_insert(hmap, &bp->hmap_node, hash);
> + bp->ip = ip;
> + bp->dp_key = dp_key;
> + bp->port_key = port_key;
> + ovs_list_init(&bp->queue);
> + }
> +
> + bp->expire = time_msec() + OVN_BUFFERED_PACKETS_TIMEOUT;
> +
> + return bp;
> +}
> +
> +void
> +ovn_buffered_packets_add_packet_data(struct buffered_packets *bp,
> + struct ofpbuf ofpacts,
> + struct dp_packet *packet)
Please add a comment for this public API.
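Something along these lines would do, mostly to make the ownership
transfer explicit (wording is just a suggestion):

/* Adds packet data to the buffered packets queue 'bp'.  Takes ownership
 * of both 'packet' and 'ofpacts'.  If the queue is already full, the
 * oldest packet data is dropped to make room for the new one. */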
> +{
> + struct packet_data *pd = xmalloc(sizeof *pd);
> +
> + pd->ofpacts = ofpacts;
> + pd->p = packet;
> +
> + if (ovs_list_size(&bp->queue) == BUFFER_QUEUE_DEPTH) {
> + struct packet_data *p = CONTAINER_OF(ovs_list_pop_front(&bp->queue),
> + struct packet_data, node);
> + ovn_packet_data_destroy(p);
> + }
> +
> + ovs_list_push_back(&bp->queue, &pd->node);
> +}
> +
> +void
> +ovn_buffured_packets_prepare_ready(struct hmap *bp_hmap,
Typo: 'buffured'.
> + const struct hmap *recent_mac_bindings,
> + struct ovs_list *ready_packet_data)
Please add a comment for this public API.
> +{
> + long long now = time_msec();
> +
> + struct buffered_packets *bp;
> + HMAP_FOR_EACH_SAFE (bp, hmap_node, bp_hmap) {
> + if (now > bp->expire) {
> + hmap_remove(bp_hmap, &bp->hmap_node);
> + buffered_packets_destroy(bp);
> + continue;
> + }
> +
> + uint32_t hash = keys_ip_hash(bp->dp_key, bp->port_key, &bp->ip);
> + struct mac_binding *mb = mac_binding_find(recent_mac_bindings,
A better name would be 'bindings_to_skip' instead of
'recent_mac_bindings'. It's irrelevant here why we need to skip them.
OTOH, I wonder why you didn't already remove these in the caller, in
run_buffered_binding(). I'd prefer if we didn't have to define APIs of
the form "prepare <something> for all entries EXCEPT <entries>".
Mentioning this change in the commit log would've been nice too.
> + bp->dp_key, bp->port_key,
> + &bp->ip, hash);
> + if (!mb) {
> + continue;
> + }
> +
> + struct packet_data *pd;
> + LIST_FOR_EACH_POP (pd, node, &bp->queue) {
> + struct eth_header *eth = dp_packet_data(pd->p);
> + eth->eth_dst = mb->mac;
> +
> + ovs_list_push_back(ready_packet_data, &pd->node);
> + }
> +
> + hmap_remove(bp_hmap, &bp->hmap_node);
> + buffered_packets_destroy(bp);
> + }
> +}
> +
> +void
> +ovn_packet_data_destroy(struct packet_data *pd)
> +{
> + dp_packet_delete(pd->p);
> + ofpbuf_uninit(&pd->ofpacts);
> + free(pd);
> +}
> +
> +void
> +ovn_packet_data_list_destroy(struct ovs_list *list)
> +{
> + struct packet_data *pd;
> + LIST_FOR_EACH_POP (pd, node, list) {
> + ovn_packet_data_destroy(pd);
> + }
> +}
> +
> +void
> +ovn_buffered_packets_hmap_destroy(struct hmap *hmap)
> +{
> + struct buffered_packets *bp;
> + HMAP_FOR_EACH_POP (bp, hmap_node, hmap) {
> + buffered_packets_destroy(bp);
> + }
> +}
> +
> +
> /* mac_binding related static functions. */
>
> static size_t
> -mac_binding_hash(uint32_t dp_key, uint32_t port_key, struct in6_addr *ip)
> +keys_ip_hash(uint32_t dp_key, uint32_t port_key, struct in6_addr *ip)
> {
> return hash_bytes(ip, sizeof *ip, hash_2words(dp_key, port_key));
> }
>
> static struct mac_binding *
> -mac_binding_find(struct hmap *mac_bindings, uint32_t dp_key,
> - uint32_t port_key, struct in6_addr *ip, size_t hash)
> +mac_binding_find(const struct hmap *mac_bindings, uint32_t dp_key,
> + uint32_t port_key, struct in6_addr *ip, size_t hash)
> {
> struct mac_binding *mb;
> HMAP_FOR_EACH_WITH_HASH (mb, hmap_node, hash, mac_bindings) {
> @@ -203,3 +317,26 @@ fdb_entry_find(struct hmap *fdbs, uint32_t dp_key,
>
> return NULL;
> }
> +
> +/* packet buffering static functions. */
> +static struct buffered_packets *
> +buffered_packets_find(struct hmap *hmap, uint64_t dp_key, uint64_t port_key,
> + struct in6_addr *ip, uint32_t hash)
> +{
> + struct buffered_packets *mb;
> + HMAP_FOR_EACH_WITH_HASH (mb, hmap_node, hash, hmap) {
> + if (mb->dp_key == dp_key && mb->port_key == port_key &&
> + IN6_ARE_ADDR_EQUAL(&mb->ip, ip)) {
> + return mb;
> + }
> + }
> +
> + return NULL;
> +}
> +
> +static void
> +buffered_packets_destroy(struct buffered_packets *bp)
> +{
> + ovn_packet_data_list_destroy(&bp->queue);
> + free(bp);
> +}
> diff --git a/controller/mac-learn.h b/controller/mac-learn.h
> index 57c50c58b..b6ff28650 100644
> --- a/controller/mac-learn.h
> +++ b/controller/mac-learn.h
> @@ -19,7 +19,11 @@
>
> #include <sys/types.h>
> #include <netinet/in.h>
> +
> +#include "dp-packet.h"
> #include "openvswitch/hmap.h"
> +#include "openvswitch/list.h"
> +#include "openvswitch/ofpbuf.h"
>
> struct mac_binding {
> struct hmap_node hmap_node; /* In a hmap. */
> @@ -32,20 +36,22 @@ struct mac_binding {
> /* Value. */
> struct eth_addr mac;
>
> - /* Timestamp when to commit to SB. */
> - long long commit_at_ms;
> + /* Timestamp in ms indicating when the MAC binding should expire. */
> + long long expire;
> };
>
> void ovn_mac_bindings_init(struct hmap *mac_bindings);
> void ovn_mac_bindings_destroy(struct hmap *mac_bindings);
> void ovn_mac_binding_wait(struct hmap *mac_bindings);
> void ovn_mac_binding_remove(struct mac_binding *mb, struct hmap *mac_bindings);
> -bool ovn_mac_binding_can_commit(const struct mac_binding *mb, long long now);
> +bool ovn_mac_binding_is_expired(const struct mac_binding *mb, long long now);
>
> struct mac_binding *ovn_mac_binding_add(struct hmap *mac_bindings,
> uint32_t dp_key, uint32_t port_key,
> struct in6_addr *ip,
> - struct eth_addr mac, bool is_unicast);
> + struct eth_addr mac,
> + uint32_t timestamp_offset,
> + bool limited_capacity);
>
>
>
> @@ -68,4 +74,40 @@ struct fdb_entry *ovn_fdb_add(struct hmap *fdbs,
> uint32_t dp_key, struct eth_addr mac,
> uint32_t port_key);
>
> +
> +struct packet_data {
> + struct ovs_list node;
> +
> + struct ofpbuf ofpacts;
> + struct dp_packet *p;
> +};
> +
> +struct buffered_packets {
> + struct hmap_node hmap_node;
> +
> + struct in6_addr ip;
> + uint64_t dp_key;
> + uint64_t port_key;
> +
> + struct ovs_list queue;
> +
> + long long int expire;
This used to be 'timestamp', i.e., the time when we buffered the packet.
It's not obvious to me why we need to change it. In any case it should
be mentioned in the commit log.
> +};
> +
> +#define OVN_BUFFERED_PACKETS_TIMEOUT 10000
I know the previous definition didn't do this but now that we're
changing it I'd also include the unit in the name, e.g.:
OVN_BUFFERED_PACKETS_TIMEOUT_MS.
> +
> +struct buffered_packets *ovn_buffered_packets_add(struct hmap *hmap,
> + uint64_t dp_key,
> + uint64_t port_key,
> + struct in6_addr ip);
> +void ovn_buffered_packets_add_packet_data(struct buffered_packets *bp,
> + struct ofpbuf ofpacts,
> + struct dp_packet *packet);
> +void ovn_buffured_packets_prepare_ready(struct hmap *bp_hmap,
> + const struct hmap *recent_mac_bindings,
> + struct ovs_list *ready_packet_data);
> +void ovn_packet_data_destroy(struct packet_data *pd);
> +void ovn_packet_data_list_destroy(struct ovs_list *list);
> +void ovn_buffered_packets_hmap_destroy(struct hmap *hmap);
> +
There are a few things I don't really like about this change:
- There's no ovn_packet_data_create() API but there is an
  ovn_packet_data_destroy() API. In general, it's more readable IMO if we
  have both *_create() and *_destroy() APIs for a custom data type.
- It seems to me ovn_packet_data_list_destroy() is not really needed. What
  I think we should do is do all the "buffered_packets" and "packet_data"
  processing in a single place, probably in mac-learn.c.
- It's not great to use raw types like ovs_list and hmap. It makes it hard
  to know what the real type is without looking at the implementation. I
  think I'd just wrap the list and the map in explicit structures, defined
  in mac-learn.h. We can then rely on the compiler's type checking to
  ensure we don't pass the wrong structure as argument.
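For example, something like this (rough sketch, naming is completely up
to you):

struct buffered_packets_ctx {
    /* Map of 'struct buffered_packets', keyed on (dp_key, port_key, ip). */
    struct hmap buffered_packets;
    /* List of 'struct packet_data' that are ready to be sent. */
    struct ovs_list ready_packets_data;
};

void ovn_buffered_packets_ctx_init(struct buffered_packets_ctx *ctx);
void ovn_buffered_packets_ctx_destroy(struct buffered_packets_ctx *ctx);

struct packet_data *ovn_packet_data_create(struct ofpbuf ofpacts,
                                           struct dp_packet *packet);
void ovn_packet_data_destroy(struct packet_data *pd);

That would also take care of the _create()/_destroy() pairing, and
pinctrl.c would only have to deal with the context structure.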
> #endif /* OVN_MAC_LEARN_H */
> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> index 795847729..fcc278c8c 100644
> --- a/controller/pinctrl.c
> +++ b/controller/pinctrl.c
> @@ -178,8 +178,8 @@ struct pinctrl {
>
> static struct pinctrl pinctrl;
>
> -static void init_buffered_packets_map(void);
> -static void destroy_buffered_packets_map(void);
> +static void init_buffered_packets_data(void);
> +static void destroy_buffered_packets_data(void);
> static void
> run_buffered_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name,
> const struct sbrec_mac_binding_table *mac_binding_table)
> @@ -535,7 +535,7 @@ pinctrl_init(void)
> init_send_garps_rarps();
> init_ipv6_ras();
> init_ipv6_prefixd();
> - init_buffered_packets_map();
> + init_buffered_packets_data();
> init_activated_ports();
> init_event_table();
> ip_mcast_snoop_init();
> @@ -1416,222 +1416,72 @@ prepare_ipv6_prefixd(struct ovsdb_idl_txn *ovnsb_idl_txn,
> }
> }
>
> -struct buffer_info {
> - struct ofpbuf ofpacts;
> - struct dp_packet *p;
> -};
> -
> -#define BUFFER_QUEUE_DEPTH 4
> -struct buffered_packets {
> - struct hmap_node hmap_node;
> - struct ovs_list list;
> -
> - /* key */
> - struct in6_addr ip;
> - struct eth_addr ea;
> -
> - uint64_t dp_key;
> - uint64_t port_key;
> -
> - long long int timestamp;
> -
> - struct buffer_info data[BUFFER_QUEUE_DEPTH];
> - uint32_t head, tail;
Please mention this change in the commit log. We're not using the
circular buffer anymore. Instead we're popping the front of the list if
needed. In theory this memory access pattern change can influence
performance. In practice, given that BUFFER_QUEUE_DEPTH is 4, I don't
think it matters much though.
> -};
> -
> static struct hmap buffered_packets_map;
> -static struct ovs_list buffered_mac_bindings;
> +/* List of 'struct packet_data' that is ready to be sent. */
I would add a comment for both 'buffered_packets_map' and
'ready_packets_data'. Alternatively I'd add a newline before this comment.
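E.g., something like (wording up to you):

/* Packets buffered while we're waiting for the destination MAC binding
 * to be learned; keyed on (dp_key, port_key, ip). */
static struct hmap buffered_packets_map;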
> +static struct ovs_list ready_packets_data;
>
> static void
> -init_buffered_packets_map(void)
> +init_buffered_packets_data(void)
> {
> hmap_init(&buffered_packets_map);
> - ovs_list_init(&buffered_mac_bindings);
> -}
> -
> -static void
> -destroy_buffered_packets(struct buffered_packets *bp)
> -{
> - struct buffer_info *bi;
> -
> - while (bp->head != bp->tail) {
> - bi = &bp->data[bp->head];
> - dp_packet_delete(bi->p);
> - ofpbuf_uninit(&bi->ofpacts);
> -
> - bp->head = (bp->head + 1) % BUFFER_QUEUE_DEPTH;
> - }
> -}
> -
> -static void
> -destroy_buffered_packets_map(void)
> -{
> - struct buffered_packets *bp;
> - HMAP_FOR_EACH_SAFE (bp, hmap_node, &buffered_packets_map) {
> - destroy_buffered_packets(bp);
> - hmap_remove(&buffered_packets_map, &bp->hmap_node);
> - free(bp);
> - }
> - hmap_destroy(&buffered_packets_map);
> -
> - LIST_FOR_EACH_POP (bp, list, &buffered_mac_bindings) {
> - destroy_buffered_packets(bp);
> - free(bp);
> - }
> -}
> -
> -static void
> -buffered_push_packet(struct buffered_packets *bp,
> - struct dp_packet *packet,
> - const struct match *md)
> -{
> - uint32_t next = (bp->tail + 1) % BUFFER_QUEUE_DEPTH;
> - struct buffer_info *bi = &bp->data[bp->tail];
> -
> - ofpbuf_init(&bi->ofpacts, 4096);
> -
> - reload_metadata(&bi->ofpacts, md);
> - /* reload pkt_mark field */
> - const struct mf_field *pkt_mark_field = mf_from_id(MFF_PKT_MARK);
> - union mf_value pkt_mark_value;
> - mf_get_value(pkt_mark_field, &md->flow, &pkt_mark_value);
> - ofpact_put_set_field(&bi->ofpacts, pkt_mark_field, &pkt_mark_value, NULL);
> -
> - struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&bi->ofpacts);
> - resubmit->in_port = OFPP_CONTROLLER;
> - resubmit->table_id = OFTABLE_REMOTE_OUTPUT;
> -
> - bi->p = packet;
> -
> - if (next == bp->head) {
> - bi = &bp->data[bp->head];
> - dp_packet_delete(bi->p);
> - ofpbuf_uninit(&bi->ofpacts);
> - bp->head = (bp->head + 1) % BUFFER_QUEUE_DEPTH;
> - }
> - bp->tail = next;
> + ovs_list_init(&ready_packets_data);
> }
>
> static void
> -buffered_send_packets(struct rconn *swconn, struct buffered_packets *bp,
> - struct eth_addr *addr)
> -{
> - enum ofp_version version = rconn_get_version(swconn);
> - enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
> -
> - while (bp->head != bp->tail) {
> - struct buffer_info *bi = &bp->data[bp->head];
> - struct eth_header *eth = dp_packet_data(bi->p);
> -
> - eth->eth_dst = *addr;
> - struct ofputil_packet_out po = {
> - .packet = dp_packet_data(bi->p),
> - .packet_len = dp_packet_size(bi->p),
> - .buffer_id = UINT32_MAX,
> - .ofpacts = bi->ofpacts.data,
> - .ofpacts_len = bi->ofpacts.size,
> - };
> - match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
> - queue_msg(swconn, ofputil_encode_packet_out(&po, proto));
> -
> - ofpbuf_uninit(&bi->ofpacts);
> - dp_packet_delete(bi->p);
> -
> - bp->head = (bp->head + 1) % BUFFER_QUEUE_DEPTH;
> - }
> -}
> -
> -#define BUFFER_MAP_TIMEOUT 10000
> -static void
> -buffered_packets_map_gc(void)
> -{
> - struct buffered_packets *cur_qp;
> - long long int now = time_msec();
> -
> - HMAP_FOR_EACH_SAFE (cur_qp, hmap_node, &buffered_packets_map) {
> - if (now > cur_qp->timestamp + BUFFER_MAP_TIMEOUT) {
> - destroy_buffered_packets(cur_qp);
> - hmap_remove(&buffered_packets_map, &cur_qp->hmap_node);
> - free(cur_qp);
> - }
> - }
> -}
> -
> -static uint32_t
> -pinctrl_buffer_packet_hash(uint64_t dp_key, uint64_t port_key,
> - const struct in6_addr *addr)
> +destroy_buffered_packets_data(void)
> {
> - uint32_t hash = 0;
> - hash = hash_add64(hash, port_key);
> - hash = hash_add64(hash, dp_key);
> - hash = hash_bytes(addr, sizeof addr, hash);
> - return hash_finish(hash, 16);
> -}
> -
> -static struct buffered_packets *
> -pinctrl_find_buffered_packets(uint64_t dp_key, uint64_t port_key,
> - const struct in6_addr *ip, uint32_t hash)
> -{
> - struct buffered_packets *qp;
> - HMAP_FOR_EACH_WITH_HASH (qp, hmap_node, hash, &buffered_packets_map) {
> - if (qp->dp_key == dp_key && qp->port_key == port_key &&
> - IN6_ARE_ADDR_EQUAL(&qp->ip, ip)) {
> - return qp;
> - }
> - }
> - return NULL;
> -}
> -
> -static struct buffered_packets *
> -pinctrl_find_buffered_packets_with_hash(uint64_t dp_key, uint64_t port_key,
> - const struct in6_addr *ip)
> -{
> - uint32_t hash = pinctrl_buffer_packet_hash(dp_key, port_key, ip);
> -
> - return pinctrl_find_buffered_packets(dp_key, port_key, ip, hash);
> + ovn_buffered_packets_hmap_destroy(&buffered_packets_map);
> + ovn_packet_data_list_destroy(&ready_packets_data);
> }
>
> /* Called with in the pinctrl_handler thread context. */
> -static int
> +static void
> pinctrl_handle_buffered_packets(struct dp_packet *pkt_in,
> const struct match *md, bool is_arp)
> OVS_REQUIRES(pinctrl_mutex)
> {
> - struct buffered_packets *bp;
> + struct in6_addr ip;
> + struct ofpbuf ofpacts;
> struct dp_packet *clone;
> - struct in6_addr addr;
> + struct buffered_packets *bp;
> uint64_t dp_key = ntohll(md->flow.metadata);
> uint64_t oport_key = md->flow.regs[MFF_LOG_OUTPORT - MFF_REG0];
>
> if (is_arp) {
> - addr = in6_addr_mapped_ipv4(htonl(md->flow.regs[0]));
> + ip = in6_addr_mapped_ipv4(htonl(md->flow.regs[0]));
> } else {
> ovs_be128 ip6 = hton128(flow_get_xxreg(&md->flow, 0));
> - memcpy(&addr, &ip6, sizeof addr);
> + memcpy(&ip, &ip6, sizeof ip);
> }
>
> - uint32_t hash = pinctrl_buffer_packet_hash(dp_key, oport_key, &addr);
> - bp = pinctrl_find_buffered_packets(dp_key, oport_key, &addr, hash);
> + bp = ovn_buffered_packets_add(&buffered_packets_map, dp_key,
> + oport_key, ip);
> if (!bp) {
> - if (hmap_count(&buffered_packets_map) >= 1000) {
> - COVERAGE_INC(pinctrl_drop_buffered_packets_map);
> - return -ENOMEM;
> - }
> -
> - bp = xmalloc(sizeof *bp);
> - hmap_insert(&buffered_packets_map, &bp->hmap_node, hash);
> - bp->head = bp->tail = 0;
> - bp->ip = addr;
> - bp->dp_key = dp_key;
> - bp->port_key = oport_key;
> + COVERAGE_INC(pinctrl_drop_buffered_packets_map);
> + return;
> }
> - bp->timestamp = time_msec();
> +
> /* clone the packet to send it later with correct L2 address */
> clone = dp_packet_clone_data(dp_packet_data(pkt_in),
> dp_packet_size(pkt_in));
> - buffered_push_packet(bp, clone, md);
>
> - return 0;
> +
> + ofpbuf_init(&ofpacts, 4096);
> + reload_metadata(&ofpacts, md);
> + /* reload pkt_mark field */
> + const struct mf_field *pkt_mark_field = mf_from_id(MFF_PKT_MARK);
> + union mf_value pkt_mark_value;
> + mf_get_value(pkt_mark_field, &md->flow, &pkt_mark_value);
> + ofpact_put_set_field(&ofpacts, pkt_mark_field, &pkt_mark_value, NULL);
> +
> + struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts);
> + resubmit->in_port = OFPP_CONTROLLER;
> + resubmit->table_id = OFTABLE_REMOTE_OUTPUT;
> +
> + ovn_buffered_packets_add_packet_data(bp, ofpacts, clone);
> +
> + /* There is a chance that the MAC binding was already created. */
> + notify_pinctrl_main();
> }
>
> /* Called with in the pinctrl_handler thread context. */
> @@ -4154,7 +4004,7 @@ pinctrl_destroy(void)
> destroy_send_garps_rarps();
> destroy_ipv6_ras();
> destroy_ipv6_prefixd();
> - destroy_buffered_packets_map();
> + destroy_buffered_packets_data();
> destroy_activated_ports();
> event_table_destroy();
> destroy_put_mac_bindings();
> @@ -4196,6 +4046,8 @@ destroy_put_mac_bindings(void)
> ovn_mac_bindings_destroy(&put_mac_bindings);
> }
>
> +#define MAX_MAC_BINDING_DELAY_MSEC 50
> +
> /* Called with in the pinctrl_handler thread context. */
> static void
> pinctrl_handle_put_mac_binding(const struct flow *md,
> @@ -4216,11 +4068,13 @@ pinctrl_handle_put_mac_binding(const struct flow *md,
>
> /* If the ARP reply was unicast we should not delay it,
> * there won't be any race. */
> - bool is_unicast = !eth_addr_is_multicast(headers->dl_dst);
> + uint32_t delay = eth_addr_is_multicast(headers->dl_dst)
> + ? random_range(MAX_MAC_BINDING_DELAY_MSEC) + 1
> + : 0;
> struct mac_binding *mb = ovn_mac_binding_add(&put_mac_bindings, dp_key,
> port_key, &ip_key,
> headers->dl_src,
> - is_unicast);
> + delay, true);
> if (!mb) {
> COVERAGE_INC(pinctrl_drop_put_mac_binding);
> return;
> @@ -4237,12 +4091,25 @@ static void
> send_mac_binding_buffered_pkts(struct rconn *swconn)
> OVS_REQUIRES(pinctrl_mutex)
> {
> - struct buffered_packets *bp;
> - LIST_FOR_EACH_POP (bp, list, &buffered_mac_bindings) {
> - buffered_send_packets(swconn, bp, &bp->ea);
> - free(bp);
> + enum ofp_version version = rconn_get_version(swconn);
> + enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
> +
> + struct packet_data *pd;
> + LIST_FOR_EACH_POP (pd, node, &ready_packets_data) {
> + struct ofputil_packet_out po = {
> + .packet = dp_packet_data(pd->p),
> + .packet_len = dp_packet_size(pd->p),
> + .buffer_id = UINT32_MAX,
> + .ofpacts = pd->ofpacts.data,
> + .ofpacts_len = pd->ofpacts.size,
> + };
> + match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
> + queue_msg(swconn, ofputil_encode_packet_out(&po, proto));
> +
> + ovn_packet_data_destroy(pd);
> }
> - ovs_list_init(&buffered_mac_bindings);
> +
> + ovs_list_init(&ready_packets_data);
> }
>
> static const struct sbrec_mac_binding *
> @@ -4391,7 +4258,7 @@ run_put_mac_bindings(struct ovsdb_idl_txn *ovnsb_idl_txn,
>
> struct mac_binding *mb;
> HMAP_FOR_EACH_SAFE (mb, hmap_node, &put_mac_bindings) {
> - if (ovn_mac_binding_can_commit(mb, now)) {
> + if (ovn_mac_binding_is_expired(mb, now)) {
This is really confusing. I think _is_expired() is not a good name
choice. Without looking at what run_put_mac_binding() does, my first reaction
is to assume the mac binding is just flushed. What actually happens is
that the mac binding is committed to the SB. Please revert back to the
old name. That also means we should probably rename the
'struct mac_binding' field back to 'commit_at_ms'.
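I.e., keep the field and the helper spelled in terms of committing,
something like the pre-patch version (only the comment wording below is
new):

/* Timestamp (in ms) when the MAC binding should be committed to
 * the SB database. */
long long commit_at_ms;

bool ovn_mac_binding_can_commit(const struct mac_binding *mb,
                                long long now);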
> run_put_mac_binding(ovnsb_idl_txn,
> sbrec_datapath_binding_by_key,
> sbrec_port_binding_by_key,
> @@ -4406,64 +4273,64 @@ run_buffered_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name,
> const struct sbrec_mac_binding_table *mac_binding_table)
> OVS_REQUIRES(pinctrl_mutex)
> {
> - bool notify = false;
> -
> if (!hmap_count(&buffered_packets_map)) {
> return;
> }
>
> - const struct sbrec_mac_binding *mb;
> - SBREC_MAC_BINDING_TABLE_FOR_EACH_TRACKED (mb, mac_binding_table) {
> + struct hmap recent_mbs;
> +
> + hmap_init(&recent_mbs);
> +
> + const struct sbrec_mac_binding *smb;
> + SBREC_MAC_BINDING_TABLE_FOR_EACH_TRACKED (smb, mac_binding_table) {
> const struct sbrec_port_binding *pb = lport_lookup_by_name(
> - sbrec_port_binding_by_name, mb->logical_port);
> + sbrec_port_binding_by_name, smb->logical_port);
> if (!pb || !pb->datapath) {
> continue;
> }
>
> + struct eth_addr mac;
> struct in6_addr ip;
> ovs_be32 ip4;
> - if (ip_parse(mb->ip, &ip4)) {
> +
> + if (ip_parse(smb->ip, &ip4)) {
> ip = in6_addr_mapped_ipv4(ip4);
> - } else if (!ipv6_parse(mb->ip, &ip)) {
> + } else if (!ipv6_parse(smb->ip, &ip)) {
> continue;
> }
>
> - struct buffered_packets *bp = pinctrl_find_buffered_packets_with_hash(
> - pb->datapath->tunnel_key, pb->tunnel_key, &ip);
> - if (!bp) {
> - if (!smap_get(&pb->options, "chassis-redirect-port")) {
> - continue;
> - }
> -
> - char *redirect_name = xasprintf("cr-%s", pb->logical_port);
> - pb = lport_lookup_by_name(sbrec_port_binding_by_name,
> - redirect_name);
> - free(redirect_name);
> -
> - if (!pb || !pb->datapath || strcmp(pb->type, "chassisredirect")) {
> - continue;
> - }
> -
> - bp = pinctrl_find_buffered_packets_with_hash(
> - pb->datapath->tunnel_key, pb->tunnel_key, &ip);
> + if (!eth_addr_from_string(smb->mac, &mac)) {
> + continue;
> }
>
> - if (!bp) {
> + ovn_mac_binding_add(&recent_mbs, smb->datapath->tunnel_key,
> + pb->tunnel_key, &ip, mac, 0, false);
> +
> + const char *redirect_port =
> + smap_get(&pb->options, "chassis-redirect-port");
> + if (!redirect_port) {
> continue;
> }
>
> - if (!ovs_scan(mb->mac, ETH_ADDR_SCAN_FMT,
> - ETH_ADDR_SCAN_ARGS(bp->ea))) {
> + pb = lport_lookup_by_name(sbrec_port_binding_by_name, redirect_port);
> + if (!pb || pb->datapath->tunnel_key != smb->datapath->tunnel_key ||
> + strcmp(pb->type, "chassisredirect")) {
> continue;
> }
>
> - hmap_remove(&buffered_packets_map, &bp->hmap_node);
> - ovs_list_push_back(&buffered_mac_bindings, &bp->list);
> - notify = true;
> + /* Add the same entry also for chassisredirect port as the buffered
> + * traffic might be buffered on the cr port. */
> + ovn_mac_binding_add(&recent_mbs, smb->datapath->tunnel_key,
> + pb->tunnel_key, &ip, mac, 0, false);
> }
> - buffered_packets_map_gc();
>
> - if (notify) {
> + ovn_buffured_packets_prepare_ready(&buffered_packets_map,
> + &recent_mbs,
> + &ready_packets_data);
> +
> + ovn_mac_bindings_destroy(&recent_mbs);
> +
> + if (!ovs_list_is_empty(&ready_packets_data)) {
> notify_pinctrl_handler();
> }
> }
> @@ -6070,7 +5937,7 @@ may_inject_pkts(void)
> !shash_is_empty(&send_garp_rarp_data) ||
> ipv6_prefixd_should_inject() ||
> !ovs_list_is_empty(&mcast_query_list) ||
> - !ovs_list_is_empty(&buffered_mac_bindings) ||
> + !ovs_list_is_empty(&ready_packets_data) ||
> bfd_monitor_should_inject());
> }
>
After going through the whole diff I wonder if it wouldn't be possible
to split this refactor/optimization into even smaller patches, making it
easier to review and ensure no regressions happen. E.g., the
mac_binding changes could probably be separated from the
buffered_packets changes.
Thanks,
Dumitru