Hi Lorenzo,
On 9/19/24 19:08, Lorenzo Bianconi wrote:
> Introduce the capbility to periodically send ARP/ND packets for
> ECMP nexthops in order to resolve their L2 address. This is a preliminary
> patch to introduce the capability to flush stale ECMP CT entries.
>
> Signed-off-by: Lorenzo Bianconi <[email protected]>
> ---
> controller/ovn-controller.8.xml | 11 ++
> controller/ovn-controller.c | 2 +
> controller/pinctrl.c | 269 +++++++++++++++++++++++++++++---
> controller/pinctrl.h | 2 +
> 4 files changed, 261 insertions(+), 23 deletions(-)
>
> diff --git a/controller/ovn-controller.8.xml b/controller/ovn-controller.8.xml
> index aeaa374c1..c7fcad1fa 100644
> --- a/controller/ovn-controller.8.xml
> +++ b/controller/ovn-controller.8.xml
> @@ -385,6 +385,17 @@
> cap for the exponential backoff used by <code>ovn-controller</code>
> to send GARPs packets.
> </dd>
> + <dt><code>external_ids:arp-nd-max-timeout-sec</code></dt>
> + <dd>
> + When used, this configuration value specifies the maximum timeout
> + (in seconds) between two consecutive ARP/ND packets sent by
> + <code>ovn-controller</code> to resolve ECMP nexthop mac address.
> + <code>ovn-controller</code> by default sends just 4 ARP/ND packets
So we stop ARPing the next hop after doing it 4 times? That sounds
wrong. The point of the feature is to detect when next hops change their
MAC address, right?
Ah, after reading the rest of the patch I see you do:
continuous_arp_nd = !!max_arp_nd_timeout;
So if the option is not set (the default), ovn-controller continuously ARPs.
The documentation here is misleading in my opinion; I think it should say
"by default continuously sends ARPs..." or something along those lines.
Also, we're missing a NEWS entry for this new option.
> + with an exponential backoff timeout.
> + Setting <code>external_ids:arp-nd-max-timeout-sec</code> allows to
> + cap for the exponential backoff used by <code>ovn-controller</code>
> + to send ARPs/NDs packets.
> + </dd>
> <dt><code>external_ids:ovn-bridge-remote</code></dt>
> <dd>
> <p>
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index 64be08ce3..e72d28731 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -5653,6 +5653,8 @@ main(int argc, char *argv[])
> sbrec_mac_binding_table_get(
> ovnsb_idl_loop.idl),
> sbrec_bfd_table_get(ovnsb_idl_loop.idl),
> + sbrec_ecmp_nexthop_table_get(
> + ovnsb_idl_loop.idl),
> br_int, chassis,
> &runtime_data->local_datapaths,
> &runtime_data->active_tunnels,
> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> index c86b4f940..daf0bfa41 100644
> --- a/controller/pinctrl.c
> +++ b/controller/pinctrl.c
> @@ -170,6 +170,9 @@ static struct seq *pinctrl_main_seq;
> static long long int garp_rarp_max_timeout = GARP_RARP_DEF_MAX_TIMEOUT;
> static bool garp_rarp_continuous;
>
> +static long long int arp_nd_max_timeout = GARP_RARP_DEF_MAX_TIMEOUT;
> +static bool arp_nd_continuous;
I know we use individual global vars for the garp_rarp_* case too, but
maybe it's time to refactor this into a structure instead of duplicating
these globals for every user of the "garp_rarp" mechanism.
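Something along these lines maybe (just a rough, untested sketch; names
are made up):

struct garp_rarp_ctl {
    struct shash data;          /* Entries to announce (garp_rarp_data). */
    long long int max_timeout;  /* Cap for the exponential backoff, in ms. */
    bool continuous;            /* Keep announcing once the cap is reached. */
};

static struct garp_rarp_ctl garp_rarp_ctl; /* GARP/RARP announcements. */
static struct garp_rarp_ctl arp_nd_ctl;    /* ECMP nexthop ARP/NS probes. */

Then the init/destroy/wait/run/update helpers can take a
"struct garp_rarp_ctl *" argument and most of the duplication below goes
away.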
> +
> static void *pinctrl_handler(void *arg);
>
> struct pinctrl {
> @@ -229,13 +232,17 @@ static void run_activated_ports(
> const struct sbrec_chassis *chassis);
>
> static void init_send_garps_rarps(void);
> +static void init_send_arps_nds(void);
> static void destroy_send_garps_rarps(void);
> +static void destroy_send_arps_nds(void);
> static void send_garp_rarp_wait(long long int send_garp_rarp_time);
> +static void send_arp_nd_wait(long long int send_arp_nd_time);
Same comment about duplication here.
> static void send_garp_rarp_prepare(
> struct ovsdb_idl_txn *ovnsb_idl_txn,
> struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
> struct ovsdb_idl_index *sbrec_port_binding_by_name,
> struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
> + const struct sbrec_ecmp_nexthop_table *ecmp_nh_table,
> const struct ovsrec_bridge *,
> const struct sbrec_chassis *,
> const struct hmap *local_datapaths,
> @@ -245,6 +252,9 @@ static void send_garp_rarp_prepare(
> static void send_garp_rarp_run(struct rconn *swconn,
> long long int *send_garp_rarp_time)
> OVS_REQUIRES(pinctrl_mutex);
> +static void send_arp_nd_run(struct rconn *swconn,
> + long long int *send_arp_nd_time)
> + OVS_REQUIRES(pinctrl_mutex);
> static void pinctrl_handle_nd_na(struct rconn *swconn,
> const struct flow *ip_flow,
> const struct match *md,
> @@ -554,6 +564,7 @@ pinctrl_init(void)
> {
> init_put_mac_bindings();
> init_send_garps_rarps();
> + init_send_arps_nds();
> init_ipv6_ras();
> init_ipv6_prefixd();
> init_buffered_packets_ctx();
> @@ -4000,6 +4011,7 @@ pinctrl_handler(void *arg_)
> static long long int send_ipv6_ra_time = LLONG_MAX;
> /* Next GARP/RARP announcement in ms. */
> static long long int send_garp_rarp_time = LLONG_MAX;
> + static long long int send_arp_nd_time = LLONG_MAX;
> /* Next multicast query (IGMP) in ms. */
> static long long int send_mcast_query_time = LLONG_MAX;
> static long long int svc_monitors_next_run_time = LLONG_MAX;
> @@ -4037,6 +4049,7 @@ pinctrl_handler(void *arg_)
> if (may_inject_pkts()) {
> ovs_mutex_lock(&pinctrl_mutex);
> send_garp_rarp_run(swconn, &send_garp_rarp_time);
> + send_arp_nd_run(swconn, &send_arp_nd_time);
> send_ipv6_ras(swconn, &send_ipv6_ra_time);
> send_ipv6_prefixd(swconn, &send_prefixd_time);
> send_mac_binding_buffered_pkts(swconn);
> @@ -4055,6 +4068,7 @@ pinctrl_handler(void *arg_)
> rconn_recv_wait(swconn);
> if (rconn_is_connected(swconn)) {
> send_garp_rarp_wait(send_garp_rarp_time);
> + send_arp_nd_wait(send_arp_nd_time);
> ipv6_ra_wait(send_ipv6_ra_time);
> ip_mcast_querier_wait(send_mcast_query_time);
> svc_monitors_wait(svc_monitors_next_run_time);
> @@ -4151,6 +4165,7 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> const struct sbrec_service_monitor_table *svc_mon_table,
> const struct sbrec_mac_binding_table *mac_binding_table,
> const struct sbrec_bfd_table *bfd_table,
> + const struct sbrec_ecmp_nexthop_table *ecmp_nh_table,
> const struct ovsrec_bridge *br_int,
> const struct sbrec_chassis *chassis,
> const struct hmap *local_datapaths,
> @@ -4167,8 +4182,9 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> sbrec_port_binding_by_key, chassis);
> send_garp_rarp_prepare(ovnsb_idl_txn, sbrec_port_binding_by_datapath,
> sbrec_port_binding_by_name,
> - sbrec_mac_binding_by_lport_ip, br_int, chassis,
> - local_datapaths, active_tunnels, ovs_table);
> + sbrec_mac_binding_by_lport_ip, ecmp_nh_table,
> + br_int, chassis, local_datapaths, active_tunnels,
> + ovs_table);
> prepare_ipv6_ras(local_active_ports_ras, sbrec_port_binding_by_name);
> prepare_ipv6_prefixd(ovnsb_idl_txn, sbrec_port_binding_by_name,
> local_active_ports_ipv6_pd, chassis,
> @@ -4702,6 +4718,7 @@ pinctrl_destroy(void)
> latch_destroy(&pinctrl.pinctrl_thread_exit);
> rconn_destroy(pinctrl.swconn);
> destroy_send_garps_rarps();
> + destroy_send_arps_nds();
> destroy_ipv6_ras();
> destroy_ipv6_prefixd();
> destroy_buffered_packets_ctx();
> @@ -5029,7 +5046,8 @@ wait_put_mac_bindings(struct ovsdb_idl_txn *ovnsb_idl_txn)
> */
> struct garp_rarp_data {
> struct eth_addr ea; /* Ethernet address of port. */
> - ovs_be32 ipv4; /* Ipv4 address of port. */
> + struct in6_addr src_ip; /* IP address of port. */
> + struct in6_addr dst_ip; /* Destination IP address */
> long long int announce_time; /* Next announcement in ms. */
> int backoff; /* Backoff timeout for the next
> * announcement (in msecs). */
> @@ -5052,19 +5070,37 @@ destroy_send_garps_rarps(void)
> shash_destroy_free_data(&send_garp_rarp_data);
> }
>
> +/* Contains ARPs data to be sent to track ECMP next-hop mac address. Protected
> + * by pinctrl_mutex. */
> +static struct shash send_arp_nd_data;
> +
> +static void
> +init_send_arps_nds(void)
> +{
> + shash_init(&send_arp_nd_data);
> +}
> +
> +static void
> +destroy_send_arps_nds(void)
> +{
> + shash_destroy_free_data(&send_arp_nd_data);
> +}
> +
> /* Runs with in the main ovn-controller thread context. */
> static void
> -add_garp_rarp(const char *name, const struct eth_addr ea, ovs_be32 ip,
> - uint32_t dp_key, uint32_t port_key)
> +add_garp_rarp(const char *name, const struct eth_addr ea,
> + struct in6_addr src_ip, struct in6_addr dst_ip,
> + uint32_t dp_key, uint32_t port_key, struct shash *shash)
> {
> struct garp_rarp_data *garp_rarp = xmalloc(sizeof *garp_rarp);
> garp_rarp->ea = ea;
> - garp_rarp->ipv4 = ip;
> + garp_rarp->src_ip = src_ip;
> + garp_rarp->dst_ip = dst_ip;
> garp_rarp->announce_time = time_msec() + 1000;
> garp_rarp->backoff = 1000; /* msec. */
> garp_rarp->dp_key = dp_key;
> garp_rarp->port_key = port_key;
> - shash_add(&send_garp_rarp_data, name, garp_rarp);
> + shash_add(shash, name, garp_rarp);
>
> /* Notify pinctrl_handler so that it can wakeup and process
> * these GARP/RARP requests. */
> @@ -5113,9 +5149,10 @@ send_garp_rarp_update(struct ovsdb_idl_txn *ovnsb_idl_txn,
> }
> } else if (ovnsb_idl_txn) {
> add_garp_rarp(name, laddrs->ea,
> - laddrs->ipv4_addrs[i].addr,
> - binding_rec->datapath->tunnel_key,
> - binding_rec->tunnel_key);
> + in6_addr_mapped_ipv4(laddrs->ipv4_addrs[i].addr),
> + in6_addr_mapped_ipv4(laddrs->ipv4_addrs[i].addr),
> + binding_rec->datapath->tunnel_key,
> + binding_rec->tunnel_key, &send_garp_rarp_data);
> send_garp_locally(ovnsb_idl_txn,
> sbrec_mac_binding_by_lport_ip,
> local_datapaths, binding_rec,
> laddrs->ea,
> @@ -5142,9 +5179,11 @@ send_garp_rarp_update(struct ovsdb_idl_txn *ovnsb_idl_txn,
> garp_rarp->backoff = 1000; /* msec. */
> }
> } else {
> - add_garp_rarp(name, laddrs->ea,
> - 0, binding_rec->datapath->tunnel_key,
> - binding_rec->tunnel_key);
> + struct in6_addr ip = {};
Please avoid empty initializers. See:
ef51c943a305 ("treewide: Avoid empty initializer")
https://github.com/ovn-org/ovn/commit/ef51c943a305
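For this particular case, something like (assuming an all-zeros address
is what's intended here):

struct in6_addr ip = in6addr_any;

would also avoid the empty-initializer extension.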
> + add_garp_rarp(name, laddrs->ea, ip, ip,
> + binding_rec->datapath->tunnel_key,
> + binding_rec->tunnel_key,
> + &send_garp_rarp_data);
> }
> free(name);
> }
> @@ -5182,10 +5221,10 @@ send_garp_rarp_update(struct ovsdb_idl_txn *ovnsb_idl_txn,
> ip = laddrs.ipv4_addrs[0].addr;
> }
>
> - add_garp_rarp(binding_rec->logical_port,
> - laddrs.ea, ip,
> + add_garp_rarp(binding_rec->logical_port, laddrs.ea,
> + in6_addr_mapped_ipv4(ip), in6_addr_mapped_ipv4(ip),
> binding_rec->datapath->tunnel_key,
> - binding_rec->tunnel_key);
> + binding_rec->tunnel_key, &send_garp_rarp_data);
> if (ip) {
> send_garp_locally(ovnsb_idl_txn, sbrec_mac_binding_by_lport_ip,
> local_datapaths, binding_rec, laddrs.ea, ip);
> @@ -5196,12 +5235,52 @@ send_garp_rarp_update(struct ovsdb_idl_txn *ovnsb_idl_txn,
> }
> }
>
> +/* Add or update a vif for which ARPs need to be announced. */
> +static void
> +send_arp_nd_update(const struct sbrec_port_binding *pb, const char *nexthop,
> + long long int max_arp_timeout, bool continuous_arp_nd)
> +{
> + volatile struct garp_rarp_data *e = shash_find_data(&send_arp_nd_data,
> + nexthop);
volatile? Why?
> + if (e) {
> + e->dp_key = pb->datapath->tunnel_key;
> + e->port_key = pb->tunnel_key;
> + if (max_arp_timeout != arp_nd_max_timeout ||
> + continuous_arp_nd != arp_nd_continuous) {
> + /* reset backoff */
> + e->announce_time = time_msec() + 1000;
> + e->backoff = 1000; /* msec. */
This is already the third place where we manually set announce_time and
backoff; we should refactor this into a single helper that all garp_rarp
users reuse.
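E.g. something like (hypothetical name, not tested):

static void
garp_rarp_reset_timers(struct garp_rarp_data *garp_rarp)
{
    garp_rarp->announce_time = time_msec() + 1000;
    garp_rarp->backoff = 1000; /* msec. */
}

and call it from add_garp_rarp(), send_garp_rarp_update() and here.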
> + }
> + } else {
> + struct lport_addresses laddrs;
> + if (!extract_lsp_addresses(pb->mac[0], &laddrs)) {
> + return;
> + }
> +
> + if (laddrs.n_ipv4_addrs) {
> + ovs_be32 dst_ip;
> + inet_pton(AF_INET, nexthop, &dst_ip);
ip_parse() and we should check the return value.
> + add_garp_rarp(nexthop, laddrs.ea,
> + in6_addr_mapped_ipv4(laddrs.ipv4_addrs[0].addr),
> + in6_addr_mapped_ipv4(dst_ip),
> + pb->datapath->tunnel_key, pb->tunnel_key,
> + &send_arp_nd_data);
> + } else if (laddrs.n_ipv6_addrs) {
> + struct in6_addr dst_ip;
> + inet_pton(AF_INET6, nexthop, &dst_ip);
ipv6_parse() and we should check the return value.
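I.e., for both branches, something along these lines (sketch):

ovs_be32 dst_ip;
if (!ip_parse(nexthop, &dst_ip)) {
    /* Bogus nexthop string: don't send anything for it. */
    destroy_lport_addresses(&laddrs);
    return;
}

and, for the IPv6 case:

struct in6_addr dst_ip;
if (!ipv6_parse(nexthop, &dst_ip)) {
    destroy_lport_addresses(&laddrs);
    return;
}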
> + add_garp_rarp(nexthop, laddrs.ea, laddrs.ipv6_addrs[0].addr,
> + dst_ip, pb->datapath->tunnel_key, pb->tunnel_key,
> + &send_arp_nd_data);
> + }
> + destroy_lport_addresses(&laddrs);
> + }
> +}
> +
> /* Remove a vif from GARP announcements. */
> static void
> -send_garp_rarp_delete(const char *lport)
> +send_garp_rarp_delete(struct shash *shash, const char *lport)
> {
> - struct garp_rarp_data *garp_rarp = shash_find_and_delete
> - (&send_garp_rarp_data, lport);
> + struct garp_rarp_data *garp_rarp = shash_find_and_delete(shash, lport);
> free(garp_rarp);
> notify_pinctrl_handler();
> }
> @@ -5220,9 +5299,11 @@ send_garp_rarp(struct rconn *swconn, struct garp_rarp_data *garp_rarp,
> uint64_t packet_stub[128 / 8];
> struct dp_packet packet;
> dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
> - if (garp_rarp->ipv4) {
> + ovs_be32 src_ipv4 = in6_addr_get_mapped_ipv4(&garp_rarp->src_ip);
> + if (src_ipv4) {
> compose_arp(&packet, ARP_OP_REQUEST, garp_rarp->ea, eth_addr_zero,
> - true, garp_rarp->ipv4, garp_rarp->ipv4);
> + true, src_ipv4,
> + in6_addr_get_mapped_ipv4(&garp_rarp->dst_ip));
> } else {
> compose_rarp(&packet, garp_rarp->ea);
> }
> @@ -6524,6 +6605,25 @@ get_nat_addresses_and_keys(struct ovsdb_idl_index *sbrec_port_binding_by_name,
> }
> }
>
> +static void
> +get_local_ecmp_nexthop_map(
> + const struct sbrec_ecmp_nexthop_table *ecmp_nh_table,
> + struct ovsdb_idl_index *sbrec_port_binding_by_name,
> + const struct sbrec_chassis *chassis,
> + struct smap *local_ecmp_nexthop_map)
> +{
> + const struct sbrec_ecmp_nexthop *sb_ecmp_nexthop;
> + SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH (sb_ecmp_nexthop, ecmp_nh_table) {
> + const struct sbrec_port_binding *pb =
> + lport_lookup_by_name(sbrec_port_binding_by_name,
> + sb_ecmp_nexthop->port);
> + if (pb && !strcmp(pb->type, "l3gateway") && pb->chassis == chassis) {
> + smap_add_once(local_ecmp_nexthop_map, sb_ecmp_nexthop->nexthop,
> + sb_ecmp_nexthop->port);
> + }
> + }
> +}
> +
> static void
> send_garp_rarp_wait(long long int send_garp_rarp_time)
> {
> @@ -6534,6 +6634,16 @@ send_garp_rarp_wait(long long int send_garp_rarp_time)
> }
> }
>
> +static void
> +send_arp_nd_wait(long long int send_arp_nd_time)
> +{
> + /* Set the poll timer for next arp packet only if there is data to
> + * be sent. */
> + if (!shash_is_empty(&send_arp_nd_data)) {
> + poll_timer_wait_until(send_arp_nd_time);
> + }
> +}
This is essentially identical to send_garp_rarp_wait(). Let's refactor?
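E.g. (sketch, hypothetical name):

static void
send_announcements_wait(const struct shash *data, long long int when)
{
    /* Set the poll timer for the next packet only if there is data to
     * be sent. */
    if (!shash_is_empty(data)) {
        poll_timer_wait_until(when);
    }
}

and have both send_garp_rarp_wait() and send_arp_nd_wait() call it (or just
replace them with it).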
> +
> /* Called with in the pinctrl_handler thread context. */
> static void
> send_garp_rarp_run(struct rconn *swconn, long long int *send_garp_rarp_time)
> @@ -6556,6 +6666,86 @@ send_garp_rarp_run(struct rconn *swconn, long long int *send_garp_rarp_time)
> }
> }
>
> +static long long int
> +send_arp_nd(struct rconn *swconn, struct garp_rarp_data *garp_rarp,
> + long long int current_time)
> + OVS_REQUIRES(pinctrl_mutex)
> +{
This whole function is almost identical to send_garp_rarp() - let's
avoid code duplication. I think the only difference is the table we
inject the packet in:
resubmit->table_id = OFTABLE_LOCAL_OUTPUT;
vs
resubmit->table_id = OFTABLE_LOG_INGRESS_PIPELINE;
So, additional question: why treat these ARP requests differently than GARPs?
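Roughly (not compile-tested; hypothetical names), modulo the RARP vs.
ND-NS compose branch:

static long long int
send_announcement(struct rconn *swconn, struct garp_rarp_data *e,
                  long long int now, uint8_t resubmit_table,
                  long long int max_timeout, bool continuous)
    OVS_REQUIRES(pinctrl_mutex);

static long long int
send_garp_rarp(struct rconn *swconn, struct garp_rarp_data *e,
               long long int now)
    OVS_REQUIRES(pinctrl_mutex)
{
    return send_announcement(swconn, e, now, OFTABLE_LOG_INGRESS_PIPELINE,
                             garp_rarp_max_timeout, garp_rarp_continuous);
}

static long long int
send_arp_nd(struct rconn *swconn, struct garp_rarp_data *e,
            long long int now)
    OVS_REQUIRES(pinctrl_mutex)
{
    return send_announcement(swconn, e, now, OFTABLE_LOCAL_OUTPUT,
                             arp_nd_max_timeout, arp_nd_continuous);
}

where send_announcement() is the existing body with resubmit_table and the
backoff knobs plumbed through.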
> + if (current_time < garp_rarp->announce_time) {
> + return garp_rarp->announce_time;
> + }
> +
> + /* Compose a ARP request packet. */
> + uint64_t packet_stub[128 / 8];
> + struct dp_packet packet;
> + dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
> + if (IN6_IS_ADDR_V4MAPPED(&garp_rarp->src_ip)) {
> + compose_arp(&packet, ARP_OP_REQUEST, garp_rarp->ea, eth_addr_zero,
> + true, in6_addr_get_mapped_ipv4(&garp_rarp->src_ip),
> + in6_addr_get_mapped_ipv4(&garp_rarp->dst_ip));
> + } else {
> + compose_nd_ns(&packet, garp_rarp->ea, &garp_rarp->src_ip,
> + &garp_rarp->dst_ip);
> + }
> +
> + /* Inject ARP request. */
> + uint64_t ofpacts_stub[4096 / 8];
> + struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
> + enum ofp_version version = rconn_get_version(swconn);
> + put_load(garp_rarp->dp_key, MFF_LOG_DATAPATH, 0, 64, &ofpacts);
> + put_load(garp_rarp->port_key, MFF_LOG_OUTPORT, 0, 32, &ofpacts);
> + struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts);
> + resubmit->in_port = OFPP_CONTROLLER;
> + resubmit->table_id = OFTABLE_LOCAL_OUTPUT;
> +
> + struct ofputil_packet_out po = {
> + .packet = dp_packet_data(&packet),
> + .packet_len = dp_packet_size(&packet),
> + .buffer_id = UINT32_MAX,
> + .ofpacts = ofpacts.data,
> + .ofpacts_len = ofpacts.size,
> + };
> + match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
> + enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
> + queue_msg(swconn, ofputil_encode_packet_out(&po, proto));
> + dp_packet_uninit(&packet);
> + ofpbuf_uninit(&ofpacts);
> +
> + /* Set the next announcement. At most 5 announcements are sent for a
> + * vif if arp_nd_max_timeout is not specified otherwise cap the max
> + * timeout to arp_nd_max_timeout. */
> + if (arp_nd_continuous || garp_rarp->backoff < arp_nd_max_timeout) {
> + garp_rarp->announce_time = current_time + garp_rarp->backoff;
> + } else {
> + garp_rarp->announce_time = LLONG_MAX;
> + }
> + garp_rarp->backoff = MIN(arp_nd_max_timeout, garp_rarp->backoff * 2);
> +
> + return garp_rarp->announce_time;
> +}
> +
> +static void
> +send_arp_nd_run(struct rconn *swconn, long long int *send_arp_nd_time)
> + OVS_REQUIRES(pinctrl_mutex)
Basically identical to send_garp_rarp_run().
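Could also be folded into something generic, e.g. (sketch, not tested):

static void
send_announcements_run(struct rconn *swconn, struct shash *data,
                       long long int (*send_one)(struct rconn *,
                                                 struct garp_rarp_data *,
                                                 long long int),
                       long long int *next_run_time)
    OVS_REQUIRES(pinctrl_mutex)
{
    if (shash_is_empty(data)) {
        return;
    }

    /* Send the packets and compute the next wake-up time. */
    long long int now = time_msec();
    *next_run_time = LLONG_MAX;

    struct shash_node *iter;
    SHASH_FOR_EACH (iter, data) {
        long long int next = send_one(swconn, iter->data, now);
        *next_run_time = MIN(*next_run_time, next);
    }
}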
> +{
> + if (shash_is_empty(&send_arp_nd_data)) {
> + return;
> + }
> +
> + /* Send ARPs, and update the next announcement. */
> + long long int current_time = time_msec();
> + *send_arp_nd_time = LLONG_MAX;
> +
> + struct shash_node *iter;
> + SHASH_FOR_EACH (iter, &send_arp_nd_data) {
> + long long int next_announce = send_arp_nd(swconn, iter->data,
> + current_time);
> + if (*send_arp_nd_time > next_announce) {
> + *send_arp_nd_time = next_announce;
> + }
> + }
> +}
> +
> /* Called by pinctrl_run(). Runs with in the main ovn-controller
> * thread context. */
> static void
> @@ -6563,6 +6753,7 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
> struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
> struct ovsdb_idl_index *sbrec_port_binding_by_name,
> struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
> + const struct sbrec_ecmp_nexthop_table *ecmp_nh_table,
> const struct ovsrec_bridge *br_int,
> const struct sbrec_chassis *chassis,
> const struct hmap *local_datapaths,
> @@ -6572,10 +6763,13 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
> {
> struct sset localnet_vifs = SSET_INITIALIZER(&localnet_vifs);
> struct sset local_l3gw_ports = SSET_INITIALIZER(&local_l3gw_ports);
> + struct smap local_ecmp_nexthop_map =
> + SMAP_INITIALIZER(&local_ecmp_nexthop_map);
> struct sset nat_ip_keys = SSET_INITIALIZER(&nat_ip_keys);
> struct shash nat_addresses;
> unsigned long long garp_max_timeout = GARP_RARP_DEF_MAX_TIMEOUT;
> - bool garp_continuous = false;
> + unsigned long long max_arp_nd_timeout = GARP_RARP_DEF_MAX_TIMEOUT;
> + bool garp_continuous = false, continuous_arp_nd = true;
> const struct ovsrec_open_vswitch *cfg =
> ovsrec_open_vswitch_table_first(ovs_table);
> if (cfg) {
> @@ -6585,6 +6779,11 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
> if (!garp_max_timeout) {
> garp_max_timeout = GARP_RARP_DEF_MAX_TIMEOUT;
> }
> +
> + max_arp_nd_timeout = smap_get_ullong(
> + &cfg->external_ids, "arp-nd-max-timeout-sec",
> + GARP_RARP_DEF_MAX_TIMEOUT / 1000) * 1000;
> + continuous_arp_nd = !!max_arp_nd_timeout;
> }
>
> shash_init(&nat_addresses);
> @@ -6598,13 +6797,23 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
> &nat_ip_keys, &local_l3gw_ports,
> chassis, active_tunnels,
> &nat_addresses);
> +
> + get_local_ecmp_nexthop_map(ecmp_nh_table, sbrec_port_binding_by_name,
> + chassis, &local_ecmp_nexthop_map);
> +
If we store the port_binding reference instead of the port name in the SB,
like I suggested in patch 1, we don't need this map.
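Rough sketch of what I mean (assuming the "port" column becomes a
Port_Binding reference, so sb_ecmp_nexthop->port is a
struct sbrec_port_binding *):

const struct sbrec_ecmp_nexthop *nh;
SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH (nh, ecmp_nh_table) {
    const struct sbrec_port_binding *pb = nh->port;
    if (pb && !strcmp(pb->type, "l3gateway") && pb->chassis == chassis) {
        send_arp_nd_update(pb, nh->nexthop, max_arp_nd_timeout,
                           continuous_arp_nd);
    }
}

That would replace both get_local_ecmp_nexthop_map() and the
SMAP_FOR_EACH loop below.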
> /* For deleted ports and deleted nat ips, remove from
> * send_garp_rarp_data. */
> struct shash_node *iter;
> SHASH_FOR_EACH_SAFE (iter, &send_garp_rarp_data) {
> if (!sset_contains(&localnet_vifs, iter->name) &&
> !sset_contains(&nat_ip_keys, iter->name)) {
> - send_garp_rarp_delete(iter->name);
> + send_garp_rarp_delete(&send_garp_rarp_data, iter->name);
> + }
> + }
> +
> + SHASH_FOR_EACH_SAFE (iter, &send_arp_nd_data) {
> + if (!smap_get(&local_ecmp_nexthop_map, iter->name)) {
> + send_garp_rarp_delete(&send_arp_nd_data, iter->name);
> }
> }
>
> @@ -6633,10 +6842,21 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
> }
> }
>
> + struct smap_node *node;
> + SMAP_FOR_EACH (node, &local_ecmp_nexthop_map) {
> + const struct sbrec_port_binding *pb = lport_lookup_by_name(
> + sbrec_port_binding_by_name, node->value);
> + if (pb) {
> + send_arp_nd_update(pb, node->key, max_arp_nd_timeout,
> + continuous_arp_nd);
> + }
> + }
> +
> /* pinctrl_handler thread will send the GARPs. */
>
> sset_destroy(&localnet_vifs);
> sset_destroy(&local_l3gw_ports);
> + smap_destroy(&local_ecmp_nexthop_map);
>
> SHASH_FOR_EACH_SAFE (iter, &nat_addresses) {
> struct lport_addresses *laddrs = iter->data;
> @@ -6650,6 +6870,9 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
>
> garp_rarp_max_timeout = garp_max_timeout;
> garp_rarp_continuous = garp_continuous;
> +
> + arp_nd_max_timeout = max_arp_nd_timeout;
> + arp_nd_continuous = continuous_arp_nd;
> }
>
> static bool
> diff --git a/controller/pinctrl.h b/controller/pinctrl.h
> index 3462b670c..5c8ea7aea 100644
> --- a/controller/pinctrl.h
> +++ b/controller/pinctrl.h
> @@ -36,6 +36,7 @@ struct sbrec_dns_table;
> struct sbrec_controller_event_table;
> struct sbrec_service_monitor_table;
> struct sbrec_bfd_table;
> +struct sbrec_ecmp_nexthop_table;
> struct sbrec_port_binding;
> struct sbrec_mac_binding_table;
>
> @@ -54,6 +55,7 @@ void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> const struct sbrec_service_monitor_table *,
> const struct sbrec_mac_binding_table *,
> const struct sbrec_bfd_table *,
> + const struct sbrec_ecmp_nexthop_table *,
> const struct ovsrec_bridge *, const struct sbrec_chassis *,
> const struct hmap *local_datapaths,
> const struct sset *active_tunnels,
Regards,
Dumitru