When an ARP request reaches a logical switch that is connected to gateway routers, if the IP address in the ARP is an "unreachable" address, then the ARP is flooded out all logical switch ports.
This can become an issue when a logical switch is connected to many gateway logical router ports. In one particular OpenStack case, we saw a logical switch connected to 471 gateway logical router ports. When ARP requests are flooded to all of those logical router ports, it results in hitting the resubmit limit in OVS. To correct this issue, this patch treats reachable and unreachable addresses the same when receiving an ARP request. If the address can be traced to a particular router port, then the ARP is unicast only to that router port. This helps to avoid hitting the resubmit ceiling. The test "ARP flood for unreachable addresses" is removed since we no longer flood ARPs when destined for unreachable addresses. Reported at: https://bugzilla.redhat.com/show_bug.cgi?id=2009323 Signed-off-by: Mark Michelson <[email protected]> --- northd/northd.c | 185 ++++---------------------------------------- tests/ovn-northd.at | 99 ------------------------ tests/ovn.at | 10 +++ 3 files changed, 23 insertions(+), 271 deletions(-) diff --git a/northd/northd.c b/northd/northd.c index da4f9cd14..59286034d 100644 --- a/northd/northd.c +++ b/northd/northd.c @@ -6876,42 +6876,6 @@ build_lrouter_groups(struct hmap *ports, struct ovs_list *lr_list) } } -/* Returns 'true' if the IPv4 'addr' is on the same subnet with one of the - * IPs configured on the router port. - */ -static bool -lrouter_port_ipv4_reachable(const struct ovn_port *op, ovs_be32 addr) -{ - for (size_t i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { - struct ipv4_netaddr *op_addr = &op->lrp_networks.ipv4_addrs[i]; - - if ((addr & op_addr->mask) == op_addr->network) { - return true; - } - } - return false; -} - -/* Returns 'true' if the IPv6 'addr' is on the same subnet with one of the - * IPs configured on the router port. - */ -static bool -lrouter_port_ipv6_reachable(const struct ovn_port *op, - const struct in6_addr *addr) -{ - for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { - struct ipv6_netaddr *op_addr = &op->lrp_networks.ipv6_addrs[i]; - - struct in6_addr nat_addr6_masked = - ipv6_addr_bitand(addr, &op_addr->mask); - - if (ipv6_addr_equals(&nat_addr6_masked, &op_addr->network)) { - return true; - } - } - return false; -} - /* * Ingress table 22: Flows that flood self originated ARP/ND packets in the * switching domain. @@ -6943,23 +6907,6 @@ build_lswitch_rport_arp_req_self_orig_flow(struct ovn_port *op, if (!nat->external_mac) { continue; } - - /* Check if the ovn port has a network configured on which we could - * expect ARP requests/NS for the DNAT external_ip. - */ - if (nat_entry_is_v6(nat_entry)) { - struct in6_addr *addr = &nat_entry->ext_addrs.ipv6_addrs[0].addr; - - if (!lrouter_port_ipv6_reachable(op, addr)) { - continue; - } - } else { - ovs_be32 addr = nat_entry->ext_addrs.ipv4_addrs[0].addr; - - if (!lrouter_port_ipv4_reachable(op, addr)) { - continue; - } - } sset_add(&all_eth_addrs, nat->external_mac); } @@ -7012,7 +6959,7 @@ arp_nd_ns_match(const char *ips, int addr_family, struct ds *match) * switching domain as regular broadcast. */ static void -build_lswitch_rport_arp_req_flow_for_reachable_ip(const char *ips, +build_lswitch_rport_arp_req_flow(const char *ips, int addr_family, struct ovn_port *patch_op, struct ovn_datapath *od, uint32_t priority, struct hmap *lflows, const struct ovsdb_idl_row *stage_hint) @@ -7043,30 +6990,6 @@ build_lswitch_rport_arp_req_flow_for_reachable_ip(const char *ips, ds_destroy(&actions); } -/* - * Ingress table 22: Flows that forward ARP/ND requests for "unreachable" IPs - * (NAT or load balancer IPs configured on a router that are outside the - * router's configured subnets). - * These ARP/ND packets are flooded in the switching domain as regular - * broadcast. - */ -static void -build_lswitch_rport_arp_req_flow_for_unreachable_ip(const char *ips, - int addr_family, struct ovn_datapath *od, uint32_t priority, - struct hmap *lflows, const struct ovsdb_idl_row *stage_hint) -{ - struct ds match = DS_EMPTY_INITIALIZER; - - arp_nd_ns_match(ips, addr_family, &match); - - ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_L2_LKUP, - priority, ds_cstr(&match), - "outport = \""MC_FLOOD"\"; output;", - stage_hint); - - ds_destroy(&match); -} - /* * Ingress table 22: Flows that forward ARP/ND requests only to the routers * that own the addresses. @@ -7101,9 +7024,8 @@ build_lswitch_rport_arp_req_flows(struct ovn_port *op, /* Check if the ovn port has a network configured on which we could * expect ARP requests for the LB VIP. */ - if (ip_parse(ip_addr, &ipv4_addr) && - lrouter_port_ipv4_reachable(op, ipv4_addr)) { - build_lswitch_rport_arp_req_flow_for_reachable_ip( + if (ip_parse(ip_addr, &ipv4_addr)) { + build_lswitch_rport_arp_req_flow( ip_addr, AF_INET, sw_op, sw_od, 80, lflows, stage_hint); } @@ -7114,9 +7036,8 @@ build_lswitch_rport_arp_req_flows(struct ovn_port *op, /* Check if the ovn port has a network configured on which we could * expect NS requests for the LB VIP. */ - if (ipv6_parse(ip_addr, &ipv6_addr) && - lrouter_port_ipv6_reachable(op, &ipv6_addr)) { - build_lswitch_rport_arp_req_flow_for_reachable_ip( + if (ipv6_parse(ip_addr, &ipv6_addr)) { + build_lswitch_rport_arp_req_flow( ip_addr, AF_INET6, sw_op, sw_od, 80, lflows, stage_hint); } @@ -7138,42 +7059,27 @@ build_lswitch_rport_arp_req_flows(struct ovn_port *op, * expect ARP requests/NS for the DNAT external_ip. */ if (nat_entry_is_v6(nat_entry)) { - struct in6_addr *addr = &nat_entry->ext_addrs.ipv6_addrs[0].addr; - if (!sset_contains(&op->od->lb_ips_v6, nat->external_ip)) { - if (lrouter_port_ipv6_reachable(op, addr)) { - build_lswitch_rport_arp_req_flow_for_reachable_ip( - nat->external_ip, AF_INET6, sw_op, sw_od, 80, lflows, - stage_hint); - } else { - build_lswitch_rport_arp_req_flow_for_unreachable_ip( - nat->external_ip, AF_INET6, sw_od, 90, lflows, - stage_hint); - } + build_lswitch_rport_arp_req_flow( + nat->external_ip, AF_INET6, sw_op, sw_od, 80, lflows, + stage_hint); } } else { - ovs_be32 addr = nat_entry->ext_addrs.ipv4_addrs[0].addr; if (!sset_contains(&op->od->lb_ips_v4, nat->external_ip)) { - if (lrouter_port_ipv4_reachable(op, addr)) { - build_lswitch_rport_arp_req_flow_for_reachable_ip( - nat->external_ip, AF_INET, sw_op, sw_od, 80, lflows, - stage_hint); - } else { - build_lswitch_rport_arp_req_flow_for_unreachable_ip( - nat->external_ip, AF_INET, sw_od, 90, lflows, - stage_hint); - } + build_lswitch_rport_arp_req_flow( + nat->external_ip, AF_INET, sw_op, sw_od, 80, lflows, + stage_hint); } } } for (size_t i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { - build_lswitch_rport_arp_req_flow_for_reachable_ip( + build_lswitch_rport_arp_req_flow( op->lrp_networks.ipv4_addrs[i].addr_s, AF_INET, sw_op, sw_od, 80, lflows, stage_hint); } for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { - build_lswitch_rport_arp_req_flow_for_reachable_ip( + build_lswitch_rport_arp_req_flow( op->lrp_networks.ipv6_addrs[i].addr_s, AF_INET6, sw_op, sw_od, 80, lflows, stage_hint); } @@ -9665,69 +9571,6 @@ build_lrouter_defrag_flows_for_lb(struct ovn_northd_lb *lb, ds_destroy(&defrag_actions); } -static void -build_lflows_for_unreachable_vips(struct ovn_northd_lb *lb, - struct ovn_lb_vip *lb_vip, - struct hmap *lflows, - struct ds *match) -{ - static const char *action = "outport = \"_MC_flood\"; output;"; - bool ipv4 = IN6_IS_ADDR_V4MAPPED(&lb_vip->vip); - ovs_be32 ipv4_addr; - - ds_clear(match); - if (ipv4) { - if (!ip_parse(lb_vip->vip_str, &ipv4_addr)) { - return; - } - ds_put_format(match, "%s && arp.op == 1 && arp.tpa == %s", - FLAGBIT_NOT_VXLAN, lb_vip->vip_str); - } else { - ds_put_format(match, "%s && nd_ns && nd.target == %s", - FLAGBIT_NOT_VXLAN, lb_vip->vip_str); - } - - struct ovn_lflow *lflow_ref = NULL; - uint32_t hash = ovn_logical_flow_hash( - ovn_stage_get_table(S_SWITCH_IN_L2_LKUP), - ovn_stage_get_pipeline(S_SWITCH_IN_L2_LKUP), 90, - ds_cstr(match), action); - - for (size_t i = 0; i < lb->n_nb_lr; i++) { - struct ovn_datapath *od = lb->nb_lr[i]; - - if (!od->is_gw_router && !od->n_l3dgw_ports) { - continue; - } - - struct ovn_port *op; - LIST_FOR_EACH (op, dp_node, &od->port_list) { - if (!od->is_gw_router && !is_l3dgw_port(op)) { - continue; - } - - struct ovn_port *peer = op->peer; - if (!peer || !peer->nbsp || lsp_is_external(peer->nbsp)) { - continue; - } - - if ((ipv4 && lrouter_port_ipv4_reachable(op, ipv4_addr)) || - (!ipv4 && lrouter_port_ipv6_reachable(op, &lb_vip->vip))) { - continue; - } - - if (ovn_dp_group_add_with_reference(lflow_ref, peer->od)) { - continue; - } - lflow_ref = ovn_lflow_add_at_with_hash(lflows, peer->od, - S_SWITCH_IN_L2_LKUP, 90, - ds_cstr(match), action, - NULL, NULL, &peer->nbsp->header_, - OVS_SOURCE_LOCATOR, hash); - } - } -} - static void build_lrouter_flows_for_lb(struct ovn_northd_lb *lb, struct hmap *lflows, struct shash *meter_groups, struct ds *match, @@ -9740,8 +9583,6 @@ build_lrouter_flows_for_lb(struct ovn_northd_lb *lb, struct hmap *lflows, for (size_t i = 0; i < lb->n_vips; i++) { struct ovn_lb_vip *lb_vip = &lb->vips[i]; - build_lflows_for_unreachable_vips(lb, lb_vip, lflows, match); - build_lrouter_nat_flows_for_lb(lb_vip, lb, &lb->vips_nb[i], lflows, match, action, meter_groups); diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at index e2b9924b6..9d5d21316 100644 --- a/tests/ovn-northd.at +++ b/tests/ovn-northd.at @@ -4479,105 +4479,6 @@ check_lflows 0 AT_CLEANUP -OVN_FOR_EACH_NORTHD([ -AT_SETUP([ovn -- ARP flood for unreachable addresses]) -ovn_start - -AS_BOX([Setting up the logical network]) - -# This network is the same as the one from "Router Address Propagation" -check ovn-nbctl ls-add sw - -check ovn-nbctl lr-add ro1 -check ovn-nbctl lrp-add ro1 ro1-sw 00:00:00:00:00:01 10.0.0.1/24 -check ovn-nbctl lsp-add sw sw-ro1 -check ovn-nbctl lsp-set-type sw-ro1 router -check ovn-nbctl lsp-set-addresses sw-ro1 router -check ovn-nbctl lsp-set-options sw-ro1 router-port=ro1-sw - -check ovn-nbctl lr-add ro2 -check ovn-nbctl lrp-add ro2 ro2-sw 00:00:00:00:00:02 20.0.0.1/24 -check ovn-nbctl lsp-add sw sw-ro2 -check ovn-nbctl lsp-set-type sw-ro2 router -check ovn-nbctl lsp-set-addresses sw-ro2 router -check ovn-nbctl --wait=sb lsp-set-options sw-ro2 router-port=ro2-sw - -check ovn-nbctl ls-add ls1 -check ovn-nbctl lsp-add ls1 vm1 -check ovn-nbctl lsp-set-addresses vm1 "00:00:00:00:01:02 192.168.1.2" -check ovn-nbctl lrp-add ro1 ro1-ls1 00:00:00:00:01:01 192.168.1.1/24 -check ovn-nbctl lsp-add ls1 ls1-ro1 -check ovn-nbctl lsp-set-type ls1-ro1 router -check ovn-nbctl lsp-set-addresses ls1-ro1 router -check ovn-nbctl lsp-set-options ls1-ro1 router-port=ro1-ls1 - -check ovn-nbctl ls-add ls2 -check ovn-nbctl lsp-add ls2 vm2 -check ovn-nbctl lsp-set-addresses vm2 "00:00:00:00:02:02 192.168.2.2" -check ovn-nbctl lrp-add ro2 ro2-ls2 00:00:00:00:02:01 192.168.2.1/24 -check ovn-nbctl lsp-add ls2 ls2-ro2 -check ovn-nbctl lsp-set-type ls2-ro2 router -check ovn-nbctl lsp-set-addresses ls2-ro2 router -check ovn-nbctl lsp-set-options ls2-ro2 router-port=ro2-ls2 - -AS_BOX([Ensure that unreachable flood flows are not installed, since no addresses are unreachable]) - -AT_CHECK([ovn-sbctl lflow-list sw | grep "ls_in_l2_lkup" | grep "priority=90" -c], [1], [dnl -0 -]) - -AS_BOX([Adding some reachable NAT addresses]) - -check ovn-nbctl lr-nat-add ro1 dnat 10.0.0.100 192.168.1.100 -check ovn-nbctl lr-nat-add ro1 snat 10.0.0.200 192.168.1.200/30 - -check ovn-nbctl lr-nat-add ro2 dnat 20.0.0.100 192.168.2.100 -check ovn-nbctl --wait=sb lr-nat-add ro2 snat 20.0.0.200 192.168.2.200/30 - -AS_BOX([Ensure that unreachable flood flows are not installed, since all addresses are reachable]) - -AT_CHECK([ovn-sbctl lflow-list sw | grep "ls_in_l2_lkup" | grep "priority=90" -c], [1], [dnl -0 -]) - -AS_BOX([Adding some unreachable NAT addresses]) - -check ovn-nbctl lr-nat-add ro1 dnat 30.0.0.100 192.168.1.130 -check ovn-nbctl lr-nat-add ro1 snat 30.0.0.200 192.168.1.148/30 - -check ovn-nbctl lr-nat-add ro2 dnat 40.0.0.100 192.168.2.130 -check ovn-nbctl --wait=sb lr-nat-add ro2 snat 40.0.0.200 192.168.2.148/30 - -AS_BOX([Ensure that unreachable flood flows are installed, since there are unreachable addresses]) - -ovn-sbctl lflow-list - -# We expect two flows to be installed, one per connected router port on sw -AT_CHECK([ovn-sbctl lflow-list sw | grep ls_in_l2_lkup | grep priority=90 -c], [0], [dnl -2 -]) - -# We expect that the installed flows will match the unreachable DNAT addresses only. -AT_CHECK([ovn-sbctl lflow-list sw | grep ls_in_l2_lkup | grep priority=90 | grep "arp.tpa == 30.0.0.100" -c], [0], [dnl -1 -]) - -AT_CHECK([ovn-sbctl lflow-list sw | grep ls_in_l2_lkup | grep priority=90 | grep "arp.tpa == 40.0.0.100" -c], [0], [dnl -1 -]) - -# Ensure that we do not create flows for SNAT addresses -AT_CHECK([ovn-sbctl lflow-list sw | grep ls_in_l2_lkup | grep priority=90 | grep "arp.tpa == 30.0.0.200" -c], [1], [dnl -0 -]) - -AT_CHECK([ovn-sbctl lflow-list sw | grep ls_in_l2_lkup | grep priority=90 | grep "arp.tpa == 40.0.0.200" -c], [1], [dnl -0 -]) - -AT_CLEANUP -]) - OVN_FOR_EACH_NORTHD([ AT_SETUP([ovn -- LR NAT flows]) ovn_start diff --git a/tests/ovn.at b/tests/ovn.at index 7cfdede77..dd043987a 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -22131,9 +22131,17 @@ nd_target=fe80::200:ff:fe00:200 # - 20.0.0.1, 20::1, fe80::200:1ff:fe00:0 - interface IPs. as hv1 AT_CHECK([ovs-ofctl dump-flows br-int | grep -E "priority=80,.*${match_sw1_metadata}" | grep -oE "arp_tpa=[[0-9.]]+" | sort], [0], [dnl +arp_tpa=10.0.0.11 +arp_tpa=10.0.0.111 +arp_tpa=10.0.0.121 +arp_tpa=10.0.0.122 arp_tpa=20.0.0.1 ]) AT_CHECK([ovs-ofctl dump-flows br-int | grep -E "priority=80,.*${match_sw1_metadata}" | grep -oE "nd_target=[[0-9a-f:]]+" | sort], [0], [dnl +nd_target=10::11 +nd_target=10::111 +nd_target=10::121 +nd_target=10::122 nd_target=20::1 nd_target=fe80::200:1ff:fe00:0 ]) @@ -22162,6 +22170,8 @@ dl_src=00:00:00:02:00:00 # - 00:00:01:00:00:00 - interface MAC. as hv1 AT_CHECK([ovs-ofctl dump-flows br-int | grep -E "priority=75,.*${match_sw1_metadata}.*arp_op=1" | grep -oE "dl_src=[[0-9a-f:]]+" | sort], [0], [dnl +dl_src=00:00:00:01:00:00 +dl_src=00:00:00:02:00:00 dl_src=00:00:01:00:00:00 ]) -- 2.31.1 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
