When an ARP request reaches a logical switch that is connected to
gateway routers, if the IP address in the ARP is an "unreachable"
address, then the ARP is flooded out all logical switch ports.

This can become an issue when a logical switch is connected to many
gateway logical router ports. In one particular OpenStack case, we saw a
logical switch connected to 471 gateway logical router ports. When ARP
requests are flooded to all of those logical router ports, it results in
hitting the resubmit limit in OVS.

To correct this issue, this patch treats reachable and unreachable
addresses the same when receiving an ARP request. If the address can be
traced to a particular router port, then the ARP is unicast only to that
router port. This helps to avoid hitting the resubmit ceiling.

The test "ARP flood for unreachable addresses" is removed since we no
longer flood ARPs when destined for unreachable addresses.

Reported at: https://bugzilla.redhat.com/show_bug.cgi?id=2009323

Signed-off-by: Mark Michelson <[email protected]>
---
 northd/northd.c     | 185 ++++----------------------------------------
 tests/ovn-northd.at |  99 ------------------------
 tests/ovn.at        |  10 +++
 3 files changed, 23 insertions(+), 271 deletions(-)

diff --git a/northd/northd.c b/northd/northd.c
index da4f9cd14..59286034d 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -6876,42 +6876,6 @@ build_lrouter_groups(struct hmap *ports, struct ovs_list 
*lr_list)
     }
 }
 
-/* Returns 'true' if the IPv4 'addr' is on the same subnet with one of the
- * IPs configured on the router port.
- */
-static bool
-lrouter_port_ipv4_reachable(const struct ovn_port *op, ovs_be32 addr)
-{
-    for (size_t i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
-        struct ipv4_netaddr *op_addr = &op->lrp_networks.ipv4_addrs[i];
-
-        if ((addr & op_addr->mask) == op_addr->network) {
-            return true;
-        }
-    }
-    return false;
-}
-
-/* Returns 'true' if the IPv6 'addr' is on the same subnet with one of the
- * IPs configured on the router port.
- */
-static bool
-lrouter_port_ipv6_reachable(const struct ovn_port *op,
-                            const struct in6_addr *addr)
-{
-    for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
-        struct ipv6_netaddr *op_addr = &op->lrp_networks.ipv6_addrs[i];
-
-        struct in6_addr nat_addr6_masked =
-            ipv6_addr_bitand(addr, &op_addr->mask);
-
-        if (ipv6_addr_equals(&nat_addr6_masked, &op_addr->network)) {
-            return true;
-        }
-    }
-    return false;
-}
-
 /*
  * Ingress table 22: Flows that flood self originated ARP/ND packets in the
  * switching domain.
@@ -6943,23 +6907,6 @@ build_lswitch_rport_arp_req_self_orig_flow(struct 
ovn_port *op,
         if (!nat->external_mac) {
             continue;
         }
-
-        /* Check if the ovn port has a network configured on which we could
-         * expect ARP requests/NS for the DNAT external_ip.
-         */
-        if (nat_entry_is_v6(nat_entry)) {
-            struct in6_addr *addr = &nat_entry->ext_addrs.ipv6_addrs[0].addr;
-
-            if (!lrouter_port_ipv6_reachable(op, addr)) {
-                continue;
-            }
-        } else {
-            ovs_be32 addr = nat_entry->ext_addrs.ipv4_addrs[0].addr;
-
-            if (!lrouter_port_ipv4_reachable(op, addr)) {
-                continue;
-            }
-        }
         sset_add(&all_eth_addrs, nat->external_mac);
     }
 
@@ -7012,7 +6959,7 @@ arp_nd_ns_match(const char *ips, int addr_family, struct 
ds *match)
  * switching domain as regular broadcast.
  */
 static void
-build_lswitch_rport_arp_req_flow_for_reachable_ip(const char *ips,
+build_lswitch_rport_arp_req_flow(const char *ips,
     int addr_family, struct ovn_port *patch_op, struct ovn_datapath *od,
     uint32_t priority, struct hmap *lflows,
     const struct ovsdb_idl_row *stage_hint)
@@ -7043,30 +6990,6 @@ build_lswitch_rport_arp_req_flow_for_reachable_ip(const 
char *ips,
     ds_destroy(&actions);
 }
 
-/*
- * Ingress table 22: Flows that forward ARP/ND requests for "unreachable" IPs
- * (NAT or load balancer IPs configured on a router that are outside the
- * router's configured subnets).
- * These ARP/ND packets are flooded in the switching domain as regular
- * broadcast.
- */
-static void
-build_lswitch_rport_arp_req_flow_for_unreachable_ip(const char *ips,
-    int addr_family, struct ovn_datapath *od, uint32_t priority,
-    struct hmap *lflows, const struct ovsdb_idl_row *stage_hint)
-{
-    struct ds match = DS_EMPTY_INITIALIZER;
-
-    arp_nd_ns_match(ips, addr_family, &match);
-
-    ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_L2_LKUP,
-                            priority, ds_cstr(&match),
-                            "outport = \""MC_FLOOD"\"; output;",
-                            stage_hint);
-
-    ds_destroy(&match);
-}
-
 /*
  * Ingress table 22: Flows that forward ARP/ND requests only to the routers
  * that own the addresses.
@@ -7101,9 +7024,8 @@ build_lswitch_rport_arp_req_flows(struct ovn_port *op,
         /* Check if the ovn port has a network configured on which we could
          * expect ARP requests for the LB VIP.
          */
-        if (ip_parse(ip_addr, &ipv4_addr) &&
-            lrouter_port_ipv4_reachable(op, ipv4_addr)) {
-            build_lswitch_rport_arp_req_flow_for_reachable_ip(
+        if (ip_parse(ip_addr, &ipv4_addr)) {
+            build_lswitch_rport_arp_req_flow(
                 ip_addr, AF_INET, sw_op, sw_od, 80, lflows,
                 stage_hint);
         }
@@ -7114,9 +7036,8 @@ build_lswitch_rport_arp_req_flows(struct ovn_port *op,
         /* Check if the ovn port has a network configured on which we could
          * expect NS requests for the LB VIP.
          */
-        if (ipv6_parse(ip_addr, &ipv6_addr) &&
-            lrouter_port_ipv6_reachable(op, &ipv6_addr)) {
-            build_lswitch_rport_arp_req_flow_for_reachable_ip(
+        if (ipv6_parse(ip_addr, &ipv6_addr)) {
+            build_lswitch_rport_arp_req_flow(
                 ip_addr, AF_INET6, sw_op, sw_od, 80, lflows,
                 stage_hint);
         }
@@ -7138,42 +7059,27 @@ build_lswitch_rport_arp_req_flows(struct ovn_port *op,
          * expect ARP requests/NS for the DNAT external_ip.
          */
         if (nat_entry_is_v6(nat_entry)) {
-            struct in6_addr *addr = &nat_entry->ext_addrs.ipv6_addrs[0].addr;
-
             if (!sset_contains(&op->od->lb_ips_v6, nat->external_ip)) {
-                if (lrouter_port_ipv6_reachable(op, addr)) {
-                    build_lswitch_rport_arp_req_flow_for_reachable_ip(
-                        nat->external_ip, AF_INET6, sw_op, sw_od, 80, lflows,
-                        stage_hint);
-                } else {
-                    build_lswitch_rport_arp_req_flow_for_unreachable_ip(
-                        nat->external_ip, AF_INET6, sw_od, 90, lflows,
-                        stage_hint);
-                }
+                build_lswitch_rport_arp_req_flow(
+                    nat->external_ip, AF_INET6, sw_op, sw_od, 80, lflows,
+                    stage_hint);
             }
         } else {
-            ovs_be32 addr = nat_entry->ext_addrs.ipv4_addrs[0].addr;
             if (!sset_contains(&op->od->lb_ips_v4, nat->external_ip)) {
-                if (lrouter_port_ipv4_reachable(op, addr)) {
-                    build_lswitch_rport_arp_req_flow_for_reachable_ip(
-                        nat->external_ip, AF_INET, sw_op, sw_od, 80, lflows,
-                        stage_hint);
-                } else {
-                    build_lswitch_rport_arp_req_flow_for_unreachable_ip(
-                        nat->external_ip, AF_INET, sw_od, 90, lflows,
-                        stage_hint);
-                }
+                build_lswitch_rport_arp_req_flow(
+                    nat->external_ip, AF_INET, sw_op, sw_od, 80, lflows,
+                    stage_hint);
             }
         }
     }
 
     for (size_t i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
-        build_lswitch_rport_arp_req_flow_for_reachable_ip(
+        build_lswitch_rport_arp_req_flow(
             op->lrp_networks.ipv4_addrs[i].addr_s, AF_INET, sw_op, sw_od, 80,
             lflows, stage_hint);
     }
     for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
-        build_lswitch_rport_arp_req_flow_for_reachable_ip(
+        build_lswitch_rport_arp_req_flow(
             op->lrp_networks.ipv6_addrs[i].addr_s, AF_INET6, sw_op, sw_od, 80,
             lflows, stage_hint);
     }
@@ -9665,69 +9571,6 @@ build_lrouter_defrag_flows_for_lb(struct ovn_northd_lb 
*lb,
     ds_destroy(&defrag_actions);
 }
 
-static void
-build_lflows_for_unreachable_vips(struct ovn_northd_lb *lb,
-                                  struct ovn_lb_vip *lb_vip,
-                                  struct hmap *lflows,
-                                  struct ds *match)
-{
-    static const char *action = "outport = \"_MC_flood\"; output;";
-    bool ipv4 = IN6_IS_ADDR_V4MAPPED(&lb_vip->vip);
-    ovs_be32 ipv4_addr;
-
-    ds_clear(match);
-    if (ipv4) {
-        if (!ip_parse(lb_vip->vip_str, &ipv4_addr)) {
-            return;
-        }
-        ds_put_format(match, "%s && arp.op == 1 && arp.tpa == %s",
-                      FLAGBIT_NOT_VXLAN, lb_vip->vip_str);
-    } else {
-        ds_put_format(match, "%s && nd_ns && nd.target == %s",
-                      FLAGBIT_NOT_VXLAN, lb_vip->vip_str);
-    }
-
-    struct ovn_lflow *lflow_ref = NULL;
-    uint32_t hash = ovn_logical_flow_hash(
-            ovn_stage_get_table(S_SWITCH_IN_L2_LKUP),
-            ovn_stage_get_pipeline(S_SWITCH_IN_L2_LKUP), 90,
-            ds_cstr(match), action);
-
-    for (size_t i = 0; i < lb->n_nb_lr; i++) {
-        struct ovn_datapath *od = lb->nb_lr[i];
-
-        if (!od->is_gw_router && !od->n_l3dgw_ports) {
-            continue;
-        }
-
-        struct ovn_port *op;
-        LIST_FOR_EACH (op, dp_node, &od->port_list) {
-            if (!od->is_gw_router && !is_l3dgw_port(op)) {
-                continue;
-            }
-
-            struct ovn_port *peer = op->peer;
-            if (!peer || !peer->nbsp || lsp_is_external(peer->nbsp)) {
-                continue;
-            }
-
-            if ((ipv4 && lrouter_port_ipv4_reachable(op, ipv4_addr)) ||
-                (!ipv4 && lrouter_port_ipv6_reachable(op, &lb_vip->vip))) {
-                continue;
-            }
-
-            if (ovn_dp_group_add_with_reference(lflow_ref, peer->od)) {
-                continue;
-            }
-            lflow_ref = ovn_lflow_add_at_with_hash(lflows, peer->od,
-                                       S_SWITCH_IN_L2_LKUP, 90,
-                                       ds_cstr(match), action,
-                                       NULL, NULL, &peer->nbsp->header_,
-                                       OVS_SOURCE_LOCATOR, hash);
-        }
-    }
-}
-
 static void
 build_lrouter_flows_for_lb(struct ovn_northd_lb *lb, struct hmap *lflows,
                            struct shash *meter_groups, struct ds *match,
@@ -9740,8 +9583,6 @@ build_lrouter_flows_for_lb(struct ovn_northd_lb *lb, 
struct hmap *lflows,
     for (size_t i = 0; i < lb->n_vips; i++) {
         struct ovn_lb_vip *lb_vip = &lb->vips[i];
 
-        build_lflows_for_unreachable_vips(lb, lb_vip, lflows, match);
-
         build_lrouter_nat_flows_for_lb(lb_vip, lb, &lb->vips_nb[i],
                                        lflows, match, action,
                                        meter_groups);
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index e2b9924b6..9d5d21316 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -4479,105 +4479,6 @@ check_lflows 0
 
 AT_CLEANUP
 
-OVN_FOR_EACH_NORTHD([
-AT_SETUP([ovn -- ARP flood for unreachable addresses])
-ovn_start
-
-AS_BOX([Setting up the logical network])
-
-# This network is the same as the one from "Router Address Propagation"
-check ovn-nbctl ls-add sw
-
-check ovn-nbctl lr-add ro1
-check ovn-nbctl lrp-add ro1 ro1-sw 00:00:00:00:00:01 10.0.0.1/24
-check ovn-nbctl lsp-add sw sw-ro1
-check ovn-nbctl lsp-set-type sw-ro1 router
-check ovn-nbctl lsp-set-addresses sw-ro1 router
-check ovn-nbctl lsp-set-options sw-ro1 router-port=ro1-sw
-
-check ovn-nbctl lr-add ro2
-check ovn-nbctl lrp-add ro2 ro2-sw 00:00:00:00:00:02 20.0.0.1/24
-check ovn-nbctl lsp-add sw sw-ro2
-check ovn-nbctl lsp-set-type sw-ro2 router
-check ovn-nbctl lsp-set-addresses sw-ro2 router
-check ovn-nbctl --wait=sb lsp-set-options sw-ro2 router-port=ro2-sw
-
-check ovn-nbctl ls-add ls1
-check ovn-nbctl lsp-add ls1 vm1
-check ovn-nbctl lsp-set-addresses vm1 "00:00:00:00:01:02 192.168.1.2"
-check ovn-nbctl lrp-add ro1 ro1-ls1 00:00:00:00:01:01 192.168.1.1/24
-check ovn-nbctl lsp-add ls1 ls1-ro1
-check ovn-nbctl lsp-set-type ls1-ro1 router
-check ovn-nbctl lsp-set-addresses ls1-ro1 router
-check ovn-nbctl lsp-set-options ls1-ro1 router-port=ro1-ls1
-
-check ovn-nbctl ls-add ls2
-check ovn-nbctl lsp-add ls2 vm2
-check ovn-nbctl lsp-set-addresses vm2 "00:00:00:00:02:02 192.168.2.2"
-check ovn-nbctl lrp-add ro2 ro2-ls2 00:00:00:00:02:01 192.168.2.1/24
-check ovn-nbctl lsp-add ls2 ls2-ro2
-check ovn-nbctl lsp-set-type ls2-ro2 router
-check ovn-nbctl lsp-set-addresses ls2-ro2 router
-check ovn-nbctl lsp-set-options ls2-ro2 router-port=ro2-ls2
-
-AS_BOX([Ensure that unreachable flood flows are not installed, since no 
addresses are unreachable])
-
-AT_CHECK([ovn-sbctl lflow-list sw | grep "ls_in_l2_lkup" | grep "priority=90" 
-c], [1], [dnl
-0
-])
-
-AS_BOX([Adding some reachable NAT addresses])
-
-check ovn-nbctl lr-nat-add ro1 dnat 10.0.0.100 192.168.1.100
-check ovn-nbctl lr-nat-add ro1 snat 10.0.0.200 192.168.1.200/30
-
-check ovn-nbctl lr-nat-add ro2 dnat 20.0.0.100 192.168.2.100
-check ovn-nbctl --wait=sb lr-nat-add ro2 snat 20.0.0.200 192.168.2.200/30
-
-AS_BOX([Ensure that unreachable flood flows are not installed, since all 
addresses are reachable])
-
-AT_CHECK([ovn-sbctl lflow-list sw | grep "ls_in_l2_lkup" | grep "priority=90" 
-c], [1], [dnl
-0
-])
-
-AS_BOX([Adding some unreachable NAT addresses])
-
-check ovn-nbctl lr-nat-add ro1 dnat 30.0.0.100 192.168.1.130
-check ovn-nbctl lr-nat-add ro1 snat 30.0.0.200 192.168.1.148/30
-
-check ovn-nbctl lr-nat-add ro2 dnat 40.0.0.100 192.168.2.130
-check ovn-nbctl --wait=sb lr-nat-add ro2 snat 40.0.0.200 192.168.2.148/30
-
-AS_BOX([Ensure that unreachable flood flows are installed, since there are 
unreachable addresses])
-
-ovn-sbctl lflow-list
-
-# We expect two flows to be installed, one per connected router port on sw
-AT_CHECK([ovn-sbctl lflow-list sw | grep ls_in_l2_lkup | grep priority=90 -c], 
[0], [dnl
-2
-])
-
-# We expect that the installed flows will match the unreachable DNAT addresses 
only.
-AT_CHECK([ovn-sbctl lflow-list sw | grep ls_in_l2_lkup | grep priority=90 | 
grep "arp.tpa == 30.0.0.100" -c], [0], [dnl
-1
-])
-
-AT_CHECK([ovn-sbctl lflow-list sw | grep ls_in_l2_lkup | grep priority=90 | 
grep "arp.tpa == 40.0.0.100" -c], [0], [dnl
-1
-])
-
-# Ensure that we do not create flows for SNAT addresses
-AT_CHECK([ovn-sbctl lflow-list sw | grep ls_in_l2_lkup | grep priority=90 | 
grep "arp.tpa == 30.0.0.200" -c], [1], [dnl
-0
-])
-
-AT_CHECK([ovn-sbctl lflow-list sw | grep ls_in_l2_lkup | grep priority=90 | 
grep "arp.tpa == 40.0.0.200" -c], [1], [dnl
-0
-])
-
-AT_CLEANUP
-])
-
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([ovn -- LR NAT flows])
 ovn_start
diff --git a/tests/ovn.at b/tests/ovn.at
index 7cfdede77..dd043987a 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -22131,9 +22131,17 @@ nd_target=fe80::200:ff:fe00:200
 # - 20.0.0.1, 20::1, fe80::200:1ff:fe00:0 - interface IPs.
 as hv1
 AT_CHECK([ovs-ofctl dump-flows br-int | grep -E 
"priority=80,.*${match_sw1_metadata}" | grep -oE "arp_tpa=[[0-9.]]+" | sort], 
[0], [dnl
+arp_tpa=10.0.0.11
+arp_tpa=10.0.0.111
+arp_tpa=10.0.0.121
+arp_tpa=10.0.0.122
 arp_tpa=20.0.0.1
 ])
 AT_CHECK([ovs-ofctl dump-flows br-int | grep -E 
"priority=80,.*${match_sw1_metadata}" | grep -oE "nd_target=[[0-9a-f:]]+" | 
sort], [0], [dnl
+nd_target=10::11
+nd_target=10::111
+nd_target=10::121
+nd_target=10::122
 nd_target=20::1
 nd_target=fe80::200:1ff:fe00:0
 ])
@@ -22162,6 +22170,8 @@ dl_src=00:00:00:02:00:00
 # - 00:00:01:00:00:00 - interface MAC.
 as hv1
 AT_CHECK([ovs-ofctl dump-flows br-int | grep -E 
"priority=75,.*${match_sw1_metadata}.*arp_op=1" | grep -oE 
"dl_src=[[0-9a-f:]]+" | sort], [0], [dnl
+dl_src=00:00:00:01:00:00
+dl_src=00:00:00:02:00:00
 dl_src=00:00:01:00:00:00
 ])
 
-- 
2.31.1

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to