Logical flows that limit the ARP/NS broadcast domain on a logical switch
should only match on ARP requests/NS for IPs that can actually be
replied to on the connected router port (i.e., an IP on the same network
is configured on the router port).

Reported-by: Girish Moodalbail <[email protected]>
Reported-at: 
https://mail.openvswitch.org/pipermail/ovs-discuss/2020-June/050287.html
Fixes: 32f5ebb06226 ("ovn-northd: Limit ARP/ND broadcast domain whenever 
possible.")
Signed-off-by: Dumitru Ceara <[email protected]>
---
 northd/ovn-northd.c |  164 ++++++++++++++++++++++++++++++++++++++++++---------
 tests/ovn.at        |   74 +++++++++++++++++++++++
 2 files changed, 210 insertions(+), 28 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 4c23158..e2e875a 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -6091,6 +6091,44 @@ build_lrouter_groups(struct hmap *ports, struct ovs_list 
*lr_list)
     }
 }
 
+/* Returns 'true' if the IPv4 'addr' is on the same subnet with one of the
+ * IPs configured on the router port.
+ */
+static bool
+lrouter_port_ipv4_reachable(const struct ovn_port *op, ovs_be32 addr)
+{
+    for (size_t i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
+        struct ipv4_netaddr *op_addr = &op->lrp_networks.ipv4_addrs[i];
+
+        if ((op_addr->addr & op_addr->mask) == (addr & op_addr->mask)) {
+            return true;
+        }
+    }
+    return false;
+}
+
+/* Returns 'true' if the IPv6 'addr' is on the same subnet with one of the
+ * IPs configured on the router port.
+ */
+static bool
+lrouter_port_ipv6_reachable(const struct ovn_port *op,
+                            const struct in6_addr *addr)
+{
+    for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
+        struct ipv6_netaddr *op_addr = &op->lrp_networks.ipv6_addrs[i];
+
+        struct in6_addr op_addr6_masked =
+            ipv6_addr_bitand(&op_addr->addr, &op_addr->mask);
+        struct in6_addr nat_addr6_masked =
+            ipv6_addr_bitand(addr, &op_addr->mask);
+
+        if (ipv6_addr_equals(&op_addr6_masked, &nat_addr6_masked)) {
+            return true;
+        }
+    }
+    return false;
+}
+
 /*
  * Ingress table 19: Flows that flood self originated ARP/ND packets in the
  * switching domain.
@@ -6101,8 +6139,47 @@ build_lswitch_rport_arp_req_self_orig_flow(struct 
ovn_port *op,
                                            struct ovn_datapath *od,
                                            struct hmap *lflows)
 {
-    struct ds match = DS_EMPTY_INITIALIZER;
+    struct sset all_eth_addrs = SSET_INITIALIZER(&all_eth_addrs);
     struct ds eth_src = DS_EMPTY_INITIALIZER;
+    struct ds match = DS_EMPTY_INITIALIZER;
+
+    sset_add(&all_eth_addrs, op->lrp_networks.ea_s);
+
+    for (size_t i = 0; i < op->od->nbr->n_nat; i++) {
+        struct ovn_nat *nat_entry = &op->od->nat_entries[i];
+        const struct nbrec_nat *nat = nat_entry->nb;
+
+        if (!nat_entry_is_valid(nat_entry)) {
+            continue;
+        }
+
+        if (!strcmp(nat->type, "snat")) {
+            continue;
+        }
+
+        if (!nat->external_mac) {
+            continue;
+        }
+
+        /* Check if there's a network configured on op on which we could
+         * expect ARP requests/NS for the DNAT external_ip.
+         */
+        if (nat_entry_is_v6(nat_entry)) {
+            struct in6_addr *addr = &nat_entry->ext_addrs.ipv6_addrs[0].addr;
+
+            if (!lrouter_port_ipv6_reachable(op, addr)) {
+                continue;
+            }
+        } else {
+            ovs_be32 addr = nat_entry->ext_addrs.ipv4_addrs[0].addr;
+
+            if (!lrouter_port_ipv4_reachable(op, addr)) {
+                continue;
+            }
+        }
+        sset_add(&all_eth_addrs, nat->external_mac);
+    }
+
 
     /* Self originated (G)ARP requests/ND need to be flooded as usual.
      * Determine that packets are self originated by also matching on
@@ -6110,15 +6187,11 @@ build_lswitch_rport_arp_req_self_orig_flow(struct 
ovn_port *op,
      * is a VLAN-backed network.
      * Priority: 80.
      */
-    ds_put_format(&eth_src, "{ %s, ", op->lrp_networks.ea_s);
-    for (size_t i = 0; i < op->od->nbr->n_nat; i++) {
-        const struct nbrec_nat *nat = op->od->nbr->nat[i];
-
-        if (!nat->external_mac) {
-            continue;
-        }
+    const char *eth_addr;
 
-        ds_put_format(&eth_src, "%s, ", nat->external_mac);
+    ds_put_cstr(&eth_src, "{");
+    SSET_FOR_EACH (eth_addr, &all_eth_addrs) {
+        ds_put_format(&eth_src, "%s, ", eth_addr);
     }
     ds_chomp(&eth_src, ' ');
     ds_chomp(&eth_src, ',');
@@ -6130,8 +6203,9 @@ build_lswitch_rport_arp_req_self_orig_flow(struct 
ovn_port *op,
                   ds_cstr(&match),
                   "outport = \""MC_FLOOD"\"; output;");
 
-    ds_destroy(&match);
+    sset_destroy(&all_eth_addrs);
     ds_destroy(&eth_src);
+    ds_destroy(&match);
 }
 
 /*
@@ -6222,38 +6296,72 @@ build_lswitch_rport_arp_req_flows(struct ovn_port *op,
     struct sset all_ips_v4 = SSET_INITIALIZER(&all_ips_v4);
     struct sset all_ips_v6 = SSET_INITIALIZER(&all_ips_v6);
 
-    for (size_t i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
-        sset_add(&all_ips_v4, op->lrp_networks.ipv4_addrs[i].addr_s);
-    }
-    for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
-        sset_add(&all_ips_v6, op->lrp_networks.ipv6_addrs[i].addr_s);
+    get_router_load_balancer_ips(op->od, &all_ips_v4, &all_ips_v6);
+
+    const char *ip_addr;
+    const char *ip_addr_next;
+    SSET_FOR_EACH_SAFE (ip_addr, ip_addr_next, &all_ips_v4) {
+        ovs_be32 ipv4_addr;
+
+        /* Check if there's a network configured on op on which we could
+         * expect ARP requests for the LB VIP.
+         */
+        if (ip_parse(ip_addr, &ipv4_addr) &&
+                lrouter_port_ipv4_reachable(op, ipv4_addr)) {
+            continue;
+        }
+
+        sset_delete(&all_ips_v4, SSET_NODE_FROM_NAME(ip_addr));
     }
+    SSET_FOR_EACH_SAFE (ip_addr, ip_addr_next, &all_ips_v6) {
+        struct in6_addr ipv6_addr;
 
-    get_router_load_balancer_ips(op->od, &all_ips_v4, &all_ips_v6);
+        /* Check if there's a network configured on op on which we could
+         * expect NS requests for the LB VIP.
+         */
+        if (ipv6_parse(ip_addr, &ipv6_addr) &&
+                lrouter_port_ipv6_reachable(op, &ipv6_addr)) {
+            continue;
+        }
+
+        sset_delete(&all_ips_v6, SSET_NODE_FROM_NAME(ip_addr));
+    }
 
     for (size_t i = 0; i < op->od->nbr->n_nat; i++) {
-        const struct nbrec_nat *nat = op->od->nbr->nat[i];
+        struct ovn_nat *nat_entry = &op->od->nat_entries[i];
+        const struct nbrec_nat *nat = nat_entry->nb;
+
+        if (!nat_entry_is_valid(nat_entry)) {
+            continue;
+        }
 
         if (!strcmp(nat->type, "snat")) {
             continue;
         }
 
-        ovs_be32 ip;
-        ovs_be32 mask;
-        struct in6_addr ipv6;
-        struct in6_addr mask_v6;
+        /* Check if there's a network configured on op on which we could
+         * expect ARP requests/NS for the DNAT external_ip.
+         */
+        if (nat_entry_is_v6(nat_entry)) {
+            struct in6_addr *addr = &nat_entry->ext_addrs.ipv6_addrs[0].addr;
 
-        char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
-        if (error) {
-            free(error);
-            error = ipv6_parse_masked(nat->external_ip, &ipv6, &mask_v6);
-            if (!error) {
+            if (lrouter_port_ipv6_reachable(op, addr)) {
                 sset_add(&all_ips_v6, nat->external_ip);
             }
         } else {
-            sset_add(&all_ips_v4, nat->external_ip);
+            ovs_be32 addr = nat_entry->ext_addrs.ipv4_addrs[0].addr;
+
+            if (lrouter_port_ipv4_reachable(op, addr)) {
+                sset_add(&all_ips_v4, nat->external_ip);
+            }
         }
-        free(error);
+    }
+
+    for (size_t i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
+        sset_add(&all_ips_v4, op->lrp_networks.ipv4_addrs[i].addr_s);
+    }
+    for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
+        sset_add(&all_ips_v6, op->lrp_networks.ipv6_addrs[i].addr_s);
     }
 
     if (!sset_is_empty(&all_ips_v4)) {
diff --git a/tests/ovn.at b/tests/ovn.at
index 24d93bc..88700e8 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -19249,12 +19249,14 @@ ovn-nbctl --wait=hv sync
 
 sw_dp_uuid=$(ovn-sbctl --bare --columns _uuid list datapath_binding sw-agg)
 sw_dp_key=$(ovn-sbctl --bare --columns tunnel_key list datapath_binding sw-agg)
+sw1_dp_key=$(ovn-sbctl --bare --columns tunnel_key list datapath_binding sw1)
 r1_dp_key=$(ovn-sbctl --bare --columns tunnel_key list datapath_binding rtr1)
 
 r1_tnl_key=$(ovn-sbctl --bare --columns tunnel_key list port_binding sw-rtr1)
 r2_tnl_key=$(ovn-sbctl --bare --columns tunnel_key list port_binding sw-rtr2)
 
 match_sw_metadata="metadata=0x${sw_dp_key}"
+match_sw1_metadata="metadata=0x${sw1_dp_key}"
 match_r1_metadata="metadata=0x${r1_dp_key}"
 
 # Inject ARP request for first router owned IP address.
@@ -19370,6 +19372,78 @@ ovn-nbctl lr-nat-add rtr1 dnat_and_snat 10.0.0.122 
20.0.0.12 sw1-p2 00:00:00:02:
 ovn-nbctl lr-nat-add rtr1 dnat_and_snat 10::122 20::12 sw1-p2 00:00:00:02:00:00
 ovn-nbctl --wait=hv sync
 
+# Check that broadcast domain limiting flows match only on IPs that are
+# directly reachable via the router port.
+# For sw-rtr1:
+# - 10.0.0.1, 10::1, fe80::200:ff:fe00:100 - interface IPs.
+# - 10.0.0.11, 10::11 - LB VIPs.
+# - 10.0.0.111, 10.0.0.121, 10.0.0.122, 10::111, 10::121, 10::122 - DNAT IPs.
+# For sw-rtr2:
+# - 10.0.0.2, 10::2, fe80::200:ff:fe00:200 - interface IPs.
+# - 10.0.0.22, 10::22 - LB VIPs.
+# - 10.0.0.222, 10::222 - DNAT IPs.
+as hv1
+AT_CHECK([ovs-ofctl dump-flows br-int | grep -E 
"priority=75,.*${match_sw_metadata}" | grep -oE "arp_tpa=[[0-9.]]+" | sort], 
[0], [dnl
+arp_tpa=10.0.0.1
+arp_tpa=10.0.0.11
+arp_tpa=10.0.0.111
+arp_tpa=10.0.0.121
+arp_tpa=10.0.0.122
+arp_tpa=10.0.0.2
+arp_tpa=10.0.0.22
+arp_tpa=10.0.0.222
+])
+AT_CHECK([ovs-ofctl dump-flows br-int | grep -E 
"priority=75,.*${match_sw_metadata}" | grep -oE "nd_target=[[0-9a-f:]]+" | 
sort], [0], [dnl
+nd_target=10::1
+nd_target=10::11
+nd_target=10::111
+nd_target=10::121
+nd_target=10::122
+nd_target=10::2
+nd_target=10::22
+nd_target=10::222
+nd_target=fe80::200:ff:fe00:100
+nd_target=fe80::200:ff:fe00:200
+])
+
+# For sw1-rtr1:
+# - 20.0.0.1, 20::1, fe80::200:1ff:fe00:0 - interface IPs.
+as hv1
+AT_CHECK([ovs-ofctl dump-flows br-int | grep -E 
"priority=75,.*${match_sw1_metadata}" | grep -oE "arp_tpa=[[0-9.]]+" | sort], 
[0], [dnl
+arp_tpa=20.0.0.1
+])
+AT_CHECK([ovs-ofctl dump-flows br-int | grep -E 
"priority=75,.*${match_sw1_metadata}" | grep -oE "nd_target=[[0-9a-f:]]+" | 
sort], [0], [dnl
+nd_target=20::1
+nd_target=fe80::200:1ff:fe00:0
+])
+
+# Self originated ARP/NS with SMACs owned by rtr1-sw and rtr2-sw should be
+# flooded:
+# - 00:00:00:00:01:00 - interface MAC (rtr1-sw).
+# - 00:00:00:00:02:00 - interface MAC (rtr2-sw).
+# - 00:00:00:01:00:00 - dnat_and_snat external MAC.
+# - 00:00:00:02:00:00 - dnat_and_snat external MAC.
+as hv1
+AT_CHECK([ovs-ofctl dump-flows br-int | grep -E 
"priority=80,.*${match_sw_metadata}.*arp_op=1" | grep -oE "dl_src=[[0-9a-f:]]+" 
| sort], [0], [dnl
+dl_src=00:00:00:00:01:00
+dl_src=00:00:00:00:02:00
+dl_src=00:00:00:01:00:00
+dl_src=00:00:00:02:00:00
+])
+AT_CHECK([ovs-ofctl dump-flows br-int | grep -E 
"priority=80,.*${match_sw_metadata}.*icmp_type=135" | grep -oE 
"dl_src=[[0-9a-f:]]+" | sort], [0], [dnl
+dl_src=00:00:00:00:01:00
+dl_src=00:00:00:00:02:00
+dl_src=00:00:00:01:00:00
+dl_src=00:00:00:02:00:00
+])
+
+# Self originated ARP/NS with SMACs owned by rtr1-sw1 should be flooded:
+# - 00:00:01:00:00:00 - interface MAC.
+as hv1
+AT_CHECK([ovs-ofctl dump-flows br-int | grep -E 
"priority=80,.*${match_sw1_metadata}.*arp_op=1" | grep -oE 
"dl_src=[[0-9a-f:]]+" | sort], [0], [dnl
+dl_src=00:00:01:00:00:00
+])
+
 # Inject ARP request for first router owned NAT address.
 send_arp_request 1 0 ${src_mac} $(ip_to_hex 10 0 0 254) $(ip_to_hex 10 0 0 111)
 

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to