Previously, ARP TPAs were filtered down only to "reachable" addresses. Reachable addresses are all router interface addresses, as well as NAT external addresses and load balancer VIPs that are within the subnet handled by a router's port.
However, it is possible that in some configurations, CMSes purposely configure NAT or load balancer addresses on a router that are outside the router's subnets, and they expect the router to respond to ARPs for those addresses. This commit adds a higher priority flow to logical switches that makes it so ARPs targeted at "unreachable" addresses are flooded to all ports. This way, the ARPs can reach the router appropriately and receive a response. Reported at: https://bugzilla.redhat.com/show_bug.cgi?id=1929901 Signed-off-by: Mark Michelson <[email protected]> --- northd/ovn-northd.8.xml | 8 ++ northd/ovn-northd.c | 158 +++++++++++++++++++++++++++------------- northd/ovn_northd.dl | 98 ++++++++++++++++++++----- 3 files changed, 194 insertions(+), 70 deletions(-) diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml index 8197aa513..ed8c44940 100644 --- a/northd/ovn-northd.8.xml +++ b/northd/ovn-northd.8.xml @@ -1535,6 +1535,14 @@ output; logical ports. </li> + <li> + Priority-80 flows for each IP address/VIP/NAT address configured + outside its owning router port's subnet. These flows match ARP + requests and ND packets for the specific IP addresses. Matched packets + are forwarded only to the <code>MC_FLOOD</code> multicast group which + contains all connected logical ports. + </li> + <li> Priority-75 flows for each port connected to a logical router matching self originated ARP request/ND packets. These packets diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c index ad62ae220..b66db38e8 100644 --- a/northd/ovn-northd.c +++ b/northd/ovn-northd.c @@ -6479,44 +6479,51 @@ build_lswitch_rport_arp_req_self_orig_flow(struct ovn_port *op, ds_destroy(&match); } -/* - * Ingress table 19: Flows that forward ARP/ND requests only to the routers - * that own the addresses. Other ARP/ND packets are still flooded in the - * switching domain as regular broadcast. - */ static void -build_lswitch_rport_arp_req_flow_for_ip(struct sset *ips, - int addr_family, - struct ovn_port *patch_op, - struct ovn_datapath *od, - uint32_t priority, - struct hmap *lflows, - const struct ovsdb_idl_row *stage_hint) +arp_nd_ns_match(struct sset *ips, int addr_family, struct ds *match) { - struct ds match = DS_EMPTY_INITIALIZER; - struct ds actions = DS_EMPTY_INITIALIZER; /* Packets received from VXLAN tunnels have already been through the * router pipeline so we should skip them. Normally this is done by the * multicast_group implementation (VXLAN packets skip table 32 which * delivers to patch ports) but we're bypassing multicast_groups. */ - ds_put_cstr(&match, FLAGBIT_NOT_VXLAN " && "); + ds_put_cstr(match, FLAGBIT_NOT_VXLAN " && "); if (addr_family == AF_INET) { - ds_put_cstr(&match, "arp.op == 1 && arp.tpa == { "); + ds_put_cstr(match, "arp.op == 1 && arp.tpa == { "); } else { - ds_put_cstr(&match, "nd_ns && nd.target == { "); + ds_put_cstr(match, "nd_ns && nd.target == { "); } const char *ip_address; SSET_FOR_EACH (ip_address, ips) { - ds_put_format(&match, "%s, ", ip_address); + ds_put_format(match, "%s, ", ip_address); } - ds_chomp(&match, ' '); - ds_chomp(&match, ','); - ds_put_cstr(&match, "}"); + ds_chomp(match, ' '); + ds_chomp(match, ','); + ds_put_cstr(match, "}"); +} + +/* + * Ingress table 19: Flows that forward ARP/ND requests only to the routers + * that own the addresses. Other ARP/ND packets are still flooded in the + * switching domain as regular broadcast. + */ +static void +build_lswitch_rport_arp_req_flow_for_reachable_ip(struct sset *ips, + int addr_family, + struct ovn_port *patch_op, + struct ovn_datapath *od, + uint32_t priority, + struct hmap *lflows, + const struct ovsdb_idl_row *stage_hint) +{ + struct ds match = DS_EMPTY_INITIALIZER; + struct ds actions = DS_EMPTY_INITIALIZER; + + arp_nd_ns_match(ips, addr_family, &match); /* Send a the packet to the router pipeline. If the switch has non-router * ports then flood it there as well. @@ -6539,6 +6546,32 @@ build_lswitch_rport_arp_req_flow_for_ip(struct sset *ips, ds_destroy(&actions); } +/* + * Ingress table 19: Flows that forward ARP/ND requests for "unreachable" IPs + * (NAT or load balancer IPs configured on a router that are outside the router's + * configured subnets). + * These ARP/ND packets are flooded in the switching domain as regular broadcast. + */ +static void +build_lswitch_rport_arp_req_flow_for_unreachable_ip(struct sset *ips, + int addr_family, + struct ovn_datapath *od, + uint32_t priority, + struct hmap *lflows, + const struct ovsdb_idl_row *stage_hint) +{ + struct ds match = DS_EMPTY_INITIALIZER; + + arp_nd_ns_match(ips, addr_family, &match); + + ovn_lflow_add_unique_with_hint(lflows, od, S_SWITCH_IN_L2_LKUP, + priority, ds_cstr(&match), + "outport = \""MC_FLOOD"\"; output;", + stage_hint); + + ds_destroy(&match); +} + /* * Ingress table 19: Flows that forward ARP/ND requests only to the routers * that own the addresses. @@ -6565,39 +6598,48 @@ build_lswitch_rport_arp_req_flows(struct ovn_port *op, * router port. * Priority: 80. */ - struct sset all_ips_v4 = SSET_INITIALIZER(&all_ips_v4); - struct sset all_ips_v6 = SSET_INITIALIZER(&all_ips_v6); + struct sset lb_ips_v4 = SSET_INITIALIZER(&lb_ips_v4); + struct sset lb_ips_v6 = SSET_INITIALIZER(&lb_ips_v6); - get_router_load_balancer_ips(op->od, &all_ips_v4, &all_ips_v6); + get_router_load_balancer_ips(op->od, &lb_ips_v4, &lb_ips_v6); + + struct sset reachable_ips_v4 = SSET_INITIALIZER(&reachable_ips_v4); + struct sset reachable_ips_v6 = SSET_INITIALIZER(&reachable_ips_v6); + struct sset unreachable_ips_v4 = SSET_INITIALIZER(&unreachable_ips_v4); + struct sset unreachable_ips_v6 = SSET_INITIALIZER(&unreachable_ips_v6); const char *ip_addr; const char *ip_addr_next; - SSET_FOR_EACH_SAFE (ip_addr, ip_addr_next, &all_ips_v4) { + SSET_FOR_EACH_SAFE (ip_addr, ip_addr_next, &lb_ips_v4) { ovs_be32 ipv4_addr; /* Check if the ovn port has a network configured on which we could * expect ARP requests for the LB VIP. */ - if (ip_parse(ip_addr, &ipv4_addr) && - lrouter_port_ipv4_reachable(op, ipv4_addr)) { - continue; + if (ip_parse(ip_addr, &ipv4_addr)) { + if (lrouter_port_ipv4_reachable(op, ipv4_addr)) { + sset_add(&reachable_ips_v4, ip_addr); + } else { + sset_add(&unreachable_ips_v4, ip_addr); + } } - - sset_delete(&all_ips_v4, SSET_NODE_FROM_NAME(ip_addr)); } - SSET_FOR_EACH_SAFE (ip_addr, ip_addr_next, &all_ips_v6) { + SSET_FOR_EACH_SAFE (ip_addr, ip_addr_next, &lb_ips_v6) { struct in6_addr ipv6_addr; /* Check if the ovn port has a network configured on which we could * expect NS requests for the LB VIP. */ - if (ipv6_parse(ip_addr, &ipv6_addr) && - lrouter_port_ipv6_reachable(op, &ipv6_addr)) { - continue; + if (ipv6_parse(ip_addr, &ipv6_addr)) { + if (lrouter_port_ipv6_reachable(op, &ipv6_addr)) { + sset_add(&reachable_ips_v6, ip_addr); + } else { + sset_add(&unreachable_ips_v6, ip_addr); + } } - - sset_delete(&all_ips_v6, SSET_NODE_FROM_NAME(ip_addr)); } + sset_destroy(&lb_ips_v4); + sset_destroy(&lb_ips_v6); for (size_t i = 0; i < op->od->nbr->n_nat; i++) { struct ovn_nat *nat_entry = &op->od->nat_entries[i]; @@ -6618,37 +6660,53 @@ build_lswitch_rport_arp_req_flows(struct ovn_port *op, struct in6_addr *addr = &nat_entry->ext_addrs.ipv6_addrs[0].addr; if (lrouter_port_ipv6_reachable(op, addr)) { - sset_add(&all_ips_v6, nat->external_ip); + sset_add(&reachable_ips_v6, nat->external_ip); + } else { + sset_add(&unreachable_ips_v6, nat->external_ip); } } else { ovs_be32 addr = nat_entry->ext_addrs.ipv4_addrs[0].addr; if (lrouter_port_ipv4_reachable(op, addr)) { - sset_add(&all_ips_v4, nat->external_ip); + sset_add(&reachable_ips_v4, nat->external_ip); + } else { + sset_add(&unreachable_ips_v4, nat->external_ip); } } } for (size_t i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { - sset_add(&all_ips_v4, op->lrp_networks.ipv4_addrs[i].addr_s); + sset_add(&reachable_ips_v4, op->lrp_networks.ipv4_addrs[i].addr_s); } for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { - sset_add(&all_ips_v6, op->lrp_networks.ipv6_addrs[i].addr_s); + sset_add(&reachable_ips_v6, op->lrp_networks.ipv6_addrs[i].addr_s); } - if (!sset_is_empty(&all_ips_v4)) { - build_lswitch_rport_arp_req_flow_for_ip(&all_ips_v4, AF_INET, sw_op, - sw_od, 80, lflows, - stage_hint); + if (!sset_is_empty(&reachable_ips_v4)) { + build_lswitch_rport_arp_req_flow_for_reachable_ip(&reachable_ips_v4, AF_INET, sw_op, + sw_od, 80, lflows, + stage_hint); + } + if (!sset_is_empty(&reachable_ips_v6)) { + build_lswitch_rport_arp_req_flow_for_reachable_ip(&reachable_ips_v6, AF_INET6, sw_op, + sw_od, 80, lflows, + stage_hint); } - if (!sset_is_empty(&all_ips_v6)) { - build_lswitch_rport_arp_req_flow_for_ip(&all_ips_v6, AF_INET6, sw_op, - sw_od, 80, lflows, - stage_hint); + if (!sset_is_empty(&unreachable_ips_v4)) { + build_lswitch_rport_arp_req_flow_for_unreachable_ip(&unreachable_ips_v4, AF_INET, + sw_od, 90, lflows, + stage_hint); + } + if (!sset_is_empty(&unreachable_ips_v6)) { + build_lswitch_rport_arp_req_flow_for_unreachable_ip(&unreachable_ips_v6, AF_INET6, + sw_od, 90, lflows, + stage_hint); } - sset_destroy(&all_ips_v4); - sset_destroy(&all_ips_v6); + sset_destroy(&reachable_ips_v4); + sset_destroy(&reachable_ips_v6); + sset_destroy(&unreachable_ips_v4); + sset_destroy(&unreachable_ips_v6); /* Self originated ARP requests/ND need to be flooded as usual. * diff --git a/northd/ovn_northd.dl b/northd/ovn_northd.dl index 089947516..612fcb3d3 100644 --- a/northd/ovn_northd.dl +++ b/northd/ovn_northd.dl @@ -4091,9 +4091,13 @@ UniqueFlow[Flow{.logical_datapath = sw.ls._uuid, * router port. * Priority: 80. */ -function get_arp_forward_ips(rp: Ref<RouterPort>): (Set<string>, Set<string>) = { - var all_ips_v4 = set_empty(); - var all_ips_v6 = set_empty(); +function get_arp_forward_ips(rp: Ref<RouterPort>): + (Set<string>, Set<string>, Set<string>, Set<string>) = +{ + var reachable_ips_v4 = set_empty(); + var reachable_ips_v6 = set_empty(); + var unreachable_ips_v4 = set_empty(); + var unreachable_ips_v6 = set_empty(); (var lb_ips_v4, var lb_ips_v6) = get_router_load_balancer_ips(deref(rp.router)); @@ -4103,7 +4107,9 @@ function get_arp_forward_ips(rp: Ref<RouterPort>): (Set<string>, Set<string>) = */ match (ip_parse(a)) { Some{ipv4} -> if (lrouter_port_ip_reachable(rp, IPv4{ipv4})) { - all_ips_v4.insert(a) + reachable_ips_v4.insert(a) + } else { + unreachable_ips_v4.insert(a) }, _ -> () } @@ -4114,7 +4120,9 @@ function get_arp_forward_ips(rp: Ref<RouterPort>): (Set<string>, Set<string>) = */ match (ipv6_parse(a)) { Some{ipv6} -> if (lrouter_port_ip_reachable(rp, IPv6{ipv6})) { - all_ips_v6.insert(a) + reachable_ips_v6.insert(a) + } else { + unreachable_ips_v6.insert(a) }, _ -> () } @@ -4127,22 +4135,45 @@ function get_arp_forward_ips(rp: Ref<RouterPort>): (Set<string>, Set<string>) = */ if (lrouter_port_ip_reachable(rp, nat.external_ip)) { match (nat.external_ip) { - IPv4{_} -> all_ips_v4.insert(nat.nat.external_ip), - IPv6{_} -> all_ips_v6.insert(nat.nat.external_ip) + IPv4{_} -> reachable_ips_v4.insert(nat.nat.external_ip), + IPv6{_} -> reachable_ips_v6.insert(nat.nat.external_ip) + } + } else { + match (nat.external_ip) { + IPv4{_} -> unreachable_ips_v4.insert(nat.nat.external_ip), + IPv6{_} -> unreachable_ips_v6.insert(nat.nat.external_ip), } } } }; for (a in rp.networks.ipv4_addrs) { - all_ips_v4.insert("${a.addr}") + reachable_ips_v4.insert("${a.addr}") }; for (a in rp.networks.ipv6_addrs) { - all_ips_v6.insert("${a.addr}") + reachable_ips_v6.insert("${a.addr}") }; - (all_ips_v4, all_ips_v6) + (reachable_ips_v4, reachable_ips_v6, unreachable_ips_v4, unreachable_ips_v6) } + +relation &SwitchPortARPForwards( + port: Ref<SwitchPort>, + reachable_ips_v4: Set<string>, + reachable_ips_v6: Set<string>, + unreachable_ips_v4: Set<string>, + unreachable_ips_v6: Set<string> +) + +&SwitchPortARPForwards(.port = port, + .reachable_ips_v4 = reachable_ips_v4, + .reachable_ips_v6 = reachable_ips_v6, + .unreachable_ips_v4 = unreachable_ips_v4, + .unreachable_ips_v6 = unreachable_ips_v6) :- + port in &SwitchPort(.peer = Some{rp}), + rp.is_enabled(), + (var reachable_ips_v4, var reachable_ips_v6, var unreachable_ips_v4, var unreachable_ips_v6) = get_arp_forward_ips(rp). + /* Packets received from VXLAN tunnels have already been through the * router pipeline so we should skip them. Normally this is done by the * multicast_group implementation (VXLAN packets skip table 32 which @@ -4154,7 +4185,7 @@ AnnotatedFlow(.f = Flow{.logical_datapath = sw.ls._uuid, .priority = 80, .__match = fLAGBIT_NOT_VXLAN() ++ " && arp.op == 1 && arp.tpa == { " ++ - all_ips_v4.to_vec().join(", ") ++ "}", + ipv4.to_vec().join(", ") ++ "}", .actions = if (sw.has_non_router_port) { "clone {outport = ${sp.json_name}; output; }; " "outport = ${mc_flood_l2}; output;" @@ -4163,17 +4194,17 @@ AnnotatedFlow(.f = Flow{.logical_datapath = sw.ls._uuid, }, .external_ids = stage_hint(sp.lsp._uuid)}, .shared = not sw.has_non_router_port) :- - sp in &SwitchPort(.sw = sw, .peer = Some{rp}), - rp.is_enabled(), - (var all_ips_v4, _) = get_arp_forward_ips(rp), - not all_ips_v4.is_empty(), + sp in &SwitchPort(.sw = sw), + &SwitchPortARPForwards(.port = sp, .reachable_ips_v4 = ipv4), + not ipv4.is_empty(), var mc_flood_l2 = json_string_escape(mC_FLOOD_L2().0). + AnnotatedFlow(.f = Flow{.logical_datapath = sw.ls._uuid, .stage = s_SWITCH_IN_L2_LKUP(), .priority = 80, .__match = fLAGBIT_NOT_VXLAN() ++ " && nd_ns && nd.target == { " ++ - all_ips_v6.to_vec().join(", ") ++ "}", + ipv6.to_vec().join(", ") ++ "}", .actions = if (sw.has_non_router_port) { "clone {outport = ${sp.json_name}; output; }; " "outport = ${mc_flood_l2}; output;" @@ -4182,12 +4213,39 @@ AnnotatedFlow(.f = Flow{.logical_datapath = sw.ls._uuid, }, .external_ids = stage_hint(sp.lsp._uuid)}, .shared = not sw.has_non_router_port) :- - sp in &SwitchPort(.sw = sw, .peer = Some{rp}), - rp.is_enabled(), - (_, var all_ips_v6) = get_arp_forward_ips(rp), - not all_ips_v6.is_empty(), + sp in &SwitchPort(.sw = sw), + &SwitchPortARPForwards(.port = sp, .reachable_ips_v6 = ipv6), + not ipv6.is_empty(), var mc_flood_l2 = json_string_escape(mC_FLOOD_L2().0). +AnnotatedFlow(.f = Flow{.logical_datapath = sw.ls._uuid, + .stage = s_SWITCH_IN_L2_LKUP(), + .priority = 90, + .__match = fLAGBIT_NOT_VXLAN() ++ + " && arp.op == 1 && arp.tpa == { " ++ + ipv4.to_vec().join(", ") ++ "}", + .actions = "outport = ${flood}; output;", + .external_ids = stage_hint(sp.lsp._uuid)}, + .shared = not sw.has_non_router_port) :- + sp in &SwitchPort(.sw = sw), + &SwitchPortARPForwards(.port = sp, .unreachable_ips_v4 = ipv4), + not ipv4.is_empty(), + var flood = json_string_escape(mC_FLOOD().0). + +AnnotatedFlow(.f = Flow{.logical_datapath = sw.ls._uuid, + .stage = s_SWITCH_IN_L2_LKUP(), + .priority = 90, + .__match = fLAGBIT_NOT_VXLAN() ++ + " && nd_ns && nd.target == { " ++ + ipv6.to_vec().join(", ") ++ "}", + .actions = "outport = ${flood}; output;", + .external_ids = stage_hint(sp.lsp._uuid)}, + .shared = not sw.has_non_router_port) :- + sp in &SwitchPort(.sw = sw), + &SwitchPortARPForwards(.port = sp, .unreachable_ips_v6 = ipv6), + not ipv6.is_empty(), + var flood = json_string_escape(mC_FLOOD().0). + for (SwitchPortNewDynamicAddress(.port = &SwitchPort{.lsp = lsp, .json_name = json_name, .sw = &sw}, .address = Some{addrs}) if lsp.__type != "external") { -- 2.29.2 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
