On Thu, Oct 17, 2019 at 4:55 PM Dumitru Ceara <[email protected]> wrote: > > ARP request and ND NS packets for router owned IPs were being > flooded in the complete L2 domain (using the MC_FLOOD multicast group). > However this creates a scaling issue in scenarios where aggregation > logical switches are connected to more logical routers (~350). The > logical pipelines of all routers would have to be executed before the > packet is finally replied to by a single router, the owner of the IP > address. > > This commit limits the broadcast domain by bypassing the L2 Lookup stage > for ARP requests that will be replied by a single router. The packets > are still flooded in the L2 domain but not on any of the other patch > ports towards other routers connected to the switch. This restricted > flooding is done by using a new multicast group (MC_ARP_ND_FLOOD). > > IPs that are owned by the routers and for which this fix applies are: > - IP addresses configured on the router ports. > - VIPs. > - NAT IPs. > > Reported-at: https://bugzilla.redhat.com/1756945 > Reported-by: Anil Venkata <[email protected]> > Signed-off-by: Dumitru Ceara <[email protected]>
Sorry for the noise, please disregard this version. I had to send a v2 version in order to also address ARP requests and ND-NS packets received on localnet ports: https://patchwork.ozlabs.org/patch/1181862/ Thanks, Dumitru > --- > lib/mcast-group-index.h | 1 + > northd/ovn-northd.8.xml | 16 ++++ > northd/ovn-northd.c | 189 > +++++++++++++++++++++++++++++++++++++++++------- > tests/ovn.at | 2 +- > 4 files changed, 179 insertions(+), 29 deletions(-) > > diff --git a/lib/mcast-group-index.h b/lib/mcast-group-index.h > index ba995ba..06bd8b3 100644 > --- a/lib/mcast-group-index.h > +++ b/lib/mcast-group-index.h > @@ -27,6 +27,7 @@ enum ovn_mcast_tunnel_keys { > > OVN_MCAST_FLOOD_TUNNEL_KEY = OVN_MIN_MULTICAST, > OVN_MCAST_UNKNOWN_TUNNEL_KEY, > + OVN_MCAST_ARP_ND_TUNNEL_KEY, > OVN_MCAST_MROUTER_FLOOD_TUNNEL_KEY, > OVN_MCAST_MROUTER_STATIC_TUNNEL_KEY, > OVN_MCAST_STATIC_TUNNEL_KEY, > diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml > index d3e0e5e..d942469 100644 > --- a/northd/ovn-northd.8.xml > +++ b/northd/ovn-northd.8.xml > @@ -699,6 +699,22 @@ nd_na_router { > </li> > > <li> > + Priority-40 flows for each port connected to a logical router > + matching self originated GARP/ARP request/ND packets. These packets > + are flooded to the <code>MC_FLOOD</code> which contains all logical > + ports. > + </li> > + > + <li> > + Priority-30 flows for each IP address/VIP/NAT address owned by a > + router port connected to the switch. These flows match ARP requests > + and ND packets for the specific IP addresses. Matched packets are > + forwarded in the L3 domain only to the router that owns the IP > + address and flooded in the L2 domain on all ports except patch > + ports connected to logical routers. > + </li> > + > + <li> > One priority-0 fallback flow that matches all packets and advances to > the next table. > </li> > diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c > index ea8ad7c..7537a22 100644 > --- a/northd/ovn-northd.c > +++ b/northd/ovn-northd.c > @@ -1193,6 +1193,34 @@ ovn_port_allocate_key(struct ovn_datapath *od) > 1, (1u << 15) - 1, &od->port_key_hint); > } > > +/* Returns true if the logical switch port 'enabled' column is empty or > + * set to true. Otherwise, returns false. */ > +static bool > +lsp_is_enabled(const struct nbrec_logical_switch_port *lsp) > +{ > + return !lsp->n_enabled || *lsp->enabled; > +} > + > +/* Returns true only if the logical switch port 'up' column is set to true. > + * Otherwise, if the column is not set or set to false, returns false. */ > +static bool > +lsp_is_up(const struct nbrec_logical_switch_port *lsp) > +{ > + return lsp->n_up && *lsp->up; > +} > + > +static bool > +lsp_is_external(const struct nbrec_logical_switch_port *nbsp) > +{ > + return !strcmp(nbsp->type, "external"); > +} > + > +static bool > +lrport_is_enabled(const struct nbrec_logical_router_port *lrport) > +{ > + return !lrport->enabled || *lrport->enabled; > +} > + > static char * > chassis_redirect_name(const char *port_name) > { > @@ -3018,6 +3046,10 @@ static const struct multicast_group mc_static = > static const struct multicast_group mc_unknown = > { MC_UNKNOWN, OVN_MCAST_UNKNOWN_TUNNEL_KEY }; > > +#define MC_ARP_ND "_MC_arp_nd" > +static const struct multicast_group mc_arp_nd = > + { MC_ARP_ND, OVN_MCAST_ARP_ND_TUNNEL_KEY }; > + > static bool > multicast_group_equal(const struct multicast_group *a, > const struct multicast_group *b) > @@ -3719,28 +3751,6 @@ build_port_security_ip(enum ovn_pipeline pipeline, > struct ovn_port *op, > > } > > -/* Returns true if the logical switch port 'enabled' column is empty or > - * set to true. Otherwise, returns false. */ > -static bool > -lsp_is_enabled(const struct nbrec_logical_switch_port *lsp) > -{ > - return !lsp->n_enabled || *lsp->enabled; > -} > - > -/* Returns true only if the logical switch port 'up' column is set to true. > - * Otherwise, if the column is not set or set to false, returns false. */ > -static bool > -lsp_is_up(const struct nbrec_logical_switch_port *lsp) > -{ > - return lsp->n_up && *lsp->up; > -} > - > -static bool > -lsp_is_external(const struct nbrec_logical_switch_port *nbsp) > -{ > - return !strcmp(nbsp->type, "external"); > -} > - > static bool > build_dhcpv4_action(struct ovn_port *op, ovs_be32 offer_ip, > struct ds *options_action, struct ds *response_action, > @@ -5143,6 +5153,119 @@ build_lrouter_groups(struct hmap *ports, struct > ovs_list *lr_list) > } > } > > +/* > + * Ingress table 11: Flows that forward ARP/ND requests only to the routers > + * that own the addresses. Packets are still flooded in the switching domain > + * as regular broadcast. > + */ > +static void > +build_lswitch_rport_arp_flow(const char *target_address, int addr_family, > + struct ovn_port *patch_op, > + struct ovn_datapath *od, > + uint32_t priority, > + struct hmap *lflows) > +{ > + struct ds match = DS_EMPTY_INITIALIZER; > + struct ds actions = DS_EMPTY_INITIALIZER; > + > + if (addr_family == AF_INET) { > + ds_put_format(&match, "arp.tpa == %s && arp.op == 1", > target_address); > + } else { > + ds_put_format(&match, "nd.target == %s && nd_ns", target_address); > + } > + > + /* Send a clone of the packet to the router pipeline and flood the > + * original in the broadcast domain (skipping router ports). */ > + ds_put_format(&actions, > + "clone { outport = %s; output; }; " > + "outport = \""MC_ARP_ND"\"; output;", > + patch_op->json_key); > + ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, priority, > + ds_cstr(&match), ds_cstr(&actions)); > + > + ds_destroy(&match); > + ds_destroy(&actions); > +} > + > +/* > + * Ingress table 11: Flows that forward ARP/ND requests only to the routers > + * that own the addresses. > + * Priorities: > + * - 40: self originated GARPs that need to follow regular processing. > + * - 30: ARP requests to router owned IPs (interface IP/LB/NAT). > + */ > +static void > +build_lswitch_rport_arp_responders(struct ovn_port *op, > + struct ovn_datapath *sw_od, > + struct ovn_port *sw_op, > + struct hmap *lflows) > +{ > + if (!op || !op->nbrp) { > + return; > + } > + > + if (!lrport_is_enabled(op->nbrp)) { > + return; > + } > + > + struct ds match = DS_EMPTY_INITIALIZER; > + > + /* Self originated (G)ARP requests/ND need to be flooded as usual. > + * Priority: 40. > + */ > + ds_put_format(&match, "inport == %s && (arp.op == 1 || nd_ns)", > + sw_op->json_key); > + ovn_lflow_add(lflows, sw_od, S_SWITCH_IN_ARP_ND_RSP, 40, > + ds_cstr(&match), > + "outport = \""MC_FLOOD"\"; output;"); > + > + ds_destroy(&match); > + > + /* Forward ARP requests for IPs configured on the router only to this > + * router port. > + * Priority: 30. > + */ > + for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { > + build_lswitch_rport_arp_flow(op->lrp_networks.ipv4_addrs[i].addr_s, > + AF_INET, sw_op, sw_od, 30, lflows); > + } > + for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { > + build_lswitch_rport_arp_flow(op->lrp_networks.ipv6_addrs[i].addr_s, > + AF_INET6, sw_op, sw_od, 30, lflows); > + } > + > + /* Forward ARP requests to load-balancer VIPs configured on the router > + * only to this router port. > + * Priority: 30. > + */ > + struct sset all_ips = SSET_INITIALIZER(&all_ips); > + const char *ip_address; > + int addr_family; > + > + get_router_load_balancer_ips(op->od, &all_ips, &addr_family); > + > + SSET_FOR_EACH (ip_address, &all_ips) { > + build_lswitch_rport_arp_flow(ip_address, addr_family, sw_op, sw_od, > + 30, lflows); > + } > + sset_destroy(&all_ips); > + > + /* Forward ARP requests to NAT addresses configured on the router > + * only to this router port. > + * Priority: 30. > + */ > + for (int i = 0; i < op->od->nbr->n_nat; i++) { > + const struct nbrec_nat *nat = op->od->nbr->nat[i]; > + > + if (!strcmp(nat->type, "snat")) { > + continue; > + } > + > + build_lswitch_rport_arp_flow(nat->external_ip, AF_INET, sw_op, sw_od, > + 30, lflows); > + } > +} > + > static void > build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, > struct hmap *port_groups, struct hmap *lflows, > @@ -5266,6 +5389,16 @@ build_lswitch_flows(struct hmap *datapaths, struct > hmap *ports, > > free(tokstr); > } else { > + > + /* For ports connected to logical routers add flows to bypass the > + * broadcast flooding of ARP/ND requests in table 17. We direct > the > + * requests only to the router port that owns the IP address. > + * */ > + if (!strcmp(op->nbsp->type, "router")) { > + build_lswitch_rport_arp_responders(op->peer, op->od, op, > + lflows); > + } > + > /* > * Add ARP/ND reply flows if either the > * - port is up or > @@ -5861,12 +5994,6 @@ build_lswitch_flows(struct hmap *datapaths, struct > hmap *ports, > ds_destroy(&actions); > } > > -static bool > -lrport_is_enabled(const struct nbrec_logical_router_port *lrport) > -{ > - return !lrport->enabled || *lrport->enabled; > -} > - > /* Returns a string of the IP address of the router port 'op' that > * overlaps with 'ip_s". If one is not found, returns NULL. > * > @@ -9129,6 +9256,12 @@ build_mcast_groups(struct northd_context *ctx, > } else if (op->nbsp && lsp_is_enabled(op->nbsp)) { > ovn_multicast_add(mcast_groups, &mc_flood, op); > > + /* Add all non-router ports to the ARP ND L2 broadcast flood > + * domain entry. */ > + if (strcmp(op->nbsp->type, "router")) { > + ovn_multicast_add(mcast_groups, &mc_arp_nd, op); > + } > + > /* If this port is connected to a multicast router then add it > * to the MC_MROUTER_FLOOD group. > */ > diff --git a/tests/ovn.at b/tests/ovn.at > index 6bdb9e8..30b42e6 100644 > --- a/tests/ovn.at > +++ b/tests/ovn.at > @@ -10719,7 +10719,7 @@ ovn-nbctl --wait=hv --timeout=3 sync > # Check that there is a logical flow in logical switch foo's pipeline > # to set the outport to rp-foo (which is expected). > OVS_WAIT_UNTIL([test 1 = `ovn-sbctl dump-flows foo | grep ls_in_l2_lkup | \ > -grep rp-foo | grep -v is_chassis_resident | wc -l`]) > +grep rp-foo | grep -v is_chassis_resident | grep "eth.dst" | wc -l`]) > > # Set the option 'reside-on-redirect-chassis' for foo > ovn-nbctl set logical_router_port foo options:reside-on-redirect-chassis=true > -- > 1.8.3.1 > > _______________________________________________ > dev mailing list > [email protected] > https://mail.openvswitch.org/mailman/listinfo/ovs-dev _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
