Add a new configuration option 'mcast_relay' to the Logical_Router:options in the OVN Northbound database.
If a router is configured with 'mcast_relay' enabled then ovn-northd will install Logical_Flows to allow IP multicast traffic to be routed between Logical_Switches. The logical router will aggregate all IGMP groups from attached logical switches and modify the routing pipeline in the following way: - Table S_ROUTER_IN_IP_INPUT: add flow matching the group address and set outport=<Multicast_Group> associated with the IGMP group. Continue to next table. - Table S_ROUTER_IN_IP_ROUTING: bypass route lookup for IP multicast traffic and just decrement TTL. If the packet reached this table then it must have matched a flow in S_ROUTER_IN_IP_INPUT and had outport set. Continue to next table. - Table S_ROUTER_IN_ARP_RESOLVE: bypass ARP resolve for IP multicast traffic and continue to next table. - Table S_ROUTER_OUT_DELIVERY: add flow matching IP multicast traffic and set ETH.SRC to the MAC address of the logical port on which traffic is forwarded. Signed-off-by: Dumitru Ceara <dce...@redhat.com> --- NEWS | 1 + lib/mcast-group-index.h | 7 +- northd/ovn-northd.c | 506 ++++++++++++++++++++++++++++++++++++------------ ovn-nb.xml | 6 + tests/ovn.at | 199 +++++++++++++++++-- 5 files changed, 580 insertions(+), 139 deletions(-) diff --git a/NEWS b/NEWS index f476984..73045d6 100644 --- a/NEWS +++ b/NEWS @@ -39,6 +39,7 @@ Post-v2.11.0 logical groups which results in tunnels only been formed between members of the same transport zone(s). * Support for new logical switch port type - 'virtual'. + * Support for IGMP Snooping/Querier and Relay. - New QoS type "linux-netem" on Linux. - Added support for TLS Server Name Indication (SNI). 
diff --git a/lib/mcast-group-index.h b/lib/mcast-group-index.h index 15a1592..1ec46c1 100644 --- a/lib/mcast-group-index.h +++ b/lib/mcast-group-index.h @@ -20,8 +20,11 @@ struct ovsdb_idl; struct sbrec_datapath_binding; -#define OVN_MCAST_FLOOD_TUNNEL_KEY 65535 -#define OVN_MCAST_UNKNOWN_TUNNEL_KEY (OVN_MCAST_FLOOD_TUNNEL_KEY - 1) +#define OVN_MCAST_FLOOD_TUNNEL_KEY 65535 +#define OVN_MCAST_MROUTER_FLOOD_TUNNEL_KEY \ + (OVN_MCAST_FLOOD_TUNNEL_KEY - 1) +#define OVN_MCAST_UNKNOWN_TUNNEL_KEY \ + (OVN_MCAST_MROUTER_FLOOD_TUNNEL_KEY - 1) struct ovsdb_idl_index *mcast_group_index_create(struct ovsdb_idl *); const struct sbrec_multicast_group * diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c index e6953a4..d40944c 100644 --- a/northd/ovn-northd.c +++ b/northd/ovn-northd.c @@ -444,21 +444,49 @@ BUILD_ASSERT_DECL(OVN_MAX_IP_MULTICAST >= OVN_MIN_MULTICAST); /* * Multicast snooping and querier per datapath configuration. */ +struct mcast_switch_info { + + bool enabled; /* True if snooping enabled. */ + bool querier; /* True if querier enabled. */ + bool flood_unregistered; /* True if unregistered multicast should be + * flooded. + */ + bool flood_relay; /* True if the switch is connected to a + * multicast router and unregistered multicast + * should be flooded to the mrouter. Only + * applicable if flood_unregistered == false. + */ + + int64_t table_size; /* Max number of IP multicast groups. */ + int64_t idle_timeout; /* Timeout after which an idle group is + * flushed. + */ + int64_t query_interval; /* Interval between multicast queries. */ + char *eth_src; /* ETH src address of the multicast queries. */ + char *ipv4_src; /* IP src address of the multicast queries. */ + int64_t query_max_response; /* Expected time after which reports should + * be received for queries that were sent out. + */ + + uint32_t active_flows; /* Current number of active IP multicast + * flows. 
+ */ +}; + +struct mcast_router_info { + bool relay; /* True if the router should relay IP multicast. */ +}; + struct mcast_info { - bool enabled; - bool querier; - bool flood_unregistered; - - int64_t table_size; - int64_t idle_timeout; - int64_t query_interval; - char *eth_src; - char *ipv4_src; - int64_t query_max_response; - - struct hmap group_tnlids; - uint32_t group_tnlid_hint; - uint32_t active_flows; + + struct hmap group_tnlids; /* Group tunnel IDs in use on this DP. */ + uint32_t group_tnlid_hint; /* Hint for allocating next group tunnel ID. */ + struct ovs_list groups; /* List of groups learnt on this DP. */ + + union { + struct mcast_switch_info sw; /* Switch specific multicast info. */ + struct mcast_router_info rtr; /* Router specific multicast info. */ + }; }; static uint32_t @@ -559,6 +587,7 @@ ovn_datapath_create(struct hmap *datapaths, const struct uuid *key, } static void ovn_ls_port_group_destroy(struct hmap *nb_pgs); +static void destroy_mcast_info_for_datapath(struct ovn_datapath *od); static void ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od) @@ -572,12 +601,7 @@ ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od) bitmap_free(od->ipam_info.allocated_ipv4s); free(od->router_ports); ovn_ls_port_group_destroy(&od->nb_pgs); - - if (od->nbs) { - free(od->mcast_info.eth_src); - free(od->mcast_info.ipv4_src); - destroy_tnlids(&od->mcast_info.group_tnlids); - } + destroy_mcast_info_for_datapath(od); free(od); } @@ -714,23 +738,28 @@ init_ipam_info_for_datapath(struct ovn_datapath *od) } static void -init_mcast_info_for_datapath(struct ovn_datapath *od) +init_mcast_info_for_router_datapath(struct ovn_datapath *od) { - if (!od->nbs) { - return; - } + struct mcast_router_info *mcast_rtr_info = &od->mcast_info.rtr; - struct mcast_info *mcast_info = &od->mcast_info; + mcast_rtr_info->relay = smap_get_bool(&od->nbr->options, "mcast_relay", + false); +} - mcast_info->enabled = +static void 
+init_mcast_info_for_switch_datapath(struct ovn_datapath *od) +{ + struct mcast_switch_info *mcast_sw_info = &od->mcast_info.sw; + + mcast_sw_info->enabled = smap_get_bool(&od->nbs->other_config, "mcast_snoop", false); - mcast_info->querier = + mcast_sw_info->querier = smap_get_bool(&od->nbs->other_config, "mcast_querier", true); - mcast_info->flood_unregistered = + mcast_sw_info->flood_unregistered = smap_get_bool(&od->nbs->other_config, "mcast_flood_unregistered", false); - mcast_info->table_size = + mcast_sw_info->table_size = smap_get_ullong(&od->nbs->other_config, "mcast_table_size", OVN_MCAST_DEFAULT_MAX_ENTRIES); @@ -742,54 +771,94 @@ init_mcast_info_for_datapath(struct ovn_datapath *od) } else if (idle_timeout > OVN_MCAST_MAX_IDLE_TIMEOUT_S) { idle_timeout = OVN_MCAST_MAX_IDLE_TIMEOUT_S; } - mcast_info->idle_timeout = idle_timeout; + mcast_sw_info->idle_timeout = idle_timeout; uint32_t query_interval = smap_get_ullong(&od->nbs->other_config, "mcast_query_interval", - mcast_info->idle_timeout / 2); + mcast_sw_info->idle_timeout / 2); if (query_interval < OVN_MCAST_MIN_QUERY_INTERVAL_S) { query_interval = OVN_MCAST_MIN_QUERY_INTERVAL_S; } else if (query_interval > OVN_MCAST_MAX_QUERY_INTERVAL_S) { query_interval = OVN_MCAST_MAX_QUERY_INTERVAL_S; } - mcast_info->query_interval = query_interval; + mcast_sw_info->query_interval = query_interval; - mcast_info->eth_src = + mcast_sw_info->eth_src = nullable_xstrdup(smap_get(&od->nbs->other_config, "mcast_eth_src")); - mcast_info->ipv4_src = + mcast_sw_info->ipv4_src = nullable_xstrdup(smap_get(&od->nbs->other_config, "mcast_ip4_src")); - mcast_info->query_max_response = + mcast_sw_info->query_max_response = smap_get_ullong(&od->nbs->other_config, "mcast_query_max_response", OVN_MCAST_DEFAULT_QUERY_MAX_RESPONSE_S); - hmap_init(&mcast_info->group_tnlids); - mcast_info->group_tnlid_hint = OVN_MIN_IP_MULTICAST; - mcast_info->active_flows = 0; + mcast_sw_info->active_flows = 0; +} + +static void 
+init_mcast_info_for_datapath(struct ovn_datapath *od) +{ + if (!od->nbr && !od->nbs) { + return; + } + + hmap_init(&od->mcast_info.group_tnlids); + od->mcast_info.group_tnlid_hint = OVN_MIN_IP_MULTICAST; + ovs_list_init(&od->mcast_info.groups); + + if (od->nbs) { + init_mcast_info_for_switch_datapath(od); + } else { + init_mcast_info_for_router_datapath(od); + } +} + +static void +destroy_mcast_info_for_switch_datapath(struct ovn_datapath *od) +{ + struct mcast_switch_info *mcast_sw_info = &od->mcast_info.sw; + + free(mcast_sw_info->eth_src); + free(mcast_sw_info->ipv4_src); +} + +static void +destroy_mcast_info_for_datapath(struct ovn_datapath *od) +{ + if (!od->nbr && !od->nbs) { + return; + } + + if (od->nbs) { + destroy_mcast_info_for_switch_datapath(od); + } + + destroy_tnlids(&od->mcast_info.group_tnlids); } static void -store_mcast_info_for_datapath(const struct sbrec_ip_multicast *sb, - struct ovn_datapath *od) +store_mcast_info_for_switch_datapath(const struct sbrec_ip_multicast *sb, + struct ovn_datapath *od) { - struct mcast_info *mcast_info = &od->mcast_info; + struct mcast_switch_info *mcast_sw_info = &od->mcast_info.sw; sbrec_ip_multicast_set_datapath(sb, od->sb); - sbrec_ip_multicast_set_enabled(sb, &mcast_info->enabled, 1); - sbrec_ip_multicast_set_querier(sb, &mcast_info->querier, 1); - sbrec_ip_multicast_set_table_size(sb, &mcast_info->table_size, 1); - sbrec_ip_multicast_set_idle_timeout(sb, &mcast_info->idle_timeout, 1); + sbrec_ip_multicast_set_enabled(sb, &mcast_sw_info->enabled, 1); + sbrec_ip_multicast_set_querier(sb, &mcast_sw_info->querier, 1); + sbrec_ip_multicast_set_table_size(sb, &mcast_sw_info->table_size, 1); + sbrec_ip_multicast_set_idle_timeout(sb, &mcast_sw_info->idle_timeout, 1); sbrec_ip_multicast_set_query_interval(sb, - &mcast_info->query_interval, 1); + &mcast_sw_info->query_interval, 1); sbrec_ip_multicast_set_query_max_resp(sb, - &mcast_info->query_max_response, 1); + &mcast_sw_info->query_max_response, + 1); - if 
(mcast_info->eth_src) { - sbrec_ip_multicast_set_eth_src(sb, mcast_info->eth_src); + if (mcast_sw_info->eth_src) { + sbrec_ip_multicast_set_eth_src(sb, mcast_sw_info->eth_src); } - if (mcast_info->ipv4_src) { - sbrec_ip_multicast_set_ip4_src(sb, mcast_info->ipv4_src); + if (mcast_sw_info->ipv4_src) { + sbrec_ip_multicast_set_ip4_src(sb, mcast_sw_info->ipv4_src); } } @@ -906,6 +975,7 @@ join_datapaths(struct northd_context *ctx, struct hmap *datapaths, NULL, nbr, NULL); ovs_list_push_back(nb_only, &od->list); } + init_mcast_info_for_datapath(od); ovs_list_push_back(lr_list, &od->lr_list); } } @@ -1999,6 +2069,13 @@ join_logical_ports(struct northd_context *ctx, break; } } + + /* If the router is multicast enabled then set relay on the switch + * datapath. + */ + if (peer->od && peer->od->mcast_info.rtr.relay) { + op->od->mcast_info.sw.flood_relay = true; + } } else if (op->nbrp && op->nbrp->peer && !op->derived) { struct ovn_port *peer = ovn_port_find(ports, op->nbrp->peer); if (peer) { @@ -2846,6 +2923,10 @@ struct multicast_group { static const struct multicast_group mc_flood = { MC_FLOOD, OVN_MCAST_FLOOD_TUNNEL_KEY }; +#define MC_MROUTER_FLOOD "_MC_mrouter_flood" +static const struct multicast_group mc_mrouter_flood = + { MC_MROUTER_FLOOD, OVN_MCAST_MROUTER_FLOOD_TUNNEL_KEY }; + #define MC_UNKNOWN "_MC_unknown" static const struct multicast_group mc_unknown = { MC_UNKNOWN, OVN_MCAST_UNKNOWN_TUNNEL_KEY }; @@ -2955,7 +3036,8 @@ ovn_multicast_update_sbrec(const struct ovn_multicast *mc, */ struct ovn_igmp_group_entry { struct ovs_list list_node; /* Linkage in the list of entries. */ - const struct sbrec_igmp_group *sb; + size_t n_ports; + struct ovn_port **ports; }; /* @@ -2964,12 +3046,13 @@ struct ovn_igmp_group_entry { */ struct ovn_igmp_group { struct hmap_node hmap_node; /* Index on 'datapath' and 'address'. */ + struct ovs_list list_node; /* Linkage in the per-dp igmp group list. 
*/ struct ovn_datapath *datapath; struct in6_addr address; /* Multicast IPv6-mapped-IPv4 or IPv4 address. */ struct multicast_group mcgroup; - struct ovs_list sb_entries; /* List of SB entries for this group. */ + struct ovs_list entries; /* List of SB entries for this group. */ }; static uint32_t @@ -2997,79 +3080,135 @@ ovn_igmp_group_find(struct hmap *igmp_groups, return NULL; } -static void +static struct ovn_igmp_group * ovn_igmp_group_add(struct northd_context *ctx, struct hmap *igmp_groups, struct ovn_datapath *datapath, - const struct sbrec_igmp_group *sb_igmp_group) + const struct in6_addr *address, + const char *address_s) { - struct in6_addr group_address; - ovs_be32 ipv4; - - if (ip_parse(sb_igmp_group->address, &ipv4)) { - group_address = in6_addr_mapped_ipv4(ipv4); - } else if (!ipv6_parse(sb_igmp_group->address, &group_address)) { - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); - VLOG_WARN_RL(&rl, "invalid IGMP group address: %s", - sb_igmp_group->address); - return; - } - struct ovn_igmp_group *igmp_group = - ovn_igmp_group_find(igmp_groups, datapath, &group_address); + ovn_igmp_group_find(igmp_groups, datapath, address); if (!igmp_group) { igmp_group = xmalloc(sizeof *igmp_group); const struct sbrec_multicast_group *mcgroup = - mcast_group_lookup(ctx->sbrec_mcast_group_by_name_dp, - sb_igmp_group->address, datapath->sb); + mcast_group_lookup(ctx->sbrec_mcast_group_by_name_dp, address_s, + datapath->sb); igmp_group->datapath = datapath; - igmp_group->address = group_address; + igmp_group->address = *address; if (mcgroup) { igmp_group->mcgroup.key = mcgroup->tunnel_key; add_tnlid(&datapath->mcast_info.group_tnlids, mcgroup->tunnel_key); } else { igmp_group->mcgroup.key = 0; } - igmp_group->mcgroup.name = sb_igmp_group->address; - ovs_list_init(&igmp_group->sb_entries); + igmp_group->mcgroup.name = address_s; + ovs_list_init(&igmp_group->entries); hmap_insert(igmp_groups, &igmp_group->hmap_node, - ovn_igmp_group_hash(datapath, 
&group_address)); + ovn_igmp_group_hash(datapath, address)); + ovs_list_push_back(&datapath->mcast_info.groups, + &igmp_group->list_node); + } + + return igmp_group; +} + +static bool +ovn_igmp_group_get_address(const struct sbrec_igmp_group *sb_igmp_group, + struct in6_addr *address) +{ + ovs_be32 ipv4; + + if (ip_parse(sb_igmp_group->address, &ipv4)) { + *address = in6_addr_mapped_ipv4(ipv4); + return true; + } + if (!ipv6_parse(sb_igmp_group->address, address)) { + return false; } + return true; +} +static struct ovn_port ** +ovn_igmp_group_get_ports(const struct sbrec_igmp_group *sb_igmp_group, + size_t *n_ports, struct hmap *ovn_ports) +{ + struct ovn_port **ports = xmalloc(sb_igmp_group->n_ports * sizeof *ports); + + *n_ports = 0; + for (size_t i = 0; i < sb_igmp_group->n_ports; i++) { + ports[(*n_ports)] = + ovn_port_find(ovn_ports, sb_igmp_group->ports[i]->logical_port); + if (ports[(*n_ports)]) { + (*n_ports)++; + } + } + + return ports; +} + +static void +ovn_igmp_group_add_entry(struct ovn_igmp_group *igmp_group, + struct ovn_port **ports, size_t n_ports) +{ struct ovn_igmp_group_entry *entry = xmalloc(sizeof *entry); - entry->sb = sb_igmp_group; - ovs_list_push_back(&igmp_group->sb_entries , &entry->list_node); + entry->ports = ports; + entry->n_ports = n_ports; + ovs_list_push_back(&igmp_group->entries, &entry->list_node); +} + +static void +ovn_igmp_group_destroy_entry(struct ovn_igmp_group_entry *entry) +{ + free(entry->ports); +} + +static bool +ovn_igmp_group_allocate_id(struct ovn_igmp_group *igmp_group) +{ + if (igmp_group->mcgroup.key == 0) { + struct mcast_info *mcast_info = &igmp_group->datapath->mcast_info; + igmp_group->mcgroup.key = ovn_mcast_group_allocate_key(mcast_info); + } + + if (igmp_group->mcgroup.key == 0) { + return false; + } + + return true; } static void ovn_igmp_group_aggregate_ports(struct ovn_igmp_group *igmp_group, - struct hmap *ovn_ports, struct hmap *mcast_groups) { struct ovn_igmp_group_entry *entry; - LIST_FOR_EACH_POP 
(entry, list_node, &igmp_group->sb_entries) { - size_t n_oports = 0; - struct ovn_port **oports = - xmalloc(entry->sb->n_ports * sizeof *oports); - - for (size_t i = 0; i < entry->sb->n_ports; i++) { - oports[n_oports] = - ovn_port_find(ovn_ports, entry->sb->ports[i]->logical_port); - if (oports[n_oports]) { - n_oports++; - } - } - + LIST_FOR_EACH_POP (entry, list_node, &igmp_group->entries) { ovn_multicast_add_ports(mcast_groups, igmp_group->datapath, - &igmp_group->mcgroup, oports, n_oports); - free(oports); + &igmp_group->mcgroup, entry->ports, + entry->n_ports); + + ovn_igmp_group_destroy_entry(entry); free(entry); } + + /* Add ports to mrouters to the IGMP group. Traffic might need to be + * routed by the mrouter. + */ + for (size_t i = 0; i < igmp_group->datapath->n_router_ports; i++) { + struct ovn_port *rtr_port = + igmp_group->datapath->router_ports[i]->peer; + + if (rtr_port->od && rtr_port->od->mcast_info.rtr.relay) { + ovn_multicast_add(mcast_groups, &igmp_group->mcgroup, + igmp_group->datapath->router_ports[i]); + } + } } static void @@ -3079,10 +3218,12 @@ ovn_igmp_group_destroy(struct hmap *igmp_groups, if (igmp_group) { struct ovn_igmp_group_entry *entry; - LIST_FOR_EACH_POP (entry, list_node, &igmp_group->sb_entries) { + LIST_FOR_EACH_POP (entry, list_node, &igmp_group->entries) { + ovn_igmp_group_destroy_entry(entry); free(entry); } hmap_remove(igmp_groups, &igmp_group->hmap_node); + ovs_list_remove(&igmp_group->list_node); free(igmp_group); } } @@ -5282,7 +5423,9 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, continue; } - if (od->mcast_info.enabled) { + struct mcast_switch_info *mcast_sw_info = &od->mcast_info.sw; + + if (mcast_sw_info->enabled) { /* Punt IGMP traffic to controller. 
*/ ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "ip4 && ip.proto == 2", "igmp;"); @@ -5295,9 +5438,16 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, "outport = \""MC_FLOOD"\"; output;"); /* Drop unregistered IP multicast if not allowed. */ - if (!od->mcast_info.flood_unregistered) { - ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 80, - "ip4 && ip4.mcast", "drop;"); + if (!mcast_sw_info->flood_unregistered) { + /* Forward unregistered IP multicast to mrouter (if any). */ + if (mcast_sw_info->flood_relay) { + ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 80, + "ip4 && ip4.mcast", + "outport = \""MC_MROUTER_FLOOD"\"; output;"); + } else { + ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 80, + "ip4 && ip4.mcast", "drop;"); + } } } @@ -5314,12 +5464,13 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, continue; } - struct mcast_info *mcast_info = &igmp_group->datapath->mcast_info; + struct mcast_switch_info *mcast_sw_info = + &igmp_group->datapath->mcast_info.sw; - if (mcast_info->active_flows >= mcast_info->table_size) { + if (mcast_sw_info->active_flows >= mcast_sw_info->table_size) { continue; } - mcast_info->active_flows++; + mcast_sw_info->active_flows++; ds_clear(&match); ds_clear(&actions); @@ -6201,11 +6352,10 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, continue; } - /* L3 admission control: drop multicast and broadcast source, localhost - * source or destination, and zero network source or destination + /* L3 admission control: drop broadcast source, localhost source or + * destination, and zero network source or destination * (priority 100). */ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100, - "ip4.mcast || " "ip4.src == 255.255.255.255 || " "ip4.src == 127.0.0.0/8 || " "ip4.dst == 127.0.0.0/8 || " @@ -6213,6 +6363,27 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, "ip4.dst == 0.0.0.0/8", "drop;"); + /* Allow known multicast if enabled (priority 96). 
*/ + if (od->mcast_info.rtr.relay) { + struct ovn_igmp_group *igmp_group; + + LIST_FOR_EACH (igmp_group, list_node, &od->mcast_info.groups) { + ds_clear(&match); + ds_clear(&actions); + ds_put_format(&match, "ip4 && ip4.dst == %s ", + igmp_group->mcgroup.name); + ds_put_format(&actions, "outport = \"%s\"; next;", + igmp_group->mcgroup.name); + + ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 96, + ds_cstr(&match), ds_cstr(&actions)); + } + } + + /* Drop all unknown IP multicast (priority 95). */ + ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 95, + "ip4.mcast", "drop;"); + /* ARP reply handling. Use ARP replies to populate the logical * router's ARP table. */ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "arp.op == 2", @@ -7483,6 +7654,17 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, } } + /* IP Multicast routing is decided in S_ROUTER_IN_IP_INPUT. Here we + * just adjust TTL and advance to next table (priority 500). + */ + HMAP_FOR_EACH (od, key_node, datapaths) { + if (!od->nbr) { + continue; + } + ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 500, + "ip4.mcast", "ip.ttl--; next;"); + } + /* Logical router ingress table 8: Policy. * * A packet that arrives at this table is an IP packet that should be @@ -7513,10 +7695,24 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, /* Local router ingress table 9: ARP Resolution. * - * Any packet that reaches this table is an IP packet whose next-hop IP - * address is in reg0. (ip4.dst is the final destination.) This table - * resolves the IP address in reg0 into an output port in outport and an - * Ethernet address in eth.dst. */ + * Multicast packets already have the outport set so just advance to next + * table (priority 500). */ + HMAP_FOR_EACH (od, key_node, datapaths) { + if (!od->nbr) { + continue; + } + + ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 500, + "ip4.mcast", "next;"); + } + + /* Local router ingress table 9: ARP Resolution. 
+ * + * Any unicast packet that reaches this table is an IP packet whose + * next-hop IP address is in reg0. (ip4.dst is the final destination.) + * This table resolves the IP address in reg0 into an output port in + * outport and an Ethernet address in eth.dst. + */ HMAP_FOR_EACH (op, key_node, ports) { if (op->nbsp && !lsp_is_enabled(op->nbsp)) { continue; @@ -7998,9 +8194,13 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;"); } - /* Logical router egress table 1: Delivery (priority 100). + /* Logical router egress table 1: Delivery (priority 100-110). * - * Priority 100 rules deliver packets to enabled logical ports. */ + * Priority 100 rules deliver packets to enabled logical ports. + * Priority 110 rules match multicast packets and update the source + * mac before delivering to enabled logical ports. IP multicast traffic + * bypasses S_ROUTER_IN_IP_ROUTING route lookups. + */ HMAP_FOR_EACH (op, key_node, ports) { if (!op->nbrp) { continue; @@ -8020,6 +8220,19 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, continue; } + /* If multicast relay is enabled then also adjust source mac for IP + * multicast traffic. 
+ */ + if (op->od->mcast_info.rtr.relay) { + ds_clear(&match); + ds_clear(&actions); + ds_put_format(&match, "ip4.mcast && outport == %s", op->json_key); + ds_put_format(&actions, "eth.src = %s; output;", + op->lrp_networks.ea_s); + ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 110, + ds_cstr(&match), ds_cstr(&actions)); + } + ds_clear(&match); ds_put_format(&match, "outport == %s", op->json_key); ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100, @@ -8570,7 +8783,7 @@ build_ip_mcast(struct northd_context *ctx, struct hmap *datapaths) if (!ip_mcast) { ip_mcast = sbrec_ip_multicast_insert(ctx->ovnsb_txn); } - store_mcast_info_for_datapath(ip_mcast, od); + store_mcast_info_for_switch_datapath(ip_mcast, od); } /* Delete southbound records without northbound matches. */ @@ -8602,6 +8815,14 @@ build_mcast_groups(struct northd_context *ctx, if (lsp_is_enabled(op->nbsp)) { ovn_multicast_add(mcast_groups, &mc_flood, op); + + /* If this port is connected to a multicast router then add it + * to the MC_MROUTER_FLOOD group. + */ + if (op->od->mcast_info.sw.flood_relay && op->peer && + op->peer->od && op->peer->od->mcast_info.rtr.relay) { + ovn_multicast_add(mcast_groups, &mc_mrouter_flood, op); + } } } @@ -8624,10 +8845,61 @@ build_mcast_groups(struct northd_context *ctx, continue; } + struct in6_addr group_address; + if (!ovn_igmp_group_get_address(sb_igmp, &group_address)) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); + VLOG_WARN_RL(&rl, "invalid IGMP group address: %s", + sb_igmp->address); + continue; + } + /* Add the IGMP group entry. Will also try to allocate an ID for it * if the multicast group already exists. */ - ovn_igmp_group_add(ctx, igmp_groups, od, sb_igmp); + struct ovn_igmp_group *igmp_group = + ovn_igmp_group_add(ctx, igmp_groups, od, &group_address, + sb_igmp->address); + + /* Extract the IGMP group ports from the SB entry and store them + * in the IGMP group. 
+ */ + size_t n_igmp_ports; + struct ovn_port **igmp_ports = + ovn_igmp_group_get_ports(sb_igmp, &n_igmp_ports, ports); + ovn_igmp_group_add_entry(igmp_group, igmp_ports, n_igmp_ports); + } + + /* Build IGMP groups for multicast routers with relay enabled. The router + * IGMP groups are based on the groups learnt by their multicast enabled + * peers. + */ + struct ovn_datapath *od; + HMAP_FOR_EACH (od, key_node, datapaths) { + + if (ovs_list_is_empty(&od->mcast_info.groups)) { + continue; + } + + for (size_t i = 0; i < od->n_router_ports; i++) { + struct ovn_port *router_port = od->router_ports[i]->peer; + + if (!router_port || !router_port->od || + !router_port->od->mcast_info.rtr.relay) { + continue; + } + + struct ovn_igmp_group *igmp_group; + LIST_FOR_EACH (igmp_group, list_node, &od->mcast_info.groups) { + struct ovn_igmp_group *igmp_group_rtr = + ovn_igmp_group_add(ctx, igmp_groups, router_port->od, + &igmp_group->address, + igmp_group->mcgroup.name); + struct ovn_port **router_igmp_ports = + xmalloc(sizeof *router_igmp_ports); + router_igmp_ports[0] = router_port; + ovn_igmp_group_add_entry(igmp_group_rtr, router_igmp_ports, 1); + } + } } /* Walk the aggregated IGMP groups and allocate IDs for new entries. @@ -8635,21 +8907,17 @@ build_mcast_groups(struct northd_context *ctx, */ struct ovn_igmp_group *igmp_group, *igmp_group_next; HMAP_FOR_EACH_SAFE (igmp_group, igmp_group_next, hmap_node, igmp_groups) { - if (igmp_group->mcgroup.key == 0) { - struct mcast_info *mcast_info = &igmp_group->datapath->mcast_info; - igmp_group->mcgroup.key = ovn_mcast_group_allocate_key(mcast_info); - } - /* If we ran out of keys just destroy the entry. */ - if (igmp_group->mcgroup.key == 0) { + if (!ovn_igmp_group_allocate_id(igmp_group)) { + /* If we ran out of keys just destroy the entry. 
*/ ovn_igmp_group_destroy(igmp_groups, igmp_group); continue; } - /* Aggregate the ports from all SB entries corresponding to this + /* Aggregate the ports from all entries corresponding to this * group. */ - ovn_igmp_group_aggregate_ports(igmp_group, ports, mcast_groups); + ovn_igmp_group_aggregate_ports(igmp_group, mcast_groups); } } diff --git a/ovn-nb.xml b/ovn-nb.xml index f5f10a5..db8cc20 100644 --- a/ovn-nb.xml +++ b/ovn-nb.xml @@ -1526,6 +1526,12 @@ address. </p> </column> + <column name="options" key="mcast_relay" type='{"type": "boolean"}'> + <p> + Enables/disables IP multicast relay between logical switches + connected to the logical router. Default: False. + </p> + </column> </group> <group title="Common Columns"> diff --git a/tests/ovn.at b/tests/ovn.at index 71eb390..3187419 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -14721,12 +14721,12 @@ AT_CHECK([ovn-sbctl get controller_event $uuid seq_num], [0], [dnl OVN_CLEANUP([hv1], [hv2]) AT_CLEANUP -AT_SETUP([ovn -- IGMP snoop/querier]) +AT_SETUP([ovn -- IGMP snoop/querier/relay]) AT_SKIP_IF([test $HAVE_PYTHON = no]) ovn_start # Logical network: -# Two independent logical switches (sw1 and sw2). +# Three logical switches (sw1-sw3) connected to a logical router (rtr). # sw1: # - subnet 10.0.0.0/8 # - 2 ports bound on hv1 (sw1-p11, sw1-p12) @@ -14736,6 +14736,10 @@ ovn_start # - 1 port bound on hv1 (sw2-p1) # - 1 port bound on hv2 (sw2-p2) # - IGMP Querier from 20.0.0.254 +# sw3: +# - subnet 30.0.0.0/8 +# - 1 port bound on hv1 (sw3-p1) +# - 1 port bound on hv2 (sw3-p2) reset_pcap_file() { local iface=$1 @@ -14812,29 +14816,47 @@ store_igmp_v3_query() { } # -# send_ip_multicast_pkt INPORT HV ETH_SRC ETH_DST IP_SRC IP_DST IP_LEN -# IP_PROTO DATA OUTFILE +# send_ip_multicast_pkt INPORT HV ETH_SRC ETH_DST IP_SRC IP_DST IP_LEN TTL +# IP_CHKSUM IP_PROTO DATA # # This shell function causes an IP multicast packet to be received on INPORT # of HV. # The hexdump of the packet is stored in OUTFILE. 
# send_ip_multicast_pkt() { - local inport=$1 hv=$2 eth_src=$3 eth_dst=$4 ip_src=$5 ip_dst=$6 - local ip_len=$7 ip_chksum=$8 proto=$9 data=${10} outfile=${11} - - local ip_ttl=20 + local inport=$1 hv=$2 eth_src=$3 eth_dst=$4 + local ip_src=$5 ip_dst=$6 ip_len=$7 ip_ttl=$8 ip_chksum=$9 proto=${10} + local data=${11} local eth=${eth_dst}${eth_src}0800 local ip=450000${ip_len}95f14000${ip_ttl}${proto}${ip_chksum}${ip_src}${ip_dst} local packet=${eth}${ip}${data} as $hv ovs-appctl netdev-dummy/receive ${inport} ${packet} +} + +# +# store_ip_multicast_pkt ETH_SRC ETH_DST IP_SRC IP_DST IP_LEN TTL +# IP_CHKSUM IP_PROTO DATA OUTFILE +# +# This shell function builds an IP multicast packet and stores the hexdump of +# the packet in OUTFILE. +# +store_ip_multicast_pkt() { + local eth_src=$1 eth_dst=$2 + local ip_src=$3 ip_dst=$4 ip_len=$5 ip_ttl=$6 ip_chksum=$7 proto=$8 + local data=$9 outfile=${10} + + local eth=${eth_dst}${eth_src}0800 + local ip=450000${ip_len}95f14000${ip_ttl}${proto}${ip_chksum}${ip_src}${ip_dst} + local packet=${eth}${ip}${data} + echo ${packet} >> ${outfile} } ovn-nbctl ls-add sw1 ovn-nbctl ls-add sw2 +ovn-nbctl ls-add sw3 ovn-nbctl lsp-add sw1 sw1-p11 ovn-nbctl lsp-add sw1 sw1-p12 @@ -14842,6 +14864,26 @@ ovn-nbctl lsp-add sw1 sw1-p21 ovn-nbctl lsp-add sw1 sw1-p22 ovn-nbctl lsp-add sw2 sw2-p1 ovn-nbctl lsp-add sw2 sw2-p2 +ovn-nbctl lsp-add sw3 sw3-p1 +ovn-nbctl lsp-add sw3 sw3-p2 + +ovn-nbctl lr-add rtr +ovn-nbctl lrp-add rtr rtr-sw1 00:00:00:00:01:00 10.0.0.254/24 +ovn-nbctl lrp-add rtr rtr-sw2 00:00:00:00:02:00 20.0.0.254/24 +ovn-nbctl lrp-add rtr rtr-sw3 00:00:00:00:03:00 30.0.0.254/24 + +ovn-nbctl lsp-add sw1 sw1-rtr \ + -- lsp-set-type sw1-rtr router \ + -- lsp-set-addresses sw1-rtr 00:00:00:00:01:00 \ + -- lsp-set-options sw1-rtr router-port=rtr-sw1 +ovn-nbctl lsp-add sw2 sw2-rtr \ + -- lsp-set-type sw2-rtr router \ + -- lsp-set-addresses sw2-rtr 00:00:00:00:02:00 \ + -- lsp-set-options sw2-rtr router-port=rtr-sw2 +ovn-nbctl lsp-add sw3 sw3-rtr \ + -- 
lsp-set-type sw3-rtr router \ + -- lsp-set-addresses sw3-rtr 00:00:00:00:03:00 \ + -- lsp-set-options sw3-rtr router-port=rtr-sw3 net_add n1 sim_add hv1 @@ -14863,6 +14905,11 @@ ovs-vsctl -- add-port br-int hv1-vif3 -- \ options:tx_pcap=hv1/vif3-tx.pcap \ options:rxq_pcap=hv1/vif3-rx.pcap \ ofport-request=1 +ovs-vsctl -- add-port br-int hv1-vif4 -- \ + set interface hv1-vif4 external-ids:iface-id=sw3-p1 \ + options:tx_pcap=hv1/vif4-tx.pcap \ + options:rxq_pcap=hv1/vif4-rx.pcap \ + ofport-request=1 sim_add hv2 as hv2 @@ -14883,12 +14930,18 @@ ovs-vsctl -- add-port br-int hv2-vif3 -- \ options:tx_pcap=hv2/vif3-tx.pcap \ options:rxq_pcap=hv2/vif3-rx.pcap \ ofport-request=1 +ovs-vsctl -- add-port br-int hv2-vif4 -- \ + set interface hv2-vif4 external-ids:iface-id=sw3-p2 \ + options:tx_pcap=hv2/vif4-tx.pcap \ + options:rxq_pcap=hv2/vif4-rx.pcap \ + ofport-request=1 OVN_POPULATE_ARP # Enable IGMP snooping on sw1. -ovn-nbctl set Logical_Switch sw1 other_config:mcast_querier="false" -ovn-nbctl set Logical_Switch sw1 other_config:mcast_snoop="true" +ovn-nbctl set Logical_Switch sw1 \ + other_config:mcast_querier="false" \ + other_config:mcast_snoop="true" # No IGMP query should be generated by sw1 (mcast_querier="false"). 
truncate -s 0 expected @@ -14921,9 +14974,12 @@ truncate -s 0 expected truncate -s 0 expected_empty send_ip_multicast_pkt hv1-vif2 hv1 \ 000000000001 01005e000144 \ - $(ip_to_hex 10 0 0 42) $(ip_to_hex 239 0 1 68) 1e ca70 11 \ - e518e518000a3b3a0000 \ - expected + $(ip_to_hex 10 0 0 42) $(ip_to_hex 239 0 1 68) 1e 20 ca70 11 \ + e518e518000a3b3a0000 +store_ip_multicast_pkt \ + 000000000001 01005e000144 \ + $(ip_to_hex 10 0 0 42) $(ip_to_hex 239 0 1 68) 1e 20 ca70 11 \ + e518e518000a3b3a0000 expected OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [expected]) OVN_CHECK_PACKETS([hv2/vif1-tx.pcap], [expected]) @@ -14944,17 +15000,19 @@ OVS_WAIT_UNTIL([ test "${total_entries}" = "1" ]) -# Send traffic traffic and make sure it gets forwarded only on the port that -# joined. +# Send traffic and make sure it gets forwarded only on the port that joined. as hv1 reset_pcap_file hv1-vif1 hv1/vif1 as hv2 reset_pcap_file hv2-vif1 hv2/vif1 truncate -s 0 expected truncate -s 0 expected_empty send_ip_multicast_pkt hv1-vif2 hv1 \ 000000000001 01005e000144 \ - $(ip_to_hex 10 0 0 42) $(ip_to_hex 239 0 1 68) 1e ca70 11 \ - e518e518000a3b3a0000 \ - expected + $(ip_to_hex 10 0 0 42) $(ip_to_hex 239 0 1 68) 1e 20 ca70 11 \ + e518e518000a3b3a0000 +store_ip_multicast_pkt \ + 000000000001 01005e000144 \ + $(ip_to_hex 10 0 0 42) $(ip_to_hex 239 0 1 68) 1e 20 ca70 11 \ + e518e518000a3b3a0000 expected OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [expected_empty]) OVN_CHECK_PACKETS([hv2/vif1-tx.pcap], [expected]) @@ -14988,6 +15046,111 @@ sleep 1 OVN_CHECK_PACKETS([hv1/vif3-tx.pcap], [expected]) OVN_CHECK_PACKETS([hv2/vif3-tx.pcap], [expected]) +# Disable IGMP querier on sw2. +ovn-nbctl set Logical_Switch sw2 \ + other_config:mcast_querier="false" + +# Enable IGMP snooping on sw3. +ovn-nbctl set Logical_Switch sw3 \ + other_config:mcast_querier="false" \ + other_config:mcast_snoop="true" + +# Send traffic from sw3 and make sure rtr doesn't relay it. 
+truncate -s 0 expected_empty + +as hv1 reset_pcap_file hv1-vif1 hv1/vif1 +as hv1 reset_pcap_file hv1-vif2 hv1/vif2 +as hv1 reset_pcap_file hv1-vif3 hv1/vif3 +as hv1 reset_pcap_file hv1-vif4 hv1/vif4 +as hv2 reset_pcap_file hv2-vif1 hv2/vif1 +as hv2 reset_pcap_file hv2-vif2 hv2/vif2 +as hv2 reset_pcap_file hv2-vif3 hv2/vif3 +as hv2 reset_pcap_file hv2-vif4 hv2/vif4 + +send_ip_multicast_pkt hv2-vif4 hv2 \ + 000000000001 01005e000144 \ + $(ip_to_hex 10 0 0 42) $(ip_to_hex 239 0 1 68) 1e 20 ca70 11 \ + e518e518000a3b3a0000 + +# Sleep a bit to make sure no traffic is received and then check. +sleep 1 +OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [expected_empty]) +OVN_CHECK_PACKETS([hv2/vif3-tx.pcap], [expected_empty]) +OVN_CHECK_PACKETS([hv1/vif4-tx.pcap], [expected_empty]) +OVN_CHECK_PACKETS([hv1/vif2-tx.pcap], [expected_empty]) +OVN_CHECK_PACKETS([hv1/vif3-tx.pcap], [expected_empty]) +OVN_CHECK_PACKETS([hv2/vif1-tx.pcap], [expected_empty]) +OVN_CHECK_PACKETS([hv2/vif2-tx.pcap], [expected_empty]) +OVN_CHECK_PACKETS([hv2/vif4-tx.pcap], [expected_empty]) + +# Enable IGMP relay on rtr +ovn-nbctl set logical_router rtr \ + options:mcast_relay="true" + +# Inject IGMP Join for 239.0.1.68 on sw1-p11. +send_igmp_v3_report hv1-vif1 hv1 \ + 000000000001 $(ip_to_hex 10 0 0 1) f9f8 \ + $(ip_to_hex 239 0 1 68) 04 e9b9 \ + /dev/null +# Inject IGMP Join for 239.0.1.68 on sw2-p2. +send_igmp_v3_report hv2-vif3 hv2 \ + 000000000001 $(ip_to_hex 10 0 0 1) f9f8 \ + $(ip_to_hex 239 0 1 68) 04 e9b9 \ + /dev/null +# Inject IGMP Join for 239.0.1.68 on sw3-p1. +send_igmp_v3_report hv1-vif4 hv1 \ + 000000000001 $(ip_to_hex 10 0 0 1) f9f8 \ + $(ip_to_hex 239 0 1 68) 04 e9b9 \ + /dev/null + +# Check that the IGMP Group is learned by all switches. +OVS_WAIT_UNTIL([ + total_entries=`ovn-sbctl find IGMP_Group | grep "239.0.1.68" | wc -l` + test "${total_entries}" = "3" +]) + +# Send traffic from sw3 and make sure it is relayed by rtr +# to the switches and ports that joined.
+truncate -s 0 expected_routed_sw1 +truncate -s 0 expected_routed_sw2 +truncate -s 0 expected_switched +truncate -s 0 expected_empty + +as hv1 reset_pcap_file hv1-vif1 hv1/vif1 +as hv1 reset_pcap_file hv1-vif2 hv1/vif2 +as hv1 reset_pcap_file hv1-vif3 hv1/vif3 +as hv1 reset_pcap_file hv1-vif4 hv1/vif4 +as hv2 reset_pcap_file hv2-vif1 hv2/vif1 +as hv2 reset_pcap_file hv2-vif2 hv2/vif2 +as hv2 reset_pcap_file hv2-vif3 hv2/vif3 +as hv2 reset_pcap_file hv2-vif4 hv2/vif4 + +send_ip_multicast_pkt hv2-vif4 hv2 \ + 000000000001 01005e000144 \ + $(ip_to_hex 10 0 0 42) $(ip_to_hex 239 0 1 68) 1e 20 ca70 11 \ + e518e518000a3b3a0000 +store_ip_multicast_pkt \ + 000000000100 01005e000144 \ + $(ip_to_hex 10 0 0 42) $(ip_to_hex 239 0 1 68) 1e 1f cb70 11 \ + e518e518000a3b3a0000 expected_routed_sw1 +store_ip_multicast_pkt \ + 000000000200 01005e000144 \ + $(ip_to_hex 10 0 0 42) $(ip_to_hex 239 0 1 68) 1e 1f cb70 11 \ + e518e518000a3b3a0000 expected_routed_sw2 +store_ip_multicast_pkt \ + 000000000001 01005e000144 \ + $(ip_to_hex 10 0 0 42) $(ip_to_hex 239 0 1 68) 1e 20 ca70 11 \ + e518e518000a3b3a0000 expected_switched + +OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [expected_routed_sw1]) +OVN_CHECK_PACKETS([hv2/vif3-tx.pcap], [expected_routed_sw2]) +OVN_CHECK_PACKETS([hv1/vif4-tx.pcap], [expected_switched]) +OVN_CHECK_PACKETS([hv1/vif2-tx.pcap], [expected_empty]) +OVN_CHECK_PACKETS([hv1/vif3-tx.pcap], [expected_empty]) +OVN_CHECK_PACKETS([hv2/vif1-tx.pcap], [expected_empty]) +OVN_CHECK_PACKETS([hv2/vif2-tx.pcap], [expected_empty]) +OVN_CHECK_PACKETS([hv2/vif4-tx.pcap], [expected_empty]) + OVN_CLEANUP([hv1], [hv2]) AT_CLEANUP -- 1.8.3.1 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev