On 7/20/22 10:13, Ales Musil wrote: > Hi Dumitru, > Hi Ales,
> the code looks good to me, with one small nit see below. > Thanks for the review! > On Tue, Jul 19, 2022 at 10:43 PM Dumitru Ceara <dce...@redhat.com> wrote: > >> RFC 4541 "Considerations for Internet Group Management Protocol (IGMP) >> and Multicast Listener Discovery (MLD) Snooping Switches" [0] states: >> >> For IGMP packets: >> 2.1.1. IGMP Forwarding Rules >> 1) A snooping switch should forward IGMP Membership Reports only to >> those ports where multicast routers are attached. >> >> Alternatively stated: a snooping switch should not forward IGMP >> Membership Reports to ports on which only hosts are attached. An >> administrative control may be provided to override this >> restriction, allowing the report messages to be flooded to other >> ports. >> >> This is the main IGMP snooping functionality for the control >> path. >> >> Sending membership reports to other hosts can result, for IGMPv1 >> and IGMPv2, in unintentionally preventing a host from joining a >> specific multicast group. >> >> When an IGMPv1 or IGMPv2 host receives a membership report for a >> group address that it intends to join, the host will suppress its >> own membership report for the same group. This join or message >> suppression is a requirement for IGMPv1 and IGMPv2 hosts. >> However, if a switch does not receive a membership report from >> the >> host it will not forward multicast data to it. >> >> In OVN this translates to forwarding reports only on: >> a. ports where mrouters have been learned (IGMP queries were received on >> those ports) >> b. ports connecting a multicast enabled logical switch to a multicast >> relay enabled logical router (OVN mrouter) >> c. ports configured with mcast_flood_reports=true >> >> 2.1.2. Data Forwarding Rules >> >> 1) Packets with a destination IP address outside 224.0.0.X which are >> not IGMP should be forwarded according to group-based port >> membership tables and must also be forwarded on router ports. >> >> In OVN this translates to forwarding traffic for a group G to: >> a. all ports where G was learned >> b. all ports where an (external or OVN) mrouter was learned. >> c. all ports configured with mcast_flood=true >> >> IGMP queries are already snooped by ovn-controller. Just propagate the >> information about where mrouters are to the Southbound DB through means >> of a custom IGMP_Group (name="mrouters"). >> >> Snooped queries are then re-injected in the OVS pipeline with outport >> set to MC_FLOOD_L2 (only flood them in the L2 domain). >> >> Snooped reports are re-injected in the OVS pipeline with outport set to >> MC_MROUTER_FLOOD (flood them to snooped mrouters and OVN mrouters). >> >> The same behavior applies to IPv6 too (MLD). >> >> [0] https://datatracker.ietf.org/doc/html/rfc4541 >> >> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1933990 >> Reported-at: https://github.com/ovn-org/ovn/issues/126 >> Acked-by: Numan Siddique <num...@ovn.org> >> Signed-off-by: Dumitru Ceara <dce...@redhat.com> >> --- >> V2: >> - Added Numan's ack. >> --- >> controller/ip-mcast.c | 129 ++++++++++++++++++++++++++++++++----- >> controller/ip-mcast.h | 14 ++++ >> controller/pinctrl.c | 129 +++++++++++++++++++++++++++++++++---- >> lib/ip-mcast-index.h | 5 + >> lib/mcast-group-index.h | 4 - >> northd/northd.c | 54 +++++++++++---- >> northd/ovn-northd.8.xml | 6 -- >> tests/ovn-macros.at | 25 +++++++ >> tests/ovn.at | 164 >> ++++++++++++++++++++++++++++++++++++++++++++++- >> 9 files changed, 472 insertions(+), 58 deletions(-) >> >> diff --git a/controller/ip-mcast.c b/controller/ip-mcast.c >> index 9b0b4465a..a870fb29e 100644 >> --- a/controller/ip-mcast.c >> +++ b/controller/ip-mcast.c >> @@ -16,6 +16,7 @@ >> #include <config.h> >> >> #include "ip-mcast.h" >> +#include "ip-mcast-index.h" >> #include "lport.h" >> #include "lib/ovn-sb-idl.h" >> >> @@ -27,6 +28,18 @@ struct igmp_group_port { >> const struct sbrec_port_binding *port; >> }; >> >> +static const struct sbrec_igmp_group * >> +igmp_group_lookup_(struct ovsdb_idl_index *igmp_groups, >> + const char *addr_str, >> + const struct sbrec_datapath_binding *datapath, >> + const struct sbrec_chassis *chassis); >> + >> +static struct sbrec_igmp_group * >> +igmp_group_create_(struct ovsdb_idl_txn *idl_txn, >> + const char *addr_str, >> + const struct sbrec_datapath_binding *datapath, >> + const struct sbrec_chassis *chassis); >> + >> struct ovsdb_idl_index * >> igmp_group_index_create(struct ovsdb_idl *idl) >> { >> @@ -54,17 +67,16 @@ igmp_group_lookup(struct ovsdb_idl_index *igmp_groups, >> return NULL; >> } >> >> - struct sbrec_igmp_group *target = >> - sbrec_igmp_group_index_init_row(igmp_groups); >> - >> - sbrec_igmp_group_index_set_address(target, addr_str); >> - sbrec_igmp_group_index_set_datapath(target, datapath); >> - sbrec_igmp_group_index_set_chassis(target, chassis); >> + return igmp_group_lookup_(igmp_groups, addr_str, datapath, chassis); >> +} >> >> - const struct sbrec_igmp_group *g = >> - sbrec_igmp_group_index_find(igmp_groups, target); >> - sbrec_igmp_group_index_destroy_row(target); >> - return g; >> +const struct sbrec_igmp_group * >> +igmp_mrouter_lookup(struct ovsdb_idl_index *igmp_groups, >> + const struct sbrec_datapath_binding *datapath, >> + const struct sbrec_chassis *chassis) >> +{ >> + return igmp_group_lookup_(igmp_groups, OVN_IGMP_GROUP_MROUTERS, >> + datapath, chassis); >> } >> >> /* Creates and returns a new IGMP group based on an IPv4 (mapped in IPv6) >> or >> @@ -82,13 +94,16 @@ igmp_group_create(struct ovsdb_idl_txn *idl_txn, >> return NULL; >> } >> >> - struct sbrec_igmp_group *g = sbrec_igmp_group_insert(idl_txn); >> - >> - sbrec_igmp_group_set_address(g, addr_str); >> - sbrec_igmp_group_set_datapath(g, datapath); >> - sbrec_igmp_group_set_chassis(g, chassis); >> + return igmp_group_create_(idl_txn, addr_str, datapath, chassis); >> +} >> >> - return g; >> +struct sbrec_igmp_group * >> +igmp_mrouter_create(struct ovsdb_idl_txn *idl_txn, >> + const struct sbrec_datapath_binding *datapath, >> + const struct sbrec_chassis *chassis) >> +{ >> + return igmp_group_create_(idl_txn, OVN_IGMP_GROUP_MROUTERS, datapath, >> + chassis); >> } >> >> void >> @@ -140,6 +155,54 @@ igmp_group_update_ports(const struct sbrec_igmp_group >> *g, >> hmap_destroy(&old_ports); >> } >> >> +void >> +igmp_mrouter_update_ports(const struct sbrec_igmp_group *g, >> + struct ovsdb_idl_index *datapaths, >> + struct ovsdb_idl_index *port_bindings, >> + const struct mcast_snooping *ms) >> + OVS_REQ_RDLOCK(ms->rwlock) >> +{ >> + struct igmp_group_port *old_ports_storage = >> + (g->n_ports ? xmalloc(g->n_ports * sizeof *old_ports_storage) : >> NULL); >> + >> + struct hmap old_ports = HMAP_INITIALIZER(&old_ports); >> + >> + for (size_t i = 0; i < g->n_ports; i++) { >> + struct igmp_group_port *old_port = &old_ports_storage[i]; >> + >> + old_port->port = g->ports[i]; >> + hmap_insert(&old_ports, &old_port->hmap_node, >> + old_port->port->tunnel_key); >> + } >> + >> + struct mcast_mrouter_bundle *bundle; >> + uint64_t dp_key = g->datapath->tunnel_key; >> + >> + LIST_FOR_EACH (bundle, mrouter_node, &ms->mrouter_lru) { >> + uint32_t port_key = (uintptr_t)bundle->port; >> + const struct sbrec_port_binding *sbrec_port = >> + lport_lookup_by_key(datapaths, port_bindings, dp_key, >> port_key); >> + if (!sbrec_port) { >> + continue; >> + } >> + >> + struct hmap_node *node = hmap_first_with_hash(&old_ports, >> port_key); >> + if (!node) { >> + sbrec_igmp_group_update_ports_addvalue(g, sbrec_port); >> + } else { >> + hmap_remove(&old_ports, node); >> + } >> + } >> + >> + struct igmp_group_port *igmp_port; >> + HMAP_FOR_EACH_POP (igmp_port, hmap_node, &old_ports) { >> + sbrec_igmp_group_update_ports_delvalue(g, igmp_port->port); >> + } >> + >> + free(old_ports_storage); >> + hmap_destroy(&old_ports); >> +} >> + >> void >> igmp_group_delete(const struct sbrec_igmp_group *g) >> { >> @@ -162,3 +225,37 @@ igmp_group_cleanup(struct ovsdb_idl_txn >> *ovnsb_idl_txn, >> >> return true; >> } >> + >> +static const struct sbrec_igmp_group * >> +igmp_group_lookup_(struct ovsdb_idl_index *igmp_groups, >> + const char *addr_str, >> + const struct sbrec_datapath_binding *datapath, >> + const struct sbrec_chassis *chassis) >> +{ >> + struct sbrec_igmp_group *target = >> + sbrec_igmp_group_index_init_row(igmp_groups); >> + >> + sbrec_igmp_group_index_set_address(target, addr_str); >> + sbrec_igmp_group_index_set_datapath(target, datapath); >> + sbrec_igmp_group_index_set_chassis(target, chassis); >> + >> + const struct sbrec_igmp_group *g = >> + sbrec_igmp_group_index_find(igmp_groups, target); >> + sbrec_igmp_group_index_destroy_row(target); >> + return g; >> +} >> + >> +static struct sbrec_igmp_group * >> +igmp_group_create_(struct ovsdb_idl_txn *idl_txn, >> + const char *addr_str, >> + const struct sbrec_datapath_binding *datapath, >> + const struct sbrec_chassis *chassis) >> +{ >> + struct sbrec_igmp_group *g = sbrec_igmp_group_insert(idl_txn); >> + >> + sbrec_igmp_group_set_address(g, addr_str); >> + sbrec_igmp_group_set_datapath(g, datapath); >> + sbrec_igmp_group_set_chassis(g, chassis); >> + >> + return g; >> +} >> diff --git a/controller/ip-mcast.h b/controller/ip-mcast.h >> index b3447d4c7..326f39db1 100644 >> --- a/controller/ip-mcast.h >> +++ b/controller/ip-mcast.h >> @@ -30,12 +30,20 @@ const struct sbrec_igmp_group *igmp_group_lookup( >> const struct in6_addr *address, >> const struct sbrec_datapath_binding *datapath, >> const struct sbrec_chassis *chassis); >> +const struct sbrec_igmp_group *igmp_mrouter_lookup( >> + struct ovsdb_idl_index *igmp_groups, >> + const struct sbrec_datapath_binding *datapath, >> + const struct sbrec_chassis *chassis); >> >> struct sbrec_igmp_group *igmp_group_create( >> struct ovsdb_idl_txn *idl_txn, >> const struct in6_addr *address, >> const struct sbrec_datapath_binding *datapath, >> const struct sbrec_chassis *chassis); >> +struct sbrec_igmp_group *igmp_mrouter_create( >> + struct ovsdb_idl_txn *idl_txn, >> + const struct sbrec_datapath_binding *datapath, >> + const struct sbrec_chassis *chassis); >> >> void igmp_group_update_ports(const struct sbrec_igmp_group *g, >> struct ovsdb_idl_index *datapaths, >> @@ -43,6 +51,12 @@ void igmp_group_update_ports(const struct >> sbrec_igmp_group *g, >> const struct mcast_snooping *ms, >> const struct mcast_group *mc_group) >> OVS_REQ_RDLOCK(ms->rwlock); >> +void >> +igmp_mrouter_update_ports(const struct sbrec_igmp_group *g, >> + struct ovsdb_idl_index *datapaths, >> + struct ovsdb_idl_index *port_bindings, >> + const struct mcast_snooping *ms) >> + OVS_REQ_RDLOCK(ms->rwlock); >> >> void igmp_group_delete(const struct sbrec_igmp_group *g); >> >> diff --git a/controller/pinctrl.c b/controller/pinctrl.c >> index 38e8590af..2b22256ff 100644 >> --- a/controller/pinctrl.c >> +++ b/controller/pinctrl.c >> @@ -624,6 +624,39 @@ set_actions_and_enqueue_msg(struct rconn *swconn, >> ofpbuf_uninit(&ofpacts); >> } >> >> +/* Forwards a packet to 'out_port_key' even if that's on a remote >> + * hypervisor, i.e., the packet is re-injected in table >> OFTABLE_REMOTE_OUTPUT. >> + */ >> +static void >> +pinctrl_forward_pkt(struct rconn *swconn, int64_t dp_key, >> + int64_t in_port_key, int64_t out_port_key, >> + const struct dp_packet *pkt) >> +{ >> + /* Reinject the packet and flood it to all registered mrouters. */ >> + uint64_t ofpacts_stub[4096 / 8]; >> + struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub); >> + enum ofp_version version = rconn_get_version(swconn); >> + put_load(dp_key, MFF_LOG_DATAPATH, 0, 64, &ofpacts); >> + put_load(in_port_key, MFF_LOG_INPORT, 0, 32, &ofpacts); >> + put_load(out_port_key, MFF_LOG_OUTPORT, 0, 32, &ofpacts); >> + >> + struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts); >> + resubmit->in_port = OFPP_CONTROLLER; >> + resubmit->table_id = OFTABLE_REMOTE_OUTPUT; >> + >> + struct ofputil_packet_out po = { >> + .packet = dp_packet_data(pkt), >> + .packet_len = dp_packet_size(pkt), >> + .buffer_id = UINT32_MAX, >> + .ofpacts = ofpacts.data, >> + .ofpacts_len = ofpacts.size, >> + }; >> + match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER); >> + enum ofputil_protocol proto = >> ofputil_protocol_from_ofp_version(version); >> + queue_msg(swconn, ofputil_encode_packet_out(&po, proto)); >> + ofpbuf_uninit(&ofpacts); >> +} >> + >> static struct shash ipv6_prefixd; >> >> enum { >> @@ -5148,6 +5181,7 @@ ip_mcast_sync(struct ovsdb_idl_txn *ovnsb_idl_txn, >> } >> } >> >> + const struct sbrec_igmp_group *sbrec_ip_mrouter; >> const struct sbrec_igmp_group *sbrec_igmp; >> >> /* Then flush any IGMP_Group entries that are not needed anymore: >> @@ -5183,7 +5217,9 @@ ip_mcast_sync(struct ovsdb_idl_txn *ovnsb_idl_txn, >> continue; >> } >> >> - if (ip_parse(sbrec_igmp->address, &group_v4_addr)) { >> + if (!strcmp(sbrec_igmp->address, OVN_IGMP_GROUP_MROUTERS)) { >> + continue; >> + } else if (ip_parse(sbrec_igmp->address, &group_v4_addr)) { >> group_addr = in6_addr_mapped_ipv4(group_v4_addr); >> } else if (!ipv6_parse(sbrec_igmp->address, &group_addr)) { >> continue; >> @@ -5222,6 +5258,8 @@ ip_mcast_sync(struct ovsdb_idl_txn *ovnsb_idl_txn, >> struct mcast_group *mc_group; >> >> ovs_rwlock_rdlock(&ip_ms->ms->rwlock); >> + >> + /* Groups. */ >> LIST_FOR_EACH (mc_group, group_node, &ip_ms->ms->group_lru) { >> if (ovs_list_is_empty(&mc_group->bundle_lru)) { >> continue; >> @@ -5237,6 +5275,20 @@ ip_mcast_sync(struct ovsdb_idl_txn *ovnsb_idl_txn, >> sbrec_port_binding_by_key, ip_ms->ms, >> mc_group); >> } >> + >> + /* Mrouters. */ >> + sbrec_ip_mrouter = igmp_mrouter_lookup(sbrec_igmp_groups, >> + local_dp->datapath, >> + chassis); >> + if (!sbrec_ip_mrouter) { >> + sbrec_ip_mrouter = igmp_mrouter_create(ovnsb_idl_txn, >> + local_dp->datapath, >> + chassis); >> + } >> + igmp_mrouter_update_ports(sbrec_ip_mrouter, >> + sbrec_datapath_binding_by_key, >> + sbrec_port_binding_by_key, ip_ms->ms); >> + >> ovs_rwlock_unlock(&ip_ms->ms->rwlock); >> } >> >> @@ -5245,12 +5297,35 @@ ip_mcast_sync(struct ovsdb_idl_txn *ovnsb_idl_txn, >> } >> } >> >> +/* Reinject the packet and flood it to all registered mrouters (also those >> + * who are not local to this chassis). */ >> +static void >> +ip_mcast_forward_report(struct rconn *swconn, struct ip_mcast_snoop >> *ip_ms, >> + uint32_t orig_in_port_key, >> + const struct dp_packet *report) >> +{ >> + pinctrl_forward_pkt(swconn, ip_ms->dp_key, orig_in_port_key, >> + OVN_MCAST_MROUTER_FLOOD_TUNNEL_KEY, report); >> +} >> + >> + >> +static void >> +ip_mcast_forward_query(struct rconn *swconn, struct ip_mcast_snoop *ip_ms, >> + uint32_t orig_in_port_key, >> + const struct dp_packet *query) >> +{ >> + pinctrl_forward_pkt(swconn, ip_ms->dp_key, orig_in_port_key, >> + OVN_MCAST_FLOOD_L2_TUNNEL_KEY, query); >> +} >> + >> static bool >> -pinctrl_ip_mcast_handle_igmp(struct ip_mcast_snoop *ip_ms, >> +pinctrl_ip_mcast_handle_igmp(struct rconn *swconn, >> + struct ip_mcast_snoop *ip_ms, >> const struct flow *ip_flow, >> struct dp_packet *pkt_in, >> - void *port_key_data) >> + uint32_t in_port_key) >> { >> + void *port_key_data = (void *)(uintptr_t)in_port_key; >> const struct igmp_header *igmp; >> size_t offset; >> >> @@ -5264,6 +5339,8 @@ pinctrl_ip_mcast_handle_igmp(struct ip_mcast_snoop >> *ip_ms, >> >> ovs_be32 ip4 = ip_flow->igmp_group_ip4; >> bool group_change = false; >> + bool report = false; >> + bool query = false; >> > > IMO it would be better to use enum as those two cannot be set at the same > time IIUC. > The enum could also be reused in pinctrl_ip_mcast_handle_mld. > > I would prefer not addding a new enum, that would have to go to OVS code, as a new API in mcast-snooping.h (similar to mcast_snooping_is_query() and mcast_snooping_is_membership()), I think. What do you think of the following incremental instead? If it looks good to you I can fold it in and send a v3. Thanks, Dumitru diff --git a/controller/pinctrl.c b/controller/pinctrl.c index 2b22256ff..35d50b9c8 100644 --- a/controller/pinctrl.c +++ b/controller/pinctrl.c @@ -5339,8 +5339,6 @@ pinctrl_ip_mcast_handle_igmp(struct rconn *swconn, ovs_be32 ip4 = ip_flow->igmp_group_ip4; bool group_change = false; - bool report = false; - bool query = false; /* Only default VLAN is supported for now. */ ovs_rwlock_wrlock(&ip_ms->ms->rwlock); @@ -5350,25 +5348,21 @@ pinctrl_ip_mcast_handle_igmp(struct rconn *swconn, group_change = mcast_snooping_add_group4(ip_ms->ms, ip4, IP_MCAST_VLAN, port_key_data); - report = true; break; case IGMP_HOST_LEAVE_MESSAGE: group_change = mcast_snooping_leave_group4(ip_ms->ms, ip4, IP_MCAST_VLAN, port_key_data); - report = true; break; case IGMP_HOST_MEMBERSHIP_QUERY: group_change = mcast_snooping_add_mrouter(ip_ms->ms, IP_MCAST_VLAN, port_key_data); - query = true; break; case IGMPV3_HOST_MEMBERSHIP_REPORT: group_change = mcast_snooping_add_report(ip_ms->ms, pkt_in, IP_MCAST_VLAN, port_key_data); - report = true; break; } ovs_rwlock_unlock(&ip_ms->ms->rwlock); @@ -5376,9 +5370,9 @@ pinctrl_ip_mcast_handle_igmp(struct rconn *swconn, /* Forward reports to all registered mrouters and flood queries to * the whole L2 domain. */ - if (report) { + if (mcast_snooping_is_membership(ip_flow->tp_src)) { ip_mcast_forward_report(swconn, ip_ms, in_port_key, pkt_in); - } else if (query) { + } else if (mcast_snooping_is_query(ip_flow->tp_src)) { ip_mcast_forward_query(swconn, ip_ms, in_port_key, pkt_in); } return group_change; @@ -5408,8 +5402,6 @@ pinctrl_ip_mcast_handle_mld(struct rconn *swconn, } bool group_change = false; - bool report = false; - bool query = false; /* Only default VLAN is supported for now. */ ovs_rwlock_wrlock(&ip_ms->ms->rwlock); @@ -5423,7 +5415,6 @@ pinctrl_ip_mcast_handle_mld(struct rconn *swconn, mcast_snooping_add_mrouter(ip_ms->ms, IP_MCAST_VLAN, port_key_data); } - query = true; break; case MLD_REPORT: case MLD_DONE: @@ -5431,7 +5422,6 @@ pinctrl_ip_mcast_handle_mld(struct rconn *swconn, group_change = mcast_snooping_add_mld(ip_ms->ms, pkt_in, IP_MCAST_VLAN, port_key_data); - report = true; break; } ovs_rwlock_unlock(&ip_ms->ms->rwlock); @@ -5439,9 +5429,9 @@ pinctrl_ip_mcast_handle_mld(struct rconn *swconn, /* Forward reports to all registered mrouters and flood queries to * the whole L2 domain. */ - if (report) { + if (is_mld_report(ip_flow, NULL)) { ip_mcast_forward_report(swconn, ip_ms, in_port_key, pkt_in); - } else if (query) { + } else if (is_mld_query(ip_flow, NULL)) { ip_mcast_forward_query(swconn, ip_ms, in_port_key, pkt_in); } return group_change; _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev