On 01/02/2023 19:28, Petr Machata wrote:
> The MDB maintained by the bridge is limited. When the bridge is configured
> for IGMP / MLD snooping, a buggy or malicious client can easily exhaust its
> capacity. In SW datapath, the capacity is configurable through the
> IFLA_BR_MCAST_HASH_MAX parameter, but ultimately is finite. Obviously a
> similar limit exists in the HW datapath for purposes of offloading.
> 
> In order to prevent the issue of unilateral exhaustion of MDB resources,
> introduce two parameters in each of two contexts:
> 
> - Per-port and per-port-VLAN number of MDB entries that the port
>   is member in.
> 
> - Per-port and (when BROPT_MCAST_VLAN_SNOOPING_ENABLED is enabled)
>   per-port-VLAN maximum permitted number of MDB entries, or 0 for
>   no limit.
> 
> The per-port multicast context is used for tracking of MDB entries for the
> port as a whole. This is available for all bridges.
> 
> The per-port-VLAN multicast context is then only available on
> VLAN-filtering bridges on VLANs that have multicast snooping on.
> 
> With these changes in place, it will be possible to configure MDB limit for
> bridge as a whole, or any one port as a whole, or any single port-VLAN.
> 
> Note that unlike the global limit, exhaustion of the per-port and
> per-port-VLAN maximums does not cause disablement of multicast snooping.
> It is also permitted to configure the local limit larger than hash_max,
> even though that is not useful.
> 
> In this patch, introduce only the accounting for number of entries, and the
> max field itself, but not the means to toggle the max. The next patch
> introduces the netlink APIs to toggle and read the values.
> 
> Signed-off-by: Petr Machata <[email protected]>
> ---
> 
> Notes:
>     v2:
>     - In br_multicast_port_ngroups_inc_one(), bounce
>       if n>=max, not if n==max
>     - Adjust extack messages to mention ngroups, now that
>       the bounces appear when n>=max, not n==max
>     - In __br_multicast_enable_port_ctx(), do not reset
>       max to 0. Also do not count number of entries by
>       going through _inc, as that would end up incorrectly
>       bouncing the entries.
> 
>  net/bridge/br_multicast.c | 132 +++++++++++++++++++++++++++++++++++++-
>  net/bridge/br_private.h   |   2 +
>  2 files changed, 133 insertions(+), 1 deletion(-)
> 
> diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
> index 51b622afdb67..e7ae339a8757 100644
> --- a/net/bridge/br_multicast.c
> +++ b/net/bridge/br_multicast.c
> @@ -31,6 +31,7 @@
>  #include <net/ip6_checksum.h>
>  #include <net/addrconf.h>
>  #endif
> +#include <trace/events/bridge.h>
>  
>  #include "br_private.h"
>  #include "br_private_mcast_eht.h"
> @@ -234,6 +235,29 @@ br_multicast_pg_to_port_ctx(const struct 
> net_bridge_port_group *pg)
>       return pmctx;
>  }
>  
> +static struct net_bridge_mcast_port *
> +br_multicast_port_vid_to_port_ctx(struct net_bridge_port *port, u16 vid)
> +{
> +     struct net_bridge_mcast_port *pmctx = NULL;
> +     struct net_bridge_vlan *vlan;
> +
> +     lockdep_assert_held_once(&port->br->multicast_lock);
> +
> +     if (!br_opt_get(port->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED))
> +             return NULL;
> +
> +     /* Take RCU to access the vlan. */
> +     rcu_read_lock();
> +
> +     vlan = br_vlan_find(nbp_vlan_group_rcu(port), vid);
> +     if (vlan && !br_multicast_port_ctx_vlan_disabled(&vlan->port_mcast_ctx))
> +             pmctx = &vlan->port_mcast_ctx;
> +
> +     rcu_read_unlock();
> +
> +     return pmctx;
> +}
> +
>  /* when snooping we need to check if the contexts should be used
>   * in the following order:
>   * - if pmctx is non-NULL (port), check if it should be used
> @@ -668,6 +692,82 @@ void br_multicast_del_group_src(struct 
> net_bridge_group_src *src,
>       __br_multicast_del_group_src(src);
>  }
>  
> +static int
> +br_multicast_port_ngroups_inc_one(struct net_bridge_mcast_port *pmctx,
> +                               struct netlink_ext_ack *extack)
> +{
> +     if (pmctx->mdb_max_entries &&
> +         pmctx->mdb_n_entries >= pmctx->mdb_max_entries)

These should be using *_ONCE() because of the next patch.
KCSAN might be sad otherwise. :)

> +             return -E2BIG;
> +
> +     pmctx->mdb_n_entries++;

WRITE_ONCE()

> +     return 0;
> +}
> +
> +static void br_multicast_port_ngroups_dec_one(struct net_bridge_mcast_port 
> *pmctx)
> +{
> +     WARN_ON_ONCE(pmctx->mdb_n_entries-- == 0);

READ_ONCE()

> +}
> +
> +static int br_multicast_port_ngroups_inc(struct net_bridge_port *port,
> +                                      const struct br_ip *group,
> +                                      struct netlink_ext_ack *extack)
> +{
> +     struct net_bridge_mcast_port *pmctx;
> +     int err;
> +
> +     lockdep_assert_held_once(&port->br->multicast_lock);
> +
> +     /* Always count on the port context. */
> +     err = br_multicast_port_ngroups_inc_one(&port->multicast_ctx, extack);
> +     if (err) {
> +             NL_SET_ERR_MSG_FMT_MOD(extack, "Port is already in %u groups, 
> and mcast_max_groups=%u",
> +                                    port->multicast_ctx.mdb_n_entries,
> +                                    port->multicast_ctx.mdb_max_entries);

READ_ONCE()

> +             trace_br_mdb_full(port->dev, group);
> +             return err;
> +     }
> +
> +     /* Only count on the VLAN context if VID is given, and if snooping on
> +      * that VLAN is enabled.
> +      */
> +     if (!group->vid)
> +             return 0;
> +
> +     pmctx = br_multicast_port_vid_to_port_ctx(port, group->vid);
> +     if (!pmctx)
> +             return 0;
> +
> +     err = br_multicast_port_ngroups_inc_one(pmctx, extack);
> +     if (err) {
> +             NL_SET_ERR_MSG_FMT_MOD(extack, "Port-VLAN is already in %u 
> groups, and mcast_max_groups=%u",
> +                                    pmctx->mdb_n_entries,
> +                                    pmctx->mdb_max_entries);

READ_ONCE()

> +             trace_br_mdb_full(port->dev, group);
> +             goto dec_one_out;
> +     }
> +
> +     return 0;
> +
> +dec_one_out:
> +     br_multicast_port_ngroups_dec_one(&port->multicast_ctx);
> +     return err;
> +}
> +
> +static void br_multicast_port_ngroups_dec(struct net_bridge_port *port, u16 
> vid)
> +{
> +     struct net_bridge_mcast_port *pmctx;
> +
> +     lockdep_assert_held_once(&port->br->multicast_lock);
> +
> +     if (vid) {
> +             pmctx = br_multicast_port_vid_to_port_ctx(port, vid);
> +             if (pmctx)
> +                     br_multicast_port_ngroups_dec_one(pmctx);
> +     }
> +     br_multicast_port_ngroups_dec_one(&port->multicast_ctx);
> +}
> +
>  static void br_multicast_destroy_port_group(struct net_bridge_mcast_gc *gc)
>  {
>       struct net_bridge_port_group *pg;
> @@ -702,6 +802,7 @@ void br_multicast_del_pg(struct net_bridge_mdb_entry *mp,
>       } else {
>               br_multicast_star_g_handle_mode(pg, MCAST_INCLUDE);
>       }
> +     br_multicast_port_ngroups_dec(pg->key.port, pg->key.addr.vid);
>       hlist_add_head(&pg->mcast_gc.gc_node, &br->mcast_gc_list);
>       queue_work(system_long_wq, &br->mcast_gc_work);
>  
> @@ -1165,6 +1266,7 @@ struct net_bridge_mdb_entry 
> *br_multicast_new_group(struct net_bridge *br,
>               return mp;
>  
>       if (atomic_read(&br->mdb_hash_tbl.nelems) >= br->hash_max) {
> +             trace_br_mdb_full(br->dev, group);
>               br_mc_disabled_update(br->dev, false, NULL);
>               br_opt_toggle(br, BROPT_MULTICAST_ENABLED, false);
>               return ERR_PTR(-E2BIG);
> @@ -1288,11 +1390,16 @@ struct net_bridge_port_group 
> *br_multicast_new_port_group(
>                       struct netlink_ext_ack *extack)
>  {
>       struct net_bridge_port_group *p;
> +     int err;
> +
> +     err = br_multicast_port_ngroups_inc(port, group, extack);
> +     if (err)
> +             return NULL;
>  
>       p = kzalloc(sizeof(*p), GFP_ATOMIC);
>       if (unlikely(!p)) {
>               NL_SET_ERR_MSG_MOD(extack, "Couldn't allocate new port group");
> -             return NULL;
> +             goto dec_out;
>       }
>  
>       p->key.addr = *group;
> @@ -1326,18 +1433,22 @@ struct net_bridge_port_group 
> *br_multicast_new_port_group(
>  
>  free_out:
>       kfree(p);
> +dec_out:
> +     br_multicast_port_ngroups_dec(port, group->vid);
>       return NULL;
>  }
>  
>  void br_multicast_del_port_group(struct net_bridge_port_group *p)
>  {
>       struct net_bridge_port *port = p->key.port;
> +     __u16 vid = p->key.addr.vid;
>  
>       hlist_del_init(&p->mglist);
>       if (!br_multicast_is_star_g(&p->key.addr))
>               rhashtable_remove_fast(&port->br->sg_port_tbl, &p->rhnode,
>                                      br_sg_port_rht_params);
>       kfree(p);
> +     br_multicast_port_ngroups_dec(port, vid);
>  }
>  
>  void br_multicast_host_join(const struct net_bridge_mcast *brmctx,
> @@ -1951,6 +2062,25 @@ static void __br_multicast_enable_port_ctx(struct 
> net_bridge_mcast_port *pmctx)
>               br_ip4_multicast_add_router(brmctx, pmctx);
>               br_ip6_multicast_add_router(brmctx, pmctx);
>       }
> +
> +     if (br_multicast_port_ctx_is_vlan(pmctx)) {
> +             struct net_bridge_port_group *pg;
> +             u32 n = 0;
> +
> +             /* The mcast_n_groups counter might be wrong. First,
> +              * BR_VLFLAG_MCAST_ENABLED is toggled before temporary entries
> +              * are flushed, thus mcast_n_groups after the toggle does not
> +              * reflect the true values. And second, permanent entries added
> +              * while BR_VLFLAG_MCAST_ENABLED was disabled, are not reflected
> +              * either. Thus we have to refresh the counter.
> +              */
> +
> +             hlist_for_each_entry(pg, &pmctx->port->mglist, mglist) {
> +                     if (pg->key.addr.vid == pmctx->vlan->vid)
> +                             n++;
> +             }
> +             pmctx->mdb_n_entries = n;

WRITE_ONCE()

> +     }
>  }
>  
>  void br_multicast_enable_port(struct net_bridge_port *port)
> diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
> index e4069e27b5c6..49f411a0a1f1 100644
> --- a/net/bridge/br_private.h
> +++ b/net/bridge/br_private.h
> @@ -126,6 +126,8 @@ struct net_bridge_mcast_port {
>       struct hlist_node               ip6_rlist;
>  #endif /* IS_ENABLED(CONFIG_IPV6) */
>       unsigned char                   multicast_router;
> +     u32                             mdb_n_entries;
> +     u32                             mdb_max_entries;
>  #endif /* CONFIG_BRIDGE_IGMP_SNOOPING */
>  };
>  

Reply via email to