Re: [PATCH][next] net: bridge: Fix missing return assignment from br_vlan_replay_one call

2021-03-24 Thread Nikolay Aleksandrov
On 24/03/2021 17:09, Colin King wrote:
> From: Colin Ian King 
> 
> The call to br_vlan_replay_one is returning an error return value but
> this is not being assigned to err and the following check on err is
> currently always false because err was initialized to zero. Fix this
> by assigning err.
> 
> Addresses-Coverity: ("'Constant' variable guards dead code")
> Fixes: 22f67cdfae6a ("net: bridge: add helper to replay VLANs installed on port")
> Signed-off-by: Colin Ian King 
> ---
>  net/bridge/br_vlan.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
> index ca8daccff217..7422691230b1 100644
> --- a/net/bridge/br_vlan.c
> +++ b/net/bridge/br_vlan.c
> @@ -1815,7 +1815,7 @@ int br_vlan_replay(struct net_device *br_dev, struct 
> net_device *dev,
>   if (!br_vlan_should_use(v))
>   continue;
>  
> - br_vlan_replay_one(nb, dev, &vlan, extack);
> + err = br_vlan_replay_one(nb, dev, &vlan, extack);
>   if (err)
>   return err;
>   }
> 

Thanks,
Acked-by: Nikolay Aleksandrov 



Re: [PATCH v4 net-next 05/11] net: bridge: add helper to replay VLANs installed on port

2021-03-23 Thread Nikolay Aleksandrov
On 23/03/2021 01:51, Vladimir Oltean wrote:
> From: Vladimir Oltean 
> 
> Currently this simple setup with DSA:
> 
> ip link add br0 type bridge vlan_filtering 1
> ip link add bond0 type bond
> ip link set bond0 master br0
> ip link set swp0 master bond0
> 
> will not work because the bridge has created the PVID in br_add_if ->
> nbp_vlan_init, and it has notified switchdev of the existence of VLAN 1,
> but that was too early, since swp0 was not yet a lower of bond0, so it
> had no reason to act upon that notification.
> 
> We need a helper in the bridge to replay the switchdev VLAN objects that
> were notified since the bridge port creation, because some of them may
> have been missed.
> 
> As opposed to the br_mdb_replay function, the vg->vlan_list write side
> protection is offered by the rtnl_mutex which is sleepable, so we don't
> need to queue up the objects in atomic context, we can replay them right
> away.
> 
> Signed-off-by: Vladimir Oltean 
> ---
>  include/linux/if_bridge.h | 10 ++
>  net/bridge/br_vlan.c  | 73 +++
>  2 files changed, 83 insertions(+)
> 

Same comments about the const qualifiers as the other patches.
The code looks good to me otherwise.

Acked-by: Nikolay Aleksandrov 

> diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
> index b564c4486a45..2cc35038a8ca 100644
> --- a/include/linux/if_bridge.h
> +++ b/include/linux/if_bridge.h
> @@ -111,6 +111,8 @@ int br_vlan_get_pvid_rcu(const struct net_device *dev, 
> u16 *p_pvid);
>  int br_vlan_get_proto(const struct net_device *dev, u16 *p_proto);
>  int br_vlan_get_info(const struct net_device *dev, u16 vid,
>struct bridge_vlan_info *p_vinfo);
> +int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
> +struct notifier_block *nb, struct netlink_ext_ack *extack);
>  #else
>  static inline bool br_vlan_enabled(const struct net_device *dev)
>  {
> @@ -137,6 +139,14 @@ static inline int br_vlan_get_info(const struct 
> net_device *dev, u16 vid,
>  {
>   return -EINVAL;
>  }
> +
> +static inline int br_vlan_replay(struct net_device *br_dev,
> +  struct net_device *dev,
> +  struct notifier_block *nb,
> +  struct netlink_ext_ack *extack)
> +{
> + return -EOPNOTSUPP;
> +}
>  #endif
>  
>  #if IS_ENABLED(CONFIG_BRIDGE)
> diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
> index 8829f621b8ec..ca8daccff217 100644
> --- a/net/bridge/br_vlan.c
> +++ b/net/bridge/br_vlan.c
> @@ -1751,6 +1751,79 @@ void br_vlan_notify(const struct net_bridge *br,
>   kfree_skb(skb);
>  }
>  
> +static int br_vlan_replay_one(struct notifier_block *nb,
> +   struct net_device *dev,
> +   struct switchdev_obj_port_vlan *vlan,
> +   struct netlink_ext_ack *extack)
> +{
> + struct switchdev_notifier_port_obj_info obj_info = {
> + .info = {
> + .dev = dev,
> + .extack = extack,
> + },
> + .obj = &vlan->obj,
> + };
> + int err;
> +
> + err = nb->notifier_call(nb, SWITCHDEV_PORT_OBJ_ADD, &obj_info);
> + return notifier_to_errno(err);
> +}
> +
> +int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
> +struct notifier_block *nb, struct netlink_ext_ack *extack)
> +{
> + struct net_bridge_vlan_group *vg;
> + struct net_bridge_vlan *v;
> + struct net_bridge_port *p;
> + struct net_bridge *br;
> + int err = 0;
> + u16 pvid;
> +
> + ASSERT_RTNL();
> +
> + if (!netif_is_bridge_master(br_dev))
> + return -EINVAL;
> +
> + if (!netif_is_bridge_master(dev) && !netif_is_bridge_port(dev))
> + return -EINVAL;
> +
> + if (netif_is_bridge_master(dev)) {
> + br = netdev_priv(dev);
> + vg = br_vlan_group(br);
> + p = NULL;
> + } else {
> + p = br_port_get_rtnl(dev);
> + if (WARN_ON(!p))
> + return -EINVAL;
> + vg = nbp_vlan_group(p);
> + br = p->br;
> + }
> +
> + if (!vg)
> + return 0;
> +
> + pvid = br_get_pvid(vg);
> +
> + list_for_each_entry(v, &vg->vlan_list, vlist) {
> + struct switchdev_obj_port_vlan vlan = {
> + .obj.orig_dev = dev,
> + .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
> + .flags = br_vlan_flags(v, pvid),
> +

Re: [PATCH v4 net-next 03/11] net: bridge: add helper to replay port and host-joined mdb entries

2021-03-23 Thread Nikolay Aleksandrov
On 23/03/2021 01:51, Vladimir Oltean wrote:
> From: Vladimir Oltean 
> 
> I have a system with DSA ports, and udhcpcd is configured to bring
> interfaces up as soon as they are created.
> 
> I create a bridge as follows:
> 
> ip link add br0 type bridge
> 
> As soon as I create the bridge and udhcpcd brings it up, I also have
> avahi which automatically starts sending IPv6 packets to advertise some
> local services, and because of that, the br0 bridge joins the following
> IPv6 groups due to the code path detailed below:
> 
> 33:33:ff:6d:c1:9c vid 0
> 33:33:00:00:00:6a vid 0
> 33:33:00:00:00:fb vid 0
> 
> br_dev_xmit
> -> br_multicast_rcv
>-> br_ip6_multicast_add_group
>   -> __br_multicast_add_group
>  -> br_multicast_host_join
> -> br_mdb_notify
> 
> This is all fine, but inside br_mdb_notify we have br_mdb_switchdev_host
> hooked up, and switchdev will attempt to offload the host joined groups
> to an empty list of ports. Of course nobody offloads them.
> 
> Then when we add a port to br0:
> 
> ip link set swp0 master br0
> 
> the bridge doesn't replay the host-joined MDB entries from br_add_if,
> and eventually the host joined addresses expire, and a switchdev
> notification for deleting it is emitted, but surprise, the original
> addition was already completely missed.
> 
> The strategy to address this problem is to replay the MDB entries (both
> the port ones and the host joined ones) when the new port joins the
> bridge, similar to what vxlan_fdb_replay does (in that case, its FDB can
> be populated and only then attached to a bridge that you offload).
> However there are 2 possibilities: the addresses can be 'pushed' by the
> bridge into the port, or the port can 'pull' them from the bridge.
> 
> Considering that in the general case, the new port can be really late to
> the party, and there may have been many other switchdev ports that
> already received the initial notification, we would like to avoid
> delivering duplicate events to them, since they might misbehave. And
> currently, the bridge calls the entire switchdev notifier chain, whereas
> for replaying it should just call the notifier block of the new guy.
> But the bridge doesn't know what is the new guy's notifier block, it
> just knows where the switchdev notifier chain is. So for simplification,
> we make this a driver-initiated pull for now, and the notifier block is
> passed as an argument.
> 
> To emulate the calling context for mdb objects (deferred and put on the
> blocking notifier chain), we must iterate under RCU protection through
> the bridge's mdb entries, queue them, and only call them once we're out
> of the RCU read-side critical section.
> 
> There was some opportunity for reuse between br_mdb_switchdev_host_port,
> br_mdb_notify and the newly added br_mdb_queue_one in how the switchdev
> mdb object is created, so a helper was created.
> 
> Suggested-by: Ido Schimmel 
> Signed-off-by: Vladimir Oltean 
> ---
>  include/linux/if_bridge.h |   9 +++
>  include/net/switchdev.h   |   1 +
>  net/bridge/br_mdb.c   | 148 +-
>  3 files changed, 141 insertions(+), 17 deletions(-)
> 

Absolutely the same comments here as for the fdb version.
The code looks correct.

Acked-by: Nikolay Aleksandrov 

> diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
> index ebd16495459c..f6472969bb44 100644
> --- a/include/linux/if_bridge.h
> +++ b/include/linux/if_bridge.h
> @@ -69,6 +69,8 @@ bool br_multicast_has_querier_anywhere(struct net_device 
> *dev, int proto);
>  bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto);
>  bool br_multicast_enabled(const struct net_device *dev);
>  bool br_multicast_router(const struct net_device *dev);
> +int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
> +   struct notifier_block *nb, struct netlink_ext_ack *extack);
>  #else
>  static inline int br_multicast_list_adjacent(struct net_device *dev,
>struct list_head *br_ip_list)
> @@ -93,6 +95,13 @@ static inline bool br_multicast_router(const struct 
> net_device *dev)
>  {
>   return false;
>  }
> +static inline int br_mdb_replay(struct net_device *br_dev,
> + struct net_device *dev,
> + struct notifier_block *nb,
> + struct netlink_ext_ack *extack)
> +{
> + return -EOPNOTSUPP;
> +}
>  #endif
>  
>  #if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_VLAN_FILTERING)
> diff --git a/include/net/switchdev.h b/include/net/switchdev.h
> index b7fc7d0f54e

Re: [PATCH v4 net-next 04/11] net: bridge: add helper to replay port and local fdb entries

2021-03-23 Thread Nikolay Aleksandrov
On 23/03/2021 01:51, Vladimir Oltean wrote:
> From: Vladimir Oltean 
> 
> When a switchdev port starts offloading a LAG that is already in a
> bridge and has an FDB entry pointing to it:
> 
> ip link set bond0 master br0
> bridge fdb add dev bond0 00:01:02:03:04:05 master static
> ip link set swp0 master bond0
> 
> the switchdev driver will have no idea that this FDB entry is there,
> because it missed the switchdev event emitted at its creation.
> 
> Ido Schimmel pointed this out during a discussion about challenges with
> switchdev offloading of stacked interfaces between the physical port and
> the bridge, and recommended to just catch that condition and deny the
> CHANGEUPPER event:
> https://lore.kernel.org/netdev/20210210105949.gb287...@shredder.lan/
> 
> But in fact, we might need to deal with the hard thing anyway, which is
> to replay all FDB addresses relevant to this port, because it isn't just
> static FDB entries, but also local addresses (ones that are not
> forwarded but terminated by the bridge). There, we can't just say 'oh
> yeah, there was an upper already so I'm not joining that'.
> 
> So, similar to the logic for replaying MDB entries, add a function that
> must be called by individual switchdev drivers and replays local FDB
> entries as well as ones pointing towards a bridge port. This time, we
> use the atomic switchdev notifier block, since that's what FDB entries
> expect for some reason.
> 

I get the reason to have both bridge and bridge port devices (although the bridge
is really unnecessary as it can be inferred from the port), but it looks kind of
weird at first glance, I mean we get all of the port's fdbs and all of the bridge
fdbs every time (dst == NULL). The code itself is correct and the alternative
to take only 1 net_device and act based on its type would add another
step to the process per-port which also doesn't sound good...
There are a few minor const nits below too, again if there is another version
please take care of them, for the patch:

Acked-by: Nikolay Aleksandrov 

> Reported-by: Ido Schimmel 
> Signed-off-by: Vladimir Oltean 
> ---
>  include/linux/if_bridge.h |  9 +++
>  net/bridge/br_fdb.c   | 50 +++
>  2 files changed, 59 insertions(+)
> 
> diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
> index f6472969bb44..b564c4486a45 100644
> --- a/include/linux/if_bridge.h
> +++ b/include/linux/if_bridge.h
> @@ -147,6 +147,8 @@ void br_fdb_clear_offload(const struct net_device *dev, 
> u16 vid);
>  bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag);
>  u8 br_port_get_stp_state(const struct net_device *dev);
>  clock_t br_get_ageing_time(struct net_device *br_dev);
> +int br_fdb_replay(struct net_device *br_dev, struct net_device *dev,
> +   struct notifier_block *nb);
>  #else
>  static inline struct net_device *
>  br_fdb_find_port(const struct net_device *br_dev,
> @@ -175,6 +177,13 @@ static inline clock_t br_get_ageing_time(struct 
> net_device *br_dev)
>  {
>   return 0;
>  }
> +
> +static inline int br_fdb_replay(struct net_device *br_dev,
> + struct net_device *dev,
> + struct notifier_block *nb)
> +{
> + return -EOPNOTSUPP;
> +}
>  #endif
>  
>  #endif
> diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
> index b7490237f3fc..698b79747d32 100644
> --- a/net/bridge/br_fdb.c
> +++ b/net/bridge/br_fdb.c
> @@ -726,6 +726,56 @@ static inline size_t fdb_nlmsg_size(void)
>   + nla_total_size(sizeof(u8)); /* NFEA_ACTIVITY_NOTIFY */
>  }
>  
> +static int br_fdb_replay_one(struct notifier_block *nb,
> +  struct net_bridge_fdb_entry *fdb,
> +  struct net_device *dev)
> +{
> + struct switchdev_notifier_fdb_info item;
> + int err;
> +
> + item.addr = fdb->key.addr.addr;
> + item.vid = fdb->key.vlan_id;
> + item.added_by_user = test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
> + item.offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags);
> + item.info.dev = dev;
> +
> + err = nb->notifier_call(nb, SWITCHDEV_FDB_ADD_TO_DEVICE, &item);
> + return notifier_to_errno(err);
> +}
> +
> +int br_fdb_replay(struct net_device *br_dev, struct net_device *dev,
> +   struct notifier_block *nb)

The devices can be const

> +{
> + struct net_bridge_fdb_entry *fdb;
> + struct net_bridge *br;
> + int err = 0;
> +
> + if (!netif_is_bridge_master(br_dev) || !netif_is_bridge_port(dev))
> + return -EINVAL;
> +
> + br = netdev_priv(br_dev);
> +
> + rcu_read_lock();
>

Re: [PATCH v4 net-next 02/11] net: bridge: add helper to retrieve the current ageing time

2021-03-23 Thread Nikolay Aleksandrov
On 23/03/2021 01:51, Vladimir Oltean wrote:
> From: Vladimir Oltean 
> 
> The SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME attribute is only emitted from:
> 
> sysfs/ioctl/netlink
> -> br_set_ageing_time
>-> __set_ageing_time
> 
> therefore not at bridge port creation time, so:
> (a) switchdev drivers have to hardcode the initial value for the address
> ageing time, because they didn't get any notification
> (b) that hardcoded value can be out of sync, if the user changes the
> ageing time before enslaving the port to the bridge
> 
> We need a helper in the bridge, such that switchdev drivers can query
> the current value of the bridge ageing time when they start offloading
> it.
> 
> Signed-off-by: Vladimir Oltean 
> Reviewed-by: Florian Fainelli 
> Reviewed-by: Tobias Waldekranz 
> ---
>  include/linux/if_bridge.h |  6 ++
>  net/bridge/br_stp.c   | 13 +
>  2 files changed, 19 insertions(+)
> 

The patch is mostly fine, there are a few minor nits (const qualifiers). If there
is another version of the patch-set please add them, either way:

Acked-by: Nikolay Aleksandrov 

> diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
> index 920d3a02cc68..ebd16495459c 100644
> --- a/include/linux/if_bridge.h
> +++ b/include/linux/if_bridge.h
> @@ -137,6 +137,7 @@ struct net_device *br_fdb_find_port(const struct 
> net_device *br_dev,
>  void br_fdb_clear_offload(const struct net_device *dev, u16 vid);
>  bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag);
>  u8 br_port_get_stp_state(const struct net_device *dev);
> +clock_t br_get_ageing_time(struct net_device *br_dev);
>  #else
>  static inline struct net_device *
>  br_fdb_find_port(const struct net_device *br_dev,
> @@ -160,6 +161,11 @@ static inline u8 br_port_get_stp_state(const struct 
> net_device *dev)
>  {
>   return BR_STATE_DISABLED;
>  }
> +
> +static inline clock_t br_get_ageing_time(struct net_device *br_dev)

const

> +{
> + return 0;
> +}
>  #endif
>  
>  #endif
> diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
> index 86b5e05d3f21..3dafb6143cff 100644
> --- a/net/bridge/br_stp.c
> +++ b/net/bridge/br_stp.c
> @@ -639,6 +639,19 @@ int br_set_ageing_time(struct net_bridge *br, clock_t 
> ageing_time)
>   return 0;
>  }
>  
> +clock_t br_get_ageing_time(struct net_device *br_dev)

const

> +{
> + struct net_bridge *br;

const

> +
> + if (!netif_is_bridge_master(br_dev))
> + return 0;
> +
> + br = netdev_priv(br_dev);
> +
> + return jiffies_to_clock_t(br->ageing_time);
> +}
> +EXPORT_SYMBOL_GPL(br_get_ageing_time);
> +
>  /* called under bridge lock */
>  void __br_set_topology_change(struct net_bridge *br, unsigned char val)
>  {
> 



Re: [PATCH v4 net-next 01/11] net: bridge: add helper for retrieving the current bridge port STP state

2021-03-23 Thread Nikolay Aleksandrov
On 23/03/2021 01:51, Vladimir Oltean wrote:
> From: Vladimir Oltean 
> 
> It may happen that we have the following topology with DSA or any other
> switchdev driver with LAG offload:
> 
> ip link add br0 type bridge stp_state 1
> ip link add bond0 type bond
> ip link set bond0 master br0
> ip link set swp0 master bond0
> ip link set swp1 master bond0
> 
> STP decides that it should put bond0 into the BLOCKING state, and
> that's that. The ports that are actively listening for the switchdev
> port attributes emitted for the bond0 bridge port (because they are
> offloading it) and have the honor of seeing that switchdev port
> attribute can react to it, so we can program swp0 and swp1 into the
> BLOCKING state.
> 
> But if then we do:
> 
> ip link set swp2 master bond0
> 
> then as far as the bridge is concerned, nothing has changed: it still
> has one bridge port. But this new bridge port will not see any STP state
> change notification and will remain FORWARDING, which is how the
> standalone code leaves it in.
> 
> We need a function in the bridge driver which retrieves the current STP
> state, such that drivers can synchronize to it when they may have missed
> switchdev events.
> 
> Signed-off-by: Vladimir Oltean 
> Reviewed-by: Florian Fainelli 
> Reviewed-by: Tobias Waldekranz 
> ---
>  include/linux/if_bridge.h |  6 ++
>  net/bridge/br_stp.c   | 14 ++
>  2 files changed, 20 insertions(+)
> 

Acked-by: Nikolay Aleksandrov 





Re: [PATCH v3 net-next 08/12] net: dsa: replay port and host-joined mdb entries when joining the bridge

2021-03-22 Thread Nikolay Aleksandrov
On 22/03/2021 18:56, Vladimir Oltean wrote:
> On Mon, Mar 22, 2021 at 06:35:10PM +0200, Nikolay Aleksandrov wrote:
>>> +   hlist_for_each_entry(mp, &br->mdb_list, mdb_node) {
>>
>> You cannot walk over these lists without the multicast lock or RCU. RTNL is not
>> enough because of various timers and leave messages that can alter both the mdb_list
>> and the port group lists. I'd prefer RCU to avoid blocking the bridge mcast.
> 
> The trouble is that I need to emulate the calling context that is
> provided to SWITCHDEV_OBJ_ID_HOST_MDB and SWITCHDEV_OBJ_ID_PORT_MDB, and
> that means blocking context.
> 
> So if I hold rcu_read_lock(), I need to queue up the mdb entries, and
> notify the driver only after I leave the RCU critical section. The
> memory footprint may temporarily blow up.
> 
> In fact this is what I did in v1:
> https://patchwork.kernel.org/project/netdevbpf/patch/20210224114350.2791260-15-olte...@gmail.com/
> 
> I just figured I could get away with rtnl_mutex protection, but it looks
> like I can't. So I guess you prefer my v1?
> 

Indeed, if you need a blocking context then you'd have to go with v1.




Re: [PATCH v3 net-next 10/12] net: dsa: replay VLANs installed on port when joining the bridge

2021-03-22 Thread Nikolay Aleksandrov
On 21/03/2021 00:34, Vladimir Oltean wrote:
> From: Vladimir Oltean 
> 
> Currently this simple setup:
> 
> ip link add br0 type bridge vlan_filtering 1
> ip link add bond0 type bond
> ip link set bond0 master br0
> ip link set swp0 master bond0
> 
> will not work because the bridge has created the PVID in br_add_if ->
> nbp_vlan_init, and it has notified switchdev of the existence of VLAN 1,
> but that was too early, since swp0 was not yet a lower of bond0, so it
> had no reason to act upon that notification.
> 
> Signed-off-by: Vladimir Oltean 
> ---
> Changes in v3:
> Made the br_vlan_replay shim return -EOPNOTSUPP.
> 
>  include/linux/if_bridge.h | 10 ++
>  net/bridge/br_vlan.c  | 71 +++
>  net/dsa/port.c|  6 
>  3 files changed, 87 insertions(+)
[snip]
> +int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
> +struct notifier_block *nb, struct netlink_ext_ack *extack)
> +{
> + struct net_bridge_vlan_group *vg;
> + struct net_bridge_vlan *v;
> + struct net_bridge_port *p;
> + struct net_bridge *br;
> + int err = 0;
> + u16 pvid;
> +
> + ASSERT_RTNL();
> +
> + if (!netif_is_bridge_master(br_dev))
> + return -EINVAL;
> +
> + if (!netif_is_bridge_master(dev) && !netif_is_bridge_port(dev))
> + return -EINVAL;
> +
> + if (netif_is_bridge_master(dev)) {
> + br = netdev_priv(dev);
> + vg = br_vlan_group(br);
> + p = NULL;
> + } else {
> + p = br_port_get_rtnl(dev);
> + if (WARN_ON(!p))
> + return -EINVAL;
> + vg = nbp_vlan_group(p);
> + br = p->br;
> + }
> +
> + if (!vg)
> + return 0;
> +
> + pvid = br_get_pvid(vg);
> +
> + list_for_each_entry(v, &vg->vlan_list, vlist) {
> + struct switchdev_obj_port_vlan vlan = {
> + .obj.orig_dev = dev,
> + .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
> + .flags = br_vlan_flags(v, pvid),
> + .vid = v->vid,
> + };
> +
> + if (!br_vlan_should_use(v))
> + continue;
> +
> + br_vlan_replay_one(nb, dev, &vlan, extack);
> + if (err)
> + return err;
> + }
> +
> + return err;
> +}

EXPORT_SYMBOL_GPL ?

>  /* check if v_curr can enter a range ending in range_end */
>  bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr,
>const struct net_bridge_vlan *range_end)
> diff --git a/net/dsa/port.c b/net/dsa/port.c
> index d21a511f1e16..84775e253ee8 100644
> --- a/net/dsa/port.c
> +++ b/net/dsa/port.c
> @@ -209,6 +209,12 @@ static int dsa_port_switchdev_sync(struct dsa_port *dp,
>   if (err && err != -EOPNOTSUPP)
>   return err;
>  
> + err = br_vlan_replay(br, brport_dev,
> +  &dsa_slave_switchdev_blocking_notifier,
> +  extack);
> + if (err && err != -EOPNOTSUPP)
> + return err;
> +
>   return 0;
>  }
>  
> 



Re: [PATCH v3 net-next 09/12] net: dsa: replay port and local fdb entries when joining the bridge

2021-03-22 Thread Nikolay Aleksandrov
On 21/03/2021 00:34, Vladimir Oltean wrote:
> From: Vladimir Oltean 
> 
> When a DSA port joins a LAG that already had an FDB entry pointing to it:
> 
> ip link set bond0 master br0
> bridge fdb add dev bond0 00:01:02:03:04:05 master static
> ip link set swp0 master bond0
> 
> the DSA port will have no idea that this FDB entry is there, because it
> missed the switchdev event emitted at its creation.
> 
> Ido Schimmel pointed this out during a discussion about challenges with
> switchdev offloading of stacked interfaces between the physical port and
> the bridge, and recommended to just catch that condition and deny the
> CHANGEUPPER event:
> https://lore.kernel.org/netdev/20210210105949.gb287...@shredder.lan/
> 
> But in fact, we might need to deal with the hard thing anyway, which is
> to replay all FDB addresses relevant to this port, because it isn't just
> static FDB entries, but also local addresses (ones that are not
> forwarded but terminated by the bridge). There, we can't just say 'oh
> yeah, there was an upper already so I'm not joining that'.
> 
> So, similar to the logic for replaying MDB entries, add a function that
> must be called by individual switchdev drivers and replays local FDB
> entries as well as ones pointing towards a bridge port. This time, we
> use the atomic switchdev notifier block, since that's what FDB entries
> expect for some reason.
> 
> Reported-by: Ido Schimmel 
> Signed-off-by: Vladimir Oltean 
> ---
> Changes in v3:
> Made the br_fdb_replay shim return -EOPNOTSUPP.
> 
>  include/linux/if_bridge.h |  9 +++
>  include/net/switchdev.h   |  1 +
>  net/bridge/br_fdb.c   | 52 +++
>  net/dsa/dsa_priv.h|  1 +
>  net/dsa/port.c|  4 +++
>  net/dsa/slave.c   |  2 +-
>  6 files changed, 68 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
> index f6472969bb44..b564c4486a45 100644
> --- a/include/linux/if_bridge.h
> +++ b/include/linux/if_bridge.h
> @@ -147,6 +147,8 @@ void br_fdb_clear_offload(const struct net_device *dev, 
> u16 vid);
>  bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag);
>  u8 br_port_get_stp_state(const struct net_device *dev);
>  clock_t br_get_ageing_time(struct net_device *br_dev);
> +int br_fdb_replay(struct net_device *br_dev, struct net_device *dev,
> +   struct notifier_block *nb);
>  #else
>  static inline struct net_device *
>  br_fdb_find_port(const struct net_device *br_dev,
> @@ -175,6 +177,13 @@ static inline clock_t br_get_ageing_time(struct 
> net_device *br_dev)
>  {
>   return 0;
>  }
> +
> +static inline int br_fdb_replay(struct net_device *br_dev,
> + struct net_device *dev,
> + struct notifier_block *nb)
> +{
> + return -EOPNOTSUPP;
> +}
>  #endif
>  
>  #endif
> diff --git a/include/net/switchdev.h b/include/net/switchdev.h
> index b7fc7d0f54e2..7688ec572757 100644
> --- a/include/net/switchdev.h
> +++ b/include/net/switchdev.h
> @@ -205,6 +205,7 @@ struct switchdev_notifier_info {
>  
>  struct switchdev_notifier_fdb_info {
>   struct switchdev_notifier_info info; /* must be first */
> + struct list_head list;
>   const unsigned char *addr;
>   u16 vid;
>   u8 added_by_user:1,
> diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
> index b7490237f3fc..49125cc196ac 100644
> --- a/net/bridge/br_fdb.c
> +++ b/net/bridge/br_fdb.c
> @@ -726,6 +726,58 @@ static inline size_t fdb_nlmsg_size(void)
>   + nla_total_size(sizeof(u8)); /* NFEA_ACTIVITY_NOTIFY */
>  }
>  
> +static int br_fdb_replay_one(struct notifier_block *nb,
> +  struct net_bridge_fdb_entry *fdb,
> +  struct net_device *dev)
> +{
> + struct switchdev_notifier_fdb_info item;
> + int err;
> +
> + item.addr = fdb->key.addr.addr;
> + item.vid = fdb->key.vlan_id;
> + item.added_by_user = test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
> + item.offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags);
> + item.info.dev = dev;
> +
> + err = nb->notifier_call(nb, SWITCHDEV_FDB_ADD_TO_DEVICE, &item);
> + return notifier_to_errno(err);
> +}
> +
> +int br_fdb_replay(struct net_device *br_dev, struct net_device *dev,
> +   struct notifier_block *nb)
> +{
> + struct net_bridge_fdb_entry *fdb;
> + struct net_bridge *br;
> + int err = 0;
> +
> + if (!netif_is_bridge_master(br_dev))
> + return -EINVAL;
> +
> + if (!netif_is_bridge_port(dev))
> + return -EINVAL;
> +
> + br = netdev_priv(br_dev);
> +
> + rcu_read_lock();
> +
> + hlist_for_each_entry_rcu(fdb, &br->fdb_list, fdb_node) {
> + struct net_device *dst_dev;
> +
> + dst_dev = fdb->dst ? fdb->dst->dev : br->dev;

Please use READ_ONCE() to read fdb->dst and then check the result here.
I'll soon send patches to annotate all fdb->dst lockless 

Re: [PATCH v3 net-next 08/12] net: dsa: replay port and host-joined mdb entries when joining the bridge

2021-03-22 Thread Nikolay Aleksandrov
On 21/03/2021 00:34, Vladimir Oltean wrote:
> From: Vladimir Oltean 
> 
> I have udhcpcd in my system and this is configured to bring interfaces
> up as soon as they are created.
> 
> I create a bridge as follows:
> 
> ip link add br0 type bridge
> 
> As soon as I create the bridge and udhcpcd brings it up, I also have
> avahi which automatically starts sending IPv6 packets to advertise some
> local services, and because of that, the br0 bridge joins the following
> IPv6 groups due to the code path detailed below:
> 
> 33:33:ff:6d:c1:9c vid 0
> 33:33:00:00:00:6a vid 0
> 33:33:00:00:00:fb vid 0
> 
> br_dev_xmit
> -> br_multicast_rcv
>-> br_ip6_multicast_add_group
>   -> __br_multicast_add_group
>  -> br_multicast_host_join
> -> br_mdb_notify
> 
> This is all fine, but inside br_mdb_notify we have br_mdb_switchdev_host
> hooked up, and switchdev will attempt to offload the host joined groups
> to an empty list of ports. Of course nobody offloads them.
> 
> Then when we add a port to br0:
> 
> ip link set swp0 master br0
> 
> the bridge doesn't replay the host-joined MDB entries from br_add_if,
> and eventually the host joined addresses expire, and a switchdev
> notification for deleting it is emitted, but surprise, the original
> addition was already completely missed.
> 
> The strategy to address this problem is to replay the MDB entries (both
> the port ones and the host joined ones) when the new port joins the
> bridge, similar to what vxlan_fdb_replay does (in that case, its FDB can
> be populated and only then attached to a bridge that you offload).
> However there are 2 possibilities: the addresses can be 'pushed' by the
> bridge into the port, or the port can 'pull' them from the bridge.
> 
> Considering that in the general case, the new port can be really late to
> the party, and there may have been many other switchdev ports that
> already received the initial notification, we would like to avoid
> delivering duplicate events to them, since they might misbehave. And
> currently, the bridge calls the entire switchdev notifier chain, whereas
> for replaying it should just call the notifier block of the new guy.
> But the bridge doesn't know what is the new guy's notifier block, it
> just knows where the switchdev notifier chain is. So for simplification,
> we make this a driver-initiated pull for now, and the notifier block is
> passed as an argument.
> 
> To emulate the calling context for mdb objects (deferred and put on the
> blocking notifier chain), we must iterate under RCU protection through
> the bridge's mdb entries, queue them, and only call them once we're out
> of the RCU read-side critical section.
> 
> Suggested-by: Ido Schimmel 
> Signed-off-by: Vladimir Oltean 
> ---
> Changes in v3:
> - Removed the implication that avahi is crap from the commit message.
> - Made the br_mdb_replay shim return -EOPNOTSUPP.
> 
>  include/linux/if_bridge.h |  9 +
>  net/bridge/br_mdb.c   | 84 +++
>  net/dsa/dsa_priv.h|  2 +
>  net/dsa/port.c|  6 +++
>  net/dsa/slave.c   |  2 +-
>  5 files changed, 102 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
> index ebd16495459c..f6472969bb44 100644
> --- a/include/linux/if_bridge.h
> +++ b/include/linux/if_bridge.h
> @@ -69,6 +69,8 @@ bool br_multicast_has_querier_anywhere(struct net_device 
> *dev, int proto);
>  bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto);
>  bool br_multicast_enabled(const struct net_device *dev);
>  bool br_multicast_router(const struct net_device *dev);
> +int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
> +   struct notifier_block *nb, struct netlink_ext_ack *extack);
>  #else
>  static inline int br_multicast_list_adjacent(struct net_device *dev,
>struct list_head *br_ip_list)
> @@ -93,6 +95,13 @@ static inline bool br_multicast_router(const struct 
> net_device *dev)
>  {
>   return false;
>  }
> +static inline int br_mdb_replay(struct net_device *br_dev,
> + struct net_device *dev,
> + struct notifier_block *nb,
> + struct netlink_ext_ack *extack)
> +{
> + return -EOPNOTSUPP;
> +}
>  #endif
>  
>  #if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_VLAN_FILTERING)
> diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
> index 8846c5bcd075..23973186094c 100644
> --- a/net/bridge/br_mdb.c
> +++ b/net/bridge/br_mdb.c
> @@ -506,6 +506,90 @@ static void br_mdb_complete(struct net_device *dev, int 
> err, void *priv)
>   kfree(priv);
>  }
>  
> +static int br_mdb_replay_one(struct notifier_block *nb, struct net_device 
> *dev,
> +  struct net_bridge_mdb_entry *mp, int obj_id,
> +  struct net_device *orig_dev,
> +  

Re: [PATCH v3 net-next 00/12] Better support for sandwiched LAGs with bridge and DSA

2021-03-22 Thread Nikolay Aleksandrov
On 21/03/2021 00:34, Vladimir Oltean wrote:
> From: Vladimir Oltean 
> 
> The objective of this series is to make LAG uppers on top of switchdev
> ports work regardless of which order we link interfaces to their masters
> (first make the port join the LAG, then the LAG join the bridge, or the
> other way around).
> 
> There was a design decision to be made in patches 2-4 on whether we
> should adopt the "push" model (which attempts to solve the problem
> centrally, in the bridge layer) where the driver just calls:
> 
>   switchdev_bridge_port_offloaded(brport_dev,
>   &atomic_notifier_block,
>   &blocking_notifier_block,
>   extack);
> 
> and the bridge just replays the entire collection of switchdev port
> attributes and objects that it has, in some predefined order and with
> some predefined error handling logic;
> 
> 
> or the "pull" model (which attempts to solve the problem by giving the
> driver the rope to hang itself), where the driver, apart from calling:
> 
>   switchdev_bridge_port_offloaded(brport_dev, extack);
> 
> has the task of "dumpster diving" (as Tobias puts it) through the bridge
> attributes and objects by itself, by calling:
> 
>   - br_vlan_replay
>   - br_fdb_replay
>   - br_mdb_replay
>   - br_vlan_enabled
>   - br_port_flag_is_set
>   - br_port_get_stp_state
>   - br_multicast_router
>   - br_get_ageing_time
> 
> (not necessarily all of them, and not necessarily in this order, and
> with driver-defined error handling).
> 
> Even though I'm not in love myself with the "pull" model, I chose it
> because there is a fundamental trick with replaying switchdev events
> like this:
> 
> ip link add br0 type bridge
> ip link add bond0 type bond
> ip link set bond0 master br0
> ip link set swp0 master bond0 <- this will replay the objects once for
>  the bond0 bridge port, and the swp0
>  switchdev port will process them
> ip link set swp1 master bond0 <- this will replay the objects again for
>  the bond0 bridge port, and the swp1
>  switchdev port will see them, but swp0
>  will see them for the second time now
> 
> Basically I believe that it is implementation defined whether the driver
> wants to error out on switchdev objects seen twice on a port, and the
> bridge should not enforce a certain model for that. For example, for FDB
> entries added to a bonding interface, the underlying switchdev driver
> might have an abstraction for just that: an FDB entry pointing towards a
> logical (as opposed to physical) port. So when the second port joins the
> bridge, it doesn't really need to replay FDB entries, since there is
> already at least one hardware port which has been receiving those
> events, and the FDB entries don't need to be added a second time to the
> same logical port.
> In the other corner, we have the drivers that handle switchdev port
> attributes on a LAG as individual switchdev port attributes on physical
> ports (example: VLAN filtering). In fact, the switchdev_handle_port_attr_set
> helper facilitates this: it is a fan-out from a single orig_dev towards
> multiple lowers that pass the check_cb().
> But that's the point: switchdev_handle_port_attr_set is just a helper
> which the driver _opts_ to use. The bridge can't enforce the "push"
> model, because that would assume that all drivers handle port attributes
> in the same way, which is probably false.
> 
> For this reason, I preferred to go with the "pull" mode for this patch
> set. Just to see how bad it is for other switchdev drivers to copy-paste
> this logic, I added the pull support to ocelot too, and I think it's
> pretty manageable.
> 
> Vladimir Oltean (12):
>   net: dsa: call dsa_port_bridge_join when joining a LAG that is already
> in a bridge
>   net: dsa: pass extack to dsa_port_{bridge,lag}_join
>   net: dsa: inherit the actual bridge port flags at join time
>   net: dsa: sync up with bridge port's STP state when joining
>   net: dsa: sync up VLAN filtering state when joining the bridge
>   net: dsa: sync multicast router state when joining the bridge
>   net: dsa: sync ageing time when joining the bridge
>   net: dsa: replay port and host-joined mdb entries when joining the
> bridge
>   net: dsa: replay port and local fdb entries when joining the bridge
>   net: dsa: replay VLANs installed on port when joining the bridge
>   net: ocelot: call ocelot_netdevice_bridge_join when joining a bridged
> LAG
>   net: ocelot: replay switchdev events when joining bridge
> 
>  drivers/net/dsa/ocelot/felix.c |   4 +-
>  drivers/net/ethernet/mscc/ocelot.c |  18 +--
>  drivers/net/ethernet/mscc/ocelot_net.c | 208 +
>  include/linux/if_bridge.h  |  40 +
>  include/net/switchdev.h|   1 +
>  

Re: [PATCH v5 net-next 03/10] net: bridge: don't print in br_switchdev_set_port_flag

2021-02-12 Thread Nikolay Aleksandrov
On 12/02/2021 17:15, Vladimir Oltean wrote:
> From: Vladimir Oltean 
> 
> For the netlink interface, propagate errors through extack rather than
> simply printing them to the console. For the sysfs interface, we still
> print to the console, but at least that's one layer higher than in
> switchdev, which also allows us to silently ignore the offloading of
> flags if that is ever needed in the future.
> 
> Signed-off-by: Vladimir Oltean 
> ---
> Changes in v5:
> None.
> 
> Changes in v4:
> - Adjust the commit message now that we aren't notifying initial and
>   final port flags from the bridge any longer.
> 
> Changes in v3:
> - Deal with the br_switchdev_set_port_flag call from sysfs too.
> 
> Changes in v2:
> - br_set_port_flag now returns void, so no extack there.
> - don't overwrite extack in br_switchdev_set_port_flag if already
>   populated.
> 
>  net/bridge/br_netlink.c   |  9 +
>  net/bridge/br_private.h   |  6 --
>  net/bridge/br_switchdev.c | 13 +++--
>  net/bridge/br_sysfs_if.c  |  7 +--
>  4 files changed, 21 insertions(+), 14 deletions(-)
> 

Acked-by: Nikolay Aleksandrov 




Re: [PATCH v5 net-next 01/10] net: switchdev: propagate extack to port attributes

2021-02-12 Thread Nikolay Aleksandrov
On 12/02/2021 17:15, Vladimir Oltean wrote:
> From: Vladimir Oltean 
> 
> When a struct switchdev_attr is notified through switchdev, there is no
> way to report informational messages, unlike for struct switchdev_obj.
> 
> Signed-off-by: Vladimir Oltean 
> Reviewed-by: Ido Schimmel 
> Reviewed-by: Florian Fainelli 
> ---
> Changes in v5:
> Rebased on top of AM65 CPSW driver merge.
> 
> Changes in v4:
> None.
> 
> Changes in v3:
> None.
> 
> Changes in v2:
> Patch is new.
> 
>  .../ethernet/marvell/prestera/prestera_switchdev.c|  3 ++-
>  .../net/ethernet/mellanox/mlxsw/spectrum_switchdev.c  |  3 ++-
>  drivers/net/ethernet/mscc/ocelot_net.c|  3 ++-
>  drivers/net/ethernet/ti/am65-cpsw-switchdev.c |  3 ++-
>  drivers/net/ethernet/ti/cpsw_switchdev.c  |  3 ++-
>  include/net/switchdev.h   |  6 --
>  net/dsa/slave.c   |  3 ++-
>  net/switchdev/switchdev.c | 11 ++++---
>  8 files changed, 24 insertions(+), 11 deletions(-)
> 

Reviewed-by: Nikolay Aleksandrov 




Re: [PATCH v5 net-next 02/10] net: bridge: offload all port flags at once in br_setport

2021-02-12 Thread Nikolay Aleksandrov
On 12/02/2021 17:15, Vladimir Oltean wrote:
> From: Vladimir Oltean 
> 
> If for example this command:
> 
> ip link set swp0 type bridge_slave flood off mcast_flood off learning off
> 
> succeeded at configuring BR_FLOOD and BR_MCAST_FLOOD but not at
> BR_LEARNING, there would be no attempt to revert the partial state in
> any way. Arguably, if the user changes more than one flag through the
> same netlink command, this one _should_ be all or nothing, which means
> it should be passed through switchdev as all or nothing.
> 
> Signed-off-by: Vladimir Oltean 
> ---
> Changes in v5:
> None.
> 
> Changes in v4:
> Leave br->lock alone completely.
> 
> Changes in v3:
> Don't attempt to drop br->lock around br_switchdev_set_port_flag now,
> move that part to a later patch.
> 
> Changes in v2:
> Patch is new.
> 
> Changes in v2:
> Patch is new.
> 
>  net/bridge/br_netlink.c   | 109 --
>  net/bridge/br_switchdev.c |   6 ++-
>  2 files changed, 39 insertions(+), 76 deletions(-)
> 

LGTM, thanks!
Acked-by: Nikolay Aleksandrov 



Re: [PATCH v3 net-next 00/11] Cleanup in brport flags switchdev offload for DSA

2021-02-10 Thread Nikolay Aleksandrov
On 10/02/2021 14:01, Vladimir Oltean wrote:
> On Wed, Feb 10, 2021 at 01:05:57PM +0200, Nikolay Aleksandrov wrote:
>> On 10/02/2021 13:01, Vladimir Oltean wrote:
>>> On Wed, Feb 10, 2021 at 12:52:33PM +0200, Nikolay Aleksandrov wrote:
>>>> On 10/02/2021 12:45, Vladimir Oltean wrote:
>>>>> Hi Nikolay,
>>>>>
>>>>> On Wed, Feb 10, 2021 at 12:31:43PM +0200, Nikolay Aleksandrov wrote:
>>>>>> Hi Vladimir,
>>>>>> Let's take a step back for a moment and discuss the bridge unlock/lock 
>>>>>> sequences
>>>>>> that come with this set. I'd really like to avoid those as they're a 
>>>>>> recipe
>>>>>> for future problems. The only good way to achieve that currently is to 
>>>>>> keep
>>>>>> the PRE_FLAGS call and do that in unsleepable context but move the FLAGS 
>>>>>> call
>>>>>> after the flags have been changed (if they have changed obviously). That 
>>>>>> would
>>>>>> make the code read much easier since we'll have all our lock/unlock 
>>>>>> sequences
>>>>>> in the same code blocks and won't play games to get sleepable context.
>>>>>> Please let's think and work in that direction, rather than having:
>>>>>> +spin_lock_bh(&p->br->lock);
>>>>>> +if (err) {
>>>>>> +netdev_err(p->dev, "%s\n", extack._msg);
>>>>>> +return err;
>>>>>>  }
>>>>>> +
>>>>>>
>>>>>> which immediately looks like a bug even though after some code checking 
>>>>>> we can
>>>>>> verify it's ok. WDYT?
>>>>>>
>>>>>> I plan to get rid of most of the br->lock since it's been abused for a 
>>>>>> very long
>>>>>> time because it's essentially STP lock, but people have started using it 
>>>>>> for other
>>>>>> things and I plan to fix that when I get more time.
>>>>>
>>>>> This won't make the sysfs codepath any nicer, will it?
>>>>>
>>>>
>>>> Currently we'll have to live with a hack that checks if the flags have 
>>>> changed. I agree
>>>> it won't be pretty, but we won't have to unlock and lock again in the 
>>>> middle of the
>>>> called function and we'll have all our locking in the same place, easier 
>>>> to verify and
>>>> later easier to remove. Once I get rid of most of the br->lock usage we 
>>>> can revisit
>>>> the drop of PRE_FLAGS if it's a problem. The alternative is to change the 
>>>> flags, then
>>>> send the switchdev notification outside of the lock and revert the flags 
>>>> if it doesn't
>>>> go through which doesn't sound much better.
>>>> I'm open to any other suggestions, but definitely would like to avoid 
>>>> playing locking games.
>>>> Even if it means casing out flag setting from all other store_ functions 
>>>> for sysfs.
>>>
>>> By casing out flag settings you mean something like this?
>>>
>>>
>>> #define BRPORT_ATTR(_name, _mode, _show, _store)\
>>> const struct brport_attribute brport_attr_##_name = {   \
>>> .attr = {.name = __stringify(_name),\
>>>  .mode = _mode },   \
>>> .show   = _show,\
>>> .store_unlocked = _store,   \
>>> };
>>>
>>> #define BRPORT_ATTR_FLAG(_name, _mask)  \
>>> static ssize_t show_##_name(struct net_bridge_port *p, char *buf) \
>>> {   \
>>> return sprintf(buf, "%d\n", !!(p->flags & _mask));  \
>>> }   \
>>> static int store_##_name(struct net_bridge_port *p, unsigned long v) \
>>> {   \
>>> return store_flag(p, v, _mask); \
>>> }   \
>>> static BRPORT_ATTR(_name, 0644, \
>>>show_##_name, store_##_name)
>>>
>>

Re: [PATCH v3 net-next 00/11] Cleanup in brport flags switchdev offload for DSA

2021-02-10 Thread Nikolay Aleksandrov
On 10/02/2021 13:01, Vladimir Oltean wrote:
> On Wed, Feb 10, 2021 at 12:52:33PM +0200, Nikolay Aleksandrov wrote:
>> On 10/02/2021 12:45, Vladimir Oltean wrote:
>>> Hi Nikolay,
>>>
>>> On Wed, Feb 10, 2021 at 12:31:43PM +0200, Nikolay Aleksandrov wrote:
>>>> Hi Vladimir,
>>>> Let's take a step back for a moment and discuss the bridge unlock/lock 
>>>> sequences
>>>> that come with this set. I'd really like to avoid those as they're a recipe
>>>> for future problems. The only good way to achieve that currently is to keep
>>>> the PRE_FLAGS call and do that in unsleepable context but move the FLAGS 
>>>> call
>>>> after the flags have been changed (if they have changed obviously). That 
>>>> would
>>>> make the code read much easier since we'll have all our lock/unlock 
>>>> sequences
>>>> in the same code blocks and won't play games to get sleepable context.
>>>> Please let's think and work in that direction, rather than having:
>>>> +  spin_lock_bh(&p->br->lock);
>>>> +  if (err) {
>>>> +  netdev_err(p->dev, "%s\n", extack._msg);
>>>> +  return err;
>>>>}
>>>> +
>>>>
>>>> which immediately looks like a bug even though after some code checking we 
>>>> can
>>>> verify it's ok. WDYT?
>>>>
>>>> I plan to get rid of most of the br->lock since it's been abused for a 
>>>> very long
>>>> time because it's essentially STP lock, but people have started using it 
>>>> for other
>>>> things and I plan to fix that when I get more time.
>>>
>>> This won't make the sysfs codepath any nicer, will it?
>>>
>>
>> Currently we'll have to live with a hack that checks if the flags have 
>> changed. I agree
>> it won't be pretty, but we won't have to unlock and lock again in the middle 
>> of the 
>> called function and we'll have all our locking in the same place, easier to 
>> verify and
>> later easier to remove. Once I get rid of most of the br->lock usage we can 
>> revisit
>> the drop of PRE_FLAGS if it's a problem. The alternative is to change the 
>> flags, then
>> send the switchdev notification outside of the lock and revert the flags if 
>> it doesn't
>> go through which doesn't sound much better.
>> I'm open to any other suggestions, but definitely would like to avoid 
>> playing locking games.
>> Even if it means casing out flag setting from all other store_ functions for 
>> sysfs.
> 
> By casing out flag settings you mean something like this?
> 
> 
> #define BRPORT_ATTR(_name, _mode, _show, _store)  \
> const struct brport_attribute brport_attr_##_name = { \
>   .attr = {.name = __stringify(_name),\
>.mode = _mode },   \
>   .show   = _show,\
>   .store_unlocked = _store,   \
> };
> 
> #define BRPORT_ATTR_FLAG(_name, _mask)\
> static ssize_t show_##_name(struct net_bridge_port *p, char *buf) \
> { \
>   return sprintf(buf, "%d\n", !!(p->flags & _mask));  \
> } \
> static int store_##_name(struct net_bridge_port *p, unsigned long v) \
> { \
>   return store_flag(p, v, _mask); \
> } \
> static BRPORT_ATTR(_name, 0644,   \
>  show_##_name, store_##_name)
> 
> static ssize_t brport_store(struct kobject *kobj,
>   struct attribute *attr,
>   const char *buf, size_t count)
> {
>   ...
> 
>   } else if (brport_attr->store_unlocked) {
>   val = simple_strtoul(buf, &endp, 0);
>   if (endp == buf)
>   goto out_unlock;
>   ret = brport_attr->store_unlocked(p, val);
>   }
> 

Yes, this can work but will need a bit more changes because of 
br_port_flags_change().
Then the netlink side can be modeled in a similar way.





Re: [PATCH v3 net-next 00/11] Cleanup in brport flags switchdev offload for DSA

2021-02-10 Thread Nikolay Aleksandrov
On 10/02/2021 12:45, Vladimir Oltean wrote:
> Hi Nikolay,
> 
> On Wed, Feb 10, 2021 at 12:31:43PM +0200, Nikolay Aleksandrov wrote:
>> Hi Vladimir,
>> Let's take a step back for a moment and discuss the bridge unlock/lock 
>> sequences
>> that come with this set. I'd really like to avoid those as they're a recipe
>> for future problems. The only good way to achieve that currently is to keep
>> the PRE_FLAGS call and do that in unsleepable context but move the FLAGS call
>> after the flags have been changed (if they have changed obviously). That 
>> would
>> make the code read much easier since we'll have all our lock/unlock sequences
>> in the same code blocks and won't play games to get sleepable context.
>> Please let's think and work in that direction, rather than having:
>> +spin_lock_bh(&p->br->lock);
>> +if (err) {
>> +netdev_err(p->dev, "%s\n", extack._msg);
>> +return err;
>>  }
>> +
>>
>> which immediately looks like a bug even though after some code checking we 
>> can
>> verify it's ok. WDYT?
>>
>> I plan to get rid of most of the br->lock since it's been abused for a very 
>> long
>> time because it's essentially STP lock, but people have started using it for 
>> other
>> things and I plan to fix that when I get more time.
> 
> This won't make the sysfs codepath any nicer, will it?
> 

Currently we'll have to live with a hack that checks if the flags have changed. 
I agree
it won't be pretty, but we won't have to unlock and lock again in the middle of 
the 
called function and we'll have all our locking in the same place, easier to 
verify and
later easier to remove. Once I get rid of most of the br->lock usage we can 
revisit
the drop of PRE_FLAGS if it's a problem. The alternative is to change the 
flags, then
send the switchdev notification outside of the lock and revert the flags if it 
doesn't
go through which doesn't sound much better.
I'm open to any other suggestions, but definitely would like to avoid playing 
locking games.
Even if it means casing out flag setting from all other store_ functions for 
sysfs.



Re: [PATCH v3 net-next 00/11] Cleanup in brport flags switchdev offload for DSA

2021-02-10 Thread Nikolay Aleksandrov
On 10/02/2021 11:14, Vladimir Oltean wrote:
> From: Vladimir Oltean 
> 
> The initial goal of this series was to have better support for
> standalone ports mode and multiple bridges on the DSA drivers like
> ocelot/felix and sja1105. Proper support for standalone mode requires
> disabling address learning, which in turn requires interaction with the
> switchdev notifier, which is actually where most of the patches are.
> 
> I also noticed that most of the drivers are actually talking either to
> firmware or SPI/MDIO connected devices from the brport flags switchdev
> attribute handler, so it makes sense to actually make it sleepable
> instead of atomic.
> 

Hi Vladimir,
Let's take a step back for a moment and discuss the bridge unlock/lock sequences
that come with this set. I'd really like to avoid those as they're a recipe
for future problems. The only good way to achieve that currently is to keep
the PRE_FLAGS call and do that in unsleepable context but move the FLAGS call
after the flags have been changed (if they have changed obviously). That would
make the code read much easier since we'll have all our lock/unlock sequences
in the same code blocks and won't play games to get sleepable context.
Please let's think and work in that direction, rather than having:
+   spin_lock_bh(&p->br->lock);
+   if (err) {
+   netdev_err(p->dev, "%s\n", extack._msg);
+   return err;
}
+

which immediately looks like a bug even though after some code checking we can
verify it's ok. WDYT?

I plan to get rid of most of the br->lock since it's been abused for a very long
time because it's essentially STP lock, but people have started using it for 
other
things and I plan to fix that when I get more time.

Thanks,
 Nik


Re: [PATCH v3 net-next 08/11] net: bridge: put SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS on the blocking call chain

2021-02-10 Thread Nikolay Aleksandrov
On 10/02/2021 11:14, Vladimir Oltean wrote:
> From: Vladimir Oltean 
> 
> Since we would like br_switchdev_set_port_flag to not use an atomic
> notifier, it should be called from outside spinlock context.
> 
> We can temporarily drop br->lock, but that creates some concurrency
> complications (example below is given for sysfs):
> - There might be an "echo 1 > multicast_flood" simultaneous with an
>   "echo 0 > multicast_flood". The result of this is nondeterministic
>   either way, so I'm not too concerned as long as the result is
>   consistent (no other flags have changed).
> - There might be an "echo 1 > multicast_flood" simultaneous with an
>   "echo 0 > learning". My expectation is that none of the two writes are
>   "eaten", and the final flags contain BR_MCAST_FLOOD=1 and BR_LEARNING=0
>   regardless of the order of execution. That is actually possible if, on
>   the commit path, we don't do a trivial "p->flags = flags" which might
>   overwrite bits outside of our mask, but instead we just change the
>   flags corresponding to our mask.
> 

Not sure I follow here, how do we get any concurrency issues with sysfs or 
netlink
when both take rtnl before doing any changes ?

> Now that br_switchdev_set_port_flag is never called from under br->lock,
> it runs in sleepable context.
> 
> All switchdev drivers handle SWITCHDEV_PORT_ATTR_SET as both blocking
> and atomic, so no changes are needed on that front.
> 
> Signed-off-by: Vladimir Oltean 
> ---
> Changes in v3:
> - Drop the br->lock around br_switchdev_set_port_flag in this patch, for
>   both sysfs and netlink.
> - Only set/restore the masked bits in p->flags to avoid concurrency
>   issues.
> 
> Changes in v2:
> Patch is new.
> 
>  net/bridge/br_netlink.c   | 10 +++---
>  net/bridge/br_switchdev.c |  5 ++---
>  net/bridge/br_sysfs_if.c  | 22 ++
>  3 files changed, 23 insertions(+), 14 deletions(-)
> 
> diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
> index b7731614c036..8f09106966c4 100644
> --- a/net/bridge/br_netlink.c
> +++ b/net/bridge/br_netlink.c
> @@ -869,7 +869,7 @@ static void br_set_port_flag(struct net_bridge_port *p, 
> struct nlattr *tb[],
>  static int br_setport(struct net_bridge_port *p, struct nlattr *tb[],
> struct netlink_ext_ack *extack)
>  {
> - unsigned long old_flags, changed_mask;
> + unsigned long flags, old_flags, changed_mask;
>   bool br_vlan_tunnel_old;
>   int err;
>  
> @@ -896,10 +896,14 @@ static int br_setport(struct net_bridge_port *p, struct 
> nlattr *tb[],
>   br_set_port_flag(p, tb, IFLA_BRPORT_ISOLATED, BR_ISOLATED);
>  
>   changed_mask = old_flags ^ p->flags;
> + flags = p->flags;
>  
> - err = br_switchdev_set_port_flag(p, p->flags, changed_mask, extack);
> + spin_unlock_bh(&p->br->lock);
> + err = br_switchdev_set_port_flag(p, flags, changed_mask, extack);
> + spin_lock_bh(&p->br->lock);
>   if (err) {
> - p->flags = old_flags;
> + p->flags &= ~changed_mask;
> + p->flags |= (old_flags & changed_mask);
>   goto out;
>   }
>  
> diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
> index dbd94156960f..a79164ee65b9 100644
> --- a/net/bridge/br_switchdev.c
> +++ b/net/bridge/br_switchdev.c
> @@ -79,9 +79,8 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p,
>   attr.u.brport_flags.val = flags & mask;
>   attr.u.brport_flags.mask = mask;
>  
> - /* We run from atomic context here */
> - err = call_switchdev_notifiers(SWITCHDEV_PORT_ATTR_SET, p->dev,
> - &attr, extack);
> + err = call_switchdev_blocking_notifiers(SWITCHDEV_PORT_ATTR_SET, p->dev,
> + &attr, extack);
>   err = notifier_to_errno(err);
>   if (err == -EOPNOTSUPP)
>   return 0;
> diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
> index 72e92376eef1..3f21fdd1cdaa 100644
> --- a/net/bridge/br_sysfs_if.c
> +++ b/net/bridge/br_sysfs_if.c
> @@ -68,16 +68,22 @@ static int store_flag(struct net_bridge_port *p, unsigned 
> long v,
>   else
>   flags &= ~mask;
>  
> - if (flags != p->flags) {
> - err = br_switchdev_set_port_flag(p, flags, mask, &extack);
> - if (err) {
> - netdev_err(p->dev, "%s\n", extack._msg);
> - return err;
> - }
> + if (flags == p->flags)
> + return 0;
>  
> - p->flags = flags;
> - br_port_flags_change(p, mask);
> + spin_unlock_bh(&p->br->lock);
> + err = br_switchdev_set_port_flag(p, flags, mask, &extack);
> + spin_lock_bh(&p->br->lock);
> + if (err) {
> + netdev_err(p->dev, "%s\n", extack._msg);
> + return err;
>   }
> +
> + p->flags &= ~mask;
> + p->flags |= (flags & mask);
> +
> + br_port_flags_change(p, mask);
> +
>   return 0;
>  }
>  
> 



Re: [PATCH net-next 2/9] net: bridge: offload initial and final port flags through switchdev

2021-02-08 Thread Nikolay Aleksandrov
On 08/02/2021 13:45, Vladimir Oltean wrote:
> On Mon, Feb 08, 2021 at 01:37:03PM +0200, Nikolay Aleksandrov wrote:
>> Hi Vladimir,
>> I think this patch potentially breaks some use cases. There are a few 
>> problems, I'll
>> start with the more serious one: before the ports would have a set of flags 
>> that were
>> always set when joining, now due to how nbp_flags_change() handles flag 
>> setting some might
>> not be set which would immediately change behaviour w.r.t software fwding. 
>> I'll use your
>> example of BR_BCAST_FLOOD: a lot of drivers will return an error for it and 
>> any broadcast
>> towards these ports will be dropped, we have mixed environments with 
>> software ports that
>> sometimes have traffic (e.g. decapped ARP requests) software forwarded which 
>> will stop working.
> 
> Yes, you're right. The only solution I can think of is to add a "bool 
> ignore_errors"
> to nbp_flags_change, set to true from new_nbp and del_nbp, and to false from 
> the
> netlink code.
> 

Indeed, I can't think of any better solution right now, but that would make it 
more or less
equal to the current situation where the flags are just set. You can 
read/restore them on add/del
of bridge port, but I guess that's what you'd like to avoid. :)
I don't mind adding the add/del_nbp() notifications, but both of them seem 
redundant with
the port add/del notifications which you can handle in the driver.

>> The other lesser issue is with the style below, I mean these three calls for 
>> each flag are
>> just ugly and look weird as you've also noted, since these APIs are internal 
>> can we do better?
> 
> Doing better would mean allowing nbp_flags_change() to have a bit mask with
> potentially more brport flags set, and to call br_switchdev_set_port_flag in
> a for_each_set_bit() loop?
> 

Sure, that sounds better for now. I think you've described the ideal case in 
your
commit message.


Re: [PATCH net-next 2/9] net: bridge: offload initial and final port flags through switchdev

2021-02-08 Thread Nikolay Aleksandrov
On 08/02/2021 01:21, Vladimir Oltean wrote:
> From: Vladimir Oltean 
> 
> It must first be admitted that switchdev device drivers have a life
> beyond the bridge, and when they aren't offloading the bridge driver
> they are operating with forwarding disabled between ports, emulating as
> closely as possible N standalone network interfaces.
> 
> Now it must be said that for a switchdev port operating in standalone
> mode, address learning doesn't make much sense since that is a bridge
> function. In fact, address learning even breaks setups such as this one:
> 
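>    +---------------------------------------------+
>    |                                             |
>    | +-------------------+                       |
>    | |        br0        |    send     receive   |
>    | +--------+-+--------+ +--------+ +--------+ |
>    | |        | |        | |        | |        | |
>    | |  swp0  | |  swp1  | |  swp2  | |  swp3  | |
>    | |        | |        | |        | |        | |
>    +-+--------+-+--------+-+--------+-+--------+-+
>          |          ^          |          ^
>          |          |          |          |
>          |          +----------+          |
>          |                                |
>          +--------------------------------+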
> 
> because if the ASIC has a single FDB (can offload a single bridge)
> then source address learning on swp3 can "steal" the source MAC address
> of swp2 from br0's FDB, because learning frames coming from swp2 will be
> done twice: first on the swp1 ingress port, second on the swp3 ingress
> port. So the hardware FDB will become out of sync with the software
> bridge, and when swp2 tries to send one more packet towards swp1, the
> ASIC will attempt to short-circuit the forwarding path and send it
> directly to swp3 (since that's the last port it learned that address on),
> which it obviously can't, because swp3 operates in standalone mode.
> 
> So switchdev drivers operating in standalone mode should disable address
> learning. As a matter of practicality, we can reduce code duplication in
> drivers by having the bridge notify through switchdev of the initial and
> final brport flags. Then, drivers can simply start up hardcoded for no
> address learning (similar to how they already start up hardcoded for no
> forwarding), then they only need to listen for
> SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS and their job is basically done, no
> need for special cases when the port joins or leaves the bridge etc.
> 
> When a port leaves the bridge (and therefore becomes standalone), we
> issue a switchdev attribute that apart from disabling address learning,
> enables flooding of all kinds. This is also done for pragmatic reasons,
> because even though standalone switchdev ports might not need to have
> flooding enabled in order to inject traffic with any MAC DA from the
> control interface, it certainly doesn't hurt either, and it even makes
> more sense than disabling flooding of unknown traffic towards that port.
> 
> Note that the implementation is a bit wacky because the switchdev API
> for port attributes is very counterproductive. Instead of issuing a
> single switchdev notification with a bitwise OR of all flags that we're
> modifying, we need to issue 4 individual notifications, one for each bit.
> This is because the SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS notifier
> forces you to refuse the entire operation if there's at least one bit
> which you can't offload, and that is currently BR_BCAST_FLOOD which
> nobody does. So this change would do nothing for no one if we offloaded
> all flags at once, but the idea is to offload as much as possible
> instead of all or nothing.
> 
> Signed-off-by: Vladimir Oltean 
> ---
>  net/bridge/br_if.c  | 24 +++-
>  net/bridge/br_netlink.c | 16 
>  net/bridge/br_private.h |  2 ++
>  3 files changed, 29 insertions(+), 13 deletions(-)
> 

Hi Vladimir,
I think this patch potentially breaks some use cases. There are a few problems, I'll
start with the more serious one: before, the ports would have a set of flags that were
always set when joining; now, due to how nbp_flags_change() handles flag setting, some might
not be set, which would immediately change behaviour w.r.t. software forwarding. I'll use your
example of BR_BCAST_FLOOD: a lot of drivers will return an error for it and any broadcast
towards these ports will be dropped; we have mixed environments with software ports that
sometimes have traffic (e.g. decapped ARP requests) software forwarded, which will stop working.
The other, lesser issue is with the style below: these three calls for each flag are
just ugly and look weird, as you've also noted. Since these APIs are internal, can we do better?

Cheers,
 Nik

> diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
> index f7d2f472ae24..890654f0 100644
> --- a/net/bridge/br_if.c
> +++ b/net/bridge/br_if.c
> @@ -89,6 +89,21 @@ void br_port_carrier_check(struct 

Re: [PATCH net] net: bridge: use switchdev for port flags set through sysfs too

2021-02-08 Thread Nikolay Aleksandrov
On 07/02/2021 21:47, Vladimir Oltean wrote:
> From: Vladimir Oltean 
> 
> Looking through patchwork I don't see that there was any consensus to
> use switchdev notifiers only in case of netlink provided port flags but
> not sysfs (as a sort of deprecation, punishment or anything like that),
> so we should probably keep the user interface consistent in terms of
> functionality.
> 
> http://patchwork.ozlabs.org/project/netdev/patch/20170605092043.3523-3-j...@resnulli.us/
> http://patchwork.ozlabs.org/project/netdev/patch/20170608064428.4785-3-j...@resnulli.us/
> 
> Fixes: 3922285d96e7 ("net: bridge: Add support for offloading port 
> attributes")
> Signed-off-by: Vladimir Oltean 
> ---
>  net/bridge/br_sysfs_if.c | 9 ++---
>  1 file changed, 6 insertions(+), 3 deletions(-)
> 
> diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
> index 96ff63cde1be..5aea9427ffe1 100644
> --- a/net/bridge/br_sysfs_if.c
> +++ b/net/bridge/br_sysfs_if.c
> @@ -59,9 +59,8 @@ static BRPORT_ATTR(_name, 0644, 
> \
>  static int store_flag(struct net_bridge_port *p, unsigned long v,
> unsigned long mask)
>  {
> - unsigned long flags;
> -
> - flags = p->flags;
> + unsigned long flags = p->flags;
> + int err;
>  
>   if (v)
>   flags |= mask;
> @@ -69,6 +68,10 @@ static int store_flag(struct net_bridge_port *p, unsigned 
> long v,
>   flags &= ~mask;
>  
>   if (flags != p->flags) {
> + err = br_switchdev_set_port_flag(p, flags, mask);
> + if (err)
> + return err;
> +
>   p->flags = flags;
>   br_port_flags_change(p, mask);
>   }
> 

Acked-by: Nikolay Aleksandrov 


Re: [PATCH 106/141] net: bridge: Fix fall-through warnings for Clang

2021-02-02 Thread Nikolay Aleksandrov
On 20/11/2020 20:37, Gustavo A. R. Silva wrote:
> In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning
> by explicitly adding a break statement instead of letting the code fall
> through to the next case.
> 
> Link: https://github.com/KSPP/linux/issues/115
> Signed-off-by: Gustavo A. R. Silva 
> ---
>  net/bridge/br_input.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
> index 59a318b9f646..8db219d979c5 100644
> --- a/net/bridge/br_input.c
> +++ b/net/bridge/br_input.c
> @@ -148,6 +148,7 @@ int br_handle_frame_finish(struct net *net, struct sock 
> *sk, struct sk_buff *skb
>   break;
>   case BR_PKT_UNICAST:
>   dst = br_fdb_find_rcu(br, eth_hdr(skb)->h_dest, vid);
> + break;
>   default:
>   break;
>   }
> 

Somehow this hasn't hit my inbox, good thing I just got the reply and saw the
patch. Anyway, thanks!

Acked-by: Nikolay Aleksandrov 



Re: [PATCH] bridge: Use PTR_ERR_OR_ZERO instead if(IS_ERR(...)) + PTR_ERR

2021-01-25 Thread Nikolay Aleksandrov
On 25/01/2021 04:39, Jiapeng Zhong wrote:
> coccicheck suggested using PTR_ERR_OR_ZERO() and looking at the code.
> 
> Fix the following coccicheck warnings:
> 
> ./net/bridge/br_multicast.c:1295:7-13: WARNING: PTR_ERR_OR_ZERO can be
> used.
> 
> Reported-by: Abaci 
> Signed-off-by: Jiapeng Zhong 
> ---
>  net/bridge/br_multicast.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
> index 257ac4e..2229d10 100644
> --- a/net/bridge/br_multicast.c
> +++ b/net/bridge/br_multicast.c
> @@ -1292,7 +1292,7 @@ static int br_multicast_add_group(struct net_bridge *br,
>   pg = __br_multicast_add_group(br, port, group, src, filter_mode,
> igmpv2_mldv1, false);
>   /* NULL is considered valid for host joined groups */
> - err = IS_ERR(pg) ? PTR_ERR(pg) : 0;
> + err = PTR_ERR_OR_ZERO(pg);
>   spin_unlock(>multicast_lock);
>  
>   return err;
> 

This should be targeted at net-next.
Acked-by: Nikolay Aleksandrov 





Re: [PATCH v1] ipv4: add iPv4_is_multicast() check in ip_mc_leave_group().

2021-01-19 Thread Nikolay Aleksandrov
On 19/01/2021 06:39, Jakub Kicinski wrote:
> On Sun, 17 Jan 2021 05:34:16 -0800 wangyingji...@126.com wrote:
>> From: Yingjie Wang 
>>
>> There is no iPv4_is_multicast() check added to ip_mc_leave_group()
>> to check if imr->imr_multiaddr.s_addr is a multicast address.
>> If not a multicast address, it may result in an error.
> 
> Could you please say more? From looking at the code it seems like
> no address should match if group is non-mcast, and -EADDRNOTAVAIL 
> will be returned.
> 
> Adding Nik to CC.
> 

Thanks, and absolutely right. I don't see any point in changing the code, also
you are definitely not fixing any bug. 

>> In some cases, the callers of ip_mc_leave_group() don't check
>> whether it is multicast address or not before calling
>> such as do_ip_setsockopt(). So I suggest adding the ipv4_is_multicast()
>> check to the ip_mc_leave_group() to prevent this from happening.
>>
>> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
>> Signed-off-by: Yingjie Wang 
>> ---
>>  net/ipv4/igmp.c | 3 +++
>>  1 file changed, 3 insertions(+)
>>
>> diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
>> index 7b272bbed2b4..1b6f91271cfd 100644
>> --- a/net/ipv4/igmp.c
>> +++ b/net/ipv4/igmp.c
>> @@ -2248,6 +2248,9 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn 
>> *imr)
>>  u32 ifindex;
>>  int ret = -EADDRNOTAVAIL;
>>  
>> +if (!ipv4_is_multicast(group))
>> +return -EINVAL;
>> +
>>  ASSERT_RTNL();
>>  
>>  in_dev = ip_mc_find_dev(net, imr);
> 



Re: [PATCH v4 net-next] net: bridge: check vlan with eth_type_vlan() method

2021-01-18 Thread Nikolay Aleksandrov
On 17/01/2021 10:09, menglong8.d...@gmail.com wrote:
> From: Menglong Dong 
> 
> Replace some checks for ETH_P_8021Q and ETH_P_8021AD with
> eth_type_vlan().
> 
> Signed-off-by: Menglong Dong 
> ---
> v4:
> - remove unnecessary brackets.
> 
> v3:
> - fix compile warning in br_vlan_set_proto() by casting 'val' to
>   be16.
> 
> v2:
> - use eth_type_vlan() in br_validate() and __br_vlan_set_proto()
>   too.
> ---
>  net/bridge/br_forward.c |  3 +--
>  net/bridge/br_netlink.c | 12 +++-
>  net/bridge/br_vlan.c|  2 +-
>  3 files changed, 5 insertions(+), 12 deletions(-)
> 

Acked-by: Nikolay Aleksandrov 



Re: [RFC PATCH v2] net: bridge: igmp: Extend IGMP query to be per vlan

2021-01-18 Thread Nikolay Aleksandrov
On 16/01/2021 17:39, Joachim Wiberg wrote:
> On Wed, Jan 13, 2021 at 14:15, Nikolay Aleksandrov  wrote:
>> On 12/01/2021 15:59, Horatiu Vultur wrote:
>>> Based on the comments of the previous version, we started to work on a
>>> new version, so it would be possible to enable/disable queries per vlan.
>>> [snip]
>>> We were wondering if this what you had in mind when you proposed to have
>>> this per vlan? Or we are completely off? Or we should fix some of the
>>> issues that I mentioned, before you can see more clearly the direction?
>> No, unfortunately not even close. We already have per-port per-vlan and 
>> global per-vlan
>> contexts which are also linked together for each vlan, those must be used 
>> for any vlan
>> configuration and state. The problem is that you'd have to mix igmp and vlan 
>> code and
>> those two live under two different kconfig options, and worse rely on 
>> different locks, so
>> extra care must be taken.
>> [snip]
>> If you don't need this asap, I'll probably get to it in two months
>> after EHT and the new bridge flush api, even we are still carrying an 
>> out-of-tree patch
>> for this which someone (not from cumulus) tried to upstream a few years 
>> back, but it also has
>> wrong design in general. :)
> 
> Hi,
> 
> very interesting thread this!  I believe I may be the one who posted the
> patch[1] a few years ago, and I fully agree with Nik here.  We developed
> the basic concepts further at Westermo, but it's been really difficult
> to get it stable.
> 
> We have discussed at length at work if an IGMP snooping implementation
> really belongs in the bridge, or if it's better suited as a user space
> daemon?  Similar to what was decided for RSTP/MSTP support, i.e., the
> bridge only has STP and RSTP/MSTP is handled by mstpd[2].
> 
> Most of what's required for a user space implementation is available,
> but it would've been nice if a single AF_PACKET socket on br0 could be
> used to catch what brport (ifindex) a query or report comes in on.  As
> it is now that information is lost/replaced with the ifindex of br0.
> And then there's the issue of detecting and forwarding to a multicast
> routing daemon on top of br0.  That br0 is not a brport in the MDB, or
> that host_joined cannot be set/seen with iproute2 is quite limiting.
> These issues can of course be addressed, but are they of interest to
> the community at large?
> 
> 
> Best regards
>  /Joachim
> 
> [1]: https://lore.kernel.org/netdev/20180418120713.GA10742@troglobit/
> [2]: https://github.com/mstpd/mstpd
> 

Hi Joachim,
I actually had started implementing IGMPv3/MLDv2 as a user-space daemon part of
FRRouting (since it already has a lot of the required infra to talk to the kernel).
It also has IGMPv3/MLDv2 support within pimd, so a lot of code can be shared.
Obviously there are pros and cons to each choice, but I'd be interested to see a
full user-space implementation. I decided to make the kernel support more complete
since it already did IGMPv2, and so stopped work on the new FRR daemon. If needed, I'd be
happy to help with the kernel support for a new user-space daemon, and also can
contribute to the daemon itself if time permits.

Thanks,
 Nik



Re: [PATCH net-next] net: bridge: use eth_type_vlan in br_dev_queue_push_xmit

2021-01-14 Thread Nikolay Aleksandrov
On 14/01/2021 09:51, menglong8.d...@gmail.com wrote:
> From: Menglong Dong 
> 
> Replace the check for ETH_P_8021Q and ETH_P_8021AD in
> br_dev_queue_push_xmit with eth_type_vlan.
> 
> Signed-off-by: Menglong Dong 
> ---
>  net/bridge/br_forward.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
> index e28ffadd1371..6e9b049ae521 100644
> --- a/net/bridge/br_forward.c
> +++ b/net/bridge/br_forward.c
> @@ -39,8 +39,7 @@ int br_dev_queue_push_xmit(struct net *net, struct sock 
> *sk, struct sk_buff *skb
>   br_drop_fake_rtable(skb);
>  
>   if (skb->ip_summed == CHECKSUM_PARTIAL &&
> - (skb->protocol == htons(ETH_P_8021Q) ||
> -  skb->protocol == htons(ETH_P_8021AD))) {
> + eth_type_vlan(skb->protocol)) {
>   int depth;
>  
>   if (!__vlan_get_protocol(skb, skb->protocol, ))
> 


Please change all similar checks, there are also:
br_netlink.c - br_validate()
br_vlan.c - br_vlan_set_proto()

Thanks.


Re: [PATCH] net/bridge: Fix inconsistent format argument types

2021-01-13 Thread Nikolay Aleksandrov
On 13/01/2021 11:44, Jiapeng Zhong wrote:
> Fix the following warnings:
> 
> net/bridge/br_sysfs_if.c(162): warning: %ld in format string (no. 1)
> requires 'long' but the argument type is 'unsigned long'.
> net/bridge/br_sysfs_if.c(155): warning: %ld in format string (no. 1)
> requires 'long' but the argument type is 'unsigned long'.
> net/bridge/br_sysfs_if.c(148): warning: %ld in format string (no. 1)
> requires 'long' but the argument type is 'unsigned long'.
> 
> Signed-off-by: Jiapeng Zhong 
> Reported-by: Abaci Robot
> ---
>  net/bridge/br_sysfs_if.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 

As I replied to your other patch with the same subject, please squash them
together and send them targeted at net-next.

Thanks.

> diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
> index 7a59cdd..16a7d41 100644
> --- a/net/bridge/br_sysfs_if.c
> +++ b/net/bridge/br_sysfs_if.c
> @@ -145,21 +145,21 @@ static ssize_t show_port_state(struct net_bridge_port 
> *p, char *buf)
>  static ssize_t show_message_age_timer(struct net_bridge_port *p,
>   char *buf)
>  {
> - return sprintf(buf, "%ld\n", br_timer_value(>message_age_timer));
> + return sprintf(buf, "%lu\n", br_timer_value(>message_age_timer));
>  }
>  static BRPORT_ATTR(message_age_timer, 0444, show_message_age_timer, NULL);
>  
>  static ssize_t show_forward_delay_timer(struct net_bridge_port *p,
>   char *buf)
>  {
> - return sprintf(buf, "%ld\n", br_timer_value(>forward_delay_timer));
> + return sprintf(buf, "%lu\n", br_timer_value(>forward_delay_timer));
>  }
>  static BRPORT_ATTR(forward_delay_timer, 0444, show_forward_delay_timer, 
> NULL);
>  
>  static ssize_t show_hold_timer(struct net_bridge_port *p,
>   char *buf)
>  {
> - return sprintf(buf, "%ld\n", br_timer_value(>hold_timer));
> + return sprintf(buf, "%lu\n", br_timer_value(>hold_timer));
>  }
>  static BRPORT_ATTR(hold_timer, 0444, show_hold_timer, NULL);
>  
> 



Re: [PATCH] net/bridge: Fix inconsistent format argument types

2021-01-13 Thread Nikolay Aleksandrov
On 13/01/2021 11:36, Jiapeng Zhong wrote:
> Fix the following warnings:
> 
> net/bridge/br_sysfs_br.c(833): warning: %u in format string (no. 1)
> requires 'unsigned int' but the argument type is 'signed int'.
> net/bridge/br_sysfs_br.c(817): warning: %u in format string (no. 1)
> requires 'unsigned int' but the argument type is 'signed int'.
> net/bridge/br_sysfs_br.c(261): warning: %ld in format string (no. 1)
> requires 'long' but the argument type is 'unsigned long'.
> net/bridge/br_sysfs_br.c(253): warning: %ld in format string (no. 1)
> requires 'long' but the argument type is 'unsigned long'.
> net/bridge/br_sysfs_br.c(244): warning: %ld in format string (no. 1)
> requires 'long' but the argument type is 'unsigned long'.
> net/bridge/br_sysfs_br.c(236): warning: %ld in format string (no. 1)
> requires 'long' but the argument type is 'unsigned long'.
> 
> Signed-off-by: Jiapeng Zhong 
> Reported-by: Abaci Robot
> ---
>  net/bridge/br_sysfs_br.c | 12 ++--
>  1 file changed, 6 insertions(+), 6 deletions(-)
> 

Hi,
You have sent 2 patches with the same subject. Please squash them into a single
patch and target it to net-next, these don't need to be backported.

Thanks,
 Nik

> diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
> index 7db06e3..7512921 100644
> --- a/net/bridge/br_sysfs_br.c
> +++ b/net/bridge/br_sysfs_br.c
> @@ -233,7 +233,7 @@ static ssize_t hello_timer_show(struct device *d,
>   struct device_attribute *attr, char *buf)
>  {
>   struct net_bridge *br = to_bridge(d);
> - return sprintf(buf, "%ld\n", br_timer_value(>hello_timer));
> + return sprintf(buf, "%lu\n", br_timer_value(>hello_timer));
>  }
>  static DEVICE_ATTR_RO(hello_timer);
>  
> @@ -241,7 +241,7 @@ static ssize_t tcn_timer_show(struct device *d, struct 
> device_attribute *attr,
> char *buf)
>  {
>   struct net_bridge *br = to_bridge(d);
> - return sprintf(buf, "%ld\n", br_timer_value(>tcn_timer));
> + return sprintf(buf, "%lu\n", br_timer_value(>tcn_timer));
>  }
>  static DEVICE_ATTR_RO(tcn_timer);
>  
> @@ -250,7 +250,7 @@ static ssize_t topology_change_timer_show(struct device 
> *d,
> char *buf)
>  {
>   struct net_bridge *br = to_bridge(d);
> - return sprintf(buf, "%ld\n", 
> br_timer_value(>topology_change_timer));
> + return sprintf(buf, "%lu\n", 
> br_timer_value(>topology_change_timer));
>  }
>  static DEVICE_ATTR_RO(topology_change_timer);
>  
> @@ -258,7 +258,7 @@ static ssize_t gc_timer_show(struct device *d, struct 
> device_attribute *attr,
>char *buf)
>  {
>   struct net_bridge *br = to_bridge(d);
> - return sprintf(buf, "%ld\n", br_timer_value(>gc_work.timer));
> + return sprintf(buf, "%lu\n", br_timer_value(>gc_work.timer));
>  }
>  static DEVICE_ATTR_RO(gc_timer);
>  
> @@ -814,7 +814,7 @@ static ssize_t vlan_stats_enabled_show(struct device *d,
>  char *buf)
>  {
>   struct net_bridge *br = to_bridge(d);
> - return sprintf(buf, "%u\n", br_opt_get(br, BROPT_VLAN_STATS_ENABLED));
> + return sprintf(buf, "%d\n", br_opt_get(br, BROPT_VLAN_STATS_ENABLED));
>  }
>  
>  static ssize_t vlan_stats_enabled_store(struct device *d,
> @@ -830,7 +830,7 @@ static ssize_t vlan_stats_per_port_show(struct device *d,
>   char *buf)
>  {
>   struct net_bridge *br = to_bridge(d);
> - return sprintf(buf, "%u\n", br_opt_get(br, BROPT_VLAN_STATS_PER_PORT));
> + return sprintf(buf, "%d\n", br_opt_get(br, BROPT_VLAN_STATS_PER_PORT));
>  }
>  
>  static ssize_t vlan_stats_per_port_store(struct device *d,
> 



Re: [RFC PATCH v2] net: bridge: igmp: Extend IGMP query to be per vlan

2021-01-13 Thread Nikolay Aleksandrov
On 12/01/2021 15:59, Horatiu Vultur wrote:
> Based on the comments of the previous version, we started to work on a
> new version, so it would be possible to enable/disable queries per vlan.
> This is still work in progress and there are plenty of things that are
> not implemented and tested:
> - ipv6 support
> - the fast path needs to be improved
> - currently it is possible only to enable/disable the queries per vlan,
>   all the other configurations are global
> - toggling vlan_filtering is not tested
> - remove duplicated information
> - etc...
> 
> But there are few things that are working like:
> - sending queries per vlan
> - stop sending queries if there is a better querier per vlan
> - when ports are added/removed from vlan
> - etc...
> 
> We were wondering if this what you had in mind when you proposed to have
> this per vlan? Or we are completely off? Or we should fix some of the
> issues that I mentioned, before you can see more clearly the direction?
> 
> Signed-off-by: Horatiu Vultur 
> ---
>  include/uapi/linux/if_link.h |   1 +
>  net/bridge/br_device.c   |   2 +-
>  net/bridge/br_input.c|   2 +-
>  net/bridge/br_multicast.c| 505 ++-
>  net/bridge/br_netlink.c  |   9 +-
>  net/bridge/br_private.h  |  90 ++-
>  net/bridge/br_sysfs_br.c |  31 ++-
>  net/bridge/br_vlan.c |   3 +
>  8 files changed, 560 insertions(+), 83 deletions(-)
> 

Hi Horatiu,
No, unfortunately not even close. We already have per-port per-vlan and global per-vlan
contexts which are also linked together for each vlan; those must be used for any vlan
configuration and state. The problem is that you'd have to mix igmp and vlan code, and
those two live under two different kconfig options and, worse, rely on different locks, so
extra care must be taken. Any vlan lookups must use the vlan hashes; (almost) _no_ linear
walks or new lists are needed (the exception is obviously a port going down, where a walk
over the port's vlans is needed). In almost all contexts below a vlan lookup has already been
done by the input functions; the result of that lookup must be saved and re-used. The
vlan options API needs to be used for configuring vlans (per-vlan mcast options); unfortunately
I still haven't upstreamed the iproute2 part, so you might have to do that as well.
Obviously with all of the above the current default situation must not change unless the
user configures it so. If you don't need this asap, I'll probably get to it in two months
after EHT and the new bridge flush api; we are even still carrying an out-of-tree patch
for this which someone (not from cumulus) tried to upstream a few years back, but it also has
the wrong design in general. :)

Thanks,
 Nik

> diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
> index 82708c6db432..11ec1d45c24e 100644
> --- a/include/uapi/linux/if_link.h
> +++ b/include/uapi/linux/if_link.h
> @@ -472,6 +472,7 @@ enum {
>   IFLA_BR_MCAST_MLD_VERSION,
>   IFLA_BR_VLAN_STATS_PER_PORT,
>   IFLA_BR_MULTI_BOOLOPT,
> + IFLA_BR_MCAST_QUERIER_VID,
>   __IFLA_BR_MAX,
>  };
>  
> diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
> index 3f2f06b4dd27..aca4e8074a8f 100644
> --- a/net/bridge/br_device.c
> +++ b/net/bridge/br_device.c
> @@ -89,7 +89,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct 
> net_device *dev)
>  
>   mdst = br_mdb_get(br, skb, vid);
>   if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
> - br_multicast_querier_exists(br, eth_hdr(skb), mdst))
> + br_multicast_querier_exists(br, eth_hdr(skb), mdst, vid))
>   br_multicast_flood(mdst, skb, false, true);
>   else
>   br_flood(br, skb, BR_PKT_MULTICAST, false, true);
> diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
> index 85d9dae2..03e445af6c1f 100644
> --- a/net/bridge/br_input.c
> +++ b/net/bridge/br_input.c
> @@ -130,7 +130,7 @@ int br_handle_frame_finish(struct net *net, struct sock 
> *sk, struct sk_buff *skb
>   case BR_PKT_MULTICAST:
>   mdst = br_mdb_get(br, skb, vid);
>   if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
> - br_multicast_querier_exists(br, eth_hdr(skb), mdst)) {
> + br_multicast_querier_exists(br, eth_hdr(skb), mdst, vid)) {
>   if ((mdst && mdst->host_joined) ||
>   br_multicast_is_router(br)) {
>   local_rcv = true;
> diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
> index 257ac4e25f6d..b4fac25101e4 100644
> --- a/net/bridge/br_multicast.c
> +++ b/net/bridge/br_multicast.c
> @@ -48,8 +48,11 @@ static const struct rhashtable_params 
> br_sg_port_rht_params = {
>   .automatic_shrinking = true,
>  };
>  
> +static void br_ip4_multicast_query_expired(struct timer_list *t);
> +static void 

Re: [PATCH v2 net-next 1/6] net: bridge: notify switchdev of disappearance of old FDB entry upon migration

2020-12-13 Thread Nikolay Aleksandrov
On 13/12/2020 15:55, Vladimir Oltean wrote:
> Hi Nik,
> 
> On Sun, Dec 13, 2020 at 03:22:16PM +0200, Nikolay Aleksandrov wrote:
>> Hi Vladimir,
>> Thank you for the good explanation, it really helps a lot to understand the 
>> issue.
>> Even though it's deceptively simple, that call adds another lock/unlock for 
>> everyone
>> when moving or learning (due to notifier lock)
> 
> This unlikely code path is just on movement, as far as I understand it.
> How often do we expect that to happen? Is there any practical use case
> where an FDB entry ping pongs between ports?
> 

It was my bad because I was looking at the wrong atomic notifier call function.
Switchdev uses the standard atomic notifier call chain with RCU only, which is fine,
and there are no locks involved.
I was looking at the _robust version with a spin_lock, and that would've meant that
learning (because of notifications) would also block movements and vice versa.

Anyway, as I said, all of that is not an issue; the patch is good. I've replied to my comment
and acked it a few minutes ago.

>> , but I do like how simple the solution
>> becomes with this change, so I'm not strictly against it. I think I'll add a 
>> "refcnt"-like
>> check in the switchdev fn which would process the chain only when there are 
>> registered users
>> to avoid any locks when moving fdbs on pure software bridges (like it was 
>> before swdev).
> 
> That makes sense.
> 
>> I get that the alternative is to track these within DSA, I'm tempted to say 
>> that's not such
>> a bad alternative as this change would make moving fdbs slower in general.
> 
> I deliberately said "rule" instead of "static FDB entry" and "control
> interface" instead of "CPU port" because this is not only about DSA.
> I know of at least one other switchdev device which doesn't support
> source address learning for host-injected traffic. It isn't even so much
> of a quirk as it is the way that the hardware works. If you think of it
> as a "switch with queues", there would be little reason for a hardware
> designer to not just provide you the means to inject directly into the
> queues of the egress port, therefore bypassing the normal analyzer and
> forwarding logic.
> 
> Everything we do in DSA must be copied sooner or later in other similar
> drivers, to get the same functionality. So I would really like to keep
> this interface simple, and not inflict unnecessary complications if
> possible.
> 

Right, I like how the solution and this set look.

>> Have you thought about another way to find out, e.g. if more fdb
>> information is passed to the notifications ?
> 
> Like what, keep emitting just the ADD notification, but put some
> metadata in it letting listeners know that it was actually migrated from
> a different bridge port, in order to save one notification? That would
> mean that we would need to:
> 
>   case SWITCHDEV_FDB_ADD_TO_DEVICE:
>   fdb_info = ptr;
> 
>   if (dsa_slave_dev_check(dev)) {
>   if (!fdb_info->migrated_from_dev || 
> dsa_slave_dev_check(fdb_info->migrated_from_dev)) {
>   if (!fdb_info->added_by_user)
>   return NOTIFY_OK;
> 
>   dp = dsa_slave_to_port(dev);
> 
>   add = true;
>   } else if (fdb_info->migrated_from_dev && 
> !dsa_slave_dev_check(fdb_info->migrated_from_dev)) {
>   /* An address has migrated from a non-DSA port
>* to a DSA port. Check if that non-DSA port was
>* bridged with us, aka if we previously had 
> that
>* address installed towards the CPU.
>*/
>   struct net_device *br_dev;
>   struct dsa_slave_priv *p;
> 
>   br_dev = netdev_master_upper_dev_get_rcu(dev);
>   if (!br_dev)
>   return NOTIFY_DONE;
> 
>   if (!netif_is_bridge_master(br_dev))
>   return NOTIFY_DONE;
> 
>   p = dsa_slave_dev_lower_find(br_dev);
>   if (!p)
>   return NOTIFY_DONE;
> 
>   delete = true;
>   }
>   } else {
>   /* Snoop addresses learnt 

Re: [PATCH v2 net-next 1/6] net: bridge: notify switchdev of disappearance of old FDB entry upon migration

2020-12-13 Thread Nikolay Aleksandrov
On 13/12/2020 15:22, Nikolay Aleksandrov wrote:
> On 13/12/2020 04:40, Vladimir Oltean wrote:
>> Currently the bridge emits atomic switchdev notifications for
>> dynamically learnt FDB entries. Monitoring these notifications works
>> wonders for switchdev drivers that want to keep their hardware FDB in
>> sync with the bridge's FDB.
>>
>> For example station A wants to talk to station B in the diagram below,
>> and we are concerned with the behavior of the bridge on the DUT device:
>>
>>                    DUT
>>  +-------------------------------------+
>>  |                 br0                 |
>>  | +------+ +------+ +------+ +------+ |
>>  | |      | |      | |      | |      | |
>>  | | swp0 | | swp1 | | swp2 | | eth0 | |
>>  +-------------------------------------+
>>       |        |                  |
>>   Station A    |                  |
>>                |                  |
>>          +--+------+--+    +--+------+--+
>>          |  |      |  |    |  |      |  |
>>          |  | swp0 |  |    |  | swp0 |  |
>>  Another |  +------+  |    |  +------+  | Another
>>   switch |     br0    |    |     br0    | switch
>>          |  +------+  |    |  +------+  |
>>          |  |      |  |    |  |      |  |
>>          |  | swp1 |  |    |  | swp1 |  |
>>          +--+------+--+    +--+------+--+
>>                                   |
>>                               Station B
>>
>> Interfaces swp0, swp1, swp2 are handled by a switchdev driver that has
>> the following property: frames injected from its control interface bypass
>> the internal address analyzer logic, and therefore, this hardware does
>> not learn from the source address of packets transmitted by the network
>> stack through it. So, since bridging between eth0 (where Station B is
>> attached) and swp0 (where Station A is attached) is done in software,
>> the switchdev hardware will never learn the source address of Station B.
>> So the traffic towards that destination will be treated as unknown, i.e.
>> flooded.
>>
>> This is where the bridge notifications come in handy. When br0 on the
>> DUT sees frames with Station B's MAC address on eth0, the switchdev
>> driver gets these notifications and can install a rule to send frames
>> towards Station B's address that are incoming from swp0, swp1, swp2,
>> only towards the control interface. This is all switchdev driver private
>> business, which the notification makes possible.
>>
>> All is fine until someone unplugs Station B's cable and moves it to the
>> other switch:
>>
>>                    DUT
>>  +-------------------------------------+
>>  |                 br0                 |
>>  | +------+ +------+ +------+ +------+ |
>>  | |      | |      | |      | |      | |
>>  | | swp0 | | swp1 | | swp2 | | eth0 | |
>>  +-------------------------------------+
>>       |        |                  |
>>   Station A    |                  |
>>                |                  |
>>          +--+------+--+    +--+------+--+
>>          |  |      |  |    |  |      |  |
>>          |  | swp0 |  |    |  | swp0 |  |
>>  Another |  +------+  |    |  +------+  | Another
>>   switch |     br0    |    |     br0    | switch
>>          |  +------+  |    |  +------+  |
>>          |  |      |  |    |  |      |  |
>>          |  | swp1 |  |    |  | swp1 |  |
>>          +--+------+--+    +--+------+--+
>>                |
>>            Station B
>>
>> Luckily for the use cases we care about, Station B is noisy enough that
>> the DUT hears it (on swp1 this time). swp1 receives the frames and
>> delivers them to the bridge, who enters the unlikely path in br_fdb_update
>> of updating an existing entry. It moves the entry in the software bridge
>> to swp1 and emits an addition notification towards that.
>>
>> As far as the switchdev driver is concerned, all that it needs to ensure
>> is that traffic between Station A and Station B is not forever broken.
>> If it does nothing, then the stale rule to send frames for Station B
>> towards the control interface remains in place. But Station B is no
>> longer reachable via the control interface, but via a port that can
>> offload the bridge port learning attribute. It's just that the port is
>> prevented from learning this address, since the rule overrides FDB
>> updates. So the rule needs to go. The question is via what mechanism.
>>
>> It sure would be possible for this switchdev driver to keep track of all
>> addresses which are sent 

Re: [PATCH v2 net-next 1/6] net: bridge: notify switchdev of disappearance of old FDB entry upon migration

2020-12-13 Thread Nikolay Aleksandrov
On 13/12/2020 04:40, Vladimir Oltean wrote:
> Currently the bridge emits atomic switchdev notifications for
> dynamically learnt FDB entries. Monitoring these notifications works
> wonders for switchdev drivers that want to keep their hardware FDB in
> sync with the bridge's FDB.
> 
> For example station A wants to talk to station B in the diagram below,
> and we are concerned with the behavior of the bridge on the DUT device:
> 
>                    DUT
>  +-------------------------------------+
>  |                 br0                 |
>  | +------+ +------+ +------+ +------+ |
>  | |      | |      | |      | |      | |
>  | | swp0 | | swp1 | | swp2 | | eth0 | |
>  +-------------------------------------+
>       |        |                  |
>   Station A    |                  |
>                |                  |
>          +--+------+--+    +--+------+--+
>          |  |      |  |    |  |      |  |
>          |  | swp0 |  |    |  | swp0 |  |
>  Another |  +------+  |    |  +------+  | Another
>   switch |     br0    |    |     br0    | switch
>          |  +------+  |    |  +------+  |
>          |  |      |  |    |  |      |  |
>          |  | swp1 |  |    |  | swp1 |  |
>          +--+------+--+    +--+------+--+
>                                   |
>                               Station B
> 
> Interfaces swp0, swp1, swp2 are handled by a switchdev driver that has
> the following property: frames injected from its control interface bypass
> the internal address analyzer logic, and therefore, this hardware does
> not learn from the source address of packets transmitted by the network
> stack through it. So, since bridging between eth0 (where Station B is
> attached) and swp0 (where Station A is attached) is done in software,
> the switchdev hardware will never learn the source address of Station B.
> So the traffic towards that destination will be treated as unknown, i.e.
> flooded.
> 
> This is where the bridge notifications come in handy. When br0 on the
> DUT sees frames with Station B's MAC address on eth0, the switchdev
> driver gets these notifications and can install a rule to send frames
> towards Station B's address that are incoming from swp0, swp1, swp2,
> only towards the control interface. This is all switchdev driver private
> business, which the notification makes possible.
> 
> All is fine until someone unplugs Station B's cable and moves it to the
> other switch:
> 
>                    DUT
>  +-------------------------------------+
>  |                 br0                 |
>  | +------+ +------+ +------+ +------+ |
>  | |      | |      | |      | |      | |
>  | | swp0 | | swp1 | | swp2 | | eth0 | |
>  +-------------------------------------+
>       |        |                  |
>   Station A    |                  |
>                |                  |
>          +--+------+--+    +--+------+--+
>          |  |      |  |    |  |      |  |
>          |  | swp0 |  |    |  | swp0 |  |
>  Another |  +------+  |    |  +------+  | Another
>   switch |     br0    |    |     br0    | switch
>          |  +------+  |    |  +------+  |
>          |  |      |  |    |  |      |  |
>          |  | swp1 |  |    |  | swp1 |  |
>          +--+------+--+    +--+------+--+
>                |
>            Station B
> 
> Luckily for the use cases we care about, Station B is noisy enough that
> the DUT hears it (on swp1 this time). swp1 receives the frames and
> delivers them to the bridge, which enters the unlikely path in br_fdb_update
> of updating an existing entry. It moves the entry in the software bridge
> to swp1 and emits an addition notification towards that.
> 
> As far as the switchdev driver is concerned, all that it needs to ensure
> is that traffic between Station A and Station B is not forever broken.
> If it does nothing, then the stale rule to send frames for Station B
> towards the control interface remains in place. However, Station B is no
> longer reachable via the control interface, but rather via a port that can
> offload the bridge port learning attribute. It's just that the port is
> prevented from learning this address, since the rule overrides FDB
> updates. So the rule needs to go. The question is via what mechanism.
> 
> It sure would be possible for this switchdev driver to keep track of all
> addresses which are sent to the control interface, and then also listen
> for bridge notifier events on its own ports, searching for the ones that
> have a MAC address which was previously sent to the control interface.
> But this is cumbersome and inefficient. Instead, with one small change,
> the bridge could notify of the address deletion from the old port, in a
> symmetrical manner with how it did for the insertion. Then the switchdev
> driver would not be required to monitor learn/forget events for its own
> ports. It could just delete the rule towards the control interface upon
> bridge entry migration. This would make hardware address learning be
> possible again. Then it would take 

Re: [PATCH v2] net: bridge: Fix a warning when del bridge sysfs

2020-12-11 Thread Nikolay Aleksandrov
On 11/12/2020 14:29, Wang Hai wrote:
> I got a warning report:
> 
> br_sysfs_addbr: can't create group bridge4/bridge
> [ cut here ]
> sysfs group 'bridge' not found for kobject 'bridge4'
> WARNING: CPU: 2 PID: 9004 at fs/sysfs/group.c:279 sysfs_remove_group 
> fs/sysfs/group.c:279 [inline]
> WARNING: CPU: 2 PID: 9004 at fs/sysfs/group.c:279 
> sysfs_remove_group+0x153/0x1b0 fs/sysfs/group.c:270
> Modules linked in: iptable_nat
> ...
> Call Trace:
>   br_dev_delete+0x112/0x190 net/bridge/br_if.c:384
>   br_dev_newlink net/bridge/br_netlink.c:1381 [inline]
>   br_dev_newlink+0xdb/0x100 net/bridge/br_netlink.c:1362
>   __rtnl_newlink+0xe11/0x13f0 net/core/rtnetlink.c:3441
>   rtnl_newlink+0x64/0xa0 net/core/rtnetlink.c:3500
>   rtnetlink_rcv_msg+0x385/0x980 net/core/rtnetlink.c:5562
>   netlink_rcv_skb+0x134/0x3d0 net/netlink/af_netlink.c:2494
>   netlink_unicast_kernel net/netlink/af_netlink.c:1304 [inline]
>   netlink_unicast+0x4a0/0x6a0 net/netlink/af_netlink.c:1330
>   netlink_sendmsg+0x793/0xc80 net/netlink/af_netlink.c:1919
>   sock_sendmsg_nosec net/socket.c:651 [inline]
>   sock_sendmsg+0x139/0x170 net/socket.c:671
>   sys_sendmsg+0x658/0x7d0 net/socket.c:2353
>   ___sys_sendmsg+0xf8/0x170 net/socket.c:2407
>   __sys_sendmsg+0xd3/0x190 net/socket.c:2440
>   do_syscall_64+0x33/0x40 arch/x86/entry/common.c:46
>   entry_SYSCALL_64_after_hwframe+0x44/0xa9
> 
> In br_device_event(), if the bridge sysfs entries fail to be added,
> br_device_event() should return an error. This prevents the warning
> when removing a bridge sysfs group that does not exist.
> 
> Fixes: bb900b27a2f4 ("bridge: allow creating bridge devices with netlink")
> Reported-by: Hulk Robot 
> Signed-off-by: Wang Hai 
> ---
> v1->v2: Fix this by checking the br_sysfs_addbr() return value, as per Nik's suggestion
>  net/bridge/br.c | 5 -
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/net/bridge/br.c b/net/bridge/br.c
> index 401eeb9142eb..1b169f8e7491 100644
> --- a/net/bridge/br.c
> +++ b/net/bridge/br.c
> @@ -43,7 +43,10 @@ static int br_device_event(struct notifier_block *unused, 
> unsigned long event, v
>  
>   if (event == NETDEV_REGISTER) {
>   /* register of bridge completed, add sysfs entries */
> - br_sysfs_addbr(dev);
> + err = br_sysfs_addbr(dev);
> + if (err)
> + return notifier_from_errno(err);
> +
>           return NOTIFY_DONE;
>   }
>   }
> 

Patch looks good, I also tested it with notifier error injection.
Tested-by: Nikolay Aleksandrov 
Acked-by: Nikolay Aleksandrov 


Re: [RFC net-next] net: bridge: igmp: Extend IGMP query with vlan support

2020-12-11 Thread Nikolay Aleksandrov
On 11/12/2020 11:26, Horatiu Vultur wrote:
> This patch tries to add vlan support to IGMP queries.
> It extends the function 'br_ip4_multicast_alloc_query' to add
> also a vlan tag if vlan is enabled. Therefore the bridge will send
> queries for each vlan the ports are in.
> 
> There are a few other places that need to be updated to be fully
> functional. But I am curious if this is the way to go forward or is
> there a different way of implementing this?
> 
> Signed-off-by: Horatiu Vultur 
> ---
>  net/bridge/br_multicast.c | 31 ++-
>  1 file changed, 26 insertions(+), 5 deletions(-)
> 

Hi Horatiu,
We've discussed this with other people on netdev before, the way forward is to
implement it as a per-vlan option and then have a per-vlan querier, which would
also make the change much bigger and more complex. In general some of the
multicast options need to be replicated for vlans to get proper per-vlan
multicast control and operation, but that would require changing a lot of logic
around the whole bridge (fast-path included, where it'd be most sensitive). The
good news is that these days we have per-vlan options support and so only the
actual per-vlan multicast implementation is left to be done.
I have this on my TODO list, unfortunately that list gets longer and longer,
so I'd be happy to review patches if someone decides to do it sooner. :)

Sorry, I couldn't find the previous discussion, it was a few years back.

Cheers,
 Nik

> diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
> index 484820c223a3..4c2db8a9efe0 100644
> --- a/net/bridge/br_multicast.c
> +++ b/net/bridge/br_multicast.c
> @@ -688,7 +688,8 @@ static struct sk_buff 
> *br_ip4_multicast_alloc_query(struct net_bridge *br,
>   __be32 ip_dst, __be32 group,
>   bool with_srcs, bool 
> over_lmqt,
>   u8 sflag, u8 *igmp_type,
> - bool *need_rexmit)
> + bool *need_rexmit,
> + __u16 vid)
>  {
>   struct net_bridge_port *p = pg ? pg->key.port : NULL;
>   struct net_bridge_group_src *ent;
> @@ -724,6 +725,9 @@ static struct sk_buff 
> *br_ip4_multicast_alloc_query(struct net_bridge *br,
>   }
>  
>   pkt_size = sizeof(*eth) + sizeof(*iph) + 4 + igmp_hdr_size;
> + if (br_vlan_enabled(br->dev) && vid != 0)
> + pkt_size += 4;
> +
>   if ((p && pkt_size > p->dev->mtu) ||
>   pkt_size > br->dev->mtu)
>   return NULL;
> @@ -732,6 +736,9 @@ static struct sk_buff 
> *br_ip4_multicast_alloc_query(struct net_bridge *br,
>   if (!skb)
>   goto out;
>  
> + if (br_vlan_enabled(br->dev) && vid != 0)
> + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
> +
>   skb->protocol = htons(ETH_P_IP);
>  
>   skb_reset_mac_header(skb);
> @@ -1008,7 +1015,8 @@ static struct sk_buff *br_multicast_alloc_query(struct 
> net_bridge *br,
>   ip4_dst, group->dst.ip4,
>   with_srcs, over_lmqt,
>   sflag, igmp_type,
> - need_rexmit);
> + need_rexmit,
> + group->vid);
>  #if IS_ENABLED(CONFIG_IPV6)
>   case htons(ETH_P_IPV6): {
>   struct in6_addr ip6_dst;
> @@ -1477,6 +1485,8 @@ static void br_multicast_send_query(struct net_bridge 
> *br,
>   struct bridge_mcast_own_query *own_query)
>  {
>   struct bridge_mcast_other_query *other_query = NULL;
> + struct net_bridge_vlan_group *vg;
> + struct net_bridge_vlan *v;
>   struct br_ip br_group;
>   unsigned long time;
>  
> @@ -1485,7 +1495,7 @@ static void br_multicast_send_query(struct net_bridge 
> *br,
>   !br_opt_get(br, BROPT_MULTICAST_QUERIER))
>   return;
>  
> - memset(&br_group.dst, 0, sizeof(br_group.dst));
> + memset(&br_group, 0, sizeof(br_group));
>  
>   if (port ? (own_query == &port->ip4_own_query) :
>  (own_query == &br->ip4_own_query)) {
> @@ -1501,8 +1511,19 @@ static void br_multicast_send_query(struct net_bridge 
> *br,
>   if (!other_query || timer_pending(&other_query->timer))
>   return;
>  
> - __br_multicast_send_query(br, port, NULL, NULL, &br_group, false, 0,
> -   NULL);
> + if (br_vlan_enabled(br->dev) && port) {
> + vg = nbp_vlan_group(port);
> +
> + list_for_each_entry(v, &vg->vlan_list, vlist) {
> + br_group.vid = v->vid == vg->pvid ? 0 : v->vid;
> +
> + __br_multicast_send_query(br, port, 

Re: [PATCH v3] bridge: Fix a deadlock when enabling multicast snooping

2020-12-05 Thread Nikolay Aleksandrov
On 05/12/2020 01:56, Joseph Huang wrote:
> When enabling multicast snooping, bridge module deadlocks on multicast_lock
> if 1) IPv6 is enabled, and 2) there is an existing querier on the same L2
> network.
> 
> The deadlock was caused by the following sequence: While holding the lock,
> br_multicast_open calls br_multicast_join_snoopers, which eventually causes
> IP stack to (attempt to) send out a Listener Report (in igmp6_join_group).
> Since the destination Ethernet address is a multicast address, br_dev_xmit
> feeds the packet back to the bridge via br_multicast_rcv, which in turn
> calls br_multicast_add_group, which then deadlocks on multicast_lock.
> 
> The fix is to move the call br_multicast_join_snoopers outside of the
> critical section. This works since br_multicast_join_snoopers only deals
> with IP and does not modify any multicast data structures of the bridge,
> so there's no need to hold the lock.
> 
> Steps to reproduce:
> 1. sysctl net.ipv6.conf.all.force_mld_version=1
> 2. have another querier
> 3. ip link set dev bridge type bridge mcast_snooping 0 && \
>ip link set dev bridge type bridge mcast_snooping 1 < deadlock >
> 
> A typical call trace looks like the following:
> 
> [  936.251495]  _raw_spin_lock+0x5c/0x68
> [  936.255221]  br_multicast_add_group+0x40/0x170 [bridge]
> [  936.260491]  br_multicast_rcv+0x7ac/0xe30 [bridge]
> [  936.265322]  br_dev_xmit+0x140/0x368 [bridge]
> [  936.269689]  dev_hard_start_xmit+0x94/0x158
> [  936.273876]  __dev_queue_xmit+0x5ac/0x7f8
> [  936.277890]  dev_queue_xmit+0x10/0x18
> [  936.281563]  neigh_resolve_output+0xec/0x198
> [  936.285845]  ip6_finish_output2+0x240/0x710
> [  936.290039]  __ip6_finish_output+0x130/0x170
> [  936.294318]  ip6_output+0x6c/0x1c8
> [  936.297731]  NF_HOOK.constprop.0+0xd8/0xe8
> [  936.301834]  igmp6_send+0x358/0x558
> [  936.305326]  igmp6_join_group.part.0+0x30/0xf0
> [  936.309774]  igmp6_group_added+0xfc/0x110
> [  936.313787]  __ipv6_dev_mc_inc+0x1a4/0x290
> [  936.317885]  ipv6_dev_mc_inc+0x10/0x18
> [  936.321677]  br_multicast_open+0xbc/0x110 [bridge]
> [  936.326506]  br_multicast_toggle+0xec/0x140 [bridge]
> 
> Fixes: 4effd28c1245 ("bridge: join all-snoopers multicast address")
> Signed-off-by: Joseph Huang 
> ---
>  net/bridge/br_device.c|  6 ++
>  net/bridge/br_multicast.c | 34 +-
>  net/bridge/br_private.h   | 10 ++
>  3 files changed, 41 insertions(+), 9 deletions(-)
> 

LGTM, thanks!
Acked-by: Nikolay Aleksandrov 




Re: [PATCH v2] bridge: Fix a deadlock when enabling multicast snooping

2020-12-04 Thread Nikolay Aleksandrov
On 04/12/2020 23:39, Joseph Huang wrote:
> When enabling multicast snooping, bridge module deadlocks on multicast_lock
> if 1) IPv6 is enabled, and 2) there is an existing querier on the same L2
> network.
> 
> The deadlock was caused by the following sequence: While holding the lock,
> br_multicast_open calls br_multicast_join_snoopers, which eventually causes
> IP stack to (attempt to) send out a Listener Report (in igmp6_join_group).
> Since the destination Ethernet address is a multicast address, br_dev_xmit
> feeds the packet back to the bridge via br_multicast_rcv, which in turn
> calls br_multicast_add_group, which then deadlocks on multicast_lock.
> 
> The fix is to move the call br_multicast_join_snoopers outside of the
> critical section. This works since br_multicast_join_snoopers only deals
> with IP and does not modify any multicast data structures of the bridge,
> so there's no need to hold the lock.
> 
> Steps to reproduce:
> 1. sysctl net.ipv6.conf.all.force_mld_version=1
> 2. have another querier
> 3. ip link set dev bridge type bridge mcast_snooping 0 && \
>ip link set dev bridge type bridge mcast_snooping 1 < deadlock >
> 
> A typical call trace looks like the following:
> 
> [  936.251495]  _raw_spin_lock+0x5c/0x68
> [  936.255221]  br_multicast_add_group+0x40/0x170 [bridge]
> [  936.260491]  br_multicast_rcv+0x7ac/0xe30 [bridge]
> [  936.265322]  br_dev_xmit+0x140/0x368 [bridge]
> [  936.269689]  dev_hard_start_xmit+0x94/0x158
> [  936.273876]  __dev_queue_xmit+0x5ac/0x7f8
> [  936.277890]  dev_queue_xmit+0x10/0x18
> [  936.281563]  neigh_resolve_output+0xec/0x198
> [  936.285845]  ip6_finish_output2+0x240/0x710
> [  936.290039]  __ip6_finish_output+0x130/0x170
> [  936.294318]  ip6_output+0x6c/0x1c8
> [  936.297731]  NF_HOOK.constprop.0+0xd8/0xe8
> [  936.301834]  igmp6_send+0x358/0x558
> [  936.305326]  igmp6_join_group.part.0+0x30/0xf0
> [  936.309774]  igmp6_group_added+0xfc/0x110
> [  936.313787]  __ipv6_dev_mc_inc+0x1a4/0x290
> [  936.317885]  ipv6_dev_mc_inc+0x10/0x18
> [  936.321677]  br_multicast_open+0xbc/0x110 [bridge]
> [  936.326506]  br_multicast_toggle+0xec/0x140 [bridge]
> 
> Fixes: 4effd28c1245 ("bridge: join all-snoopers multicast address")
> Signed-off-by: Joseph Huang 
> ---

Hi,
Thank you for fixing it up, a few minor nits below. Overall the patch
looks good.


>  net/bridge/br_device.c|  6 ++
>  net/bridge/br_multicast.c | 33 -
>  net/bridge/br_private.h   | 10 ++
>  3 files changed, 40 insertions(+), 9 deletions(-)
> 
> diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
> index 7730c8f3cb53..d3ea9d0779fb 100644
> --- a/net/bridge/br_device.c
> +++ b/net/bridge/br_device.c
> @@ -177,6 +177,9 @@ static int br_dev_open(struct net_device *dev)
>   br_stp_enable_bridge(br);
>   br_multicast_open(br);
>  
> + if (br_opt_get(br, BROPT_MULTICAST_ENABLED))
> + br_multicast_join_snoopers(br);
> +
>   return 0;
>  }
>  
> @@ -197,6 +200,9 @@ static int br_dev_stop(struct net_device *dev)
>   br_stp_disable_bridge(br);
>   br_multicast_stop(br);
>  
> + if (br_opt_get(br, BROPT_MULTICAST_ENABLED))
> + br_multicast_leave_snoopers(br);
> +
>   netif_stop_queue(dev);
>  
>   return 0;
> diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
> index eae898c3cff7..426fe00db708 100644
> --- a/net/bridge/br_multicast.c
> +++ b/net/bridge/br_multicast.c
> @@ -3286,7 +3286,7 @@ static inline void 
> br_ip6_multicast_join_snoopers(struct net_bridge *br)
>  }
>  #endif
>  
> -static void br_multicast_join_snoopers(struct net_bridge *br)
> +void br_multicast_join_snoopers(struct net_bridge *br)
>  {
>   br_ip4_multicast_join_snoopers(br);
>   br_ip6_multicast_join_snoopers(br);
> @@ -3317,7 +3317,7 @@ static inline void 
> br_ip6_multicast_leave_snoopers(struct net_bridge *br)
>  }
>  #endif
>  
> -static void br_multicast_leave_snoopers(struct net_bridge *br)
> +void br_multicast_leave_snoopers(struct net_bridge *br)
>  {
>   br_ip4_multicast_leave_snoopers(br);
>   br_ip6_multicast_leave_snoopers(br);
> @@ -3336,9 +3336,6 @@ static void __br_multicast_open(struct net_bridge *br,
>  
>  void br_multicast_open(struct net_bridge *br)
>  {
> - if (br_opt_get(br, BROPT_MULTICAST_ENABLED))
> - br_multicast_join_snoopers(br);
> -
>   __br_multicast_open(br, &br->ip4_own_query);
>  #if IS_ENABLED(CONFIG_IPV6)
>   __br_multicast_open(br, &br->ip6_own_query);
> @@ -3354,9 +3351,6 @@ void br_multicast_stop(struct net_bridge *br)
>   del_timer_sync(&br->ip6_other_query.timer);
>   del_timer_sync(&br->ip6_own_query.timer);
>  #endif
> -
> - if (br_opt_get(br, BROPT_MULTICAST_ENABLED))
> - br_multicast_leave_snoopers(br);
>  }
>  
>  void br_multicast_dev_del(struct net_bridge *br)
> @@ -3487,6 +3481,8 @@ static void br_multicast_start_querier(struct 
> net_bridge *br,
>  int br_multicast_toggle(struct net_bridge *br, 

Re: [PATCH net] net: bridge: vlan: fix error return code in __vlan_add()

2020-12-04 Thread Nikolay Aleksandrov
On 04/12/2020 10:48, Zhang Changzhong wrote:
> Fix to return a negative error code from the error handling
> case instead of 0, as done elsewhere in this function.
> 
> Fixes: f8ed289fab84 ("bridge: vlan: use br_vlan_(get|put)_master to deal with 
> refcounts")
> Reported-by: Hulk Robot 
> Signed-off-by: Zhang Changzhong 
> ---
>  net/bridge/br_vlan.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
> index 3e493eb..08c7741 100644
> --- a/net/bridge/br_vlan.c
> +++ b/net/bridge/br_vlan.c
> @@ -266,8 +266,10 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 
> flags,
>   }
>  
>   masterv = br_vlan_get_master(br, v->vid, extack);
> - if (!masterv)
> + if (!masterv) {
> + err = -ENOMEM;
>   goto out_filt;
> + }
>   v->brvlan = masterv;
>   if (br_opt_get(br, BROPT_VLAN_STATS_PER_PORT)) {
>       v->stats = netdev_alloc_pcpu_stats(struct 
> br_vlan_stats);
> 

Acked-by: Nikolay Aleksandrov 



Re: [PATCH] bridge: Fix a deadlock when enabling multicast snooping

2020-12-03 Thread Nikolay Aleksandrov
On 04/12/2020 00:42, Huang, Joseph wrote:
>> From: Huang, Joseph
>> Sent: Thursday, December 3, 2020 4:53 PM
>> To: Nikolay Aleksandrov ; Jakub Kicinski
>> 
>> Cc: Roopa Prabhu ; David S. Miller
>> ; bri...@lists.linux-foundation.org;
>> net...@vger.kernel.org; linux-kernel@vger.kernel.org; Linus Lüssing
>> 
>> Subject: RE: [PATCH] bridge: Fix a deadlock when enabling multicast snooping
>>
>>> From: Nikolay Aleksandrov 
>>> Sent: Thursday, December 3, 2020 3:47 PM
>>> To: Jakub Kicinski ; Huang, Joseph
>>> 
>>> Cc: Roopa Prabhu ; David S. Miller
>>> ; bri...@lists.linux-foundation.org;
>>> net...@vger.kernel.org; linux-kernel@vger.kernel.org; Linus Lüssing
>>> 
>>> Subject: Re: [PATCH] bridge: Fix a deadlock when enabling multicast
>>> snooping
>>>
>>> On 03/12/2020 20:28, Jakub Kicinski wrote:
>>>> On Tue, 1 Dec 2020 16:40:47 -0500 Joseph Huang wrote:
>>>>> When enabling multicast snooping, bridge module deadlocks on
>>>>> multicast_lock if 1) IPv6 is enabled, and 2) there is an existing
>>>>> querier on the same L2 network.
>>>>>
>>>>> The deadlock was caused by the following sequence: While holding the
>>>>> lock, br_multicast_open calls br_multicast_join_snoopers, which
>>>>> eventually causes IP stack to (attempt to) send out a Listener Report (in
>>> igmp6_join_group).
>>>>> Since the destination Ethernet address is a multicast address,
>>>>> br_dev_xmit feeds the packet back to the bridge via br_multicast_rcv,
>>>>> which in turn calls br_multicast_add_group, which then deadlocks on
>>> multicast_lock.
>>>>>
>>>>> The fix is to move the call br_multicast_join_snoopers outside of the
>>>>> critical section. This works since br_multicast_join_snoopers only
>>>>> deals with IP and does not modify any multicast data structures of
>>>>> the bridge, so there's no need to hold the lock.
>>>>>
>>>>> Fixes: 4effd28c1245 ("bridge: join all-snoopers multicast address")
>>>>>
>>>>> Signed-off-by: Joseph Huang 
>>>>
>>>> Nik, Linus - how does this one look?
>>>>
>>>
>>> Hi,
>>> Thanks, somehow I missed this one too. Need to check my email config. :) I
>>> believe I see how it can happen, although it's not straight-forward to 
>>> follow.
>>> A selftest for this case would be great, and any traces (e.g. hung task)
>> would
>>> help a lot as well.
>>> Correct me if I'm wrong but the sequence is something like:
>>> br_multicast_join_snoopers -> ipv6_dev_mc_inc -> __ipv6_dev_mc_inc ->
>>> igmp6_group_added
>>> -> MLDv1 (mode) igmp6_join_group() -> Again MLDv1 mode
>>> -> igmp6_join_group() -> igmp6_join_group
>>> -> igmp6_send() on the bridge device -> br_dev_xmit and onto the bridge
>>> -> mcast processing code
>>> which uses the multicast_lock spinlock. Right?
>>
>> That is correct.
>>
>> Here's a stack trace from a typical run:
>>
>> echo -n 1 > /sys/devices/virtual/net/gmn0/bridge/multicast_snooping
>> [  936.146754] rcu: INFO: rcu_preempt self-detected stall on CPU
>> [  936.152534] rcu:   0-: (5594 ticks this GP)
>> idle=75a/1/0x4002 softirq=2787/2789 fqs=2625
>> [  936.162026](t=5253 jiffies g=4205 q=12)
>> [  936.166041] Task dump for CPU 0:
>> [  936.169272] sh  R  running task0  1315   1295 
>> 0x0002
>> [  936.176332] Call trace:
>> [  936.178797]  dump_backtrace+0x0/0x140
>> [  936.182469]  show_stack+0x14/0x20
>> [  936.185793]  sched_show_task+0x108/0x138
>> [  936.189727]  dump_cpu_task+0x40/0x50
>> [  936.193313]  rcu_dump_cpu_stacks+0x94/0xd0
>> [  936.197420]  rcu_sched_clock_irq+0x75c/0x9c0
>> [  936.201698]  update_process_times+0x2c/0x68
>> [  936.205893]  tick_sched_handle.isra.0+0x30/0x50
>> [  936.210432]  tick_sched_timer+0x48/0x98
>> [  936.214272]  __hrtimer_run_queues+0x110/0x1b0
>> [  936.218635]  hrtimer_interrupt+0xe4/0x240
>> [  936.222656]  arch_timer_handler_phys+0x30/0x40
>> [  936.227106]  handle_percpu_devid_irq+0x80/0x140
>> [  936.231654]  generic_handle_irq+0x24/0x38
>> [  936.235669]  __handle_domain_irq+0x60/0xb8
>> [  936.239774]  gic_handle_irq+0x5c/0x148
>> [  936.243535]  el1_irq+0xb8/0x180
>> [  936.246689]  queued_spin_lock_slowpath+0x118/0x

Re: [PATCH] bridge: Fix a deadlock when enabling multicast snooping

2020-12-03 Thread Nikolay Aleksandrov
On 03/12/2020 20:28, Jakub Kicinski wrote:
> On Tue, 1 Dec 2020 16:40:47 -0500 Joseph Huang wrote:
>> When enabling multicast snooping, bridge module deadlocks on multicast_lock
>> if 1) IPv6 is enabled, and 2) there is an existing querier on the same L2
>> network.
>>
>> The deadlock was caused by the following sequence: While holding the lock,
>> br_multicast_open calls br_multicast_join_snoopers, which eventually causes
>> IP stack to (attempt to) send out a Listener Report (in igmp6_join_group).
>> Since the destination Ethernet address is a multicast address, br_dev_xmit
>> feeds the packet back to the bridge via br_multicast_rcv, which in turn
>> calls br_multicast_add_group, which then deadlocks on multicast_lock.
>>
>> The fix is to move the call br_multicast_join_snoopers outside of the
>> critical section. This works since br_multicast_join_snoopers only deals
>> with IP and does not modify any multicast data structures of the bridge,
>> so there's no need to hold the lock.
>>
>> Fixes: 4effd28c1245 ("bridge: join all-snoopers multicast address")
>>
>> Signed-off-by: Joseph Huang 
> 
> Nik, Linus - how does this one look?
> 

Hi,
Thanks, somehow I missed this one too. Need to check my email config. :)
I believe I see how it can happen, although it's not straight-forward to follow.
A selftest for this case would be great, and any traces (e.g. hung task) would
help a lot as well.
Correct me if I'm wrong but the sequence is something like:
br_multicast_join_snoopers -> ipv6_dev_mc_inc -> __ipv6_dev_mc_inc ->
igmp6_group_added -> MLDv1 (mode) igmp6_join_group() -> Again MLDv1 mode
igmp6_join_group() -> igmp6_join_group -> igmp6_send() on the bridge device ->
br_dev_xmit and onto the bridge mcast processing code, which uses the
multicast_lock spinlock. Right?

One question - shouldn't leaving have the same problem? I.e.
br_multicast_toggle -> br_multicast_leave_snoopers ->
br_ip6_multicast_leave_snoopers -> ipv6_dev_mc_dec -> igmp6_group_dropped ->
igmp6_leave_group -> MLDv1 mode && last reporter -> igmp6_send() ?

I think it was saved by the fact that !br_opt_get(br, BROPT_MULTICAST_ENABLED)
would be true and the multicast lock won't be acquired in the br_dev_xmit path?
If so, I'd appreciate a comment about that because it's not really trivial to
find out. :)

Anyhow, the patch is fine as-is too:
Acked-by: Nikolay Aleksandrov 

Thanks,
 Nik



Re: [PATCH net] net: bridge: Fix a warning when del bridge sysfs

2020-12-03 Thread Nikolay Aleksandrov
On 03/12/2020 03:03, Jakub Kicinski wrote:
> On Tue, 1 Dec 2020 22:01:14 +0800 Wang Hai wrote:
>> If adding bridge sysfs fails, br->ifobj will be NULL, there is no
>> need to delete its non-existent sysfs when deleting the bridge device,
>> otherwise, it will cause a warning. So, when br->ifobj == NULL,
>> directly return can fix this bug.
>>
>> br_sysfs_addbr: can't create group bridge4/bridge
>> [ cut here ]
>> sysfs group 'bridge' not found for kobject 'bridge4'
>> WARNING: CPU: 2 PID: 9004 at fs/sysfs/group.c:279 sysfs_remove_group 
>> fs/sysfs/group.c:279 [inline]
>> WARNING: CPU: 2 PID: 9004 at fs/sysfs/group.c:279 
>> sysfs_remove_group+0x153/0x1b0 fs/sysfs/group.c:270
>> Modules linked in: iptable_nat
>> ...
>> Call Trace:
>>   br_dev_delete+0x112/0x190 net/bridge/br_if.c:384
>>   br_dev_newlink net/bridge/br_netlink.c:1381 [inline]
>>   br_dev_newlink+0xdb/0x100 net/bridge/br_netlink.c:1362
>>   __rtnl_newlink+0xe11/0x13f0 net/core/rtnetlink.c:3441
>>   rtnl_newlink+0x64/0xa0 net/core/rtnetlink.c:3500
>>   rtnetlink_rcv_msg+0x385/0x980 net/core/rtnetlink.c:5562
>>   netlink_rcv_skb+0x134/0x3d0 net/netlink/af_netlink.c:2494
>>   netlink_unicast_kernel net/netlink/af_netlink.c:1304 [inline]
>>   netlink_unicast+0x4a0/0x6a0 net/netlink/af_netlink.c:1330
>>   netlink_sendmsg+0x793/0xc80 net/netlink/af_netlink.c:1919
>>   sock_sendmsg_nosec net/socket.c:651 [inline]
>>   sock_sendmsg+0x139/0x170 net/socket.c:671
>>   sys_sendmsg+0x658/0x7d0 net/socket.c:2353
>>   ___sys_sendmsg+0xf8/0x170 net/socket.c:2407
>>   __sys_sendmsg+0xd3/0x190 net/socket.c:2440
>>   do_syscall_64+0x33/0x40 arch/x86/entry/common.c:46
>>   entry_SYSCALL_64_after_hwframe+0x44/0xa9
>>
>> Reported-by: Hulk Robot 
>> Signed-off-by: Wang Hai 
> 
> Nik, is this the way you want to handle this?
> 
> Should the notifier not fail if sysfs files cannot be created?
> Currently br_sysfs_addbr() returns an int but the only caller 
> ignores it.
> 

Hi,
The fix is wrong because this is not the only user of ifobj. The bridge
port sysfs code also uses it and br_sysfs_addif() will create the new
symlink in sysfs_root_kn due to NULL kobj passed which basically means
only one port will be enslaved, the others will fail in creating their
sysfs entries and thus fail to be added as ports.

I'd prefer to just fail from the notifier based on the return value.
The only catch would be to test it with br_vlan_bridge_event() which
is called on bridge master device events, it should be fine as
br_vlan_find() deals with NULL vlan groups but at least a comment
above it in br.c's notifier would be good so that anyone who decides to
add any NETDEV_UNREGISTER handling would be warned about it.

Also please add proper fixes tag, the bug seems to be since:
bb900b27a2f4 ("bridge: allow creating bridge devices with netlink")

It actually changed the behaviour: before that, the return value of
br_sysfs_addbr() was checked and the device got unregistered on failure.

Thanks,
 Nik




Re: [PATCH net-next] bridge: mrp: Implement LC mode for MRP

2020-11-23 Thread Nikolay Aleksandrov
On 23/11/2020 14:31, Horatiu Vultur wrote:
> The 11/23/2020 14:13, Nikolay Aleksandrov wrote:
>> EXTERNAL EMAIL: Do not click links or open attachments unless you know the 
>> content is safe
>>
>> On 23/11/2020 13:14, Horatiu Vultur wrote:
>>> Extend MRP to support LC mode(link check) for the interconnect port.
>>> This applies only to the interconnect ring.
>>>
>>> Opposite to RC mode(ring check) the LC mode is using CFM frames to
>>> detect when the link goes up or down and based on that the userspace
>>> will need to react.
>>> One advantage of the LC mode over RC mode is that there will be fewer
>>> frames in the normal rings. Because RC mode generates InTest on all
>>> ports while LC mode sends CFM frame only on the interconnect port.
>>>
>>> All 4 nodes part of the interconnect ring needs to have the same mode.
>>> And it is not possible to have running LC and RC mode at the same time
>>> on a node.
>>>
>>> Whenever the MIM starts it needs to detect the status of the other 3
>>> nodes in the interconnect ring so it would send a frame called
>>> InLinkStatus, on which the clients needs to reply with their link
>>> status.
>>>
>>> This patch adds the frame header for the frame InLinkStatus and
>>> extends existing rules on how to forward this frame.
>>>
>>> Signed-off-by: Horatiu Vultur 
>>> ---
>>>  include/uapi/linux/mrp_bridge.h |  7 +++
>>>  net/bridge/br_mrp.c | 18 +++---
>>>  2 files changed, 22 insertions(+), 3 deletions(-)
>>>
>>
>> Hi Horatiu,
>> The patch looks good overall, just one question below.
> 
> Hi Nik,
> 
> Thanks for taking time to review the patch.
> 
>>
>>> diff --git a/include/uapi/linux/mrp_bridge.h 
>>> b/include/uapi/linux/mrp_bridge.h
>>> index 6aeb13ef0b1e..450f6941a5a1 100644
>>> --- a/include/uapi/linux/mrp_bridge.h
>>> +++ b/include/uapi/linux/mrp_bridge.h
>>> @@ -61,6 +61,7 @@ enum br_mrp_tlv_header_type {
>>>   BR_MRP_TLV_HEADER_IN_TOPO = 0x7,
>>>   BR_MRP_TLV_HEADER_IN_LINK_DOWN = 0x8,
>>>   BR_MRP_TLV_HEADER_IN_LINK_UP = 0x9,
>>> + BR_MRP_TLV_HEADER_IN_LINK_STATUS = 0xa,
>>>   BR_MRP_TLV_HEADER_OPTION = 0x7f,
>>>  };
>>>
>>> @@ -156,4 +157,10 @@ struct br_mrp_in_link_hdr {
>>>   __be16 interval;
>>>  };
>>>
>>> +struct br_mrp_in_link_status_hdr {
>>> + __u8 sa[ETH_ALEN];
>>> + __be16 port_role;
>>> + __be16 id;
>>> +};
>>> +
>>
>> I didn't see this struct used anywhere, am I missing anything?
> 
> Yes, you are right, the struct is not used anywhere. But I put it there
> just as I did for the other MRP frame types.
> 

I see, we don't usually add unused code. The patch is fine as-is and since
this is already the case for other MRP parts I'm not strictly against it, so:

Acked-by: Nikolay Aleksandrov 

If Jakub decides to adhere to that rule you can keep my acked-by and just remove
the struct for v2.

Thanks,
 Nik



Re: [PATCH net-next] bridge: mrp: Implement LC mode for MRP

2020-11-23 Thread Nikolay Aleksandrov
On 23/11/2020 13:14, Horatiu Vultur wrote:
> Extend MRP to support LC mode (link check) for the interconnect port.
> This applies only to the interconnect ring.
> 
> As opposed to RC mode (ring check), LC mode uses CFM frames to
> detect when the link goes up or down, and userspace needs to react
> based on that.
> One advantage of LC mode over RC mode is that there will be fewer
> frames in the normal rings, because RC mode generates InTest on all
> ports while LC mode sends CFM frames only on the interconnect port.
> 
> All 4 nodes that are part of the interconnect ring need to have the same
> mode, and it is not possible to run LC and RC mode at the same time
> on a node.
> 
> Whenever the MIM starts it needs to detect the status of the other 3
> nodes in the interconnect ring, so it sends a frame called
> InLinkStatus, to which the clients need to reply with their link
> status.
> 
> This patch adds the frame header for the frame InLinkStatus and
> extends existing rules on how to forward this frame.
> 
> Signed-off-by: Horatiu Vultur 
> ---
>  include/uapi/linux/mrp_bridge.h |  7 +++
>  net/bridge/br_mrp.c | 18 +++---
>  2 files changed, 22 insertions(+), 3 deletions(-)
> 

Hi Horatiu,
The patch looks good overall, just one question below.

> diff --git a/include/uapi/linux/mrp_bridge.h b/include/uapi/linux/mrp_bridge.h
> index 6aeb13ef0b1e..450f6941a5a1 100644
> --- a/include/uapi/linux/mrp_bridge.h
> +++ b/include/uapi/linux/mrp_bridge.h
> @@ -61,6 +61,7 @@ enum br_mrp_tlv_header_type {
>   BR_MRP_TLV_HEADER_IN_TOPO = 0x7,
>   BR_MRP_TLV_HEADER_IN_LINK_DOWN = 0x8,
>   BR_MRP_TLV_HEADER_IN_LINK_UP = 0x9,
> + BR_MRP_TLV_HEADER_IN_LINK_STATUS = 0xa,
>   BR_MRP_TLV_HEADER_OPTION = 0x7f,
>  };
>  
> @@ -156,4 +157,10 @@ struct br_mrp_in_link_hdr {
>   __be16 interval;
>  };
>  
> +struct br_mrp_in_link_status_hdr {
> + __u8 sa[ETH_ALEN];
> + __be16 port_role;
> + __be16 id;
> +};
> +

I didn't see this struct used anywhere, am I missing anything?

Cheers,
 Nik

>  #endif
> diff --git a/net/bridge/br_mrp.c b/net/bridge/br_mrp.c
> index bb12fbf9aaf2..cec2c4e4561d 100644
> --- a/net/bridge/br_mrp.c
> +++ b/net/bridge/br_mrp.c
> @@ -858,7 +858,8 @@ static bool br_mrp_in_frame(struct sk_buff *skb)
>   if (hdr->type == BR_MRP_TLV_HEADER_IN_TEST ||
>   hdr->type == BR_MRP_TLV_HEADER_IN_TOPO ||
>   hdr->type == BR_MRP_TLV_HEADER_IN_LINK_DOWN ||
> - hdr->type == BR_MRP_TLV_HEADER_IN_LINK_UP)
> + hdr->type == BR_MRP_TLV_HEADER_IN_LINK_UP ||
> + hdr->type == BR_MRP_TLV_HEADER_IN_LINK_STATUS)
>   return true;
>  
>   return false;
> @@ -1126,9 +1127,9 @@ static int br_mrp_rcv(struct net_bridge_port *p,
>   goto no_forward;
>   }
>   } else {
> - /* MIM should forward IntLinkChange and
> + /* MIM should forward IntLinkChange/Status and
>* IntTopoChange between ring ports but MIM
> -  * should not forward IntLinkChange and
> +  * should not forward IntLinkChange/Status and
>* IntTopoChange if the frame was received at
>* the interconnect port
>*/
> @@ -1155,6 +1156,17 @@ static int br_mrp_rcv(struct net_bridge_port *p,
>in_type == BR_MRP_TLV_HEADER_IN_LINK_DOWN))
>   goto forward;
>  
> + /* MIC should forward IntLinkStatus frames only to
> +  * interconnect port if it was received on a ring port.
> +  * If it is received on interconnect port then, it
> +  * should be forward on both ring ports
> +  */
> + if (br_mrp_is_ring_port(p_port, s_port, p) &&
> + in_type == BR_MRP_TLV_HEADER_IN_LINK_STATUS) {
> + p_dst = NULL;
> + s_dst = NULL;
> + }
> +
>   /* Should forward the InTopo frames only between the
>* ring ports
>*/
> 



Re: [PATCH] net: bridge: disable multicast while delete bridge

2020-11-03 Thread Nikolay Aleksandrov
On Mon, 2020-11-02 at 22:38 +0800, Menglong Dong wrote:
> From: Menglong Dong 
> 
> This commit seems to make no sense, as the bridge is destroyed when
> br_multicast_dev_del is called.
> 
> In commit b1b9d366028f
> ("bridge: move bridge multicast cleanup to ndo_uninit"), Xin Long
> fixed the use-after-free panic in br_multicast_group_expired by
> moving br_multicast_dev_del to ndo_uninit. However, that patch is
> not applied to 4.4.X, and the bug exists.
> 
> Fix that bug by disabling multicast in br_multicast_dev_del for
> 4.4.X, and there is no harm for other branches.
> 
> Signed-off-by: Menglong Dong 
> ---
>  net/bridge/br_multicast.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
> index eae898c3cff7..9992fdff2951 100644
> --- a/net/bridge/br_multicast.c
> +++ b/net/bridge/br_multicast.c
> @@ -3369,6 +3369,7 @@ void br_multicast_dev_del(struct net_bridge *br)
>   hlist_for_each_entry_safe(mp, tmp, >mdb_list, mdb_node)
>   br_multicast_del_mdb_entry(mp);
>   hlist_move_list(>mcast_gc_list, _head);
> + br_opt_toggle(br, BROPT_MULTICAST_ENABLED, false);
>   spin_unlock_bh(>multicast_lock);
>  
>   br_multicast_gc(_head);

This doesn't make any sense. It doesn't fix anything.
If 4.4 has a problem then the relevant patches should get backported to it.
We don't add random changes to fix older releases.

Cheers,
 Nik

Nacked-by: Nikolay Aleksandrov 


Re: [PATCH v3 net-next] net: bridge: mcast: add support for raw L2 multicast groups

2020-10-28 Thread Nikolay Aleksandrov
On Wed, 2020-10-28 at 12:54 +0200, Vladimir Oltean wrote:
> From: Nikolay Aleksandrov 
> 
> Extend the bridge multicast control and data path to configure routes
> for L2 (non-IP) multicast groups.
> 
> The uapi struct br_mdb_entry union u is extended with another variant,
> mac_addr, which does not change the structure size, and which is valid
> when the proto field is zero.
> 
> To be compatible with the forwarding code that is already in place,
> which acts as an IGMP/MLD snooping bridge with querier capabilities, we
> need to declare that for L2 MDB entries (for which there exists no such
> thing as IGMP/MLD snooping/querying), that there is always a querier.
> Otherwise, these entries would be flooded to all bridge ports and not
> just to those that are members of the L2 multicast group.
> 
> Needless to say, only permanent L2 multicast groups can be installed on
> a bridge port.
> 
> Signed-off-by: Nikolay Aleksandrov 
> Signed-off-by: Vladimir Oltean 
> ---
> Changes in v3:
> - Removed some noise in the diff.
> 
> Changes in v2:
> - Removed redundant MDB_FLAGS_L2 (we are simply signalling an L2 entry
>   through proto == 0)
> - Moved mac_addr inside union dst of struct br_ip.
> - Validation that L2 multicast address is indeed multicast
> 
>  include/linux/if_bridge.h  |  1 +
>  include/uapi/linux/if_bridge.h |  1 +
>  net/bridge/br_device.c |  2 +-
>  net/bridge/br_input.c  |  2 +-
>  net/bridge/br_mdb.c| 24 ++--
>  net/bridge/br_multicast.c  |  9 +++--
>  net/bridge/br_private.h| 10 --
>  7 files changed, 41 insertions(+), 8 deletions(-)
>
[snip]
> @@ -857,6 +872,11 @@ static int br_mdb_add_group(struct net_bridge *br, 
> struct net_bridge_port *port,
>   return err;
>   }
>  
> + if (entry->state != MDB_PERMANENT && br_group_is_l2(>addr)) {
> + NL_SET_ERR_MSG_MOD(extack, "Only permanent L2 entries allowed");
> + return -EINVAL;
> + }
> +

Sorry, but I didn't notice this earlier. We need to check for this error before
creating the mdb group otherwise we can end up with empty groups that can't be
deleted due to errors. I.e. it must be before the br_multicast_new_group() call.

The rest looks good to me, thanks!

>   /* host join */
>   if (!port) {
>   if (mp->host_joined) {
> diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
> index eae898c3cff7..98de0acb0307 100644
> --- a/net/bridge/br_multicast.c
> +++ b/net/bridge/br_multicast.c
> @@ -179,7 +179,8 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge 
> *br,



Re: [Bridge] [PATCH net-next v7 01/10] net: bridge: extend the process of special frames

2020-10-27 Thread Nikolay Aleksandrov
On Tue, 2020-10-27 at 07:59 -0700, Stephen Hemminger wrote:
> On Tue, 27 Oct 2020 10:02:42 +
> Henrik Bjoernlund via Bridge  wrote:
> 
> > +/* Return 0 if the frame was not processed otherwise 1
> > + * note: already called with rcu_read_lock
> > + */
> > +static int br_process_frame_type(struct net_bridge_port *p,
> > +struct sk_buff *skb)
> > +{
> > +   struct br_frame_type *tmp;
> > +
> > +   hlist_for_each_entry_rcu(tmp, >br->frame_type_list, list)
> > +   if (unlikely(tmp->type == skb->protocol))
> > +   return tmp->frame_handler(p, skb);
> > +
> > +   return 0;
> > +}
> 
> Does the linear search of frame types have a noticeable impact on performance?
> Hint: maybe a bitmap or something would be faster.

I don't think it's necessary to optimize it so early. There are only 2 possible
types so far (with this set included), if CFM and MRP are both in use. If at some
point the list grows we can turn it into a hash or bitmap, but at the moment a
simpler and easier to maintain solution seems better to me. We could also mask the
search itself behind a static key and do it only if a protocol is registered, to
minimize the impact further.

Cheers,
 Nik




Re: [RFC PATCH] net: bridge: multicast: add support for L2 entries

2020-10-25 Thread Nikolay Aleksandrov
On Sun, 2020-10-25 at 06:59 +, Vladimir Oltean wrote:
> On Wed, Oct 21, 2020 at 09:17:07AM +0000, Nikolay Aleksandrov wrote:
> > > diff --git a/include/uapi/linux/if_bridge.h 
> > > b/include/uapi/linux/if_bridge.h
> > > index 4c687686aa8f..a25f6f9aa8c3 100644
> > > --- a/include/uapi/linux/if_bridge.h
> > > +++ b/include/uapi/linux/if_bridge.h
> > > @@ -520,12 +520,14 @@ struct br_mdb_entry {
> > >  #define MDB_FLAGS_FAST_LEAVE (1 << 1)
> > >  #define MDB_FLAGS_STAR_EXCL  (1 << 2)
> > >  #define MDB_FLAGS_BLOCKED(1 << 3)
> > > +#define MDB_FLAGS_L2 (1 << 5)
> > 
> > I think this should be 4.
> > 
> 
> Shouldn't this be in sync with MDB_PG_FLAGS_L2 though? We also have
> MDB_PG_FLAGS_BLOCKED which is BIT(4).

Unfortunately they haven't been in sync from the start. MDB_FLAGS bit
0 is offload, while MDB_PG_FLAGS bit 0 is permanent. As you can see
here blocked is bit 3, while internally it's 4 due to the same reason.
We can't afford to skip 1 bit since this is uAPI and we only got 8 
available bits. I wonder if we need these L2 bits at all, why not use
only proto == 0 to denote it's a L2 entry? I can't remember why I added
the bits back then, but until now proto == 0 wasn't allowed and the
kernel couldn't export it as such, so it seems possible to use it.






Re: [RFC PATCH] net: bridge: multicast: add support for L2 entries

2020-10-21 Thread Nikolay Aleksandrov
On Wed, 2020-10-21 at 09:17 +, Nikolay Aleksandrov wrote:
> On Sat, 2020-10-17 at 21:41 +0300, Vladimir Oltean wrote:
> > From: Nikolay Aleksandrov 
> > 
> > Extend the bridge multicast control and data path to configure routes
> > for L2 (non-IP) multicast groups.
> > 
> > The uapi struct br_mdb_entry union u is extended with another variant,
> > interpretation, mac_addr, which does not change the structure size, and
> > which is valid when the MDB_FLAGS_L2 flag is found set.
> > 
> > To be compatible with the forwarding code that is already in place,
> > which acts as an IGMP/MLD snooping bridge with querier capabilities, we
> > need to declare that for L2 MDB entries (for which there exists no such
> > thing as IGMP/MLD snooping/querying), that there is always a querier.
> > Otherwise, these entries would be flooded to all bridge ports and not
> > just to those that are members of the L2 multicast group.
> > 
> > Needless to say, only permanent L2 multicast groups can be installed on
> > a bridge port.
> > 
> > Signed-off-by: Nikolay Aleksandrov 
> > Signed-off-by: Vladimir Oltean 
> > ---
> > This patch is adapted from the version that Nikolay posted here:
> > https://lore.kernel.org/netdev/20200708090454.zvb6o7jr2woirw3i@skbuf/
> > 
> > There, he marked the patch as "unfinished". I haven't made any major
> > modifications to it, but I've tested it and it appears to work ok,
> > including with offloading. Hence, I would appreciate some tips regarding
> > things that might be missing.
> > 
> 
> Hi,
> I almost missed this one, thank you for fixing it up. I was wondering if we
> can move br_ip's mac_addr in the "dst" union to save some space and reduce
> ops when matching, since we're also matching on the protocol field. In general
> do we need the ->l2 field at all, can we use proto == 0 ? In order to make it
> more readable it can be in a helper with a descriptive name so we don't wonder
> what proto == 0 meant later. A few more minor comments below.
> 

Oh, one more thing, I don't think we validate that the dst mac that's being
added is actually a multicast one.

> >  include/linux/if_bridge.h  |  1 +
> >  include/uapi/linux/if_bridge.h |  2 ++
> >  net/bridge/br_device.c |  2 +-
> >  net/bridge/br_input.c  |  2 +-
> >  net/bridge/br_mdb.c| 24 
> >  net/bridge/br_multicast.c  | 12 ++--
> >  net/bridge/br_private.h|  7 +--
> >  7 files changed, 40 insertions(+), 10 deletions(-)
> > 
> > diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
> > index 556caed00258..b135ad714383 100644
> > --- a/include/linux/if_bridge.h
> > +++ b/include/linux/if_bridge.h
> > @@ -26,6 +26,7 @@ struct br_ip {
> > struct in6_addr ip6;
> >  #endif
> > } dst;
> > +   unsigned char   mac_addr[ETH_ALEN];
> > __be16  proto;
> > __u16   vid;
> >  };
> > diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
> > index 4c687686aa8f..a25f6f9aa8c3 100644
> > --- a/include/uapi/linux/if_bridge.h
> > +++ b/include/uapi/linux/if_bridge.h
> > @@ -520,12 +520,14 @@ struct br_mdb_entry {
> >  #define MDB_FLAGS_FAST_LEAVE   (1 << 1)
> >  #define MDB_FLAGS_STAR_EXCL(1 << 2)
> >  #define MDB_FLAGS_BLOCKED  (1 << 3)
> > +#define MDB_FLAGS_L2   (1 << 5)
> 
> I think this should be 4.
> 
> > __u8 flags;
> > __u16 vid;
> > struct {
> > union {
> > __be32  ip4;
> > struct in6_addr ip6;
> > +   unsigned char mac_addr[ETH_ALEN];
> > } u;
> > __be16  proto;
> > } addr;
> > diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
> > index 6f742fee874a..06c28753b911 100644
> > --- a/net/bridge/br_device.c
> > +++ b/net/bridge/br_device.c
> > @@ -93,7 +93,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct 
> > net_device *dev)
> >  
> > mdst = br_mdb_get(br, skb, vid);
> > if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
> > -   br_multicast_querier_exists(br, eth_hdr(skb)))
> > +   br_multicast_querier_exists(br, eth_hdr(skb), mdst))
> > br_multicast_flood(mdst, skb, false, true);
> > else
> > br_flood(br, skb, BR_PKT_MULTICAST, false, true);
> > diff -

Re: [RFC PATCH] net: bridge: multicast: add support for L2 entries

2020-10-21 Thread Nikolay Aleksandrov
On Sat, 2020-10-17 at 21:41 +0300, Vladimir Oltean wrote:
> From: Nikolay Aleksandrov 
> 
> Extend the bridge multicast control and data path to configure routes
> for L2 (non-IP) multicast groups.
> 
> The uapi struct br_mdb_entry union u is extended with another variant,
> interpretation, mac_addr, which does not change the structure size, and
> which is valid when the MDB_FLAGS_L2 flag is found set.
> 
> To be compatible with the forwarding code that is already in place,
> which acts as an IGMP/MLD snooping bridge with querier capabilities, we
> need to declare that for L2 MDB entries (for which there exists no such
> thing as IGMP/MLD snooping/querying), that there is always a querier.
> Otherwise, these entries would be flooded to all bridge ports and not
> just to those that are members of the L2 multicast group.
> 
> Needless to say, only permanent L2 multicast groups can be installed on
> a bridge port.
> 
> Signed-off-by: Nikolay Aleksandrov 
> Signed-off-by: Vladimir Oltean 
> ---
> This patch is adapted from the version that Nikolay posted here:
> https://lore.kernel.org/netdev/20200708090454.zvb6o7jr2woirw3i@skbuf/
> 
> There, he marked the patch as "unfinished". I haven't made any major
> modifications to it, but I've tested it and it appears to work ok,
> including with offloading. Hence, I would appreciate some tips regarding
> things that might be missing.
> 

Hi,
I almost missed this one, thank you for fixing it up. I was wondering if we
can move br_ip's mac_addr in the "dst" union to save some space and reduce
ops when matching, since we're also matching on the protocol field. In general
do we need the ->l2 field at all, can we use proto == 0 ? In order to make it
more readable it can be in a helper with a descriptive name so we don't wonder
what proto == 0 meant later. A few more minor comments below.

>  include/linux/if_bridge.h  |  1 +
>  include/uapi/linux/if_bridge.h |  2 ++
>  net/bridge/br_device.c |  2 +-
>  net/bridge/br_input.c  |  2 +-
>  net/bridge/br_mdb.c| 24 
>  net/bridge/br_multicast.c  | 12 ++--
>  net/bridge/br_private.h|  7 +--
>  7 files changed, 40 insertions(+), 10 deletions(-)
> 
> diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
> index 556caed00258..b135ad714383 100644
> --- a/include/linux/if_bridge.h
> +++ b/include/linux/if_bridge.h
> @@ -26,6 +26,7 @@ struct br_ip {
>   struct in6_addr ip6;
>  #endif
>   } dst;
> + unsigned char   mac_addr[ETH_ALEN];
>   __be16  proto;
>   __u16   vid;
>  };
> diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
> index 4c687686aa8f..a25f6f9aa8c3 100644
> --- a/include/uapi/linux/if_bridge.h
> +++ b/include/uapi/linux/if_bridge.h
> @@ -520,12 +520,14 @@ struct br_mdb_entry {
>  #define MDB_FLAGS_FAST_LEAVE (1 << 1)
>  #define MDB_FLAGS_STAR_EXCL  (1 << 2)
>  #define MDB_FLAGS_BLOCKED(1 << 3)
> +#define MDB_FLAGS_L2 (1 << 5)

I think this should be 4.

>   __u8 flags;
>   __u16 vid;
>   struct {
>   union {
>   __be32  ip4;
>   struct in6_addr ip6;
> + unsigned char mac_addr[ETH_ALEN];
>   } u;
>   __be16  proto;
>   } addr;
> diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
> index 6f742fee874a..06c28753b911 100644
> --- a/net/bridge/br_device.c
> +++ b/net/bridge/br_device.c
> @@ -93,7 +93,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct 
> net_device *dev)
>  
>   mdst = br_mdb_get(br, skb, vid);
>   if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
> - br_multicast_querier_exists(br, eth_hdr(skb)))
> + br_multicast_querier_exists(br, eth_hdr(skb), mdst))
>   br_multicast_flood(mdst, skb, false, true);
>   else
>   br_flood(br, skb, BR_PKT_MULTICAST, false, true);
> diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
> index 59a318b9f646..d31b5c18c6a1 100644
> --- a/net/bridge/br_input.c
> +++ b/net/bridge/br_input.c
> @@ -134,7 +134,7 @@ int br_handle_frame_finish(struct net *net, struct sock 
> *sk, struct sk_buff *skb
>   case BR_PKT_MULTICAST:
>   mdst = br_mdb_get(br, skb, vid);
>   if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
> - br_multicast_querier_exists(br, eth_hdr(skb))) {
> + br_multicast_querier_exists(br, eth_hdr(skb), mdst)) {
>   if ((mds

Re: [RFC PATCH] net: bridge: call br_multicast_del_port before the port leaves

2020-10-16 Thread Nikolay Aleksandrov
On Thu, 2020-10-15 at 20:33 +0300, Vladimir Oltean wrote:
> Switchdev drivers often have different VLAN semantics than the bridge.
> For example, consider this:
> 
> ip link add br0 type bridge
> ip link set swp0 master br0
> bridge mdb add dev br0 port swp0 grp 01:02:03:04:05:06 permanent
> ip link del br0
> [   26.085816] mscc_felix :00:00.5 swp0: failed (err=-2) to del object 
> (id=2)
> 
> This is because the mscc_ocelot driver, when VLAN awareness is disabled,
> classifies all traffic to the port-based VLAN (pvid). The pvid is 0 when
> the port is standalone, and it is inherited from the bridge default pvid
> (which is 1 by default, but it may take other values) when it joins the
> VLAN-unaware bridge, and then the pvid resets to 0 when the port leaves
> the bridge again.
> 
> Now because the mscc_ocelot switch classifies all traffic to its private
> pvid, it needs to translate between the vid that the mdb comes with, and
> the vid that will actually be programmed into hardware. The bridge uses
> the vid of 0 in VLAN-unaware mode, while the hardware uses the pvid
> inherited from the bridge, that's the difference.
> 
> So what will happen is:
> 
> Step 1 (addition):
> br_mdb_notify(RTM_NEWMDB)
> -> ocelot_port_mdb_add(mdb->addr=01:02:03:04:05:06, mdb->vid=0)
>-> mdb->vid is remapped from 0 to 1 and installed into ocelot->multicast
> 
> Step 2 (removal):
> del_nbp
> -> netdev_upper_dev_unlink(dev, br->dev)
>-> ocelot_port_bridge_leave
>   -> ocelot_port_set_pvid(ocelot, port, 0)
> -> br_multicast_del_port is called and the switchdev notifier is
>deferred for some time later
>-> ocelot_port_mdb_del(mdb->addr=01:02:03:04:05:06, mdb->vid=0)
>   -> mdb->vid is remapped from 0 to 0, the port pvid (!!!)
>   -> the remapped mdb (addr=01:02:03:04:05:06, vid=0) is not found
>  inside the ocelot->multicast list, and -ENOENT is returned
> 
> So the problem is that mscc_ocelot assumes that the port is removed
> _after_ the multicast entries have been deleted. And this is not an
> unreasonable assumption, presumably it isn't the only switchdev that
> needs to remap the vid. So we can reorder the teardown path in order
> for that assumption to hold true.
> 
> Since br_mdb_notify() issues a SWITCHDEV_F_DEFER operation, we must move
> the call not only before netdev_upper_dev_unlink(), but in fact before
> switchdev_deferred_process().
> 
> Signed-off-by: Vladimir Oltean 
> ---
>  net/bridge/br_if.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
> 

This can potentially cause a use-after-free: multicast resources (per-cpu stats)
are freed in br_multicast_del_port() and can still be used afterwards due to a
race with port state sync on other CPUs, since the handler can still process
packets. That has a chance of happening if vlans are not used.

Interesting that br_stp_disable_port() calls br_multicast_disable_port() which
flushes all non-permanent mdb entries, so I'm guessing you have a problem only
with permanent ones? Perhaps we can flush them all before. Either by passing an
argument to br_stp_disable_port() that we're deleting the port which will be
passed down to br_multicast_disable_port() or by calling an additional helper to
flush all which can be re-used by both disable_port() and stop_multicast()
calls. Adding an argument to br_stp_disable_port() to be passed down sounds
cleaner to me. What do you think?

Cheers,
 Nik



Re: [PATCH net-next v5 09/10] bridge: cfm: Netlink GET status Interface.

2020-10-14 Thread Nikolay Aleksandrov
On Mon, 2020-10-12 at 14:04 +, Henrik Bjoernlund wrote:
> This is the implementation of CFM netlink status
> get information interface.
> 
> Add new nested netlink attributes. These attributes are used by the
> user space to get status information.
> 
> GETLINK:
> Request filter RTEXT_FILTER_CFM_STATUS:
> Indicating that CFM status information must be delivered.
> 
> IFLA_BRIDGE_CFM:
> Points to the CFM information.
> 
> IFLA_BRIDGE_CFM_MEP_STATUS_INFO:
> This indicates that the MEP instance status follows.
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_INFO:
> This indicates that the peer MEP status follows.
> 
> CFM nested attribute has the following attributes in next level.
> 
> GETLINK RTEXT_FILTER_CFM_STATUS:
> IFLA_BRIDGE_CFM_MEP_STATUS_INSTANCE:
> The MEP instance number of the delivered status.
> The type is u32.
> IFLA_BRIDGE_CFM_MEP_STATUS_OPCODE_UNEXP_SEEN:
> The MEP instance received CFM PDU with unexpected Opcode.
> The type is u32 (bool).
> IFLA_BRIDGE_CFM_MEP_STATUS_VERSION_UNEXP_SEEN:
> The MEP instance received CFM PDU with unexpected version.
> The type is u32 (bool).
> IFLA_BRIDGE_CFM_MEP_STATUS_RX_LEVEL_LOW_SEEN:
> The MEP instance received CCM PDU with MD level lower than
> configured level. This frame is discarded.
> The type is u32 (bool).
> 
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_INSTANCE:
> The MEP instance number of the delivered status.
> The type is u32.
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_PEER_MEPID:
> The added Peer MEP ID of the delivered status.
> The type is u32.
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_CCM_DEFECT:
> The CCM defect status.
> The type is u32 (bool).
> True means no CCM frame is received for 3.25 intervals.
> IFLA_BRIDGE_CFM_CC_CONFIG_EXP_INTERVAL.
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_RDI:
> The last received CCM PDU RDI.
> The type is u32 (bool).
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_PORT_TLV_VALUE:
> The last received CCM PDU Port Status TLV value field.
> The type is u8.
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_IF_TLV_VALUE:
> The last received CCM PDU Interface Status TLV value field.
> The type is u8.
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_SEEN:
> A CCM frame has been received from Peer MEP.
> The type is u32 (bool).
> This is cleared after GETLINK IFLA_BRIDGE_CFM_CC_PEER_STATUS_INFO.
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_TLV_SEEN:
> A CCM frame with TLV has been received from Peer MEP.
> The type is u32 (bool).
> This is cleared after GETLINK IFLA_BRIDGE_CFM_CC_PEER_STATUS_INFO.
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_SEQ_UNEXP_SEEN:
> A CCM frame with unexpected sequence number has been received
> from Peer MEP.
> The type is u32 (bool).
> When a sequence number is not one higher than previously received
> then it is unexpected.
> This is cleared after GETLINK IFLA_BRIDGE_CFM_CC_PEER_STATUS_INFO.
> 
> Signed-off-by: Henrik Bjoernlund  
> Reviewed-by: Horatiu Vultur  
> ---
>  include/uapi/linux/if_bridge.h |  29 +
>  include/uapi/linux/rtnetlink.h |   1 +
>  net/bridge/br_cfm_netlink.c| 105 +
>  net/bridge/br_netlink.c|  16 -
>  net/bridge/br_private.h|   6 ++
>  5 files changed, 154 insertions(+), 3 deletions(-)
> 
> 

Acked-by: Nikolay Aleksandrov 




Re: [PATCH net-next v5 08/10] bridge: cfm: Netlink GET configuration Interface.

2020-10-14 Thread Nikolay Aleksandrov
(increment) of sequence
> number is enabled or disabled.
> The type is u32 (bool).
> IFLA_BRIDGE_CFM_CC_CCM_TX_PERIOD:
> The period of time where CCM frame are transmitted.
> The type is u32.
> The time is given in seconds. SETLINK IFLA_BRIDGE_CFM_CC_CCM_TX
> must be done before timeout to keep transmission alive.
> When period is zero any ongoing CCM frame transmission
> will be stopped.
> IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV:
> The transmitted CCM frame update with Interface Status TLV
> is enabled or disabled.
> The type is u32 (bool).
> IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV_VALUE:
> The transmitted Interface Status TLV value field.
> The type is u8.
> IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV:
> The transmitted CCM frame update with Port Status TLV is enabled
> or disabled.
> The type is u32 (bool).
> IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV_VALUE:
> The transmitted Port Status TLV value field.
> The type is u8.
> 
> Signed-off-by: Henrik Bjoernlund  
> Reviewed-by: Horatiu Vultur  
> ---
>  include/uapi/linux/if_bridge.h |   6 ++
>  net/bridge/br_cfm_netlink.c| 161 +
>  net/bridge/br_netlink.c|  29 +-
>  net/bridge/br_private.h|   6 ++
>  4 files changed, 200 insertions(+), 2 deletions(-)
> 

Acked-by: Nikolay Aleksandrov 




Re: [PATCH net-next v5 01/10] net: bridge: extend the process of special frames

2020-10-14 Thread Nikolay Aleksandrov
On Mon, 2020-10-12 at 14:04 +, Henrik Bjoernlund wrote:
> This patch extends the processing of frames in the bridge. Currently MRP
> frames need special processing and the current implementation doesn't
> allow a nice way to process different frame types. Therefore try to
> improve this by adding a list that contains frame types that need
> special processing. This list is iterated for each input frame and if
> there is a match based on frame type then the matching handler will be
> called and decide what to do with the frame. The handler can either process
> the frame, in which case the bridge doesn't need to do anything, or not
> process it, in which case the bridge will do normal forwarding.
> 
> Signed-off-by: Henrik Bjoernlund  
> Reviewed-by: Horatiu Vultur  
> ---
>  net/bridge/br_device.c  |  1 +
>  net/bridge/br_input.c   | 33 -
>  net/bridge/br_mrp.c | 19 +++
>  net/bridge/br_private.h | 19 ---
>  4 files changed, 60 insertions(+), 12 deletions(-)
> 

Looks good.
Acked-by: Nikolay Aleksandrov 



Re: [PATCH net-next v4 01/10] net: bridge: extend the process of special frames

2020-10-12 Thread Nikolay Aleksandrov
On Fri, 2020-10-09 at 14:35 +, Henrik Bjoernlund wrote:
> This patch extends the processing of frames in the bridge. Currently MRP
> frames need special processing and the current implementation doesn't
> allow a nice way to process different frame types. Therefore try to
> improve this by adding a list that contains frame types that need
> special processing. This list is iterated for each input frame and if
> there is a match based on frame type then the matching handler will be
> called and decide what to do with the frame. The handler can either process
> the frame, in which case the bridge doesn't need to do anything, or not
> process it, in which case the bridge will do normal forwarding.
> 
> Signed-off-by: Henrik Bjoernlund  
> Reviewed-by: Horatiu Vultur  
> ---
>  net/bridge/br_device.c  |  1 +
>  net/bridge/br_input.c   | 33 -
>  net/bridge/br_mrp.c | 19 +++
>  net/bridge/br_private.h | 18 --
>  4 files changed, 60 insertions(+), 11 deletions(-)
> 
[snip]
> diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
> index 345118e35c42..3e62ce2ef8a5 100644
> --- a/net/bridge/br_private.h
> +++ b/net/bridge/br_private.h
> @@ -480,6 +480,8 @@ struct net_bridge {
>  #endif
>   struct hlist_head   fdb_list;
>  
> + struct hlist_head   frame_type_list;

Since there will be a v5, I'd suggest moving this struct member into the first
cache line, as it will always be used in the bridge fast-path for all cases.
In order to make room for it there you can move port_list after fdb_hash_tbl and
add this in its place; port_list is currently used only when flooding and soon
I'll even change that.

Thanks,
 Nik

> +
>  #if IS_ENABLED(CONFIG_BRIDGE_MRP)
>   struct list_headmrp_list;
>  #endif
> @@ -755,6 +757,16 @@ int nbp_backup_change(struct net_bridge_port *p, struct 
> net_device *backup_dev);
>  int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff 
> *skb);
>  rx_handler_func_t *br_get_rx_handler(const struct net_device *dev);
>  
> +struct br_frame_type {
> + __be16  type;
> + int (*frame_handler)(struct net_bridge_port *port,
> +  struct sk_buff *skb);
> + struct hlist_node   list;
> +};
> +
> +void br_add_frame(struct net_bridge *br, struct br_frame_type *ft);
> +void br_del_frame(struct net_bridge *br, struct br_frame_type *ft);
> +
>  static inline bool br_rx_handler_check_rcu(const struct net_device *dev)
>  {
>   return rcu_dereference(dev->rx_handler) == br_get_rx_handler(dev);
> @@ -1417,7 +1429,6 @@ extern int (*br_fdb_test_addr_hook)(struct net_device 
> *dev, unsigned char *addr)
>  #if IS_ENABLED(CONFIG_BRIDGE_MRP)
>  int br_mrp_parse(struct net_bridge *br, struct net_bridge_port *p,
>struct nlattr *attr, int cmd, struct netlink_ext_ack *extack);
> -int br_mrp_process(struct net_bridge_port *p, struct sk_buff *skb);
>  bool br_mrp_enabled(struct net_bridge *br);
>  void br_mrp_port_del(struct net_bridge *br, struct net_bridge_port *p);
>  int br_mrp_fill_info(struct sk_buff *skb, struct net_bridge *br);
> @@ -1429,11 +1440,6 @@ static inline int br_mrp_parse(struct net_bridge *br, 
> struct net_bridge_port *p,
>   return -EOPNOTSUPP;
>  }
>  
> -static inline int br_mrp_process(struct net_bridge_port *p, struct sk_buff 
> *skb)
> -{
> - return 0;
> -}
> -
>  static inline bool br_mrp_enabled(struct net_bridge *br)
>  {
>   return false;



Re: [PATCH net-next v4 10/10] bridge: cfm: Netlink Notifications.

2020-10-09 Thread Nikolay Aleksandrov
On Fri, 2020-10-09 at 14:35 +, Henrik Bjoernlund wrote:
> This is the implementation of Netlink notifications out of CFM.
> 
> Notifications are initiated whenever a state change happens in CFM.
> 
> IFLA_BRIDGE_CFM:
> Points to the CFM information.
> 
> IFLA_BRIDGE_CFM_MEP_STATUS_INFO:
> This indicates that the MEP instance status follows.
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_INFO:
> This indicates that the peer MEP status follows.
> 
> CFM nested attribute has the following attributes in next level.
> 
> IFLA_BRIDGE_CFM_MEP_STATUS_INSTANCE:
> The MEP instance number of the delivered status.
> The type is NLA_U32.
> IFLA_BRIDGE_CFM_MEP_STATUS_OPCODE_UNEXP_SEEN:
> The MEP instance received CFM PDU with unexpected Opcode.
> The type is NLA_U32 (bool).
> IFLA_BRIDGE_CFM_MEP_STATUS_VERSION_UNEXP_SEEN:
> The MEP instance received CFM PDU with unexpected version.
> The type is NLA_U32 (bool).
> IFLA_BRIDGE_CFM_MEP_STATUS_RX_LEVEL_LOW_SEEN:
> The MEP instance received CCM PDU with MD level lower than
> configured level. This frame is discarded.
> The type is NLA_U32 (bool).
> 
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_INSTANCE:
> The MEP instance number of the delivered status.
> The type is NLA_U32.
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_PEER_MEPID:
> The added Peer MEP ID of the delivered status.
> The type is NLA_U32.
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_CCM_DEFECT:
> The CCM defect status.
> The type is NLA_U32 (bool).
> True means no CCM frame is received for 3.25 intervals.
> IFLA_BRIDGE_CFM_CC_CONFIG_EXP_INTERVAL.
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_RDI:
> The last received CCM PDU RDI.
> The type is NLA_U32 (bool).
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_PORT_TLV_VALUE:
> The last received CCM PDU Port Status TLV value field.
> The type is NLA_U8.
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_IF_TLV_VALUE:
> The last received CCM PDU Interface Status TLV value field.
> The type is NLA_U8.
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_SEEN:
> A CCM frame has been received from Peer MEP.
> The type is NLA_U32 (bool).
> This is cleared after GETLINK IFLA_BRIDGE_CFM_CC_PEER_STATUS_INFO.
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_TLV_SEEN:
> A CCM frame with TLV has been received from Peer MEP.
> The type is NLA_U32 (bool).
> This is cleared after GETLINK IFLA_BRIDGE_CFM_CC_PEER_STATUS_INFO.
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_SEQ_UNEXP_SEEN:
> A CCM frame with unexpected sequence number has been received
> from Peer MEP.
> The type is NLA_U32 (bool).
> When a sequence number is not one higher than previously received
> then it is unexpected.
> This is cleared after GETLINK IFLA_BRIDGE_CFM_CC_PEER_STATUS_INFO.
> 
> Signed-off-by: Henrik Bjoernlund  
> Reviewed-by: Horatiu Vultur  
> ---
>  net/bridge/br_cfm.c | 48 
>  net/bridge/br_cfm_netlink.c | 25 ++++-
>  net/bridge/br_netlink.c | 73 -
>  net/bridge/br_private.h | 22 ++-
>  4 files changed, 147 insertions(+), 21 deletions(-)
> 

Acked-by: Nikolay Aleksandrov 



Re: [PATCH net-next v4 09/10] bridge: cfm: Netlink GET status Interface.

2020-10-09 Thread Nikolay Aleksandrov
On Fri, 2020-10-09 at 14:35 +, Henrik Bjoernlund wrote:
> This is the implementation of CFM netlink status
> get information interface.
> 
> Add new nested netlink attributes. These attributes are used by the
> user space to get status information.
> 
> GETLINK:
> Request filter RTEXT_FILTER_CFM_STATUS:
> Indicating that CFM status information must be delivered.
> 
> IFLA_BRIDGE_CFM:
> Points to the CFM information.
> 
> IFLA_BRIDGE_CFM_MEP_STATUS_INFO:
> This indicate that the MEP instance status are following.
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_INFO:
> This indicate that the peer MEP status are following.
> 
> CFM nested attribute has the following attributes in next level.
> 
> GETLINK RTEXT_FILTER_CFM_STATUS:
> IFLA_BRIDGE_CFM_MEP_STATUS_INSTANCE:
> The MEP instance number of the delivered status.
> The type is u32.
> IFLA_BRIDGE_CFM_MEP_STATUS_OPCODE_UNEXP_SEEN:
> The MEP instance received CFM PDU with unexpected Opcode.
> The type is u32 (bool).
> IFLA_BRIDGE_CFM_MEP_STATUS_VERSION_UNEXP_SEEN:
> The MEP instance received CFM PDU with unexpected version.
> The type is u32 (bool).
> IFLA_BRIDGE_CFM_MEP_STATUS_RX_LEVEL_LOW_SEEN:
> The MEP instance received CCM PDU with MD level lower than
> configured level. This frame is discarded.
> The type is u32 (bool).
> 
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_INSTANCE:
> The MEP instance number of the delivered status.
> The type is u32.
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_PEER_MEPID:
> The added Peer MEP ID of the delivered status.
> The type is u32.
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_CCM_DEFECT:
> The CCM defect status.
> The type is u32 (bool).
> True means no CCM frame is received for 3.25 intervals.
> IFLA_BRIDGE_CFM_CC_CONFIG_EXP_INTERVAL.
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_RDI:
> The last received CCM PDU RDI.
> The type is u32 (bool).
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_PORT_TLV_VALUE:
> The last received CCM PDU Port Status TLV value field.
> The type is u8.
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_IF_TLV_VALUE:
> The last received CCM PDU Interface Status TLV value field.
> The type is u8.
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_SEEN:
> A CCM frame has been received from Peer MEP.
> The type is u32 (bool).
> This is cleared after GETLINK IFLA_BRIDGE_CFM_CC_PEER_STATUS_INFO.
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_TLV_SEEN:
> A CCM frame with TLV has been received from Peer MEP.
> The type is u32 (bool).
> This is cleared after GETLINK IFLA_BRIDGE_CFM_CC_PEER_STATUS_INFO.
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_SEQ_UNEXP_SEEN:
> A CCM frame with unexpected sequence number has been received
> from Peer MEP.
> The type is u32 (bool).
> When a sequence number is not one higher than previously received
> then it is unexpected.
> This is cleared after GETLINK IFLA_BRIDGE_CFM_CC_PEER_STATUS_INFO.
> 
> Signed-off-by: Henrik Bjoernlund  
> Reviewed-by: Horatiu Vultur  
> ---
>  include/uapi/linux/if_bridge.h |  29 +
>  include/uapi/linux/rtnetlink.h |   1 +
>  net/bridge/br_cfm_netlink.c| 105 +
>  net/bridge/br_netlink.c|  16 -
>  net/bridge/br_private.h|   6 ++
>  5 files changed, 154 insertions(+), 3 deletions(-)
> 
[snip]
> diff --git a/net/bridge/br_cfm_netlink.c b/net/bridge/br_cfm_netlink.c
> index 952b6372874e..94e9b46d5fb4 100644
> --- a/net/bridge/br_cfm_netlink.c
> +++ b/net/bridge/br_cfm_netlink.c
> @@ -617,3 +617,108 @@ int br_cfm_config_fill_info(struct sk_buff *skb, struct net_bridge *br)
>  nla_info_failure:
>   return -EMSGSIZE;
>  }
> +
> +int br_cfm_status_fill_info(struct sk_buff *skb, struct net_bridge *br)
> +{
> + struct nlattr *tb;
> + struct br_cfm_mep *mep;
> + struct br_cfm_peer_mep *peer_mep;
> +
> 

Reverse xmas tree here, too. Sorry I missed these earlier.




Re: [PATCH net-next v4 08/10] bridge: cfm: Netlink GET configuration Interface.

2020-10-09 Thread Nikolay Aleksandrov
On Sat, 2020-10-10 at 00:56 +0300, Nikolay Aleksandrov wrote:
> On Fri, 2020-10-09 at 14:35 +, Henrik Bjoernlund wrote:
> > This is the implementation of CFM netlink configuration
> > get information interface.
> > 
> > Add new nested netlink attributes. These attributes are used by the
> > user space to get configuration information.
> > 
> > GETLINK:
> > Request filter RTEXT_FILTER_CFM_CONFIG:
> > Indicating that CFM configuration information must be delivered.
> > 
> > IFLA_BRIDGE_CFM:
> > Points to the CFM information.
> > 
> > IFLA_BRIDGE_CFM_MEP_CREATE_INFO:
> > This indicate that MEP instance create parameters are following.
> > IFLA_BRIDGE_CFM_MEP_CONFIG_INFO:
> > This indicate that MEP instance config parameters are following.
> > IFLA_BRIDGE_CFM_CC_CONFIG_INFO:
> > This indicate that MEP instance CC functionality
> > parameters are following.
> > IFLA_BRIDGE_CFM_CC_RDI_INFO:
> > This indicate that CC transmitted CCM PDU RDI
> > parameters are following.
> > IFLA_BRIDGE_CFM_CC_CCM_TX_INFO:
> > This indicate that CC transmitted CCM PDU parameters are
> > following.
> > IFLA_BRIDGE_CFM_CC_PEER_MEP_INFO:
> > This indicate that the added peer MEP IDs are following.
> > 
> > CFM nested attribute has the following attributes in next level.
> > 
> > GETLINK RTEXT_FILTER_CFM_CONFIG:
> > IFLA_BRIDGE_CFM_MEP_CREATE_INSTANCE:
> > The created MEP instance number.
> > The type is u32.
> > IFLA_BRIDGE_CFM_MEP_CREATE_DOMAIN:
> > The created MEP domain.
> > The type is u32 (br_cfm_domain).
> > It must be BR_CFM_PORT.
> > This means that CFM frames are transmitted and received
> > directly on the port - untagged. Not in a VLAN.
> > IFLA_BRIDGE_CFM_MEP_CREATE_DIRECTION:
> > The created MEP direction.
> > The type is u32 (br_cfm_mep_direction).
> > It must be BR_CFM_MEP_DIRECTION_DOWN.
> > This means that CFM frames are transmitted and received on
> > the port. Not in the bridge.
> > IFLA_BRIDGE_CFM_MEP_CREATE_IFINDEX:
> > The created MEP residence port ifindex.
> > The type is u32 (ifindex).
> > 
> > IFLA_BRIDGE_CFM_MEP_DELETE_INSTANCE:
> > The deleted MEP instance number.
> > The type is u32.
> > 
> > IFLA_BRIDGE_CFM_MEP_CONFIG_INSTANCE:
> > The configured MEP instance number.
> > The type is u32.
> > IFLA_BRIDGE_CFM_MEP_CONFIG_UNICAST_MAC:
> > The configured MEP unicast MAC address.
> > The type is 6*u8 (array).
> > This is used as SMAC in all transmitted CFM frames.
> > IFLA_BRIDGE_CFM_MEP_CONFIG_MDLEVEL:
> > The configured MEP unicast MD level.
> > The type is u32.
> > It must be in the range 1-7.
> > No CFM frames are passing through this MEP on lower levels.
> > IFLA_BRIDGE_CFM_MEP_CONFIG_MEPID:
> > The configured MEP ID.
> > The type is u32.
> > It must be in the range 0-0x1FFF.
> > This MEP ID is inserted in any transmitted CCM frame.
> > 
> > IFLA_BRIDGE_CFM_CC_CONFIG_INSTANCE:
> > The configured MEP instance number.
> > The type is u32.
> > IFLA_BRIDGE_CFM_CC_CONFIG_ENABLE:
> > The Continuity Check (CC) functionality is enabled or disabled.
> > The type is u32 (bool).
> > IFLA_BRIDGE_CFM_CC_CONFIG_EXP_INTERVAL:
> > The CC expected receive interval of CCM frames.
> > The type is u32 (br_cfm_ccm_interval).
> > This is also the transmission interval of CCM frames when enabled.
> > IFLA_BRIDGE_CFM_CC_CONFIG_EXP_MAID:
> > The CC expected receive MAID in CCM frames.
> > The type is CFM_MAID_LENGTH*u8.
> > This MAID is also inserted in transmitted CCM frames.
> > 
> > IFLA_BRIDGE_CFM_CC_PEER_MEP_INSTANCE:
> > The configured MEP instance number.
> > The type is u32.
> > IFLA_BRIDGE_CFM_CC_PEER_MEPID:
> > The CC Peer MEP ID added.
> > The type is u32.
> > When a Peer MEP ID is added and CC is enabled it is expected to
> > receive CCM frames from that Peer MEP.
> > 
> > IFLA_BRIDGE_CFM_CC_RDI_INSTANCE:
> > The configured MEP instance number.
> > The type is u32.
> >

Re: [PATCH net-next v4 08/10] bridge: cfm: Netlink GET configuration Interface.

2020-10-09 Thread Nikolay Aleksandrov
(increment) of sequence
> number is enabled or disabled.
> The type is u32 (bool).
> IFLA_BRIDGE_CFM_CC_CCM_TX_PERIOD:
> The period of time where CCM frame are transmitted.
> The type is u32.
> The time is given in seconds. SETLINK IFLA_BRIDGE_CFM_CC_CCM_TX
> must be done before timeout to keep transmission alive.
> When period is zero any ongoing CCM frame transmission
> will be stopped.
> IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV:
> The transmitted CCM frame update with Interface Status TLV
> is enabled or disabled.
> The type is u32 (bool).
> IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV_VALUE:
> The transmitted Interface Status TLV value field.
> The type is u8.
> IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV:
> The transmitted CCM frame update with Port Status TLV is enabled
> or disabled.
> The type is u32 (bool).
> IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV_VALUE:
> The transmitted Port Status TLV value field.
> The type is u8.
> 
> Signed-off-by: Henrik Bjoernlund  
> Reviewed-by: Horatiu Vultur  
> ---
>  include/uapi/linux/if_bridge.h |   6 ++
>  net/bridge/br_cfm_netlink.c| 161 +
>  net/bridge/br_netlink.c|  29 +-
>  net/bridge/br_private.h|   6 ++
>  4 files changed, 200 insertions(+), 2 deletions(-)
> 

Acked-by: Nikolay Aleksandrov 



Re: [PATCH net-next v4 07/10] bridge: cfm: Netlink SET configuration Interface.

2020-10-09 Thread Nikolay Aleksandrov
TX_SEQ_NO_UPDATE:
> The transmitted CCM frame update (increment) of sequence
> number is enabled or disabled.
> The type is u32 (bool).
> IFLA_BRIDGE_CFM_CC_CCM_TX_PERIOD:
> The period of time where CCM frame are transmitted.
> The type is u32.
> The time is given in seconds. SETLINK IFLA_BRIDGE_CFM_CC_CCM_TX
> must be done before timeout to keep transmission alive.
> When period is zero any ongoing CCM frame transmission
> will be stopped.
> IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV:
> The transmitted CCM frame update with Interface Status TLV
> is enabled or disabled.
> The type is u32 (bool).
> IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV_VALUE:
> The transmitted Interface Status TLV value field.
> The type is u8.
> IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV:
> The transmitted CCM frame update with Port Status TLV is enabled
> or disabled.
> The type is u32 (bool).
> IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV_VALUE:
> The transmitted Port Status TLV value field.
> The type is u8.
> 
> Signed-off-by: Henrik Bjoernlund  
> Reviewed-by: Horatiu Vultur  
> ---
>  include/uapi/linux/if_bridge.h |  90 +++
>  include/uapi/linux/rtnetlink.h |   1 +
>  net/bridge/Makefile|   2 +-
>  net/bridge/br_cfm.c|   5 +
>  net/bridge/br_cfm_netlink.c| 458 +
>  net/bridge/br_netlink.c|   5 +
>  net/bridge/br_private.h|  17 +-
>  7 files changed, 576 insertions(+), 2 deletions(-)
>  create mode 100644 net/bridge/br_cfm_netlink.c
> 

Acked-by: Nikolay Aleksandrov 



Re: [PATCH net-next v4 06/10] bridge: cfm: Kernel space implementation of CFM. CCM frame RX added.

2020-10-09 Thread Nikolay Aleksandrov
On Fri, 2020-10-09 at 14:35 +, Henrik Bjoernlund wrote:
> This is the third commit of the implementation of the CFM protocol
> according to 802.1Q section 12.14.
> 
> Functionality is extended with CCM frame reception.
> The MEP instance now contains CCM based status information.
> Most important is the CCM defect status indicating if correct
> CCM frames are received with the expected interval.
> 
> Signed-off-by: Henrik Bjoernlund  
> Reviewed-by: Horatiu Vultur  
> ---
>  include/uapi/linux/cfm_bridge.h |  10 ++
>  net/bridge/br_cfm.c | 269 
>  net/bridge/br_private_cfm.h |  32 
>  3 files changed, 311 insertions(+)
> 

Acked-by: Nikolay Aleksandrov 




Re: [PATCH net-next v4 05/10] bridge: cfm: Kernel space implementation of CFM. CCM frame TX added.

2020-10-09 Thread Nikolay Aleksandrov
On Fri, 2020-10-09 at 14:35 +, Henrik Bjoernlund wrote:
> This is the second commit of the implementation of the CFM protocol
> according to 802.1Q section 12.14.
> 
> Functionality is extended with CCM frame transmission.
> 
> Interface is extended with these functions:
> br_cfm_cc_rdi_set()
> br_cfm_cc_ccm_tx()
> br_cfm_cc_config_set()
> 
> A MEP Continuity Check feature can be configured by
> br_cfm_cc_config_set()
> The Continuity Check parameters can be configured to be used when
> transmitting CCM.
> 
> A MEP can be configured to start or stop transmission of CCM frames by
> br_cfm_cc_ccm_tx()
> The CCM will be transmitted for a selected period in seconds.
> Must call this function before timeout to keep transmission alive.
> 
> A MEP transmitting CCM can be configured with inserted RDI in PDU by
> br_cfm_cc_rdi_set()
> 
> Signed-off-by: Henrik Bjoernlund  
> Reviewed-by: Horatiu Vultur  
> ---
>  include/uapi/linux/cfm_bridge.h |  39 -
>  net/bridge/br_cfm.c | 284 
>  net/bridge/br_private_cfm.h |  54 ++
>  3 files changed, 376 insertions(+), 1 deletion(-)
> 

Acked-by: Nikolay Aleksandrov 



Re: [PATCH net-next v4 04/10] bridge: cfm: Kernel space implementation of CFM. MEP create/delete.

2020-10-09 Thread Nikolay Aleksandrov
On Fri, 2020-10-09 at 14:35 +, Henrik Bjoernlund wrote:
> This is the first commit of the implementation of the CFM protocol
> according to 802.1Q section 12.14.
> 
> It contains MEP instance create, delete and configuration.
> 
> Connectivity Fault Management (CFM) comprises capabilities for
> detecting, verifying, and isolating connectivity failures in
> Virtual Bridged Networks. These capabilities can be used in
> networks operated by multiple independent organizations, each
> with restricted management access to each other's equipment.
> 
> CFM functions are partitioned as follows:
> - Path discovery
> - Fault detection
> - Fault verification and isolation
> - Fault notification
> - Fault recovery
> 
> Interface consists of these functions:
> br_cfm_mep_create()
> br_cfm_mep_delete()
> br_cfm_mep_config_set()
> br_cfm_cc_config_set()
> br_cfm_cc_peer_mep_add()
> br_cfm_cc_peer_mep_remove()
> 
> A MEP instance is created by br_cfm_mep_create()
> -It is the Maintenance association End Point
>  described in 802.1Q section 19.2.
> -It is created on a specific level (1-7) and is assuring
>  that no CFM frames are passing through this MEP on lower levels.
> -It initiates and validates CFM frames on its level.
> -It can only exist on a port that is related to a bridge.
> -Attributes given cannot be changed until the instance is
>  deleted.
> 
> A MEP instance can be deleted by br_cfm_mep_delete().
> 
> A created MEP instance has attributes that can be
> configured by br_cfm_mep_config_set().
> 
> A MEP Continuity Check feature can be configured by
> br_cfm_cc_config_set()
> The Continuity Check Receiver state machine can be
> enabled and disabled.
> According to 802.1Q section 19.2.8
> 
> A MEP can have Peer MEPs added and removed by
> br_cfm_cc_peer_mep_add() and br_cfm_cc_peer_mep_remove()
> The Continuity Check feature can maintain connectivity
> status on each added Peer MEP.
> 
> Signed-off-by: Henrik Bjoernlund  
> Reviewed-by: Horatiu Vultur  
> ---
>  include/uapi/linux/cfm_bridge.h |  23 +++
>  net/bridge/Makefile |   2 +
>  net/bridge/br_cfm.c | 278 
>  net/bridge/br_if.c  |   1 +
>  net/bridge/br_private.h |  10 ++
>  net/bridge/br_private_cfm.h |  61 +++
>  6 files changed, 375 insertions(+)
>  create mode 100644 include/uapi/linux/cfm_bridge.h
>  create mode 100644 net/bridge/br_cfm.c
>  create mode 100644 net/bridge/br_private_cfm.h
> 

Acked-by: Nikolay Aleksandrov 



Re: [PATCH net-next v4 03/10] bridge: uapi: cfm: Added EtherType used by the CFM protocol.

2020-10-09 Thread Nikolay Aleksandrov
On Fri, 2020-10-09 at 14:35 +, Henrik Bjoernlund wrote:
> This EtherType is used by all CFM protocol frames transmitted
> according to 802.1Q section 12.14.
> 
> Signed-off-by: Henrik Bjoernlund  
> Reviewed-by: Horatiu Vultur  
> Acked-by: Nikolay Aleksandrov 
> ---
>  include/uapi/linux/if_ether.h | 1 +
>  1 file changed, 1 insertion(+)
> 

Acked-by: Nikolay Aleksandrov 


Re: [PATCH net-next v4 01/10] net: bridge: extend the process of special frames

2020-10-09 Thread Nikolay Aleksandrov
On Fri, 2020-10-09 at 14:35 +, Henrik Bjoernlund wrote:
> This patch extends the processing of frames in the bridge. Currently MRP
> frames need special processing and the current implementation doesn't
> allow a nice way to process different frame types. Therefore try to
> improve this by adding a list that contains frame types that need
> special processing. This list is iterated for each input frame and if
> there is a match based on frame type then these functions will be called
> and decide what to do with the frame. It can process the frame then the
> bridge doesn't need to do anything or don't process so then the bridge
> will do normal forwarding.
> 
> Signed-off-by: Henrik Bjoernlund  
> Reviewed-by: Horatiu Vultur  
> ---
>  net/bridge/br_device.c  |  1 +
>  net/bridge/br_input.c   | 33 -
>  net/bridge/br_mrp.c | 19 +++
>  net/bridge/br_private.h | 18 --
>  4 files changed, 60 insertions(+), 11 deletions(-)
> 

Acked-by: Nikolay Aleksandrov 


Re: [PATCH net-next v4 02/10] bridge: cfm: Add BRIDGE_CFM to Kconfig.

2020-10-09 Thread Nikolay Aleksandrov
On Fri, 2020-10-09 at 14:35 +, Henrik Bjoernlund wrote:
> This makes it possible to include or exclude the CFM
> protocol according to 802.1Q section 12.14.
> 
> Signed-off-by: Henrik Bjoernlund  
> Reviewed-by: Horatiu Vultur  
> ---
>  net/bridge/Kconfig  | 11 +++
>  net/bridge/br_device.c  |  3 +++
>  net/bridge/br_private.h |  3 +++
>  3 files changed, 17 insertions(+)
> 

Acked-by: Nikolay Aleksandrov 


Re: [PATCH net-next v4 02/10] bridge: cfm: Add BRIDGE_CFM to Kconfig.

2020-10-09 Thread Nikolay Aleksandrov
On Fri, 2020-10-09 at 14:35 +, Henrik Bjoernlund wrote:
> This makes it possible to include or exclude the CFM
> protocol according to 802.1Q section 12.14.
> 
> Signed-off-by: Henrik Bjoernlund  
> Reviewed-by: Horatiu Vultur  
> ---
>  net/bridge/Kconfig  | 11 +++
>  net/bridge/br_device.c  |  3 +++
>  net/bridge/br_private.h |  3 +++
>  3 files changed, 17 insertions(+)
> 

Acked-by: Nikolay Aleksandrov 



Re: [PATCH net] bridge: Netlink interface fix.

2020-10-08 Thread Nikolay Aleksandrov
On Thu, 2020-10-08 at 10:09 -0700, Jakub Kicinski wrote:
> On Thu, 8 Oct 2020 10:18:09 +0000 Nikolay Aleksandrov wrote:
> > On Wed, 2020-10-07 at 14:49 +, Nikolay Aleksandrov wrote:
> > > On Wed, 2020-10-07 at 12:07 +, Henrik Bjoernlund wrote:  
> > > > This commit is correcting NETLINK br_fill_ifinfo() to be able to
> > > > handle 'filter_mask' with multiple flags asserted.
> > > > 
> > > > Fixes: 36a8e8e265420 ("bridge: Extend br_fill_ifinfo to return MPR status")
> > > > 
> > > > Signed-off-by: Henrik Bjoernlund 
> > > > Reviewed-by: Horatiu Vultur 
> > > > Suggested-by: Nikolay Aleksandrov 
> > > > Tested-by: Horatiu Vultur 
> > > > ---
> > > >  net/bridge/br_netlink.c | 26 +++---
> > > >  1 file changed, 11 insertions(+), 15 deletions(-)
> > > >   
> > > 
> > > The patch looks good, please don't separate the Fixes tag from the others.
> > > Acked-by: Nikolay Aleksandrov 
> > >   
> > 
> > TBH, this does change a user facing api (the attribute nesting), but I think
> > in this case it's acceptable due to the format being wrong and MRP being new, so
> > I doubt anyone is yet dumping it mixed with vlan filter_mask and checking for
> > two identical attributes, i.e. in the old/broken case parsing the attributes
> > into a table would hide one of them and you'd have to walk over all attributes
> > to catch that.
> 
> To be clear - this changes the uAPI as far as 5.9-rcs are concerned. 
> So if this change was to hit 5.9 final there would be no uAPI breakage,
> right?

Yes, correct.


Re: [PATCH net] bridge: Netlink interface fix.

2020-10-08 Thread Nikolay Aleksandrov
On Wed, 2020-10-07 at 14:49 +, Nikolay Aleksandrov wrote:
> On Wed, 2020-10-07 at 12:07 +, Henrik Bjoernlund wrote:
> > This commit is correcting NETLINK br_fill_ifinfo() to be able to
> > handle 'filter_mask' with multiple flags asserted.
> > 
> > Fixes: 36a8e8e265420 ("bridge: Extend br_fill_ifinfo to return MPR status")
> > 
> > Signed-off-by: Henrik Bjoernlund 
> > Reviewed-by: Horatiu Vultur 
> > Suggested-by: Nikolay Aleksandrov 
> > Tested-by: Horatiu Vultur 
> > ---
> >  net/bridge/br_netlink.c | 26 +++---
> >  1 file changed, 11 insertions(+), 15 deletions(-)
> > 
> 
> The patch looks good, please don't separate the Fixes tag from the others.
> Acked-by: Nikolay Aleksandrov 
> 

TBH, this does change a user facing api (the attribute nesting), but I think
in this case it's acceptable due to the format being wrong and MRP being new, so
I doubt anyone is yet dumping it mixed with vlan filter_mask and checking for
two identical attributes, i.e. in the old/broken case parsing the attributes
into a table would hide one of them and you'd have to walk over all attributes
to catch that.




Re: [PATCH net] bridge: Netlink interface fix.

2020-10-07 Thread Nikolay Aleksandrov
On Wed, 2020-10-07 at 12:07 +, Henrik Bjoernlund wrote:
> This commit is correcting NETLINK br_fill_ifinfo() to be able to
> handle 'filter_mask' with multiple flags asserted.
> 
> Fixes: 36a8e8e265420 ("bridge: Extend br_fill_ifinfo to return MPR status")
> 
> Signed-off-by: Henrik Bjoernlund 
> Reviewed-by: Horatiu Vultur 
> Suggested-by: Nikolay Aleksandrov 
> Tested-by: Horatiu Vultur 
> ---
>  net/bridge/br_netlink.c | 26 +++---
>  1 file changed, 11 insertions(+), 15 deletions(-)
> 

The patch looks good, please don't separate the Fixes tag from the others.
Acked-by: Nikolay Aleksandrov 



Re: [net-next v3 0/9] net: bridge: cfm: Add support for Connectivity Fault Management(CFM)

2020-10-06 Thread Nikolay Aleksandrov
On Tue, 2020-10-06 at 14:53 +, Henrik Bjoernlund wrote:
> Connectivity Fault Management (CFM) is defined in 802.1Q section 12.14.
> 
> Connectivity Fault Management (CFM) comprises capabilities for detecting, verifying,
> and isolating connectivity failures in Virtual Bridged Networks.
> These capabilities can be used in networks operated by multiple independent organizations,
> each with restricted management access to each other’s equipment.
> 
> CFM functions are partitioned as follows:
> — Path discovery
> — Fault detection
> — Fault verification and isolation
> — Fault notification
> — Fault recovery
> 
> The primary CFM protocol shims are called Maintenance Points (MPs).
> A MP can be either a MEP or a MHF.
> The MEP:
> -It is the Maintenance association End Point
>  described in 802.1Q section 19.2.
> -It is created on a specific level (1-7) and is assuring
>  that no CFM frames are passing through this MEP on lower levels.
> -It initiates and terminates/validates CFM frames on its level.
> -It can only exist on a port that is related to a bridge.
> The MHF:
> -It is the Maintenance Domain Intermediate Point
>  (MIP) Half Function (MHF) described in 802.1Q section 19.3.
> -It is created on a specific level (1-7).
> -It is extracting/injecting certain CFM frame on this level.
> -It can only exist on a port that is related to a bridge.
> -Currently not supported.
> 
> There are defined the following CFM protocol functions:
> -Continuity Check
> -Loopback. Currently not supported.
> -Linktrace. Currently not supported.
> 
> This CFM component supports create/delete of MEP instances and configuration of
> the different CFM protocols. Also status information can be fetched and delivered
> through notification due to defect status change.
> 
> The user interacts with CFM using the 'cfm' user space client program,
> the client talks with the kernel using netlink.
> 
> Any notification emitted by CFM from the kernel can be monitored in user space
> by starting 'cfm_server' program.
> 
> Currently this 'cfm' and 'cfm_server' programs are standalone placed in a cfm
> repository https://github.com/microchip-ung/cfm but it is considered to integrate
> this into 'iproute2'.
> 
> v2 -> v3
> The switchdev definition and utilization has been removed as there was no
> switchdev implementation.
> Some compiling issues are fixed as Reported-by: kernel test robot 
> .
> 

Well, funny timing. :) I just finished reviewing v2 half an hour ago. All of my
comments for v2 apply for this v3 set, sorry but you'll have to fix a few more
issues.

Thanks,
 Nik



Re: [net-next v2 09/11] bridge: cfm: Bridge port remove.

2020-10-06 Thread Nikolay Aleksandrov
On Thu, 2020-10-01 at 10:30 +, Henrik Bjoernlund wrote:
> This is addition of CFM functionality to delete MEP instances
> on a port that is removed from the bridge.
> A MEP can only exist on a port that is related to a bridge.
> 
> Reviewed-by: Horatiu Vultur  
> Signed-off-by: Henrik Bjoernlund
> ---
>  net/bridge/br_cfm.c | 13 +
>  net/bridge/br_if.c  |  1 +
>  net/bridge/br_private.h |  6 ++
>  3 files changed, 20 insertions(+)
> 

This patch should be a part of the one which adds the ability to attach MEPs to
ports so they will get cleaned up properly on port del in the same patch.
Is there a reason for it to be a separate patch?

One more comment below.

Thanks,
 Nik

> diff --git a/net/bridge/br_cfm.c b/net/bridge/br_cfm.c
> index 6fbfef44c235..fc8268cb76c1 100644
> --- a/net/bridge/br_cfm.c
> +++ b/net/bridge/br_cfm.c
> @@ -867,3 +867,16 @@ bool br_cfm_created(struct net_bridge *br)
>  {
>   return !hlist_empty(&br->mep_list);
>  }
> +
> +/* Deletes the CFM instances on a specific bridge port
> + */
> +void br_cfm_port_del(struct net_bridge *br, struct net_bridge_port *port)
> +{
> + struct br_cfm_mep *mep;
> +
> + ASSERT_RTNL();
> +
> + hlist_for_each_entry(mep, &br->mep_list, head)

hlist_for_each_entry_safe()

> + if (mep->create.ifindex == port->dev->ifindex)
> + mep_delete_implementation(br, mep);
> +}
> diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
> index a0e9a7937412..f7d2f472ae24 100644
> --- a/net/bridge/br_if.c
> +++ b/net/bridge/br_if.c
> @@ -334,6 +334,7 @@ static void del_nbp(struct net_bridge_port *p)
>   spin_unlock_bh(&br->lock);
>  
>   br_mrp_port_del(br, p);
> + br_cfm_port_del(br, p);
>  
>   br_ifinfo_notify(RTM_DELLINK, NULL, p);
>  
> diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
> index 5954ee45af80..735dd0028b40 100644
> --- a/net/bridge/br_private.h
> +++ b/net/bridge/br_private.h
> @@ -1465,6 +1465,7 @@ static inline int br_mrp_fill_info(struct sk_buff *skb, struct net_bridge *br)
>  int br_cfm_parse(struct net_bridge *br, struct net_bridge_port *p,
>struct nlattr *attr, int cmd, struct netlink_ext_ack *extack);
>  bool br_cfm_created(struct net_bridge *br);
> +void br_cfm_port_del(struct net_bridge *br, struct net_bridge_port *p);
>  int br_cfm_config_fill_info(struct sk_buff *skb, struct net_bridge *br);
>  int br_cfm_status_fill_info(struct sk_buff *skb,
>   struct net_bridge *br,
> @@ -1484,6 +1485,11 @@ static inline bool br_cfm_created(struct net_bridge *br)
>   return false;
>  }
>  
> +static inline void br_cfm_port_del(struct net_bridge *br,
> +struct net_bridge_port *p)
> +{
> +}
> +
>  static inline int br_cfm_config_fill_info(struct sk_buff *skb, struct net_bridge *br)
>  {
>   return -EOPNOTSUPP;



Re: [net-next v2 08/11] bridge: cfm: Netlink Notifications.

2020-10-06 Thread Nikolay Aleksandrov
On Thu, 2020-10-01 at 10:30 +, Henrik Bjoernlund wrote:
> This is the implementation of Netlink notifications out of CFM.
> 
> Notifications are initiated whenever a state change happens in CFM.
> 
> IFLA_BRIDGE_CFM:
> Points to the CFM information.
> 
> IFLA_BRIDGE_CFM_MEP_STATUS_INFO:
> This indicate that the MEP instance status are following.
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_INFO:
> This indicate that the peer MEP status are following.
> 
> CFM nested attribute has the following attributes in next level.
> 
> IFLA_BRIDGE_CFM_MEP_STATUS_INSTANCE:
> The MEP instance number of the delivered status.
> The type is NLA_U32.
> IFLA_BRIDGE_CFM_MEP_STATUS_OPCODE_UNEXP_SEEN:
> The MEP instance received CFM PDU with unexpected Opcode.
> The type is NLA_U32 (bool).
> IFLA_BRIDGE_CFM_MEP_STATUS_VERSION_UNEXP_SEEN:
> The MEP instance received CFM PDU with unexpected version.
> The type is NLA_U32 (bool).
> IFLA_BRIDGE_CFM_MEP_STATUS_RX_LEVEL_LOW_SEEN:
> The MEP instance received CCM PDU with MD level lower than
> configured level. This frame is discarded.
> The type is NLA_U32 (bool).
> 
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_INSTANCE:
> The MEP instance number of the delivered status.
> The type is NLA_U32.
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_PEER_MEPID:
> The added Peer MEP ID of the delivered status.
> The type is NLA_U32.
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_CCM_DEFECT:
> The CCM defect status.
> The type is NLA_U32 (bool).
> True means no CCM frame is received for 3.25 intervals.
> IFLA_BRIDGE_CFM_CC_CONFIG_EXP_INTERVAL.
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_RDI:
> The last received CCM PDU RDI.
> The type is NLA_U32 (bool).
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_PORT_TLV_VALUE:
> The last received CCM PDU Port Status TLV value field.
> The type is NLA_U8.
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_IF_TLV_VALUE:
> The last received CCM PDU Interface Status TLV value field.
> The type is NLA_U8.
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_SEEN:
> A CCM frame has been received from Peer MEP.
> The type is NLA_U32 (bool).
> This is cleared after GETLINK IFLA_BRIDGE_CFM_CC_PEER_STATUS_INFO.
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_TLV_SEEN:
> A CCM frame with TLV has been received from Peer MEP.
> The type is NLA_U32 (bool).
> This is cleared after GETLINK IFLA_BRIDGE_CFM_CC_PEER_STATUS_INFO.
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_SEQ_UNEXP_SEEN:
> A CCM frame with unexpected sequence number has been received
> from Peer MEP.
> The type is NLA_U32 (bool).
> When a sequence number is not one higher than previously received
> then it is unexpected.
> This is cleared after GETLINK IFLA_BRIDGE_CFM_CC_PEER_STATUS_INFO.
> 
> Reviewed-by: Horatiu Vultur  
> Signed-off-by: Henrik Bjoernlund  
> ---
>  net/bridge/br_cfm.c | 48 
>  net/bridge/br_cfm_netlink.c | 27 +-
>  net/bridge/br_netlink.c | 73 -
>  net/bridge/br_private.h | 22 ++-
>  4 files changed, 148 insertions(+), 22 deletions(-)
> 
[snip]
>   return !hlist_empty(&br->mep_list);
> diff --git a/net/bridge/br_cfm_netlink.c b/net/bridge/br_cfm_netlink.c
> index 7bdf890b8ccc..5f81262c9caa 100644
> --- a/net/bridge/br_cfm_netlink.c
> +++ b/net/bridge/br_cfm_netlink.c
> @@ -325,8 +325,8 @@ static int br_cc_ccm_tx_parse(struct net_bridge *br, struct nlattr *attr,
> struct netlink_ext_ack *extack)
>  {
>   struct nlattr *tb[IFLA_BRIDGE_CFM_CC_CCM_TX_MAX + 1];
> - u32 instance;
>   struct br_cfm_cc_ccm_tx_info tx_info;
> + u32 instance;
>   int err;

This hunk is unnecessary as it's new code added by this set, just add it
correctly in the first place and drop this change.

Thanks,
 Nik


Re: [net-next v2 07/11] bridge: cfm: Netlink Interface.

2020-10-06 Thread Nikolay Aleksandrov
On Thu, 2020-10-01 at 10:30 +, Henrik Bjoernlund wrote:
> This is the implementation of CFM netlink configuration
> and status information interface.
> 
> Add new nested netlink attributes. These attributes are used by the
> user space to create/delete/configure CFM instances and get status.
> Also they are used by the kernel to notify the user space when changes
> in any status happens.
> 
> SETLINK:
> IFLA_BRIDGE_CFM:
> Indicate that the following attributes are CFM.
> 
> IFLA_BRIDGE_CFM_MEP_CREATE:
> This indicate that a MEP instance must be created.
> IFLA_BRIDGE_CFM_MEP_DELETE:
> This indicate that a MEP instance must be deleted.
> IFLA_BRIDGE_CFM_MEP_CONFIG:
> This indicate that a MEP instance must be configured.
> IFLA_BRIDGE_CFM_CC_CONFIG:
> This indicate that a MEP instance Continuity Check (CC)
> functionality must be configured.
> IFLA_BRIDGE_CFM_CC_PEER_MEP_ADD:
> This indicate that a CC Peer MEP must be added.
> IFLA_BRIDGE_CFM_CC_PEER_MEP_REMOVE:
> This indicate that a CC Peer MEP must be removed.
> IFLA_BRIDGE_CFM_CC_CCM_TX:
> This indicate that the CC transmitted CCM PDU must be configured.
> IFLA_BRIDGE_CFM_CC_RDI:
> This indicate that the CC transmitted CCM PDU RDI must be
> configured.
> 
> GETLINK:
> Request filter RTEXT_FILTER_CFM_CONFIG:
> Indicating that CFM configuration information must be delivered.
> 
> IFLA_BRIDGE_CFM:
> Points to the CFM information.
> 
> IFLA_BRIDGE_CFM_MEP_CREATE_INFO:
> This indicate that MEP instance create parameters are following.
> IFLA_BRIDGE_CFM_MEP_CONFIG_INFO:
> This indicate that MEP instance config parameters are following.
> IFLA_BRIDGE_CFM_CC_CONFIG_INFO:
> This indicate that MEP instance CC functionality
> parameters are following.
> IFLA_BRIDGE_CFM_CC_RDI_INFO:
> This indicate that CC transmitted CCM PDU RDI
> parameters are following.
> IFLA_BRIDGE_CFM_CC_CCM_TX_INFO:
> This indicate that CC transmitted CCM PDU parameters are
> following.
> IFLA_BRIDGE_CFM_CC_PEER_MEP_INFO:
> This indicate that the added peer MEP IDs are following.
> 
> Request filter RTEXT_FILTER_CFM_STATUS:
> Indicating that CFM status information must be delivered.
> 
> IFLA_BRIDGE_CFM:
> Points to the CFM information.
> 
> IFLA_BRIDGE_CFM_MEP_STATUS_INFO:
> This indicate that the MEP instance status are following.
> IFLA_BRIDGE_CFM_CC_PEER_STATUS_INFO:
> This indicate that the peer MEP status are following.
> 
> CFM nested attribute has the following attributes in next level.
> 
> SETLINK and GETLINK RTEXT_FILTER_CFM_CONFIG:
> IFLA_BRIDGE_CFM_MEP_CREATE_INSTANCE:
> The created MEP instance number.
> The type is u32.
> IFLA_BRIDGE_CFM_MEP_CREATE_DOMAIN:
> The created MEP domain.
> The type is u32 (br_cfm_domain).
> It must be BR_CFM_PORT.
> This means that CFM frames are transmitted and received
> directly on the port - untagged. Not in a VLAN.
> IFLA_BRIDGE_CFM_MEP_CREATE_DIRECTION:
> The created MEP direction.
> The type is u32 (br_cfm_mep_direction).
> It must be BR_CFM_MEP_DIRECTION_DOWN.
> This means that CFM frames are transmitted and received on
> the port. Not in the bridge.
> IFLA_BRIDGE_CFM_MEP_CREATE_IFINDEX:
> The created MEP residence port ifindex.
> The type is u32 (ifindex).
> 
> IFLA_BRIDGE_CFM_MEP_DELETE_INSTANCE:
> The deleted MEP instance number.
> The type is u32.
> 
> IFLA_BRIDGE_CFM_MEP_CONFIG_INSTANCE:
> The configured MEP instance number.
> The type is u32.
> IFLA_BRIDGE_CFM_MEP_CONFIG_UNICAST_MAC:
> The configured MEP unicast MAC address.
> The type is 6*u8 (array).
> This is used as SMAC in all transmitted CFM frames.
> IFLA_BRIDGE_CFM_MEP_CONFIG_MDLEVEL:
> The configured MEP unicast MD level.
> The type is u32.
> It must be in the range 1-7.
> No CFM frames are passing through this MEP on lower levels.
> IFLA_BRIDGE_CFM_MEP_CONFIG_MEPID:
> The configured MEP ID.
> The type is u32.
> It must be in the range 0-0x1FFF.
> This MEP ID is inserted in any transmitted CCM frame.
> 
> IFLA_BRIDGE_CFM_CC_CONFIG_INSTANCE:
> The configured MEP instance number.
> The type is u32.
> IFLA_BRIDGE_CFM_CC_CONFIG_ENABLE:
> The Continuity Check (CC) functionality is enabled or disabled.
> The type is u32 (bool).
> IFLA_BRIDGE_CFM_CC_CONFIG_EXP_INTERVAL:
> The CC expected receive interval of CCM frames.
> The type is u32 (br_cfm_ccm_interval).
> This is also the transmission interval of CCM frames when enabled.
> IFLA_BRIDGE_CFM_CC_CONFIG_EXP_MAID:
>  

Re: [net-next v2 04/11] bridge: cfm: Kernel space implementation of CFM.

2020-10-06 Thread Nikolay Aleksandrov
On Thu, 2020-10-01 at 10:30 +, Henrik Bjoernlund wrote:
> This is the first commit of the implementation of the CFM protocol
> according to 802.1Q section 12.14.
> 
> Connectivity Fault Management (CFM) comprises capabilities for
> detecting, verifying, and isolating connectivity failures in
> Virtual Bridged Networks. These capabilities can be used in
> networks operated by multiple independent organizations, each
> with restricted management access to each other's equipment.
> 
> CFM functions are partitioned as follows:
> - Path discovery
> - Fault detection
> - Fault verification and isolation
> - Fault notification
> - Fault recovery
> 
> Interface consists of these functions:
> br_cfm_mep_create()
> br_cfm_mep_delete()
> br_cfm_mep_config_set()
> br_cfm_cc_config_set()
> br_cfm_cc_peer_mep_add()
> br_cfm_cc_peer_mep_remove()
> 
> A MEP instance is created by br_cfm_mep_create()
> -It is the Maintenance association End Point
>  described in 802.1Q section 19.2.
> -It is created on a specific level (1-7) and is assuring
>  that no CFM frames are passing through this MEP on lower levels.
> -It initiates and validates CFM frames on its level.
> -It can only exist on a port that is related to a bridge.
> -Attributes given cannot be changed until the instance is
>  deleted.
> 
> A MEP instance can be deleted by br_cfm_mep_delete().
> 
> A created MEP instance has attributes that can be
> configured by br_cfm_mep_config_set().
> 
> A MEP Continuity Check feature can be configured by
> br_cfm_cc_config_set()
> The Continuity Check Receiver state machine can be
> enabled and disabled.
> According to 802.1Q section 19.2.8
> 
> A MEP can have Peer MEPs added and removed by
> br_cfm_cc_peer_mep_add() and br_cfm_cc_peer_mep_remove()
> The Continuity Check feature can maintain connectivity
> status on each added Peer MEP.
> 
> Reviewed-by: Horatiu Vultur  
> Signed-off-by: Henrik Bjoernlund  
> ---

Thank you for breaking the big patch into 3 smaller pieces, but could you please
name them appropriately? I'm sure they add different things, so just give them
something more descriptive. Having the same subject for 3 patches looks odd.

>  include/uapi/linux/cfm_bridge.h |  23 +++
>  net/bridge/Makefile |   2 +
>  net/bridge/br_cfm.c | 263 
>  net/bridge/br_private_cfm.h |  61 
>  4 files changed, 349 insertions(+)
>  create mode 100644 include/uapi/linux/cfm_bridge.h
>  create mode 100644 net/bridge/br_cfm.c
>  create mode 100644 net/bridge/br_private_cfm.h
> 
[snip]
> +
> + mep = kzalloc(sizeof(*mep), GFP_KERNEL);
> + if (!mep)
> + return -ENOMEM;
> +
> + mep->create = *create;
> + mep->instance = instance;
> + rcu_assign_pointer(mep->b_port, p);
> +
> + INIT_HLIST_HEAD(&mep->peer_mep_list);
> +
> + hlist_add_tail_rcu(&mep->head, &br->mep_list);
> +
> + return 0;
> +}
> +
> +static void mep_delete_implementation(struct net_bridge *br,
> +   struct br_cfm_mep *mep)
> +{
> + struct br_cfm_peer_mep *peer_mep;
> +
> + ASSERT_RTNL();
> +
> + /* Empty and free peer MEP list */
> + hlist_for_each_entry(peer_mep, &mep->peer_mep_list, head) {

hlist_for_each_entry_safe()

> + hlist_del_rcu(&peer_mep->head);
> + kfree_rcu(peer_mep, rcu);
> + }
> +
> + RCU_INIT_POINTER(mep->b_port, NULL);
> + hlist_del_rcu(&mep->head);
> + kfree_rcu(mep, rcu);
> +}



Re: [net-next v2 01/11] net: bridge: extend the process of special frames

2020-10-06 Thread Nikolay Aleksandrov
On Thu, 2020-10-01 at 10:30 +, Henrik Bjoernlund wrote:
> This patch extends the processing of frames in the bridge. Currently MRP
> frames needs special processing and the current implementation doesn't
> allow a nice way to process different frame types. Therefore try to
> improve this by adding a list that contains frame types that need
> special processing. This list is iterated for each input frame and if
> there is a match based on frame type then these functions will be called
> and decide what to do with the frame. If a function processes the frame, the
> bridge doesn't need to do anything further; if it doesn't, the bridge
> will do normal forwarding.
> 
> Reviewed-by: Horatiu Vultur  
> Signed-off-by: Henrik Bjoernlund  
> ---
>  net/bridge/br_device.c  |  1 +
>  net/bridge/br_input.c   | 31 ++-
>  net/bridge/br_mrp.c | 19 +++
>  net/bridge/br_private.h | 18 --
>  4 files changed, 58 insertions(+), 11 deletions(-)
> 

Hi,
Mostly looks good, one comment below.

> diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
> index 9a2fb4aa1a10..206c4ba51cd2 100644
> --- a/net/bridge/br_device.c
> +++ b/net/bridge/br_device.c
> [snip]
> @@ -380,3 +395,17 @@ rx_handler_func_t *br_get_rx_handler(const struct 
> net_device *dev)
>  
>   return br_handle_frame;
>  }
> +
> +void br_add_frame(struct net_bridge *br, struct br_frame_type *ft)
> +{
> + hlist_add_head_rcu(&ft->list, &br->frame_type_list);
> +}
> +
> +void br_del_frame(struct net_bridge *br, struct br_frame_type *ft)
> +{
> + struct br_frame_type *tmp;
> +
> + hlist_for_each_entry(tmp, &br->frame_type_list, list)
> + if (ft == tmp)
> + hlist_del_rcu(&ft->list);

This hasn't crashed only because you're using hlist_del_rcu(), otherwise it's
wrong. You should use hlist_for_each_entry_safe() when deleting from the list
while walking it or you should end the walk after the delete since there can't
be two elements with the same address anyway.

Thanks,
 Nik





Re: [net-next v2 10/11] bridge: switchdev: cfm: switchdev interface implementation

2020-10-06 Thread Nikolay Aleksandrov
On Mon, 2020-10-05 at 15:07 +0200, Allan W. Nielsen wrote:
> Hi Jiri
> 
> On 01.10.2020 14:49, Jiri Pirko wrote:
> > EXTERNAL EMAIL: Do not click links or open attachments unless you know the 
> > content is safe
> > 
> > Thu, Oct 01, 2020 at 12:30:18PM CEST, henrik.bjoernl...@microchip.com wrote:
> > > This is the definition of the CFM switchdev interface.
> > > 
> > > The interface consist of these objects:
> > >SWITCHDEV_OBJ_ID_MEP_CFM,
> > >SWITCHDEV_OBJ_ID_MEP_CONFIG_CFM,
> > >SWITCHDEV_OBJ_ID_CC_CONFIG_CFM,
> > >SWITCHDEV_OBJ_ID_CC_PEER_MEP_CFM,
> > >SWITCHDEV_OBJ_ID_CC_CCM_TX_CFM,
> > >SWITCHDEV_OBJ_ID_MEP_STATUS_CFM,
> > >SWITCHDEV_OBJ_ID_PEER_MEP_STATUS_CFM
> > > 
> > > MEP instance add/del
> > >switchdev_port_obj_add(SWITCHDEV_OBJ_ID_MEP_CFM)
> > >switchdev_port_obj_del(SWITCHDEV_OBJ_ID_MEP_CFM)
> > > 
> > > MEP configure
> > >switchdev_port_obj_add(SWITCHDEV_OBJ_ID_MEP_CONFIG_CFM)
> > > 
> > > MEP CC configure
> > >switchdev_port_obj_add(SWITCHDEV_OBJ_ID_CC_CONFIG_CFM)
> > > 
> > > Peer MEP add/del
> > >switchdev_port_obj_add(SWITCHDEV_OBJ_ID_CC_PEER_MEP_CFM)
> > >switchdev_port_obj_del(SWITCHDEV_OBJ_ID_CC_PEER_MEP_CFM)
> > > 
> > > Start/stop CCM transmission
> > >switchdev_port_obj_add(SWITCHDEV_OBJ_ID_CC_CCM_TX_CFM)
> > > 
> > > Get MEP status
> > >   switchdev_port_obj_get(SWITCHDEV_OBJ_ID_MEP_STATUS_CFM)
> > > 
> > > Get Peer MEP status
> > >   switchdev_port_obj_get(SWITCHDEV_OBJ_ID_PEER_MEP_STATUS_CFM)
> > > 
> > > Reviewed-by: Horatiu Vultur  
> > > Signed-off-by: Henrik Bjoernlund  
> > 
> > You have to submit the driver parts as a part of this patchset.
> > Otherwise it is no good.
> Fair enough.
> 
> With MRP we did it like this, and after Nik asked for details on what is
> being offloaded, we thought that adding this would help.
> 
> The reason why we did not include the implementation of this interface
> is that it is for a new SoC which is still not fully available which is
> why we have not done the basic SwitchDev driver for it yet. But the
> basic functionality clearly needs to come first.
> 
> Our preference is to continue fixing the comments we got on the pure SW
> implementation and then get back to the SwitchDev offloading.
> 
> This will mean dropping the last 2 patches in the series.
> 
> Does that work for you Jiri, and Nik?
> 
> /Allan
> 

Sounds good to me. Sorry I was unresponsive last week, but I was sick and
couldn't get to netdev@. I'll review the set today.

Cheers,
 Nik



Re: [PATCH] Revert "net: linkwatch: add check for netdevice being present to linkwatch_do_dev"

2020-09-18 Thread Nikolay Aleksandrov
On Mon, 2020-09-14 at 09:40 +0200, Geert Uytterhoeven wrote:
> Hi David,
> 
> CC bridge
> 
> On Sun, Sep 13, 2020 at 3:34 AM David Miller  wrote:
> > From: Geert Uytterhoeven 
> > Date: Sat, 12 Sep 2020 14:33:59 +0200
> > 
> > > "dev" is not the bridge device, but the physical Ethernet interface, which
> > > may already be suspended during s2ram.
> > 
> > Hmmm, ok.
> > 
> > Looking more deeply NETDEV_CHANGE causes br_port_carrier_check() to run 
> > which
> > exits early if netif_running() is false, which is going to be true if
> > netif_device_present() is false:
> > 
> > *notified = false;
> > if (!netif_running(br->dev))
> > return;
> > 
> > The only other work the bridge notifier does is:
> > 
> > if (event != NETDEV_UNREGISTER)
> > br_vlan_port_event(p, event);
> > 
> > and:
> > 
> > /* Events that may cause spanning tree to refresh */
> > if (!notified && (event == NETDEV_CHANGEADDR || event == NETDEV_UP 
> > ||
> >   event == NETDEV_CHANGE || event == NETDEV_DOWN))
> > br_ifinfo_notify(RTM_NEWLINK, NULL, p);
> > 
> > So some vlan stuff, and emitting a netlink message to any available
> > listeners.
> > 
> > Should we really do all of this for a device which is not even
> > present?
> > 
> > This whole situation seems completely illogical.  The device is
> > useless, it logically has no link or other state that can be managed
> > or used, while it is not present.
> > 
> > So all of these bridge operations should only happen when the device
> > transitions back to present again.
> 
> Thanks for your analysis!
> I'd like to defer this to the bridge people (CC).
> 
> Gr{oetje,eeting}s,
> 
> Geert
> 

Hi,
Sorry for the delay. Interesting problem. :)
Thanks for the analysis, I don't see any issues with checking if the device
isn't present. It will have to go through some testing, but no obvious
objections/issues. Have you tried if it fixes your case?
I have briefly gone over drivers' use of netif_device_detach(), mostly it's used
for suspends, but there are a few cases which use it on IO error or when a
device is actually detaching (VF detach). The vlan port event is for vlan
devices on top of the bridge when BROPT_VLAN_BRIDGE_BINDING is enabled and their
carrier is changed based on vlan participating ports' state.

Thanks,
 Nik



Re: [PATCH RFC 2/7] bridge: cfm: Add BRIDGE_CFM to Kconfig.

2020-09-08 Thread Nikolay Aleksandrov
On Fri, 2020-09-04 at 09:15 +, Henrik Bjoernlund wrote:
> This makes it possible to include or exclude the CFM
> protocol according to 802.1Q section 12.14.
> 
> Signed-off-by: Henrik Bjoernlund  
> ---
>  net/bridge/Kconfig  | 11 +++
>  net/bridge/br_device.c  |  3 +++
>  net/bridge/br_private.h |  3 +++
>  3 files changed, 17 insertions(+)
> 
> diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig
> index 80879196560c..3c8ded7d3e84 100644
> --- a/net/bridge/Kconfig
> +++ b/net/bridge/Kconfig
> @@ -73,3 +73,14 @@ config BRIDGE_MRP
> Say N to exclude this support and reduce the binary size.
>  
> If unsure, say N.
> +
> +config BRIDGE_CFM
> + bool "CFM protocol"
> + depends on BRIDGE
> + help
> +   If you say Y here, then the Ethernet bridge will be able to run CFM
> +   protocol according to 802.1Q section 12.14
> +
> +   Say N to exclude this support and reduce the binary size.
> +
> +   If unsure, say N.
> diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
> index a9232db03108..d12f5626a4b1 100644
> --- a/net/bridge/br_device.c
> +++ b/net/bridge/br_device.c
> @@ -476,6 +476,9 @@ void br_dev_setup(struct net_device *dev)
>   INIT_LIST_HEAD(&br->ftype_list);
>  #if IS_ENABLED(CONFIG_BRIDGE_MRP)
>   INIT_LIST_HEAD(&br->mrp_list);
> +#endif
> +#if IS_ENABLED(CONFIG_BRIDGE_CFM)
> + INIT_LIST_HEAD(&br->mep_list);
>  #endif
>   spin_lock_init(&br->hash_lock);
>  
> diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
> index e67c6d9e8bea..6294a3e51a33 100644
> --- a/net/bridge/br_private.h
> +++ b/net/bridge/br_private.h
> @@ -445,6 +445,9 @@ struct net_bridge {
>  #if IS_ENABLED(CONFIG_BRIDGE_MRP)
>   struct list_headmrp_list;
>  #endif
> +#if IS_ENABLED(CONFIG_BRIDGE_CFM)
> + struct list_headmep_list;
> +#endif
>  };
>  
>  struct br_input_skb_cb {

Looks good; perhaps you can also use an hlist to reduce the head size in net_bridge.



Re: [PATCH RFC 7/7] bridge: cfm: Bridge port remove.

2020-09-08 Thread Nikolay Aleksandrov
On Fri, 2020-09-04 at 09:15 +, Henrik Bjoernlund wrote:
> This is addition of CFM functionality to delete MEP instances
> on a port that is removed from the bridge.
> A MEP can only exist on a port that is related to a bridge.
> 
> Signed-off-by: Henrik Bjoernlund  
> ---
>  net/bridge/br_cfm.c | 13 +
>  net/bridge/br_if.c  |  1 +
>  net/bridge/br_private.h |  6 ++
>  3 files changed, 20 insertions(+)
> 
> diff --git a/net/bridge/br_cfm.c b/net/bridge/br_cfm.c
> index b7fed2c1d8ec..c724ce020ce3 100644
> --- a/net/bridge/br_cfm.c
> +++ b/net/bridge/br_cfm.c
> @@ -921,3 +921,16 @@ bool br_cfm_created(struct net_bridge *br)
>  {
>   return !list_empty(&br->mep_list);
>  }
> +
> +/* Deletes the CFM instances on a specific bridge port
> + * note: called under rtnl_lock
> + */
> +void br_cfm_port_del(struct net_bridge *br, struct net_bridge_port *port)
> +{
> + struct br_cfm_mep *mep;
> +
> + list_for_each_entry_rcu(mep, &br->mep_list, head,
> + lockdep_rtnl_is_held())

Use standard/non-rcu list traversing, rtnl is already held.

> + if (mep->create.ifindex == port->dev->ifindex)
> + mep_delete_implementation(br, mep);
> +}
> diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
> index a0e9a7937412..f7d2f472ae24 100644
> --- a/net/bridge/br_if.c
> +++ b/net/bridge/br_if.c
> @@ -334,6 +334,7 @@ static void del_nbp(struct net_bridge_port *p)
>   spin_unlock_bh(&br->lock);
>  
>   br_mrp_port_del(br, p);
> + br_cfm_port_del(br, p);
>  
>   br_ifinfo_notify(RTM_DELLINK, NULL, p);
>  
> diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
> index 53bcbdd21f34..5617255f0c0c 100644
> --- a/net/bridge/br_private.h
> +++ b/net/bridge/br_private.h
> @@ -1369,6 +1369,7 @@ int br_cfm_parse(struct net_bridge *br, struct 
> net_bridge_port *p,
>struct nlattr *attr, int cmd, struct netlink_ext_ack *extack);
>  int br_cfm_rx_frame_process(struct net_bridge_port *p, struct sk_buff *skb);
>  bool br_cfm_created(struct net_bridge *br);
> +void br_cfm_port_del(struct net_bridge *br, struct net_bridge_port *p);
>  int br_cfm_config_fill_info(struct sk_buff *skb, struct net_bridge *br);
>  int br_cfm_status_fill_info(struct sk_buff *skb,
>   struct net_bridge *br,
> @@ -1393,6 +1394,11 @@ static inline bool br_cfm_created(struct net_bridge 
> *br)
>   return false;
>  }
>  
> +static inline void br_cfm_port_del(struct net_bridge *br,
> +struct net_bridge_port *p)
> +{
> +}
> +
>  static inline int br_cfm_config_fill_info(struct sk_buff *skb, struct 
> net_bridge *br)
>  {
>   return -EOPNOTSUPP;



Re: [PATCH RFC 3/7] bridge: uapi: cfm: Added EtherType used by the CFM protocol.

2020-09-08 Thread Nikolay Aleksandrov
On Fri, 2020-09-04 at 09:15 +, Henrik Bjoernlund wrote:
> This EtherType is used by all CFM protocol frames transmitted
> according to 802.1Q section 12.14.
> 
> Signed-off-by: Henrik Bjoernlund  
> ---
>  include/uapi/linux/if_ether.h | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
> index d6de2b167448..a0b637911d3c 100644
> --- a/include/uapi/linux/if_ether.h
> +++ b/include/uapi/linux/if_ether.h
> @@ -99,6 +99,7 @@
>  #define ETH_P_1588   0x88F7  /* IEEE 1588 Timesync */
>  #define ETH_P_NCSI   0x88F8  /* NCSI protocol*/
>  #define ETH_P_PRP0x88FB  /* IEC 62439-3 PRP/HSRv0*/
> +#define ETH_P_CFM0x8902  /* Connectivity Fault Management */
>  #define ETH_P_FCOE   0x8906  /* Fibre Channel over Ethernet  */
>  #define ETH_P_IBOE   0x8915  /* Infiniband over Ethernet */
>  #define ETH_P_TDLS   0x890D  /* TDLS */

Acked-by: Nikolay Aleksandrov 



Re: [PATCH RFC 4/7] bridge: cfm: Kernel space implementation of CFM.

2020-09-08 Thread Nikolay Aleksandrov
On Fri, 2020-09-04 at 09:15 +, Henrik Bjoernlund wrote:
> This is the implementation of the CFM protocol according to
> 802.1Q section 12.14.
> 
> Connectivity Fault Management (CFM) comprises capabilities for
> detecting, verifying, and isolating connectivity failures in
> Virtual Bridged Networks. These capabilities can be used in
> networks operated by multiple independent organizations, each
> with restricted management access to each other's equipment.
> 
> CFM functions are partitioned as follows:
> - Path discovery
> - Fault detection
> - Fault verification and isolation
> - Fault notification
> - Fault recovery
> 
> Interface consists of these functions:
> br_cfm_mep_create()
> br_cfm_mep_delete()
> br_cfm_mep_config_set()
> br_cfm_mep_status_get()
> br_cfm_mep_counters_get()
> br_cfm_mep_counters_clear()
> br_cfm_cc_config_set()
> br_cfm_cc_peer_mep_add()
> br_cfm_cc_peer_mep_remove()
> br_cfm_cc_rdi_set()
> br_cfm_cc_ccm_tx()
> br_cfm_cc_status_get()
> br_cfm_cc_counters_get()
> br_cfm_cc_counters_clear()
> br_cfm_cc_peer_status_get()
> 
> A MEP instance is created by br_cfm_mep_create()
> -It is the Maintenance association End Point
>  described in 802.1Q section 19.2.
> -It is created on a specific level (1-7) and is assuring
>  that no CFM frames are passing through this MEP on lower levels.
> -It initiates and validates CFM frames on its level.
> -It can only exist on a port that is related to a bridge.
> -Attributes given cannot be changed until the instance is
>  deleted.
> 
> A MEP instance can be deleted by br_cfm_mep_delete().
> 
> A created MEP instance has attributes that can be
> configured by br_cfm_mep_config_set().
> 
> A MEP contain status and counter information that can be
> retrieved by br_cfm_mep_status_get() and
> br_cfm_mep_counters_get().
> 
> A MEP counters can be cleared by br_cfm_mep_counters_clear().
> 
> A MEP Continuity Check feature can be configured by
> br_cfm_cc_config_set()
> The Continuity Check Receiver state machine can be
> enabled and disabled.
> According to 802.1Q section 19.2.8
> 
> A MEP can have Peer MEPs added and removed by
> br_cfm_cc_peer_mep_add() and br_cfm_cc_peer_mep_remove()
> The Continuity Check feature can maintain connectivity
> status on each added Peer MEP.
> 
> A MEP can be configured to start or stop transmission of CCM frames by
> br_cfm_cc_ccm_tx()
> The CCM will be transmitted for a selected period in seconds.
> Must call this function before timeout to keep transmission alive.
> 
> A MEP transmitting CCM can be configured with inserted RDI in PDU by
> br_cfm_cc_rdi_set()
> 
> A MEP contain Continuity Check status and counter information
> that can be retrieved by br_cfm_cc_status_get() and
> br_cfm_cc_counters_get().
> 
> A MEP Continuity Check counters can be cleared
> by br_cfm_cc_counters_clear().
> 
> A MEP contain Peer MEP Continuity Check status information that
> can be retrieved by br_cfm_cc_peer_status_get().
> 
> Signed-off-by: Henrik Bjoernlund  
> ---
>  include/uapi/linux/cfm_bridge.h |  75 +++
>  net/bridge/Makefile |   2 +
>  net/bridge/br_cfm.c | 880 
>  net/bridge/br_private.h |  16 +
>  net/bridge/br_private_cfm.h | 242 +
>  5 files changed, 1215 insertions(+)
>  create mode 100644 include/uapi/linux/cfm_bridge.h
>  create mode 100644 net/bridge/br_cfm.c
>  create mode 100644 net/bridge/br_private_cfm.h
> 

This is a large single patch, do you think it can be broken down into pieces?
I'll review it like this now, but it would be much easier if it's in smaller
logical pieces.
In general are you sure there are no holes in the structs being assigned
directly? Since you use memcmp() and such, you could end up surprised. :)

> diff --git a/include/uapi/linux/cfm_bridge.h b/include/uapi/linux/cfm_bridge.h
> new file mode 100644
> index ..389ea1e1f68e
> --- /dev/null
> +++ b/include/uapi/linux/cfm_bridge.h
> @@ -0,0 +1,75 @@
> +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
> +
> +#ifndef _UAPI_LINUX_CFM_BRIDGE_H_
> +#define _UAPI_LINUX_CFM_BRIDGE_H_
> +
> +#include 
> +#include 
> +
> +#define ETHER_HEADER_LENGTH  (6+6+4+2)
> +#define CFM_MAID_LENGTH  48
> +#define CFM_CCM_PDU_LENGTH   75
> +#define CFM_PORT_STATUS_TLV_LENGTH   4
> +#define CFM_IF_STATUS_TLV_LENGTH 4
> +#define CFM_IF_STATUS_TLV_TYPE   4
> +#define CFM_PORT_STATUS_TLV_TYPE 2
> +#define CFM_ENDE_TLV_TYPE0
> +#define CFM_CCM_MAX_FRAME_LENGTH (ETHER_HEADER_LENGTH+\
> +  CFM_CCM_PDU_LENGTH+\
> +  CFM_PORT_STATUS_TLV_LENGTH+\
> +  CFM_IF_STATUS_TLV_LENGTH)
> +#define CFM_FRAME_PRIO   7
> +#define CFM_CCM_OPCODE   1
> +#define CFM_CCM_TLV_OFFSET   70

Re: [PATCH RFC 1/7] net: bridge: extend the process of special frames

2020-09-08 Thread Nikolay Aleksandrov
On Fri, 2020-09-04 at 09:15 +, Henrik Bjoernlund wrote:
> This patch extends the processing of frames in the bridge. Currently MRP
> frames needs special processing and the current implementation doesn't
> allow a nice way to process different frame types. Therefore try to
> improve this by adding a list that contains frame types that need
> special processing. This list is iterated for each input frame and if
> there is a match based on frame type then these functions will be called
> and decide what to do with the frame. If a function processes the frame, the
> bridge doesn't need to do anything further; if it doesn't, the bridge
> will do normal forwarding.
> 
> Signed-off-by: Henrik Bjoernlund  
> ---
>  net/bridge/br_device.c  |  1 +
>  net/bridge/br_input.c   | 31 ++-
>  net/bridge/br_mrp.c | 19 +++
>  net/bridge/br_private.h | 18 --
>  4 files changed, 58 insertions(+), 11 deletions(-)
> 

Hi,
First I must say I do like this approach, thanks for generalizing it.
You can switch to a hlist so that there's just 1 pointer in the head.
I don't think you need list when you're walking only in one direction.
A few more minor comments below.

> diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
> index 9a2fb4aa1a10..a9232db03108 100644
> --- a/net/bridge/br_device.c
> +++ b/net/bridge/br_device.c
> @@ -473,6 +473,7 @@ void br_dev_setup(struct net_device *dev)
>   spin_lock_init(&br->lock);
>   INIT_LIST_HEAD(&br->port_list);
>   INIT_HLIST_HEAD(&br->fdb_list);
> + INIT_LIST_HEAD(&br->ftype_list);
>  #if IS_ENABLED(CONFIG_BRIDGE_MRP)
>   INIT_LIST_HEAD(&br->mrp_list);
>  #endif
> diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
> index 59a318b9f646..0f475b21094c 100644
> --- a/net/bridge/br_input.c
> +++ b/net/bridge/br_input.c
> @@ -254,6 +254,21 @@ static int nf_hook_bridge_pre(struct sk_buff *skb, 
> struct sk_buff **pskb)
>   return RX_HANDLER_CONSUMED;
>  }
>  
> +/* Return 0 if the frame was not processed otherwise 1
> + * note: already called with rcu_read_lock
> + */
> +static int br_process_frame_type(struct net_bridge_port *p,
> +  struct sk_buff *skb)
> +{
> + struct br_frame_type *tmp;
> +
> + list_for_each_entry_rcu(tmp, &p->br->ftype_list, list) {
> + if (unlikely(tmp->type == skb->protocol))
> + return tmp->func(p, skb);
> + }

Nit: you can drop the {}.

> + return 0;
> +}
> +
>  /*
>   * Return NULL if skb is handled
>   * note: already called with rcu_read_lock
> @@ -343,7 +358,7 @@ static rx_handler_result_t br_handle_frame(struct sk_buff 
> **pskb)
>   }
>   }
>  
> - if (unlikely(br_mrp_process(p, skb)))
> + if (unlikely(br_process_frame_type(p, skb)))
>   return RX_HANDLER_PASS;
>  
>  forward:
> @@ -380,3 +395,17 @@ rx_handler_func_t *br_get_rx_handler(const struct 
> net_device *dev)
>  
>   return br_handle_frame;
>  }
> +
> +void br_add_frame(struct net_bridge *br, struct br_frame_type *ft)
> +{
> + list_add_rcu(&ft->list, &br->ftype_list);
> +}
> +
> +void br_del_frame(struct net_bridge *br, struct br_frame_type *ft)
> +{
> + struct br_frame_type *tmp;
> +
> + list_for_each_entry(tmp, &br->ftype_list, list)
> + if (ft == tmp)
> + list_del_rcu(&ft->list);
> +}
> diff --git a/net/bridge/br_mrp.c b/net/bridge/br_mrp.c
> index b36689e6e7cb..0428e1785041 100644
> --- a/net/bridge/br_mrp.c
> +++ b/net/bridge/br_mrp.c
> @@ -6,6 +6,13 @@
>  static const u8 mrp_test_dmac[ETH_ALEN] = { 0x1, 0x15, 0x4e, 0x0, 0x0, 0x1 };
>  static const u8 mrp_in_test_dmac[ETH_ALEN] = { 0x1, 0x15, 0x4e, 0x0, 0x0, 
> 0x3 };
>  
> +static int br_mrp_process(struct net_bridge_port *p, struct sk_buff *skb);
> +
> +static struct br_frame_type mrp_frame_type __read_mostly = {
> + .type = cpu_to_be16(ETH_P_MRP),
> + .func = br_mrp_process,
> +};
> +
>  static bool br_mrp_is_ring_port(struct net_bridge_port *p_port,
>   struct net_bridge_port *s_port,
>   struct net_bridge_port *port)
> @@ -445,6 +452,9 @@ static void br_mrp_del_impl(struct net_bridge *br, struct 
> br_mrp *mrp)
>  
>   list_del_rcu(&mrp->list);
>   kfree_rcu(mrp, rcu);
> +
> + if (list_empty(&br->mrp_list))
> + br_del_frame(br, &mrp_frame_type);
>  }
>  
>  /* Adds a new MRP instance.
> @@ -493,6 +503,9 @@ int br_mrp_add(struct net_bridge *br, struct 
> br_mrp_instance *instance)
>   spin_unlock_bh(&br->lock);
>   rcu_assign_pointer(mrp->s_port, p);
>  
> + if (list_empty(&br->mrp_list))
> + br_add_frame(br, &mrp_frame_type);
> +
>   INIT_DELAYED_WORK(&mrp->test_work, br_mrp_test_work_expired);
>   INIT_DELAYED_WORK(&mrp->in_test_work, br_mrp_in_test_work_expired);
>   list_add_tail_rcu(&mrp->list, &br->mrp_list);
> @@ -1172,15 +1185,13 @@ static int br_mrp_rcv(struct net_bridge_port *p,
>   * normal forwarding.
>   * note: already called with 

Re: [PATCH RFC 5/7] bridge: cfm: Netlink Interface.

2020-09-08 Thread Nikolay Aleksandrov
On Fri, 2020-09-04 at 09:15 +, Henrik Bjoernlund wrote:
> This is the implementation of CFM netlink configuration
> and status information interface.
> 
> Add new nested netlink attributes. These attributes are used by the
> user space to create/delete/configure CFM instances and get status.
> Also they are used by the kernel to notify the user space when changes
> in any status happens.
[snip]
>   __u64 transition_fwd;
> diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
> index 9b814c92de12..fdd408f6a5d2 100644
> --- a/include/uapi/linux/rtnetlink.h
> +++ b/include/uapi/linux/rtnetlink.h
> @@ -779,6 +779,8 @@ enum {
>  #define RTEXT_FILTER_BRVLAN_COMPRESSED   (1 << 2)
>  #define  RTEXT_FILTER_SKIP_STATS (1 << 3)
>  #define RTEXT_FILTER_MRP (1 << 4)
> +#define RTEXT_FILTER_CFM_CONFIG  (1 << 5)
> +#define RTEXT_FILTER_CFM_STATUS  (1 << 6)
>  
>  /* End of information exported to user level */
>  
> diff --git a/net/bridge/Makefile b/net/bridge/Makefile
> index ddc0a9192348..4702702a74d3 100644
> --- a/net/bridge/Makefile
> +++ b/net/bridge/Makefile
> @@ -28,4 +28,4 @@ obj-$(CONFIG_NETFILTER) += netfilter/
>  
>  bridge-$(CONFIG_BRIDGE_MRP)  += br_mrp_switchdev.o br_mrp.o br_mrp_netlink.o
>  
> -bridge-$(CONFIG_BRIDGE_CFM)  += br_cfm.o
> +bridge-$(CONFIG_BRIDGE_CFM)  += br_cfm.o br_cfm_netlink.o
> diff --git a/net/bridge/br_cfm_netlink.c b/net/bridge/br_cfm_netlink.c
> new file mode 100644
> index ..4e39aab1cd0b
> --- /dev/null
> +++ b/net/bridge/br_cfm_netlink.c
> @@ -0,0 +1,684 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +
> +#include 
> +
> +#include "br_private.h"
> +#include "br_private_cfm.h"
> +
> +static inline struct mac_addr nla_get_mac(const struct nlattr *nla)
> +{
> + struct mac_addr mac;
> +
> + nla_memcpy(&mac, nla, sizeof(mac.addr));
> +
> + return mac;
> +}
> +
> +static inline struct br_cfm_maid nla_get_maid(const struct nlattr *nla)
> +{
> + struct br_cfm_maid maid;
> +
> + nla_memcpy(&maid, nla, sizeof(maid.data));
> +
> + return maid;
> +}

IMO, these 1-line helpers don't really help readability.

> +
> +static inline int nla_put_u64(struct sk_buff *skb, int attrtype, u64 value)
> +{
> + u64 tmp = value;
> +
> + return nla_put(skb, attrtype, sizeof(u64), &tmp);
> +}

What?! Read include/net/netlink.h

> +
> +static const struct nla_policy
> +br_cfm_policy[IFLA_BRIDGE_CFM_MAX + 1] = {
> + [IFLA_BRIDGE_CFM_UNSPEC]= { .type = NLA_REJECT },
> + [IFLA_BRIDGE_CFM_MEP_CREATE]= { .type = NLA_NESTED },
> + [IFLA_BRIDGE_CFM_MEP_DELETE]= { .type = NLA_NESTED },
> + [IFLA_BRIDGE_CFM_MEP_CONFIG]= { .type = NLA_NESTED },
> + [IFLA_BRIDGE_CFM_CC_CONFIG] = { .type = NLA_NESTED },
> + [IFLA_BRIDGE_CFM_CC_PEER_MEP_ADD]   = { .type = NLA_NESTED },
> + [IFLA_BRIDGE_CFM_CC_PEER_MEP_REMOVE]= { .type = NLA_NESTED },
> + [IFLA_BRIDGE_CFM_CC_RDI]= { .type = NLA_NESTED },
> + [IFLA_BRIDGE_CFM_CC_CCM_TX] = { .type = NLA_NESTED },
> +};
> +
> +static const struct nla_policy
> +br_cfm_mep_create_policy[IFLA_BRIDGE_CFM_MEP_CREATE_MAX + 1] = {
> + [IFLA_BRIDGE_CFM_MEP_CREATE_UNSPEC] = { .type = NLA_REJECT },
> + [IFLA_BRIDGE_CFM_MEP_CREATE_INSTANCE]   = { .type = NLA_U32 },
> + [IFLA_BRIDGE_CFM_MEP_CREATE_DOMAIN] = { .type = NLA_U32 },
> + [IFLA_BRIDGE_CFM_MEP_CREATE_DIRECTION]  = { .type = NLA_U32 },
> + [IFLA_BRIDGE_CFM_MEP_CREATE_IFINDEX]= { .type = NLA_U32 },
> +};
> +
> +static const struct nla_policy
> +br_cfm_mep_delete_policy[IFLA_BRIDGE_CFM_MEP_DELETE_MAX + 1] = {
> + [IFLA_BRIDGE_CFM_MEP_DELETE_UNSPEC] = { .type = NLA_REJECT },
> + [IFLA_BRIDGE_CFM_MEP_DELETE_INSTANCE]   = { .type = NLA_U32 },
> +};
> +
> +static const struct nla_policy
> +br_cfm_mep_config_policy[IFLA_BRIDGE_CFM_MEP_CONFIG_MAX + 1] = {
> + [IFLA_BRIDGE_CFM_MEP_CONFIG_UNSPEC] = { .type = NLA_REJECT 
> },
> + [IFLA_BRIDGE_CFM_MEP_CONFIG_INSTANCE]   = { .type = NLA_U32 },
> + [IFLA_BRIDGE_CFM_MEP_CONFIG_UNICAST_MAC]= NLA_POLICY_ETH_ADDR,
> + [IFLA_BRIDGE_CFM_MEP_CONFIG_MDLEVEL]= { .type = NLA_U32 },
> + [IFLA_BRIDGE_CFM_MEP_CONFIG_MEPID]  = { .type = NLA_U32 },
> +};
> +
> +static const struct nla_policy
> +br_cfm_cc_config_policy[IFLA_BRIDGE_CFM_CC_CONFIG_MAX + 1] = {
> + [IFLA_BRIDGE_CFM_CC_CONFIG_UNSPEC]  = { .type = NLA_REJECT 
> },
> + [IFLA_BRIDGE_CFM_CC_CONFIG_INSTANCE]= { .type = NLA_U32 },
> + [IFLA_BRIDGE_CFM_CC_CONFIG_ENABLE]  = { .type = NLA_U32 },
> + [IFLA_BRIDGE_CFM_CC_CONFIG_EXP_INTERVAL]= { .type = NLA_U32 },
> + [IFLA_BRIDGE_CFM_CC_CONFIG_EXP_MAID]= {
> + .type = NLA_BINARY, .len = CFM_MAID_LENGTH },
> +};
> +
> +static const struct nla_policy
> +br_cfm_cc_peer_mep_policy[IFLA_BRIDGE_CFM_CC_PEER_MEP_MAX + 1] = {
> 

Re: [PATCH RFC 6/7] bridge: cfm: Netlink Notifications.

2020-09-08 Thread Nikolay Aleksandrov
On Fri, 2020-09-04 at 09:15 +, Henrik Bjoernlund wrote:
> This is the implementation of Netlink notifications out of CFM.
> 
> Notifications are initiated whenever a state change happens in CFM.
> 
[snip]
> @@ -445,6 +458,7 @@ static int br_cfm_frame_rx(struct net_bridge_port *port, 
> struct sk_buff *skb)
>   peer_mep->cc_status.ccm_defect = false;
>  
>   /* Change in CCM defect status - notify */
> + br_cfm_notify(RTM_NEWLINK, port);
>  
>   /* Start CCM RX timer */
>   ccm_rx_timer_start(peer_mep);
> @@ -874,6 +888,35 @@ int br_cfm_cc_counters_clear(struct net_bridge *br, 
> const u32 instance,
>   return 0;
>  }
>  
> +int br_cfm_mep_count(struct net_bridge *br, u32 *count)
> +{
> + struct br_cfm_mep *mep;

Leave a blank line between local variable definitions and code.

> + *count = 0;
> +
> + rcu_read_lock();
> + list_for_each_entry_rcu(mep, >mep_list, head)
> + * count += 1;

please remove the extra space

> + rcu_read_unlock();
> +
> + return 0;
> +}
> +
> +int br_cfm_peer_mep_count(struct net_bridge *br, u32 *count)
> +{
> + struct br_cfm_peer_mep *peer_mep;
> + struct br_cfm_mep *mep;

Leave a blank line between local variable definitions and code.

> + *count = 0;
> +
> + rcu_read_lock();
> + list_for_each_entry_rcu(mep, >mep_list, head) {
> + list_for_each_entry_rcu(peer_mep, >peer_mep_list, head)
> + * count += 1;

please remove the extra space

> + }
> + rcu_read_unlock();
> +
> + return 0;
> +}
> +
>  bool br_cfm_created(struct net_bridge *br)
>  {
>   return !list_empty(>mep_list);
> diff --git a/net/bridge/br_cfm_netlink.c b/net/bridge/br_cfm_netlink.c
> index 4e39aab1cd0b..13664ac8608a 100644
> --- a/net/bridge/br_cfm_netlink.c
> +++ b/net/bridge/br_cfm_netlink.c
> @@ -582,7 +582,9 @@ int br_cfm_config_fill_info(struct sk_buff *skb, struct 
> net_bridge *br)
>   return -EMSGSIZE;
>  }
>  
> -int br_cfm_status_fill_info(struct sk_buff *skb, struct net_bridge *br)
> +int br_cfm_status_fill_info(struct sk_buff *skb,
> + struct net_bridge *br,
> + bool getlink)
>  {
>   struct nlattr *tb, *cfm_tb;
>   struct br_cfm_mep *mep;
> @@ -613,10 +615,12 @@ int br_cfm_status_fill_info(struct sk_buff *skb, struct 
> net_bridge *br)
>   mep->status.rx_level_low_seen))
>   goto nla_put_failure;
>  
> - /* Clear all 'seen' indications */
> - mep->status.opcode_unexp_seen = false;
> - mep->status.version_unexp_seen = false;
> - mep->status.rx_level_low_seen = false;
> + if (getlink) { /* Only clear if this is a GETLINK */
> + /* Clear all 'seen' indications */
> + mep->status.opcode_unexp_seen = false;
> + mep->status.version_unexp_seen = false;
> + mep->status.rx_level_low_seen = false;
> + }
>  
>   nla_nest_end(skb, tb);
>  
> @@ -662,10 +666,12 @@ int br_cfm_status_fill_info(struct sk_buff *skb, struct 
> net_bridge *br)
>   peer_mep->cc_status.seq_unexp_seen))
>   goto nla_put_failure;
>  
> - /* Clear all 'seen' indications */
> - peer_mep->cc_status.seen = false;
> - peer_mep->cc_status.tlv_seen = false;
> - peer_mep->cc_status.seq_unexp_seen = false;
> + if (getlink) { /* Only clear if this is a GETLINK */
> + /* Clear all 'seen' indications */
> + peer_mep->cc_status.seen = false;
> + peer_mep->cc_status.tlv_seen = false;
> + peer_mep->cc_status.seq_unexp_seen = false;

Why clear these on GETLINK? This sounds like it should be a set op.

> + }
>  
>   nla_nest_end(skb, tb);
>   }
> diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
> index 6de5cb1295f6..f2e885521f4f 100644
> --- a/net/bridge/br_netlink.c
> +++ b/net/bridge/br_netlink.c
> @@ -94,9 +94,11 @@ static size_t br_get_link_af_size_filtered(const struct 
> net_device *dev,
>  {
>   struct net_bridge_vlan_group *vg = NULL;
>   struct net_bridge_port *p = NULL;
> - struct net_bridge *br;
> - int num_vlan_infos;
> + struct net_bridge *br = NULL;
> + u32 num_cfm_peer_mep_infos;
> + u32 num_cfm_mep_infos;
>   size_t vinfo_sz = 0;
> + int num_vlan_infos;
>  
>   rcu_read_lock();
>   if (netif_is_bridge_port(dev)) {
> @@ -115,6 +117,52 @@ static size_t br_get_link_af_size_filtered(const struct 
> net_device *dev,
>   /* Each VLAN is returned in bridge_vlan_info along with flags */
>   

Re: [PATCH RFC 0/7] net: bridge: cfm: Add support for Connectivity Fault Management(CFM)

2020-09-07 Thread Nikolay Aleksandrov
On Sun, 2020-09-06 at 20:21 +0200, Horatiu Vultur wrote:
> The 09/04/2020 15:44, Stephen Hemminger wrote:
> > On Fri, 4 Sep 2020 09:15:20 +
> > Henrik Bjoernlund  wrote:
> > 
> > > Connectivity Fault Management (CFM) is defined in 802.1Q section 12.14.
> > > 
> > > 
[snip]
> > > Currently this 'cfm' and 'cfm_server' programs are standalone placed in a
> > > cfm repository https://github.com/microchip-ung/cfm but it is considered
> > > to integrate this into 'iproute2'.
> > > 
> > > Reviewed-by: Horatiu Vultur  
> > > Signed-off-by: Henrik Bjoernlund  
> 
> Hi Stephen,
> 
> > Could this be done in userspace? It is a control plane protocol.
> > Could it be done by using eBPF?
> 
> I might be able to answer this. We have not considered this approach of
> using eBPF. Because we want actually to push this in HW extending
> switchdev API. I know that this series doesn't cover the switchdev part
> but we posted like this because we wanted to get some feedback from
> community. We had a similar approach for MRP, where we extended the
> bridge and switchdev API, so we thought that is the way to go forward.
> 
> Regarding eBPF, I can't say that it would work or not because I lack
> knowledge in this.
> 
> > Adding more code in bridge impacts a large number of users of Linux distros.
> > It creates bloat and potential security vulnerabilities.

Hi,
I also had the same initial thought - this really doesn't seem to affect the
bridge in any way, it's only collecting and transmitting information. I get
that you'd like to use the bridge as a passthrough device to switchdev to
program your hw, could you share what would be offloaded more specifically ?

All you do - snooping and blocking these packets can easily be done from user-
space with the help of ebtables, but since we need to have a software
implementation/fallback of anything being offloaded via switchdev we might need
this after all, I'd just prefer to push as much as possible to user-space.

I plan to review the individual patches tomorrow.

Thanks,
 Nik



Re: general protection fault in fib_check_nexthop

2020-08-22 Thread Nikolay Aleksandrov

On 8/21/20 9:10 AM, syzbot wrote:

Hello,

syzbot found the following issue on:

HEAD commit:18445bf4 Merge tag 'spi-fix-v5.9-rc1' of git://git.kernel...
git tree:   upstream
console output: https://syzkaller.appspot.com/x/log.txt?x=162cbd7a90
kernel config:  https://syzkaller.appspot.com/x/.config?x=a0437fdd630bee11
dashboard link: https://syzkaller.appspot.com/bug?extid=55a3e617aaf04b962a3e
compiler:   gcc (GCC) 10.1.0-syz 20200507
userspace arch: i386



#syz dup: general protection fault in fib_dump_info (2)



Re: general protection fault in fib_dump_info (2)

2020-08-21 Thread Nikolay Aleksandrov

On 8/21/20 6:27 PM, syzbot wrote:

Hello,

syzbot found the following issue on:

HEAD commit:da2968ff Merge tag 'pci-v5.9-fixes-1' of git://git.kernel...
git tree:   upstream
console output: https://syzkaller.appspot.com/x/log.txt?x=137316ca90
kernel config:  https://syzkaller.appspot.com/x/.config?x=a0437fdd630bee11
dashboard link: https://syzkaller.appspot.com/bug?extid=a61aa19b0c14c8770bd9
compiler:   gcc (GCC) 10.1.0-syz 20200507
userspace arch: i386
syz repro:  https://syzkaller.appspot.com/x/repro.syz?x=1270705190
C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=1150a04690

The issue was bisected to:

commit 0b5e2e39739e861fa5fc84ab27a35dbe62a15330
Author: David Ahern 
Date:   Tue May 26 18:56:16 2020 +

 nexthop: Expand nexthop_is_multipath in a few places



This seems like a much older bug to me, the code allows to pass 0 groups and
thus we end up without any nh_grp_entry pointers. I reproduced it with a
modified iproute2 that sends an empty NHA_GROUP and then just uses the new
nexthop in any way (e.g. add a route with it). This is the same bug as the
earlier report for: "general protection fault in fib_check_nexthop"

I have a patch, but I'll only be able to send it tomorrow.

Cheers,
 Nik


Re: [PATCH v5 1/2] net: dsa: Add protocol support for 802.1AD when adding or deleting vlan for dsa switch port

2020-08-07 Thread Nikolay Aleksandrov
On 07/08/2020 14:13, hongbo.w...@nxp.com wrote:
> From: "hongbo.wang" 
> 
> the following command will be supported:
> 
> Set bridge's vlan protocol:
> ip link set br0 type bridge vlan_protocol 802.1ad
> Add VLAN:
> ip link add link swp1 name swp1.100 type vlan protocol 802.1ad id 100
> Delete VLAN:
> ip link del link swp1 name swp1.100
> 
> Signed-off-by: hongbo.wang 
> ---
>  include/net/switchdev.h   |  1 +
>  net/bridge/br_switchdev.c | 22 
>  net/dsa/dsa_priv.h|  4 +--
>  net/dsa/port.c|  6 +++--
>  net/dsa/slave.c   | 53 ++-
>  net/dsa/tag_8021q.c   |  4 +--
>  6 files changed, 66 insertions(+), 24 deletions(-)
> 

Hi,
Please put the bridge changes in a separate patch with proper description.
Reviewers would easily miss these bridge changes. Also I believe net-next
is currently closed and that's where these patches should be targeted (i.e.
have net-next after PATCH in the subject). Few more comments below.

Thanks,
 Nik

> diff --git a/include/net/switchdev.h b/include/net/switchdev.h
> index ff2246914301..7594ea82879f 100644
> --- a/include/net/switchdev.h
> +++ b/include/net/switchdev.h
> @@ -97,6 +97,7 @@ struct switchdev_obj_port_vlan {
>   u16 flags;
>   u16 vid_begin;
>   u16 vid_end;
> + u16 proto;
>  };
>  
>  #define SWITCHDEV_OBJ_PORT_VLAN(OBJ) \
> diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
> index 015209bf44aa..bcfa00d6d5eb 100644
> --- a/net/bridge/br_switchdev.c
> +++ b/net/bridge/br_switchdev.c
> @@ -146,6 +146,26 @@ br_switchdev_fdb_notify(const struct 
> net_bridge_fdb_entry *fdb, int type)
>   }
>  }
>  
> +static u16 br_switchdev_get_bridge_vlan_proto(struct net_device *dev)

const

> +{
> + u16 vlan_proto = ETH_P_8021Q;
> + struct net_device *br = NULL;
> + struct net_bridge_port *p;
> +
> + if (netif_is_bridge_master(dev)) {
> + br = dev;
> + } else if (netif_is_bridge_port(dev)) {

You can use br_port_get_rtnl_rcu() and just check if p is not NULL.
But in general these helpers are used only on bridge devices, I don't think you
can reach them with a device that's not either a bridge or a port. So you can
just check if it's a bridge master, else it's a port.

> + p = br_port_get_rcu(dev);
> + if (p && p->br)

No need to check for p->br, it always exists.

> + br = p->br->dev;
> + }
> +
> + if (br)
> + br_vlan_get_proto(br, _proto);
> +
> + return vlan_proto;
> +}
> +
>  int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags,
>  struct netlink_ext_ack *extack)
>  {
> @@ -157,6 +177,7 @@ int br_switchdev_port_vlan_add(struct net_device *dev, 
> u16 vid, u16 flags,
>   .vid_end = vid,
>   };
>  
> + v.proto = br_switchdev_get_bridge_vlan_proto(dev);
>   return switchdev_port_obj_add(dev, , extack);
>  }
>  
> @@ -169,5 +190,6 @@ int br_switchdev_port_vlan_del(struct net_device *dev, 
> u16 vid)
>   .vid_end = vid,
>   };
>  
> + v.proto = br_switchdev_get_bridge_vlan_proto(dev);
>   return switchdev_port_obj_del(dev, );
>  }


Re: [PATCH 1/2] net: dsa: Add flag for 802.1AD when adding VLAN for dsa switch and port

2020-07-20 Thread Nikolay Aleksandrov
On 20/07/2020 13:41, hongbo.w...@nxp.com wrote:
> From: "hongbo.wang" 
> 
> the following command can be supported:
> ip link add link swp1 name swp1.100 type vlan protocol 802.1ad id 100
> 
> Signed-off-by: hongbo.wang 
> ---
>  include/uapi/linux/if_bridge.h | 1 +
>  net/dsa/slave.c| 9 +++--
>  2 files changed, 8 insertions(+), 2 deletions(-)
> 

This is not bridge related at all, please leave its flags out of it.

Nacked-by: Nikolay Aleksandrov 



> diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
> index caa6914a3e53..ecd960aa65c7 100644
> --- a/include/uapi/linux/if_bridge.h
> +++ b/include/uapi/linux/if_bridge.h
> @@ -132,6 +132,7 @@ enum {
>  #define BRIDGE_VLAN_INFO_RANGE_END   (1<<4) /* VLAN is end of vlan range */
>  #define BRIDGE_VLAN_INFO_BRENTRY (1<<5) /* Global bridge VLAN entry */
>  #define BRIDGE_VLAN_INFO_ONLY_OPTS   (1<<6) /* Skip create/delete/flags */
> +#define BRIDGE_VLAN_INFO_8021AD  (1<<7) /* VLAN is 802.1AD protocol */
>  
>  struct bridge_vlan_info {
>   __u16 flags;
> diff --git a/net/dsa/slave.c b/net/dsa/slave.c
> index 4c7f086a047b..376d7ac5f1e5 100644
> --- a/net/dsa/slave.c
> +++ b/net/dsa/slave.c
> @@ -1232,6 +1232,7 @@ static int dsa_slave_get_ts_info(struct net_device *dev,
>  static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto,
>u16 vid)
>  {
> + u16 flags = 0;
>   struct dsa_port *dp = dsa_slave_to_port(dev);
>   struct bridge_vlan_info info;
>   int ret;
> @@ -1252,7 +1253,10 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device 
> *dev, __be16 proto,
>   return -EBUSY;
>   }
>  
> - ret = dsa_port_vid_add(dp, vid, 0);
> + if (ntohs(proto) == ETH_P_8021AD)
> + flags |= BRIDGE_VLAN_INFO_8021AD;
> +
> + ret = dsa_port_vid_add(dp, vid, flags);
>   if (ret)
>   return ret;
>  
> @@ -1744,7 +1748,8 @@ int dsa_slave_create(struct dsa_port *port)
>  
>   slave_dev->features = master->vlan_features | NETIF_F_HW_TC;
>   if (ds->ops->port_vlan_add && ds->ops->port_vlan_del)
> - slave_dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
> + slave_dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER |
> +NETIF_F_HW_VLAN_STAG_FILTER;
>   slave_dev->hw_features |= NETIF_F_HW_TC;
>   slave_dev->features |= NETIF_F_LLTX;
>   slave_dev->ethtool_ops = _slave_ethtool_ops;
> 



Re: [PATCH net-next v4 12/12] net: bridge: Add port attribute IFLA_BRPORT_MRP_IN_OPEN

2020-07-14 Thread Nikolay Aleksandrov
On 14/07/2020 18:07, Horatiu Vultur wrote:
> The 07/14/2020 16:29, Nikolay Aleksandrov wrote:
>> EXTERNAL EMAIL: Do not click links or open attachments unless you know the 
>> content is safe
>>
>> On 14/07/2020 10:34, Horatiu Vultur wrote:
>>> This patch adds a new port attribute, IFLA_BRPORT_MRP_IN_OPEN, which
>>> allows to notify the userspace when the node lost the continuity of
>>> MRP_InTest frames.
>>>
>>> Signed-off-by: Horatiu Vultur 
>>> ---
>>>  include/uapi/linux/if_link.h   | 1 +
>>>  net/bridge/br_netlink.c| 3 +++
>>>  tools/include/uapi/linux/if_link.h | 1 +
>>>  3 files changed, 5 insertions(+)
>>>
> 
> Hi Nik,
> 
>>
>> It's kind of late by now, but I'd wish these were contained in a nested MRP
>> attribute. :)
>> Horatiu, do you expect to have many more MRP attributes outside of MRP
>> netlink code?
> 
> I don't expect to add any other MRP attributes outside of MRP netlink
> code.
> 
>>
>> Perhaps we should at least dump them only for MRP-aware ports, that should
>> be easy.
>> They make no sense outside of MRP anyway, but increase the size of the dump
>> for all right now.
> 
> You are right. Then should I first send a fix on the net for this and
> after that I will fix these patches or just fix this in the next patch
> series?
> 

IMO it's more of an improvement rather than a bug, but since you don't expect
to have more attributes outside of MRP's netlink I guess we can drop it for now.
Up to you.

It definitely shouldn't block this patch-set.

>>
>> Acked-by: Nikolay Aleksandrov 
>>
>>> diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
>>> index cc185a007ade8..26842ffd0501d 100644
>>> --- a/include/uapi/linux/if_link.h
>>> +++ b/include/uapi/linux/if_link.h
>>> @@ -344,6 +344,7 @@ enum {
>>>   IFLA_BRPORT_ISOLATED,
>>>   IFLA_BRPORT_BACKUP_PORT,
>>>   IFLA_BRPORT_MRP_RING_OPEN,
>>> + IFLA_BRPORT_MRP_IN_OPEN,
>>>   __IFLA_BRPORT_MAX
>>>  };
>>>  #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
>>> diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
>>> index c532fa65c9834..147d52596e174 100644
>>> --- a/net/bridge/br_netlink.c
>>> +++ b/net/bridge/br_netlink.c
>>> @@ -152,6 +152,7 @@ static inline size_t br_port_info_size(void)
>>>  #endif
>>>   + nla_total_size(sizeof(u16))   /* IFLA_BRPORT_GROUP_FWD_MASK 
>>> */
>>>   + nla_total_size(sizeof(u8))/* IFLA_BRPORT_MRP_RING_OPEN 
>>> */
>>> + + nla_total_size(sizeof(u8))/* IFLA_BRPORT_MRP_IN_OPEN */
>>>   + 0;
>>>  }
>>>
>>> @@ -216,6 +217,8 @@ static int br_port_fill_attrs(struct sk_buff *skb,
>>>  !!(p->flags & BR_NEIGH_SUPPRESS)) ||
>>>   nla_put_u8(skb, IFLA_BRPORT_MRP_RING_OPEN, !!(p->flags &
>>> BR_MRP_LOST_CONT)) 
>>> ||
>>> + nla_put_u8(skb, IFLA_BRPORT_MRP_IN_OPEN,
>>> +!!(p->flags & BR_MRP_LOST_IN_CONT)) ||
>>>   nla_put_u8(skb, IFLA_BRPORT_ISOLATED, !!(p->flags & BR_ISOLATED)))
>>>   return -EMSGSIZE;
>>>
>>> diff --git a/tools/include/uapi/linux/if_link.h 
>>> b/tools/include/uapi/linux/if_link.h
>>> index cafedbbfefbe9..781e482dc499f 100644
>>> --- a/tools/include/uapi/linux/if_link.h
>>> +++ b/tools/include/uapi/linux/if_link.h
>>> @@ -344,6 +344,7 @@ enum {
>>>   IFLA_BRPORT_ISOLATED,
>>>   IFLA_BRPORT_BACKUP_PORT,
>>>   IFLA_BRPORT_MRP_RING_OPEN,
>>> + IFLA_BRPORT_MRP_IN_OPEN,
>>>   __IFLA_BRPORT_MAX
>>>  };
>>>  #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
>>>
>>
> 



Re: [PATCH net-next v4 01/12] switchdev: mrp: Extend switchdev API for MRP Interconnect

2020-07-14 Thread Nikolay Aleksandrov
On 14/07/2020 10:34, Horatiu Vultur wrote:
> Extend switchdev API to add support for MRP interconnect. The HW is
> notified in the following cases:
> 
> SWITCHDEV_OBJ_ID_IN_ROLE_MRP: This is used when the interconnect role
>   of the node changes. The supported roles are MIM and MIC.
> 
> SWITCHDEV_OBJ_ID_IN_STATE_MRP: This is used when the interconnect ring
>   changes its state to open or closed.
> 
> SWITCHDEV_OBJ_ID_IN_TEST_MRP: This is used to start/stop sending
>   MRP_InTest frames on all MRP ports. This is called only on nodes that
>   have the interconnect role MIM.
> 
> Signed-off-by: Horatiu Vultur 
> ---
>  include/net/switchdev.h | 38 ++
>  1 file changed, 38 insertions(+)
> 

Reviewed-by: Nikolay Aleksandrov 

> diff --git a/include/net/switchdev.h b/include/net/switchdev.h
> index b8c059b4e06d9..ff22469143013 100644
> --- a/include/net/switchdev.h
> +++ b/include/net/switchdev.h
> @@ -76,6 +76,10 @@ enum switchdev_obj_id {
>   SWITCHDEV_OBJ_ID_RING_TEST_MRP,
>   SWITCHDEV_OBJ_ID_RING_ROLE_MRP,
>   SWITCHDEV_OBJ_ID_RING_STATE_MRP,
> + SWITCHDEV_OBJ_ID_IN_TEST_MRP,
> + SWITCHDEV_OBJ_ID_IN_ROLE_MRP,
> + SWITCHDEV_OBJ_ID_IN_STATE_MRP,
> +
>  #endif
>  };
>  
> @@ -155,6 +159,40 @@ struct switchdev_obj_ring_state_mrp {
>  #define SWITCHDEV_OBJ_RING_STATE_MRP(OBJ) \
>   container_of((OBJ), struct switchdev_obj_ring_state_mrp, obj)
>  
> +/* SWITCHDEV_OBJ_ID_IN_TEST_MRP */
> +struct switchdev_obj_in_test_mrp {
> + struct switchdev_obj obj;
> + /* The value is in us and a value of 0 represents to stop */
> + u32 interval;
> + u32 in_id;
> + u32 period;
> + u8 max_miss;
> +};
> +
> +#define SWITCHDEV_OBJ_IN_TEST_MRP(OBJ) \
> + container_of((OBJ), struct switchdev_obj_in_test_mrp, obj)
> +
> +/* SWITCHDEV_OBJ_ID_IN_ROLE_MRP */
> +struct switchdev_obj_in_role_mrp {
> + struct switchdev_obj obj;
> + struct net_device *i_port;
> + u32 ring_id;
> + u16 in_id;
> + u8 in_role;
> +};
> +
> +#define SWITCHDEV_OBJ_IN_ROLE_MRP(OBJ) \
> + container_of((OBJ), struct switchdev_obj_in_role_mrp, obj)
> +
> +struct switchdev_obj_in_state_mrp {
> + struct switchdev_obj obj;
> + u32 in_id;
> + u8 in_state;
> +};
> +
> +#define SWITCHDEV_OBJ_IN_STATE_MRP(OBJ) \
> + container_of((OBJ), struct switchdev_obj_in_state_mrp, obj)
> +
>  #endif
>  
>  typedef int switchdev_obj_dump_cb_t(struct switchdev_obj *obj);
> 



Re: [PATCH net-next v4 12/12] net: bridge: Add port attribute IFLA_BRPORT_MRP_IN_OPEN

2020-07-14 Thread Nikolay Aleksandrov
On 14/07/2020 10:34, Horatiu Vultur wrote:
> This patch adds a new port attribute, IFLA_BRPORT_MRP_IN_OPEN, which
> allows to notify the userspace when the node lost the continuity of
> MRP_InTest frames.
> 
> Signed-off-by: Horatiu Vultur 
> ---
>  include/uapi/linux/if_link.h   | 1 +
>  net/bridge/br_netlink.c| 3 +++
>  tools/include/uapi/linux/if_link.h | 1 +
>  3 files changed, 5 insertions(+)
> 

It's kind of late by now, but I'd wish these were contained in a nested MRP
attribute. :)
Horatiu, do you expect to have many more MRP attributes outside of MRP netlink
code?

Perhaps we should at least dump them only for MRP-aware ports, that should be
easy.
They make no sense outside of MRP anyway, but increase the size of the dump for
all right now.

Acked-by: Nikolay Aleksandrov 

> diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
> index cc185a007ade8..26842ffd0501d 100644
> --- a/include/uapi/linux/if_link.h
> +++ b/include/uapi/linux/if_link.h
> @@ -344,6 +344,7 @@ enum {
>   IFLA_BRPORT_ISOLATED,
>   IFLA_BRPORT_BACKUP_PORT,
>   IFLA_BRPORT_MRP_RING_OPEN,
> + IFLA_BRPORT_MRP_IN_OPEN,
>   __IFLA_BRPORT_MAX
>  };
>  #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
> diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
> index c532fa65c9834..147d52596e174 100644
> --- a/net/bridge/br_netlink.c
> +++ b/net/bridge/br_netlink.c
> @@ -152,6 +152,7 @@ static inline size_t br_port_info_size(void)
>  #endif
>   + nla_total_size(sizeof(u16))   /* IFLA_BRPORT_GROUP_FWD_MASK */
>   + nla_total_size(sizeof(u8))/* IFLA_BRPORT_MRP_RING_OPEN */
> + + nla_total_size(sizeof(u8))/* IFLA_BRPORT_MRP_IN_OPEN */
>   + 0;
>  }
>  
> @@ -216,6 +217,8 @@ static int br_port_fill_attrs(struct sk_buff *skb,
>  !!(p->flags & BR_NEIGH_SUPPRESS)) ||
>   nla_put_u8(skb, IFLA_BRPORT_MRP_RING_OPEN, !!(p->flags &
> BR_MRP_LOST_CONT)) ||
> + nla_put_u8(skb, IFLA_BRPORT_MRP_IN_OPEN,
> +!!(p->flags & BR_MRP_LOST_IN_CONT)) ||
>   nla_put_u8(skb, IFLA_BRPORT_ISOLATED, !!(p->flags & BR_ISOLATED)))
>   return -EMSGSIZE;
>  
> diff --git a/tools/include/uapi/linux/if_link.h 
> b/tools/include/uapi/linux/if_link.h
> index cafedbbfefbe9..781e482dc499f 100644
> --- a/tools/include/uapi/linux/if_link.h
> +++ b/tools/include/uapi/linux/if_link.h
> @@ -344,6 +344,7 @@ enum {
>   IFLA_BRPORT_ISOLATED,
>   IFLA_BRPORT_BACKUP_PORT,
>   IFLA_BRPORT_MRP_RING_OPEN,
> + IFLA_BRPORT_MRP_IN_OPEN,
>   __IFLA_BRPORT_MAX
>  };
>  #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
> 



Re: [PATCH net-next v4 10/12] bridge: uapi: mrp: Extend MRP_INFO attributes for interconnect status

2020-07-14 Thread Nikolay Aleksandrov
On 14/07/2020 10:34, Horatiu Vultur wrote:
> Extend the existing MRP_INFO to return status of MRP interconnect. In
> case there is no MRP interconnect on the node then the role will be
> disabled so the other attributes can be ignored.
> 
> Signed-off-by: Horatiu Vultur 
> ---
>  include/uapi/linux/if_bridge.h | 5 +
>  1 file changed, 5 insertions(+)
> 
> diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
> index d840a3e37a37c..c1227aecd38fd 100644
> --- a/include/uapi/linux/if_bridge.h
> +++ b/include/uapi/linux/if_bridge.h
> @@ -243,6 +243,11 @@ enum {
>   IFLA_BRIDGE_MRP_INFO_TEST_INTERVAL,
>   IFLA_BRIDGE_MRP_INFO_TEST_MAX_MISS,
>   IFLA_BRIDGE_MRP_INFO_TEST_MONITOR,
> + IFLA_BRIDGE_MRP_INFO_I_IFINDEX,
> + IFLA_BRIDGE_MRP_INFO_IN_STATE,
> + IFLA_BRIDGE_MRP_INFO_IN_ROLE,
> + IFLA_BRIDGE_MRP_INFO_IN_TEST_INTERVAL,
> + IFLA_BRIDGE_MRP_INFO_IN_TEST_MAX_MISS,
>   __IFLA_BRIDGE_MRP_INFO_MAX,
>  };
>  
> 

Acked-by: Nikolay Aleksandrov 




Re: [PATCH net-next v4 11/12] bridge: mrp: Extend br_mrp_fill_info

2020-07-14 Thread Nikolay Aleksandrov
On 14/07/2020 10:34, Horatiu Vultur wrote:
> This patch extends the function br_mrp_fill_info to return also the
> status for the interconnect ring.
> 
> Signed-off-by: Horatiu Vultur 
> ---
>  net/bridge/br_mrp_netlink.c | 18 ++
>  1 file changed, 18 insertions(+)
> 
> diff --git a/net/bridge/br_mrp_netlink.c b/net/bridge/br_mrp_netlink.c
> index a006e0771e8d3..2a2fdf3500c5b 100644
> --- a/net/bridge/br_mrp_netlink.c
> +++ b/net/bridge/br_mrp_netlink.c
> @@ -474,6 +474,11 @@ int br_mrp_fill_info(struct sk_buff *skb, struct 
> net_bridge *br)
>p->dev->ifindex))
>   goto nla_put_failure;
>  
> + p = rcu_dereference(mrp->i_port);
> + if (p && nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_I_IFINDEX,
> +  p->dev->ifindex))
> + goto nla_put_failure;
> +
>   if (nla_put_u16(skb, IFLA_BRIDGE_MRP_INFO_PRIO,
>   mrp->prio))
>   goto nla_put_failure;
> @@ -493,6 +498,19 @@ int br_mrp_fill_info(struct sk_buff *skb, struct 
> net_bridge *br)
>   mrp->test_monitor))
>   goto nla_put_failure;
>  
> + if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_IN_STATE,
> + mrp->in_state))
> + goto nla_put_failure;
> + if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_IN_ROLE,
> + mrp->in_role))
> + goto nla_put_failure;
> + if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_IN_TEST_INTERVAL,
> + mrp->in_test_interval))
> + goto nla_put_failure;
> + if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_IN_TEST_MAX_MISS,
> + mrp->in_test_max_miss))
> + goto nla_put_failure;
> +
>   nla_nest_end(skb, tb);
>   }
>   nla_nest_end(skb, mrp_tb);
> 

Acked-by: Nikolay Aleksandrov 


Re: [PATCH net-next v4 09/12] bridge: mrp: Extend MRP netlink interface for configuring MRP interconnect

2020-07-14 Thread Nikolay Aleksandrov
On 14/07/2020 10:34, Horatiu Vultur wrote:
> This patch extends the existing MRP netlink interface with the following
> attributes: IFLA_BRIDGE_MRP_IN_ROLE, IFLA_BRIDGE_MRP_IN_STATE and
> IFLA_BRIDGE_MRP_START_IN_TEST. These attributes are similar to their
> ring attributes but they apply to the interconnect port.
> 
> Signed-off-by: Horatiu Vultur 
> ---
>  net/bridge/br_mrp_netlink.c | 140 
>  1 file changed, 140 insertions(+)
> 

Acked-by: Nikolay Aleksandrov 

> diff --git a/net/bridge/br_mrp_netlink.c b/net/bridge/br_mrp_netlink.c
> index 4bf7aaeb29152..a006e0771e8d3 100644
> --- a/net/bridge/br_mrp_netlink.c
> +++ b/net/bridge/br_mrp_netlink.c
> @@ -14,6 +14,9 @@ static const struct nla_policy 
> br_mrp_policy[IFLA_BRIDGE_MRP_MAX + 1] = {
>   [IFLA_BRIDGE_MRP_RING_STATE]= { .type = NLA_NESTED },
>   [IFLA_BRIDGE_MRP_RING_ROLE] = { .type = NLA_NESTED },
>   [IFLA_BRIDGE_MRP_START_TEST]= { .type = NLA_NESTED },
> + [IFLA_BRIDGE_MRP_IN_ROLE]   = { .type = NLA_NESTED },
> + [IFLA_BRIDGE_MRP_IN_STATE]  = { .type = NLA_NESTED },
> + [IFLA_BRIDGE_MRP_START_IN_TEST] = { .type = NLA_NESTED },
>  };
>  
>  static const struct nla_policy
> @@ -235,6 +238,121 @@ static int br_mrp_start_test_parse(struct net_bridge 
> *br, struct nlattr *attr,
>   return br_mrp_start_test(br, );
>  }
>  
> +static const struct nla_policy
> +br_mrp_in_state_policy[IFLA_BRIDGE_MRP_IN_STATE_MAX + 1] = {
> + [IFLA_BRIDGE_MRP_IN_STATE_UNSPEC]   = { .type = NLA_REJECT },
> + [IFLA_BRIDGE_MRP_IN_STATE_IN_ID]= { .type = NLA_U32 },
> + [IFLA_BRIDGE_MRP_IN_STATE_STATE]= { .type = NLA_U32 },
> +};
> +
> +static int br_mrp_in_state_parse(struct net_bridge *br, struct nlattr *attr,
> +  struct netlink_ext_ack *extack)
> +{
> + struct nlattr *tb[IFLA_BRIDGE_MRP_IN_STATE_MAX + 1];
> + struct br_mrp_in_state state;
> + int err;
> +
> + err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_IN_STATE_MAX, attr,
> +br_mrp_in_state_policy, extack);
> + if (err)
> + return err;
> +
> + if (!tb[IFLA_BRIDGE_MRP_IN_STATE_IN_ID] ||
> + !tb[IFLA_BRIDGE_MRP_IN_STATE_STATE]) {
> + NL_SET_ERR_MSG_MOD(extack,
> +"Missing attribute: IN_ID or STATE");
> + return -EINVAL;
> + }
> +
> + memset(, 0x0, sizeof(state));
> +
> + state.in_id = nla_get_u32(tb[IFLA_BRIDGE_MRP_IN_STATE_IN_ID]);
> + state.in_state = nla_get_u32(tb[IFLA_BRIDGE_MRP_IN_STATE_STATE]);
> +
> + return br_mrp_set_in_state(br, );
> +}
> +
> +static const struct nla_policy
> +br_mrp_in_role_policy[IFLA_BRIDGE_MRP_IN_ROLE_MAX + 1] = {
> + [IFLA_BRIDGE_MRP_IN_ROLE_UNSPEC]= { .type = NLA_REJECT },
> + [IFLA_BRIDGE_MRP_IN_ROLE_RING_ID]   = { .type = NLA_U32 },
> + [IFLA_BRIDGE_MRP_IN_ROLE_IN_ID] = { .type = NLA_U16 },
> + [IFLA_BRIDGE_MRP_IN_ROLE_ROLE]  = { .type = NLA_U32 },
> + [IFLA_BRIDGE_MRP_IN_ROLE_I_IFINDEX] = { .type = NLA_U32 },
> +};
> +
> +static int br_mrp_in_role_parse(struct net_bridge *br, struct nlattr *attr,
> + struct netlink_ext_ack *extack)
> +{
> + struct nlattr *tb[IFLA_BRIDGE_MRP_IN_ROLE_MAX + 1];
> + struct br_mrp_in_role role;
> + int err;
> +
> + err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_IN_ROLE_MAX, attr,
> +br_mrp_in_role_policy, extack);
> + if (err)
> + return err;
> +
> + if (!tb[IFLA_BRIDGE_MRP_IN_ROLE_RING_ID] ||
> + !tb[IFLA_BRIDGE_MRP_IN_ROLE_IN_ID] ||
> + !tb[IFLA_BRIDGE_MRP_IN_ROLE_I_IFINDEX] ||
> + !tb[IFLA_BRIDGE_MRP_IN_ROLE_ROLE]) {
> + NL_SET_ERR_MSG_MOD(extack,
> +"Missing attribute: RING_ID or ROLE or IN_ID 
> or I_IFINDEX");
> + return -EINVAL;
> + }
> +
> + memset(, 0x0, sizeof(role));
> +
> + role.ring_id = nla_get_u32(tb[IFLA_BRIDGE_MRP_IN_ROLE_RING_ID]);
> + role.in_id = nla_get_u16(tb[IFLA_BRIDGE_MRP_IN_ROLE_IN_ID]);
> + role.i_ifindex = nla_get_u32(tb[IFLA_BRIDGE_MRP_IN_ROLE_I_IFINDEX]);
> + role.in_role = nla_get_u32(tb[IFLA_BRIDGE_MRP_IN_ROLE_ROLE]);
> +
> + return br_mrp_set_in_role(br, );
> +}
> +
> +static const struct nla_policy
> +br_mrp_start_in_test_policy[IFLA_BRIDGE_MRP_START_IN_TEST_MAX + 1] = {
> + [IFLA_BRIDGE_MRP_START_IN_TEST_UNSPEC]  = { .type = NLA_REJECT },
> + [IFLA_BRIDGE_MRP_START_IN_TEST_IN_ID]   = { .type = NLA_U32 },
> + [I

Re: [PATCH net-next v4 08/12] bridge: mrp: Implement the MRP Interconnect API

2020-07-14 Thread Nikolay Aleksandrov
On 14/07/2020 10:34, Horatiu Vultur wrote:
> This patch adds support for MRP Interconnect. Similar to the MRP ring,
> if the HW can't generate MRP_InTest frames, then the SW will try to
> generate them. And if also the SW fails to generate the frames then an
> error is returned to userspace.
> 
> The forwarding/termination of MRP_In frames is happening in the kernel
> and is done by MRP instances.
> 
> Signed-off-by: Horatiu Vultur 
> ---
>  net/bridge/br_mrp.c | 570 ++--
>  net/bridge/br_private_mrp.h |   4 +
>  2 files changed, 543 insertions(+), 31 deletions(-)
> 

Acked-by: Nikolay Aleksandrov 

> diff --git a/net/bridge/br_mrp.c b/net/bridge/br_mrp.c
> index fe7cf1446b58a..b36689e6e7cba 100644
> --- a/net/bridge/br_mrp.c
> +++ b/net/bridge/br_mrp.c
> @@ -4,6 +4,27 @@
>  #include "br_private_mrp.h"
>  
>  static const u8 mrp_test_dmac[ETH_ALEN] = { 0x1, 0x15, 0x4e, 0x0, 0x0, 0x1 };
> +static const u8 mrp_in_test_dmac[ETH_ALEN] = { 0x1, 0x15, 0x4e, 0x0, 0x0, 
> 0x3 };
> +
> +static bool br_mrp_is_ring_port(struct net_bridge_port *p_port,
> + struct net_bridge_port *s_port,
> + struct net_bridge_port *port)
> +{
> + if (port == p_port ||
> + port == s_port)
> + return true;
> +
> + return false;
> +}
> +
> +static bool br_mrp_is_in_port(struct net_bridge_port *i_port,
> +   struct net_bridge_port *port)
> +{
> + if (port == i_port)
> + return true;
> +
> + return false;
> +}
>  
>  static struct net_bridge_port *br_mrp_get_port(struct net_bridge *br,
>  u32 ifindex)
> @@ -37,6 +58,22 @@ static struct br_mrp *br_mrp_find_id(struct net_bridge 
> *br, u32 ring_id)
>   return res;
>  }
>  
> +static struct br_mrp *br_mrp_find_in_id(struct net_bridge *br, u32 in_id)
> +{
> + struct br_mrp *res = NULL;
> + struct br_mrp *mrp;
> +
> + list_for_each_entry_rcu(mrp, &br->mrp_list, list,
> + lockdep_rtnl_is_held()) {
> + if (mrp->in_id == in_id) {
> + res = mrp;
> + break;
> + }
> + }
> +
> + return res;
> +}
> +
>  static bool br_mrp_unique_ifindex(struct net_bridge *br, u32 ifindex)
>  {
>   struct br_mrp *mrp;
> @@ -52,6 +89,10 @@ static bool br_mrp_unique_ifindex(struct net_bridge *br, 
> u32 ifindex)
>   p = rtnl_dereference(mrp->s_port);
>   if (p && p->dev->ifindex == ifindex)
>   return false;
> +
> + p = rtnl_dereference(mrp->i_port);
> + if (p && p->dev->ifindex == ifindex)
> + return false;
>   }
>  
>   return true;
> @@ -66,7 +107,8 @@ static struct br_mrp *br_mrp_find_port(struct net_bridge 
> *br,
>   list_for_each_entry_rcu(mrp, &br->mrp_list, list,
>   lockdep_rtnl_is_held()) {
>   if (rcu_access_pointer(mrp->p_port) == p ||
> - rcu_access_pointer(mrp->s_port) == p) {
> + rcu_access_pointer(mrp->s_port) == p ||
> + rcu_access_pointer(mrp->i_port) == p) {
>   res = mrp;
>   break;
>   }
> @@ -160,6 +202,36 @@ static struct sk_buff *br_mrp_alloc_test_skb(struct 
> br_mrp *mrp,
>   return skb;
>  }
>  
> +static struct sk_buff *br_mrp_alloc_in_test_skb(struct br_mrp *mrp,
> + struct net_bridge_port *p,
> + enum br_mrp_port_role_type 
> port_role)
> +{
> + struct br_mrp_in_test_hdr *hdr = NULL;
> + struct sk_buff *skb = NULL;
> +
> + if (!p)
> + return NULL;
> +
> + skb = br_mrp_skb_alloc(p, p->dev->dev_addr, mrp_in_test_dmac);
> + if (!skb)
> + return NULL;
> +
> + br_mrp_skb_tlv(skb, BR_MRP_TLV_HEADER_IN_TEST, sizeof(*hdr));
> + hdr = skb_put(skb, sizeof(*hdr));
> +
> + hdr->id = cpu_to_be16(mrp->in_id);
> + ether_addr_copy(hdr->sa, p->br->dev->dev_addr);
> + hdr->port_role = cpu_to_be16(port_role);
> + hdr->state = cpu_to_be16(mrp->in_state);
> + hdr->transitions = cpu_to_be16(mrp->in_transitions);
> + hdr->timestamp = cpu_to_be32(jiffies_to_msecs(jiffies));
> +
> + br_mrp_skb_common(skb, mrp);
> + br_mrp_skb_tlv(skb, BR_MRP_TLV_HEADER_END, 0x0);
> +
> + return skb;
> +}
> +
>  

Re: [PATCH net-next v4 07/12] bridge: switchdev: mrp: Extend MRP API for switchdev for MRP Interconnect

2020-07-14 Thread Nikolay Aleksandrov
On 14/07/2020 10:34, Horatiu Vultur wrote:
> Implement the MRP API for interconnect switchdev. Similar to the other
> br_mrp_switchdev functions, these functions will just eventually call the
> switchdev functions: switchdev_port_obj_add/del.
> 
> Signed-off-by: Horatiu Vultur 
> ---
>  net/bridge/br_mrp_switchdev.c | 62 +++
>  net/bridge/br_private_mrp.h   |  7 
>  2 files changed, 69 insertions(+)
> 

Acked-by: Nikolay Aleksandrov 

> diff --git a/net/bridge/br_mrp_switchdev.c b/net/bridge/br_mrp_switchdev.c
> index 0da68a0da4b5a..ed547e03ace17 100644
> --- a/net/bridge/br_mrp_switchdev.c
> +++ b/net/bridge/br_mrp_switchdev.c
> @@ -107,6 +107,68 @@ int br_mrp_switchdev_set_ring_state(struct net_bridge 
> *br,
>   return 0;
>  }
>  
> +int br_mrp_switchdev_set_in_role(struct net_bridge *br, struct br_mrp *mrp,
> +  u16 in_id, u32 ring_id,
> +  enum br_mrp_in_role_type role)
> +{
> + struct switchdev_obj_in_role_mrp mrp_role = {
> + .obj.orig_dev = br->dev,
> + .obj.id = SWITCHDEV_OBJ_ID_IN_ROLE_MRP,
> + .in_role = role,
> + .in_id = mrp->in_id,
> + .ring_id = mrp->ring_id,
> + .i_port = rtnl_dereference(mrp->i_port)->dev,
> + };
> + int err;
> +
> + if (role == BR_MRP_IN_ROLE_DISABLED)
> + err = switchdev_port_obj_del(br->dev, &mrp_role.obj);
> + else
> + err = switchdev_port_obj_add(br->dev, &mrp_role.obj, NULL);
> +
> + return err;
> +}
> +
> +int br_mrp_switchdev_set_in_state(struct net_bridge *br, struct br_mrp *mrp,
> +   enum br_mrp_in_state_type state)
> +{
> + struct switchdev_obj_in_state_mrp mrp_state = {
> + .obj.orig_dev = br->dev,
> + .obj.id = SWITCHDEV_OBJ_ID_IN_STATE_MRP,
> + .in_state = state,
> + .in_id = mrp->in_id,
> + };
> + int err;
> +
> + err = switchdev_port_obj_add(br->dev, &mrp_state.obj, NULL);
> +
> + if (err && err != -EOPNOTSUPP)
> + return err;
> +
> + return 0;
> +}
> +
> +int br_mrp_switchdev_send_in_test(struct net_bridge *br, struct br_mrp *mrp,
> +   u32 interval, u8 max_miss, u32 period)
> +{
> + struct switchdev_obj_in_test_mrp test = {
> + .obj.orig_dev = br->dev,
> + .obj.id = SWITCHDEV_OBJ_ID_IN_TEST_MRP,
> + .interval = interval,
> + .max_miss = max_miss,
> + .in_id = mrp->in_id,
> + .period = period,
> + };
> + int err;
> +
> + if (interval == 0)
> + err = switchdev_port_obj_del(br->dev, &test.obj);
> + else
> + err = switchdev_port_obj_add(br->dev, &test.obj, NULL);
> +
> + return err;
> +}
> +
>  int br_mrp_port_switchdev_set_state(struct net_bridge_port *p,
>   enum br_mrp_port_state_type state)
>  {
> diff --git a/net/bridge/br_private_mrp.h b/net/bridge/br_private_mrp.h
> index 23da2f956ad0e..0d554ef88db85 100644
> --- a/net/bridge/br_private_mrp.h
> +++ b/net/bridge/br_private_mrp.h
> @@ -72,6 +72,13 @@ int br_mrp_port_switchdev_set_state(struct net_bridge_port 
> *p,
>   enum br_mrp_port_state_type state);
>  int br_mrp_port_switchdev_set_role(struct net_bridge_port *p,
>  enum br_mrp_port_role_type role);
> +int br_mrp_switchdev_set_in_role(struct net_bridge *br, struct br_mrp *mrp,
> +  u16 in_id, u32 ring_id,
> +  enum br_mrp_in_role_type role);
> +int br_mrp_switchdev_set_in_state(struct net_bridge *br, struct br_mrp *mrp,
> +   enum br_mrp_in_state_type state);
> +int br_mrp_switchdev_send_in_test(struct net_bridge *br, struct br_mrp *mrp,
> +   u32 interval, u8 max_miss, u32 period);
>  
>  /* br_mrp_netlink.c  */
>  int br_mrp_ring_port_open(struct net_device *dev, u8 loc);
> 



Re: [PATCH net-next v4 06/12] bridge: mrp: Add br_mrp_in_port_open function

2020-07-14 Thread Nikolay Aleksandrov
On 14/07/2020 10:34, Horatiu Vultur wrote:
> This function notifies the userspace when the node lost the continuity
> of MRP_InTest frames.
> 
> Signed-off-by: Horatiu Vultur 
> ---
>  net/bridge/br_mrp_netlink.c | 22 ++
>  net/bridge/br_private_mrp.h |  1 +
>  2 files changed, 23 insertions(+)
> 

Acked-by: Nikolay Aleksandrov 

> diff --git a/net/bridge/br_mrp_netlink.c b/net/bridge/br_mrp_netlink.c
> index acce300c0cc29..4bf7aaeb29152 100644
> --- a/net/bridge/br_mrp_netlink.c
> +++ b/net/bridge/br_mrp_netlink.c
> @@ -389,3 +389,25 @@ int br_mrp_ring_port_open(struct net_device *dev, u8 loc)
>  out:
>   return err;
>  }
> +
> +int br_mrp_in_port_open(struct net_device *dev, u8 loc)
> +{
> + struct net_bridge_port *p;
> + int err = 0;
> +
> + p = br_port_get_rcu(dev);
> + if (!p) {
> + err = -EINVAL;
> + goto out;
> + }
> +
> + if (loc)
> + p->flags |= BR_MRP_LOST_IN_CONT;
> + else
> + p->flags &= ~BR_MRP_LOST_IN_CONT;
> +
> + br_ifinfo_notify(RTM_NEWLINK, NULL, p);
> +
> +out:
> + return err;
> +}
> diff --git a/net/bridge/br_private_mrp.h b/net/bridge/br_private_mrp.h
> index e93c8f9d4df58..23da2f956ad0e 100644
> --- a/net/bridge/br_private_mrp.h
> +++ b/net/bridge/br_private_mrp.h
> @@ -75,5 +75,6 @@ int br_mrp_port_switchdev_set_role(struct net_bridge_port 
> *p,
>  
>  /* br_mrp_netlink.c  */
>  int br_mrp_ring_port_open(struct net_device *dev, u8 loc);
> +int br_mrp_in_port_open(struct net_device *dev, u8 loc);
>  
>  #endif /* _BR_PRIVATE_MRP_H */
> 



Re: [PATCH net-next v4 04/12] bridge: mrp: Extend br_mrp for MRP interconnect

2020-07-14 Thread Nikolay Aleksandrov
On 14/07/2020 10:34, Horatiu Vultur wrote:
> This patch extends the 'struct br_mrp' to contain information regarding
> the MRP interconnect. It contains the following:
> - the interconnect port 'i_port', which is NULL if the node doesn't have
>   an interconnect role
> - the interconnect id, which is similar to the ring id, but this field
>   is also part of the MRP_InTest frames.
> - the interconnect role, which can be MIM or MIC.
> - the interconnect state, which can be open or closed.
> - the interconnect delayed_work for sending MRP_InTest frames and checking
>   for loss of continuity.
> 
> Signed-off-by: Horatiu Vultur 
> ---
>  net/bridge/br_private_mrp.h | 13 +
>  1 file changed, 13 insertions(+)
> 

Acked-by: Nikolay Aleksandrov 

> diff --git a/net/bridge/br_private_mrp.h b/net/bridge/br_private_mrp.h
> index 315eb37d89f0f..8841ba847fb29 100644
> --- a/net/bridge/br_private_mrp.h
> +++ b/net/bridge/br_private_mrp.h
> @@ -12,8 +12,10 @@ struct br_mrp {
>  
>   struct net_bridge_port __rcu*p_port;
>   struct net_bridge_port __rcu*s_port;
> + struct net_bridge_port __rcu*i_port;
>  
>   u32 ring_id;
> + u16 in_id;
>   u16 prio;
>  
>   enum br_mrp_ring_role_type  ring_role;
> @@ -21,6 +23,11 @@ struct br_mrp {
>   enum br_mrp_ring_state_type ring_state;
>   u32 ring_transitions;
>  
> + enum br_mrp_in_role_typein_role;
> + u8  in_role_offloaded;
> + enum br_mrp_in_state_type   in_state;
> + u32 in_transitions;
> +
>   struct delayed_work test_work;
>   u32 test_interval;
>   unsigned long   test_end;
> @@ -28,6 +35,12 @@ struct br_mrp {
>   u32 test_max_miss;
>   booltest_monitor;
>  
> + struct delayed_work in_test_work;
> + u32 in_test_interval;
> + unsigned long   in_test_end;
> + u32 in_test_count_miss;
> + u32 in_test_max_miss;
> +
>   u32 seq_id;
>  
>   struct rcu_head rcu;
> 



  1   2   3   >