Re: [PATCH net-next 4/6] net: use core MTU range checking in core net infra

2016-10-19 Thread Jarod Wilson
On Wed, Oct 19, 2016 at 05:28:00PM +0200, Sabrina Dubroca wrote:
> 2016-10-19, 10:40:06 -0400, Jarod Wilson wrote:
> > On Wed, Oct 19, 2016 at 03:55:29PM +0200, Sabrina Dubroca wrote:
> > > 2016-10-18, 22:33:31 -0400, Jarod Wilson wrote:
...
> > I'm thinking more and more that we ought to back out the patch that sets
> > min/max in ether_setup, save it for last, after we're sure everyone that
> > calls it has been prepared.
> 
> I'm not sure how that would work now, if some of the patches that
> already went in for ethernet drivers assume that ether_setup will
> configure a basic {min,max}_mtu pair (at least e100 makes that
> assumption, but that might be the only one).

Argh. Yeah. Hrm. Would have to do the revert *and* have e100 and possibly
others set their own min/max pair. So I guess it's a race to fix all the
fallout... Crap.

> > > [...]
> > > > diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
> > > > index 89a687f..81fc79a 100644
> > > > --- a/net/bridge/br_device.c
> > > > +++ b/net/bridge/br_device.c
> > > > @@ -184,17 +184,15 @@ static struct rtnl_link_stats64 
> > > > *br_get_stats64(struct net_device *dev,
> > > >  
> > > >  static int br_change_mtu(struct net_device *dev, int new_mtu)
> > > >  {
> > > > +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
> > > > struct net_bridge *br = netdev_priv(dev);
> > > > -   if (new_mtu < 68 || new_mtu > br_min_mtu(br))
> > > > -   return -EINVAL;
> > > > -
> > > > -   dev->mtu = new_mtu;
> > > >  
> > > > -#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
> > > > /* remember the MTU in the rtable for PMTU */
> > > > dst_metric_set(>fake_rtable.dst, RTAX_MTU, new_mtu);
> > > >  #endif
> > > >  
> > > > +   dev->mtu = new_mtu;
> > > > +
> > > > return 0;
> > > >  }
> > > >  
> > > > @@ -390,6 +388,7 @@ void br_dev_setup(struct net_device *dev)
> > > > dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
> > > >NETIF_F_HW_VLAN_STAG_TX;
> > > > dev->vlan_features = COMMON_FEATURES;
> > > > +   dev->max_mtu = br_min_mtu(br);
> > > 
> > > br_min_mtu uses br->port_list, which is only initialized a few lines
> > > later (right after the spin_lock_init() at the end of the context of
> > > this diff).
> > 
> > Ah, okay, I'd just grouped it with the other dev->foo settings.
> > 
> > > Besides, I don't think this works: br_min_mtu(br) changes when you add
> > > and remove ports, or when you change the MTU of an enslaved
> > > device. But this makes the max MTU for the bridge fixed (to 1500).
> > 
> > Okay, how about this: set no max_mtu (or set it to IP_MAX_MTU/65535), and
> > then retain a check against the possibly ever-changing br_min_mtu(br) in
> > br_change_mtu()?
> 
> Sounds good to me.

I think I have something here locally that looks sane. Working on a few
other similar cases now.

-- 
Jarod Wilson
ja...@redhat.com



Re: [PATCH net-next 4/6] net: use core MTU range checking in core net infra

2016-10-19 Thread Jarod Wilson
On Wed, Oct 19, 2016 at 05:28:00PM +0200, Sabrina Dubroca wrote:
> 2016-10-19, 10:40:06 -0400, Jarod Wilson wrote:
> > On Wed, Oct 19, 2016 at 03:55:29PM +0200, Sabrina Dubroca wrote:
> > > 2016-10-18, 22:33:31 -0400, Jarod Wilson wrote:
...
> > I'm thinking more and more that we ought to back out the patch that sets
> > min/max in ether_setup, save it for last, after we're sure everyone that
> > calls it has been prepared.
> 
> I'm not sure how that would work now, if some of the patches that
> already went in for ethernet drivers assume that ether_setup will
> configure a basic {min,max}_mtu pair (at least e100 makes that
> assumption, but that might be the only one).

Argh. Yeah. Hrm. Would have to do the revert *and* have e100 and possibly
others set their own min/max pair. So I guess it's a race to fix all the
fallout... Crap.

> > > [...]
> > > > diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
> > > > index 89a687f..81fc79a 100644
> > > > --- a/net/bridge/br_device.c
> > > > +++ b/net/bridge/br_device.c
> > > > @@ -184,17 +184,15 @@ static struct rtnl_link_stats64 
> > > > *br_get_stats64(struct net_device *dev,
> > > >  
> > > >  static int br_change_mtu(struct net_device *dev, int new_mtu)
> > > >  {
> > > > +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
> > > > struct net_bridge *br = netdev_priv(dev);
> > > > -   if (new_mtu < 68 || new_mtu > br_min_mtu(br))
> > > > -   return -EINVAL;
> > > > -
> > > > -   dev->mtu = new_mtu;
> > > >  
> > > > -#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
> > > > /* remember the MTU in the rtable for PMTU */
> > > > dst_metric_set(>fake_rtable.dst, RTAX_MTU, new_mtu);
> > > >  #endif
> > > >  
> > > > +   dev->mtu = new_mtu;
> > > > +
> > > > return 0;
> > > >  }
> > > >  
> > > > @@ -390,6 +388,7 @@ void br_dev_setup(struct net_device *dev)
> > > > dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
> > > >NETIF_F_HW_VLAN_STAG_TX;
> > > > dev->vlan_features = COMMON_FEATURES;
> > > > +   dev->max_mtu = br_min_mtu(br);
> > > 
> > > br_min_mtu uses br->port_list, which is only initialized a few lines
> > > later (right after the spin_lock_init() at the end of the context of
> > > this diff).
> > 
> > Ah, okay, I'd just grouped it with the other dev->foo settings.
> > 
> > > Besides, I don't think this works: br_min_mtu(br) changes when you add
> > > and remove ports, or when you change the MTU of an enslaved
> > > device. But this makes the max MTU for the bridge fixed (to 1500).
> > 
> > Okay, how about this: set no max_mtu (or set it to IP_MAX_MTU/65535), and
> > then retain a check against the possibly ever-changing br_min_mtu(br) in
> > br_change_mtu()?
> 
> Sounds good to me.

I think I have something here locally that looks sane. Working on a few
other similar cases now.

-- 
Jarod Wilson
ja...@redhat.com



Re: [PATCH net-next 4/6] net: use core MTU range checking in core net infra

2016-10-19 Thread Sabrina Dubroca
2016-10-19, 10:40:06 -0400, Jarod Wilson wrote:
> On Wed, Oct 19, 2016 at 03:55:29PM +0200, Sabrina Dubroca wrote:
> > 2016-10-18, 22:33:31 -0400, Jarod Wilson wrote:
> > > geneve:
> > > - Merge __geneve_change_mtu back into geneve_change_mtu, set max_mtu
> > > - This one isn't quite as straight-forward as others, could use some
> > >   closer inspection and testing
> > > 
> > > macvlan:
> > > - set min/max_mtu
> > > 
> > > tun:
> > > - set min/max_mtu, remove tun_net_change_mtu
> > > 
> > > vxlan:
> > > - Merge __vxlan_change_mtu back into vxlan_change_mtu, set min/max_mtu
> > > - This one is also not as straight-forward and could use closer inspection
> > >   and testing from vxlan folks
> > > 
> > > bridge:
> > > - set max_mtu via br_min_mtu()
> > > 
> > > openvswitch:
> > > - set min/max_mtu, remove internal_dev_change_mtu
> > > - note: max_mtu wasn't checked previously, it's been set to 65535, which
> > >   is the largest possible size supported
> > > 
> > > sch_teql:
> > > - set min/max_mtu (note: max_mtu previously unchecked, used max of 65535)
> > 
> > Nothing for other virtual netdevices? (dummy, veth, bond, etc) Their
> > MTU is limited to 1500 now.  Also missing macsec and ip_gre, probably
> > others that are using ether_setup.
> 
> Yeah, I've clearly missed more than I thought. Doing another sweep now.

Thanks.


> I'm thinking more and more that we ought to back out the patch that sets
> min/max in ether_setup, save it for last, after we're sure everyone that
> calls it has been prepared.

I'm not sure how that would work now, if some of the patches that
already went in for ethernet drivers assume that ether_setup will
configure a basic {min,max}_mtu pair (at least e100 makes that
assumption, but that might be the only one).

> > [...]
> > > diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
> > > index 89a687f..81fc79a 100644
> > > --- a/net/bridge/br_device.c
> > > +++ b/net/bridge/br_device.c
> > > @@ -184,17 +184,15 @@ static struct rtnl_link_stats64 
> > > *br_get_stats64(struct net_device *dev,
> > >  
> > >  static int br_change_mtu(struct net_device *dev, int new_mtu)
> > >  {
> > > +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
> > >   struct net_bridge *br = netdev_priv(dev);
> > > - if (new_mtu < 68 || new_mtu > br_min_mtu(br))
> > > - return -EINVAL;
> > > -
> > > - dev->mtu = new_mtu;
> > >  
> > > -#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
> > >   /* remember the MTU in the rtable for PMTU */
> > >   dst_metric_set(>fake_rtable.dst, RTAX_MTU, new_mtu);
> > >  #endif
> > >  
> > > + dev->mtu = new_mtu;
> > > +
> > >   return 0;
> > >  }
> > >  
> > > @@ -390,6 +388,7 @@ void br_dev_setup(struct net_device *dev)
> > >   dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
> > >  NETIF_F_HW_VLAN_STAG_TX;
> > >   dev->vlan_features = COMMON_FEATURES;
> > > + dev->max_mtu = br_min_mtu(br);
> > 
> > br_min_mtu uses br->port_list, which is only initialized a few lines
> > later (right after the spin_lock_init() at the end of the context of
> > this diff).
> 
> Ah, okay, I'd just grouped it with the other dev->foo settings.
> 
> > Besides, I don't think this works: br_min_mtu(br) changes when you add
> > and remove ports, or when you change the MTU of an enslaved
> > device. But this makes the max MTU for the bridge fixed (to 1500).
> 
> Okay, how about this: set no max_mtu (or set it to IP_MAX_MTU/65535), and
> then retain a check against the possibly ever-changing br_min_mtu(br) in
> br_change_mtu()?

Sounds good to me.


-- 
Sabrina


Re: [PATCH net-next 4/6] net: use core MTU range checking in core net infra

2016-10-19 Thread Sabrina Dubroca
2016-10-19, 10:40:06 -0400, Jarod Wilson wrote:
> On Wed, Oct 19, 2016 at 03:55:29PM +0200, Sabrina Dubroca wrote:
> > 2016-10-18, 22:33:31 -0400, Jarod Wilson wrote:
> > > geneve:
> > > - Merge __geneve_change_mtu back into geneve_change_mtu, set max_mtu
> > > - This one isn't quite as straight-forward as others, could use some
> > >   closer inspection and testing
> > > 
> > > macvlan:
> > > - set min/max_mtu
> > > 
> > > tun:
> > > - set min/max_mtu, remove tun_net_change_mtu
> > > 
> > > vxlan:
> > > - Merge __vxlan_change_mtu back into vxlan_change_mtu, set min/max_mtu
> > > - This one is also not as straight-forward and could use closer inspection
> > >   and testing from vxlan folks
> > > 
> > > bridge:
> > > - set max_mtu via br_min_mtu()
> > > 
> > > openvswitch:
> > > - set min/max_mtu, remove internal_dev_change_mtu
> > > - note: max_mtu wasn't checked previously, it's been set to 65535, which
> > >   is the largest possible size supported
> > > 
> > > sch_teql:
> > > - set min/max_mtu (note: max_mtu previously unchecked, used max of 65535)
> > 
> > Nothing for other virtual netdevices? (dummy, veth, bond, etc) Their
> > MTU is limited to 1500 now.  Also missing macsec and ip_gre, probably
> > others that are using ether_setup.
> 
> Yeah, I've clearly missed more than I thought. Doing another sweep now.

Thanks.


> I'm thinking more and more that we ought to back out the patch that sets
> min/max in ether_setup, save it for last, after we're sure everyone that
> calls it has been prepared.

I'm not sure how that would work now, if some of the patches that
already went in for ethernet drivers assume that ether_setup will
configure a basic {min,max}_mtu pair (at least e100 makes that
assumption, but that might be the only one).

> > [...]
> > > diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
> > > index 89a687f..81fc79a 100644
> > > --- a/net/bridge/br_device.c
> > > +++ b/net/bridge/br_device.c
> > > @@ -184,17 +184,15 @@ static struct rtnl_link_stats64 
> > > *br_get_stats64(struct net_device *dev,
> > >  
> > >  static int br_change_mtu(struct net_device *dev, int new_mtu)
> > >  {
> > > +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
> > >   struct net_bridge *br = netdev_priv(dev);
> > > - if (new_mtu < 68 || new_mtu > br_min_mtu(br))
> > > - return -EINVAL;
> > > -
> > > - dev->mtu = new_mtu;
> > >  
> > > -#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
> > >   /* remember the MTU in the rtable for PMTU */
> > >   dst_metric_set(>fake_rtable.dst, RTAX_MTU, new_mtu);
> > >  #endif
> > >  
> > > + dev->mtu = new_mtu;
> > > +
> > >   return 0;
> > >  }
> > >  
> > > @@ -390,6 +388,7 @@ void br_dev_setup(struct net_device *dev)
> > >   dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
> > >  NETIF_F_HW_VLAN_STAG_TX;
> > >   dev->vlan_features = COMMON_FEATURES;
> > > + dev->max_mtu = br_min_mtu(br);
> > 
> > br_min_mtu uses br->port_list, which is only initialized a few lines
> > later (right after the spin_lock_init() at the end of the context of
> > this diff).
> 
> Ah, okay, I'd just grouped it with the other dev->foo settings.
> 
> > Besides, I don't think this works: br_min_mtu(br) changes when you add
> > and remove ports, or when you change the MTU of an enslaved
> > device. But this makes the max MTU for the bridge fixed (to 1500).
> 
> Okay, how about this: set no max_mtu (or set it to IP_MAX_MTU/65535), and
> then retain a check against the possibly ever-changing br_min_mtu(br) in
> br_change_mtu()?

Sounds good to me.


-- 
Sabrina


Re: [PATCH net-next 4/6] net: use core MTU range checking in core net infra

2016-10-19 Thread Jiri Benc
On Tue, 18 Oct 2016 22:33:31 -0400, Jarod Wilson wrote:
> --- a/drivers/net/vxlan.c
> +++ b/drivers/net/vxlan.c
> @@ -2367,43 +2367,31 @@ static void vxlan_set_multicast_list(struct 
> net_device *dev)
>  {
>  }
>  
> -static int __vxlan_change_mtu(struct net_device *dev,
> -   struct net_device *lowerdev,
> -   struct vxlan_rdst *dst, int new_mtu, bool strict)
> +static int vxlan_change_mtu(struct net_device *dev, int new_mtu)
>  {
> - int max_mtu = IP_MAX_MTU;
> -
> - if (lowerdev)
> - max_mtu = lowerdev->mtu;
> + struct vxlan_dev *vxlan = netdev_priv(dev);
> + struct vxlan_rdst *dst = >default_dst;
> + struct net_device *lowerdev = __dev_get_by_index(vxlan->net,
> +  dst->remote_ifindex);
> + bool use_ipv6 = false;
>  
>   if (dst->remote_ip.sa.sa_family == AF_INET6)
> - max_mtu -= VXLAN6_HEADROOM;
> - else
> - max_mtu -= VXLAN_HEADROOM;
> -
> - if (new_mtu < 68)
> - return -EINVAL;
> + use_ipv6 = true;
>  
> - if (new_mtu > max_mtu) {
> - if (strict)
> + /* We re-check this, because users *could* alter the mtu of the
> +  * lower device after we've initialized dev->max_mtu.
> +  */
> + if (lowerdev) {
> + dev->max_mtu = lowerdev->mtu -
> +(use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
> + if (new_mtu > dev->max_mtu)
>   return -EINVAL;
> -
> - new_mtu = max_mtu;
>   }
>  
>   dev->mtu = new_mtu;
>   return 0;
>  }

Sorry for the silly question, how does the min_mtu and max_mtu stuff
works? I noticed your patches but haven't looked in depth into them.

When the ndo_change_mtu callback is defined, is the dev->min_mtu and
dev->max_mtu checked first and if the desired mtu is not within range,
ndo_change_mtu is not called?

Or does ndo_change_mtu override the checks?

In either case, the code does not look correct. In the first case,
increasing of lowerdev MTU wouldn't allow increasing of vxlan MTU
without deleting and recreating the vxlan interface. In the second
case, you're missing check against the min_mtu.

>  
> -static int vxlan_change_mtu(struct net_device *dev, int new_mtu)
> -{
> - struct vxlan_dev *vxlan = netdev_priv(dev);
> - struct vxlan_rdst *dst = >default_dst;
> - struct net_device *lowerdev = __dev_get_by_index(vxlan->net,
> -  dst->remote_ifindex);
> - return __vxlan_change_mtu(dev, lowerdev, dst, new_mtu, true);
> -}
> -
>  static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff 
> *skb)
>  {
>   struct vxlan_dev *vxlan = netdev_priv(dev);
> @@ -2795,6 +2783,10 @@ static int vxlan_dev_configure(struct net *src_net, 
> struct net_device *dev,
>   vxlan_ether_setup(dev);
>   }
>  
> + /* MTU range: 68 - 65535 */
> + dev->min_mtu = 68;
> + dev->max_mtu = IP_MAX_MTU;
> +
>   vxlan->net = src_net;
>  
>   dst->remote_vni = conf->vni;
> @@ -2837,8 +2829,11 @@ static int vxlan_dev_configure(struct net *src_net, 
> struct net_device *dev,
>   }
>  #endif
>  
> - if (!conf->mtu)
> - dev->mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM 
> : VXLAN_HEADROOM);
> + if (!conf->mtu) {
> + dev->mtu = lowerdev->mtu -
> +(use_ipv6 ? VXLAN6_HEADROOM : 
> VXLAN_HEADROOM);
> + dev->max_mtu = dev->mtu;
> + }
>  
>   needed_headroom = lowerdev->hard_header_len;
>   } else if (vxlan_addr_multicast(>remote_ip)) {
> @@ -2847,9 +2842,14 @@ static int vxlan_dev_configure(struct net *src_net, 
> struct net_device *dev,
>   }
>  
>   if (conf->mtu) {
> - err = __vxlan_change_mtu(dev, lowerdev, dst, conf->mtu, false);
> - if (err)
> - return err;
> + if (lowerdev)
> + dev->max_mtu = lowerdev->mtu;
> + dev->max_mtu -= (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
> +
> + dev->mtu = conf->mtu;
> +
> + if (conf->mtu > dev->max_mtu)
> + dev->mtu = dev->max_mtu;
>   }

You removed the check for min_mtu but it's needed here. The conf->mtu
value comes from the user space and can be anything.

>  
>   if (use_ipv6 || conf->flags & VXLAN_F_COLLECT_METADATA)

 Jiri


Re: [PATCH net-next 4/6] net: use core MTU range checking in core net infra

2016-10-19 Thread Jiri Benc
On Tue, 18 Oct 2016 22:33:31 -0400, Jarod Wilson wrote:
> --- a/drivers/net/vxlan.c
> +++ b/drivers/net/vxlan.c
> @@ -2367,43 +2367,31 @@ static void vxlan_set_multicast_list(struct 
> net_device *dev)
>  {
>  }
>  
> -static int __vxlan_change_mtu(struct net_device *dev,
> -   struct net_device *lowerdev,
> -   struct vxlan_rdst *dst, int new_mtu, bool strict)
> +static int vxlan_change_mtu(struct net_device *dev, int new_mtu)
>  {
> - int max_mtu = IP_MAX_MTU;
> -
> - if (lowerdev)
> - max_mtu = lowerdev->mtu;
> + struct vxlan_dev *vxlan = netdev_priv(dev);
> + struct vxlan_rdst *dst = >default_dst;
> + struct net_device *lowerdev = __dev_get_by_index(vxlan->net,
> +  dst->remote_ifindex);
> + bool use_ipv6 = false;
>  
>   if (dst->remote_ip.sa.sa_family == AF_INET6)
> - max_mtu -= VXLAN6_HEADROOM;
> - else
> - max_mtu -= VXLAN_HEADROOM;
> -
> - if (new_mtu < 68)
> - return -EINVAL;
> + use_ipv6 = true;
>  
> - if (new_mtu > max_mtu) {
> - if (strict)
> + /* We re-check this, because users *could* alter the mtu of the
> +  * lower device after we've initialized dev->max_mtu.
> +  */
> + if (lowerdev) {
> + dev->max_mtu = lowerdev->mtu -
> +(use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
> + if (new_mtu > dev->max_mtu)
>   return -EINVAL;
> -
> - new_mtu = max_mtu;
>   }
>  
>   dev->mtu = new_mtu;
>   return 0;
>  }

Sorry for the silly question, how does the min_mtu and max_mtu stuff
works? I noticed your patches but haven't looked in depth into them.

When the ndo_change_mtu callback is defined, is the dev->min_mtu and
dev->max_mtu checked first and if the desired mtu is not within range,
ndo_change_mtu is not called?

Or does ndo_change_mtu override the checks?

In either case, the code does not look correct. In the first case,
increasing of lowerdev MTU wouldn't allow increasing of vxlan MTU
without deleting and recreating the vxlan interface. In the second
case, you're missing check against the min_mtu.

>  
> -static int vxlan_change_mtu(struct net_device *dev, int new_mtu)
> -{
> - struct vxlan_dev *vxlan = netdev_priv(dev);
> - struct vxlan_rdst *dst = >default_dst;
> - struct net_device *lowerdev = __dev_get_by_index(vxlan->net,
> -  dst->remote_ifindex);
> - return __vxlan_change_mtu(dev, lowerdev, dst, new_mtu, true);
> -}
> -
>  static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff 
> *skb)
>  {
>   struct vxlan_dev *vxlan = netdev_priv(dev);
> @@ -2795,6 +2783,10 @@ static int vxlan_dev_configure(struct net *src_net, 
> struct net_device *dev,
>   vxlan_ether_setup(dev);
>   }
>  
> + /* MTU range: 68 - 65535 */
> + dev->min_mtu = 68;
> + dev->max_mtu = IP_MAX_MTU;
> +
>   vxlan->net = src_net;
>  
>   dst->remote_vni = conf->vni;
> @@ -2837,8 +2829,11 @@ static int vxlan_dev_configure(struct net *src_net, 
> struct net_device *dev,
>   }
>  #endif
>  
> - if (!conf->mtu)
> - dev->mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM 
> : VXLAN_HEADROOM);
> + if (!conf->mtu) {
> + dev->mtu = lowerdev->mtu -
> +(use_ipv6 ? VXLAN6_HEADROOM : 
> VXLAN_HEADROOM);
> + dev->max_mtu = dev->mtu;
> + }
>  
>   needed_headroom = lowerdev->hard_header_len;
>   } else if (vxlan_addr_multicast(>remote_ip)) {
> @@ -2847,9 +2842,14 @@ static int vxlan_dev_configure(struct net *src_net, 
> struct net_device *dev,
>   }
>  
>   if (conf->mtu) {
> - err = __vxlan_change_mtu(dev, lowerdev, dst, conf->mtu, false);
> - if (err)
> - return err;
> + if (lowerdev)
> + dev->max_mtu = lowerdev->mtu;
> + dev->max_mtu -= (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
> +
> + dev->mtu = conf->mtu;
> +
> + if (conf->mtu > dev->max_mtu)
> + dev->mtu = dev->max_mtu;
>   }

You removed the check for min_mtu but it's needed here. The conf->mtu
value comes from the user space and can be anything.

>  
>   if (use_ipv6 || conf->flags & VXLAN_F_COLLECT_METADATA)

 Jiri


Re: [PATCH net-next 4/6] net: use core MTU range checking in core net infra

2016-10-19 Thread Jarod Wilson
On Wed, Oct 19, 2016 at 02:17:03PM +0200, Jiri Benc wrote:
> On Tue, 18 Oct 2016 22:33:31 -0400, Jarod Wilson wrote:
> > --- a/drivers/net/vxlan.c
> > +++ b/drivers/net/vxlan.c
> > @@ -2367,43 +2367,31 @@ static void vxlan_set_multicast_list(struct 
> > net_device *dev)
> >  {
> >  }
> >  
> > -static int __vxlan_change_mtu(struct net_device *dev,
> > - struct net_device *lowerdev,
> > - struct vxlan_rdst *dst, int new_mtu, bool strict)
> > +static int vxlan_change_mtu(struct net_device *dev, int new_mtu)
> >  {
> > -   int max_mtu = IP_MAX_MTU;
> > -
> > -   if (lowerdev)
> > -   max_mtu = lowerdev->mtu;
> > +   struct vxlan_dev *vxlan = netdev_priv(dev);
> > +   struct vxlan_rdst *dst = >default_dst;
> > +   struct net_device *lowerdev = __dev_get_by_index(vxlan->net,
> > +dst->remote_ifindex);
> > +   bool use_ipv6 = false;
> >  
> > if (dst->remote_ip.sa.sa_family == AF_INET6)
> > -   max_mtu -= VXLAN6_HEADROOM;
> > -   else
> > -   max_mtu -= VXLAN_HEADROOM;
> > -
> > -   if (new_mtu < 68)
> > -   return -EINVAL;
> > +   use_ipv6 = true;
> >  
> > -   if (new_mtu > max_mtu) {
> > -   if (strict)
> > +   /* We re-check this, because users *could* alter the mtu of the
> > +* lower device after we've initialized dev->max_mtu.
> > +*/
> > +   if (lowerdev) {
> > +   dev->max_mtu = lowerdev->mtu -
> > +  (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
> > +   if (new_mtu > dev->max_mtu)
> > return -EINVAL;
> > -
> > -   new_mtu = max_mtu;
> > }
> >  
> > dev->mtu = new_mtu;
> > return 0;
> >  }
> 
> Sorry for the silly question, how does the min_mtu and max_mtu stuff
> works? I noticed your patches but haven't looked in depth into them.
> 
> When the ndo_change_mtu callback is defined, is the dev->min_mtu and
> dev->max_mtu checked first and if the desired mtu is not within range,
> ndo_change_mtu is not called?
> 
> Or does ndo_change_mtu override the checks?

The former. If the new value is outside min/max, ndo_change_mtu doesn't
get called, which is exactly the chicken and egg problem I introduced by
setting max_mtu to 1500 in ether_setup before having all drivers that call
ether_setup set a more appropriate max_mtu first. :\

> In either case, the code does not look correct. In the first case,
> increasing of lowerdev MTU wouldn't allow increasing of vxlan MTU
> without deleting and recreating the vxlan interface. In the second
> case, you're missing check against the min_mtu.

Okay, this sounds like a similar case to bridge that Sabrina pointed out.
Looks like virtual devices will need to just set no max_mtu directly (or
IP_MAX_MTU), and do dynamic checks in their ndo_change_mtu if they need to
compare against underlying devices on the fly.

...
> > @@ -2847,9 +2842,14 @@ static int vxlan_dev_configure(struct net *src_net, 
> > struct net_device *dev,
> > }
> >  
> > if (conf->mtu) {
> > -   err = __vxlan_change_mtu(dev, lowerdev, dst, conf->mtu, false);
> > -   if (err)
> > -   return err;
> > +   if (lowerdev)
> > +   dev->max_mtu = lowerdev->mtu;
> > +   dev->max_mtu -= (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
> > +
> > +   dev->mtu = conf->mtu;
> > +
> > +   if (conf->mtu > dev->max_mtu)
> > +   dev->mtu = dev->max_mtu;
> > }
> 
> You removed the check for min_mtu but it's needed here. The conf->mtu
> value comes from the user space and can be anything.

Hm. Not sure why I did that... Will put it back now...

-- 
Jarod Wilson
ja...@redhat.com



Re: [PATCH net-next 4/6] net: use core MTU range checking in core net infra

2016-10-19 Thread Jarod Wilson
On Wed, Oct 19, 2016 at 02:17:03PM +0200, Jiri Benc wrote:
> On Tue, 18 Oct 2016 22:33:31 -0400, Jarod Wilson wrote:
> > --- a/drivers/net/vxlan.c
> > +++ b/drivers/net/vxlan.c
> > @@ -2367,43 +2367,31 @@ static void vxlan_set_multicast_list(struct 
> > net_device *dev)
> >  {
> >  }
> >  
> > -static int __vxlan_change_mtu(struct net_device *dev,
> > - struct net_device *lowerdev,
> > - struct vxlan_rdst *dst, int new_mtu, bool strict)
> > +static int vxlan_change_mtu(struct net_device *dev, int new_mtu)
> >  {
> > -   int max_mtu = IP_MAX_MTU;
> > -
> > -   if (lowerdev)
> > -   max_mtu = lowerdev->mtu;
> > +   struct vxlan_dev *vxlan = netdev_priv(dev);
> > +   struct vxlan_rdst *dst = >default_dst;
> > +   struct net_device *lowerdev = __dev_get_by_index(vxlan->net,
> > +dst->remote_ifindex);
> > +   bool use_ipv6 = false;
> >  
> > if (dst->remote_ip.sa.sa_family == AF_INET6)
> > -   max_mtu -= VXLAN6_HEADROOM;
> > -   else
> > -   max_mtu -= VXLAN_HEADROOM;
> > -
> > -   if (new_mtu < 68)
> > -   return -EINVAL;
> > +   use_ipv6 = true;
> >  
> > -   if (new_mtu > max_mtu) {
> > -   if (strict)
> > +   /* We re-check this, because users *could* alter the mtu of the
> > +* lower device after we've initialized dev->max_mtu.
> > +*/
> > +   if (lowerdev) {
> > +   dev->max_mtu = lowerdev->mtu -
> > +  (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
> > +   if (new_mtu > dev->max_mtu)
> > return -EINVAL;
> > -
> > -   new_mtu = max_mtu;
> > }
> >  
> > dev->mtu = new_mtu;
> > return 0;
> >  }
> 
> Sorry for the silly question, how does the min_mtu and max_mtu stuff
> works? I noticed your patches but haven't looked in depth into them.
> 
> When the ndo_change_mtu callback is defined, is the dev->min_mtu and
> dev->max_mtu checked first and if the desired mtu is not within range,
> ndo_change_mtu is not called?
> 
> Or does ndo_change_mtu override the checks?

The former. If the new value is outside min/max, ndo_change_mtu doesn't
get called, which is exactly the chicken and egg problem I introduced by
setting max_mtu to 1500 in ether_setup before having all drivers that call
ether_setup set a more appropriate max_mtu first. :\

> In either case, the code does not look correct. In the first case,
> increasing of lowerdev MTU wouldn't allow increasing of vxlan MTU
> without deleting and recreating the vxlan interface. In the second
> case, you're missing check against the min_mtu.

Okay, this sounds like a similar case to bridge that Sabrina pointed out.
Looks like virtual devices will need to just set no max_mtu directly (or
IP_MAX_MTU), and do dynamic checks in their ndo_change_mtu if they need to
compare against underlying devices on the fly.

...
> > @@ -2847,9 +2842,14 @@ static int vxlan_dev_configure(struct net *src_net, 
> > struct net_device *dev,
> > }
> >  
> > if (conf->mtu) {
> > -   err = __vxlan_change_mtu(dev, lowerdev, dst, conf->mtu, false);
> > -   if (err)
> > -   return err;
> > +   if (lowerdev)
> > +   dev->max_mtu = lowerdev->mtu;
> > +   dev->max_mtu -= (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
> > +
> > +   dev->mtu = conf->mtu;
> > +
> > +   if (conf->mtu > dev->max_mtu)
> > +   dev->mtu = dev->max_mtu;
> > }
> 
> You removed the check for min_mtu but it's needed here. The conf->mtu
> value comes from the user space and can be anything.

Hm. Not sure why I did that... Will put it back now...

-- 
Jarod Wilson
ja...@redhat.com



Re: [PATCH net-next 4/6] net: use core MTU range checking in core net infra

2016-10-19 Thread Jarod Wilson
On Wed, Oct 19, 2016 at 03:55:29PM +0200, Sabrina Dubroca wrote:
> 2016-10-18, 22:33:31 -0400, Jarod Wilson wrote:
> > geneve:
> > - Merge __geneve_change_mtu back into geneve_change_mtu, set max_mtu
> > - This one isn't quite as straight-forward as others, could use some
> >   closer inspection and testing
> > 
> > macvlan:
> > - set min/max_mtu
> > 
> > tun:
> > - set min/max_mtu, remove tun_net_change_mtu
> > 
> > vxlan:
> > - Merge __vxlan_change_mtu back into vxlan_change_mtu, set min/max_mtu
> > - This one is also not as straight-forward and could use closer inspection
> >   and testing from vxlan folks
> > 
> > bridge:
> > - set max_mtu via br_min_mtu()
> > 
> > openvswitch:
> > - set min/max_mtu, remove internal_dev_change_mtu
> > - note: max_mtu wasn't checked previously, it's been set to 65535, which
> >   is the largest possible size supported
> > 
> > sch_teql:
> > - set min/max_mtu (note: max_mtu previously unchecked, used max of 65535)
> 
> Nothing for other virtual netdevices? (dummy, veth, bond, etc) Their
> MTU is limited to 1500 now.  Also missing macsec and ip_gre, probably
> others that are using ether_setup.

Yeah, I've clearly missed more than I thought. Doing another sweep now.
I'm thinking more and more that we ought to back out the patch that sets
min/max in ether_setup, save it for last, after we're sure everyone that
calls it has been prepared.

> [...]
> > diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
> > index 89a687f..81fc79a 100644
> > --- a/net/bridge/br_device.c
> > +++ b/net/bridge/br_device.c
> > @@ -184,17 +184,15 @@ static struct rtnl_link_stats64 
> > *br_get_stats64(struct net_device *dev,
> >  
> >  static int br_change_mtu(struct net_device *dev, int new_mtu)
> >  {
> > +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
> > struct net_bridge *br = netdev_priv(dev);
> > -   if (new_mtu < 68 || new_mtu > br_min_mtu(br))
> > -   return -EINVAL;
> > -
> > -   dev->mtu = new_mtu;
> >  
> > -#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
> > /* remember the MTU in the rtable for PMTU */
> > dst_metric_set(>fake_rtable.dst, RTAX_MTU, new_mtu);
> >  #endif
> >  
> > +   dev->mtu = new_mtu;
> > +
> > return 0;
> >  }
> >  
> > @@ -390,6 +388,7 @@ void br_dev_setup(struct net_device *dev)
> > dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
> >NETIF_F_HW_VLAN_STAG_TX;
> > dev->vlan_features = COMMON_FEATURES;
> > +   dev->max_mtu = br_min_mtu(br);
> 
> br_min_mtu uses br->port_list, which is only initialized a few lines
> later (right after the spin_lock_init() at the end of the context of
> this diff).

Ah, okay, I'd just grouped it with the other dev->foo settings.

> Besides, I don't think this works: br_min_mtu(br) changes when you add
> and remove ports, or when you change the MTU of an enslaved
> device. But this makes the max MTU for the bridge fixed (to 1500).

Okay, how about this: set no max_mtu (or set it to IP_MAX_MTU/65535), and
then retain a check against the possibly ever-changing br_min_mtu(br) in
br_change_mtu()?

> > diff --git a/net/openvswitch/vport-internal_dev.c 
> > b/net/openvswitch/vport-internal_dev.c
> > index e7da290..d5d6cae 100644
> > --- a/net/openvswitch/vport-internal_dev.c
> > +++ b/net/openvswitch/vport-internal_dev.c
> > @@ -89,15 +89,6 @@ static const struct ethtool_ops internal_dev_ethtool_ops 
> > = {
> > .get_link   = ethtool_op_get_link,
> >  };
> >  
> > -static int internal_dev_change_mtu(struct net_device *netdev, int new_mtu)
> > -{
> > -   if (new_mtu < 68)
> > -   return -EINVAL;
> > -
> > -   netdev->mtu = new_mtu;
> > -   return 0;
> > -}
> > -
> >  static void internal_dev_destructor(struct net_device *dev)
> >  {
> > struct vport *vport = ovs_internal_dev_get_vport(dev);
> > @@ -148,7 +139,6 @@ static const struct net_device_ops 
> > internal_dev_netdev_ops = {
> > .ndo_stop = internal_dev_stop,
> > .ndo_start_xmit = internal_dev_xmit,
> > .ndo_set_mac_address = eth_mac_addr,
> > -   .ndo_change_mtu = internal_dev_change_mtu,
> > .ndo_get_stats64 = internal_get_stats,
> > .ndo_set_rx_headroom = internal_set_rx_headroom,
> >  };
> 
> vport-internal uses ether_setup, so the MTU is currently limited to
> 1500, no?

Yeah. Sweep ongoing...

-- 
Jarod Wilson
ja...@redhat.com



Re: [PATCH net-next 4/6] net: use core MTU range checking in core net infra

2016-10-19 Thread Jarod Wilson
On Wed, Oct 19, 2016 at 03:55:29PM +0200, Sabrina Dubroca wrote:
> 2016-10-18, 22:33:31 -0400, Jarod Wilson wrote:
> > geneve:
> > - Merge __geneve_change_mtu back into geneve_change_mtu, set max_mtu
> > - This one isn't quite as straight-forward as others, could use some
> >   closer inspection and testing
> > 
> > macvlan:
> > - set min/max_mtu
> > 
> > tun:
> > - set min/max_mtu, remove tun_net_change_mtu
> > 
> > vxlan:
> > - Merge __vxlan_change_mtu back into vxlan_change_mtu, set min/max_mtu
> > - This one is also not as straight-forward and could use closer inspection
> >   and testing from vxlan folks
> > 
> > bridge:
> > - set max_mtu via br_min_mtu()
> > 
> > openvswitch:
> > - set min/max_mtu, remove internal_dev_change_mtu
> > - note: max_mtu wasn't checked previously, it's been set to 65535, which
> >   is the largest possible size supported
> > 
> > sch_teql:
> > - set min/max_mtu (note: max_mtu previously unchecked, used max of 65535)
> 
> Nothing for other virtual netdevices? (dummy, veth, bond, etc) Their
> MTU is limited to 1500 now.  Also missing macsec and ip_gre, probably
> others that are using ether_setup.

Yeah, I've clearly missed more than I thought. Doing another sweep now.
I'm thinking more and more that we ought to back out the patch that sets
min/max in ether_setup, save it for last, after we're sure everyone that
calls it has been prepared.

> [...]
> > diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
> > index 89a687f..81fc79a 100644
> > --- a/net/bridge/br_device.c
> > +++ b/net/bridge/br_device.c
> > @@ -184,17 +184,15 @@ static struct rtnl_link_stats64 
> > *br_get_stats64(struct net_device *dev,
> >  
> >  static int br_change_mtu(struct net_device *dev, int new_mtu)
> >  {
> > +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
> > struct net_bridge *br = netdev_priv(dev);
> > -   if (new_mtu < 68 || new_mtu > br_min_mtu(br))
> > -   return -EINVAL;
> > -
> > -   dev->mtu = new_mtu;
> >  
> > -#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
> > /* remember the MTU in the rtable for PMTU */
> > dst_metric_set(>fake_rtable.dst, RTAX_MTU, new_mtu);
> >  #endif
> >  
> > +   dev->mtu = new_mtu;
> > +
> > return 0;
> >  }
> >  
> > @@ -390,6 +388,7 @@ void br_dev_setup(struct net_device *dev)
> > dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
> >NETIF_F_HW_VLAN_STAG_TX;
> > dev->vlan_features = COMMON_FEATURES;
> > +   dev->max_mtu = br_min_mtu(br);
> 
> br_min_mtu uses br->port_list, which is only initialized a few lines
> later (right after the spin_lock_init() at the end of the context of
> this diff).

Ah, okay, I'd just grouped it with the other dev->foo settings.

> Besides, I don't think this works: br_min_mtu(br) changes when you add
> and remove ports, or when you change the MTU of an enslaved
> device. But this makes the max MTU for the bridge fixed (to 1500).

Okay, how about this: set no max_mtu (or set it to IP_MAX_MTU/65535), and
then retain a check against the possibly ever-changing br_min_mtu(br) in
br_change_mtu()?

> > diff --git a/net/openvswitch/vport-internal_dev.c 
> > b/net/openvswitch/vport-internal_dev.c
> > index e7da290..d5d6cae 100644
> > --- a/net/openvswitch/vport-internal_dev.c
> > +++ b/net/openvswitch/vport-internal_dev.c
> > @@ -89,15 +89,6 @@ static const struct ethtool_ops internal_dev_ethtool_ops 
> > = {
> > .get_link   = ethtool_op_get_link,
> >  };
> >  
> > -static int internal_dev_change_mtu(struct net_device *netdev, int new_mtu)
> > -{
> > -   if (new_mtu < 68)
> > -   return -EINVAL;
> > -
> > -   netdev->mtu = new_mtu;
> > -   return 0;
> > -}
> > -
> >  static void internal_dev_destructor(struct net_device *dev)
> >  {
> > struct vport *vport = ovs_internal_dev_get_vport(dev);
> > @@ -148,7 +139,6 @@ static const struct net_device_ops 
> > internal_dev_netdev_ops = {
> > .ndo_stop = internal_dev_stop,
> > .ndo_start_xmit = internal_dev_xmit,
> > .ndo_set_mac_address = eth_mac_addr,
> > -   .ndo_change_mtu = internal_dev_change_mtu,
> > .ndo_get_stats64 = internal_get_stats,
> > .ndo_set_rx_headroom = internal_set_rx_headroom,
> >  };
> 
> vport-internal uses ether_setup, so the MTU is currently limited to
> 1500, no?

Yeah. Sweep ongoing...

-- 
Jarod Wilson
ja...@redhat.com



Re: [PATCH net-next 4/6] net: use core MTU range checking in core net infra

2016-10-19 Thread Sabrina Dubroca
2016-10-18, 22:33:31 -0400, Jarod Wilson wrote:
> geneve:
> - Merge __geneve_change_mtu back into geneve_change_mtu, set max_mtu
> - This one isn't quite as straight-forward as others, could use some
>   closer inspection and testing
> 
> macvlan:
> - set min/max_mtu
> 
> tun:
> - set min/max_mtu, remove tun_net_change_mtu
> 
> vxlan:
> - Merge __vxlan_change_mtu back into vxlan_change_mtu, set min/max_mtu
> - This one is also not as straight-forward and could use closer inspection
>   and testing from vxlan folks
> 
> bridge:
> - set max_mtu via br_min_mtu()
> 
> openvswitch:
> - set min/max_mtu, remove internal_dev_change_mtu
> - note: max_mtu wasn't checked previously, it's been set to 65535, which
>   is the largest possible size supported
> 
> sch_teql:
> - set min/max_mtu (note: max_mtu previously unchecked, used max of 65535)

Nothing for other virtual netdevices? (dummy, veth, bond, etc) Their
MTU is limited to 1500 now.  Also missing macsec and ip_gre, probably
others that are using ether_setup.


[...]
> diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
> index 89a687f..81fc79a 100644
> --- a/net/bridge/br_device.c
> +++ b/net/bridge/br_device.c
> @@ -184,17 +184,15 @@ static struct rtnl_link_stats64 *br_get_stats64(struct 
> net_device *dev,
>  
>  static int br_change_mtu(struct net_device *dev, int new_mtu)
>  {
> +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
>   struct net_bridge *br = netdev_priv(dev);
> - if (new_mtu < 68 || new_mtu > br_min_mtu(br))
> - return -EINVAL;
> -
> - dev->mtu = new_mtu;
>  
> -#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
>   /* remember the MTU in the rtable for PMTU */
>   dst_metric_set(>fake_rtable.dst, RTAX_MTU, new_mtu);
>  #endif
>  
> + dev->mtu = new_mtu;
> +
>   return 0;
>  }
>  
> @@ -390,6 +388,7 @@ void br_dev_setup(struct net_device *dev)
>   dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
>  NETIF_F_HW_VLAN_STAG_TX;
>   dev->vlan_features = COMMON_FEATURES;
> + dev->max_mtu = br_min_mtu(br);

br_min_mtu uses br->port_list, which is only initialized a few lines
later (right after the spin_lock_init() at the end of the context of
this diff).

Besides, I don't think this works: br_min_mtu(br) changes when you add
and remove ports, or when you change the MTU of an enslaved
device. But this makes the max MTU for the bridge fixed (to 1500).

>  
>   br->dev = dev;
>   spin_lock_init(>lock);

> diff --git a/net/openvswitch/vport-internal_dev.c 
> b/net/openvswitch/vport-internal_dev.c
> index e7da290..d5d6cae 100644
> --- a/net/openvswitch/vport-internal_dev.c
> +++ b/net/openvswitch/vport-internal_dev.c
> @@ -89,15 +89,6 @@ static const struct ethtool_ops internal_dev_ethtool_ops = 
> {
>   .get_link   = ethtool_op_get_link,
>  };
>  
> -static int internal_dev_change_mtu(struct net_device *netdev, int new_mtu)
> -{
> - if (new_mtu < 68)
> - return -EINVAL;
> -
> - netdev->mtu = new_mtu;
> - return 0;
> -}
> -
>  static void internal_dev_destructor(struct net_device *dev)
>  {
>   struct vport *vport = ovs_internal_dev_get_vport(dev);
> @@ -148,7 +139,6 @@ static const struct net_device_ops 
> internal_dev_netdev_ops = {
>   .ndo_stop = internal_dev_stop,
>   .ndo_start_xmit = internal_dev_xmit,
>   .ndo_set_mac_address = eth_mac_addr,
> - .ndo_change_mtu = internal_dev_change_mtu,
>   .ndo_get_stats64 = internal_get_stats,
>   .ndo_set_rx_headroom = internal_set_rx_headroom,
>  };

vport-internal uses ether_setup, so the MTU is currently limited to
1500, no?


-- 
Sabrina


Re: [PATCH net-next 4/6] net: use core MTU range checking in core net infra

2016-10-19 Thread Sabrina Dubroca
2016-10-18, 22:33:31 -0400, Jarod Wilson wrote:
> geneve:
> - Merge __geneve_change_mtu back into geneve_change_mtu, set max_mtu
> - This one isn't quite as straight-forward as others, could use some
>   closer inspection and testing
> 
> macvlan:
> - set min/max_mtu
> 
> tun:
> - set min/max_mtu, remove tun_net_change_mtu
> 
> vxlan:
> - Merge __vxlan_change_mtu back into vxlan_change_mtu, set min/max_mtu
> - This one is also not as straight-forward and could use closer inspection
>   and testing from vxlan folks
> 
> bridge:
> - set max_mtu via br_min_mtu()
> 
> openvswitch:
> - set min/max_mtu, remove internal_dev_change_mtu
> - note: max_mtu wasn't checked previously, it's been set to 65535, which
>   is the largest possible size supported
> 
> sch_teql:
> - set min/max_mtu (note: max_mtu previously unchecked, used max of 65535)

Nothing for other virtual netdevices? (dummy, veth, bond, etc) Their
MTU is limited to 1500 now.  Also missing macsec and ip_gre, probably
others that are using ether_setup.


[...]
> diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
> index 89a687f..81fc79a 100644
> --- a/net/bridge/br_device.c
> +++ b/net/bridge/br_device.c
> @@ -184,17 +184,15 @@ static struct rtnl_link_stats64 *br_get_stats64(struct 
> net_device *dev,
>  
>  static int br_change_mtu(struct net_device *dev, int new_mtu)
>  {
> +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
>   struct net_bridge *br = netdev_priv(dev);
> - if (new_mtu < 68 || new_mtu > br_min_mtu(br))
> - return -EINVAL;
> -
> - dev->mtu = new_mtu;
>  
> -#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
>   /* remember the MTU in the rtable for PMTU */
>   dst_metric_set(>fake_rtable.dst, RTAX_MTU, new_mtu);
>  #endif
>  
> + dev->mtu = new_mtu;
> +
>   return 0;
>  }
>  
> @@ -390,6 +388,7 @@ void br_dev_setup(struct net_device *dev)
>   dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
>  NETIF_F_HW_VLAN_STAG_TX;
>   dev->vlan_features = COMMON_FEATURES;
> + dev->max_mtu = br_min_mtu(br);

br_min_mtu uses br->port_list, which is only initialized a few lines
later (right after the spin_lock_init() at the end of the context of
this diff).

Besides, I don't think this works: br_min_mtu(br) changes when you add
and remove ports, or when you change the MTU of an enslaved
device. But this makes the max MTU for the bridge fixed (to 1500).

>  
>   br->dev = dev;
>   spin_lock_init(>lock);

> diff --git a/net/openvswitch/vport-internal_dev.c 
> b/net/openvswitch/vport-internal_dev.c
> index e7da290..d5d6cae 100644
> --- a/net/openvswitch/vport-internal_dev.c
> +++ b/net/openvswitch/vport-internal_dev.c
> @@ -89,15 +89,6 @@ static const struct ethtool_ops internal_dev_ethtool_ops = 
> {
>   .get_link   = ethtool_op_get_link,
>  };
>  
> -static int internal_dev_change_mtu(struct net_device *netdev, int new_mtu)
> -{
> - if (new_mtu < 68)
> - return -EINVAL;
> -
> - netdev->mtu = new_mtu;
> - return 0;
> -}
> -
>  static void internal_dev_destructor(struct net_device *dev)
>  {
>   struct vport *vport = ovs_internal_dev_get_vport(dev);
> @@ -148,7 +139,6 @@ static const struct net_device_ops 
> internal_dev_netdev_ops = {
>   .ndo_stop = internal_dev_stop,
>   .ndo_start_xmit = internal_dev_xmit,
>   .ndo_set_mac_address = eth_mac_addr,
> - .ndo_change_mtu = internal_dev_change_mtu,
>   .ndo_get_stats64 = internal_get_stats,
>   .ndo_set_rx_headroom = internal_set_rx_headroom,
>  };

vport-internal uses ether_setup, so the MTU is currently limited to
1500, no?


-- 
Sabrina


[PATCH net-next 4/6] net: use core MTU range checking in core net infra

2016-10-18 Thread Jarod Wilson
geneve:
- Merge __geneve_change_mtu back into geneve_change_mtu, set max_mtu
- This one isn't quite as straight-forward as others, could use some
  closer inspection and testing

macvlan:
- set min/max_mtu

tun:
- set min/max_mtu, remove tun_net_change_mtu

vxlan:
- Merge __vxlan_change_mtu back into vxlan_change_mtu, set min/max_mtu
- This one is also not as straight-forward and could use closer inspection
  and testing from vxlan folks

bridge:
- set max_mtu via br_min_mtu()

openvswitch:
- set min/max_mtu, remove internal_dev_change_mtu
- note: max_mtu wasn't checked previously, it's been set to 65535, which
  is the largest possible size supported

sch_teql:
- set min/max_mtu (note: max_mtu previously unchecked, used max of 65535)

CC: net...@vger.kernel.org
CC: Nicolas Dichtel 
CC: Hannes Frederic Sowa 
CC: Tom Herbert 
CC: Daniel Borkmann 
CC: Alexander Duyck 
CC: Paolo Abeni 
CC: Jiri Benc 
CC: WANG Cong 
CC: Roopa Prabhu 
CC: Pravin B Shelar 
CC: Sabrina Dubroca 
CC: Patrick McHardy 
CC: Stephen Hemminger 
CC: Pravin Shelar 
Signed-off-by: Jarod Wilson 
---
 drivers/net/geneve.c | 48 +++-
 drivers/net/macvlan.c|  6 +++-
 drivers/net/tun.c| 20 
 drivers/net/vxlan.c  | 62 ++--
 net/bridge/br_device.c   |  9 +++---
 net/openvswitch/vport-internal_dev.c | 10 --
 net/sched/sch_teql.c |  5 ++-
 7 files changed, 67 insertions(+), 93 deletions(-)

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 3c20e87..752bcaa 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -1034,39 +1034,18 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, 
struct net_device *dev)
return geneve_xmit_skb(skb, dev, info);
 }
 
-static int __geneve_change_mtu(struct net_device *dev, int new_mtu, bool 
strict)
+static int geneve_change_mtu(struct net_device *dev, int new_mtu)
 {
-   struct geneve_dev *geneve = netdev_priv(dev);
-   /* The max_mtu calculation does not take account of GENEVE
-* options, to avoid excluding potentially valid
-* configurations.
+   /* Only possible if called internally, ndo_change_mtu path's new_mtu
+* is guaranteed to be between dev->min_mtu and dev->max_mtu.
 */
-   int max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - dev->hard_header_len;
-
-   if (geneve->remote.sa.sa_family == AF_INET6)
-   max_mtu -= sizeof(struct ipv6hdr);
-   else
-   max_mtu -= sizeof(struct iphdr);
-
-   if (new_mtu < 68)
-   return -EINVAL;
-
-   if (new_mtu > max_mtu) {
-   if (strict)
-   return -EINVAL;
-
-   new_mtu = max_mtu;
-   }
+   if (new_mtu > dev->max_mtu)
+   new_mtu = dev->max_mtu;
 
dev->mtu = new_mtu;
return 0;
 }
 
-static int geneve_change_mtu(struct net_device *dev, int new_mtu)
-{
-   return __geneve_change_mtu(dev, new_mtu, true);
-}
-
 static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff 
*skb)
 {
struct ip_tunnel_info *info = skb_tunnel_info(skb);
@@ -1170,6 +1149,14 @@ static void geneve_setup(struct net_device *dev)
dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
dev->hw_features |= NETIF_F_GSO_SOFTWARE;
 
+   /* MTU range: 68 - (something less than 65535) */
+   dev->min_mtu = ETH_MIN_MTU;
+   /* The max_mtu calculation does not take account of GENEVE
+* options, to avoid excluding potentially valid
+* configurations. This will be further reduced by IPvX hdr size.
+*/
+   dev->max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - dev->hard_header_len;
+
netif_keep_dst(dev);
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
@@ -1285,10 +1272,13 @@ static int geneve_configure(struct net *net, struct 
net_device *dev,
 
/* make enough headroom for basic scenario */
encap_len = GENEVE_BASE_HLEN + ETH_HLEN;
-   if (remote->sa.sa_family == AF_INET)
+   if (remote->sa.sa_family == AF_INET) {
encap_len += sizeof(struct iphdr);
-   else
+   dev->max_mtu -= sizeof(struct iphdr);
+   } else {
encap_len += sizeof(struct ipv6hdr);
+   dev->max_mtu -= sizeof(struct ipv6hdr);
+   }
dev->needed_headroom = encap_len + ETH_HLEN;
 
if (metadata) {
@@ -1488,7 +1478,7 @@ struct net_device *geneve_dev_create_fb(struct net *net, 
const char *name,

[PATCH net-next 4/6] net: use core MTU range checking in core net infra

2016-10-18 Thread Jarod Wilson
geneve:
- Merge __geneve_change_mtu back into geneve_change_mtu, set max_mtu
- This one isn't quite as straight-forward as others, could use some
  closer inspection and testing

macvlan:
- set min/max_mtu

tun:
- set min/max_mtu, remove tun_net_change_mtu

vxlan:
- Merge __vxlan_change_mtu back into vxlan_change_mtu, set min/max_mtu
- This one is also not as straight-forward and could use closer inspection
  and testing from vxlan folks

bridge:
- set max_mtu via br_min_mtu()

openvswitch:
- set min/max_mtu, remove internal_dev_change_mtu
- note: max_mtu wasn't checked previously, it's been set to 65535, which
  is the largest possible size supported

sch_teql:
- set min/max_mtu (note: max_mtu previously unchecked, used max of 65535)

CC: net...@vger.kernel.org
CC: Nicolas Dichtel 
CC: Hannes Frederic Sowa 
CC: Tom Herbert 
CC: Daniel Borkmann 
CC: Alexander Duyck 
CC: Paolo Abeni 
CC: Jiri Benc 
CC: WANG Cong 
CC: Roopa Prabhu 
CC: Pravin B Shelar 
CC: Sabrina Dubroca 
CC: Patrick McHardy 
CC: Stephen Hemminger 
CC: Pravin Shelar 
Signed-off-by: Jarod Wilson 
---
 drivers/net/geneve.c | 48 +++-
 drivers/net/macvlan.c|  6 +++-
 drivers/net/tun.c| 20 
 drivers/net/vxlan.c  | 62 ++--
 net/bridge/br_device.c   |  9 +++---
 net/openvswitch/vport-internal_dev.c | 10 --
 net/sched/sch_teql.c |  5 ++-
 7 files changed, 67 insertions(+), 93 deletions(-)

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 3c20e87..752bcaa 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -1034,39 +1034,18 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, 
struct net_device *dev)
return geneve_xmit_skb(skb, dev, info);
 }
 
-static int __geneve_change_mtu(struct net_device *dev, int new_mtu, bool 
strict)
+static int geneve_change_mtu(struct net_device *dev, int new_mtu)
 {
-   struct geneve_dev *geneve = netdev_priv(dev);
-   /* The max_mtu calculation does not take account of GENEVE
-* options, to avoid excluding potentially valid
-* configurations.
+   /* Only possible if called internally, ndo_change_mtu path's new_mtu
+* is guaranteed to be between dev->min_mtu and dev->max_mtu.
 */
-   int max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - dev->hard_header_len;
-
-   if (geneve->remote.sa.sa_family == AF_INET6)
-   max_mtu -= sizeof(struct ipv6hdr);
-   else
-   max_mtu -= sizeof(struct iphdr);
-
-   if (new_mtu < 68)
-   return -EINVAL;
-
-   if (new_mtu > max_mtu) {
-   if (strict)
-   return -EINVAL;
-
-   new_mtu = max_mtu;
-   }
+   if (new_mtu > dev->max_mtu)
+   new_mtu = dev->max_mtu;
 
dev->mtu = new_mtu;
return 0;
 }
 
-static int geneve_change_mtu(struct net_device *dev, int new_mtu)
-{
-   return __geneve_change_mtu(dev, new_mtu, true);
-}
-
 static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff 
*skb)
 {
struct ip_tunnel_info *info = skb_tunnel_info(skb);
@@ -1170,6 +1149,14 @@ static void geneve_setup(struct net_device *dev)
dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
dev->hw_features |= NETIF_F_GSO_SOFTWARE;
 
+   /* MTU range: 68 - (something less than 65535) */
+   dev->min_mtu = ETH_MIN_MTU;
+   /* The max_mtu calculation does not take account of GENEVE
+* options, to avoid excluding potentially valid
+* configurations. This will be further reduced by IPvX hdr size.
+*/
+   dev->max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - dev->hard_header_len;
+
netif_keep_dst(dev);
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
@@ -1285,10 +1272,13 @@ static int geneve_configure(struct net *net, struct 
net_device *dev,
 
/* make enough headroom for basic scenario */
encap_len = GENEVE_BASE_HLEN + ETH_HLEN;
-   if (remote->sa.sa_family == AF_INET)
+   if (remote->sa.sa_family == AF_INET) {
encap_len += sizeof(struct iphdr);
-   else
+   dev->max_mtu -= sizeof(struct iphdr);
+   } else {
encap_len += sizeof(struct ipv6hdr);
+   dev->max_mtu -= sizeof(struct ipv6hdr);
+   }
dev->needed_headroom = encap_len + ETH_HLEN;
 
if (metadata) {
@@ -1488,7 +1478,7 @@ struct net_device *geneve_dev_create_fb(struct net *net, 
const char *name,
/* openvswitch users expect packet sizes to be unrestricted,
 * so set the largest MTU we can.
 */
-   err = __geneve_change_mtu(dev, IP_MAX_MTU, false);
+   err = geneve_change_mtu(dev, IP_MAX_MTU);
if (err)
goto err;
 
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index