Re: [PATCH net-next v2] bridge: fix hello and hold timers starting/stopping

2017-05-19 Thread Hangbin Liu
On Fri, May 19, 2017 at 07:30:43PM +0200, Ivan Vecera wrote:
> Current bridge code incorrectly handles starting/stopping of hello and
> hold timers during STP enable/disable.
> 
> 1. Timers are stopped in br_stp_start() during NO_STP->USER_STP
>transition. The timers are already stopped in NO_STP state so
>this is a confusing no-op.

Hi Ivan,

Shouldn't we start hello timer in br_stp_start when NO_STP -> BR_KERNEL_STP ?
> 
> 2. During USER_STP->NO_STP transition the timers are started. This
>does not make sense and is confusing because the timer should not be
>active in NO_STP state.

Yes, but what about BR_KERNEL_STP -> NO_STP in function br_stp_stop() ?
> 
> Cc: da...@davemloft.net
> Cc: sas...@cumulusnetworks.com
> Cc: step...@networkplumber.org
> Cc: bri...@lists.linux-foundation.org
> Cc: lucien@gmail.com
> Cc: niko...@cumulusnetworks.com
> Signed-off-by: Ivan Vecera 
> ---
>  net/bridge/br_stp_if.c | 11 ---
>  1 file changed, 11 deletions(-)
> 
> diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
> index 08341d2aa9c9..a05027027513 100644
> --- a/net/bridge/br_stp_if.c
> +++ b/net/bridge/br_stp_if.c
> @@ -150,7 +150,6 @@ static int br_stp_call_user(struct net_bridge *br, char 
> *arg)
>  
>  static void br_stp_start(struct net_bridge *br)
>  {
> - struct net_bridge_port *p;
>   int err = -ENOENT;
>  
>   if (net_eq(dev_net(br->dev), &init_net))
> @@ -169,11 +168,6 @@ static void br_stp_start(struct net_bridge *br)
>   if (!err) {
>   br->stp_enabled = BR_USER_STP;
>   br_debug(br, "userspace STP started\n");
> -
> - /* Stop hello and hold timers */
> - del_timer(&br->hello_timer);
> - list_for_each_entry(p, &br->port_list, list)
> - del_timer(&p->hold_timer);

I'm not sure whether the user space daemon will send BPDUs or not. In commit
76b91c32dd86 ("bridge: stp: when using userspace stp stop kernel hello and
hold timers"), Nikolay said we should not handle it with BR_USER_STP.

>   } else {
>   br->stp_enabled = BR_KERNEL_STP;
>   br_debug(br, "using kernel STP\n");
> @@ -187,7 +181,6 @@ static void br_stp_start(struct net_bridge *br)
>  
>  static void br_stp_stop(struct net_bridge *br)
>  {
> - struct net_bridge_port *p;
>   int err;
>  
>   if (br->stp_enabled == BR_USER_STP) {
> @@ -196,10 +189,6 @@ static void br_stp_stop(struct net_bridge *br)
>   br_err(br, "failed to stop userspace STP (%d)\n", err);
>  
>   /* To start timers on any ports left in blocking */
> - mod_timer(&br->hello_timer, jiffies + br->hello_time);
> - list_for_each_entry(p, &br->port_list, list)
> - mod_timer(&p->hold_timer,
> -   round_jiffies(jiffies + BR_HOLD_TIME));

If we do not delete hello_timer, then after it expires in br_hello_timer_expired(),
our state is br->dev->flags & IFF_UP and br->stp_enabled == NO_STP, so it will
call mod_timer(&br->hello_timer, round_jiffies(jiffies + br->hello_time))
and we will keep sending BPDU messages even after STP has stopped.

>   spin_lock_bh(&br->lock);
>   br_port_state_selection(br);
>   spin_unlock_bh(&br->lock);
> -- 

So how about just like

diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index d8ad73b..0198f62 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -183,6 +183,7 @@ static void br_stp_start(struct net_bridge *br)
} else {
br->stp_enabled = BR_KERNEL_STP;
br_debug(br, "using kernel STP\n");
+   mod_timer(&br->hello_timer, jiffies + br->hello_time);

/* To start timers on any ports left in blocking */
br_port_state_selection(br);
@@ -202,7 +203,6 @@ static void br_stp_stop(struct net_bridge *br)
br_err(br, "failed to stop userspace STP (%d)\n", err);

/* To start timers on any ports left in blocking */
-   mod_timer(&br->hello_timer, jiffies + br->hello_time);
list_for_each_entry(p, &br->port_list, list)
mod_timer(&p->hold_timer,
  round_jiffies(jiffies + BR_HOLD_TIME));
@@ -211,6 +211,7 @@ static void br_stp_stop(struct net_bridge *br)
spin_unlock_bh(&br->lock);
}

+   del_timer_sync(&br->hello_timer);
br->stp_enabled = BR_NO_STP;
 }

Thanks
Hangbin


[PATCH v2 net-next] net: ipv6: fix code style error and warning of ndisc.c

2017-05-19 Thread yuan linyu
From: yuan linyu 

Signed-off-by: yuan linyu 
---
 net/ipv6/ndisc.c | 300 ---
 1 file changed, 155 insertions(+), 145 deletions(-)

diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index d310dc4..5a3dfaa 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -12,8 +12,7 @@
  *  2 of the License, or (at your option) any later version.
  */
 
-/*
- * Changes:
+/* Changes:
  *
  * Alexey I. Froloff   :   RFC6106 (DNSSL) support
  * Pierre Ynard:   export userland ND options
@@ -99,7 +98,6 @@ static const struct neigh_ops ndisc_hh_ops = {
.connected_output = neigh_resolve_output,
 };
 
-
 static const struct neigh_ops ndisc_direct_ops = {
.family =   AF_INET6,
.output =   neigh_direct_output,
@@ -147,13 +145,13 @@ void __ndisc_fill_addr_option(struct sk_buff *skb, int 
type, void *data,
u8 *opt = skb_put(skb, space);
 
opt[0] = type;
-   opt[1] = space>>3;
+   opt[1] = space >> 3;
 
memset(opt + 2, 0, pad);
opt   += pad;
space -= pad;
 
-   memcpy(opt+2, data, data_len);
+   memcpy(opt + 2, data, data_len);
data_len += 2;
opt += data_len;
space -= data_len;
@@ -182,6 +180,7 @@ static struct nd_opt_hdr *ndisc_next_option(struct 
nd_opt_hdr *cur,
struct nd_opt_hdr *end)
 {
int type;
+
if (!cur || !end || cur >= end)
return NULL;
type = cur->nd_opt_type;
@@ -222,6 +221,7 @@ struct ndisc_options *ndisc_parse_options(const struct 
net_device *dev,
memset(ndopts, 0, sizeof(*ndopts));
while (opt_len) {
int l;
+
if (opt_len < sizeof(struct nd_opt_hdr))
return NULL;
l = nd_opt->nd_opt_len << 3;
@@ -240,13 +240,15 @@ struct ndisc_options *ndisc_parse_options(const struct 
net_device *dev,
  "%s: duplicated ND6 option found: 
type=%d\n",
  __func__, nd_opt->nd_opt_type);
} else {
-   ndopts->nd_opt_array[nd_opt->nd_opt_type] = 
nd_opt;
+   ndopts->nd_opt_array[nd_opt->nd_opt_type] =
+   nd_opt;
}
break;
case ND_OPT_PREFIX_INFO:
ndopts->nd_opts_pi_end = nd_opt;
if (!ndopts->nd_opt_array[nd_opt->nd_opt_type])
-   ndopts->nd_opt_array[nd_opt->nd_opt_type] = 
nd_opt;
+   ndopts->nd_opt_array[nd_opt->nd_opt_type] =
+   nd_opt;
break;
 #ifdef CONFIG_IPV6_ROUTE_INFO
case ND_OPT_ROUTE_INFO:
@@ -261,8 +263,7 @@ struct ndisc_options *ndisc_parse_options(const struct 
net_device *dev,
if (!ndopts->nd_useropts)
ndopts->nd_useropts = nd_opt;
} else {
-   /*
-* Unknown options must be silently ignored,
+   /* Unknown options must be silently ignored,
 * to accommodate future extension to the
 * protocol.
 */
@@ -280,7 +281,8 @@ struct ndisc_options *ndisc_parse_options(const struct 
net_device *dev,
return ndopts;
 }
 
-int ndisc_mc_map(const struct in6_addr *addr, char *buf, struct net_device 
*dev, int dir)
+int ndisc_mc_map(const struct in6_addr *addr, char *buf,
+struct net_device *dev, int dir)
 {
switch (dev->type) {
case ARPHRD_ETHER:
@@ -327,9 +329,8 @@ static int ndisc_constructor(struct neighbour *neigh)
bool is_multicast = ipv6_addr_is_multicast(addr);
 
in6_dev = in6_dev_get(dev);
-   if (!in6_dev) {
+   if (!in6_dev)
return -EINVAL;
-   }
 
parms = in6_dev->nd_parms;
__neigh_parms_put(neigh->parms);
@@ -344,12 +345,12 @@ static int ndisc_constructor(struct neighbour *neigh)
if (is_multicast) {
neigh->nud_state = NUD_NOARP;
ndisc_mc_map(addr, neigh->ha, dev, 1);
-   } else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) {
+   } else if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) {
neigh->nud_state = NUD_NOARP;
memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
-   if (dev->flags&IFF_LOOPBACK)
+   if (dev->flags & IFF_LOOPBACK)
neigh->type = RTN_LOCAL;
-   

[net-next] net: ipv6: fix code style error and warning of ndisc.c

2017-05-19 Thread yuan linyu
From: yuan linyu 

Signed-off-by: yuan linyu 
---
 net/ipv6/ndisc.c | 300 ---
 1 file changed, 155 insertions(+), 145 deletions(-)

diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index d310dc4..5a3dfaa 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -12,8 +12,7 @@
  *  2 of the License, or (at your option) any later version.
  */
 
-/*
- * Changes:
+/* Changes:
  *
  * Alexey I. Froloff   :   RFC6106 (DNSSL) support
  * Pierre Ynard:   export userland ND options
@@ -99,7 +98,6 @@ static const struct neigh_ops ndisc_hh_ops = {
.connected_output = neigh_resolve_output,
 };
 
-
 static const struct neigh_ops ndisc_direct_ops = {
.family =   AF_INET6,
.output =   neigh_direct_output,
@@ -147,13 +145,13 @@ void __ndisc_fill_addr_option(struct sk_buff *skb, int 
type, void *data,
u8 *opt = skb_put(skb, space);
 
opt[0] = type;
-   opt[1] = space>>3;
+   opt[1] = space >> 3;
 
memset(opt + 2, 0, pad);
opt   += pad;
space -= pad;
 
-   memcpy(opt+2, data, data_len);
+   memcpy(opt + 2, data, data_len);
data_len += 2;
opt += data_len;
space -= data_len;
@@ -182,6 +180,7 @@ static struct nd_opt_hdr *ndisc_next_option(struct 
nd_opt_hdr *cur,
struct nd_opt_hdr *end)
 {
int type;
+
if (!cur || !end || cur >= end)
return NULL;
type = cur->nd_opt_type;
@@ -222,6 +221,7 @@ struct ndisc_options *ndisc_parse_options(const struct 
net_device *dev,
memset(ndopts, 0, sizeof(*ndopts));
while (opt_len) {
int l;
+
if (opt_len < sizeof(struct nd_opt_hdr))
return NULL;
l = nd_opt->nd_opt_len << 3;
@@ -240,13 +240,15 @@ struct ndisc_options *ndisc_parse_options(const struct 
net_device *dev,
  "%s: duplicated ND6 option found: 
type=%d\n",
  __func__, nd_opt->nd_opt_type);
} else {
-   ndopts->nd_opt_array[nd_opt->nd_opt_type] = 
nd_opt;
+   ndopts->nd_opt_array[nd_opt->nd_opt_type] =
+   nd_opt;
}
break;
case ND_OPT_PREFIX_INFO:
ndopts->nd_opts_pi_end = nd_opt;
if (!ndopts->nd_opt_array[nd_opt->nd_opt_type])
-   ndopts->nd_opt_array[nd_opt->nd_opt_type] = 
nd_opt;
+   ndopts->nd_opt_array[nd_opt->nd_opt_type] =
+   nd_opt;
break;
 #ifdef CONFIG_IPV6_ROUTE_INFO
case ND_OPT_ROUTE_INFO:
@@ -261,8 +263,7 @@ struct ndisc_options *ndisc_parse_options(const struct 
net_device *dev,
if (!ndopts->nd_useropts)
ndopts->nd_useropts = nd_opt;
} else {
-   /*
-* Unknown options must be silently ignored,
+   /* Unknown options must be silently ignored,
 * to accommodate future extension to the
 * protocol.
 */
@@ -280,7 +281,8 @@ struct ndisc_options *ndisc_parse_options(const struct 
net_device *dev,
return ndopts;
 }
 
-int ndisc_mc_map(const struct in6_addr *addr, char *buf, struct net_device 
*dev, int dir)
+int ndisc_mc_map(const struct in6_addr *addr, char *buf,
+struct net_device *dev, int dir)
 {
switch (dev->type) {
case ARPHRD_ETHER:
@@ -327,9 +329,8 @@ static int ndisc_constructor(struct neighbour *neigh)
bool is_multicast = ipv6_addr_is_multicast(addr);
 
in6_dev = in6_dev_get(dev);
-   if (!in6_dev) {
+   if (!in6_dev)
return -EINVAL;
-   }
 
parms = in6_dev->nd_parms;
__neigh_parms_put(neigh->parms);
@@ -344,12 +345,12 @@ static int ndisc_constructor(struct neighbour *neigh)
if (is_multicast) {
neigh->nud_state = NUD_NOARP;
ndisc_mc_map(addr, neigh->ha, dev, 1);
-   } else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) {
+   } else if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) {
neigh->nud_state = NUD_NOARP;
memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
-   if (dev->flags&IFF_LOOPBACK)
+   if (dev->flags & IFF_LOOPBACK)
neigh->type = RTN_LOCAL;
-   

Re: [RFC net-next PATCH 4/5] net: new XDP feature for reading HW rxhash from drivers

2017-05-19 Thread Jakub Kicinski
On Fri, 19 May 2017 20:34:00 -0700, Alexei Starovoitov wrote:
> On Fri, May 19, 2017 at 08:21:47PM -0700, Jakub Kicinski wrote:
> > On Fri, 19 May 2017 20:07:52 -0700, Alexei Starovoitov wrote:  
> > > How about exposing 'struct mlx5_cqe64 *' to XDP programs as-is?
> > > We can make sure that XDP program does read only access into it and
> > > it will see cqe->rss_hash_result, cqe->rss_hash_type and everything else
> > > in there, but this will not be uapi and it will be pretty obvious
> > > to program authors that their programs are vendor specific.
> > > 'not uapi' here means that mellanox is free to change their HW descriptor
> > > and its contents as they wish.  
> > 
> > Hm..  Would that mean we have to teach the verifier about all possible
> > drivers and their metadata structures (i.e. sizes thereof).  And add an
> > UAPI enum of known drivers?  
> 
> why? no uapi other than a pointer to this hw rx descriptor.
> Different sizeof(hw_rx_descriptor) is not a problem.
> We deal with it already in tracing. All tracepoints have different
> sizeof(*ctx), yet the safety is preserved.

Ack, quick read of tracing code reveals this indeed should work.


Re: [RFC net-next PATCH 4/5] net: new XDP feature for reading HW rxhash from drivers

2017-05-19 Thread Alexei Starovoitov
On Fri, May 19, 2017 at 08:21:47PM -0700, Jakub Kicinski wrote:
> On Fri, 19 May 2017 20:07:52 -0700, Alexei Starovoitov wrote:
> > How about exposing 'struct mlx5_cqe64 *' to XDP programs as-is?
> > We can make sure that XDP program does read only access into it and
> > it will see cqe->rss_hash_result, cqe->rss_hash_type and everything else
> > in there, but this will not be uapi and it will be pretty obvious
> > to program authors that their programs are vendor specific.
> > 'not uapi' here means that mellanox is free to change their HW descriptor
> > and its contents as they wish.
> 
> Hm..  Would that mean we have to teach the verifier about all possible
> drivers and their metadata structures (i.e. sizes thereof).  And add an
> UAPI enum of known drivers?

why? no uapi other than a pointer to this hw rx descriptor.
Different sizeof(hw_rx_descriptor) is not a problem.
We deal with it already in tracing. All tracepoints have different
sizeof(*ctx), yet the safety is preserved.

> Other idea I floated in early days was to standardize the fields but
> let the driver "JIT" the accesses to look at the right offset of the
> right structure.  Admittedly that would be a lot more work.

'standardize the fields' sounds nice, but failed here already.
As far as I can see the meaning of packet 'hash' is quite different
across the drivers and 'hash' is just a beginning.
I hope we can standardize on 'csum' field and make it checksum_complete,
but so far out of 10+G nics only mlx5 and nfp do it in hw.
We need it at least for mlx4, but it can only fake it via expensive math.



Re: [RFC net-next PATCH 4/5] net: new XDP feature for reading HW rxhash from drivers

2017-05-19 Thread Jakub Kicinski
On Fri, 19 May 2017 20:07:52 -0700, Alexei Starovoitov wrote:
> How about exposing 'struct mlx5_cqe64 *' to XDP programs as-is?
> We can make sure that XDP program does read only access into it and
> it will see cqe->rss_hash_result, cqe->rss_hash_type and everything else
> in there, but this will not be uapi and it will be pretty obvious
> to program authors that their programs are vendor specific.
> 'not uapi' here means that mellanox is free to change their HW descriptor
> and its contents as they wish.

Hm..  Would that mean we have to teach the verifier about all possible
drivers and their metadata structures (i.e. sizes thereof).  And add an
UAPI enum of known drivers?

Other idea I floated in early days was to standardize the fields but
let the driver "JIT" the accesses to look at the right offset of the
right structure.  Admittedly that would be a lot more work.


Re: [RFC net-next PATCH 4/5] net: new XDP feature for reading HW rxhash from drivers

2017-05-19 Thread Alexei Starovoitov
On Thu, May 18, 2017 at 05:41:48PM +0200, Jesper Dangaard Brouer wrote:
>  
> +/* XDP rxhash have an associated type, which is related to the RSS
> + * (Receive Side Scaling) standard, but NIC HW have different mapping
> + * and support. Thus, create mapping that is interesting for XDP.  XDP
> + * would primarily want insight into L3 and L4 protocol info.
> + *
> + * TODO: Likely need to get extended with "L3_IPV6_EX" due RSS standard
> + *
> + * The HASH_TYPE will be returned from bpf helper as the top 32-bit of
> + * the 64-bit rxhash (internally type stored in xdp_buff->flags).
> + */
> +#define XDP_HASH(x)  ((x) & ((1ULL << 32)-1))
> +#define XDP_HASH_TYPE(x) ((x) >> 32)
> +
> +#define XDP_HASH_TYPE_L3_SHIFT   0
> +#define XDP_HASH_TYPE_L3_BITS3
> +#define XDP_HASH_TYPE_L3_MASK((1ULL << XDP_HASH_TYPE_L3_BITS)-1)
> +#define XDP_HASH_TYPE_L3(x)  ((x) & XDP_HASH_TYPE_L3_MASK)
> +enum {
> + XDP_HASH_TYPE_L3_IPV4 = 1,
> + XDP_HASH_TYPE_L3_IPV6,
> +};
> +
> +#define XDP_HASH_TYPE_L4_SHIFT   XDP_HASH_TYPE_L3_BITS
> +#define XDP_HASH_TYPE_L4_BITS5
> +#define XDP_HASH_TYPE_L4_MASK
> \
> + (((1ULL << XDP_HASH_TYPE_L4_BITS)-1) << XDP_HASH_TYPE_L4_SHIFT)
> +#define XDP_HASH_TYPE_L4(x)  ((x) & XDP_HASH_TYPE_L4_MASK)
> +enum {
> + _XDP_HASH_TYPE_L4_TCP = 1,
> + _XDP_HASH_TYPE_L4_UDP,
> +};
> +#define XDP_HASH_TYPE_L4_TCP (_XDP_HASH_TYPE_L4_TCP << 
> XDP_HASH_TYPE_L4_SHIFT)
> +#define XDP_HASH_TYPE_L4_UDP (_XDP_HASH_TYPE_L4_UDP << 
> XDP_HASH_TYPE_L4_SHIFT)

imo this is dangerous territory.
As far as I can see this information doesn't exist in the current drivers at all
and you're enabling it in the patch 5 via fancy:
+   u32 ht = (mlx5_htype_l4_to_xdp[((cht & CQE_RSS_HTYPE_L4) >> 6)] | \
+ mlx5_htype_l3_to_xdp[((cht & CQE_RSS_HTYPE_IP) >> 2)]);

It's pretty cool that you've discovered this hidden mlx5 feature
Did you find it in some hw spec ?
And it looks useful to me, but
1. i'm worried that we'd be relying on something that mellanox didn't
 implement in their drivers before. Was it tested and guaranteed to exist
 in the future revisions of firmware? Is it cx4 or cx4-lx or cx5 feature?
2. but the main concern that it is mellanox only feature. At least I cannot
see anything like this in broadcom and intel nics

In the very beginning we discussed that XDP programs should be as generic as
possible and HW independent while at the same time we want to expose HW
specific features to XDP programs.
So I'm totally fine to expose this fancy hw hash and ipv4 vs v6 and tcp vs udp
flags to xdp programs somehow, but I'm completely against making it into uapi.

How about exposing 'struct mlx5_cqe64 *' to XDP programs as-is?
We can make sure that XDP program does read only access into it and
it will see cqe->rss_hash_result, cqe->rss_hash_type and everything else
in there, but this will not be uapi and it will be pretty obvious
to program authors that their programs are vendor specific.
'not uapi' here means that mellanox is free to change their HW descriptor
and its contents as they wish.
Also no copies and bit conversions will be necessary, so the cost will
be zero to programs that don't use it and we wouldn't need to change
verifier to discover access to this stuff.

Note that bpf programs already have access to all in-kernel data structures
on the tracing side, so this is nothing new and tracing program authors
got used to structures changing from kernel to kernel.
XDP program authors can do the same for vendor specific bits while we
keep core XDP uapi generic across all nics.



RE: [PATCH v3 net-next 1/5] dsa: add support for Microchip KSZ tail tagging

2017-05-19 Thread Woojung.Huh
>> + if (padlen) {
>> + u8 *pad = skb_put(nskb, padlen);
>> +
>> + memset(pad, 0, padlen);
>> + }
>
>Can you use skb_put_padto() here instead of open coding this?
>
>> +
>> + tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN);
>> + tag[0] = 0;
>> + tag[1] = 1 << p->dp->index; /* destnation port */
>
>typo: destination port
>
>With that fixed:
>
>Reviewed-by: Florian Fainelli 
Hi Florian,

Thanks for the prompt reviews. Will submit another version.

- Woojung


Re: [PATCH net-next] geneve: always fill CSUM6_RX configuration

2017-05-19 Thread Pravin Shelar
On Thu, May 18, 2017 at 12:59 PM, Eric Garver  wrote:
> CSUM6_RX is relevant for collect_metadata as well. As such leave it
> outside of the dev's IPv4/IPv6 checks.
>
Can you explain it a bit? Is this flag used with IPv4 tunnels?

> Fixes: 9b4437a5b870 ("geneve: Unify LWT and netdev handling.")
> Signed-off-by: Eric Garver 
> ---
>  drivers/net/geneve.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
> index dec5d563ab19..f557d1dc3f9b 100644
> --- a/drivers/net/geneve.c
> +++ b/drivers/net/geneve.c
> @@ -1311,13 +1311,13 @@ static int geneve_fill_info(struct sk_buff *skb, 
> const struct net_device *dev)
> if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX,
>!(info->key.tun_flags & TUNNEL_CSUM)))
> goto nla_put_failure;
> -
> -   if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
> -  !geneve->use_udp6_rx_checksums))
> -   goto nla_put_failure;
>  #endif
> }
>
> +   if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
> +  !geneve->use_udp6_rx_checksums))
> +   goto nla_put_failure;
> +
> if (nla_put_u8(skb, IFLA_GENEVE_TTL, info->key.ttl) ||
> nla_put_u8(skb, IFLA_GENEVE_TOS, info->key.tos) ||
> nla_put_be32(skb, IFLA_GENEVE_LABEL, info->key.label))
> --
> 2.12.0
>


[PATCH] i40e: Fix incorrect pf->flags

2017-05-19 Thread Tushar Dave
Fix bug introduced by 'commit 47994c119a36e ("i40e: remove
hw_disabled_flags in favor of using separate flag bits")' that
mistakenly wipes out pf->flags.

Signed-off-by: Tushar Dave 
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c 
b/drivers/net/ethernet/intel/i40e/i40e_main.c
index d5c9c9e..6b98d34 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -8821,9 +8821,9 @@ static int i40e_sw_init(struct i40e_pf *pf)
(pf->hw.aq.api_min_ver > 4))) {
/* Supported in FW API version higher than 1.4 */
pf->flags |= I40E_FLAG_GENEVE_OFFLOAD_CAPABLE;
-   pf->flags = I40E_FLAG_HW_ATR_EVICT_CAPABLE;
+   pf->flags |= I40E_FLAG_HW_ATR_EVICT_CAPABLE;
} else {
-   pf->flags = I40E_FLAG_HW_ATR_EVICT_CAPABLE;
+   pf->flags |= I40E_FLAG_HW_ATR_EVICT_CAPABLE;
}
 
pf->eeprom_version = 0xDEAD;
-- 
1.9.1



Re: [PATCH v2 1/3] bpf: Use 1<<16 as ceiling for immediate alignment in verifier.

2017-05-19 Thread Alexei Starovoitov

On 5/19/17 4:16 PM, David Miller wrote:

From: Alexei Starovoitov 
Date: Fri, 19 May 2017 14:37:56 -0700


On 5/19/17 1:41 PM, David Miller wrote:

From: Edward Cree 
Date: Fri, 19 May 2017 18:17:42 +0100


One question: is there a way to build the verifier as userland code
 (or at least as a module), or will I have to reboot every time I
 want to test a change?


There is currently no such mechanism; you will have to reboot every
time.

I have considered working on making the code buildable outside of the
kernel.  It shouldn't be too hard.


it's not hard.
We did it twice and both times abandoned.
First time to have 'user space verifier' to check programs before
loading and second time for fuzzing via llvm.
Abandoned since it diverges very quickly from kernel.



Well, my idea was to create an environment in which kernel verifier.c
could be built as-is.

Maybe there would be some small compromises in verifier.c such as an
ifdef test or two, but that should be it.


that's exactly what we did the first time. Added few ifdef to verifier.c
Second time we went even further by compiling kernel/bpf/verifier.c
as-is and linking everything magically via user space hooks
all the way that test_verifier.c runs as-is but calling
bpf_check() function that was compiled for user space via clang.
That code is here:
https://github.com/iovisor/bpf-fuzzer
It's definitely possible to refresh it and make it work again.

My point is that unless we put such 'lets build verifier.c for user space'
as part of tools/testing/selftests/ or something, such a project is
destined to bit rot.



Re: [PATCH v3 net-next 3/5] dsa: add DSA switch driver for Microchip KSZ9477

2017-05-19 Thread Florian Fainelli
On 05/19/2017 03:57 PM, woojung@microchip.com wrote:
> From: Woojung Huh 
> 
> The KSZ9477 is a fully integrated layer 2, managed, 7 ports GigE switch
> with numerous advanced features. 5 ports incorporate 10/100/1000 Mbps PHYs.
> The other 2 ports have interfaces that can be configured as SGMII, RGMII, MII
> or RMII. Either of these may connect directly to a host processor or
> to an external PHY. The SGMII port may interface to a fiber optic transceiver.
> 
> This driver currently supports vlan, fdb, mdb & mirror dsa switch operations.
> 
> Signed-off-by: Woojung Huh 

Looks great, thanks Woojung!

Reviewed-by: Florian Fainelli 
-- 
Florian


Re: [PATCH v3 net-next 4/5] dsa: Add spi support to Microchip KSZ switches

2017-05-19 Thread Florian Fainelli
On 05/19/2017 03:57 PM, woojung@microchip.com wrote:
> From: Woojung Huh 
> 
> A sample SPI configuration for Microchip KSZ switches.
> 
> Signed-off-by: Woojung Huh 
> Reviewed-by: Andrew Lunn 

Subject should be something like:

dt-bindings: net: dsa: Add Microchip KSZ switches binding

With that fixed and the nits below:

Reviewed-by: Florian Fainelli 

> ---
>  Documentation/devicetree/bindings/net/dsa/ksz.txt | 73 
> +++
>  1 file changed, 73 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/net/dsa/ksz.txt
> 
> diff --git a/Documentation/devicetree/bindings/net/dsa/ksz.txt 
> b/Documentation/devicetree/bindings/net/dsa/ksz.txt
> new file mode 100644
> index 000..8a13966
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/net/dsa/ksz.txt
> @@ -0,0 +1,73 @@
> +Microchip KSZ Series Ethernet switches
> +==
> +
> +Required properties:
> +
> +- compatible: For external switch chips, compatible string must be exactly 
> one
> +  of: "microchip,ksz9477"
> +
> +See Documentation/devicetree/bindings/dsa/dsa.txt for a list of additional
> +required and optional properties.
> +
> +Examples:
> +
> +Ethernet switch connected via SPI to the host, CPU port wired to eth0:
> +
> + eth0: ethernet@10001000 {
> + fixed-link {
> + reg = <7>

There is a missing semicolon, and for a fixed-link, there is no "reg"
property.

> + speed = <1000>;
> + duplex-full;

Actually the correct property is named "full-duplex" (like you put it
for the switch)

> + };
> + };
> +
> + spi1: spi@f8008000 {
> + pinctrl-0 = <&pinctrl_spi_ksz>;
> + cs-gpios = <&pioC 25 0>;
> + id = <1>;
> + status = "okay";
> +
> + ksz9477: ksz9477@0 {
> + compatible = "microchip,ksz9477";
> + reg = <0>;
> +
> + spi-max-frequency = <4400>;
> + spi-cpha;
> + spi-cpol;
> +
> + status = "okay";
> + ports {
> + #address-cells = <1>;
> + #size-cells = <0>;
> + port@0 {
> + reg = <0>;
> + label = "lan1";
> + };
> + port@1 {
> + reg = <1>;
> + label = "lan2";
> + };
> + port@2 {
> + reg = <2>;
> + label = "lan3";
> + };
> + port@3 {
> + reg = <3>;
> + label = "lan4";
> + };
> + port@4 {
> + reg = <4>;
> + label = "lan5";
> + };
> + port@5 {
> + reg = <5>;
> + label = "cpu";
> + ethernet = <&eth0>;
> + fixed-link {
> + speed = <1000>;
> + full-duplex;
> + };
> + };
> + };
> + };
> + };
> 


-- 
Florian


Re: [PATCH v3 net-next 5/5] dsa: add maintainer of Microchip KSZ switches

2017-05-19 Thread Florian Fainelli
On 05/19/2017 03:57 PM, woojung@microchip.com wrote:
> From: Woojung Huh 
> 
> Adding maintainer of Microchip KSZ switches.
> 
> Signed-off-by: Woojung Huh 
> Reviewed-by: Andrew Lunn 

Reviewed-by: Florian Fainelli 

> ---
>  MAINTAINERS | 10 ++
>  1 file changed, 10 insertions(+)
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index f7d568b..a72b40c 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -8454,6 +8454,16 @@ F: drivers/media/platform/atmel/atmel-isc.c
>  F:   drivers/media/platform/atmel/atmel-isc-regs.h
>  F:   devicetree/bindings/media/atmel-isc.txt
>  
> +MICROCHIP KSZ SERIES ETHERNET SWITCH DRIVER
> +M:   Woojung Huh 
> +M:   Microchip Linux Driver Support 
> +L:   netdev@vger.kernel.org
> +S:   Maintained
> +F:   net/dsa/tag_ksz.c
> +F:   drivers/net/dsa/microchip/*
> +F:   include/linux/platform_data/microchip-ksz.h
> +F:   Documentation/devicetree/bindings/net/dsa/ksz.txt
> +
>  MICROCHIP USB251XB DRIVER
>  M:   Richard Leitner 
>  L:   linux-...@vger.kernel.org
> 


-- 
Florian


Re: [PATCH v3 net-next 2/5] phy: micrel: add Microchip KSZ 9477 Switch PHY support

2017-05-19 Thread Florian Fainelli
On 05/19/2017 03:57 PM, woojung@microchip.com wrote:
> From: Woojung Huh 
> 
> Adding Microchip 9477 Phy included in KSZ9477 Switch.
> 
> Signed-off-by: Woojung Huh 
> Signed-off-by: Andrew Lunn 

Reviewed-by: Florian Fainelli 
-- 
Florian


Re: [PATCH v3 net-next 1/5] dsa: add support for Microchip KSZ tail tagging

2017-05-19 Thread Florian Fainelli
On 05/19/2017 03:57 PM, woojung@microchip.com wrote:
> From: Woojung Huh 
> 
> Adding support for the Microchip KSZ switch family tail tagging.
> 
> Signed-off-by: Woojung Huh 
> Reviewed-by: Andrew Lunn 
> ---
>  include/net/dsa.h  |   1 +
>  net/dsa/Kconfig|   3 ++
>  net/dsa/Makefile   |   1 +
>  net/dsa/dsa.c  |   3 ++
>  net/dsa/dsa_priv.h |   3 ++
>  net/dsa/tag_ksz.c  | 103 
> +
>  6 files changed, 114 insertions(+)
>  create mode 100644 net/dsa/tag_ksz.c
> 
> diff --git a/include/net/dsa.h b/include/net/dsa.h
> index 791fed6..fbb00a6 100644
> --- a/include/net/dsa.h
> +++ b/include/net/dsa.h
> @@ -31,6 +31,7 @@ enum dsa_tag_protocol {
>   DSA_TAG_PROTO_BRCM,
>   DSA_TAG_PROTO_DSA,
>   DSA_TAG_PROTO_EDSA,
> + DSA_TAG_PROTO_KSZ,
>   DSA_TAG_PROTO_LAN9303,
>   DSA_TAG_PROTO_MTK,
>   DSA_TAG_PROTO_QCA,
> diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
> index 297389b..cc5f8f9 100644
> --- a/net/dsa/Kconfig
> +++ b/net/dsa/Kconfig
> @@ -25,6 +25,9 @@ config NET_DSA_TAG_DSA
>  config NET_DSA_TAG_EDSA
>   bool
>  
> +config NET_DSA_TAG_KSZ
> + bool
> +
>  config NET_DSA_TAG_LAN9303
>   bool
>  
> diff --git a/net/dsa/Makefile b/net/dsa/Makefile
> index f8c0251..b15141f 100644
> --- a/net/dsa/Makefile
> +++ b/net/dsa/Makefile
> @@ -6,6 +6,7 @@ dsa_core-y += dsa.o slave.o dsa2.o switch.o legacy.o
>  dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o
>  dsa_core-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o
>  dsa_core-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o
> +dsa_core-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o
>  dsa_core-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o
>  dsa_core-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o
>  dsa_core-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o
> diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
> index 3288a80..402459e 100644
> --- a/net/dsa/dsa.c
> +++ b/net/dsa/dsa.c
> @@ -49,6 +49,9 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = 
> {
>  #ifdef CONFIG_NET_DSA_TAG_EDSA
>   [DSA_TAG_PROTO_EDSA] = &edsa_netdev_ops,
>  #endif
> +#ifdef CONFIG_NET_DSA_TAG_KSZ
> + [DSA_TAG_PROTO_KSZ] = &ksz_netdev_ops,
> +#endif
>  #ifdef CONFIG_NET_DSA_TAG_LAN9303
>   [DSA_TAG_PROTO_LAN9303] = &lan9303_netdev_ops,
>  #endif
> diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
> index c274130..6f23dfa 100644
> --- a/net/dsa/dsa_priv.h
> +++ b/net/dsa/dsa_priv.h
> @@ -85,6 +85,9 @@ extern const struct dsa_device_ops dsa_netdev_ops;
>  /* tag_edsa.c */
>  extern const struct dsa_device_ops edsa_netdev_ops;
>  
> +/* tag_ksz.c */
> +extern const struct dsa_device_ops ksz_netdev_ops;
> +
>  /* tag_lan9303.c */
>  extern const struct dsa_device_ops lan9303_netdev_ops;
>  
> diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
> new file mode 100644
> index 000..cbc79b5
> --- /dev/null
> +++ b/net/dsa/tag_ksz.c
> @@ -0,0 +1,103 @@
> +/*
> + * net/dsa/tag_ksz.c - Microchip KSZ Switch tag format handling
> + * Copyright (c) 2017 Microchip Technology
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include "dsa_priv.h"
> +
> +/* For Ingress (Host -> KSZ), 2 bytes are added before FCS.
> + * 
> ---
> + * 
> DA(6bytes)|SA(6bytes)||Data(nbytes)|tag0(1byte)|tag1(1byte)|FCS(4bytes)
> + * 
> ---
> + * tag0 : Prioritization (not used now)
> + * tag1 : each bit represents port (eg, 0x01=port1, 0x02=port2, 0x10=port5)
> + *
> + * For Egress (KSZ -> Host), 1 byte is added before FCS.
> + * 
> ---
> + * DA(6bytes)|SA(6bytes)||Data(nbytes)|tag0(1byte)|FCS(4bytes)
> + * 
> ---
> + * tag0 : zero-based value represents port
> + * (eg, 0x00=port1, 0x02=port3, 0x06=port7)
> + */
> +
> +#define  KSZ_INGRESS_TAG_LEN 2
> +#define  KSZ_EGRESS_TAG_LEN  1
> +
> +static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
> +{
> + struct dsa_slave_priv *p = netdev_priv(dev);
> + struct sk_buff *nskb;
> + int padlen;
> + u8 *tag;
> +
> + padlen = (skb->len >= ETH_ZLEN) ? 0 : ETH_ZLEN - skb->len;
> +
> + if (skb_tailroom(skb) >= padlen + KSZ_INGRESS_TAG_LEN) {
> + nskb = skb;
> + } else {
> + nskb = alloc_skb(NET_IP_ALIGN + skb->len +
> +  padlen + KSZ_INGRESS_TAG_LEN, GFP_ATOMIC);
> + if (!nskb) {
> + kfree_skb(skb);
> + return 

[PATCH net v2] bonding: fix accounting of active ports in 3ad

2017-05-19 Thread Jarod Wilson
As of 7bb11dc9f59d and 0622cab0341c, bond slaves in a 3ad bond are not
removed from the aggregator when they are down, and the active slave count
is NOT equal to number of ports in the aggregator, but rather the number
of ports in the aggregator that are still enabled. The sysfs spew for
bonding_show_ad_num_ports() has a comment that says "Show number of active
802.3ad ports.", but it's currently showing total number of ports, both
active and inactive. Remedy it by using the same logic introduced in
0622cab0341c in __bond_3ad_get_active_agg_info(), so sysfs, procfs and
netlink all report the number of active ports. Note that this means that
IFLA_BOND_AD_INFO_NUM_PORTS really means NUM_ACTIVE_PORTS instead of
NUM_PORTS, and thus perhaps should be renamed for clarity.

Lightly tested on a dual i40e lacp bond, simulating link downs with an ip
link set dev <slave> down, was able to produce the state where I could
see both in the same aggregator, but a number of ports count of 1.

MII Status: up
Active Aggregator Info:
Aggregator ID: 1
Number of ports: 2 <---
Slave Interface: ens10
MII Status: up <---
Aggregator ID: 1
Slave Interface: ens11
MII Status: up
Aggregator ID: 1

MII Status: up
Active Aggregator Info:
Aggregator ID: 1
Number of ports: 1 <---
Slave Interface: ens10
MII Status: down <---
Aggregator ID: 1
Slave Interface: ens11
MII Status: up
Aggregator ID: 1

CC: Jay Vosburgh 
CC: Veaceslav Falico 
CC: Andy Gospodarek 
CC: netdev@vger.kernel.org
Signed-off-by: Jarod Wilson 
---
v2: fix incorrect git sha reference, add more testing data

 drivers/net/bonding/bond_3ad.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index c5fd4259da33..b44a6aeb346d 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -2577,7 +2577,7 @@ int __bond_3ad_get_active_agg_info(struct bonding *bond,
return -1;
 
ad_info->aggregator_id = aggregator->aggregator_identifier;
-   ad_info->ports = aggregator->num_of_ports;
+   ad_info->ports = __agg_active_ports(aggregator);
ad_info->actor_key = aggregator->actor_oper_aggregator_key;
ad_info->partner_key = aggregator->partner_oper_aggregator_key;
ether_addr_copy(ad_info->partner_system,
-- 
2.12.1



Re: [PATCH v2 net-next 0/2] Check all RGMII phy mode variants

2017-05-19 Thread David Miller
From: Iyappan Subramanian 
Date: Thu, 18 May 2017 15:13:42 -0700

> This patch set,
>  - adds phy_interface_mode_is_rgmii() helper function
>  - addresses review comment from previous patch set, by calling
>phy_interface_mode_is_rgmii() to address all RGMII variants
> 
> Signed-off-by: Iyappan Subramanian 

Series applied, thanks.


Re: [RFC net-next PATCH 2/5] mlx5: fix bug reading rss_hash_type from CQE

2017-05-19 Thread David Miller
From: Jesper Dangaard Brouer 
Date: Thu, 18 May 2017 17:41:38 +0200

> Masks for extracting part of the Completion Queue Entry (CQE)
> field rss_hash_type was swapped, namely CQE_RSS_HTYPE_IP and
> CQE_RSS_HTYPE_L4.
> 
> The bug resulted in setting skb->l4_hash, even-though the
> rss_hash_type indicated that hash was NOT computed over the
> L4 (UDP or TCP) part of the packet.
> 
> Added comments from the datasheet, to make it more clear what
> these masks are selecting.
> 
> Signed-off-by: Jesper Dangaard Brouer 

Please pass this along to the mlx5 developers as a standalone
bug fix for 'net', thank you.


Re: [RFC net-next PATCH 3/5] net: introduce XDP driver features interface

2017-05-19 Thread David Miller
From: Daniel Borkmann 
Date: Fri, 19 May 2017 19:13:29 +0200

> The problem is that once you add bits markers to bpf_prog like we
> used to do in the past, then as you do in patch 4/5 with the
> xdp_rxhash_needed bit, they will need to be turned /on/
> unconditionally when a prog has tail calls.

Yeah that's the problem with feature checks, once you have tail calls
involved we have to say "entire universe" of features is possible
because it is (intentionally) not possible to track all paths
reachable via tail calls, and in fact these paths can dynamically and
arbitrarily change after the program using tail calls has been loaded
and verified completely.

For example, let's assume we have eBPF program A that uses tail calls
via slots in bpf MAP "M".  At verification time, sure, we could see
the MAP "M" points to programs B and C, which don't use tail calls and
look at what features they use.

But after loading "A", anyone with access to bpf MAP "M" can change
the tail call slots to point to bpf programs other than "B" and "C".
And maybe those new programs use features outside of the set we tested
for when "A" was verified.

So it is impossible to test feature "sets" with eBPF like this.


Re: [PATCH net-next v3 0/7] fix CRC32c in the forwarding path

2017-05-19 Thread David Miller
From: Davide Caratti 
Date: Thu, 18 May 2017 15:44:36 +0200

> Current kernel allows offloading CRC32c computation when SCTP packets
> are generated, setting skb->ip_summed to CHECKSUM_PARTIAL, if the
> underlying device features have NETIF_F_SCTP_CRC set. However, after these
> packets are forwarded, they may land on a device where CRC32c offloading is
> not available: as a consequence, transmission is done with wrong CRC32c.
> It's not possible to use sctp_compute_cksum() in the forwarding path
> and in most drivers, because it needs symbols exported by libcrc32c module.
> 
> Patch 1 and 2 of this series try to solve this problem, introducing a new
> helper function, namely skb_crc32c_csum_help(), that can be used to resolve
> CHECKSUM_PARTIAL when crc32c is needed instead of Internet Checksum.
> 
> Currently, we need to parse the packet headers to understand what algorithm
> is needed to resolve CHECKSUM_PARTIAL. We can speedup things by storing
> this information in the skb metadata, and use it to call an appropriate
> helper (skb_checksum_help or skb_crc32c_csum_help), or leave the packet
> unmodified when the NIC is able to offload the checksum computation.
> 
> Patch 3 deprecates skb->csum_bad to free one bit in skb metadata; patch 4
> introduces skb->csum_not_inet, providing skb with an indication on the
> algorithm needed to resolve CHECKSUM_PARTIAL.
> Patch 5 and 6 fix the kernel forwarding path and openvswitch datapath,
> where skb_checksum_help was unconditionally called to resolve 
> CHECKSUM_PARTIAL,
> thus generating wrong CRC32c in forwarded SCTP packets.
> Finally, patch 7 updates documentation to provide a better description of
> possible values of skb->ip_summed.
> 
> Some further work is still possible:
> * drivers that parse the packet header to correctly resolve CHECKSUM_PARTIAL
> (e.g. ixgbe_tx_csum()) can benefit from testing skb->csum_not_inet to avoid
> calling ip_hdr(skb)->protocol or ixgbe_ipv6_csum_is_sctp(skb).
> 
> * drivers that call skb_checksum_help() to resolve CHECKSUM_PARTIAL can
> call skb_csum_hwoffload_help to avoid corrupting SCTP packets.
...

Ok, series applied.

I do kinda think that the crc32 module handling still isn't very nice.
If this is a core checksumming algorithm we support in the networking
stack, then seriously just like the standard internet checksum we should
statically build crc32 into the kernel image and not have this weird
situation where the code might not be there when we need it.

Thanks.


Re: [PATCH net] bonding: fix accounting of active ports in 3ad

2017-05-19 Thread David Miller
From: Jarod Wilson 
Date: Fri, 19 May 2017 18:15:57 -0400

> On 2017-05-19 5:14 PM, David Miller wrote:
>> From: Jarod Wilson 
>> Date: Wed, 17 May 2017 11:11:44 -0400
>> 
>>> As of 7bb11dc9f59d and 0622cab0341c, bond slaves in a 3ad bond are not
>>> removed from the aggregator when they are down, and the active slave
>>> count
>>> is NOT equal to number of ports in the aggregator, but rather the
>>> number
>>> of ports in the aggregator that are still enabled.
>>   ...
>>> Remedy it by using the same logic introduced in
>>> 7bb11dc9f59d in __bond_3ad_get_active_agg_info(), so sysfs, procfs and
>>
>>> netlink all report the number of active ports.
>> I think you mean to reference commit 0622cab0341c here not
>> 7bb11dc9f59d.
> 
> D'oh, yes, you are entirely correct. Should I submit a v2 with that
> correction?

Yes, please.


Re: [PATCH v2 1/3] bpf: Use 1<<16 as ceiling for immediate alignment in verifier.

2017-05-19 Thread David Miller
From: Alexei Starovoitov 
Date: Fri, 19 May 2017 14:37:56 -0700

> On 5/19/17 1:41 PM, David Miller wrote:
>> From: Edward Cree 
>> Date: Fri, 19 May 2017 18:17:42 +0100
>>
>>> One question: is there a way to build the verifier as userland code
>>>  (or at least as a module), or will I have to reboot every time I
>>>  want to test a change?
>>
>> There currently is no such mechanism, you will have to reboot every
>> time.
>>
>> I have considered working on making the code buildable outside of the
>> kernel.  It shouldn't be too hard.
> 
> it's not hard.
> We did it twice and both times abandoned.
> First time to have 'user space verifier' to check programs before
> loading and second time for fuzzing via llvm.
> Abandoned since it diverges very quickly from kernel.
> 

Well, my idea was to create an environment in which kernel verifier.c
could be built as-is.

Maybe there would be some small compromises in verifier.c such as an
ifdef test or two, but that should be it.

It really is just a piece of what amounts to compiler infrastructure
and not very kernel specific.


Re: Alignment in BPF verifier

2017-05-19 Thread Daniel Borkmann

On 05/19/2017 10:39 PM, David Miller wrote:

From: Edward Cree 
Date: Fri, 19 May 2017 21:00:13 +0100


Well, I've managed to get somewhat confused by reg->id.
In particular, I'm unsure which bpf_reg_types can have an id, and what
  exactly it means.  There seems to be some code that checks around map value
  pointers, which seems strange as maps have fixed sizes (and the comments in
  enum bpf_reg_type make it seem like id is a PTR_TO_PACKET thing) - is this


Besides PTR_TO_PACKET also PTR_TO_MAP_VALUE_OR_NULL uses it to
track all registers (incl. spilled ones) with the same reg->id
that originated from the same map lookup. After the reg type is
then migrated to either PTR_TO_MAP_VALUE (resp. CONST_PTR_TO_MAP
for map in map) or UNKNOWN_VALUE depending on the branch, the
reg->id is then reset to 0 again. Whole reason for this is that
LLVM generates code where it can move and/or spill a reg of type
PTR_TO_MAP_VALUE_OR_NULL to other regs before we do the NULL
test on it, and later on it expects that the spilled or moved
regs work wrt access. So they're marked with an id and then all
of them are type migrated. So here meaning of reg->id is different
than in PTR_TO_PACKET case. Example:

0: (b7) r1 = 10
1: (7b) *(u64 *)(r10 -8) = r1
2: (bf) r2 = r10
3: (07) r2 += -8
4: (18) r1 = 0x59c0
6: (85) call 1 //map lookup
7: (bf) r4 = r0
8: (15) if r0 == 0x0 goto pc+1
 R0=map_value(ks=8,vs=8) R4=map_value_or_null(ks=8,vs=8) R10=fp
9: (7a) *(u64 *)(r4 +0) = 0


  maybe because of map-of-maps support, can the contained maps have differing
  element sizes?  Or do we allow *(map_value + var + imm), if map_value + var
  was appropriately bounds-checked?

Does the 'id' identify the variable that was added to an object pointer, or
  the object itself?  Or does it blur these and identify (what the comment in
  enum bpf_reg_type calls) "skb->data + (u16) var"?


The reg->id value changes any time a variable gets added to a packet
pointer.

You will also notice right now that only packet pointers have their
alignment tracked.

I have changes pending that will do that for MAP pointers too, but
it needs more work.





[PATCH v3 net-next 5/5] dsa: add maintainer of Microchip KSZ switches

2017-05-19 Thread Woojung.Huh
From: Woojung Huh 

Adding maintainer of Microchip KSZ switches.

Signed-off-by: Woojung Huh 
Reviewed-by: Andrew Lunn 
---
 MAINTAINERS | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index f7d568b..a72b40c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8454,6 +8454,16 @@ F:   drivers/media/platform/atmel/atmel-isc.c
 F: drivers/media/platform/atmel/atmel-isc-regs.h
 F: devicetree/bindings/media/atmel-isc.txt
 
+MICROCHIP KSZ SERIES ETHERNET SWITCH DRIVER
+M: Woojung Huh 
+M: Microchip Linux Driver Support 
+L: netdev@vger.kernel.org
+S: Maintained
+F: net/dsa/tag_ksz.c
+F: drivers/net/dsa/microchip/*
+F: include/linux/platform_data/microchip-ksz.h
+F: Documentation/devicetree/bindings/net/dsa/ksz.txt
+
 MICROCHIP USB251XB DRIVER
 M: Richard Leitner 
 L: linux-...@vger.kernel.org
-- 
2.7.4



[PATCH v3 net-next 4/5] dsa: Add spi support to Microchip KSZ switches

2017-05-19 Thread Woojung.Huh
From: Woojung Huh 

A sample SPI configuration for Microchip KSZ switches.

Signed-off-by: Woojung Huh 
Reviewed-by: Andrew Lunn 
---
 Documentation/devicetree/bindings/net/dsa/ksz.txt | 73 +++
 1 file changed, 73 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/dsa/ksz.txt

diff --git a/Documentation/devicetree/bindings/net/dsa/ksz.txt 
b/Documentation/devicetree/bindings/net/dsa/ksz.txt
new file mode 100644
index 000..8a13966
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/dsa/ksz.txt
@@ -0,0 +1,73 @@
+Microchip KSZ Series Ethernet switches
+==
+
+Required properties:
+
+- compatible: For external switch chips, compatible string must be exactly one
+  of: "microchip,ksz9477"
+
+See Documentation/devicetree/bindings/dsa/dsa.txt for a list of additional
+required and optional properties.
+
+Examples:
+
+Ethernet switch connected via SPI to the host, CPU port wired to eth0:
+
+   eth0: ethernet@10001000 {
+   fixed-link {
+   reg = <7>
+   speed = <1000>;
+   duplex-full;
+   };
+   };
+
+   spi1: spi@f8008000 {
+   pinctrl-0 = <_spi_ksz>;
+   cs-gpios = < 25 0>;
+   id = <1>;
+   status = "okay";
+
+   ksz9477: ksz9477@0 {
+   compatible = "microchip,ksz9477";
+   reg = <0>;
+
+   spi-max-frequency = <4400>;
+   spi-cpha;
+   spi-cpol;
+
+   status = "okay";
+   ports {
+   #address-cells = <1>;
+   #size-cells = <0>;
+   port@0 {
+   reg = <0>;
+   label = "lan1";
+   };
+   port@1 {
+   reg = <1>;
+   label = "lan2";
+   };
+   port@2 {
+   reg = <2>;
+   label = "lan3";
+   };
+   port@3 {
+   reg = <3>;
+   label = "lan4";
+   };
+   port@4 {
+   reg = <4>;
+   label = "lan5";
+   };
+   port@5 {
+   reg = <5>;
+   label = "cpu";
+   ethernet = <>;
+   fixed-link {
+   speed = <1000>;
+   full-duplex;
+   };
+   };
+   };
+   };
+   };
-- 
2.7.4



[PATCH v3 net-next 2/5] phy: micrel: add Microchip KSZ 9477 Switch PHY support

2017-05-19 Thread Woojung.Huh
From: Woojung Huh 

Adding Microchip 9477 Phy included in KSZ9477 Switch.

Signed-off-by: Woojung Huh 
Signed-off-by: Andrew Lunn 
---
 drivers/net/phy/micrel.c   | 11 +++
 include/linux/micrel_phy.h |  2 ++
 2 files changed, 13 insertions(+)

diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 4cfd541..46e80bc 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -20,6 +20,7 @@
  *ksz8081, ksz8091,
  *ksz8061,
  * Switch : ksz8873, ksz886x
+ *  ksz9477
  */
 
 #include 
@@ -996,6 +997,16 @@ static struct phy_driver ksphy_driver[] = {
.read_status= ksz8873mll_read_status,
.suspend= genphy_suspend,
.resume = genphy_resume,
+}, {
+   .phy_id = PHY_ID_KSZ9477,
+   .phy_id_mask= MICREL_PHY_ID_MASK,
+   .name   = "Microchip KSZ9477",
+   .features   = PHY_GBIT_FEATURES,
+   .config_init= kszphy_config_init,
+   .config_aneg= genphy_config_aneg,
+   .read_status= genphy_read_status,
+   .suspend= genphy_suspend,
+   .resume = genphy_resume,
 } };
 
 module_phy_driver(ksphy_driver);
diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h
index f541da6..472fa4d 100644
--- a/include/linux/micrel_phy.h
+++ b/include/linux/micrel_phy.h
@@ -37,6 +37,8 @@
 
 #define PHY_ID_KSZ8795 0x00221550
 
+#definePHY_ID_KSZ9477  0x00221631
+
 /* struct phy_device dev_flags definitions */
 #define MICREL_PHY_50MHZ_CLK   0x0001
 #define MICREL_PHY_FXEN0x0002
-- 
2.7.4



[PATCH v3 net-next 0/5] dsa: add Microchip KSZ9477 DSA driver

2017-05-19 Thread Woojung.Huh
From: Woojung Huh 

This series of patches is for Microchip KSZ9477 DSA driver.
KSZ9477 is 7 ports GigE switch with numerous advanced features.
5 ports are 10/100/1000 Mbps internal PHYs and 2 ports have
Interfaces to SGMII, RGMII, MII or RMII.

This patch supports VLAN, MDB, FDB and port mirroring offloads.

Welcome reviews and comments from community.

Note: Tests are performed on internal development board.

V3
- update per review comments
- cosmetic changes
- drivers/net/dsa/microchip/ksz_common.c 
  * clean up ksz_switch_chips[] 
  * consolidate checking loops into functions
  * update mutex for better locking
  * replace devm_kmalloc_array() with devm_kcalloc()
- MAINTAINERS
  * add missing net/dsa/tag_ksz.c

V2
- update per review comments
- several cosmetic changes
- net/dsa/tag_ksz.c
  * constants are changed to defines
  * remove skb_linearize() in ksz_rcv()
  * ksz_xmit() checks skb tailroom before allocating a new skb
- drivers/net/phy/micrel.c
  * remove PHY_HAS_MAGICANEG from ksphy_driver[]
- drivers/net/dsa/microchip/ksz_common.c
  * add timeout to avoid endless loop
  * port initialization is moved to ksz_port_enable() instead of
ksz_setup_ports()
- Documentation/devicetree/bindings/net/dsa/ksz.txt
  * fix typo and indentations

Woojung Huh (5):
  dsa: add support for Microchip KSZ tail tagging
  phy: micrel: add Microchip KSZ 9477 Switch PHY support
  dsa: add DSA switch driver for Microchip KSZ9477
  dsa: Add spi support to Microchip KSZ switches
  dsa: add maintainer of Microchip KSZ switches

 Documentation/devicetree/bindings/net/dsa/ksz.txt |   73 +
 MAINTAINERS   |   10 +
 drivers/net/dsa/Kconfig   |2 +
 drivers/net/dsa/Makefile  |1 +
 drivers/net/dsa/microchip/Kconfig |   12 +
 drivers/net/dsa/microchip/Makefile|2 +
 drivers/net/dsa/microchip/ksz_9477_reg.h  | 1676 +
 drivers/net/dsa/microchip/ksz_common.c| 1255 +++
 drivers/net/dsa/microchip/ksz_priv.h  |  210 +++
 drivers/net/dsa/microchip/ksz_spi.c   |  215 +++
 drivers/net/phy/micrel.c  |   11 +
 include/linux/micrel_phy.h|2 +
 include/linux/platform_data/microchip-ksz.h   |   29 +
 include/net/dsa.h |1 +
 net/dsa/Kconfig   |3 +
 net/dsa/Makefile  |1 +
 net/dsa/dsa.c |3 +
 net/dsa/dsa_priv.h|3 +
 net/dsa/tag_ksz.c |  103 ++
 19 files changed, 3612 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/dsa/ksz.txt
 create mode 100644 drivers/net/dsa/microchip/Kconfig
 create mode 100644 drivers/net/dsa/microchip/Makefile
 create mode 100644 drivers/net/dsa/microchip/ksz_9477_reg.h
 create mode 100644 drivers/net/dsa/microchip/ksz_common.c
 create mode 100644 drivers/net/dsa/microchip/ksz_priv.h
 create mode 100644 drivers/net/dsa/microchip/ksz_spi.c
 create mode 100644 include/linux/platform_data/microchip-ksz.h
 create mode 100644 net/dsa/tag_ksz.c

-- 
2.7.4



[PATCH v3 net-next 1/5] dsa: add support for Microchip KSZ tail tagging

2017-05-19 Thread Woojung.Huh
From: Woojung Huh 

Adding support for the Microchip KSZ switch family tail tagging.

Signed-off-by: Woojung Huh 
Reviewed-by: Andrew Lunn 
---
 include/net/dsa.h  |   1 +
 net/dsa/Kconfig|   3 ++
 net/dsa/Makefile   |   1 +
 net/dsa/dsa.c  |   3 ++
 net/dsa/dsa_priv.h |   3 ++
 net/dsa/tag_ksz.c  | 103 +
 6 files changed, 114 insertions(+)
 create mode 100644 net/dsa/tag_ksz.c

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 791fed6..fbb00a6 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -31,6 +31,7 @@ enum dsa_tag_protocol {
DSA_TAG_PROTO_BRCM,
DSA_TAG_PROTO_DSA,
DSA_TAG_PROTO_EDSA,
+   DSA_TAG_PROTO_KSZ,
DSA_TAG_PROTO_LAN9303,
DSA_TAG_PROTO_MTK,
DSA_TAG_PROTO_QCA,
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 297389b..cc5f8f9 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -25,6 +25,9 @@ config NET_DSA_TAG_DSA
 config NET_DSA_TAG_EDSA
bool
 
+config NET_DSA_TAG_KSZ
+   bool
+
 config NET_DSA_TAG_LAN9303
bool
 
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index f8c0251..b15141f 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -6,6 +6,7 @@ dsa_core-y += dsa.o slave.o dsa2.o switch.o legacy.o
 dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o
 dsa_core-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o
 dsa_core-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o
+dsa_core-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o
 dsa_core-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o
 dsa_core-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o
 dsa_core-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 3288a80..402459e 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -49,6 +49,9 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = {
 #ifdef CONFIG_NET_DSA_TAG_EDSA
[DSA_TAG_PROTO_EDSA] = &edsa_netdev_ops,
 #endif
+#ifdef CONFIG_NET_DSA_TAG_KSZ
+   [DSA_TAG_PROTO_KSZ] = &ksz_netdev_ops,
+#endif
 #ifdef CONFIG_NET_DSA_TAG_LAN9303
[DSA_TAG_PROTO_LAN9303] = &lan9303_netdev_ops,
 #endif
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index c274130..6f23dfa 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -85,6 +85,9 @@ extern const struct dsa_device_ops dsa_netdev_ops;
 /* tag_edsa.c */
 extern const struct dsa_device_ops edsa_netdev_ops;
 
+/* tag_ksz.c */
+extern const struct dsa_device_ops ksz_netdev_ops;
+
 /* tag_lan9303.c */
 extern const struct dsa_device_ops lan9303_netdev_ops;
 
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
new file mode 100644
index 000..cbc79b5
--- /dev/null
+++ b/net/dsa/tag_ksz.c
@@ -0,0 +1,103 @@
+/*
+ * net/dsa/tag_ksz.c - Microchip KSZ Switch tag format handling
+ * Copyright (c) 2017 Microchip Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include "dsa_priv.h"
+
+/* For Ingress (Host -> KSZ), 2 bytes are added before FCS.
+ * ---
+ * DA(6bytes)|SA(6bytes)||Data(nbytes)|tag0(1byte)|tag1(1byte)|FCS(4bytes)
+ * ---
+ * tag0 : Prioritization (not used now)
+ * tag1 : each bit represents port (eg, 0x01=port1, 0x02=port2, 0x10=port5)
+ *
+ * For Egress (KSZ -> Host), 1 byte is added before FCS.
+ * ---
+ * DA(6bytes)|SA(6bytes)||Data(nbytes)|tag0(1byte)|FCS(4bytes)
+ * ---
+ * tag0 : zero-based value represents port
+ *   (eg, 0x00=port1, 0x02=port3, 0x06=port7)
+ */
+
+#defineKSZ_INGRESS_TAG_LEN 2
+#defineKSZ_EGRESS_TAG_LEN  1
+
+static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+   struct dsa_slave_priv *p = netdev_priv(dev);
+   struct sk_buff *nskb;
+   int padlen;
+   u8 *tag;
+
+   padlen = (skb->len >= ETH_ZLEN) ? 0 : ETH_ZLEN - skb->len;
+
+   if (skb_tailroom(skb) >= padlen + KSZ_INGRESS_TAG_LEN) {
+   nskb = skb;
+   } else {
+   nskb = alloc_skb(NET_IP_ALIGN + skb->len +
+padlen + KSZ_INGRESS_TAG_LEN, GFP_ATOMIC);
+   if (!nskb) {
+   kfree_skb(skb);
+   return NULL;
+   }
+   skb_reserve(nskb, NET_IP_ALIGN);
+
+   skb_reset_mac_header(nskb);
+   skb_set_network_header(nskb,
+  skb_network_header(skb) - skb->head);
+   skb_set_transport_header(nskb,
+   

[PATCH v3 net-next 3/5] dsa: add DSA switch driver for Microchip KSZ9477

2017-05-19 Thread Woojung.Huh
From: Woojung Huh 

The KSZ9477 is a fully integrated layer 2, managed, 7 ports GigE switch
with numerous advanced features. 5 ports incorporate 10/100/1000 Mbps PHYs.
The other 2 ports have interfaces that can be configured as SGMII, RGMII, MII
or RMII. Either of these may connect directly to a host processor or
to an external PHY. The SGMII port may interface to a fiber optic transceiver.

This driver currently supports vlan, fdb, mdb & mirror dsa switch operations.

Signed-off-by: Woojung Huh 
---
 drivers/net/dsa/Kconfig |2 +
 drivers/net/dsa/Makefile|1 +
 drivers/net/dsa/microchip/Kconfig   |   12 +
 drivers/net/dsa/microchip/Makefile  |2 +
 drivers/net/dsa/microchip/ksz_9477_reg.h| 1676 +++
 drivers/net/dsa/microchip/ksz_common.c  | 1255 
 drivers/net/dsa/microchip/ksz_priv.h|  210 
 drivers/net/dsa/microchip/ksz_spi.c |  215 
 include/linux/platform_data/microchip-ksz.h |   29 +
 9 files changed, 3402 insertions(+)
 create mode 100644 drivers/net/dsa/microchip/Kconfig
 create mode 100644 drivers/net/dsa/microchip/Makefile
 create mode 100644 drivers/net/dsa/microchip/ksz_9477_reg.h
 create mode 100644 drivers/net/dsa/microchip/ksz_common.c
 create mode 100644 drivers/net/dsa/microchip/ksz_priv.h
 create mode 100644 drivers/net/dsa/microchip/ksz_spi.c
 create mode 100644 include/linux/platform_data/microchip-ksz.h

diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig
index 68131a4..83a9bc8 100644
--- a/drivers/net/dsa/Kconfig
+++ b/drivers/net/dsa/Kconfig
@@ -39,6 +39,8 @@ config NET_DSA_MV88E6060
  This enables support for the Marvell 88E6060 ethernet switch
  chip.
 
+source "drivers/net/dsa/microchip/Kconfig"
+
 source "drivers/net/dsa/mv88e6xxx/Kconfig"
 
 config NET_DSA_QCA8K
diff --git a/drivers/net/dsa/Makefile b/drivers/net/dsa/Makefile
index 9613f36..4a5b5bd 100644
--- a/drivers/net/dsa/Makefile
+++ b/drivers/net/dsa/Makefile
@@ -8,4 +8,5 @@ obj-$(CONFIG_NET_DSA_SMSC_LAN9303) += lan9303-core.o
 obj-$(CONFIG_NET_DSA_SMSC_LAN9303_I2C) += lan9303_i2c.o
 obj-$(CONFIG_NET_DSA_SMSC_LAN9303_MDIO) += lan9303_mdio.o
 obj-y  += b53/
+obj-y  += microchip/
 obj-y  += mv88e6xxx/
diff --git a/drivers/net/dsa/microchip/Kconfig 
b/drivers/net/dsa/microchip/Kconfig
new file mode 100644
index 000..a8b8f59
--- /dev/null
+++ b/drivers/net/dsa/microchip/Kconfig
@@ -0,0 +1,12 @@
+menuconfig MICROCHIP_KSZ
+   tristate "Microchip KSZ series switch support"
+   depends on NET_DSA
+   select NET_DSA_TAG_KSZ
+   help
+ This driver adds support for Microchip KSZ switch chips.
+
+config MICROCHIP_KSZ_SPI_DRIVER
+   tristate "KSZ series SPI connected switch driver"
+   depends on MICROCHIP_KSZ && SPI
+   help
+ Select to enable support for registering switches configured through 
SPI.
diff --git a/drivers/net/dsa/microchip/Makefile 
b/drivers/net/dsa/microchip/Makefile
new file mode 100644
index 000..ed335e2
--- /dev/null
+++ b/drivers/net/dsa/microchip/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_MICROCHIP_KSZ)+= ksz_common.o
+obj-$(CONFIG_MICROCHIP_KSZ_SPI_DRIVER) += ksz_spi.o
diff --git a/drivers/net/dsa/microchip/ksz_9477_reg.h 
b/drivers/net/dsa/microchip/ksz_9477_reg.h
new file mode 100644
index 000..6aa6752
--- /dev/null
+++ b/drivers/net/dsa/microchip/ksz_9477_reg.h
@@ -0,0 +1,1676 @@
+/*
+ * Microchip KSZ9477 register definitions
+ *
+ * Copyright (C) 2017
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef __KSZ9477_REGS_H
+#define __KSZ9477_REGS_H
+
+#define KS_PRIO_M  0x7
+#define KS_PRIO_S  4
+
+/* 0 - Operation */
+#define REG_CHIP_ID0__10x
+
+#define REG_CHIP_ID1__10x0001
+
+#define FAMILY_ID  0x95
+#define FAMILY_ID_94   0x94
+#define FAMILY_ID_95   0x95
+#define FAMILY_ID_85   0x85
+#define FAMILY_ID_98   0x98
+#define FAMILY_ID_88   0x88
+
+#define REG_CHIP_ID2__1  

[PATCH] net: atheros: atl2: don't return zero on failure path in atl2_probe()

2017-05-19 Thread Alexey Khoroshilov
If dma mask checks fail in atl2_probe(), it breaks off initialization,
deallocates all resources, but returns zero.

The patch adds proper error code return value and
makes error code setup unified.

Found by Linux Driver Verification project (linuxtesting.org).

Signed-off-by: Alexey Khoroshilov 
---
 drivers/net/ethernet/atheros/atlx/atl2.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c 
b/drivers/net/ethernet/atheros/atlx/atl2.c
index 63f2deec2a52..77a1c03255de 100644
--- a/drivers/net/ethernet/atheros/atlx/atl2.c
+++ b/drivers/net/ethernet/atheros/atlx/atl2.c
@@ -1353,6 +1353,7 @@ static int atl2_probe(struct pci_dev *pdev, const struct 
pci_device_id *ent)
if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) &&
pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32))) {
printk(KERN_ERR "atl2: No usable DMA configuration, 
aborting\n");
+   err = -EIO;
goto err_dma;
}
 
@@ -1366,10 +1367,11 @@ static int atl2_probe(struct pci_dev *pdev, const 
struct pci_device_id *ent)
 * pcibios_set_master to do the needed arch specific settings */
pci_set_master(pdev);
 
-   err = -ENOMEM;
netdev = alloc_etherdev(sizeof(struct atl2_adapter));
-   if (!netdev)
+   if (!netdev) {
+   err = -ENOMEM;
goto err_alloc_etherdev;
+   }
 
SET_NETDEV_DEV(netdev, >dev);
 
@@ -1408,8 +1410,6 @@ static int atl2_probe(struct pci_dev *pdev, const struct 
pci_device_id *ent)
if (err)
goto err_sw_init;
 
-   err = -EIO;
-
netdev->hw_features = NETIF_F_HW_VLAN_CTAG_RX;
netdev->features |= (NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX);
 
-- 
2.7.4



Hiking Enthusiasts

2017-05-19 Thread Patrick Grimes


Hi,

Would you be interested in acquiring an email list of "Hiking Enthusiasts List" 
from USA?

We also have data for Sports Enthusiasts List, Scuba Divers List, Fishing 
Enthusiasts List, Outdoor and Camping Enthusiasts, Running Enthusiasts List, 
Boxing Enthusiasts List, Cycling Enthusiasts List, Spa and Resort Visitors 
List, Boat Owners, Cruise Travelers, Food Lovers, Apparel Buyers, travellers, 
Golfers List, Tennis, Luxury Brand Buyers, Gift buyers and many more.

Each record in the list contains Contact Name (First, Middle and Last Name), 
Mailing Address, List type and Opt-in email address.

All the contacts are opt-in verified, 100 percent permission based and can be 
used for unlimited multi-channel marketing.

Please let me know your thoughts towards procuring the Hiking Enthusiasts List.

Best Regards,
Patrick Grimes
Marketing Manager


We respect your privacy, if you do not wish to receive any further emails from 
our end, please reply with a subject “Leave Out”.



[PATCH net-next] macsec: double accounting of dropped rx/tx packets

2017-05-19 Thread Girish Moodalbail
The macsec implementation shouldn't account for rx/tx packets that are
dropped in the netdev framework. The netdev framework itself accounts
for such packets by atomically updating struct net_device`rx_dropped and
struct net_device`tx_dropped fields. Later on when the stats for macsec
link is retrieved, the packets dropped in netdev framework will be
included in dev_get_stats() after calling macsec.c`macsec_get_stats64()

Signed-off-by: Girish Moodalbail 
---
 drivers/net/macsec.c | 15 +++
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index cdc347b..91642fd 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -588,8 +588,6 @@ static void count_tx(struct net_device *dev, int ret, int 
len)
stats->tx_packets++;
stats->tx_bytes += len;
u64_stats_update_end(>syncp);
-   } else {
-   dev->stats.tx_dropped++;
}
 }
 
@@ -883,7 +881,7 @@ static void macsec_decrypt_done(struct crypto_async_request 
*base, int err)
struct macsec_dev *macsec = macsec_priv(dev);
struct macsec_rx_sa *rx_sa = macsec_skb_cb(skb)->rx_sa;
struct macsec_rx_sc *rx_sc = rx_sa->sc;
-   int len, ret;
+   int len;
u32 pn;
 
aead_request_free(macsec_skb_cb(skb)->req);
@@ -904,11 +902,8 @@ static void macsec_decrypt_done(struct 
crypto_async_request *base, int err)
macsec_reset_skb(skb, macsec->secy.netdev);
 
len = skb->len;
-   ret = gro_cells_receive(>gro_cells, skb);
-   if (ret == NET_RX_SUCCESS)
+   if (gro_cells_receive(>gro_cells, skb) == NET_RX_SUCCESS)
count_rx(dev, len);
-   else
-   macsec->secy.netdev->stats.rx_dropped++;
 
rcu_read_unlock_bh();
 
@@ -1037,7 +1032,6 @@ static void handle_not_macsec(struct sk_buff *skb)
 */
list_for_each_entry_rcu(macsec, >secys, secys) {
struct sk_buff *nskb;
-   int ret;
struct pcpu_secy_stats *secy_stats = 
this_cpu_ptr(macsec->stats);
 
if (macsec->secy.validate_frames == MACSEC_VALIDATE_STRICT) {
@@ -1054,13 +1048,10 @@ static void handle_not_macsec(struct sk_buff *skb)
 
nskb->dev = macsec->secy.netdev;
 
-   ret = netif_rx(nskb);
-   if (ret == NET_RX_SUCCESS) {
+   if (netif_rx(nskb) == NET_RX_SUCCESS) {
u64_stats_update_begin(_stats->syncp);
secy_stats->stats.InPktsUntagged++;
u64_stats_update_end(_stats->syncp);
-   } else {
-   macsec->secy.netdev->stats.rx_dropped++;
}
}
 
-- 
1.8.3.1



Re: [Intel-wired-lan] [PATCH 0/4] Configuring traffic classes via new hardware offload mechanism in tc/mqprio

2017-05-19 Thread John Fastabend
On 05/19/2017 05:58 PM, Amritha Nambiar wrote:
> The following series introduces a new hardware offload mode in
> tc/mqprio where the TCs, the queue configurations and bandwidth rate
> limits are offloaded to the hardware. The i40e driver enables the new
> mqprio hardware offload mechanism factoring the TCs, queue
> configuration and bandwidth rates by creating HW channel VSIs.
> 

nice work, fix your time stamp and line wrapping though.


> In this mode, the priority to traffic class mapping and the user
> specified queue ranges are used to configure the traffic class when
> the 'hw' option is set to 2. This is achieved by creating HW
> channels(VSI). A new channel is created for each of the traffic class
> configuration offloaded via mqprio framework except for the first TC
> (TC0) which is for the main VSI. TC0 for the main VSI is also
> reconfigured as per user provided queue parameters. Finally,
> bandwidth rate limits are set on these traffic classes through the
> mqprio offload framework by sending these rates in addition to the
> number of TCs and the queue configurations.
> Example:
> # tc qdisc add dev eth0 root mqprio num_tc 2  map 0 0 0 0 1 1 1 1\
>   queues 4@0 4@4 min_rate 0Mbit 0Mbit max_rate 55Mbit 60Mbit hw 2
> 
> To dump the bandwidth rates:
> 
> # tc qdisc show dev eth0
>   qdisc mqprio 804a: root  tc 2 map 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0
>queues:(0:3) (4:7)
>min rates:0bit 0bit
>max rates:55Mbit 60Mbit
> 

Looks reasonable to me thanks. Previously, rate limits were being set
via dcbnl but I guess this interface is slightly nicer in that it puts
all configuration in one spot. IMO it would be nice to push dcbnl users
over to this.

Thanks,
.John


Re: [PATCH net] bonding: fix accounting of active ports in 3ad

2017-05-19 Thread Jarod Wilson

On 2017-05-19 5:14 PM, David Miller wrote:

From: Jarod Wilson 
Date: Wed, 17 May 2017 11:11:44 -0400


As of 7bb11dc9f59d and 0622cab0341c, bond slaves in a 3ad bond are not
removed from the aggregator when they are down, and the active slave count
is NOT equal to number of ports in the aggregator, but rather the number
of ports in the aggregator that are still enabled.

  ...

Remedy it by using the same logic introduced in
7bb11dc9f59d in __bond_3ad_get_active_agg_info(), so sysfs, procfs and

   

netlink all report the number of active ports.


I think you mean to reference commit 0622cab0341c here not 7bb11dc9f59d.


D'oh, yes, you are entirely correct. Should I submit a v2 with that 
correction?


--
Jarod Wilson
ja...@redhat.com


Re: [PATCH net] bonding: fix randomly populated arp target array

2017-05-19 Thread Jarod Wilson

On 2017-05-19 4:38 PM, Mahesh Bandewar (महेश बंडेवार) wrote:

On Fri, May 19, 2017 at 11:46 AM, Jarod Wilson  wrote:

In commit dc9c4d0fe023, the arp_target array moved from a static global
to a local variable. By the nature of static globals, the array used to
be initialized to all 0. At present, it's full of random data, which
gets interpreted as arp_target values, when none have actually been
specified. Systems end up booting with spew along these lines:

[   32.161783] IPv6: ADDRCONF(NETDEV_UP): lacp0: link is not ready
[   32.168475] IPv6: ADDRCONF(NETDEV_UP): lacp0: link is not ready
[   32.175089] 8021q: adding VLAN 0 to HW filter on device lacp0
[   32.193091] IPv6: ADDRCONF(NETDEV_UP): lacp0: link is not ready
[   32.204892] lacp0: Setting MII monitoring interval to 100
[   32.211071] lacp0: Removing ARP target 216.124.228.17
[   32.216824] lacp0: Removing ARP target 218.160.255.255
[   32.222646] lacp0: Removing ARP target 185.170.136.184
[   32.228496] lacp0: invalid ARP target 255.255.255.255 specified for removal
[   32.236294] lacp0: option arp_ip_target: invalid value (-255.255.255.255)
[   32.243987] lacp0: Removing ARP target 56.125.228.17
[   32.249625] lacp0: Removing ARP target 218.160.255.255
[   32.255432] lacp0: Removing ARP target 15.157.233.184
[   32.261165] lacp0: invalid ARP target 255.255.255.255 specified for removal
[   32.268939] lacp0: option arp_ip_target: invalid value (-255.255.255.255)
[   32.276632] lacp0: Removing ARP target 16.0.0.0
[   32.281755] lacp0: Removing ARP target 218.160.255.255
[   32.287567] lacp0: Removing ARP target 72.125.228.17
[   32.293165] lacp0: Removing ARP target 218.160.255.255
[   32.298970] lacp0: Removing ARP target 8.125.228.17
[   32.304458] lacp0: Removing ARP target 218.160.255.255

None of these were actually specified as ARP targets, and the driver does
seem to clean up the mess okay, but it's rather noisy and confusing, leaks
values to userspace, and the 255.255.255.255 spew shows up even when debug
prints are disabled.

The fix: just zero out arp_target at init time.

While we're in here, init arp_all_targets_value in the right place.

Fixes: dc9c4d0fe023 ("bonding: reduce scope of some global variables")
CC: Mahesh Bandewar 
CC: Jay Vosburgh 
CC: Veaceslav Falico 
CC: Andy Gospodarek 
CC: netdev@vger.kernel.org
CC: sta...@vger.kernel.org
Signed-off-by: Jarod Wilson 
---
  drivers/net/bonding/bond_main.c | 5 ++---
  1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 2be78807fd6e..73313318399c 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4271,10 +4271,10 @@ static int bond_check_params(struct bond_params *params)
 int arp_validate_value, fail_over_mac_value, primary_reselect_value, i;
 struct bond_opt_value newval;
 const struct bond_opt_value *valptr;
-   int arp_all_targets_value;
+   int arp_all_targets_value = 0;


I think this is unnecessary as long as the var is initialized before its use.


No, it's not part of the fix, it just irked me. The extra line is 
completely unnecessary, less lines is good. Possibly shouldn't have 
folded it into this fix, will respin and submit the cleanup another 
time, if necessary.



 u16 ad_actor_sys_prio = 0;
 u16 ad_user_port_key = 0;
-   __be32 arp_target[BOND_MAX_ARP_TARGETS];
+   __be32 arp_target[BOND_MAX_ARP_TARGETS] = { 0 };


this is the only change required to avoid reported error.


Absolutely correct.

--
Jarod Wilson
ja...@redhat.com


[PATCH net-next 10/13] nfp: provide linking on port structures

2017-05-19 Thread Jakub Kicinski
Add link to nfp_ports to make it possible to iterate over all ports.
This will come in handy when some ports may be representors.

Signed-off-by: Jakub Kicinski 
Reviewed-by: Simon Horman 
---
 drivers/net/ethernet/netronome/nfp/nfp_main.c |  1 +
 drivers/net/ethernet/netronome/nfp/nfp_main.h |  2 ++
 drivers/net/ethernet/netronome/nfp/nfp_net_main.c | 15 ++-
 drivers/net/ethernet/netronome/nfp/nfp_port.c |  7 +++
 drivers/net/ethernet/netronome/nfp/nfp_port.h |  3 +++
 5 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c 
b/drivers/net/ethernet/netronome/nfp/nfp_main.c
index 9fbc7eedc017..bb586ce1ea06 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c
@@ -341,6 +341,7 @@ static int nfp_pci_probe(struct pci_dev *pdev,
goto err_rel_regions;
}
INIT_LIST_HEAD(>vnics);
+   INIT_LIST_HEAD(>ports);
pci_set_drvdata(pdev, pf);
pf->pdev = pdev;
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.h 
b/drivers/net/ethernet/netronome/nfp/nfp_main.h
index b1ddea0e2406..9f1127895dd6 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_main.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_main.h
@@ -72,6 +72,7 @@ struct nfp_eth_table;
  * @max_data_vnics:Number of data vNICs app firmware supports
  * @num_vnics: Number of vNICs spawned
  * @vnics: Linked list of vNIC structures (struct nfp_net)
+ * @ports: Linked list of port structures (struct nfp_port)
  * @port_refresh_work: Work entry for taking netdevs out
  */
 struct nfp_pf {
@@ -100,6 +101,7 @@ struct nfp_pf {
unsigned int num_vnics;
 
struct list_head vnics;
+   struct list_head ports;
struct work_struct port_refresh_work;
struct mutex lock;
 };
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c 
b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
index 0114071dc0fd..9c4ba929cb0a 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
@@ -548,6 +548,7 @@ static void nfp_net_refresh_vnics(struct work_struct *work)
 port_refresh_work);
struct nfp_eth_table *eth_table;
struct nfp_net *nn, *next;
+   struct nfp_port *port;
 
mutex_lock(>lock);
 
@@ -557,9 +558,8 @@ static void nfp_net_refresh_vnics(struct work_struct *work)
 
/* Update state of all ports */
rtnl_lock();
-   list_for_each_entry(nn, >vnics, vnic_list)
-   if (nn->port)
-   clear_bit(NFP_PORT_CHANGED, >port->flags);
+   list_for_each_entry(port, >ports, port_list)
+   clear_bit(NFP_PORT_CHANGED, >flags);
 
eth_table = nfp_eth_read_ports(pf->cpp);
if (!eth_table) {
@@ -568,12 +568,9 @@ static void nfp_net_refresh_vnics(struct work_struct *work)
goto out;
}
 
-   list_for_each_entry(nn, >vnics, vnic_list) {
-   if (!__nfp_port_get_eth_port(nn->port))
-   continue;
-
-   nfp_net_eth_port_update(pf->cpp, nn->port, eth_table);
-   }
+   list_for_each_entry(port, >ports, port_list)
+   if (__nfp_port_get_eth_port(port))
+   nfp_net_eth_port_update(pf->cpp, port, eth_table);
rtnl_unlock();
 
kfree(eth_table);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.c 
b/drivers/net/ethernet/netronome/nfp/nfp_port.c
index 8d0599fc6321..f73a5452f02b 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_port.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_port.c
@@ -86,6 +86,7 @@ nfp_port_alloc(struct nfp_app *app, enum nfp_port_type type,
   struct net_device *netdev)
 {
struct nfp_port *port;
+   struct nfp_pf *pf;
 
port = kzalloc(sizeof(*port), GFP_KERNEL);
if (!port)
@@ -95,10 +96,16 @@ nfp_port_alloc(struct nfp_app *app, enum nfp_port_type type,
port->type = type;
port->app = app;
 
+   pf = nfp_app_pf(app);
+   list_add_tail(>port_list, >ports);
+
return port;
 }
 
 void nfp_port_free(struct nfp_port *port)
 {
+   if (!port)
+   return;
+   list_del(>port_list);
kfree(port);
 }
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.h 
b/drivers/net/ethernet/netronome/nfp/nfp_port.h
index d674c8623a65..02d664f58482 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_port.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_port.h
@@ -68,6 +68,7 @@ enum nfp_port_flags {
  * @app:   backpointer to the app structure
  * @eth_id:for %NFP_PORT_PHYS_PORT port ID in NFP enumeration scheme
  * @eth_port:  for %NFP_PORT_PHYS_PORT translated ETH Table port entry
+ * @port_list: entry on pf's list of ports
  */
 struct nfp_port {

[PATCH net-next 04/13] nfp: add nfp_net_pf_free_vnic() function

2017-05-19 Thread Jakub Kicinski
Soon a third place will need to free a struct nfp_net.  Add a free
counterpart to nfp_net_pf_alloc_vnic().

Signed-off-by: Jakub Kicinski 
Reviewed-by: Simon Horman 
---
 drivers/net/ethernet/netronome/nfp/nfp_net_main.c | 16 +---
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c 
b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
index 532371940fd6..5f0c58a56182 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
@@ -268,16 +268,20 @@ static u8 __iomem *nfp_net_pf_map_ctrl_bar(struct nfp_pf 
*pf)
return ctrl_bar;
 }
 
+static void nfp_net_pf_free_vnic(struct nfp_pf *pf, struct nfp_net *nn)
+{
+   list_del(>vnic_list);
+   pf->num_vnics--;
+   nfp_net_free(nn);
+}
+
 static void nfp_net_pf_free_vnics(struct nfp_pf *pf)
 {
struct nfp_net *nn;
 
while (!list_empty(>vnics)) {
nn = list_first_entry(>vnics, struct nfp_net, vnic_list);
-   list_del(>vnic_list);
-   pf->num_vnics--;
-
-   nfp_net_free(nn);
+   nfp_net_pf_free_vnic(pf, nn);
}
 }
 
@@ -518,9 +522,7 @@ static void nfp_net_refresh_vnics(struct work_struct *work)
nfp_net_debugfs_dir_clean(>debugfs_dir);
nfp_net_clean(nn);
 
-   list_del(>vnic_list);
-   pf->num_vnics--;
-   nfp_net_free(nn);
+   nfp_net_pf_free_vnic(pf, nn);
}
 
if (list_empty(>vnics))
-- 
2.11.0



[PATCH net-next 00/13] nfp: introduce nfp_port and nfp_app

2017-05-19 Thread Jakub Kicinski
Hi!

This series builds foundation for upcoming development.  So far the nfp
driver was focused on delivering basic NIC-like functionality.  We want
to switch gears a bit going forward and support more advanced applications.

First few patches are naming clean ups and reshuffling.  The two main 
structures this series adds are nfp_port and nfp_app.  

nfp_port represents a device port, where port can mean external port,
VF or PF.  For now only external port/MAC/PHY port is added.  nfp_port
is supposed to make it easy to share ethtool and devlink code regardless
of netdev type (full vNIC vs representors).

nfp_app is an abstraction which should allow easier development of new
applications.  So far we have relied fully on port capabilities to detect
which offloads and features are available.  The usual development model
for NFP is that people start with one of our "core NIC" FW apps (C one, 
or a macro assembler one) and build advanced functionality on top of that.
Therefore basic netdev code is shared, but the higher-level logic is 
usually more project specific.  The higher-level logic is also per-adapter
rather than per-port, so creating per-adapter control entity makes sense.
Hopefully the separation of lower-level netdev code and application logic
will help us limit interdependencies and accelerate parallel projects
(e.g. TC flower offloads vs eBPF offload).


Jakub Kicinski (12):
  nfp: make nfp_net alloc/init/cleanup/free not depend on netdevs
  nfp: rename netdev/port to vNIC
  nfp: add nfp_net_pf_free_vnic() function
  nfp: introduce very minimal nfp_app
  nfp: disallow mixing vNICs with and without NSP port entry
  nfp: introduce nfp_port
  nfp: update port state in place
  nfp: move refresh tracking into the port structure
  nfp: provide linking on port structures
  nfp: mark port state as stale after reconfig
  nfp: mark port state as stale if update failed
  nfp: refresh port state before reporting autonegotiation

Simon Horman (1):
  nfp: add nfp_cppcore_pcie_unit() helper

 drivers/net/ethernet/netronome/nfp/Makefile|   4 +-
 drivers/net/ethernet/netronome/nfp/nfp_app.c   |  80 +
 drivers/net/ethernet/netronome/nfp/nfp_app.h   |  48 +++
 drivers/net/ethernet/netronome/nfp/nfp_main.c  |   1 +
 drivers/net/ethernet/netronome/nfp/nfp_main.h  |  28 +-
 drivers/net/ethernet/netronome/nfp/nfp_net.h   |  37 +--
 .../net/ethernet/netronome/nfp/nfp_net_common.c|  79 ++---
 .../net/ethernet/netronome/nfp/nfp_net_debugfs.c   |   4 +-
 .../net/ethernet/netronome/nfp/nfp_net_ethtool.c   |  63 ++--
 drivers/net/ethernet/netronome/nfp/nfp_net_main.c  | 346 -
 .../net/ethernet/netronome/nfp/nfp_netvf_main.c|  14 +-
 drivers/net/ethernet/netronome/nfp/nfp_port.c  | 123 
 drivers/net/ethernet/netronome/nfp/nfp_port.h  | 104 +++
 .../net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h   |  11 +
 14 files changed, 677 insertions(+), 265 deletions(-)
 create mode 100644 drivers/net/ethernet/netronome/nfp/nfp_app.c
 create mode 100644 drivers/net/ethernet/netronome/nfp/nfp_app.h
 create mode 100644 drivers/net/ethernet/netronome/nfp/nfp_port.c
 create mode 100644 drivers/net/ethernet/netronome/nfp/nfp_port.h

-- 
2.11.0



[PATCH net-next 02/13] nfp: make nfp_net alloc/init/cleanup/free not depend on netdevs

2017-05-19 Thread Jakub Kicinski
struct nfp_net represents a vNIC, we will be moving away from the
requirement for every vNIC to have a netdev associated with it.
Remove "netdev" from some function names and prefer passing
struct nfp_net pointer as argument instead of struct net_device *.

Signed-off-by: Jakub Kicinski 
Reviewed-by: Simon Horman 
---
 drivers/net/ethernet/netronome/nfp/nfp_net.h   | 12 
 .../net/ethernet/netronome/nfp/nfp_net_common.c| 35 ++
 drivers/net/ethernet/netronome/nfp/nfp_net_main.c  | 14 -
 .../net/ethernet/netronome/nfp/nfp_netvf_main.c| 10 +++
 4 files changed, 35 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h 
b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index 7b9518cbe965..04609191ca88 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -807,11 +807,13 @@ void nfp_net_get_fw_version(struct nfp_net_fw_version 
*fw_ver,
void __iomem *ctrl_bar);
 
 struct nfp_net *
-nfp_net_netdev_alloc(struct pci_dev *pdev,
-unsigned int max_tx_rings, unsigned int max_rx_rings);
-void nfp_net_netdev_free(struct nfp_net *nn);
-int nfp_net_netdev_init(struct net_device *netdev);
-void nfp_net_netdev_clean(struct net_device *netdev);
+nfp_net_alloc(struct pci_dev *pdev,
+ unsigned int max_tx_rings, unsigned int max_rx_rings);
+void nfp_net_free(struct nfp_net *nn);
+
+int nfp_net_init(struct nfp_net *nn);
+void nfp_net_clean(struct nfp_net *nn);
+
 void nfp_net_set_ethtool_ops(struct net_device *netdev);
 void nfp_net_info(struct nfp_net *nn);
 int nfp_net_reconfig(struct nfp_net *nn, u32 update);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c 
b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index da83e17b8b20..b427c95c5acd 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -516,11 +516,10 @@ nfp_net_rx_ring_init(struct nfp_net_rx_ring *rx_ring,
 
 /**
  * nfp_net_vecs_init() - Assign IRQs and setup rvecs.
- * @netdev:   netdev structure
+ * @nn:NFP Network structure
  */
-static void nfp_net_vecs_init(struct net_device *netdev)
+static void nfp_net_vecs_init(struct nfp_net *nn)
 {
-   struct nfp_net *nn = netdev_priv(netdev);
struct nfp_net_r_vector *r_vec;
int r;
 
@@ -3087,7 +3086,7 @@ void nfp_net_info(struct nfp_net *nn)
 }
 
 /**
- * nfp_net_netdev_alloc() - Allocate netdev and related structure
+ * nfp_net_alloc() - Allocate netdev and related structure
  * @pdev: PCI device
  * @max_tx_rings: Maximum number of TX rings supported by device
  * @max_rx_rings: Maximum number of RX rings supported by device
@@ -3097,9 +3096,9 @@ void nfp_net_info(struct nfp_net *nn)
  *
  * Return: NFP Net device structure, or ERR_PTR on error.
  */
-struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev,
-unsigned int max_tx_rings,
-unsigned int max_rx_rings)
+struct nfp_net *nfp_net_alloc(struct pci_dev *pdev,
+ unsigned int max_tx_rings,
+ unsigned int max_rx_rings)
 {
struct net_device *netdev;
struct nfp_net *nn;
@@ -3144,10 +3143,10 @@ struct nfp_net *nfp_net_netdev_alloc(struct pci_dev 
*pdev,
 }
 
 /**
- * nfp_net_netdev_free() - Undo what @nfp_net_netdev_alloc() did
+ * nfp_net_free() - Undo what @nfp_net_alloc() did
  * @nn:  NFP Net device to reconfigure
  */
-void nfp_net_netdev_free(struct nfp_net *nn)
+void nfp_net_free(struct nfp_net *nn)
 {
free_netdev(nn->dp.netdev);
 }
@@ -3221,14 +3220,14 @@ static void nfp_net_irqmod_init(struct nfp_net *nn)
 }
 
 /**
- * nfp_net_netdev_init() - Initialise/finalise the netdev structure
- * @netdev:  netdev structure
+ * nfp_net_init() - Initialise/finalise the nfp_net structure
+ * @nn:NFP Net device structure
  *
  * Return: 0 on success or negative errno on error.
  */
-int nfp_net_netdev_init(struct net_device *netdev)
+int nfp_net_init(struct nfp_net *nn)
 {
-   struct nfp_net *nn = netdev_priv(netdev);
+   struct net_device *netdev = nn->dp.netdev;
int err;
 
nn->dp.rx_dma_dir = DMA_FROM_DEVICE;
@@ -3367,19 +3366,17 @@ int nfp_net_netdev_init(struct net_device *netdev)
netif_carrier_off(netdev);
 
nfp_net_set_ethtool_ops(netdev);
-   nfp_net_vecs_init(netdev);
+   nfp_net_vecs_init(nn);
 
return register_netdev(netdev);
 }
 
 /**
- * nfp_net_netdev_clean() - Undo what nfp_net_netdev_init() did.
- * @netdev:  netdev structure
+ * nfp_net_clean() - Undo what nfp_net_init() did.
+ * @nn:NFP Net device structure
  */
-void nfp_net_netdev_clean(struct net_device *netdev)
+void nfp_net_clean(struct nfp_net *nn)
 {
-   struct 

[PATCH net-next 07/13] nfp: introduce nfp_port

2017-05-19 Thread Jakub Kicinski
Encapsulate port information into struct nfp_port.  nfp_port will
soon be extended to contain devlink_port information.  It also makes
it easier to reuse port-related code between vNICs and representors.

Signed-off-by: Jakub Kicinski 
Reviewed-by: Simon Horman 
---
 drivers/net/ethernet/netronome/nfp/Makefile|   3 +-
 drivers/net/ethernet/netronome/nfp/nfp_net.h   |   7 +-
 .../net/ethernet/netronome/nfp/nfp_net_common.c|  28 ++
 .../net/ethernet/netronome/nfp/nfp_net_ethtool.c   |  39 +---
 drivers/net/ethernet/netronome/nfp/nfp_net_main.c  |  68 +-
 drivers/net/ethernet/netronome/nfp/nfp_port.c  | 104 +
 drivers/net/ethernet/netronome/nfp/nfp_port.h  |  87 +
 7 files changed, 274 insertions(+), 62 deletions(-)
 create mode 100644 drivers/net/ethernet/netronome/nfp/nfp_port.c
 create mode 100644 drivers/net/ethernet/netronome/nfp/nfp_port.h

diff --git a/drivers/net/ethernet/netronome/nfp/Makefile 
b/drivers/net/ethernet/netronome/nfp/Makefile
index a6b9c4dcbe12..e8333283ada6 100644
--- a/drivers/net/ethernet/netronome/nfp/Makefile
+++ b/drivers/net/ethernet/netronome/nfp/Makefile
@@ -20,7 +20,8 @@ nfp-objs := \
nfp_net_ethtool.o \
nfp_net_offload.o \
nfp_net_main.o \
-   nfp_netvf_main.o
+   nfp_netvf_main.o \
+   nfp_port.o
 
 ifeq ($(CONFIG_BPF_SYSCALL),y)
 nfp-objs += \
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h 
b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index d8edd61a5ad1..6a774ac54237 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -116,6 +116,7 @@ struct nfp_cpp;
 struct nfp_eth_table_port;
 struct nfp_net;
 struct nfp_net_r_vector;
+struct nfp_port;
 
 /* Convenience macro for wrapping descriptor index on ring size */
 #define D_IDX(ring, idx)   ((idx) & ((ring)->cnt - 1))
@@ -558,7 +559,7 @@ struct nfp_net_dp {
  * @vnic_list: Entry on device vNIC list
  * @pdev:  Backpointer to PCI device
  * @app:   APP handle if available
- * @eth_port:  Translated ETH Table port entry
+ * @port:  Pointer to nfp_port structure if vNIC is a port
  */
 struct nfp_net {
struct nfp_net_dp dp;
@@ -630,7 +631,7 @@ struct nfp_net {
struct pci_dev *pdev;
struct nfp_app *app;
 
-   struct nfp_eth_table_port *eth_port;
+   struct nfp_port *port;
 };
 
 /* Functions to read/write from/to a BAR
@@ -835,8 +836,6 @@ int nfp_net_ring_reconfig(struct nfp_net *nn, struct 
nfp_net_dp *new,
  struct netlink_ext_ack *extack);
 
 bool nfp_net_link_changed_read_clear(struct nfp_net *nn);
-int nfp_net_refresh_eth_port(struct nfp_net *nn);
-void nfp_net_refresh_port_table(struct nfp_net *nn);
 
 #ifdef CONFIG_NFP_DEBUG
 void nfp_net_debugfs_create(void);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c 
b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index b427c95c5acd..25ec0371e280 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -70,6 +70,7 @@
 #include "nfpcore/nfp_nsp.h"
 #include "nfp_net_ctrl.h"
 #include "nfp_net.h"
+#include "nfp_port.h"
 
 /**
  * nfp_net_get_fw_version() - Read and parse the FW version
@@ -2846,26 +2847,6 @@ nfp_net_features_check(struct sk_buff *skb, struct 
net_device *dev,
return features;
 }
 
-static int
-nfp_net_get_phys_port_name(struct net_device *netdev, char *name, size_t len)
-{
-   struct nfp_net *nn = netdev_priv(netdev);
-   int err;
-
-   if (!nn->eth_port)
-   return -EOPNOTSUPP;
-
-   if (!nn->eth_port->is_split)
-   err = snprintf(name, len, "p%d", nn->eth_port->label_port);
-   else
-   err = snprintf(name, len, "p%ds%d", nn->eth_port->label_port,
-  nn->eth_port->label_subport);
-   if (err >= len)
-   return -EINVAL;
-
-   return 0;
-}
-
 /**
  * nfp_net_set_vxlan_port() - set vxlan port in SW and reconfigure HW
  * @nn:   NFP Net device to reconfigure
@@ -3040,12 +3021,17 @@ static const struct net_device_ops nfp_net_netdev_ops = 
{
.ndo_set_mac_address= eth_mac_addr,
.ndo_set_features   = nfp_net_set_features,
.ndo_features_check = nfp_net_features_check,
-   .ndo_get_phys_port_name = nfp_net_get_phys_port_name,
+   .ndo_get_phys_port_name = nfp_port_get_phys_port_name,
.ndo_udp_tunnel_add = nfp_net_add_vxlan_port,
.ndo_udp_tunnel_del = nfp_net_del_vxlan_port,
.ndo_xdp= nfp_net_xdp,
 };
 
+bool nfp_netdev_is_nfp_net(struct net_device *netdev)
+{
+   return netdev->netdev_ops == _net_netdev_ops;
+}
+
 /**
  * nfp_net_info() - Print general info about the NIC
  * @nn:  NFP Net device to 

[PATCH net-next 13/13] nfp: refresh port state before reporting autonegotiation

2017-05-19 Thread Jakub Kicinski
State of autonegotiation may have changed but is not yet refreshed.
Make sure ethtool respects the NFP_PORT_CHANGED flag when looking
at autoneg.

Signed-off-by: Jakub Kicinski 
Reviewed-by: Simon Horman 
---
 drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c | 10 +-
 drivers/net/ethernet/netronome/nfp/nfp_port.c| 12 
 drivers/net/ethernet/netronome/nfp/nfp_port.h|  1 +
 3 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c 
b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
index 050629df5cff..46fafee87371 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -209,7 +209,7 @@ nfp_net_get_link_ksettings(struct net_device *netdev,
cmd->base.duplex = DUPLEX_UNKNOWN;
 
port = nfp_port_from_netdev(netdev);
-   eth_port = __nfp_port_get_eth_port(port);
+   eth_port = nfp_port_get_eth_port(port);
if (eth_port)
cmd->base.autoneg = eth_port->aneg != NFP_ANEG_DISABLED ?
AUTONEG_ENABLE : AUTONEG_DISABLE;
@@ -219,14 +219,6 @@ nfp_net_get_link_ksettings(struct net_device *netdev,
 
/* Use link speed from ETH table if available, otherwise try the BAR */
if (eth_port) {
-   int err;
-
-   if (test_bit(NFP_PORT_CHANGED, >flags)) {
-   err = nfp_net_refresh_eth_port(port);
-   if (err)
-   return err;
-   }
-
cmd->base.port = eth_port->port_type;
cmd->base.speed = eth_port->speed;
cmd->base.duplex = DUPLEX_FULL;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.c 
b/drivers/net/ethernet/netronome/nfp/nfp_port.c
index f73a5452f02b..2c9e0e36d743 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_port.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_port.c
@@ -58,6 +58,18 @@ struct nfp_eth_table_port *__nfp_port_get_eth_port(struct 
nfp_port *port)
return port->eth_port;
 }
 
+struct nfp_eth_table_port *nfp_port_get_eth_port(struct nfp_port *port)
+{
+   if (!__nfp_port_get_eth_port(port))
+   return NULL;
+
+   if (test_bit(NFP_PORT_CHANGED, >flags))
+   if (nfp_net_refresh_eth_port(port))
+   return NULL;
+
+   return __nfp_port_get_eth_port(port);
+}
+
 int
 nfp_port_get_phys_port_name(struct net_device *netdev, char *name, size_t len)
 {
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.h 
b/drivers/net/ethernet/netronome/nfp/nfp_port.h
index 02d664f58482..34276e1d673d 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_port.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_port.h
@@ -88,6 +88,7 @@ bool nfp_netdev_is_nfp_net(struct net_device *netdev);
 
 struct nfp_port *nfp_port_from_netdev(struct net_device *netdev);
 struct nfp_eth_table_port *__nfp_port_get_eth_port(struct nfp_port *port);
+struct nfp_eth_table_port *nfp_port_get_eth_port(struct nfp_port *port);
 
 int
 nfp_port_get_phys_port_name(struct net_device *netdev, char *name, size_t len);
-- 
2.11.0



[PATCH net-next 09/13] nfp: move refresh tracking into the port structure

2017-05-19 Thread Jakub Kicinski
Track whether physical port's state has changed since last refresh
inside the nfp_port structure instead of the vNIC structure.

Signed-off-by: Jakub Kicinski 
Reviewed-by: Simon Horman 
---
 drivers/net/ethernet/netronome/nfp/nfp_net.h |  4 
 drivers/net/ethernet/netronome/nfp/nfp_net_common.c  | 16 ++--
 drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c | 10 +-
 drivers/net/ethernet/netronome/nfp/nfp_net_main.c| 10 --
 drivers/net/ethernet/netronome/nfp/nfp_port.h| 13 +
 5 files changed, 28 insertions(+), 25 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h 
b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index 6a774ac54237..4d45ba2d355f 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -543,7 +543,6 @@ struct nfp_net_dp {
  * @reconfig_sync_present:  Some thread is performing synchronous reconfig
  * @reconfig_timer:Timer for async reading of reconfig results
  * @link_up:Is the link up?
- * @link_changed:  Has link state changes since last port refresh?
  * @link_status_lock:  Protects @link_* and ensures atomicity with BAR reading
  * @rx_coalesce_usecs:  RX interrupt moderation usecs delay parameter
  * @rx_coalesce_max_frames: RX interrupt moderation frame count parameter
@@ -601,7 +600,6 @@ struct nfp_net {
u32 me_freq_mhz;
 
bool link_up;
-   bool link_changed;
spinlock_t link_status_lock;
 
spinlock_t reconfig_lock;
@@ -835,8 +833,6 @@ struct nfp_net_dp *nfp_net_clone_dp(struct nfp_net *nn);
 int nfp_net_ring_reconfig(struct nfp_net *nn, struct nfp_net_dp *new,
  struct netlink_ext_ack *extack);
 
-bool nfp_net_link_changed_read_clear(struct nfp_net *nn);
-
 #ifdef CONFIG_NFP_DEBUG
 void nfp_net_debugfs_create(void);
 void nfp_net_debugfs_destroy(void);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c 
b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 25ec0371e280..f9d8f4311f15 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -392,19 +392,6 @@ static irqreturn_t nfp_net_irq_rxtx(int irq, void *data)
return IRQ_HANDLED;
 }
 
-bool nfp_net_link_changed_read_clear(struct nfp_net *nn)
-{
-   unsigned long flags;
-   bool ret;
-
-   spin_lock_irqsave(>link_status_lock, flags);
-   ret = nn->link_changed;
-   nn->link_changed = false;
-   spin_unlock_irqrestore(>link_status_lock, flags);
-
-   return ret;
-}
-
 /**
  * nfp_net_read_link_status() - Reread link status from control BAR
  * @nn:   NFP Network structure
@@ -424,7 +411,8 @@ static void nfp_net_read_link_status(struct nfp_net *nn)
goto out;
 
nn->link_up = link_up;
-   nn->link_changed = true;
+   if (nn->port)
+   set_bit(NFP_PORT_CHANGED, >port->flags);
 
if (nn->link_up) {
netif_carrier_on(nn->dp.netdev);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c 
b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
index e9c860a6dbb8..050629df5cff 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -217,15 +217,11 @@ nfp_net_get_link_ksettings(struct net_device *netdev,
if (!netif_carrier_ok(netdev))
return 0;
 
-   if (!nfp_netdev_is_nfp_net(netdev))
-   return -EOPNOTSUPP;
-   nn = netdev_priv(netdev);
-
/* Use link speed from ETH table if available, otherwise try the BAR */
if (eth_port) {
int err;
 
-   if (nfp_net_link_changed_read_clear(nn)) {
+   if (test_bit(NFP_PORT_CHANGED, >flags)) {
err = nfp_net_refresh_eth_port(port);
if (err)
return err;
@@ -237,6 +233,10 @@ nfp_net_get_link_ksettings(struct net_device *netdev,
return 0;
}
 
+   if (!nfp_netdev_is_nfp_net(netdev))
+   return -EOPNOTSUPP;
+   nn = netdev_priv(netdev);
+
sts = nn_readl(nn, NFP_NET_CFG_STS);
 
ls = FIELD_GET(NFP_NET_CFG_STS_LINK_RATE, sts);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c 
b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
index 7bed799dee83..0114071dc0fd 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
@@ -555,16 +555,19 @@ static void nfp_net_refresh_vnics(struct work_struct 
*work)
if (list_empty(>vnics))
goto out;
 
+   /* Update state of all ports */
+   rtnl_lock();
list_for_each_entry(nn, >vnics, vnic_list)
-   nfp_net_link_changed_read_clear(nn);
+   if (nn->port)
+   

[PATCH net-next 01/13] nfp: add nfp_cppcore_pcie_unit() helper

2017-05-19 Thread Jakub Kicinski
From: Simon Horman 

Add nfp_cppcore_pcie_unit() helper to retrieve the PCIE unit of a CPP
handle and use the new helper as appropriate.

Signed-off-by: Simon Horman 
Reviewed-by: Jakub Kicinski 
---
 drivers/net/ethernet/netronome/nfp/nfp_net_main.c| 16 
 drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h | 11 +++
 2 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c 
b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
index 8cb87cbe1120..16115973112c 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
@@ -190,15 +190,11 @@ nfp_net_find_port(struct nfp_eth_table *eth_tbl, unsigned 
int id)
 static unsigned int nfp_net_pf_get_num_ports(struct nfp_pf *pf)
 {
char name[256];
-   u16 interface;
-   int pcie_pf;
int err = 0;
u64 val;
 
-   interface = nfp_cpp_interface(pf->cpp);
-   pcie_pf = NFP_CPP_INTERFACE_UNIT_of(interface);
-
-   snprintf(name, sizeof(name), "nfd_cfg_pf%d_num_ports", pcie_pf);
+   snprintf(name, sizeof(name), "nfd_cfg_pf%u_num_ports",
+nfp_cppcore_pcie_unit(pf->cpp));
 
val = nfp_rtsym_read_le(pf->cpp, name, &err);
/* Default to one port */
@@ -241,13 +237,9 @@ static u8 __iomem *nfp_net_pf_map_ctrl_bar(struct nfp_pf 
*pf)
const struct nfp_rtsym *ctrl_sym;
u8 __iomem *ctrl_bar;
char pf_symbol[256];
-   u16 interface;
-   int pcie_pf;
-
-   interface = nfp_cpp_interface(pf->cpp);
-   pcie_pf = NFP_CPP_INTERFACE_UNIT_of(interface);
 
-   snprintf(pf_symbol, sizeof(pf_symbol), "_pf%d_net_bar0", pcie_pf);
+   snprintf(pf_symbol, sizeof(pf_symbol), "_pf%u_net_bar0",
+nfp_cppcore_pcie_unit(pf->cpp));
 
ctrl_sym = nfp_rtsym_lookup(pf->cpp, pf_symbol);
if (!ctrl_sym) {
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h 
b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h
index edecc0a27485..154b0b594184 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h
@@ -289,6 +289,17 @@ int nfp_cpp_mutex_lock(struct nfp_cpp_mutex *mutex);
 int nfp_cpp_mutex_unlock(struct nfp_cpp_mutex *mutex);
 int nfp_cpp_mutex_trylock(struct nfp_cpp_mutex *mutex);
 
+/**
+ * nfp_cppcore_pcie_unit() - Get PCI Unit of a CPP handle
+ * @cpp:   CPP handle
+ *
+ * Return: PCI unit for the NFP CPP handle
+ */
+static inline u8 nfp_cppcore_pcie_unit(struct nfp_cpp *cpp)
+{
+   return NFP_CPP_INTERFACE_UNIT_of(nfp_cpp_interface(cpp));
+}
+
 struct nfp_cpp_explicit;
 
 struct nfp_cpp_explicit_command {
-- 
2.11.0



[PATCH net-next 11/13] nfp: mark port state as stale after reconfig

2017-05-19 Thread Jakub Kicinski
After port configuration is performed mark it as changed. This
will close a window of time between configuration and async
state refresh which runs from a workqueue where old port state
would be reported.

Signed-off-by: Jakub Kicinski 
Reviewed-by: Simon Horman 
---
 drivers/net/ethernet/netronome/nfp/nfp_net_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c 
b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
index 9c4ba929cb0a..3ee9a9772110 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
@@ -596,6 +596,8 @@ void nfp_net_refresh_port_table(struct nfp_port *port)
 {
struct nfp_pf *pf = nfp_app_pf(port->app);
 
+   set_bit(NFP_PORT_CHANGED, &port->flags);
+
schedule_work(&pf->port_refresh_work);
 }
 
-- 
2.11.0



[PATCH net-next 06/13] nfp: disallow mixing vNICs with and without NSP port entry

2017-05-19 Thread Jakub Kicinski
We only support core NIC apps which have vNICs for each physical port/
split and no representors right now.  Enforce that either each vNIC has
a NSP eth_table entry or if NSP port table is not available none do.

One scenario this will prevent from happening is user force-loading
wrong firmware file if FW app requires different firmwares per media
config.

While at it move some code to nfp_net_pf_alloc_vnic() to make it
counter-match nfp_net_pf_free_vnic() better.

Signed-off-by: Jakub Kicinski 
Reviewed-by: Simon Horman 
---
 drivers/net/ethernet/netronome/nfp/nfp_net_main.c | 52 ++-
 1 file changed, 32 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c 
b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
index 17ff8a88fc24..d54506b3f783 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
@@ -289,7 +289,7 @@ static struct nfp_net *
 nfp_net_pf_alloc_vnic(struct nfp_pf *pf, void __iomem *ctrl_bar,
  void __iomem *tx_bar, void __iomem *rx_bar,
  int stride, struct nfp_net_fw_version *fw_ver,
- struct nfp_eth_table_port *eth_port)
+ unsigned int eth_id)
 {
u32 n_tx_rings, n_rx_rings;
struct nfp_net *nn;
@@ -310,7 +310,10 @@ nfp_net_pf_alloc_vnic(struct nfp_pf *pf, void __iomem 
*ctrl_bar,
nn->dp.is_vf = 0;
nn->stride_rx = stride;
nn->stride_tx = stride;
-   nn->eth_port = eth_port;
+   nn->eth_port = nfp_net_find_port(pf->eth_tbl, eth_id);
+
+   pf->num_vnics++;
+   list_add_tail(&nn->vnic_list, &pf->vnics);
 
return nn;
 }
@@ -346,11 +349,16 @@ nfp_net_pf_alloc_vnics(struct nfp_pf *pf, void __iomem 
*ctrl_bar,
   int stride, struct nfp_net_fw_version *fw_ver)
 {
u32 prev_tx_base, prev_rx_base, tgt_tx_base, tgt_rx_base;
-   struct nfp_eth_table_port *eth_port;
struct nfp_net *nn;
unsigned int i;
int err;
 
+   if (pf->eth_tbl && pf->max_data_vnics != pf->eth_tbl->count) {
+   nfp_err(pf->cpp, "ETH entries don't match vNICs (%d vs %d)\n",
+   pf->max_data_vnics, pf->eth_tbl->count);
+   return -EINVAL;
+   }
+
prev_tx_base = readl(ctrl_bar + NFP_NET_CFG_START_TXQ);
prev_rx_base = readl(ctrl_bar + NFP_NET_CFG_START_RXQ);
 
@@ -362,21 +370,26 @@ nfp_net_pf_alloc_vnics(struct nfp_pf *pf, void __iomem 
*ctrl_bar,
prev_tx_base = tgt_tx_base;
prev_rx_base = tgt_rx_base;
 
-   eth_port = nfp_net_find_port(pf->eth_tbl, i);
-   if (eth_port && eth_port->override_changed) {
-   nfp_warn(pf->cpp, "Config changed for port #%d, reboot 
required before port will be operational\n", i);
-   } else {
-   nn = nfp_net_pf_alloc_vnic(pf, ctrl_bar, tx_bar, rx_bar,
-  stride, fw_ver, eth_port);
-   if (IS_ERR(nn)) {
-   err = PTR_ERR(nn);
-   goto err_free_prev;
-   }
-   list_add_tail(>vnic_list, >vnics);
-   pf->num_vnics++;
+   nn = nfp_net_pf_alloc_vnic(pf, ctrl_bar, tx_bar, rx_bar,
+  stride, fw_ver, i);
+   if (IS_ERR(nn)) {
+   err = PTR_ERR(nn);
+   goto err_free_prev;
}
 
ctrl_bar += NFP_PF_CSR_SLICE_SIZE;
+
+   /* Check if vNIC has external port associated and cfg is OK */
+   if (pf->eth_tbl && !nn->eth_port) {
+   nfp_err(pf->cpp, "NSP port entries don't match vNICs 
(no entry for port #%d)\n", i);
+   err = -EINVAL;
+   goto err_free_prev;
+   }
+   if (nn->eth_port && nn->eth_port->override_changed) {
+   nfp_warn(pf->cpp, "Config changed for port #%d, reboot 
required before port will be operational\n", i);
+   nfp_net_pf_free_vnic(pf, nn);
+   continue;
+   }
}
 
if (list_empty(>vnics))
@@ -517,6 +530,9 @@ static void nfp_net_refresh_vnics(struct work_struct *work)
continue;
nn->eth_port = nfp_net_find_port(eth_table,
 nn->eth_port->eth_index);
+   if (!nn->eth_port)
+   nfp_err(pf->cpp,
+   "Warning: port disappeared after reconfig\n");
}
rtnl_unlock();
 
@@ -524,11 +540,7 @@ static void nfp_net_refresh_vnics(struct work_struct *work)
pf->eth_tbl = eth_table;
 

[PATCH net-next 08/13] nfp: update port state in place

2017-05-19 Thread Jakub Kicinski
Always updating port state in place by overriding values in existing
pf->eth_tbl makes things easier to manage and allows us to have a
common helper for both full and per-port refresh.

Signed-off-by: Jakub Kicinski 
Reviewed-by: Simon Horman 
---
 drivers/net/ethernet/netronome/nfp/nfp_net_main.c | 54 +--
 1 file changed, 30 insertions(+), 24 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c 
b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
index 167ccf788ba2..7bed799dee83 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
@@ -518,6 +518,30 @@ static void nfp_net_pci_remove_finish(struct nfp_pf *pf)
nfp_cpp_area_release_free(pf->data_vnic_bar);
 }
 
+static int
+nfp_net_eth_port_update(struct nfp_cpp *cpp, struct nfp_port *port,
+   struct nfp_eth_table *eth_table)
+{
+   struct nfp_eth_table_port *eth_port;
+
+   ASSERT_RTNL();
+
+   eth_port = nfp_net_find_port(eth_table, port->eth_id);
+   if (!eth_port) {
+   nfp_warn(cpp, "Warning: port #%d not present after reconfig\n",
+port->eth_id);
+   return -EIO;
+   }
+   if (eth_port->override_changed) {
+   nfp_warn(cpp, "Port #%d config changed, unregistering. Reboot 
required before port will be operational again.\n", port->eth_id);
+   port->type = NFP_PORT_INVALID;
+   }
+
+   memcpy(port->eth_port, eth_port, sizeof(*eth_port));
+
+   return 0;
+}
+
 static void nfp_net_refresh_vnics(struct work_struct *work)
 {
struct nfp_pf *pf = container_of(work, struct nfp_pf,
@@ -544,23 +568,12 @@ static void nfp_net_refresh_vnics(struct work_struct 
*work)
list_for_each_entry(nn, >vnics, vnic_list) {
if (!__nfp_port_get_eth_port(nn->port))
continue;
-   nn->port->eth_port = nfp_net_find_port(eth_table,
-  nn->port->eth_id);
-   if (!nn->port->eth_port) {
-   nfp_warn(pf->cpp, "Warning: port #%d not present after 
reconfig\n",
-nn->port->eth_id);
-   continue;
-   }
-   if (nn->port->eth_port->override_changed) {
-   nfp_warn(pf->cpp, "Port config changed, unregistering. 
Reboot required before port will be operational again.\n");
-   nn->port->type = NFP_PORT_INVALID;
-   continue;
-   }
+
+   nfp_net_eth_port_update(pf->cpp, nn->port, eth_table);
}
rtnl_unlock();
 
-   kfree(pf->eth_tbl);
-   pf->eth_tbl = eth_table;
+   kfree(eth_table);
 
list_for_each_entry_safe(nn, next, >vnics, vnic_list) {
if (!nn->port || nn->port->type != NFP_PORT_INVALID)
@@ -588,8 +601,8 @@ void nfp_net_refresh_port_table(struct nfp_port *port)
 int nfp_net_refresh_eth_port(struct nfp_port *port)
 {
struct nfp_cpp *cpp = nfp_app_cpp(port->app);
-   struct nfp_eth_table_port *eth_port;
struct nfp_eth_table *eth_table;
+   int ret;
 
eth_table = nfp_eth_read_ports(cpp);
if (!eth_table) {
@@ -597,18 +610,11 @@ int nfp_net_refresh_eth_port(struct nfp_port *port)
return -EIO;
}
 
-   eth_port = nfp_net_find_port(eth_table, port->eth_id);
-   if (!eth_port) {
-   nfp_err(cpp, "Error finding state of the port!\n");
-   kfree(eth_table);
-   return -EIO;
-   }
-
-   memcpy(port->eth_port, eth_port, sizeof(*eth_port));
+   ret = nfp_net_eth_port_update(cpp, port, eth_table);
 
kfree(eth_table);
 
-   return 0;
+   return ret;
 }
 
 /*
-- 
2.11.0



[PATCH net-next 12/13] nfp: mark port state as stale if update failed

2017-05-19 Thread Jakub Kicinski
If reading new state of the port failed, mark the port back as CHANGED.
This way next user state request will trigger refresh, which will
hopefully succeed.

Signed-off-by: Jakub Kicinski 
Reviewed-by: Simon Horman 
---
 drivers/net/ethernet/netronome/nfp/nfp_net_main.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c 
b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
index 3ee9a9772110..74fdef44e139 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
@@ -528,6 +528,7 @@ nfp_net_eth_port_update(struct nfp_cpp *cpp, struct 
nfp_port *port,
 
eth_port = nfp_net_find_port(eth_table, port->eth_id);
if (!eth_port) {
+   set_bit(NFP_PORT_CHANGED, &port->flags);
nfp_warn(cpp, "Warning: port #%d not present after reconfig\n",
 port->eth_id);
return -EIO;
@@ -563,6 +564,9 @@ static void nfp_net_refresh_vnics(struct work_struct *work)
 
eth_table = nfp_eth_read_ports(pf->cpp);
if (!eth_table) {
+   list_for_each_entry(port, &pf->ports, port_list)
+   if (__nfp_port_get_eth_port(port))
+   set_bit(NFP_PORT_CHANGED, &port->flags);
rtnl_unlock();
nfp_err(pf->cpp, "Error refreshing port config!\n");
goto out;
@@ -611,6 +615,7 @@ int nfp_net_refresh_eth_port(struct nfp_port *port)
 
eth_table = nfp_eth_read_ports(cpp);
if (!eth_table) {
+   set_bit(NFP_PORT_CHANGED, &port->flags);
nfp_err(cpp, "Error refreshing port state table!\n");
return -EIO;
}
-- 
2.11.0



[PATCH net-next 03/13] nfp: rename netdev/port to vNIC

2017-05-19 Thread Jakub Kicinski
vNIC is a PCIe-side abstraction NFP firmwares supported by this
driver use.  It was initially meant to represent a device port
and therefore a netdev but today should be thought of as a way
of grouping descriptor rings and associated state.  Advanced apps
will have vNICs without netdevs and ports without a vNIC (using
representors instead).

Make sure code refers to vNICs as vNICs and not ports or netdevs.
No functional changes.

Signed-off-by: Jakub Kicinski 
Reviewed-by: Simon Horman 
---
 drivers/net/ethernet/netronome/nfp/nfp_main.c  |   2 +-
 drivers/net/ethernet/netronome/nfp/nfp_main.h  |  25 ++--
 drivers/net/ethernet/netronome/nfp/nfp_net.h   |  10 +-
 .../net/ethernet/netronome/nfp/nfp_net_debugfs.c   |   4 +-
 drivers/net/ethernet/netronome/nfp/nfp_net_main.c  | 150 ++---
 .../net/ethernet/netronome/nfp/nfp_netvf_main.c|   4 +-
 6 files changed, 97 insertions(+), 98 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c 
b/drivers/net/ethernet/netronome/nfp/nfp_main.c
index dde35dae35c5..9fbc7eedc017 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c
@@ -340,7 +340,7 @@ static int nfp_pci_probe(struct pci_dev *pdev,
err = -ENOMEM;
goto err_rel_regions;
}
-   INIT_LIST_HEAD(&pf->ports);
+   INIT_LIST_HEAD(&pf->vnics);
pci_set_drvdata(pdev, pf);
pf->pdev = pdev;
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.h 
b/drivers/net/ethernet/netronome/nfp/nfp_main.h
index b57de047b002..bf31913ac7a5 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_main.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_main.h
@@ -57,19 +57,20 @@ struct nfp_eth_table;
  * struct nfp_pf - NFP PF-specific device structure
  * @pdev:  Backpointer to PCI device
  * @cpp:   Pointer to the CPP handle
- * @ctrl_area: Pointer to the CPP area for the control BAR
+ * @data_vnic_bar: Pointer to the CPP area for the data vNICs' BARs
  * @tx_area:   Pointer to the CPP area for the TX queues
  * @rx_area:   Pointer to the CPP area for the FL/RX queues
- * @irq_entries:   Array of MSI-X entries for all ports
+ * @irq_entries:   Array of MSI-X entries for all vNICs
  * @limit_vfs: Number of VFs supported by firmware (~0 for PCI limit)
  * @num_vfs:   Number of SR-IOV VFs enabled
  * @fw_loaded: Is the firmware loaded?
- * @eth_tbl:   NSP ETH table
  * @ddir:  Per-device debugfs directory
- * @num_ports: Number of adapter ports app firmware supports
- * @num_netdevs:   Number of netdevs spawned
- * @ports: Linked list of port structures (struct nfp_net)
- * @port_lock: Protects @ports, @num_ports, @num_netdevs
+ *
+ * @lock:  Protects all fields below
+ * @eth_tbl:   NSP ETH table
+ * @max_data_vnics:Number of data vNICs app firmware supports
+ * @num_vnics: Number of vNICs spawned
+ * @vnics: Linked list of vNIC structures (struct nfp_net)
  * @port_refresh_work: Work entry for taking netdevs out
  */
 struct nfp_pf {
@@ -77,7 +78,7 @@ struct nfp_pf {
 
struct nfp_cpp *cpp;
 
-   struct nfp_cpp_area *ctrl_area;
+   struct nfp_cpp_area *data_vnic_bar;
struct nfp_cpp_area *tx_area;
struct nfp_cpp_area *rx_area;
 
@@ -92,12 +93,12 @@ struct nfp_pf {
 
struct dentry *ddir;
 
-   unsigned int num_ports;
-   unsigned int num_netdevs;
+   unsigned int max_data_vnics;
+   unsigned int num_vnics;
 
-   struct list_head ports;
+   struct list_head vnics;
struct work_struct port_refresh_work;
-   struct mutex port_lock;
+   struct mutex lock;
 };
 
 extern struct pci_driver nfp_netvf_pci_driver;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h 
b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index 04609191ca88..1d41be9b2309 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -84,7 +84,7 @@
 #define NFP_NET_NON_Q_VECTORS  2
 #define NFP_NET_IRQ_LSC_IDX0
 #define NFP_NET_IRQ_EXN_IDX1
-#define NFP_NET_MIN_PORT_IRQS  (NFP_NET_NON_Q_VECTORS + 1)
+#define NFP_NET_MIN_VNIC_IRQS  (NFP_NET_NON_Q_VECTORS + 1)
 
 /* Queue/Ring definitions */
 #define NFP_NET_MAX_TX_RINGS   64  /* Max. # of Tx rings per device */
@@ -555,7 +555,7 @@ struct nfp_net_dp {
  * @rx_bar: Pointer to mapped FL/RX queues
  * @debugfs_dir:   Device directory in debugfs
  * @ethtool_dump_flag: Ethtool dump flag
- * @port_list: Entry on device port list
+ * @vnic_list: Entry on device vNIC list
  * @pdev:  Backpointer to PCI device
  * @cpp:   CPP device handle if available
  * @eth_port:  Translated ETH Table port entry
@@ 

[PATCH net-next 05/13] nfp: introduce very minimal nfp_app

2017-05-19 Thread Jakub Kicinski
Introduce a concept of an application.  For now it's just grouping
pointers and serving as a layer of indirection.  It will help us
weaken the dependency on nfp_net in ethtool code.  Later series
will flesh out support for different apps in the driver.

Signed-off-by: Jakub Kicinski 
Reviewed-by: Simon Horman 
---
 drivers/net/ethernet/netronome/nfp/Makefile|  1 +
 drivers/net/ethernet/netronome/nfp/nfp_app.c   | 80 ++
 drivers/net/ethernet/netronome/nfp/nfp_app.h   | 48 +
 drivers/net/ethernet/netronome/nfp/nfp_main.h  |  3 +
 drivers/net/ethernet/netronome/nfp/nfp_net.h   |  4 +-
 .../net/ethernet/netronome/nfp/nfp_net_ethtool.c   | 20 +++---
 drivers/net/ethernet/netronome/nfp/nfp_net_main.c  | 26 ++-
 7 files changed, 168 insertions(+), 14 deletions(-)
 create mode 100644 drivers/net/ethernet/netronome/nfp/nfp_app.c
 create mode 100644 drivers/net/ethernet/netronome/nfp/nfp_app.h

diff --git a/drivers/net/ethernet/netronome/nfp/Makefile 
b/drivers/net/ethernet/netronome/nfp/Makefile
index 4b15f0f496aa..a6b9c4dcbe12 100644
--- a/drivers/net/ethernet/netronome/nfp/Makefile
+++ b/drivers/net/ethernet/netronome/nfp/Makefile
@@ -14,6 +14,7 @@ nfp-objs := \
nfpcore/nfp_resource.o \
nfpcore/nfp_rtsym.o \
nfpcore/nfp_target.o \
+   nfp_app.o \
nfp_main.o \
nfp_net_common.o \
nfp_net_ethtool.o \
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.c 
b/drivers/net/ethernet/netronome/nfp/nfp_app.c
new file mode 100644
index ..c5dc8faffc61
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ *  1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ *  2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include 
+
+#include "nfpcore/nfp_cpp.h"
+#include "nfp_app.h"
+#include "nfp_main.h"
+
+/**
+ * struct nfp_app - NFP application container
+ * @pdev:  backpointer to PCI device
+ * @pf:backpointer to NFP PF structure
+ * @cpp:   pointer to the CPP handle
+ */
+struct nfp_app {
+   struct pci_dev *pdev;
+   struct nfp_pf *pf;
+   struct nfp_cpp *cpp;
+};
+
+struct nfp_cpp *nfp_app_cpp(struct nfp_app *app)
+{
+   return app->cpp;
+}
+
+struct nfp_pf *nfp_app_pf(struct nfp_app *app)
+{
+   return app->pf;
+}
+
+struct nfp_app *nfp_app_alloc(struct nfp_pf *pf)
+{
+   struct nfp_app *app;
+
+   app = kzalloc(sizeof(*app), GFP_KERNEL);
+   if (!app)
+   return ERR_PTR(-ENOMEM);
+
+   app->pf = pf;
+   app->cpp = pf->cpp;
+   app->pdev = pf->pdev;
+
+   return app;
+}
+
+void nfp_app_free(struct nfp_app *app)
+{
+   kfree(app);
+}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h 
b/drivers/net/ethernet/netronome/nfp/nfp_app.h
new file mode 100644
index ..c0a5e97d19b8
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ *  1. 

Re: [PATCH v2] hdlcdrv: fix divide error bug if bitrate is 0

2017-05-19 Thread David Miller
From: Firo Yang 
Date: Fri, 19 May 2017 21:21:46 +0800

> @@ -576,6 +576,10 @@ static int hdlcdrv_ioctl(struct net_device *dev, struct 
> ifreq *ifr, int cmd)
>   case HDLCDRVCTL_CALIBRATE:
>   if(!capable(CAP_SYS_RAWIO))
>   return -EPERM;
> + if (!netif_running(dev))
> + return -ENODEV;
> + if (!(s->par.bitrate > 0))
> + return -EINVAL;

This test is so un-canonical and convoluted.

Please use something more straightforward.  I really think Alan
Cox's patch handled this more cleanly.  Make the test something
like "if (x <= 0) return -EINVAL;".

I also am not convinced about the netif_running() test and at
best it is a separate change from this divide by zero bug fix
so belongs in a separate patch.

Thank you.


Re: [PATCH v2 1/3] bpf: Use 1<<16 as ceiling for immediate alignment in verifier.

2017-05-19 Thread Alexei Starovoitov

On 5/19/17 1:41 PM, David Miller wrote:

From: Edward Cree 
Date: Fri, 19 May 2017 18:17:42 +0100


One question: is there a way to build the verifier as userland code
 (or at least as a module), or will I have to reboot every time I
 want to test a change?


There currently is no such mechanism, you will have to reboot every
time.

I have considered working on making the code buildable outside of the
kernel.  It shouldn't be too hard.


it's not hard.
We did it twice and abandoned it both times.
The first time was to have a 'user space verifier' to check programs before
loading, and the second time was for fuzzing via llvm.
We abandoned it since it diverges very quickly from the kernel.




[PATCH net] ipv6: fix out of bound writes in __ip6_append_data()

2017-05-19 Thread Eric Dumazet
From: Eric Dumazet 

Andrey Konovalov and idaif...@gmail.com reported crashes caused by
one skb shared_info being overwritten from __ip6_append_data()

Andrey's program led to the following state:

copy -4200 datalen 2000 fraglen 2040 
maxfraglen 2040 alloclen 2048 transhdrlen 0 offset 0 fraggap 6200

The skb_copy_and_csum_bits(skb_prev, maxfraglen, data + transhdrlen,
fraggap, 0); is overwriting skb->head and skb_shared_info

Since we apparently detect this rare condition too late, move the
code earlier to even avoid allocating skb and risking crashes.

Once again, many thanks to Andrey and syzkaller team.

Signed-off-by: Eric Dumazet 
Reported-by: Andrey Konovalov 
Tested-by: Andrey Konovalov 
Reported-by: 
---
 net/ipv6/ip6_output.c |   15 ---
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 
d4a31becbd25dda895d7391e1e65c2de237bf2a3..bf8a58a1c32d83a9605844075da5815be23a6bf1
 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1466,6 +1466,11 @@ static int __ip6_append_data(struct sock *sk,
 */
alloclen += sizeof(struct frag_hdr);
 
+   copy = datalen - transhdrlen - fraggap;
+   if (copy < 0) {
+   err = -EINVAL;
+   goto error;
+   }
if (transhdrlen) {
skb = sock_alloc_send_skb(sk,
alloclen + hh_len,
@@ -1515,13 +1520,9 @@ static int __ip6_append_data(struct sock *sk,
data += fraggap;
pskb_trim_unique(skb_prev, maxfraglen);
}
-   copy = datalen - transhdrlen - fraggap;
-
-   if (copy < 0) {
-   err = -EINVAL;
-   kfree_skb(skb);
-   goto error;
-   } else if (copy > 0 && getfrag(from, data + 
transhdrlen, offset, copy, fraggap, skb) < 0) {
+   if (copy > 0 &&
+   getfrag(from, data + transhdrlen, offset,
+   copy, fraggap, skb) < 0) {
err = -EFAULT;
kfree_skb(skb);
goto error;




Re: [PATCH net] bonding: fix accounting of active ports in 3ad

2017-05-19 Thread David Miller
From: Jarod Wilson 
Date: Wed, 17 May 2017 11:11:44 -0400

> As of 7bb11dc9f59d and 0622cab0341c, bond slaves in a 3ad bond are not
> removed from the aggregator when they are down, and the active slave count
> is NOT equal to number of ports in the aggregator, but rather the number
> of ports in the aggregator that are still enabled.
 ...
> Remedy it by using the same logic introduced in
> 7bb11dc9f59d in __bond_3ad_get_active_agg_info(), so sysfs, procfs and
  
> netlink all report the number of active ports.

I think you mean to reference commit 0622cab0341c here not 7bb11dc9f59d.


RE: [PATCH v2] e1000e: Don't return uninitialized stats

2017-05-19 Thread Brown, Aaron F
> From: Kirsher, Jeffrey T
> Sent: Friday, May 19, 2017 1:17 AM
> To: David Miller ; bpoir...@suse.com
> Cc: s.pri...@profihost.ag; intel-wired-...@lists.osuosl.org;
> netdev@vger.kernel.org; pmen...@molgen.mpg.de; Neftin, Sasha
> ; Brown, Aaron F ;
> step...@networkplumber.org
> Subject: Re: [PATCH v2] e1000e: Don't return uninitialized stats
> 
> On Thu, 2017-05-18 at 10:46 -0400, David Miller wrote:
> > From: Benjamin Poirier 
> > Date: Wed, 17 May 2017 16:24:13 -0400
> >
> > > Some statistics passed to ethtool are garbage because
> > > e1000e_get_stats64()
> > > doesn't write them, for example: tx_heartbeat_errors. This leaks kernel
> > > memory to userspace and confuses users.
> > >
> > > Do like ixgbe and use dev_get_stats() which first zeroes out
> > > rtnl_link_stats64.
> > >
> > > Fixes: 5944701df90d ("net: remove useless memset's in drivers
> > > get_stats64")
> > > Reported-by: Stefan Priebe 
> > > Signed-off-by: Benjamin Poirier 

Tested-by: Aaron Brown 


[PATCH net-next 06/20] net: dsa: change scope of VLAN handlers

2017-05-19 Thread Vivien Didelot
Change the scope of the switchdev VLAN object handlers from the DSA
slave device to the generic DSA port, so that the future port-wide API
can also be used for other port types, such as CPU and DSA links.

Signed-off-by: Vivien Didelot 
---
 net/dsa/slave.c | 40 
 1 file changed, 16 insertions(+), 24 deletions(-)

diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 0921d306aedf..de39da69fd33 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -254,12 +254,10 @@ static int dsa_slave_set_mac_address(struct net_device 
*dev, void *a)
return 0;
 }
 
-static int dsa_slave_port_vlan_add(struct net_device *dev,
-  const struct switchdev_obj_port_vlan *vlan,
-  struct switchdev_trans *trans)
+static int dsa_port_vlan_add(struct dsa_port *dp,
+const struct switchdev_obj_port_vlan *vlan,
+struct switchdev_trans *trans)
 {
-   struct dsa_slave_priv *p = netdev_priv(dev);
-   struct dsa_port *dp = p->dp;
struct dsa_switch *ds = dp->ds;
 
if (switchdev_trans_ph_prepare(trans)) {
@@ -274,27 +272,25 @@ static int dsa_slave_port_vlan_add(struct net_device *dev,
return 0;
 }
 
-static int dsa_slave_port_vlan_del(struct net_device *dev,
-  const struct switchdev_obj_port_vlan *vlan)
+static int dsa_port_vlan_del(struct dsa_port *dp,
+const struct switchdev_obj_port_vlan *vlan)
 {
-   struct dsa_slave_priv *p = netdev_priv(dev);
-   struct dsa_switch *ds = p->dp->ds;
+   struct dsa_switch *ds = dp->ds;
 
if (!ds->ops->port_vlan_del)
return -EOPNOTSUPP;
 
-   return ds->ops->port_vlan_del(ds, p->dp->index, vlan);
+   return ds->ops->port_vlan_del(ds, dp->index, vlan);
 }
 
-static int dsa_slave_port_vlan_dump(struct net_device *dev,
-   struct switchdev_obj_port_vlan *vlan,
-   switchdev_obj_dump_cb_t *cb)
+static int dsa_port_vlan_dump(struct dsa_port *dp,
+ struct switchdev_obj_port_vlan *vlan,
+ switchdev_obj_dump_cb_t *cb)
 {
-   struct dsa_slave_priv *p = netdev_priv(dev);
-   struct dsa_switch *ds = p->dp->ds;
+   struct dsa_switch *ds = dp->ds;
 
if (ds->ops->port_vlan_dump)
-   return ds->ops->port_vlan_dump(ds, p->dp->index, vlan, cb);
+   return ds->ops->port_vlan_dump(ds, dp->index, vlan, cb);
 
return -EOPNOTSUPP;
 }
@@ -499,9 +495,8 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj), trans);
break;
case SWITCHDEV_OBJ_ID_PORT_VLAN:
-   err = dsa_slave_port_vlan_add(dev,
- SWITCHDEV_OBJ_PORT_VLAN(obj),
- trans);
+   err = dsa_port_vlan_add(dp, SWITCHDEV_OBJ_PORT_VLAN(obj),
+   trans);
break;
default:
err = -EOPNOTSUPP;
@@ -526,8 +521,7 @@ static int dsa_slave_port_obj_del(struct net_device *dev,
err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
break;
case SWITCHDEV_OBJ_ID_PORT_VLAN:
-   err = dsa_slave_port_vlan_del(dev,
- SWITCHDEV_OBJ_PORT_VLAN(obj));
+   err = dsa_port_vlan_del(dp, SWITCHDEV_OBJ_PORT_VLAN(obj));
break;
default:
err = -EOPNOTSUPP;
@@ -553,9 +547,7 @@ static int dsa_slave_port_obj_dump(struct net_device *dev,
err = dsa_port_mdb_dump(dp, SWITCHDEV_OBJ_PORT_MDB(obj), cb);
break;
case SWITCHDEV_OBJ_ID_PORT_VLAN:
-   err = dsa_slave_port_vlan_dump(dev,
-  SWITCHDEV_OBJ_PORT_VLAN(obj),
-  cb);
+   err = dsa_port_vlan_dump(dp, SWITCHDEV_OBJ_PORT_VLAN(obj), cb);
break;
default:
err = -EOPNOTSUPP;
-- 
2.13.0



[PATCH net-next 03/20] net: dsa: change scope of bridging code

2017-05-19 Thread Vivien Didelot
Now that the bridge join and leave functions only deal with a DSA port,
change their scope from the DSA slave net_device to the DSA generic
dsa_port.

Signed-off-by: Vivien Didelot 
---
 net/dsa/slave.c | 36 +---
 1 file changed, 17 insertions(+), 19 deletions(-)

diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 371f6d267917..1ad62ef8c261 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -572,13 +572,11 @@ static int dsa_slave_port_obj_dump(struct net_device *dev,
return err;
 }
 
-static int dsa_slave_bridge_port_join(struct net_device *dev,
- struct net_device *br)
+static int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br)
 {
-   struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_notifier_bridge_info info = {
-   .sw_index = p->dp->ds->index,
-   .port = p->dp->index,
+   .sw_index = dp->ds->index,
+   .port = dp->index,
.br = br,
};
int err;
@@ -586,24 +584,22 @@ static int dsa_slave_bridge_port_join(struct net_device 
*dev,
/* Here the port is already bridged. Reflect the current configuration
 * so that drivers can program their chips accordingly.
 */
-   p->dp->bridge_dev = br;
+   dp->bridge_dev = br;
 
-   err = dsa_port_notify(p->dp, DSA_NOTIFIER_BRIDGE_JOIN, &info);
+   err = dsa_port_notify(dp, DSA_NOTIFIER_BRIDGE_JOIN, &info);
 
/* The bridging is rolled back on error */
if (err)
-   p->dp->bridge_dev = NULL;
+   dp->bridge_dev = NULL;
 
return err;
 }
 
-static void dsa_slave_bridge_port_leave(struct net_device *dev,
-   struct net_device *br)
+static void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
 {
-   struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_notifier_bridge_info info = {
-   .sw_index = p->dp->ds->index,
-   .port = p->dp->index,
+   .sw_index = dp->ds->index,
+   .port = dp->index,
.br = br,
};
int err;
@@ -611,16 +607,16 @@ static void dsa_slave_bridge_port_leave(struct net_device 
*dev,
/* Here the port is already unbridged. Reflect the current configuration
 * so that drivers can program their chips accordingly.
 */
-   p->dp->bridge_dev = NULL;
+   dp->bridge_dev = NULL;
 
-   err = dsa_port_notify(p->dp, DSA_NOTIFIER_BRIDGE_LEAVE, &info);
+   err = dsa_port_notify(dp, DSA_NOTIFIER_BRIDGE_LEAVE, &info);
if (err)
-   netdev_err(dev, "failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n");
+   pr_err("DSA: failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n");
 
/* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer,
 * so allow it to be in BR_STATE_FORWARDING to be kept functional
 */
-   dsa_port_set_state_now(p->dp, BR_STATE_FORWARDING);
+   dsa_port_set_state_now(dp, BR_STATE_FORWARDING);
 }
 
 static int dsa_slave_port_attr_get(struct net_device *dev,
@@ -1526,14 +1522,16 @@ static bool dsa_slave_dev_check(struct net_device *dev)
 static int dsa_slave_changeupper(struct net_device *dev,
 struct netdev_notifier_changeupper_info *info)
 {
+   struct dsa_slave_priv *p = netdev_priv(dev);
+   struct dsa_port *dp = p->dp;
int err = NOTIFY_DONE;
 
if (netif_is_bridge_master(info->upper_dev)) {
if (info->linking) {
-   err = dsa_slave_bridge_port_join(dev, info->upper_dev);
+   err = dsa_port_bridge_join(dp, info->upper_dev);
err = notifier_from_errno(err);
} else {
-   dsa_slave_bridge_port_leave(dev, info->upper_dev);
+   dsa_port_bridge_leave(dp, info->upper_dev);
err = NOTIFY_OK;
}
}
-- 
2.13.0



[PATCH net-next 09/20] net: dsa: move port state setters

2017-05-19 Thread Vivien Didelot
Add a new port.c file to hold all DSA port-wide logic. This patch moves
in the code which sets a port state.

Signed-off-by: Vivien Didelot 
---
 net/dsa/Makefile   |  2 +-
 net/dsa/dsa_priv.h |  5 +
 net/dsa/port.c | 55 ++
 net/dsa/slave.c| 40 ---
 4 files changed, 61 insertions(+), 41 deletions(-)
 create mode 100644 net/dsa/port.c

diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index f8c0251d1f43..90e5aa6f7d0f 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -1,6 +1,6 @@
 # the core
 obj-$(CONFIG_NET_DSA) += dsa_core.o
-dsa_core-y += dsa.o slave.o dsa2.o switch.o legacy.o
+dsa_core-y += dsa.o dsa2.o legacy.o port.o slave.o switch.o
 
 # tagging formats
 dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index c274130e3ac9..cda218cd9b05 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -60,6 +60,11 @@ void dsa_cpu_port_ethtool_restore(struct dsa_switch *ds);
 int dsa_legacy_register(void);
 void dsa_legacy_unregister(void);
 
+/* port.c */
+int dsa_port_set_state(struct dsa_port *dp, u8 state,
+  struct switchdev_trans *trans);
+void dsa_port_set_state_now(struct dsa_port *dp, u8 state);
+
 /* slave.c */
 extern const struct dsa_device_ops notag_netdev_ops;
 void dsa_slave_mii_bus_init(struct dsa_switch *ds);
diff --git a/net/dsa/port.c b/net/dsa/port.c
new file mode 100644
index ..6cc4704190fd
--- /dev/null
+++ b/net/dsa/port.c
@@ -0,0 +1,55 @@
+/*
+ * Handling of a single switch port
+ *
+ * Copyright (c) 2017 Savoir-faire Linux Inc.
+ * Vivien Didelot 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include 
+
+#include "dsa_priv.h"
+
+int dsa_port_set_state(struct dsa_port *dp, u8 state,
+  struct switchdev_trans *trans)
+{
+   struct dsa_switch *ds = dp->ds;
+   int port = dp->index;
+
+   if (switchdev_trans_ph_prepare(trans))
+   return ds->ops->port_stp_state_set ? 0 : -EOPNOTSUPP;
+
+   if (ds->ops->port_stp_state_set)
+   ds->ops->port_stp_state_set(ds, port, state);
+
+   if (ds->ops->port_fast_age) {
+   /* Fast age FDB entries or flush appropriate forwarding database
+* for the given port, if we are moving it from Learning or
+* Forwarding state, to Disabled or Blocking or Listening state.
+*/
+
+   if ((dp->stp_state == BR_STATE_LEARNING ||
+dp->stp_state == BR_STATE_FORWARDING) &&
+   (state == BR_STATE_DISABLED ||
+state == BR_STATE_BLOCKING ||
+state == BR_STATE_LISTENING))
+   ds->ops->port_fast_age(ds, port);
+   }
+
+   dp->stp_state = state;
+
+   return 0;
+}
+
+void dsa_port_set_state_now(struct dsa_port *dp, u8 state)
+{
+   int err;
+
+   err = dsa_port_set_state(dp, state, NULL);
+   if (err)
+   pr_err("DSA: failed to set STP state %u (%d)\n", state, err);
+}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index b0150f79dcdd..2c57c7205aa3 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -84,46 +84,6 @@ static inline bool dsa_port_is_bridged(struct dsa_port *dp)
return !!dp->bridge_dev;
 }
 
-static int dsa_port_set_state(struct dsa_port *dp, u8 state,
- struct switchdev_trans *trans)
-{
-   struct dsa_switch *ds = dp->ds;
-   int port = dp->index;
-
-   if (switchdev_trans_ph_prepare(trans))
-   return ds->ops->port_stp_state_set ? 0 : -EOPNOTSUPP;
-
-   if (ds->ops->port_stp_state_set)
-   ds->ops->port_stp_state_set(ds, port, state);
-
-   if (ds->ops->port_fast_age) {
-   /* Fast age FDB entries or flush appropriate forwarding database
-* for the given port, if we are moving it from Learning or
-* Forwarding state, to Disabled or Blocking or Listening state.
-*/
-
-   if ((dp->stp_state == BR_STATE_LEARNING ||
-dp->stp_state == BR_STATE_FORWARDING) &&
-   (state == BR_STATE_DISABLED ||
-state == BR_STATE_BLOCKING ||
-state == BR_STATE_LISTENING))
-   ds->ops->port_fast_age(ds, port);
-   }
-
-   dp->stp_state = state;
-
-   return 0;
-}
-
-static void dsa_port_set_state_now(struct dsa_port *dp, u8 state)
-{
-   int err;
-
-   err = dsa_port_set_state(dp, state, NULL);
-   if (err)
-   pr_err("DSA: failed to set STP state %u (%d)\n", 

[PATCH net-next 18/20] net: dsa: add FDB notifier

2017-05-19 Thread Vivien Didelot
Add two new DSA_NOTIFIER_FDB_ADD and DSA_NOTIFIER_FDB_DEL events to
notify not only a single switch, but all switches of the fabric when
an FDB entry is added or removed.

For the moment, keep the current behavior and ignore other switches.

Signed-off-by: Vivien Didelot 
---
 net/dsa/dsa_priv.h | 10 ++
 net/dsa/port.c | 31 ++-
 net/dsa/switch.c   | 43 +++
 3 files changed, 67 insertions(+), 17 deletions(-)

diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index becaf8a61b13..6a7d0d7d0489 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -20,6 +20,8 @@ enum {
DSA_NOTIFIER_AGEING_TIME,
DSA_NOTIFIER_BRIDGE_JOIN,
DSA_NOTIFIER_BRIDGE_LEAVE,
+   DSA_NOTIFIER_FDB_ADD,
+   DSA_NOTIFIER_FDB_DEL,
 };
 
 /* DSA_NOTIFIER_AGEING_TIME */
@@ -36,6 +38,14 @@ struct dsa_notifier_bridge_info {
int port;
 };
 
+/* DSA_NOTIFIER_FDB_* */
+struct dsa_notifier_fdb_info {
+   const struct switchdev_obj_port_fdb *fdb;
+   struct switchdev_trans *trans;
+   int sw_index;
+   int port;
+};
+
 struct dsa_device_ops {
struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev);
struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev,
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 59328a35394d..ed88d8381642 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -151,29 +151,26 @@ int dsa_port_fdb_add(struct dsa_port *dp,
 const struct switchdev_obj_port_fdb *fdb,
 struct switchdev_trans *trans)
 {
-   struct dsa_switch *ds = dp->ds;
-
-   if (switchdev_trans_ph_prepare(trans)) {
-   if (!ds->ops->port_fdb_prepare || !ds->ops->port_fdb_add)
-   return -EOPNOTSUPP;
-
-   return ds->ops->port_fdb_prepare(ds, dp->index, fdb, trans);
-   }
-
-   ds->ops->port_fdb_add(ds, dp->index, fdb, trans);
-
-   return 0;
+   struct dsa_notifier_fdb_info info = {
+   .sw_index = dp->ds->index,
+   .port = dp->index,
+   .trans = trans,
+   .fdb = fdb,
+   };
+
+   return dsa_port_notify(dp, DSA_NOTIFIER_FDB_ADD, &info);
 }
 
 int dsa_port_fdb_del(struct dsa_port *dp,
 const struct switchdev_obj_port_fdb *fdb)
 {
-   struct dsa_switch *ds = dp->ds;
+   struct dsa_notifier_fdb_info info = {
+   .sw_index = dp->ds->index,
+   .port = dp->index,
+   .fdb = fdb,
+   };
 
-   if (ds->ops->port_fdb_del)
-   return -EOPNOTSUPP;
-
-   return ds->ops->port_fdb_del(ds, dp->index, fdb);
+   return dsa_port_notify(dp, DSA_NOTIFIER_FDB_DEL, &info);
 }
 
 int dsa_port_fdb_dump(struct dsa_port *dp, struct switchdev_obj_port_fdb *fdb,
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 540770ecc8b0..e71cc860d32c 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -84,6 +84,43 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
return 0;
 }
 
+static int dsa_switch_fdb_add(struct dsa_switch *ds,
+ struct dsa_notifier_fdb_info *info)
+{
+   const struct switchdev_obj_port_fdb *fdb = info->fdb;
+   struct switchdev_trans *trans = info->trans;
+
+   /* Do not care yet about other switch chips of the fabric */
+   if (ds->index != info->sw_index)
+   return 0;
+
+   if (switchdev_trans_ph_prepare(trans)) {
+   if (!ds->ops->port_fdb_prepare || !ds->ops->port_fdb_add)
+   return -EOPNOTSUPP;
+
+   return ds->ops->port_fdb_prepare(ds, info->port, fdb, trans);
+   }
+
+   ds->ops->port_fdb_add(ds, info->port, fdb, trans);
+
+   return 0;
+}
+
+static int dsa_switch_fdb_del(struct dsa_switch *ds,
+ struct dsa_notifier_fdb_info *info)
+{
+   const struct switchdev_obj_port_fdb *fdb = info->fdb;
+
+   /* Do not care yet about other switch chips of the fabric */
+   if (ds->index != info->sw_index)
+   return 0;
+
+   if (!ds->ops->port_fdb_del)
+   return -EOPNOTSUPP;
+
+   return ds->ops->port_fdb_del(ds, info->port, fdb);
+}
+
 static int dsa_switch_event(struct notifier_block *nb,
unsigned long event, void *info)
 {
@@ -100,6 +137,12 @@ static int dsa_switch_event(struct notifier_block *nb,
case DSA_NOTIFIER_BRIDGE_LEAVE:
err = dsa_switch_bridge_leave(ds, info);
break;
+   case DSA_NOTIFIER_FDB_ADD:
+   err = dsa_switch_fdb_add(ds, info);
+   break;
+   case DSA_NOTIFIER_FDB_DEL:
+   err = dsa_switch_fdb_del(ds, info);
+   break;
default:
err = -EOPNOTSUPP;
break;
-- 
2.13.0



[PATCH net-next 15/20] net: dsa: move VLAN handlers

2017-05-19 Thread Vivien Didelot
Move the DSA port code which handles VLAN objects in port.c, where it
belongs.

Signed-off-by: Vivien Didelot 
---
 net/dsa/dsa_priv.h |  8 
 net/dsa/port.c | 41 +
 net/dsa/slave.c| 41 -
 3 files changed, 49 insertions(+), 41 deletions(-)

diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index c2a595036746..16021a891095 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -84,6 +84,14 @@ int dsa_port_mdb_del(struct dsa_port *dp,
 const struct switchdev_obj_port_mdb *mdb);
 int dsa_port_mdb_dump(struct dsa_port *dp, struct switchdev_obj_port_mdb *mdb,
  switchdev_obj_dump_cb_t *cb);
+int dsa_port_vlan_add(struct dsa_port *dp,
+ const struct switchdev_obj_port_vlan *vlan,
+ struct switchdev_trans *trans);
+int dsa_port_vlan_del(struct dsa_port *dp,
+ const struct switchdev_obj_port_vlan *vlan);
+int dsa_port_vlan_dump(struct dsa_port *dp,
+  struct switchdev_obj_port_vlan *vlan,
+  switchdev_obj_dump_cb_t *cb);
 
 /* slave.c */
 extern const struct dsa_device_ops notag_netdev_ops;
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 4ed0124a8d4b..f211b0dfb12d 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -247,3 +247,44 @@ int dsa_port_mdb_dump(struct dsa_port *dp, struct 
switchdev_obj_port_mdb *mdb,
 
return -EOPNOTSUPP;
 }
+
+int dsa_port_vlan_add(struct dsa_port *dp,
+ const struct switchdev_obj_port_vlan *vlan,
+ struct switchdev_trans *trans)
+{
+   struct dsa_switch *ds = dp->ds;
+
+   if (switchdev_trans_ph_prepare(trans)) {
+   if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add)
+   return -EOPNOTSUPP;
+
+   return ds->ops->port_vlan_prepare(ds, dp->index, vlan, trans);
+   }
+
+   ds->ops->port_vlan_add(ds, dp->index, vlan, trans);
+
+   return 0;
+}
+
+int dsa_port_vlan_del(struct dsa_port *dp,
+ const struct switchdev_obj_port_vlan *vlan)
+{
+   struct dsa_switch *ds = dp->ds;
+
+   if (!ds->ops->port_vlan_del)
+   return -EOPNOTSUPP;
+
+   return ds->ops->port_vlan_del(ds, dp->index, vlan);
+}
+
+int dsa_port_vlan_dump(struct dsa_port *dp,
+  struct switchdev_obj_port_vlan *vlan,
+  switchdev_obj_dump_cb_t *cb)
+{
+   struct dsa_switch *ds = dp->ds;
+
+   if (ds->ops->port_vlan_dump)
+   return ds->ops->port_vlan_dump(ds, dp->index, vlan, cb);
+
+   return -EOPNOTSUPP;
+}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 9adcb8267d9a..887e26695519 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -204,47 +204,6 @@ static int dsa_slave_set_mac_address(struct net_device 
*dev, void *a)
return 0;
 }
 
-static int dsa_port_vlan_add(struct dsa_port *dp,
-const struct switchdev_obj_port_vlan *vlan,
-struct switchdev_trans *trans)
-{
-   struct dsa_switch *ds = dp->ds;
-
-   if (switchdev_trans_ph_prepare(trans)) {
-   if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add)
-   return -EOPNOTSUPP;
-
-   return ds->ops->port_vlan_prepare(ds, dp->index, vlan, trans);
-   }
-
-   ds->ops->port_vlan_add(ds, dp->index, vlan, trans);
-
-   return 0;
-}
-
-static int dsa_port_vlan_del(struct dsa_port *dp,
-const struct switchdev_obj_port_vlan *vlan)
-{
-   struct dsa_switch *ds = dp->ds;
-
-   if (!ds->ops->port_vlan_del)
-   return -EOPNOTSUPP;
-
-   return ds->ops->port_vlan_del(ds, dp->index, vlan);
-}
-
-static int dsa_port_vlan_dump(struct dsa_port *dp,
- struct switchdev_obj_port_vlan *vlan,
- switchdev_obj_dump_cb_t *cb)
-{
-   struct dsa_switch *ds = dp->ds;
-
-   if (ds->ops->port_vlan_dump)
-   return ds->ops->port_vlan_dump(ds, dp->index, vlan, cb);
-
-   return -EOPNOTSUPP;
-}
-
 static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
struct dsa_slave_priv *p = netdev_priv(dev);
-- 
2.13.0



[PATCH net-next 14/20] net: dsa: move MDB handlers

2017-05-19 Thread Vivien Didelot
Move the DSA port code which handles MDB objects in port.c, where it
belongs.

Signed-off-by: Vivien Didelot 
---
 net/dsa/dsa_priv.h |  7 +++
 net/dsa/port.c | 40 
 net/dsa/slave.c| 41 -
 3 files changed, 47 insertions(+), 41 deletions(-)

diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index d003a2554c7a..c2a595036746 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -77,6 +77,13 @@ int dsa_port_fdb_del(struct dsa_port *dp,
 const struct switchdev_obj_port_fdb *fdb);
 int dsa_port_fdb_dump(struct dsa_port *dp, struct switchdev_obj_port_fdb *fdb,
  switchdev_obj_dump_cb_t *cb);
+int dsa_port_mdb_add(struct dsa_port *dp,
+const struct switchdev_obj_port_mdb *mdb,
+struct switchdev_trans *trans);
+int dsa_port_mdb_del(struct dsa_port *dp,
+const struct switchdev_obj_port_mdb *mdb);
+int dsa_port_mdb_dump(struct dsa_port *dp, struct switchdev_obj_port_mdb *mdb,
+ switchdev_obj_dump_cb_t *cb);
 
 /* slave.c */
 extern const struct dsa_device_ops notag_netdev_ops;
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 18ec6d432152..4ed0124a8d4b 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -207,3 +207,43 @@ int dsa_port_fdb_dump(struct dsa_port *dp, struct 
switchdev_obj_port_fdb *fdb,
 
return -EOPNOTSUPP;
 }
+
+int dsa_port_mdb_add(struct dsa_port *dp,
+const struct switchdev_obj_port_mdb *mdb,
+struct switchdev_trans *trans)
+{
+   struct dsa_switch *ds = dp->ds;
+
+   if (switchdev_trans_ph_prepare(trans)) {
+   if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add)
+   return -EOPNOTSUPP;
+
+   return ds->ops->port_mdb_prepare(ds, dp->index, mdb, trans);
+   }
+
+   ds->ops->port_mdb_add(ds, dp->index, mdb, trans);
+
+   return 0;
+}
+
+int dsa_port_mdb_del(struct dsa_port *dp,
+const struct switchdev_obj_port_mdb *mdb)
+{
+   struct dsa_switch *ds = dp->ds;
+
+   if (ds->ops->port_mdb_del)
+   return ds->ops->port_mdb_del(ds, dp->index, mdb);
+
+   return -EOPNOTSUPP;
+}
+
+int dsa_port_mdb_dump(struct dsa_port *dp, struct switchdev_obj_port_mdb *mdb,
+ switchdev_obj_dump_cb_t *cb)
+{
+   struct dsa_switch *ds = dp->ds;
+
+   if (ds->ops->port_mdb_dump)
+   return ds->ops->port_mdb_dump(ds, dp->index, mdb, cb);
+
+   return -EOPNOTSUPP;
+}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index d9b7bf759f44..9adcb8267d9a 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -245,47 +245,6 @@ static int dsa_port_vlan_dump(struct dsa_port *dp,
return -EOPNOTSUPP;
 }
 
-static int dsa_port_mdb_add(struct dsa_port *dp,
-   const struct switchdev_obj_port_mdb *mdb,
-   struct switchdev_trans *trans)
-{
-   struct dsa_switch *ds = dp->ds;
-
-   if (switchdev_trans_ph_prepare(trans)) {
-   if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add)
-   return -EOPNOTSUPP;
-
-   return ds->ops->port_mdb_prepare(ds, dp->index, mdb, trans);
-   }
-
-   ds->ops->port_mdb_add(ds, dp->index, mdb, trans);
-
-   return 0;
-}
-
-static int dsa_port_mdb_del(struct dsa_port *dp,
-   const struct switchdev_obj_port_mdb *mdb)
-{
-   struct dsa_switch *ds = dp->ds;
-
-   if (ds->ops->port_mdb_del)
-   return ds->ops->port_mdb_del(ds, dp->index, mdb);
-
-   return -EOPNOTSUPP;
-}
-
-static int dsa_port_mdb_dump(struct dsa_port *dp,
-struct switchdev_obj_port_mdb *mdb,
-switchdev_obj_dump_cb_t *cb)
-{
-   struct dsa_switch *ds = dp->ds;
-
-   if (ds->ops->port_mdb_dump)
-   return ds->ops->port_mdb_dump(ds, dp->index, mdb, cb);
-
-   return -EOPNOTSUPP;
-}
-
 static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
struct dsa_slave_priv *p = netdev_priv(dev);
-- 
2.13.0



[PATCH net-next 13/20] net: dsa: move FDB handlers

2017-05-19 Thread Vivien Didelot
Move the DSA port code which handles FDB objects in port.c, where it
belongs.

Signed-off-by: Vivien Didelot 
---
 net/dsa/dsa_priv.h |  7 +++
 net/dsa/port.c | 40 
 net/dsa/slave.c| 42 --
 3 files changed, 47 insertions(+), 42 deletions(-)

diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index b0f9837bf5ed..d003a2554c7a 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -70,6 +70,13 @@ int dsa_port_vlan_filtering(struct dsa_port *dp, bool 
vlan_filtering,
struct switchdev_trans *trans);
 int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock,
 struct switchdev_trans *trans);
+int dsa_port_fdb_add(struct dsa_port *dp,
+const struct switchdev_obj_port_fdb *fdb,
+struct switchdev_trans *trans);
+int dsa_port_fdb_del(struct dsa_port *dp,
+const struct switchdev_obj_port_fdb *fdb);
+int dsa_port_fdb_dump(struct dsa_port *dp, struct switchdev_obj_port_fdb *fdb,
+ switchdev_obj_dump_cb_t *cb);
 
 /* slave.c */
 extern const struct dsa_device_ops notag_netdev_ops;
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 3382fdc07a11..18ec6d432152 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -167,3 +167,43 @@ int dsa_port_ageing_time(struct dsa_port *dp, clock_t 
ageing_clock,
 
return 0;
 }
+
+int dsa_port_fdb_add(struct dsa_port *dp,
+const struct switchdev_obj_port_fdb *fdb,
+struct switchdev_trans *trans)
+{
+   struct dsa_switch *ds = dp->ds;
+
+   if (switchdev_trans_ph_prepare(trans)) {
+   if (!ds->ops->port_fdb_prepare || !ds->ops->port_fdb_add)
+   return -EOPNOTSUPP;
+
+   return ds->ops->port_fdb_prepare(ds, dp->index, fdb, trans);
+   }
+
+   ds->ops->port_fdb_add(ds, dp->index, fdb, trans);
+
+   return 0;
+}
+
+int dsa_port_fdb_del(struct dsa_port *dp,
+const struct switchdev_obj_port_fdb *fdb)
+{
+   struct dsa_switch *ds = dp->ds;
+
+   if (ds->ops->port_fdb_del)
+   return -EOPNOTSUPP;
+
+   return ds->ops->port_fdb_del(ds, dp->index, fdb);
+}
+
+int dsa_port_fdb_dump(struct dsa_port *dp, struct switchdev_obj_port_fdb *fdb,
+ switchdev_obj_dump_cb_t *cb)
+{
+   struct dsa_switch *ds = dp->ds;
+
+   if (ds->ops->port_fdb_dump)
+   return ds->ops->port_fdb_dump(ds, dp->index, fdb, cb);
+
+   return -EOPNOTSUPP;
+}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 1b0f396c4314..d9b7bf759f44 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -245,48 +245,6 @@ static int dsa_port_vlan_dump(struct dsa_port *dp,
return -EOPNOTSUPP;
 }
 
-static int dsa_port_fdb_add(struct dsa_port *dp,
-   const struct switchdev_obj_port_fdb *fdb,
-   struct switchdev_trans *trans)
-{
-   struct dsa_switch *ds = dp->ds;
-
-   if (switchdev_trans_ph_prepare(trans)) {
-   if (!ds->ops->port_fdb_prepare || !ds->ops->port_fdb_add)
-   return -EOPNOTSUPP;
-
-   return ds->ops->port_fdb_prepare(ds, dp->index, fdb, trans);
-   }
-
-   ds->ops->port_fdb_add(ds, dp->index, fdb, trans);
-
-   return 0;
-}
-
-static int dsa_port_fdb_del(struct dsa_port *dp,
-   const struct switchdev_obj_port_fdb *fdb)
-{
-   struct dsa_switch *ds = dp->ds;
-   int ret = -EOPNOTSUPP;
-
-   if (ds->ops->port_fdb_del)
-   ret = ds->ops->port_fdb_del(ds, dp->index, fdb);
-
-   return ret;
-}
-
-static int dsa_port_fdb_dump(struct dsa_port *dp,
-struct switchdev_obj_port_fdb *fdb,
-switchdev_obj_dump_cb_t *cb)
-{
-   struct dsa_switch *ds = dp->ds;
-
-   if (ds->ops->port_fdb_dump)
-   return ds->ops->port_fdb_dump(ds, dp->index, fdb, cb);
-
-   return -EOPNOTSUPP;
-}
-
 static int dsa_port_mdb_add(struct dsa_port *dp,
const struct switchdev_obj_port_mdb *mdb,
struct switchdev_trans *trans)
-- 
2.13.0



[PATCH net-next 12/20] net: dsa: move ageing time setter

2017-05-19 Thread Vivien Didelot
Move the DSA port code which sets a port ageing time in port.c, where it
belongs.

Signed-off-by: Vivien Didelot 
---
 net/dsa/dsa_priv.h |  2 ++
 net/dsa/port.c | 40 
 net/dsa/slave.c| 40 
 3 files changed, 42 insertions(+), 40 deletions(-)

diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index c145223247c5..b0f9837bf5ed 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -68,6 +68,8 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct 
net_device *br);
 void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br);
 int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
struct switchdev_trans *trans);
+int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock,
+struct switchdev_trans *trans);
 
 /* slave.c */
 extern const struct dsa_device_ops notag_netdev_ops;
diff --git a/net/dsa/port.c b/net/dsa/port.c
index c9f95aaf25f1..3382fdc07a11 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -127,3 +127,43 @@ int dsa_port_vlan_filtering(struct dsa_port *dp, bool 
vlan_filtering,
 
return 0;
 }
+
+static unsigned int dsa_fastest_ageing_time(struct dsa_switch *ds,
+   unsigned int ageing_time)
+{
+   int i;
+
+   for (i = 0; i < ds->num_ports; ++i) {
+   struct dsa_port *dp = &ds->ports[i];
+
+   if (dp->ageing_time && dp->ageing_time < ageing_time)
+   ageing_time = dp->ageing_time;
+   }
+
+   return ageing_time;
+}
+
+int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock,
+struct switchdev_trans *trans)
+{
+   unsigned long ageing_jiffies = clock_t_to_jiffies(ageing_clock);
+   unsigned int ageing_time = jiffies_to_msecs(ageing_jiffies);
+   struct dsa_switch *ds = dp->ds;
+
+   if (switchdev_trans_ph_prepare(trans)) {
+   if (ds->ageing_time_min && ageing_time < ds->ageing_time_min)
+   return -ERANGE;
+   if (ds->ageing_time_max && ageing_time > ds->ageing_time_max)
+   return -ERANGE;
+   return 0;
+   }
+
+   /* Keep the fastest ageing time in case of multiple bridges */
+   dp->ageing_time = ageing_time;
+   ageing_time = dsa_fastest_ageing_time(ds, ageing_time);
+
+   if (ds->ops->set_ageing_time)
+   return ds->ops->set_ageing_time(ds, ageing_time);
+
+   return 0;
+}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 32e7e78313ba..1b0f396c4314 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -338,46 +338,6 @@ static int dsa_slave_ioctl(struct net_device *dev, struct 
ifreq *ifr, int cmd)
return -EOPNOTSUPP;
 }
 
-static unsigned int dsa_fastest_ageing_time(struct dsa_switch *ds,
-   unsigned int ageing_time)
-{
-   int i;
-
-   for (i = 0; i < ds->num_ports; ++i) {
-   struct dsa_port *dp = &ds->ports[i];
-
-   if (dp->ageing_time && dp->ageing_time < ageing_time)
-   ageing_time = dp->ageing_time;
-   }
-
-   return ageing_time;
-}
-
-static int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock,
-   struct switchdev_trans *trans)
-{
-   unsigned long ageing_jiffies = clock_t_to_jiffies(ageing_clock);
-   unsigned int ageing_time = jiffies_to_msecs(ageing_jiffies);
-   struct dsa_switch *ds = dp->ds;
-
-   if (switchdev_trans_ph_prepare(trans)) {
-   if (ds->ageing_time_min && ageing_time < ds->ageing_time_min)
-   return -ERANGE;
-   if (ds->ageing_time_max && ageing_time > ds->ageing_time_max)
-   return -ERANGE;
-   return 0;
-   }
-
-   /* Keep the fastest ageing time in case of multiple bridges */
-   dp->ageing_time = ageing_time;
-   ageing_time = dsa_fastest_ageing_time(ds, ageing_time);
-
-   if (ds->ops->set_ageing_time)
-   return ds->ops->set_ageing_time(ds, ageing_time);
-
-   return 0;
-}
-
 static int dsa_slave_port_attr_set(struct net_device *dev,
   const struct switchdev_attr *attr,
   struct switchdev_trans *trans)
-- 
2.13.0



[PATCH net-next 07/20] net: dsa: change scope of VLAN filtering setter

2017-05-19 Thread Vivien Didelot
Change the scope of the switchdev VLAN filtering attribute setter from
the DSA slave device to the generic DSA port, so that the future
port-wide API can also be used for other port types, such as CPU and DSA
links.

Signed-off-by: Vivien Didelot 
---
 net/dsa/slave.c | 15 +++
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index de39da69fd33..216eb38a847d 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -388,20 +388,18 @@ static int dsa_slave_ioctl(struct net_device *dev, struct 
ifreq *ifr, int cmd)
return -EOPNOTSUPP;
 }
 
-static int dsa_slave_vlan_filtering(struct net_device *dev,
-   const struct switchdev_attr *attr,
-   struct switchdev_trans *trans)
+static int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
+  struct switchdev_trans *trans)
 {
-   struct dsa_slave_priv *p = netdev_priv(dev);
-   struct dsa_switch *ds = p->dp->ds;
+   struct dsa_switch *ds = dp->ds;
 
/* bridge skips -EOPNOTSUPP, so skip the prepare phase */
if (switchdev_trans_ph_prepare(trans))
return 0;
 
if (ds->ops->port_vlan_filtering)
-   return ds->ops->port_vlan_filtering(ds, p->dp->index,
-   attr->u.vlan_filtering);
+   return ds->ops->port_vlan_filtering(ds, dp->index,
+   vlan_filtering);
 
return 0;
 }
@@ -461,7 +459,8 @@ static int dsa_slave_port_attr_set(struct net_device *dev,
ret = dsa_port_set_state(dp, attr->u.stp_state, trans);
break;
case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
-   ret = dsa_slave_vlan_filtering(dev, attr, trans);
+   ret = dsa_port_vlan_filtering(dp, attr->u.vlan_filtering,
+ trans);
break;
case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME:
ret = dsa_slave_ageing_time(dev, attr, trans);
-- 
2.13.0



[PATCH net-next 19/20] net: dsa: add MDB notifier

2017-05-19 Thread Vivien Didelot
Add two new DSA_NOTIFIER_MDB_ADD and DSA_NOTIFIER_MDB_DEL events to
notify not only a single switch, but all switches of the fabric when
an MDB entry is added or removed.

For the moment, keep the current behavior and ignore other switches.

Signed-off-by: Vivien Didelot 
---
 net/dsa/dsa_priv.h | 10 ++
 net/dsa/port.c | 31 ++-
 net/dsa/switch.c   | 43 +++
 3 files changed, 67 insertions(+), 17 deletions(-)

diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 6a7d0d7d0489..2b60293b325c 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -22,6 +22,8 @@ enum {
DSA_NOTIFIER_BRIDGE_LEAVE,
DSA_NOTIFIER_FDB_ADD,
DSA_NOTIFIER_FDB_DEL,
+   DSA_NOTIFIER_MDB_ADD,
+   DSA_NOTIFIER_MDB_DEL,
 };
 
 /* DSA_NOTIFIER_AGEING_TIME */
@@ -46,6 +48,14 @@ struct dsa_notifier_fdb_info {
int port;
 };
 
+/* DSA_NOTIFIER_MDB_* */
+struct dsa_notifier_mdb_info {
+   const struct switchdev_obj_port_mdb *mdb;
+   struct switchdev_trans *trans;
+   int sw_index;
+   int port;
+};
+
 struct dsa_device_ops {
struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev);
struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev,
diff --git a/net/dsa/port.c b/net/dsa/port.c
index ed88d8381642..c7c4920e7bc9 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -188,29 +188,26 @@ int dsa_port_mdb_add(struct dsa_port *dp,
 const struct switchdev_obj_port_mdb *mdb,
 struct switchdev_trans *trans)
 {
-   struct dsa_switch *ds = dp->ds;
-
-   if (switchdev_trans_ph_prepare(trans)) {
-   if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add)
-   return -EOPNOTSUPP;
-
-   return ds->ops->port_mdb_prepare(ds, dp->index, mdb, trans);
-   }
-
-   ds->ops->port_mdb_add(ds, dp->index, mdb, trans);
-
-   return 0;
+   struct dsa_notifier_mdb_info info = {
+   .sw_index = dp->ds->index,
+   .port = dp->index,
+   .trans = trans,
+   .mdb = mdb,
+   };
+
+   return dsa_port_notify(dp, DSA_NOTIFIER_MDB_ADD, &info);
 }
 
 int dsa_port_mdb_del(struct dsa_port *dp,
 const struct switchdev_obj_port_mdb *mdb)
 {
-   struct dsa_switch *ds = dp->ds;
+   struct dsa_notifier_mdb_info info = {
+   .sw_index = dp->ds->index,
+   .port = dp->index,
+   .mdb = mdb,
+   };
 
-   if (ds->ops->port_mdb_del)
-   return ds->ops->port_mdb_del(ds, dp->index, mdb);
-
-   return -EOPNOTSUPP;
+   return dsa_port_notify(dp, DSA_NOTIFIER_MDB_DEL, &info);
 }
 
 int dsa_port_mdb_dump(struct dsa_port *dp, struct switchdev_obj_port_mdb *mdb,
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index e71cc860d32c..b7e8e45869fc 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -121,6 +121,43 @@ static int dsa_switch_fdb_del(struct dsa_switch *ds,
return ds->ops->port_fdb_del(ds, info->port, fdb);
 }
 
+static int dsa_switch_mdb_add(struct dsa_switch *ds,
+ struct dsa_notifier_mdb_info *info)
+{
+   const struct switchdev_obj_port_mdb *mdb = info->mdb;
+   struct switchdev_trans *trans = info->trans;
+
+   /* Do not care yet about other switch chips of the fabric */
+   if (ds->index != info->sw_index)
+   return 0;
+
+   if (switchdev_trans_ph_prepare(trans)) {
+   if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add)
+   return -EOPNOTSUPP;
+
+   return ds->ops->port_mdb_prepare(ds, info->port, mdb, trans);
+   }
+
+   ds->ops->port_mdb_add(ds, info->port, mdb, trans);
+
+   return 0;
+}
+
+static int dsa_switch_mdb_del(struct dsa_switch *ds,
+ struct dsa_notifier_mdb_info *info)
+{
+   const struct switchdev_obj_port_mdb *mdb = info->mdb;
+
+   /* Do not care yet about other switch chips of the fabric */
+   if (ds->index != info->sw_index)
+   return 0;
+
+   if (!ds->ops->port_mdb_del)
+   return -EOPNOTSUPP;
+
+   return ds->ops->port_mdb_del(ds, info->port, mdb);
+}
+
 static int dsa_switch_event(struct notifier_block *nb,
unsigned long event, void *info)
 {
@@ -143,6 +180,12 @@ static int dsa_switch_event(struct notifier_block *nb,
case DSA_NOTIFIER_FDB_DEL:
err = dsa_switch_fdb_del(ds, info);
break;
+   case DSA_NOTIFIER_MDB_ADD:
+   err = dsa_switch_mdb_add(ds, info);
+   break;
+   case DSA_NOTIFIER_MDB_DEL:
+   err = dsa_switch_mdb_del(ds, info);
+   break;
default:
err = -EOPNOTSUPP;
break;
-- 
2.13.0



[PATCH net-next 16/20] net: dsa: move notifier info to private header

2017-05-19 Thread Vivien Didelot
The DSA notifier events and info structure definitions are not meant for
DSA drivers and users, but only used internally by the DSA core files.

Move them from the public net/dsa.h file to the private dsa_priv.h file.

Also use this opportunity to turn the events into an anonymous enum,
because we don't care about the values, and this will prevent future
conflicts when adding (and sorting) new events.

Signed-off-by: Vivien Didelot 
---
 include/net/dsa.h  | 10 --
 net/dsa/dsa_priv.h | 12 
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 791fed62fb16..c0e567c0c824 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -285,16 +285,6 @@ static inline u8 dsa_upstream_port(struct dsa_switch *ds)
return ds->rtable[dst->cpu_dp->ds->index];
 }
 
-#define DSA_NOTIFIER_BRIDGE_JOIN   1
-#define DSA_NOTIFIER_BRIDGE_LEAVE  2
-
-/* DSA_NOTIFIER_BRIDGE_* */
-struct dsa_notifier_bridge_info {
-   struct net_device *br;
-   int sw_index;
-   int port;
-};
-
 struct dsa_switch_ops {
/*
 * Legacy probing.
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 16021a891095..c19241eb094b 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -16,6 +16,18 @@
 #include 
 #include 
 
+enum {
+   DSA_NOTIFIER_BRIDGE_JOIN,
+   DSA_NOTIFIER_BRIDGE_LEAVE,
+};
+
+/* DSA_NOTIFIER_BRIDGE_* */
+struct dsa_notifier_bridge_info {
+   struct net_device *br;
+   int sw_index;
+   int port;
+};
+
 struct dsa_device_ops {
struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev);
struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev,
-- 
2.13.0



[PATCH net-next 17/20] net: dsa: add notifier for ageing time

2017-05-19 Thread Vivien Didelot
This patch keeps the port-wide ageing time handling code in
dsa_port_ageing_time, pushes the requested ageing time value into a new
switch fabric notification, and moves the switch-wide ageing time
handling code into dsa_switch_ageing_time.

This has the effect that now not only the switch that the target port
belongs to can be programmed, but all switches composing the switch
fabric. For the moment, keep the current behavior and ignore other
switches.

Signed-off-by: Vivien Didelot 
---
 net/dsa/dsa_priv.h |  8 
 net/dsa/port.c | 37 -
 net/dsa/switch.c   | 46 ++
 3 files changed, 62 insertions(+), 29 deletions(-)

diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index c19241eb094b..becaf8a61b13 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -17,10 +17,18 @@
 #include 
 
 enum {
+   DSA_NOTIFIER_AGEING_TIME,
DSA_NOTIFIER_BRIDGE_JOIN,
DSA_NOTIFIER_BRIDGE_LEAVE,
 };
 
+/* DSA_NOTIFIER_AGEING_TIME */
+struct dsa_notifier_ageing_time_info {
+   struct switchdev_trans *trans;
+   unsigned int ageing_time;
+   int sw_index;
+};
+
 /* DSA_NOTIFIER_BRIDGE_* */
 struct dsa_notifier_bridge_info {
struct net_device *br;
diff --git a/net/dsa/port.c b/net/dsa/port.c
index f211b0dfb12d..59328a35394d 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -128,44 +128,23 @@ int dsa_port_vlan_filtering(struct dsa_port *dp, bool 
vlan_filtering,
return 0;
 }
 
-static unsigned int dsa_fastest_ageing_time(struct dsa_switch *ds,
-   unsigned int ageing_time)
-{
-   int i;
-
-   for (i = 0; i < ds->num_ports; ++i) {
-   struct dsa_port *dp = >ports[i];
-
-   if (dp->ageing_time && dp->ageing_time < ageing_time)
-   ageing_time = dp->ageing_time;
-   }
-
-   return ageing_time;
-}
-
 int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock,
 struct switchdev_trans *trans)
 {
unsigned long ageing_jiffies = clock_t_to_jiffies(ageing_clock);
unsigned int ageing_time = jiffies_to_msecs(ageing_jiffies);
-   struct dsa_switch *ds = dp->ds;
+   struct dsa_notifier_ageing_time_info info = {
+   .ageing_time = ageing_time,
+   .sw_index = dp->ds->index,
+   .trans = trans,
+   };
 
-   if (switchdev_trans_ph_prepare(trans)) {
-   if (ds->ageing_time_min && ageing_time < ds->ageing_time_min)
-   return -ERANGE;
-   if (ds->ageing_time_max && ageing_time > ds->ageing_time_max)
-   return -ERANGE;
-   return 0;
-   }
+   if (switchdev_trans_ph_prepare(trans))
+   return dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info);
 
-   /* Keep the fastest ageing time in case of multiple bridges */
dp->ageing_time = ageing_time;
-   ageing_time = dsa_fastest_ageing_time(ds, ageing_time);
 
-   if (ds->ops->set_ageing_time)
-   return ds->ops->set_ageing_time(ds, ageing_time);
-
-   return 0;
+   return dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info);
 }
 
 int dsa_port_fdb_add(struct dsa_port *dp,
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index f477053308d2..540770ecc8b0 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -12,9 +12,52 @@
 
 #include 
 #include 
+#include 
 
 #include "dsa_priv.h"
 
+static unsigned int dsa_switch_fastest_ageing_time(struct dsa_switch *ds,
+  unsigned int ageing_time)
+{
+   int i;
+
+   for (i = 0; i < ds->num_ports; ++i) {
+   struct dsa_port *dp = &ds->ports[i];
+
+   if (dp->ageing_time && dp->ageing_time < ageing_time)
+   ageing_time = dp->ageing_time;
+   }
+
+   return ageing_time;
+}
+
+static int dsa_switch_ageing_time(struct dsa_switch *ds,
+ struct dsa_notifier_ageing_time_info *info)
+{
+   unsigned int ageing_time = info->ageing_time;
+   struct switchdev_trans *trans = info->trans;
+
+   /* Do not care yet about other switch chips of the fabric */
+   if (ds->index != info->sw_index)
+   return 0;
+
+   if (switchdev_trans_ph_prepare(trans)) {
+   if (ds->ageing_time_min && ageing_time < ds->ageing_time_min)
+   return -ERANGE;
+   if (ds->ageing_time_max && ageing_time > ds->ageing_time_max)
+   return -ERANGE;
+   return 0;
+   }
+
+   /* Program the fastest ageing time in case of multiple bridges */
+   ageing_time = dsa_switch_fastest_ageing_time(ds, ageing_time);
+
+   if (ds->ops->set_ageing_time)
+   return ds->ops->set_ageing_time(ds, ageing_time);
+
+   return 0;
+}
+
 static int 

[PATCH net-next 02/20] net: dsa: change scope of notifier call chain

2017-05-19 Thread Vivien Didelot
Change the scope of the fabric notification helper from the DSA slave to
the DSA port, since this is a DSA layer specific notion, that can be
used by non-slave ports (CPU and DSA).

Signed-off-by: Vivien Didelot 
---
 net/dsa/slave.c | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 403d1dfe7f50..371f6d267917 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -27,10 +27,9 @@
 
 static bool dsa_slave_dev_check(struct net_device *dev);
 
-static int dsa_slave_notify(struct net_device *dev, unsigned long e, void *v)
+static int dsa_port_notify(struct dsa_port *dp, unsigned long e, void *v)
 {
-   struct dsa_slave_priv *p = netdev_priv(dev);
-   struct raw_notifier_head *nh = >dp->ds->dst->nh;
+   struct raw_notifier_head *nh = &dp->ds->dst->nh;
int err;
 
err = raw_notifier_call_chain(nh, e, v);
@@ -589,7 +588,7 @@ static int dsa_slave_bridge_port_join(struct net_device 
*dev,
 */
p->dp->bridge_dev = br;
 
-   err = dsa_slave_notify(dev, DSA_NOTIFIER_BRIDGE_JOIN, );
+   err = dsa_port_notify(p->dp, DSA_NOTIFIER_BRIDGE_JOIN, &info);
 
/* The bridging is rolled back on error */
if (err)
@@ -614,7 +613,7 @@ static void dsa_slave_bridge_port_leave(struct net_device 
*dev,
 */
p->dp->bridge_dev = NULL;
 
-   err = dsa_slave_notify(dev, DSA_NOTIFIER_BRIDGE_LEAVE, );
+   err = dsa_port_notify(p->dp, DSA_NOTIFIER_BRIDGE_LEAVE, &info);
if (err)
netdev_err(dev, "failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n");
 
-- 
2.13.0



[PATCH net-next 10/20] net: dsa: move bridging routines

2017-05-19 Thread Vivien Didelot
Move the DSA port code which bridges a port in port.c, where it belongs.

Signed-off-by: Vivien Didelot 
---
 net/dsa/dsa_priv.h |  2 ++
 net/dsa/port.c | 58 ++
 net/dsa/slave.c| 57 -
 3 files changed, 60 insertions(+), 57 deletions(-)

diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index cda218cd9b05..f0b6cd3c8a65 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -64,6 +64,8 @@ void dsa_legacy_unregister(void);
 int dsa_port_set_state(struct dsa_port *dp, u8 state,
   struct switchdev_trans *trans);
 void dsa_port_set_state_now(struct dsa_port *dp, u8 state);
+int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br);
+void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br);
 
 /* slave.c */
 extern const struct dsa_device_ops notag_netdev_ops;
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 6cc4704190fd..da8577fb3d07 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -11,9 +11,20 @@
  */
 
 #include 
+#include 
 
 #include "dsa_priv.h"
 
+static int dsa_port_notify(struct dsa_port *dp, unsigned long e, void *v)
+{
+   struct raw_notifier_head *nh = &dp->ds->dst->nh;
+   int err;
+
+   err = raw_notifier_call_chain(nh, e, v);
+
+   return notifier_to_errno(err);
+}
+
 int dsa_port_set_state(struct dsa_port *dp, u8 state,
   struct switchdev_trans *trans)
 {
@@ -53,3 +64,50 @@ void dsa_port_set_state_now(struct dsa_port *dp, u8 state)
if (err)
pr_err("DSA: failed to set STP state %u (%d)\n", state, err);
 }
+
+int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br)
+{
+   struct dsa_notifier_bridge_info info = {
+   .sw_index = dp->ds->index,
+   .port = dp->index,
+   .br = br,
+   };
+   int err;
+
+   /* Here the port is already bridged. Reflect the current configuration
+* so that drivers can program their chips accordingly.
+*/
+   dp->bridge_dev = br;
+
+   err = dsa_port_notify(dp, DSA_NOTIFIER_BRIDGE_JOIN, &info);
+
+   /* The bridging is rolled back on error */
+   if (err)
+   dp->bridge_dev = NULL;
+
+   return err;
+}
+
+void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
+{
+   struct dsa_notifier_bridge_info info = {
+   .sw_index = dp->ds->index,
+   .port = dp->index,
+   .br = br,
+   };
+   int err;
+
+   /* Here the port is already unbridged. Reflect the current configuration
+* so that drivers can program their chips accordingly.
+*/
+   dp->bridge_dev = NULL;
+
+   err = dsa_port_notify(dp, DSA_NOTIFIER_BRIDGE_LEAVE, &info);
+   if (err)
+   pr_err("DSA: failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n");
+
+   /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer,
+* so allow it to be in BR_STATE_FORWARDING to be kept functional
+*/
+   dsa_port_set_state_now(dp, BR_STATE_FORWARDING);
+}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 2c57c7205aa3..ab298c41b8e7 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -27,16 +27,6 @@
 
 static bool dsa_slave_dev_check(struct net_device *dev);
 
-static int dsa_port_notify(struct dsa_port *dp, unsigned long e, void *v)
-{
-   struct raw_notifier_head *nh = >ds->dst->nh;
-   int err;
-
-   err = raw_notifier_call_chain(nh, e, v);
-
-   return notifier_to_errno(err);
-}
-
 /* slave mii_bus handling ***/
 static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg)
 {
@@ -514,53 +504,6 @@ static int dsa_slave_port_obj_dump(struct net_device *dev,
return err;
 }
 
-static int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br)
-{
-   struct dsa_notifier_bridge_info info = {
-   .sw_index = dp->ds->index,
-   .port = dp->index,
-   .br = br,
-   };
-   int err;
-
-   /* Here the port is already bridged. Reflect the current configuration
-* so that drivers can program their chips accordingly.
-*/
-   dp->bridge_dev = br;
-
-   err = dsa_port_notify(dp, DSA_NOTIFIER_BRIDGE_JOIN, );
-
-   /* The bridging is rolled back on error */
-   if (err)
-   dp->bridge_dev = NULL;
-
-   return err;
-}
-
-static void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
-{
-   struct dsa_notifier_bridge_info info = {
-   .sw_index = dp->ds->index,
-   .port = dp->index,
-   .br = br,
-   };
-   int err;
-
-   /* Here the port is already unbridged. Reflect the current configuration
-* so that drivers can program their chips accordingly.
-*/
-   dp->bridge_dev = NULL;
-
- 

[PATCH net-next 01/20] net: dsa: change scope of STP state setter

2017-05-19 Thread Vivien Didelot
Instead of having multiple STP state helpers scoping a slave device
supporting both the DSA logic and the switchdev binding, provide a
single dsa_port_set_state helper scoping a DSA port, as well as its
dsa_port_set_state_now wrapper which skips the prepare phase.

This allows us to better separate the DSA logic from the slave device
handling.

Signed-off-by: Vivien Didelot 
---
 net/dsa/slave.c | 44 ++--
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 91236d602301..403d1dfe7f50 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -85,13 +85,15 @@ static inline bool dsa_port_is_bridged(struct dsa_port *dp)
return !!dp->bridge_dev;
 }
 
-static void dsa_slave_set_state(struct net_device *dev, u8 state)
+static int dsa_port_set_state(struct dsa_port *dp, u8 state,
+ struct switchdev_trans *trans)
 {
-   struct dsa_slave_priv *p = netdev_priv(dev);
-   struct dsa_port *dp = p->dp;
struct dsa_switch *ds = dp->ds;
int port = dp->index;
 
+   if (switchdev_trans_ph_prepare(trans))
+   return ds->ops->port_stp_state_set ? 0 : -EOPNOTSUPP;
+
if (ds->ops->port_stp_state_set)
ds->ops->port_stp_state_set(ds, port, state);
 
@@ -110,6 +112,17 @@ static void dsa_slave_set_state(struct net_device *dev, u8 
state)
}
 
dp->stp_state = state;
+
+   return 0;
+}
+
+static void dsa_port_set_state_now(struct dsa_port *dp, u8 state)
+{
+   int err;
+
+   err = dsa_port_set_state(dp, state, NULL);
+   if (err)
+   pr_err("DSA: failed to set STP state %u (%d)\n", state, err);
 }
 
 static int dsa_slave_open(struct net_device *dev)
@@ -147,7 +160,7 @@ static int dsa_slave_open(struct net_device *dev)
goto clear_promisc;
}
 
-   dsa_slave_set_state(dev, stp_state);
+   dsa_port_set_state_now(p->dp, stp_state);
 
if (p->phy)
phy_start(p->phy);
@@ -189,7 +202,7 @@ static int dsa_slave_close(struct net_device *dev)
if (ds->ops->port_disable)
ds->ops->port_disable(ds, p->dp->index, p->phy);
 
-   dsa_slave_set_state(dev, BR_STATE_DISABLED);
+   dsa_port_set_state_now(p->dp, BR_STATE_DISABLED);
 
return 0;
 }
@@ -386,21 +399,6 @@ static int dsa_slave_ioctl(struct net_device *dev, struct 
ifreq *ifr, int cmd)
return -EOPNOTSUPP;
 }
 
-static int dsa_slave_stp_state_set(struct net_device *dev,
-  const struct switchdev_attr *attr,
-  struct switchdev_trans *trans)
-{
-   struct dsa_slave_priv *p = netdev_priv(dev);
-   struct dsa_switch *ds = p->dp->ds;
-
-   if (switchdev_trans_ph_prepare(trans))
-   return ds->ops->port_stp_state_set ? 0 : -EOPNOTSUPP;
-
-   dsa_slave_set_state(dev, attr->u.stp_state);
-
-   return 0;
-}
-
 static int dsa_slave_vlan_filtering(struct net_device *dev,
const struct switchdev_attr *attr,
struct switchdev_trans *trans)
@@ -465,11 +463,13 @@ static int dsa_slave_port_attr_set(struct net_device *dev,
   const struct switchdev_attr *attr,
   struct switchdev_trans *trans)
 {
+   struct dsa_slave_priv *p = netdev_priv(dev);
+   struct dsa_port *dp = p->dp;
int ret;
 
switch (attr->id) {
case SWITCHDEV_ATTR_ID_PORT_STP_STATE:
-   ret = dsa_slave_stp_state_set(dev, attr, trans);
+   ret = dsa_port_set_state(dp, attr->u.stp_state, trans);
break;
case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
ret = dsa_slave_vlan_filtering(dev, attr, trans);
@@ -621,7 +621,7 @@ static void dsa_slave_bridge_port_leave(struct net_device 
*dev,
/* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer,
 * so allow it to be in BR_STATE_FORWARDING to be kept functional
 */
-   dsa_slave_set_state(dev, BR_STATE_FORWARDING);
+   dsa_port_set_state_now(p->dp, BR_STATE_FORWARDING);
 }
 
 static int dsa_slave_port_attr_get(struct net_device *dev,
-- 
2.13.0



[PATCH net-next 00/20] net: dsa: distribute switch events

2017-05-19 Thread Vivien Didelot
DSA is by nature the support for a switch fabric, which can be composed
of a single, or multiple interconnected Ethernet switch chips.

The current DSA core behavior is to identify the slave port targeted by
a request (e.g. adding a VLAN entry), and program the switch chip to
which it belongs accordingly.

This is problematic in a multi-chip environment, since all chips of a
fabric must be aware of most configuration changes. Here are some
concrete examples in a 3-chip environment:

 [CPU].......................... (mdio)
(eth0) |   :        :          :
      _|_____    _______    _______
     [__sw0__]--[__sw1__]--[__sw2__]
      |  |  |    |  |  |    |  |  |
      v  v  v    v  v  v    v  v  v
      p1 p2 p3   p4 p5 p6   p7 p8 p9

If you add a VLAN entry on p7, sw2 gets programmed, but frames won't
reach the CPU interface in a VLAN filtered setup. sw0 and sw1 also need
to be programmed. The same problem comes with MAC addresses (FDB, MDB),
or ageing time changes for instance.

This patch series uses the notification chain introduced for bridging
to notify all switch chips of the fabric not only of bridge events, but
also of switchdev attribute and object events.

An ugly debug message printing the ignored event and switch info in the
code handling the switch VLAN events would give us:

# bridge vlan add dev p7 vid 42
sw0: ignoring DSA_NOTIFIER_VLAN_ADD for sw2 (prepare phase)
sw1: ignoring DSA_NOTIFIER_VLAN_ADD for sw2 (prepare phase)
sw0: ignoring DSA_NOTIFIER_VLAN_ADD for sw2 (commit phase)
sw1: ignoring DSA_NOTIFIER_VLAN_ADD for sw2 (commit phase)

To achieve that, patches 1-8 change the scope of the bridge and
switchdev callbacks from the DSA slave device to the generic DSA port,
so that the port-wide API can be used later for switch ports not exposed
to userspace, such as CPU and DSA links.

Patches 9-15 move the DSA port specific functions into a new port.c file.

Patches 16-20 introduce new events to notify the fabric about switchdev
attributes and objects manipulation.

This patch series only adds the plumbing to support a distributed
configuration, but for the moment, each switch chip ignores events from
other chips of the fabric, to keep the current behavior.

The next patch series will add support for cross-chip configuration of
bridge ageing time, VLAN and MAC address databases operations, etc.


Vivien Didelot (20):
  net: dsa: change scope of STP state setter
  net: dsa: change scope of notifier call chain
  net: dsa: change scope of bridging code
  net: dsa: change scope of FDB handlers
  net: dsa: change scope of MDB handlers
  net: dsa: change scope of VLAN handlers
  net: dsa: change scope of VLAN filtering setter
  net: dsa: change scope of ageing time setter
  net: dsa: move port state setters
  net: dsa: move bridging routines
  net: dsa: move VLAN filtering setter
  net: dsa: move ageing time setter
  net: dsa: move FDB handlers
  net: dsa: move MDB handlers
  net: dsa: move VLAN handlers
  net: dsa: move notifier info to private header
  net: dsa: add notifier for ageing time
  net: dsa: add FDB notifier
  net: dsa: add MDB notifier
  net: dsa: add VLAN notifier

 include/net/dsa.h  |  10 --
 net/dsa/Makefile   |   2 +-
 net/dsa/dsa_priv.h |  83 +
 net/dsa/port.c | 260 +++
 net/dsa/slave.c| 354 +
 net/dsa/switch.c   | 175 ++
 6 files changed, 547 insertions(+), 337 deletions(-)
 create mode 100644 net/dsa/port.c

-- 
2.13.0



[PATCH net-next 04/20] net: dsa: change scope of FDB handlers

2017-05-19 Thread Vivien Didelot
Change the scope of the switchdev FDB object handlers from the DSA slave
device to the generic DSA port, so that the future port-wide API can
also be used for other port types, such as CPU and DSA links.

Signed-off-by: Vivien Didelot 
---
 net/dsa/slave.c | 50 --
 1 file changed, 24 insertions(+), 26 deletions(-)

diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 1ad62ef8c261..e9c3ea09cc09 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -299,47 +299,44 @@ static int dsa_slave_port_vlan_dump(struct net_device 
*dev,
return -EOPNOTSUPP;
 }
 
-static int dsa_slave_port_fdb_add(struct net_device *dev,
- const struct switchdev_obj_port_fdb *fdb,
- struct switchdev_trans *trans)
+static int dsa_port_fdb_add(struct dsa_port *dp,
+   const struct switchdev_obj_port_fdb *fdb,
+   struct switchdev_trans *trans)
 {
-   struct dsa_slave_priv *p = netdev_priv(dev);
-   struct dsa_switch *ds = p->dp->ds;
+   struct dsa_switch *ds = dp->ds;
 
if (switchdev_trans_ph_prepare(trans)) {
if (!ds->ops->port_fdb_prepare || !ds->ops->port_fdb_add)
return -EOPNOTSUPP;
 
-   return ds->ops->port_fdb_prepare(ds, p->dp->index, fdb, trans);
+   return ds->ops->port_fdb_prepare(ds, dp->index, fdb, trans);
}
 
-   ds->ops->port_fdb_add(ds, p->dp->index, fdb, trans);
+   ds->ops->port_fdb_add(ds, dp->index, fdb, trans);
 
return 0;
 }
 
-static int dsa_slave_port_fdb_del(struct net_device *dev,
- const struct switchdev_obj_port_fdb *fdb)
+static int dsa_port_fdb_del(struct dsa_port *dp,
+   const struct switchdev_obj_port_fdb *fdb)
 {
-   struct dsa_slave_priv *p = netdev_priv(dev);
-   struct dsa_switch *ds = p->dp->ds;
+   struct dsa_switch *ds = dp->ds;
int ret = -EOPNOTSUPP;
 
if (ds->ops->port_fdb_del)
-   ret = ds->ops->port_fdb_del(ds, p->dp->index, fdb);
+   ret = ds->ops->port_fdb_del(ds, dp->index, fdb);
 
return ret;
 }
 
-static int dsa_slave_port_fdb_dump(struct net_device *dev,
-  struct switchdev_obj_port_fdb *fdb,
-  switchdev_obj_dump_cb_t *cb)
+static int dsa_port_fdb_dump(struct dsa_port *dp,
+struct switchdev_obj_port_fdb *fdb,
+switchdev_obj_dump_cb_t *cb)
 {
-   struct dsa_slave_priv *p = netdev_priv(dev);
-   struct dsa_switch *ds = p->dp->ds;
+   struct dsa_switch *ds = dp->ds;
 
if (ds->ops->port_fdb_dump)
-   return ds->ops->port_fdb_dump(ds, p->dp->index, fdb, cb);
+   return ds->ops->port_fdb_dump(ds, dp->index, fdb, cb);
 
return -EOPNOTSUPP;
 }
@@ -488,6 +485,8 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
  const struct switchdev_obj *obj,
  struct switchdev_trans *trans)
 {
+   struct dsa_slave_priv *p = netdev_priv(dev);
+   struct dsa_port *dp = p->dp;
int err;
 
/* For the prepare phase, ensure the full set of changes is feasable in
@@ -497,9 +496,7 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
 
switch (obj->id) {
case SWITCHDEV_OBJ_ID_PORT_FDB:
-   err = dsa_slave_port_fdb_add(dev,
-SWITCHDEV_OBJ_PORT_FDB(obj),
-trans);
+   err = dsa_port_fdb_add(dp, SWITCHDEV_OBJ_PORT_FDB(obj), trans);
break;
case SWITCHDEV_OBJ_ID_PORT_MDB:
err = dsa_slave_port_mdb_add(dev, SWITCHDEV_OBJ_PORT_MDB(obj),
@@ -521,12 +518,13 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
 static int dsa_slave_port_obj_del(struct net_device *dev,
  const struct switchdev_obj *obj)
 {
+   struct dsa_slave_priv *p = netdev_priv(dev);
+   struct dsa_port *dp = p->dp;
int err;
 
switch (obj->id) {
case SWITCHDEV_OBJ_ID_PORT_FDB:
-   err = dsa_slave_port_fdb_del(dev,
-SWITCHDEV_OBJ_PORT_FDB(obj));
+   err = dsa_port_fdb_del(dp, SWITCHDEV_OBJ_PORT_FDB(obj));
break;
case SWITCHDEV_OBJ_ID_PORT_MDB:
err = dsa_slave_port_mdb_del(dev, SWITCHDEV_OBJ_PORT_MDB(obj));
@@ -547,13 +545,13 @@ static int dsa_slave_port_obj_dump(struct net_device *dev,
   struct switchdev_obj *obj,
   switchdev_obj_dump_cb_t *cb)
 {
+   struct dsa_slave_priv *p = netdev_priv(dev);
+   struct dsa_port *dp = p->dp;
int err;
 

[PATCH net-next 05/20] net: dsa: change scope of MDB handlers

2017-05-19 Thread Vivien Didelot
Change the scope of the switchdev MDB object handlers from the DSA slave
device to the generic DSA port, so that the future port-wide API can
also be used for other port types, such as CPU and DSA links.

Signed-off-by: Vivien Didelot 
---
 net/dsa/slave.c | 41 ++---
 1 file changed, 18 insertions(+), 23 deletions(-)

diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index e9c3ea09cc09..0921d306aedf 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -341,46 +341,43 @@ static int dsa_port_fdb_dump(struct dsa_port *dp,
return -EOPNOTSUPP;
 }
 
-static int dsa_slave_port_mdb_add(struct net_device *dev,
- const struct switchdev_obj_port_mdb *mdb,
- struct switchdev_trans *trans)
+static int dsa_port_mdb_add(struct dsa_port *dp,
+   const struct switchdev_obj_port_mdb *mdb,
+   struct switchdev_trans *trans)
 {
-   struct dsa_slave_priv *p = netdev_priv(dev);
-   struct dsa_switch *ds = p->dp->ds;
+   struct dsa_switch *ds = dp->ds;
 
if (switchdev_trans_ph_prepare(trans)) {
if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add)
return -EOPNOTSUPP;
 
-   return ds->ops->port_mdb_prepare(ds, p->dp->index, mdb, trans);
+   return ds->ops->port_mdb_prepare(ds, dp->index, mdb, trans);
}
 
-   ds->ops->port_mdb_add(ds, p->dp->index, mdb, trans);
+   ds->ops->port_mdb_add(ds, dp->index, mdb, trans);
 
return 0;
 }
 
-static int dsa_slave_port_mdb_del(struct net_device *dev,
- const struct switchdev_obj_port_mdb *mdb)
+static int dsa_port_mdb_del(struct dsa_port *dp,
+   const struct switchdev_obj_port_mdb *mdb)
 {
-   struct dsa_slave_priv *p = netdev_priv(dev);
-   struct dsa_switch *ds = p->dp->ds;
+   struct dsa_switch *ds = dp->ds;
 
if (ds->ops->port_mdb_del)
-   return ds->ops->port_mdb_del(ds, p->dp->index, mdb);
+   return ds->ops->port_mdb_del(ds, dp->index, mdb);
 
return -EOPNOTSUPP;
 }
 
-static int dsa_slave_port_mdb_dump(struct net_device *dev,
-  struct switchdev_obj_port_mdb *mdb,
-  switchdev_obj_dump_cb_t *cb)
+static int dsa_port_mdb_dump(struct dsa_port *dp,
+struct switchdev_obj_port_mdb *mdb,
+switchdev_obj_dump_cb_t *cb)
 {
-   struct dsa_slave_priv *p = netdev_priv(dev);
-   struct dsa_switch *ds = p->dp->ds;
+   struct dsa_switch *ds = dp->ds;
 
if (ds->ops->port_mdb_dump)
-   return ds->ops->port_mdb_dump(ds, p->dp->index, mdb, cb);
+   return ds->ops->port_mdb_dump(ds, dp->index, mdb, cb);
 
return -EOPNOTSUPP;
 }
@@ -499,8 +496,7 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
err = dsa_port_fdb_add(dp, SWITCHDEV_OBJ_PORT_FDB(obj), trans);
break;
case SWITCHDEV_OBJ_ID_PORT_MDB:
-   err = dsa_slave_port_mdb_add(dev, SWITCHDEV_OBJ_PORT_MDB(obj),
-trans);
+   err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj), trans);
break;
case SWITCHDEV_OBJ_ID_PORT_VLAN:
err = dsa_slave_port_vlan_add(dev,
@@ -527,7 +523,7 @@ static int dsa_slave_port_obj_del(struct net_device *dev,
err = dsa_port_fdb_del(dp, SWITCHDEV_OBJ_PORT_FDB(obj));
break;
case SWITCHDEV_OBJ_ID_PORT_MDB:
-   err = dsa_slave_port_mdb_del(dev, SWITCHDEV_OBJ_PORT_MDB(obj));
+   err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
break;
case SWITCHDEV_OBJ_ID_PORT_VLAN:
err = dsa_slave_port_vlan_del(dev,
@@ -554,8 +550,7 @@ static int dsa_slave_port_obj_dump(struct net_device *dev,
err = dsa_port_fdb_dump(dp, SWITCHDEV_OBJ_PORT_FDB(obj), cb);
break;
case SWITCHDEV_OBJ_ID_PORT_MDB:
-   err = dsa_slave_port_mdb_dump(dev, SWITCHDEV_OBJ_PORT_MDB(obj),
- cb);
+   err = dsa_port_mdb_dump(dp, SWITCHDEV_OBJ_PORT_MDB(obj), cb);
break;
case SWITCHDEV_OBJ_ID_PORT_VLAN:
err = dsa_slave_port_vlan_dump(dev,
-- 
2.13.0



[PATCH net-next 11/20] net: dsa: move VLAN filtering setter

2017-05-19 Thread Vivien Didelot
Move the DSA port code which sets VLAN filtering on a port in port.c,
where it belongs.

Signed-off-by: Vivien Didelot 
---
 net/dsa/dsa_priv.h |  2 ++
 net/dsa/port.c | 16 
 net/dsa/slave.c| 16 
 3 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index f0b6cd3c8a65..c145223247c5 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -66,6 +66,8 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state,
 void dsa_port_set_state_now(struct dsa_port *dp, u8 state);
 int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br);
 void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br);
+int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
+   struct switchdev_trans *trans);
 
 /* slave.c */
 extern const struct dsa_device_ops notag_netdev_ops;
diff --git a/net/dsa/port.c b/net/dsa/port.c
index da8577fb3d07..c9f95aaf25f1 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -111,3 +111,19 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct 
net_device *br)
 */
dsa_port_set_state_now(dp, BR_STATE_FORWARDING);
 }
+
+int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
+   struct switchdev_trans *trans)
+{
+   struct dsa_switch *ds = dp->ds;
+
+   /* bridge skips -EOPNOTSUPP, so skip the prepare phase */
+   if (switchdev_trans_ph_prepare(trans))
+   return 0;
+
+   if (ds->ops->port_vlan_filtering)
+   return ds->ops->port_vlan_filtering(ds, dp->index,
+   vlan_filtering);
+
+   return 0;
+}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index ab298c41b8e7..32e7e78313ba 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -338,22 +338,6 @@ static int dsa_slave_ioctl(struct net_device *dev, struct 
ifreq *ifr, int cmd)
return -EOPNOTSUPP;
 }
 
-static int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
-  struct switchdev_trans *trans)
-{
-   struct dsa_switch *ds = dp->ds;
-
-   /* bridge skips -EOPNOTSUPP, so skip the prepare phase */
-   if (switchdev_trans_ph_prepare(trans))
-   return 0;
-
-   if (ds->ops->port_vlan_filtering)
-   return ds->ops->port_vlan_filtering(ds, dp->index,
-   vlan_filtering);
-
-   return 0;
-}
-
 static unsigned int dsa_fastest_ageing_time(struct dsa_switch *ds,
unsigned int ageing_time)
 {
-- 
2.13.0



[PATCH net-next 08/20] net: dsa: change scope of ageing time setter

2017-05-19 Thread Vivien Didelot
Change the scope of the switchdev bridge ageing time attribute setter
from the DSA slave device to the generic DSA port, so that the future
port-wide API can also be used for other port types, such as CPU and DSA
links.

Also ds->ports is now a contiguous array of dsa_port structures, thus
their addresses cannot be NULL. Remove the useless check in
dsa_fastest_ageing_time.

Signed-off-by: Vivien Didelot 
---
 net/dsa/slave.c | 16 +++-
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 216eb38a847d..b0150f79dcdd 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -412,21 +412,19 @@ static unsigned int dsa_fastest_ageing_time(struct 
dsa_switch *ds,
for (i = 0; i < ds->num_ports; ++i) {
struct dsa_port *dp = >ports[i];
 
-   if (dp && dp->ageing_time && dp->ageing_time < ageing_time)
+   if (dp->ageing_time && dp->ageing_time < ageing_time)
ageing_time = dp->ageing_time;
}
 
return ageing_time;
 }
 
-static int dsa_slave_ageing_time(struct net_device *dev,
-const struct switchdev_attr *attr,
-struct switchdev_trans *trans)
+static int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock,
+   struct switchdev_trans *trans)
 {
-   struct dsa_slave_priv *p = netdev_priv(dev);
-   struct dsa_switch *ds = p->dp->ds;
-   unsigned long ageing_jiffies = clock_t_to_jiffies(attr->u.ageing_time);
+   unsigned long ageing_jiffies = clock_t_to_jiffies(ageing_clock);
unsigned int ageing_time = jiffies_to_msecs(ageing_jiffies);
+   struct dsa_switch *ds = dp->ds;
 
if (switchdev_trans_ph_prepare(trans)) {
if (ds->ageing_time_min && ageing_time < ds->ageing_time_min)
@@ -437,7 +435,7 @@ static int dsa_slave_ageing_time(struct net_device *dev,
}
 
/* Keep the fastest ageing time in case of multiple bridges */
-   p->dp->ageing_time = ageing_time;
+   dp->ageing_time = ageing_time;
ageing_time = dsa_fastest_ageing_time(ds, ageing_time);
 
if (ds->ops->set_ageing_time)
@@ -463,7 +461,7 @@ static int dsa_slave_port_attr_set(struct net_device *dev,
  trans);
break;
case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME:
-   ret = dsa_slave_ageing_time(dev, attr, trans);
+   ret = dsa_port_ageing_time(dp, attr->u.ageing_time, trans);
break;
default:
ret = -EOPNOTSUPP;
-- 
2.13.0



[PATCH net-next 20/20] net: dsa: add VLAN notifier

2017-05-19 Thread Vivien Didelot
Add two new DSA_NOTIFIER_VLAN_ADD and DSA_NOTIFIER_VLAN_DEL events to
notify not only a single switch, but all switches of the fabric when
a VLAN entry is added or removed.

For the moment, keep the current behavior and ignore other switches.

Signed-off-by: Vivien Didelot 
---
 net/dsa/dsa_priv.h | 10 ++
 net/dsa/port.c | 31 ++-
 net/dsa/switch.c   | 43 +++
 3 files changed, 67 insertions(+), 17 deletions(-)

diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 2b60293b325c..1d52f9051d0e 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -24,6 +24,8 @@ enum {
DSA_NOTIFIER_FDB_DEL,
DSA_NOTIFIER_MDB_ADD,
DSA_NOTIFIER_MDB_DEL,
+   DSA_NOTIFIER_VLAN_ADD,
+   DSA_NOTIFIER_VLAN_DEL,
 };
 
 /* DSA_NOTIFIER_AGEING_TIME */
@@ -56,6 +58,14 @@ struct dsa_notifier_mdb_info {
int port;
 };
 
+/* DSA_NOTIFIER_VLAN_* */
+struct dsa_notifier_vlan_info {
+   const struct switchdev_obj_port_vlan *vlan;
+   struct switchdev_trans *trans;
+   int sw_index;
+   int port;
+};
+
 struct dsa_device_ops {
struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev);
struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev,
diff --git a/net/dsa/port.c b/net/dsa/port.c
index c7c4920e7bc9..c88c0cec8454 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -225,29 +225,26 @@ int dsa_port_vlan_add(struct dsa_port *dp,
  const struct switchdev_obj_port_vlan *vlan,
  struct switchdev_trans *trans)
 {
-   struct dsa_switch *ds = dp->ds;
-
-   if (switchdev_trans_ph_prepare(trans)) {
-   if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add)
-   return -EOPNOTSUPP;
-
-   return ds->ops->port_vlan_prepare(ds, dp->index, vlan, trans);
-   }
-
-   ds->ops->port_vlan_add(ds, dp->index, vlan, trans);
-
-   return 0;
+   struct dsa_notifier_vlan_info info = {
+   .sw_index = dp->ds->index,
+   .port = dp->index,
+   .trans = trans,
+   .vlan = vlan,
+   };
+
+   return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info);
 }
 
 int dsa_port_vlan_del(struct dsa_port *dp,
  const struct switchdev_obj_port_vlan *vlan)
 {
-   struct dsa_switch *ds = dp->ds;
+   struct dsa_notifier_vlan_info info = {
+   .sw_index = dp->ds->index,
+   .port = dp->index,
+   .vlan = vlan,
+   };
 
-   if (!ds->ops->port_vlan_del)
-   return -EOPNOTSUPP;
-
-   return ds->ops->port_vlan_del(ds, dp->index, vlan);
+   return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info);
 }
 
 int dsa_port_vlan_dump(struct dsa_port *dp,
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index b7e8e45869fc..c1e4b2d5a3ae 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -158,6 +158,43 @@ static int dsa_switch_mdb_del(struct dsa_switch *ds,
return ds->ops->port_mdb_del(ds, info->port, mdb);
 }
 
+static int dsa_switch_vlan_add(struct dsa_switch *ds,
+  struct dsa_notifier_vlan_info *info)
+{
+   const struct switchdev_obj_port_vlan *vlan = info->vlan;
+   struct switchdev_trans *trans = info->trans;
+
+   /* Do not care yet about other switch chips of the fabric */
+   if (ds->index != info->sw_index)
+   return 0;
+
+   if (switchdev_trans_ph_prepare(trans)) {
+   if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add)
+   return -EOPNOTSUPP;
+
+   return ds->ops->port_vlan_prepare(ds, info->port, vlan, trans);
+   }
+
+   ds->ops->port_vlan_add(ds, info->port, vlan, trans);
+
+   return 0;
+}
+
+static int dsa_switch_vlan_del(struct dsa_switch *ds,
+  struct dsa_notifier_vlan_info *info)
+{
+   const struct switchdev_obj_port_vlan *vlan = info->vlan;
+
+   /* Do not care yet about other switch chips of the fabric */
+   if (ds->index != info->sw_index)
+   return 0;
+
+   if (!ds->ops->port_vlan_del)
+   return -EOPNOTSUPP;
+
+   return ds->ops->port_vlan_del(ds, info->port, vlan);
+}
+
 static int dsa_switch_event(struct notifier_block *nb,
unsigned long event, void *info)
 {
@@ -186,6 +223,12 @@ static int dsa_switch_event(struct notifier_block *nb,
case DSA_NOTIFIER_MDB_DEL:
err = dsa_switch_mdb_del(ds, info);
break;
+   case DSA_NOTIFIER_VLAN_ADD:
+   err = dsa_switch_vlan_add(ds, info);
+   break;
+   case DSA_NOTIFIER_VLAN_DEL:
+   err = dsa_switch_vlan_del(ds, info);
+   break;
default:
err = -EOPNOTSUPP;
break;
-- 
2.13.0



Re: [PATCH net] fix BUG: scheduling while atomic in netlink broadcast

2017-05-19 Thread Cong Wang
On Fri, May 19, 2017 at 11:47 AM, Akshay Narayan  wrote:
>> I don't want to defend the use of yield() but it looks like there is other
>> problem.
>
> I believe this use of yield() should be replaced with cond_resched()
> even if it turns out there is an unrelated problem.

Yeah, it is a different problem, because cond_resched() itself could
sleep too so it doesn't fix the schedule-in-atomic problem, not to
mention the kmalloc() would sleep.

>
>> Does this module call netlink_broadcast() with __GFP_DIRECT_RECLAIM
>> in IRQ context? If so you should adjust the gfp flags.
>
> The module only calls netlink_broadcast() from a pluggable TCP
> function; from my understanding this is not in the IRQ context. Full
> trace, perhaps more clear, attached below.

It is process context but with a spinlock (bh_lock_sock) held, so
you still can't sleep. IOW, you have to pass a proper gfp flag to
reflect this.


Re: [PATCH net-next] tcp: warn on negative reordering values

2017-05-19 Thread David Miller
From: Soheil Hassas Yeganeh 
Date: Tue, 16 May 2017 17:39:02 -0400

> From: Soheil Hassas Yeganeh 
> 
> Commit bafbb9c73241 ("tcp: eliminate negative reordering
> in tcp_clean_rtx_queue") fixes an issue for negative
> reordering metrics.
> 
> To be resilient to such errors, warn and return
> when a negative metric is passed to tcp_update_reordering().
> 
> Signed-off-by: Soheil Hassas Yeganeh 
> Signed-off-by: Neal Cardwell 
> Signed-off-by: Yuchung Cheng 
> Signed-off-by: Eric Dumazet 

Applied, thanks.


Re: Alignment in BPF verifier

2017-05-19 Thread David Miller
From: Edward Cree 
Date: Fri, 19 May 2017 21:00:13 +0100

> Here's what I'm thinking of doing:
> struct bpf_reg_state {
> enum bpf_reg_type type;
> union {
> /* valid when type == PTR_TO_PACKET */
> u16 range;
> 
> /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
>  *   PTR_TO_MAP_VALUE_OR_NULL
>  */
> struct bpf_map *map_ptr;
> };
> /* Used to find other pointers with the same variable base, so they
>  * can share range and align knowledge.
>  */
> u32 id;
> u32 off; /* fixed part of pointer offset */
> /* For scalar types (CONST_IMM | UNKNOWN_VALUE), this represents our
>  * knowledge of the actual value.
>  * For pointer types, this represents the variable part of the offset
>  * from the pointed-to object, and is shared with all bpf_reg_states
>  * with the same id as us.
>  */
> struct tnum align;
> /* Used to determine if any memory access using this register will
>  * result in a bad access. These two fields must be last.
>  * See states_equal()
>  * These refer to the same value as align, not necessarily the actual
>  * contents of the register.
>  */
> s64 min_value;
> u64 max_value;
> };

Be very careful with the layout of bpf_reg_state.

There are layout dependencies in the state pruning.  Please take a look
at states_equal().  It is walking the set of registers at two snapshot
locations and trying to see if they are "equivalent".

What's happening here is that the verifier makes a stack of all branch
points in the program.  On the first pass it analyzes the register
values taking one of the two paths a branch takes.  Then when it hits
the end of the program, on that path, to BPF_EXIT it starts popping
the entries on the stack.

The naive implementation would pop each stack entry, and then traverse
the other arm of the branch.  But for programs with lots of branches
this gets very expensive.

So at each stack pop, the verifier tries to determine if it can skip
traversing the branch's other path.  And it does this by analyzing
register state.

The tests are basically:

if (memcmp(rold, rcur, sizeof(*rold)) == 0)
continue;

exact equivalent, then we're fine.

/* If the ranges were not the same, but everything else was and
 * we didn't do a variable access into a map then we are a-ok.
 */
if (!varlen_map_access &&
memcmp(rold, rcur, offsetofend(struct bpf_reg_state, id)) 
== 0)
continue;

We didn't do any variable MAP accesses, and everything in the register
"up to and including member ID" is the same, we're fine.

And then we drop down into some packet pointer specific tests to try
and optimize things further.

So you have to be careful what you place before and/or after 'id'.

Probably we need to put the alignment stuff before 'id' so that it
is considered by the offsetofend() length memcmp().

Hope that helps.


Re: [PATCH v2 1/3] bpf: Use 1<<16 as ceiling for immediate alignment in verifier.

2017-05-19 Thread David Miller
From: Edward Cree 
Date: Fri, 19 May 2017 18:17:42 +0100

> One question: is there a way to build the verifier as userland code
>  (or at least as a module), or will I have to reboot every time I
>  want to test a change?

There currently is no such mechanism, you will have to reboot every
time.

I have considered working on making the code buildable outside of the
kernel.  It shouldn't be too hard.


Re: [PATCH net] fix BUG: scheduling while atomic in netlink broadcast

2017-05-19 Thread Eric Dumazet
On Fri, 2017-05-19 at 14:47 -0400, Akshay Narayan wrote:
> > I don't want to defend the use of yield() but it looks like there is other
> > problem.
> 
> I believe this use of yield() should be replaced with cond_resched()
> even if it turns out there is an unrelated problem.
> 
> > Does this module call netlink_broadcast() with __GFP_DIRECT_RECLAIM
> > in IRQ context? If so you should adjust the gfp flags.
> 
> The module only calls netlink_broadcast() from a pluggable TCP
> function; from my understanding this is not in the IRQ context. Full
> trace, perhaps more clear, attached below.
> 
> May 19 14:30:44 ccp kernel: [  178.885546] BUG: scheduling while
> atomic: mm-link/3105/0x0200
> May 19 14:30:44 ccp kernel: [  178.885552] Modules linked in:
> ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_conntrack_ipv4
> nf_defrag_ipv4 nf_nat_ipv4 nf_nat libcrc32c xt_connmark nf_conntrack
> ccp(OE) crct10dif_pclmul crc32_pclmul
>  ghash_clmulni_intel snd_intel8x0 pcbc snd_ac97_codec joydev ac97_bus
> snd_pcm snd_seq_midi snd_seq_midi_event snd_rawmidi aesni_intel
> snd_seq aes_x86_64 crypto_simd snd_seq_device snd_timer snd input_leds
> i2c_piix4 glue_helper cryptd so
> undcore mac_hid serio_raw vboxvideo ttm drm_kms_helper drm fb_sys_fops
> syscopyarea sysfillrect sysimgblt vboxguest intel_rapl_perf parport_pc
> ppdev lp parport ip_tables x_tables autofs4 hid_generic usbhid hid
> e1000 ahci libahci psmouse
> fjes pata_acpi video
> May 19 14:30:44 ccp kernel: [  178.885665] CPU: 0 PID: 3105 Comm:
> mm-link Tainted: GW  OE   4.10.0-21-generic #23-Ubuntu
> May 19 14:30:44 ccp kernel: [  178.885666] Hardware name: innotek GmbH
> VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
> May 19 14:30:44 ccp kernel: [  178.885667] Call Trace:
> May 19 14:30:44 ccp kernel: [  178.885674]  dump_stack+0x63/0x81
> May 19 14:30:44 ccp kernel: [  178.885678]  __schedule_bug+0x54/0x70
> May 19 14:30:44 ccp kernel: [  178.885682]  __schedule+0x536/0x6f0
> May 19 14:30:44 ccp kernel: [  178.885685]  schedule+0x36/0x80
> May 19 14:30:44 ccp kernel: [  178.885687]  sys_sched_yield+0x4f/0x60
> May 19 14:30:44 ccp kernel: [  178.885688]  yield+0x33/0x40
> May 19 14:30:44 ccp kernel: [  178.885691]
> netlink_broadcast_filtered+0x29b/0x3c0
> May 19 14:30:44 ccp kernel: [  178.885692]  netlink_broadcast+0x1d/0x20
> May 19 14:30:44 ccp kernel: [  178.885697]  nl_sendmsg+0xb8/0x664 [ccp]
> May 19 14:30:44 ccp kernel: [  178.885699]  nl_send_ack_notif+0x7d/0x90 [ccp]
> May 19 14:30:44 ccp kernel: [  178.885702]  tcp_ccp_cong_avoid+0x69/0x70 [ccp]
> May 19 14:30:44 ccp kernel: [  178.885704]  tcp_ack+0x980/0xa60
> May 19 14:30:44 ccp kernel: [  178.885708]  tcp_rcv_state_process+0x2be/0xda0
> May 19 14:30:44 ccp kernel: [  178.885712]  ? security_sock_rcv_skb+0x3b/0x50
> May 19 14:30:44 ccp kernel: [  178.885715]  ? sk_filter_trim_cap+0x3b/0x270

No idea what ccp is, it is not in upstream kernel, and it looks buggy.

Please do not send patches that are not needed in upstream kernel.




Re: Alignment in BPF verifier

2017-05-19 Thread David Miller
From: Edward Cree 
Date: Fri, 19 May 2017 21:00:13 +0100

> Well, I've managed to get somewhat confused by reg->id.
> In particular, I'm unsure which bpf_reg_types can have an id, and what
>  exactly it means.  There seems to be some code that checks around map value
>  pointers, which seems strange as maps have fixed sizes (and the comments in
>  enum bpf_reg_type make it seem like id is a PTR_TO_PACKET thing) - is this
>  maybe because of map-of-maps support, can the contained maps have differing
>  element sizes?  Or do we allow *(map_value + var + imm), if map_value + var
>  was appropriately bounds-checked?
> 
> Does the 'id' identify the variable that was added to an object pointer, or
>  the object itself?  Or does it blur these and identify (what the comment in
>  enum bpf_reg_type calls) "skb->data + (u16) var"?

The reg->id value changes any time a variable gets added to a packet
pointer.

You will also notice right now that only packet pointers have their
alignment tracked.

I have changes pending that will do that for MAP pointers too, but
it needs more work.


Re: [PATCH net] bonding: fix randomly populated arp target array

2017-05-19 Thread महेश बंडेवार
On Fri, May 19, 2017 at 11:46 AM, Jarod Wilson  wrote:
> In commit dc9c4d0fe023, the arp_target array moved from a static global
> to a local variable. By the nature of static globals, the array used to
> be initialized to all 0. At present, it's full of random data, which
> gets interpreted as arp_target values, when none have actually been
> specified. Systems end up booting with spew along these lines:
>
> [   32.161783] IPv6: ADDRCONF(NETDEV_UP): lacp0: link is not ready
> [   32.168475] IPv6: ADDRCONF(NETDEV_UP): lacp0: link is not ready
> [   32.175089] 8021q: adding VLAN 0 to HW filter on device lacp0
> [   32.193091] IPv6: ADDRCONF(NETDEV_UP): lacp0: link is not ready
> [   32.204892] lacp0: Setting MII monitoring interval to 100
> [   32.211071] lacp0: Removing ARP target 216.124.228.17
> [   32.216824] lacp0: Removing ARP target 218.160.255.255
> [   32.222646] lacp0: Removing ARP target 185.170.136.184
> [   32.228496] lacp0: invalid ARP target 255.255.255.255 specified for removal
> [   32.236294] lacp0: option arp_ip_target: invalid value (-255.255.255.255)
> [   32.243987] lacp0: Removing ARP target 56.125.228.17
> [   32.249625] lacp0: Removing ARP target 218.160.255.255
> [   32.255432] lacp0: Removing ARP target 15.157.233.184
> [   32.261165] lacp0: invalid ARP target 255.255.255.255 specified for removal
> [   32.268939] lacp0: option arp_ip_target: invalid value (-255.255.255.255)
> [   32.276632] lacp0: Removing ARP target 16.0.0.0
> [   32.281755] lacp0: Removing ARP target 218.160.255.255
> [   32.287567] lacp0: Removing ARP target 72.125.228.17
> [   32.293165] lacp0: Removing ARP target 218.160.255.255
> [   32.298970] lacp0: Removing ARP target 8.125.228.17
> [   32.304458] lacp0: Removing ARP target 218.160.255.255
>
> None of these were actually specified as ARP targets, and the driver does
> seem to clean up the mess okay, but it's rather noisy and confusing, leaks
> values to userspace, and the 255.255.255.255 spew shows up even when debug
> prints are disabled.
>
> The fix: just zero out arp_target at init time.
>
> While we're in here, init arp_all_targets_value in the right place.
>
> Fixes: dc9c4d0fe023 ("bonding: reduce scope of some global variables")
> CC: Mahesh Bandewar 
> CC: Jay Vosburgh 
> CC: Veaceslav Falico 
> CC: Andy Gospodarek 
> CC: netdev@vger.kernel.org
> CC: sta...@vger.kernel.org
> Signed-off-by: Jarod Wilson 
> ---
>  drivers/net/bonding/bond_main.c | 5 ++---
>  1 file changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> index 2be78807fd6e..73313318399c 100644
> --- a/drivers/net/bonding/bond_main.c
> +++ b/drivers/net/bonding/bond_main.c
> @@ -4271,10 +4271,10 @@ static int bond_check_params(struct bond_params 
> *params)
> int arp_validate_value, fail_over_mac_value, primary_reselect_value, 
> i;
> struct bond_opt_value newval;
> const struct bond_opt_value *valptr;
> -   int arp_all_targets_value;
> +   int arp_all_targets_value = 0;

I think this is unnecessary as long as the var is initialized before its use.

> u16 ad_actor_sys_prio = 0;
> u16 ad_user_port_key = 0;
> -   __be32 arp_target[BOND_MAX_ARP_TARGETS];
> +   __be32 arp_target[BOND_MAX_ARP_TARGETS] = { 0 };

this is the only change required to avoid reported error.

> int arp_ip_count;
> int bond_mode   = BOND_MODE_ROUNDROBIN;
> int xmit_hashtype = BOND_XMIT_POLICY_LAYER2;
> @@ -4501,7 +4501,6 @@ static int bond_check_params(struct bond_params *params)
> arp_validate_value = 0;
> }
>
> -   arp_all_targets_value = 0;
> if (arp_all_targets) {
> bond_opt_initstr(, arp_all_targets);
> valptr = 
> bond_opt_parse(bond_opt_get(BOND_OPT_ARP_ALL_TARGETS),
> --
> 2.12.1
>


[PATCH 3/4] [next-queue]net: i40e: Enable mqprio full offload mode in the i40e driver for configuring TCs and queue mapping

2017-05-19 Thread Amritha Nambiar
The i40e driver is modified to enable the new mqprio hardware
offload mode and factor the TCs and queue configuration by
creating channel VSIs. In this mode, the priority to traffic
class mapping and the user specified queue ranges are used
to configure the traffic classes when the 'hw' option is set
to 2.

Example:
# tc qdisc add dev eth0 root mqprio num_tc 4\
  map 0 0 0 0 1 2 2 3 queues 2@0 2@2 1@4 1@5 hw 2

# tc qdisc show dev eth0
qdisc mqprio 8038: root  tc 4 map 0 0 0 0 1 2 2 3 0 0 0 0 0 0 0 0
 queues:(0:1) (2:3) (4:4) (5:5)

The HW channels created are removed and all the queue configuration
is set to default when the qdisc is detached from the root of the
device.

#tc qdisc del dev eth0 root

This patch also disables setting up channels via ethtool (ethtool -L)
when the TCs are configured using the mqprio scheduler.

Signed-off-by: Amritha Nambiar 
---
 drivers/net/ethernet/intel/i40e/i40e.h |4 
 drivers/net/ethernet/intel/i40e/i40e_ethtool.c |6 
 drivers/net/ethernet/intel/i40e/i40e_main.c|  311 ++--
 3 files changed, 292 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h 
b/drivers/net/ethernet/intel/i40e/i40e.h
index 0915b02..a62f65a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -54,6 +54,8 @@
 #include 
 #include 
 #include 
+#include 
+
 #include "i40e_type.h"
 #include "i40e_prototype.h"
 #include "i40e_client.h"
@@ -685,6 +687,7 @@ struct i40e_vsi {
enum i40e_vsi_type type;  /* VSI type, e.g., LAN, FCoE, etc */
s16 vf_id;  /* Virtual function ID for SRIOV VSIs */
 
+   struct tc_mqprio_qopt_offload mqprio_qopt; /* queue parameters */
struct i40e_tc_configuration tc_config;
struct i40e_aqc_vsi_properties_data info;
 
@@ -710,6 +713,7 @@ struct i40e_vsi {
u16 cnt_q_avail; /* num of queues available for channel usage */
u16 orig_rss_size;
u16 current_rss_size;
+   bool reconfig_rss;
 
/* keeps track of next_base_queue to be used for channel setup */
atomic_t next_base_queue;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c 
b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 3d58762..ab52979 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -3841,6 +3841,12 @@ static int i40e_set_channels(struct net_device *dev,
if (vsi->type != I40E_VSI_MAIN)
return -EINVAL;
 
+   /* We do not support setting channels via ethtool when TCs are
+* configured through mqprio
+*/
+   if (pf->flags & I40E_FLAG_TC_MQPRIO)
+   return -EINVAL;
+
/* verify they are not requesting separate vectors */
if (!count || ch->rx_count || ch->tx_count)
return -EINVAL;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c 
b/drivers/net/ethernet/intel/i40e/i40e_main.c
index e1bea45..7f61d4f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -68,6 +68,7 @@ static int i40e_reset(struct i40e_pf *pf);
 static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired);
 static void i40e_fdir_sb_setup(struct i40e_pf *pf);
 static int i40e_veb_get_bw_info(struct i40e_veb *veb);
+static int i40e_vsi_config_rss(struct i40e_vsi *vsi);
 
 /* i40e_pci_tbl - PCI Device ID Table
  *
@@ -1560,6 +1561,105 @@ static int i40e_set_mac(struct net_device *netdev, void 
*p)
 }
 
 /**
+ * i40e_vsi_setup_queue_map_mqprio - Prepares VSI tc_config to have queue
+ * configurations based on MQPRIO options.
+ * @vsi: the VSI being configured,
+ * @ctxt: VSI context structure
+ * @enabled_tc: number of traffic classes to enable
+ **/
+static int i40e_vsi_setup_queue_map_mqprio(struct i40e_vsi *vsi,
+  struct i40e_vsi_context *ctxt,
+  u8 enabled_tc)
+{
+   u8 netdev_tc = 0, offset = 0;
+   u16 qcount = 0, max_qcount, qmap, sections = 0;
+   int i, override_q, pow, num_qps, ret;
+
+   if (vsi->type != I40E_VSI_MAIN)
+   return -EINVAL;
+
+   sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID;
+   sections |= I40E_AQ_VSI_PROP_SCHED_VALID;
+
+   vsi->tc_config.numtc = vsi->mqprio_qopt.qopt.num_tc;
+   vsi->tc_config.enabled_tc = enabled_tc ? enabled_tc : 1;
+
+   num_qps = vsi->mqprio_qopt.qopt.count[0];
+
+   /* find the next higher power-of-2 of num queue pairs */
+   pow = ilog2(num_qps);
+   if (!is_power_of_2(num_qps))
+   pow++;
+
+   qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |
+   (pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT);
+
+   /* Setup queue offset/count for all TCs for given VSI */
+   max_qcount = vsi->mqprio_qopt.qopt.count[0];
+
+   for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+  

[PATCH 2/4] [next-queue]net: i40e: Add infrastructure for queue channel support with the TCs and queue configurations offloaded via mqprio scheduler

2017-05-19 Thread Amritha Nambiar
This patch sets up the infrastructure for offloading TCs and
queue configurations to the hardware by creating HW channels(VSI).
A new channel is created for each of the traffic class
configuration offloaded via mqprio framework except for the first TC
(TC0). TC0 for the main VSI is also reconfigured as per user provided
queue parameters. Queue counts that are not power-of-2 are handled by
reconfiguring RSS by reprogramming LUTs using the queue count value.
This patch also handles configuring the TX rings for the channels,
setting up the RX queue map for channel.

Also, the channels so created are removed and all the queue
configuration is set to default when the qdisc is detached from the
root of the device.

Signed-off-by: Amritha Nambiar 
Signed-off-by: Kiran Patil 
---
 drivers/net/ethernet/intel/i40e/i40e.h  |   36 +
 drivers/net/ethernet/intel/i40e/i40e_main.c |  740 +++
 drivers/net/ethernet/intel/i40e/i40e_txrx.h |2 
 3 files changed, 771 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h 
b/drivers/net/ethernet/intel/i40e/i40e.h
index 395ca94..0915b02 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -330,6 +330,24 @@ struct i40e_flex_pit {
u8 pit_index;
 };
 
+struct i40e_channel {
+   struct list_head list;
+   bool initialized;
+   u8 type;
+   u16 vsi_number;
+   u16 stat_counter_idx;
+   u16 base_queue;
+   u16 num_queue_pairs; /* Requested by user */
+   u16 allowed_queue_pairs;
+   u16 seid;
+
+   u8 enabled_tc;
+   struct i40e_aqc_vsi_properties_data info;
+
+   /* track this channel belongs to which VSI */
+   struct i40e_vsi *parent_vsi;
+};
+
 /* struct that defines the Ethernet device */
 struct i40e_pf {
struct pci_dev *pdev;
@@ -442,6 +460,7 @@ struct i40e_pf {
 #define I40E_FLAG_CLIENT_L2_CHANGE BIT_ULL(56)
 #define I40E_FLAG_WOL_MC_MAGIC_PKT_WAKE BIT_ULL(57)
 #define I40E_FLAG_LEGACY_RX BIT_ULL(58)
+#define I40E_FLAG_TC_MQPRIO BIT_ULL(59)
 
struct i40e_client_instance *cinst;
bool stat_offsets_loaded;
@@ -523,6 +542,9 @@ struct i40e_pf {
u32 ioremap_len;
u32 fd_inv;
u16 phy_led_val;
+
+#define I40E_MAX_QUEUES_PER_CH 64
+   u16 override_q_count;
 };
 
 /**
@@ -684,6 +706,16 @@ struct i40e_vsi {
bool current_isup;  /* Sync 'link up' logging */
enum i40e_aq_link_speed current_speed;  /* Sync link speed logging */
 
+   /* channel specific fields */
+   u16 cnt_q_avail; /* num of queues available for channel usage */
+   u16 orig_rss_size;
+   u16 current_rss_size;
+
+   /* keeps track of next_base_queue to be used for channel setup */
+   atomic_t next_base_queue;
+
+   struct list_head ch_list;
+
void *priv; /* client driver data reference. */
 
/* VSI specific handlers */
@@ -716,6 +748,9 @@ struct i40e_q_vector {
bool arm_wb_state;
 #define ITR_COUNTDOWN_START 100
u8 itr_countdown;   /* when 0 should adjust ITR */
+
+   /* Following field(s) are specific to channel usage */
+   bool is_an_atq;
 } cacheline_internodealigned_in_smp;
 
 /* lan device */
@@ -972,4 +1007,5 @@ i40e_status i40e_get_npar_bw_setting(struct i40e_pf *pf);
 i40e_status i40e_set_npar_bw_setting(struct i40e_pf *pf);
 i40e_status i40e_commit_npar_bw_setting(struct i40e_pf *pf);
 void i40e_print_link_message(struct i40e_vsi *vsi, bool isup);
+int i40e_create_queue_channel(struct i40e_vsi *vsi, struct i40e_channel *ch);
 #endif /* _I40E_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c 
b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 8d1d3b85..e1bea45 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -2864,7 +2864,7 @@ static void i40e_config_xps_tx_ring(struct i40e_ring 
*ring)
struct i40e_vsi *vsi = ring->vsi;
cpumask_var_t mask;
 
-   if (!ring->q_vector || !ring->netdev)
+   if (!ring->q_vector || !ring->netdev || ring->ch)
return;
 
/* Single TC mode enable XPS */
@@ -2937,7 +2937,17 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring)
 * initialization. This has to be done regardless of
 * DCB as by default everything is mapped to TC0.
 */
-   tx_ctx.rdylist = le16_to_cpu(vsi->info.qs_handle[ring->dcb_tc]);
+
+   if (ring->ch) {
+   tx_ctx.rdylist =
+   le16_to_cpu(ring->ch->info.qs_handle[ring->dcb_tc]);
+
+   dev_dbg(>back->pdev->dev, "ch, pf_q %d, rdylist %d\n",
+   pf_q, tx_ctx.rdylist);
+   } else {
+   tx_ctx.rdylist = le16_to_cpu(vsi->info.qs_handle[ring->dcb_tc]);
+   }
+
tx_ctx.rdylist_act = 0;
 
/* clear 

[PATCH 4/4] [next-queue]net: i40e: Add support to set max bandwidth rates for TCs offloaded via tc/mqprio

2017-05-19 Thread Amritha Nambiar
This patch enables setting up maximum Tx rates for the traffic
classes in i40e. The maximum rate offloaded to the hardware through
the mqprio framework is configured for the VSI. Configuring
minimum Tx rate limit is not supported in the device. The minimum
usable value for Tx rate is 50Mbps.

Example:
# tc qdisc add dev eth0 root mqprio num_tc 2  map 0 0 0 0 1 1 1 1\
  queues 4@0 4@4 min_rate 0Mbit 0Mbit max_rate 55Mbit 60Mbit hw 2

To dump the bandwidth rates:

# tc qdisc show dev eth0
qdisc mqprio 804a: root  tc 2 map 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0
 queues:(0:3) (4:7)
 min rates:0bit 0bit
 max rates:55Mbit 60Mbit

Signed-off-by: Amritha Nambiar 
Signed-off-by: Kiran Patil 
---
 drivers/net/ethernet/intel/i40e/i40e.h  |2 +
 drivers/net/ethernet/intel/i40e/i40e_main.c |  102 ++-
 2 files changed, 100 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h 
b/drivers/net/ethernet/intel/i40e/i40e.h
index a62f65a..83a060d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -346,6 +346,8 @@ struct i40e_channel {
u8 enabled_tc;
struct i40e_aqc_vsi_properties_data info;
 
+   u32 max_tx_rate;
+
/* track this channel belongs to which VSI */
struct i40e_vsi *parent_vsi;
 };
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c 
b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 7f61d4f..3261dab 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -69,6 +69,8 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, 
bool lock_acquired);
 static void i40e_fdir_sb_setup(struct i40e_pf *pf);
 static int i40e_veb_get_bw_info(struct i40e_veb *veb);
 static int i40e_vsi_config_rss(struct i40e_vsi *vsi);
+static int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 ch_seid,
+u32 max_tx_rate);
 
 /* i40e_pci_tbl - PCI Device ID Table
  *
@@ -5033,7 +5035,7 @@ static int i40e_vsi_configure_bw_alloc(struct i40e_vsi 
*vsi, u8 enabled_tc,
   u8 *bw_share)
 {
struct i40e_aqc_configure_vsi_tc_bw_data bw_data;
-   i40e_status ret;
+   i40e_status ret = 0;
int i;
 
bw_data.tc_valid_bits = enabled_tc;
@@ -5041,8 +5043,20 @@ static int i40e_vsi_configure_bw_alloc(struct i40e_vsi 
*vsi, u8 enabled_tc,
bw_data.tc_bw_credits[i] = bw_share[i];
 
if ((vsi->back->flags & I40E_FLAG_TC_MQPRIO) ||
-   !vsi->mqprio_qopt.qopt.hw)
-   return 0;
+   !vsi->mqprio_qopt.qopt.hw) {
+   if (vsi->mqprio_qopt.max_rate[0]) {
+   u32 max_tx_rate = vsi->mqprio_qopt.max_rate[0];
+
+   max_tx_rate = (max_tx_rate * 8) / 100;
+
+   ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate);
+   if (ret)
+   dev_err(>back->pdev->dev,
+   "Failed to set tx rate (%u Mbps) for 
vsi->seid %u, error code %d.\n",
+   max_tx_rate, vsi->seid, ret);
+   }
+   return ret;
+   }
 
ret = i40e_aq_config_vsi_tc_bw(>back->hw, vsi->seid, _data,
   NULL);
@@ -5297,6 +5311,71 @@ static void i40e_remove_queue_channel(struct i40e_vsi 
*vsi)
 }
 
 /**
+ * i40e_set_bw_limit - setup BW limit based on max_tx_rate
+ * @vsi: the VSI being setup
+ * @ch_seid: seid of the channel (VSI)
+ * @max_tx_rate: max TX rate to be configured as BW limit
+ *
+ * This function sets up BW limit for a given channel (ch_seid)
+ * based on max TX rate specified.
+ **/
+static int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 ch_seid, u32 
max_tx_rate)
+{
+   struct i40e_pf *pf = vsi->back;
+   int speed = 0;
+   int ret = 0;
+
+   switch (pf->hw.phy.link_info.link_speed) {
+   case I40E_LINK_SPEED_40GB:
+   speed = 4;
+   break;
+   case I40E_LINK_SPEED_20GB:
+   speed = 2;
+   break;
+   case I40E_LINK_SPEED_10GB:
+   speed = 1;
+   break;
+   case I40E_LINK_SPEED_1GB:
+   speed = 1000;
+   break;
+   default:
+   break;
+   }
+
+   if (max_tx_rate > speed) {
+   dev_err(>pdev->dev,
+   "Invalid tx rate %d specified for channel seid %d.",
+   max_tx_rate, ch_seid);
+   return -EINVAL;
+   }
+
+   if ((max_tx_rate < 50) && (max_tx_rate > 0)) {
+   dev_warn(>pdev->dev,
+"Setting tx rate to minimum usable value of 
50Mbps.\n");
+   max_tx_rate = 50;
+   }
+
+#define I40E_BW_CREDIT_DIVISOR 50 /* 50Mbps per BW credit */
+#define 

[PATCH 1/4] [next-queue]net: mqprio: Introduce new hardware offload mode in mqprio for offloading full TC configurations

2017-05-19 Thread Amritha Nambiar
This patch introduces a new hardware offload mode in mqprio
which makes full use of the mqprio options, the TCs, the
queue configurations and the bandwidth rates for the TCs.
This is achieved by setting the value 2 for the "hw" option.
This new offload mode supports new attributes for traffic
class such as minimum and maximum values for bandwidth rate limits.

Introduces a new datastructure 'tc_mqprio_qopt_offload' for offloading
mqprio queue options and use this to be shared between the kernel and
device driver. This contains a copy of the existing datastructure
for mqprio queue options. This new datastructure can be extended when
adding new attributes for traffic class such as bandwidth rate limits. The
existing datastructure for mqprio queue options will be shared between the
kernel and userspace.

This patch enables configuring additional attributes associated
with a traffic class such as minimum and maximum bandwidth
rates and can be offloaded to the hardware in the new offload mode.
The min and max limits for bandwidth rates are provided
by the user along with the TCs and the queue configurations
when creating the mqprio qdisc.

Example:
# tc qdisc add dev eth0 root mqprio num_tc 2  map 0 0 0 0 1 1 1 1\
  queues 4@0 4@4 min_rate 0Mbit 0Mbit max_rate 55Mbit 60Mbit hw 2

To dump the bandwidth rates:

# tc qdisc show dev eth0
qdisc mqprio 804a: root  tc 2 map 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0
 queues:(0:3) (4:7)
 min rates:0bit 0bit
 max rates:55Mbit 60Mbit

Signed-off-by: Amritha Nambiar 
---
 include/linux/netdevice.h  |2 
 include/net/pkt_cls.h  |7 ++
 include/uapi/linux/pkt_sched.h |   13 +++
 net/sched/sch_mqprio.c |  169 +---
 4 files changed, 180 insertions(+), 11 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0150b2d..17b9066 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -779,6 +779,7 @@ enum {
TC_SETUP_CLSFLOWER,
TC_SETUP_MATCHALL,
TC_SETUP_CLSBPF,
+   TC_SETUP_MQPRIO_EXT,
 };
 
 struct tc_cls_u32_offload;
@@ -791,6 +792,7 @@ struct tc_to_netdev {
struct tc_cls_matchall_offload *cls_mall;
struct tc_cls_bpf_offload *cls_bpf;
struct tc_mqprio_qopt *mqprio;
+   struct tc_mqprio_qopt_offload *mqprio_qopt;
};
bool egress_dev;
 };
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 2c213a6..5ab8052 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -549,6 +549,13 @@ struct tc_cls_bpf_offload {
u32 gen_flags;
 };
 
+struct tc_mqprio_qopt_offload {
+   /* struct tc_mqprio_qopt must always be the first element */
+   struct tc_mqprio_qopt qopt;
+   u32 flags;
+   u64 min_rate[TC_QOPT_MAX_QUEUE];
+   u64 max_rate[TC_QOPT_MAX_QUEUE];
+};
 
 /* This structure holds cookie structure that is passed from user
  * to the kernel for actions and classifiers
diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 099bf55..cf2a146 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -620,6 +620,7 @@ struct tc_drr_stats {
 enum {
TC_MQPRIO_HW_OFFLOAD_NONE,  /* no offload requested */
TC_MQPRIO_HW_OFFLOAD_TCS,   /* offload TCs, no queue counts */
+   TC_MQPRIO_HW_OFFLOAD,   /* fully supported offload */
__TC_MQPRIO_HW_OFFLOAD_MAX
 };
 
@@ -633,6 +634,18 @@ struct tc_mqprio_qopt {
__u16   offset[TC_QOPT_MAX_QUEUE];
 };
 
+#define TC_MQPRIO_F_MIN_RATE  0x1
+#define TC_MQPRIO_F_MAX_RATE  0x2
+
+enum {
+   TCA_MQPRIO_UNSPEC,
+   TCA_MQPRIO_MIN_RATE64,
+   TCA_MQPRIO_MAX_RATE64,
+   __TCA_MQPRIO_MAX,
+};
+
+#define TCA_MQPRIO_MAX (__TCA_MQPRIO_MAX - 1)
+
 /* SFB */
 
 enum {
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 0a4cf27..6457ec9 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -18,10 +18,13 @@
 #include 
 #include 
 #include 
+#include 
 
 struct mqprio_sched {
struct Qdisc**qdiscs;
int hw_offload;
+   u32 flags;
+   u64 min_rate[TC_QOPT_MAX_QUEUE], max_rate[TC_QOPT_MAX_QUEUE];
 };
 
 static void mqprio_destroy(struct Qdisc *sch)
@@ -39,10 +42,21 @@ static void mqprio_destroy(struct Qdisc *sch)
}
 
if (priv->hw_offload && dev->netdev_ops->ndo_setup_tc) {
-   struct tc_mqprio_qopt offload = { 0 };
-   struct tc_to_netdev tc = { .type = TC_SETUP_MQPRIO,
-  { .mqprio =  } };
+   struct tc_mqprio_qopt_offload offload = { 0 };
+   struct tc_to_netdev tc = { 0 };
 
+   switch (priv->hw_offload) {
+   case TC_MQPRIO_HW_OFFLOAD_TCS:
+   tc.type = TC_SETUP_MQPRIO;
+   tc.mqprio = 
+  

[PATCH 0/4] Configuring traffic classes via new hardware offload mechanism in tc/mqprio

2017-05-19 Thread Amritha Nambiar
The following series introduces a new hardware offload mode in tc/mqprio where 
the TCs, the queue configurations and bandwidth rate limits are offloaded to 
the hardware.
The i40e driver enables the new mqprio hardware offload mechanism factoring the 
TCs, queue configuration and bandwidth rates by creating HW channel VSIs. 

In this mode, the priority to traffic class mapping and the user specified 
queue ranges are used to configure the traffic class when the 'hw' option is 
set to 2. This is achieved by creating HW channels(VSI). A new channel is 
created for each of the traffic class configuration offloaded via mqprio 
framework except for the first TC (TC0) which is for the main VSI. TC0 for the 
main VSI is also reconfigured as per user provided queue parameters. Finally, 
bandwidth rate limits are set on these traffic classes through the mqprio 
offload framework by sending these rates in addition to the number of TCs and 
the queue configurations.

Example:
# tc qdisc add dev eth0 root mqprio num_tc 2  map 0 0 0 0 1 1 1 1\
  queues 4@0 4@4 min_rate 0Mbit 0Mbit max_rate 55Mbit 60Mbit hw 2

To dump the bandwidth rates:

# tc qdisc show dev eth0
  qdisc mqprio 804a: root  tc 2 map 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0
   queues:(0:3) (4:7)
   min rates:0bit 0bit
   max rates:55Mbit 60Mbit

---

Amritha Nambiar (4):
  [next-queue]net: mqprio: Introduce new hardware offload mode in mqprio 
for offloading full TC configurations
  [next-queue]net: i40e: Add infrastructure for queue channel support with 
the TCs and queue configurations offloaded via mqprio scheduler
  [next-queue]net: i40e: Enable mqprio full offload mode in the i40e driver 
for configuring TCs and queue mapping
  [next-queue]net: i40e: Add support to set max bandwidth rates for TCs 
offloaded via tc/mqprio


 drivers/net/ethernet/intel/i40e/i40e.h |   42 +
 drivers/net/ethernet/intel/i40e/i40e_ethtool.c |6 
 drivers/net/ethernet/intel/i40e/i40e_main.c| 1365 +---
 drivers/net/ethernet/intel/i40e/i40e_txrx.h|2 
 include/linux/netdevice.h  |2 
 include/net/pkt_cls.h  |7 
 include/uapi/linux/pkt_sched.h |   13 
 net/sched/sch_mqprio.c |  169 +++
 8 files changed, 1449 insertions(+), 157 deletions(-)

--


Re: [PATCH v6 net-next 0/7] Extend socket timestamping API

2017-05-19 Thread Richard Cochran
On Fri, May 19, 2017 at 05:52:34PM +0200, Miroslav Lichvar wrote:
> Changes v5->v6:
> - fixed skb_is_swtx_tstamp() when OPT_TX_SWHW is disabled and improved
>   its description
> - improved OPT_PKTINFO documentation
> - improved scm_timestamping documentation

For the series:

Acked-by: Richard Cochran 


Re: [PATCH net-next v2] bridge: fix hello and hold timers starting/stopping

2017-05-19 Thread Nikolay Aleksandrov

On 5/19/17 8:30 PM, Ivan Vecera wrote:

Current bridge code incorrectly handles starting/stopping of hello and
hold timers during STP enable/disable.

1. Timers are stopped in br_stp_start() during NO_STP->USER_STP
transition. The timers are already stopped in NO_STP state so
this is a confusing no-op.

2. During USER_STP->NO_STP transition the timers are started. This
does not make sense and is confusing because the timer should not be
active in NO_STP state.

Cc: da...@davemloft.net
Cc: sas...@cumulusnetworks.com
Cc: step...@networkplumber.org
Cc: bri...@lists.linux-foundation.org
Cc: lucien@gmail.com
Cc: niko...@cumulusnetworks.com
Signed-off-by: Ivan Vecera 
---
  net/bridge/br_stp_if.c | 11 ---
  1 file changed, 11 deletions(-)



LGTM, thanks!

Acked-by: Nikolay Aleksandrov 




Alignment in BPF verifier

2017-05-19 Thread Edward Cree
Well, I've managed to get somewhat confused by reg->id.
In particular, I'm unsure which bpf_reg_types can have an id, and what
 exactly it means.  There seems to be some code that checks around map value
 pointers, which seems strange as maps have fixed sizes (and the comments in
 enum bpf_reg_type make it seem like id is a PTR_TO_PACKET thing) - is this
 maybe because of map-of-maps support, can the contained maps have differing
 element sizes?  Or do we allow *(map_value + var + imm), if map_value + var
 was appropriately bounds-checked?

Does the 'id' identify the variable that was added to an object pointer, or
 the object itself?  Or does it blur these and identify (what the comment in
 enum bpf_reg_type calls) "skb->data + (u16) var"?

Here's what I'm thinking of doing:
struct bpf_reg_state {
enum bpf_reg_type type;
union {
/* valid when type == PTR_TO_PACKET */
u16 range;

/* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
 *   PTR_TO_MAP_VALUE_OR_NULL
 */
struct bpf_map *map_ptr;
};
/* Used to find other pointers with the same variable base, so they
 * can share range and align knowledge.
 */
u32 id;
u32 off; /* fixed part of pointer offset */
/* For scalar types (CONST_IMM | UNKNOWN_VALUE), this represents our
 * knowledge of the actual value.
 * For pointer types, this represents the variable part of the offset
 * from the pointed-to object, and is shared with all bpf_reg_states
 * with the same id as us.
 */
struct tnum align;
/* Used to determine if any memory access using this register will
 * result in a bad access. These two fields must be last.
 * See states_equal()
 * These refer to the same value as align, not necessarily the actual
 * contents of the register.
 */
s64 min_value;
u64 max_value;
};

Does that sound reasonable?  (And does my added comment on min/max_value
 accurately describe the current semantics, or will I need to change that
 as well?)

-Ed

PS. I think this approach would also mean several of the bpf_reg_types can
 be folded together:
* PTR_TO_MAP_VALUE and PTR_TO_MAP_VALUE_ADJ are the same
* FRAME_PTR is just a PTR_TO_STACK with known-zero offset
* CONST_IMM is similarly a special case of UNKNOWN_VALUE


Darlehen angebot 3 %

2017-05-19 Thread Frau SCHMIDT


Sehr geehrte Damen  und Herren,

Haben Sie Interesse über einer finanziellen Darlehen zu 3%?
kontaktieren Sie mich für mehr Details und Bedingungen. ich kann all
jenen helfen, wer ein Darlehen benötigen.
Ich kann Ihnen biete ein darlehen in hohe von 10.000.000 EUR
Meine mail: info@rschmidt.online

Mit freundlichen Grüßen

Frau SCHMIDT


Re: [PATCH net] bonding: fix randomly populated arp target array

2017-05-19 Thread Andy Gospodarek
On Fri, May 19, 2017 at 02:46:46PM -0400, Jarod Wilson wrote:
> In commit dc9c4d0fe023, the arp_target array moved from a static global
> to a local variable. By the nature of static globals, the array used to
> be initialized to all 0. At present, it's full of random data, which
> gets interpreted as arp_target values, when none have actually been
> specified. Systems end up booting with spew along these lines:
> 
> [   32.161783] IPv6: ADDRCONF(NETDEV_UP): lacp0: link is not ready
> [   32.168475] IPv6: ADDRCONF(NETDEV_UP): lacp0: link is not ready
> [   32.175089] 8021q: adding VLAN 0 to HW filter on device lacp0
> [   32.193091] IPv6: ADDRCONF(NETDEV_UP): lacp0: link is not ready
> [   32.204892] lacp0: Setting MII monitoring interval to 100
> [   32.211071] lacp0: Removing ARP target 216.124.228.17
> [   32.216824] lacp0: Removing ARP target 218.160.255.255
> [   32.222646] lacp0: Removing ARP target 185.170.136.184
> [   32.228496] lacp0: invalid ARP target 255.255.255.255 specified for removal
> [   32.236294] lacp0: option arp_ip_target: invalid value (-255.255.255.255)
> [   32.243987] lacp0: Removing ARP target 56.125.228.17
> [   32.249625] lacp0: Removing ARP target 218.160.255.255
> [   32.255432] lacp0: Removing ARP target 15.157.233.184
> [   32.261165] lacp0: invalid ARP target 255.255.255.255 specified for removal
> [   32.268939] lacp0: option arp_ip_target: invalid value (-255.255.255.255)
> [   32.276632] lacp0: Removing ARP target 16.0.0.0
> [   32.281755] lacp0: Removing ARP target 218.160.255.255
> [   32.287567] lacp0: Removing ARP target 72.125.228.17
> [   32.293165] lacp0: Removing ARP target 218.160.255.255
> [   32.298970] lacp0: Removing ARP target 8.125.228.17
> [   32.304458] lacp0: Removing ARP target 218.160.255.255
> 
> None of these were actually specified as ARP targets, and the driver does
> seem to clean up the mess okay, but it's rather noisy and confusing, leaks
> values to userspace, and the 255.255.255.255 spew shows up even when debug
> prints are disabled.
> 
> The fix: just zero out arp_target at init time.
> 
> While we're in here, init arp_all_targets_value in the right place.
> 

Looks good.  Thanks, Jarod!

Acked-by: Andy Gospodarek 

> Fixes: dc9c4d0fe023 ("bonding: reduce scope of some global variables")
> CC: Mahesh Bandewar 
> CC: Jay Vosburgh 
> CC: Veaceslav Falico 
> CC: Andy Gospodarek 
> CC: netdev@vger.kernel.org
> CC: sta...@vger.kernel.org
> Signed-off-by: Jarod Wilson 
> ---
>  drivers/net/bonding/bond_main.c | 5 ++---
>  1 file changed, 2 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> index 2be78807fd6e..73313318399c 100644
> --- a/drivers/net/bonding/bond_main.c
> +++ b/drivers/net/bonding/bond_main.c
> @@ -4271,10 +4271,10 @@ static int bond_check_params(struct bond_params 
> *params)
>   int arp_validate_value, fail_over_mac_value, primary_reselect_value, i;
>   struct bond_opt_value newval;
>   const struct bond_opt_value *valptr;
> - int arp_all_targets_value;
> + int arp_all_targets_value = 0;
>   u16 ad_actor_sys_prio = 0;
>   u16 ad_user_port_key = 0;
> - __be32 arp_target[BOND_MAX_ARP_TARGETS];
> + __be32 arp_target[BOND_MAX_ARP_TARGETS] = { 0 };
>   int arp_ip_count;
>   int bond_mode   = BOND_MODE_ROUNDROBIN;
>   int xmit_hashtype = BOND_XMIT_POLICY_LAYER2;
> @@ -4501,7 +4501,6 @@ static int bond_check_params(struct bond_params *params)
>   arp_validate_value = 0;
>   }
>  
> - arp_all_targets_value = 0;
>   if (arp_all_targets) {
>   bond_opt_initstr(, arp_all_targets);
>   valptr = bond_opt_parse(bond_opt_get(BOND_OPT_ARP_ALL_TARGETS),
> -- 
> 2.12.1
> 


Re: [PATCH net] fix BUG: scheduling while atomic in netlink broadcast

2017-05-19 Thread Akshay Narayan
> I don't want to defend the use of yield() but it looks like there is another
> problem.

I believe this use of yield() should be replaced with cond_resched()
even if it turns out there is an unrelated problem.

> Does this module call netlink_broadcast() with __GFP_DIRECT_RECLAIM
> in IRQ context? If so you should adjust the gfp flags.

The module only calls netlink_broadcast() from a pluggable TCP
function; from my understanding this is not in the IRQ context. Full
trace, perhaps more clear, attached below.

May 19 14:30:44 ccp kernel: [  178.885546] BUG: scheduling while
atomic: mm-link/3105/0x0200
May 19 14:30:44 ccp kernel: [  178.885552] Modules linked in:
ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_conntrack_ipv4
nf_defrag_ipv4 nf_nat_ipv4 nf_nat libcrc32c xt_connmark nf_conntrack
ccp(OE) crct10dif_pclmul crc32_pclmul
 ghash_clmulni_intel snd_intel8x0 pcbc snd_ac97_codec joydev ac97_bus
snd_pcm snd_seq_midi snd_seq_midi_event snd_rawmidi aesni_intel
snd_seq aes_x86_64 crypto_simd snd_seq_device snd_timer snd input_leds
i2c_piix4 glue_helper cryptd so
undcore mac_hid serio_raw vboxvideo ttm drm_kms_helper drm fb_sys_fops
syscopyarea sysfillrect sysimgblt vboxguest intel_rapl_perf parport_pc
ppdev lp parport ip_tables x_tables autofs4 hid_generic usbhid hid
e1000 ahci libahci psmouse
fjes pata_acpi video
May 19 14:30:44 ccp kernel: [  178.885665] CPU: 0 PID: 3105 Comm:
mm-link Tainted: GW  OE   4.10.0-21-generic #23-Ubuntu
May 19 14:30:44 ccp kernel: [  178.885666] Hardware name: innotek GmbH
VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
May 19 14:30:44 ccp kernel: [  178.885667] Call Trace:
May 19 14:30:44 ccp kernel: [  178.885674]  dump_stack+0x63/0x81
May 19 14:30:44 ccp kernel: [  178.885678]  __schedule_bug+0x54/0x70
May 19 14:30:44 ccp kernel: [  178.885682]  __schedule+0x536/0x6f0
May 19 14:30:44 ccp kernel: [  178.885685]  schedule+0x36/0x80
May 19 14:30:44 ccp kernel: [  178.885687]  sys_sched_yield+0x4f/0x60
May 19 14:30:44 ccp kernel: [  178.885688]  yield+0x33/0x40
May 19 14:30:44 ccp kernel: [  178.885691]
netlink_broadcast_filtered+0x29b/0x3c0
May 19 14:30:44 ccp kernel: [  178.885692]  netlink_broadcast+0x1d/0x20
May 19 14:30:44 ccp kernel: [  178.885697]  nl_sendmsg+0xb8/0x664 [ccp]
May 19 14:30:44 ccp kernel: [  178.885699]  nl_send_ack_notif+0x7d/0x90 [ccp]
May 19 14:30:44 ccp kernel: [  178.885702]  tcp_ccp_cong_avoid+0x69/0x70 [ccp]
May 19 14:30:44 ccp kernel: [  178.885704]  tcp_ack+0x980/0xa60
May 19 14:30:44 ccp kernel: [  178.885708]  tcp_rcv_state_process+0x2be/0xda0
May 19 14:30:44 ccp kernel: [  178.885712]  ? security_sock_rcv_skb+0x3b/0x50
May 19 14:30:44 ccp kernel: [  178.885715]  ? sk_filter_trim_cap+0x3b/0x270
May 19 14:30:44 ccp kernel: [  178.885717]  tcp_v4_do_rcv+0xb2/0x200
May 19 14:30:44 ccp kernel: [  178.885719]  tcp_v4_rcv+0x90a/0xa00
May 19 14:30:44 ccp kernel: [  178.885722]  ip_local_deliver_finish+0x96/0x1c0
May 19 14:30:44 ccp kernel: [  178.885725]  ip_local_deliver+0x6f/0xe0
May 19 14:30:44 ccp kernel: [  178.885727]  ? ip_rcv_finish+0x3f0/0x3f0
May 19 14:30:44 ccp kernel: [  178.885730]  ip_rcv_finish+0x118/0x3f0
May 19 14:30:44 ccp kernel: [  178.885732]  ip_rcv+0x282/0x390
May 19 14:30:44 ccp kernel: [  178.885735]  ? inet_del_offload+0x40/0x40
May 19 14:30:44 ccp kernel: [  178.885737]  __netif_receive_skb_core+0x514/0xa40
May 19 14:30:44 ccp kernel: [  178.885740]  ? __check_object_size+0x10/0x1d7
May 19 14:30:44 ccp kernel: [  178.885742]  __netif_receive_skb+0x18/0x60
May 19 14:30:44 ccp kernel: [  178.885744]  netif_receive_skb_internal+0x32/0xa0
May 19 14:30:44 ccp kernel: [  178.885746]  netif_receive_skb+0x1c/0x70
May 19 14:30:44 ccp kernel: [  178.885749]  tun_get_user+0x425/0x800
May 19 14:30:44 ccp kernel: [  178.885751]  tun_chr_write_iter+0x57/0x70
May 19 14:30:44 ccp kernel: [  178.885752]  new_sync_write+0xd5/0x130
May 19 14:30:44 ccp kernel: [  178.885754]  __vfs_write+0x26/0x40
May 19 14:30:44 ccp kernel: [  178.885756]  vfs_write+0xb5/0x1a0
May 19 14:30:44 ccp kernel: [  178.885757]  SyS_write+0x55/0xc0
May 19 14:30:44 ccp kernel: [  178.885760]  entry_SYSCALL_64_fastpath+0x1e/0xad
May 19 14:30:44 ccp kernel: [  178.885762] RIP: 0033:0x7f8d9abbf670
May 19 14:30:44 ccp kernel: [  178.885763] RSP: 002b:7ffc2f16d8b8
EFLAGS: 0246 ORIG_RAX: 0001
May 19 14:30:44 ccp kernel: [  178.885765] RAX: ffda RBX:
7ffc2f16dde0 RCX: 7f8d9abbf670
May 19 14:30:44 ccp kernel: [  178.885767] RDX: 0038 RSI:
557403fd86e0 RDI: 0008
May 19 14:30:44 ccp kernel: [  178.885768] RBP: 7ffc2f16dcd0 R08:
557403fd8d40 R09: 557403fd8690
May 19 14:30:44 ccp kernel: [  178.885769] R10: 000e20d41e89a5c5 R11:
0246 R12: 0108
May 19 14:30:44 ccp kernel: [  178.885770] R13: 145c R14:
7ffc2f16dd98 R15: 0003


[PATCH net] bonding: fix randomly populated arp target array

2017-05-19 Thread Jarod Wilson
In commit dc9c4d0fe023, the arp_target array moved from a static global
to a local variable. By the nature of static globals, the array used to
be initialized to all 0. At present, it's full of random data, which
gets interpreted as arp_target values, when none have actually been
specified. Systems end up booting with spew along these lines:

[   32.161783] IPv6: ADDRCONF(NETDEV_UP): lacp0: link is not ready
[   32.168475] IPv6: ADDRCONF(NETDEV_UP): lacp0: link is not ready
[   32.175089] 8021q: adding VLAN 0 to HW filter on device lacp0
[   32.193091] IPv6: ADDRCONF(NETDEV_UP): lacp0: link is not ready
[   32.204892] lacp0: Setting MII monitoring interval to 100
[   32.211071] lacp0: Removing ARP target 216.124.228.17
[   32.216824] lacp0: Removing ARP target 218.160.255.255
[   32.222646] lacp0: Removing ARP target 185.170.136.184
[   32.228496] lacp0: invalid ARP target 255.255.255.255 specified for removal
[   32.236294] lacp0: option arp_ip_target: invalid value (-255.255.255.255)
[   32.243987] lacp0: Removing ARP target 56.125.228.17
[   32.249625] lacp0: Removing ARP target 218.160.255.255
[   32.255432] lacp0: Removing ARP target 15.157.233.184
[   32.261165] lacp0: invalid ARP target 255.255.255.255 specified for removal
[   32.268939] lacp0: option arp_ip_target: invalid value (-255.255.255.255)
[   32.276632] lacp0: Removing ARP target 16.0.0.0
[   32.281755] lacp0: Removing ARP target 218.160.255.255
[   32.287567] lacp0: Removing ARP target 72.125.228.17
[   32.293165] lacp0: Removing ARP target 218.160.255.255
[   32.298970] lacp0: Removing ARP target 8.125.228.17
[   32.304458] lacp0: Removing ARP target 218.160.255.255

None of these were actually specified as ARP targets, and the driver does
seem to clean up the mess okay, but it's rather noisy and confusing, leaks
values to userspace, and the 255.255.255.255 spew shows up even when debug
prints are disabled.

The fix: just zero out arp_target at init time.

While we're in here, init arp_all_targets_value in the right place.

Fixes: dc9c4d0fe023 ("bonding: reduce scope of some global variables")
CC: Mahesh Bandewar 
CC: Jay Vosburgh 
CC: Veaceslav Falico 
CC: Andy Gospodarek 
CC: netdev@vger.kernel.org
CC: sta...@vger.kernel.org
Signed-off-by: Jarod Wilson 
---
 drivers/net/bonding/bond_main.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 2be78807fd6e..73313318399c 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4271,10 +4271,10 @@ static int bond_check_params(struct bond_params *params)
int arp_validate_value, fail_over_mac_value, primary_reselect_value, i;
struct bond_opt_value newval;
const struct bond_opt_value *valptr;
-   int arp_all_targets_value;
+   int arp_all_targets_value = 0;
u16 ad_actor_sys_prio = 0;
u16 ad_user_port_key = 0;
-   __be32 arp_target[BOND_MAX_ARP_TARGETS];
+   __be32 arp_target[BOND_MAX_ARP_TARGETS] = { 0 };
int arp_ip_count;
int bond_mode   = BOND_MODE_ROUNDROBIN;
int xmit_hashtype = BOND_XMIT_POLICY_LAYER2;
@@ -4501,7 +4501,6 @@ static int bond_check_params(struct bond_params *params)
arp_validate_value = 0;
}
 
-   arp_all_targets_value = 0;
if (arp_all_targets) {
bond_opt_initstr(, arp_all_targets);
valptr = bond_opt_parse(bond_opt_get(BOND_OPT_ARP_ALL_TARGETS),
-- 
2.12.1



Re: [PATCH net] sctp: fix ICMP processing if skb is non-linear

2017-05-19 Thread Xin Long
On Fri, May 19, 2017 at 11:34 PM, Davide Caratti  wrote:
> when the ICMP packet is carried by a paged skb, sctp_err_lookup() may fail
> validation even if the payload contents match an open socket: as a
> consequence, sometimes ICMPs are wrongly ignored. Use skb_header_pointer()
> to retrieve encapsulated SCTP headers, to ensure that ICMP payloads are
> validated correctly, also when skbs are non-linear.
>
> Signed-off-by: Davide Caratti 
> ---
>  include/net/sctp/sctp.h |  2 +-
>  net/sctp/input.c| 29 +++--
>  net/sctp/ipv6.c |  2 +-
>  3 files changed, 21 insertions(+), 12 deletions(-)
>
> diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
> index 069582e..1b8c16b 100644
> --- a/include/net/sctp/sctp.h
> +++ b/include/net/sctp/sctp.h
> @@ -152,7 +152,7 @@ void sctp_v4_err(struct sk_buff *skb, u32 info);
>  void sctp_hash_endpoint(struct sctp_endpoint *);
>  void sctp_unhash_endpoint(struct sctp_endpoint *);
>  struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *,
> -struct sctphdr *, struct sctp_association **,
> +struct sctp_association **,
>  struct sctp_transport **);
>  void sctp_err_finish(struct sock *, struct sctp_transport *);
>  void sctp_icmp_frag_needed(struct sock *, struct sctp_association *,
> diff --git a/net/sctp/input.c b/net/sctp/input.c
> index 0e06a27..7f3f983 100644
> --- a/net/sctp/input.c
> +++ b/net/sctp/input.c
> @@ -469,19 +469,19 @@ void sctp_icmp_proto_unreachable(struct sock *sk,
>
>  /* Common lookup code for icmp/icmpv6 error handler. */
>  struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff 
> *skb,
> -struct sctphdr *sctphdr,
>  struct sctp_association **app,
>  struct sctp_transport **tpp)
>  {
> +   struct sctp_init_chunk _chunkhdr, *chunkhdr;
> +   struct sctphdr _sctphdr, *sctphdr;
> union sctp_addr saddr;
> union sctp_addr daddr;
> struct sctp_af *af;
> struct sock *sk = NULL;
> struct sctp_association *asoc;
> struct sctp_transport *transport = NULL;
> -   struct sctp_init_chunk *chunkhdr;
> -   __u32 vtag = ntohl(sctphdr->vtag);
> -   int len = skb->len - ((void *)sctphdr - (void *)skb->data);
> +   int offset;
> +   __u32 vtag;
>
> *app = NULL; *tpp = NULL;
>
> @@ -515,14 +515,23 @@ struct sock *sctp_err_lookup(struct net *net, int 
> family, struct sk_buff *skb,
>  * or the chunk type or the Initiate Tag does not match, silently
>  * discard the packet.
>  */
> +   offset = skb_transport_offset(skb);
> +   sctphdr = skb_header_pointer(skb, offset, sizeof(_sctphdr), 
> &_sctphdr);
> +   if (unlikely(!sctphdr))
> +   goto out;
> +
> +   vtag = ntohl(sctphdr->vtag);
> if (vtag == 0) {
> -   chunkhdr = (void *)sctphdr + sizeof(struct sctphdr);
> -   if (len < sizeof(struct sctphdr) + sizeof(sctp_chunkhdr_t)
> - + sizeof(__be32) ||
> +   offset += sizeof(_sctphdr);
will be nice to delete this line, and use
> +   /* chunk header + first 4 octects of init header */
> +   chunkhdr = skb_header_pointer(skb, offset,
chunkhdr = skb_header_pointer(skb, offset + sizeof(_sctphdr), ;)
wdyt?

> + sizeof(struct sctp_chunkhdr) +
> + sizeof(__be32), &_chunkhdr);
> +   if (!chunkhdr ||
> chunkhdr->chunk_hdr.type != SCTP_CID_INIT ||
> -   ntohl(chunkhdr->init_hdr.init_tag) != asoc->c.my_vtag) {
> +   ntohl(chunkhdr->init_hdr.init_tag) != asoc->c.my_vtag)
> goto out;
> -   }
> +
> } else if (vtag != asoc->c.peer_vtag) {
> goto out;
> }
> @@ -585,7 +594,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
> savesctp = skb->transport_header;
> skb_reset_network_header(skb);
> skb_set_transport_header(skb, ihlen);
> -   sk = sctp_err_lookup(net, AF_INET, skb, sctp_hdr(skb), , 
> );
> +   sk = sctp_err_lookup(net, AF_INET, skb, , );
> /* Put back, the original values. */
> skb->network_header = saveip;
> skb->transport_header = savesctp;
> diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
> index 142b70e..d72c8d5 100644
> --- a/net/sctp/ipv6.c
> +++ b/net/sctp/ipv6.c
> @@ -157,7 +157,7 @@ static void sctp_v6_err(struct sk_buff *skb, struct 
> inet6_skb_parm *opt,
> savesctp = skb->transport_header;
> skb_reset_network_header(skb);
> skb_set_transport_header(skb, offset);
> -   sk = sctp_err_lookup(net, AF_INET6, skb, sctp_hdr(skb), , 
> );
> +   sk = 

Re: [PATCH] e1000e: use disable_hardirq() also for MSIX vectors in e1000_netpoll()

2017-05-19 Thread Cong Wang
On Fri, May 19, 2017 at 12:18 AM, Konstantin Khlebnikov
 wrote:
> Replace disable_irq() which waits for threaded irq handlers with
> disable_hardirq() which waits only for hardirq part.
>
> Signed-off-by: Konstantin Khlebnikov 
> Fixes: 311191297125 ("e1000: use disable_hardirq() for e1000_netpoll()")

Thomas had a similar patch; I don't know why he never sent it
out formally. Anyway,

Acked-by: Cong Wang 


[Patch net] vsock: use new wait API for vsock_stream_sendmsg()

2017-05-19 Thread Cong Wang
As reported by Michal, vsock_stream_sendmsg() could still
sleep at vsock_stream_has_space() after prepare_to_wait():

  vsock_stream_has_space
vmci_transport_stream_has_space
  vmci_qpair_produce_free_space
qp_lock
  qp_acquire_queue_mutex
mutex_lock

Just switch to the new wait API like we did for commit
d9dc8b0f8b4e ("net: fix sleeping for sk_wait_event()").

Reported-by: Michal Kubecek 
Cc: Stefan Hajnoczi 
Cc: Jorgen Hansen 
Cc: "Michael S. Tsirkin" 
Cc: Claudio Imbrenda 
Signed-off-by: Cong Wang 
---
 net/vmw_vsock/af_vsock.c | 21 -
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 6f7f675..dfc8c51e 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1540,8 +1540,7 @@ static int vsock_stream_sendmsg(struct socket *sock, 
struct msghdr *msg,
long timeout;
int err;
struct vsock_transport_send_notify_data send_data;
-
-   DEFINE_WAIT(wait);
+   DEFINE_WAIT_FUNC(wait, woken_wake_function);
 
sk = sock->sk;
vsk = vsock_sk(sk);
@@ -1584,11 +1583,10 @@ static int vsock_stream_sendmsg(struct socket *sock, 
struct msghdr *msg,
if (err < 0)
goto out;
 
-
while (total_written < len) {
ssize_t written;
 
-   prepare_to_wait(sk_sleep(sk), , TASK_INTERRUPTIBLE);
+   add_wait_queue(sk_sleep(sk), );
while (vsock_stream_has_space(vsk) == 0 &&
   sk->sk_err == 0 &&
   !(sk->sk_shutdown & SEND_SHUTDOWN) &&
@@ -1597,33 +1595,30 @@ static int vsock_stream_sendmsg(struct socket *sock, 
struct msghdr *msg,
/* Don't wait for non-blocking sockets. */
if (timeout == 0) {
err = -EAGAIN;
-   finish_wait(sk_sleep(sk), );
+   remove_wait_queue(sk_sleep(sk), );
goto out_err;
}
 
err = transport->notify_send_pre_block(vsk, _data);
if (err < 0) {
-   finish_wait(sk_sleep(sk), );
+   remove_wait_queue(sk_sleep(sk), );
goto out_err;
}
 
release_sock(sk);
-   timeout = schedule_timeout(timeout);
+   timeout = wait_woken(, TASK_INTERRUPTIBLE, 
timeout);
lock_sock(sk);
if (signal_pending(current)) {
err = sock_intr_errno(timeout);
-   finish_wait(sk_sleep(sk), );
+   remove_wait_queue(sk_sleep(sk), );
goto out_err;
} else if (timeout == 0) {
err = -EAGAIN;
-   finish_wait(sk_sleep(sk), );
+   remove_wait_queue(sk_sleep(sk), );
goto out_err;
}
-
-   prepare_to_wait(sk_sleep(sk), ,
-   TASK_INTERRUPTIBLE);
}
-   finish_wait(sk_sleep(sk), );
+   remove_wait_queue(sk_sleep(sk), );
 
/* These checks occur both as part of and after the loop
 * conditional since we need to check before and after
-- 
2.5.5



Re: [PATCH net] fix BUG: scheduling while atomic in netlink broadcast

2017-05-19 Thread Cong Wang
On Fri, May 19, 2017 at 10:22 AM, Akshay Narayan  wrote:
> netlink_broadcast_filtered() calls yield() when a slow listener causes
> the buffer to fill. yield() is the wrong choice here, as pointed out by
> Commit 8e3fabfde4 (sched: Update yield() docs); in some cases, its use causes
> "BUG: scheduling while atomic" and, when fewer cores are available,
> kernel hangs:


I don't want to defend the use of yield() but it looks like there is another
problem.

>
> (note: "ccp" is a kernel module which multicasts netlink messages upon
> certain TCP events)

Does this module call netlink_broadcast() with __GFP_DIRECT_RECLAIM
in IRQ context? If so you should adjust the gfp flags.


Re: [PATCH net-next v2] bridge: fix hello and hold timers starting/stopping

2017-05-19 Thread Xin Long
On Sat, May 20, 2017 at 1:30 AM, Ivan Vecera  wrote:
> Current bridge code incorrectly handles starting/stopping of hello and
> hold timers during STP enable/disable.
>
> 1. Timers are stopped in br_stp_start() during NO_STP->USER_STP
>transition. The timers are already stopped in NO_STP state so
>this is a confusing no-op.
>
> 2. During USER_STP->NO_STP transition the timers are started. This
>does not make sense and is confusing because the timer should not be
>active in NO_STP state.
>
> Cc: da...@davemloft.net
> Cc: sas...@cumulusnetworks.com
> Cc: step...@networkplumber.org
> Cc: bri...@lists.linux-foundation.org
> Cc: lucien@gmail.com
> Cc: niko...@cumulusnetworks.com
> Signed-off-by: Ivan Vecera 
Reviewed-by: Xin Long 

> ---
>  net/bridge/br_stp_if.c | 11 ---
>  1 file changed, 11 deletions(-)
>
> diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
> index 08341d2aa9c9..a05027027513 100644
> --- a/net/bridge/br_stp_if.c
> +++ b/net/bridge/br_stp_if.c
> @@ -150,7 +150,6 @@ static int br_stp_call_user(struct net_bridge *br, char 
> *arg)
>
>  static void br_stp_start(struct net_bridge *br)
>  {
> -   struct net_bridge_port *p;
> int err = -ENOENT;
>
> if (net_eq(dev_net(br->dev), _net))
> @@ -169,11 +168,6 @@ static void br_stp_start(struct net_bridge *br)
> if (!err) {
> br->stp_enabled = BR_USER_STP;
> br_debug(br, "userspace STP started\n");
> -
> -   /* Stop hello and hold timers */
> -   del_timer(>hello_timer);
> -   list_for_each_entry(p, >port_list, list)
> -   del_timer(>hold_timer);
> } else {
> br->stp_enabled = BR_KERNEL_STP;
> br_debug(br, "using kernel STP\n");
> @@ -187,7 +181,6 @@ static void br_stp_start(struct net_bridge *br)
>
>  static void br_stp_stop(struct net_bridge *br)
>  {
> -   struct net_bridge_port *p;
> int err;
>
> if (br->stp_enabled == BR_USER_STP) {
> @@ -196,10 +189,6 @@ static void br_stp_stop(struct net_bridge *br)
> br_err(br, "failed to stop userspace STP (%d)\n", 
> err);
>
> /* To start timers on any ports left in blocking */
> -   mod_timer(>hello_timer, jiffies + br->hello_time);
> -   list_for_each_entry(p, >port_list, list)
> -   mod_timer(>hold_timer,
> - round_jiffies(jiffies + BR_HOLD_TIME));
> spin_lock_bh(>lock);
> br_port_state_selection(br);
> spin_unlock_bh(>lock);
> --
> 2.13.0
>


  1   2   3   >