[PATCH net-next 6/6] net: do not block BH while processing socket backlog

2016-04-27 Thread Eric Dumazet
Socket backlog processing is a major latency source.

With current TCP socket sk_rcvbuf limits, I have sampled __release_sock()
holding cpu for more than 5 ms, and packets being dropped by the NIC
once ring buffer is filled.

All users are now ready to be called from process context,
we can unblock BH and let interrupts be serviced faster.

cond_resched_softirq() could be removed, as it has no more user.

Signed-off-by: Eric Dumazet 
---
 net/core/sock.c | 22 --
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/net/core/sock.c b/net/core/sock.c
index e16a5db853c6..70744dbb6c3f 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2019,33 +2019,27 @@ static void __release_sock(struct sock *sk)
__releases(&sk->sk_lock.slock)
__acquires(&sk->sk_lock.slock)
 {
-   struct sk_buff *skb = sk->sk_backlog.head;
+   struct sk_buff *skb, *next;
 
-   do {
+   while ((skb = sk->sk_backlog.head) != NULL) {
sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
-   bh_unlock_sock(sk);
 
-   do {
-   struct sk_buff *next = skb->next;
+   spin_unlock_bh(&sk->sk_lock.slock);
 
+   do {
+   next = skb->next;
prefetch(next);
WARN_ON_ONCE(skb_dst_is_noref(skb));
skb->next = NULL;
sk_backlog_rcv(sk, skb);
 
-   /*
-* We are in process context here with softirqs
-* disabled, use cond_resched_softirq() to preempt.
-* This is safe to do because we've taken the backlog
-* queue private:
-*/
-   cond_resched_softirq();
+   cond_resched();
 
skb = next;
} while (skb != NULL);
 
-   bh_lock_sock(sk);
-   } while ((skb = sk->sk_backlog.head) != NULL);
+   spin_lock_bh(&sk->sk_lock.slock);
+   }
 
/*
 * Doing the zeroing here guarantee we can not loop forever
-- 
2.8.0.rc3.226.g39d4020



[PATCH net-next 2/6] tcp: do not block bh during prequeue processing

2016-04-27 Thread Eric Dumazet
AFAIK, nothing in current TCP stack absolutely wants BH
being disabled once socket is owned by a thread running in
process context.

As mentioned in my prior patch ("tcp: give prequeue mode some care"),
processing a batch of packets might take time, better not block BH
at all.

Signed-off-by: Eric Dumazet 
---
 net/ipv4/tcp.c   |  4 
 net/ipv4/tcp_input.c | 30 ++
 2 files changed, 2 insertions(+), 32 deletions(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 7f51389814e6..f8856b76f941 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1445,12 +1445,8 @@ static void tcp_prequeue_process(struct sock *sk)
 
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUED);
 
-   /* RX process wants to run with disabled BHs, though it is not
-* necessary */
-   local_bh_disable();
while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
sk_backlog_rcv(sk, skb);
-   local_bh_enable();
 
/* Clear memory counter. */
tp->ucopy.memory = 0;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 0eb31df8edfa..44e0f9f15f32 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4608,14 +4608,12 @@ static void tcp_data_queue(struct sock *sk, struct 
sk_buff *skb)
 
__set_current_state(TASK_RUNNING);
 
-   local_bh_enable();
if (!skb_copy_datagram_msg(skb, 0, tp->ucopy.msg, 
chunk)) {
tp->ucopy.len -= chunk;
tp->copied_seq += chunk;
eaten = (chunk == skb->len);
tcp_rcv_space_adjust(sk);
}
-   local_bh_disable();
}
 
if (eaten <= 0) {
@@ -5131,7 +5129,6 @@ static int tcp_copy_to_iovec(struct sock *sk, struct 
sk_buff *skb, int hlen)
int chunk = skb->len - hlen;
int err;
 
-   local_bh_enable();
if (skb_csum_unnecessary(skb))
err = skb_copy_datagram_msg(skb, hlen, tp->ucopy.msg, chunk);
else
@@ -5143,32 +5140,9 @@ static int tcp_copy_to_iovec(struct sock *sk, struct 
sk_buff *skb, int hlen)
tcp_rcv_space_adjust(sk);
}
 
-   local_bh_disable();
return err;
 }
 
-static __sum16 __tcp_checksum_complete_user(struct sock *sk,
-   struct sk_buff *skb)
-{
-   __sum16 result;
-
-   if (sock_owned_by_user(sk)) {
-   local_bh_enable();
-   result = __tcp_checksum_complete(skb);
-   local_bh_disable();
-   } else {
-   result = __tcp_checksum_complete(skb);
-   }
-   return result;
-}
-
-static inline bool tcp_checksum_complete_user(struct sock *sk,
-struct sk_buff *skb)
-{
-   return !skb_csum_unnecessary(skb) &&
-  __tcp_checksum_complete_user(sk, skb);
-}
-
 /* Does PAWS and seqno based validation of an incoming segment, flags will
  * play significant role here.
  */
@@ -5383,7 +5357,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff 
*skb,
}
}
if (!eaten) {
-   if (tcp_checksum_complete_user(sk, skb))
+   if (tcp_checksum_complete(skb))
goto csum_error;
 
if ((int)skb->truesize > sk->sk_forward_alloc)
@@ -5427,7 +5401,7 @@ no_ack:
}
 
 slow_path:
-   if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb))
+   if (len < (th->doff << 2) || tcp_checksum_complete(skb))
goto csum_error;
 
if (!th->ack && !th->rst && !th->syn)
-- 
2.8.0.rc3.226.g39d4020



[PATCH net-next 4/6] udp: prepare for non BH masking at backlog processing

2016-04-27 Thread Eric Dumazet
UDP uses the generic socket backlog code, and this will soon
be changed to not disable BH when protocol is called back.

We need to use appropriate SNMP accessors.

Signed-off-by: Eric Dumazet 
---
 net/ipv4/udp.c | 4 ++--
 net/ipv6/udp.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 093284c5c03b..f67f52ba4809 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1514,9 +1514,9 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct 
sk_buff *skb)
 
/* Note that an ENOMEM error is charged twice */
if (rc == -ENOMEM)
-   __UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS,
+   UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS,
is_udplite);
-   __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
+   UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
kfree_skb(skb);
trace_udp_fail_queue_rcv_skb(rc, sk);
return -1;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 1ba5a74ac18f..f911c63f79e6 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -570,9 +570,9 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct 
sk_buff *skb)
 
/* Note that an ENOMEM error is charged twice */
if (rc == -ENOMEM)
-   __UDP6_INC_STATS(sock_net(sk),
+   UDP6_INC_STATS(sock_net(sk),
 UDP_MIB_RCVBUFERRORS, is_udplite);
-   __UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
+   UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
kfree_skb(skb);
return -1;
}
-- 
2.8.0.rc3.226.g39d4020



[PATCH net-next 3/6] dccp: do not assume DCCP code is non preemptible

2016-04-27 Thread Eric Dumazet
DCCP uses the generic backlog code, and this will soon
be changed to not disable BH when protocol is called back.

Signed-off-by: Eric Dumazet 
---
 net/dccp/input.c   | 2 +-
 net/dccp/ipv4.c| 4 ++--
 net/dccp/ipv6.c| 4 ++--
 net/dccp/options.c | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/dccp/input.c b/net/dccp/input.c
index 2437ecc13b82..ba347184bda9 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -359,7 +359,7 @@ send_sync:
goto discard;
}
 
-   __DCCP_INC_STATS(DCCP_MIB_INERRS);
+   DCCP_INC_STATS(DCCP_MIB_INERRS);
 discard:
__kfree_skb(skb);
return 0;
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index a8164272e0f4..5c7e413a3ae4 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -533,8 +533,8 @@ static void dccp_v4_ctl_send_reset(const struct sock *sk, 
struct sk_buff *rxskb)
bh_unlock_sock(ctl_sk);
 
if (net_xmit_eval(err) == 0) {
-   __DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
-   __DCCP_INC_STATS(DCCP_MIB_OUTRSTS);
+   DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
+   DCCP_INC_STATS(DCCP_MIB_OUTRSTS);
}
 out:
 dst_release(dst);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 0f4eb4ea57a5..d176f4e66369 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -277,8 +277,8 @@ static void dccp_v6_ctl_send_reset(const struct sock *sk, 
struct sk_buff *rxskb)
if (!IS_ERR(dst)) {
skb_dst_set(skb, dst);
ip6_xmit(ctl_sk, skb, &fl6, NULL, 0);
-   __DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
-   __DCCP_INC_STATS(DCCP_MIB_OUTRSTS);
+   DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
+   DCCP_INC_STATS(DCCP_MIB_OUTRSTS);
return;
}
 
diff --git a/net/dccp/options.c b/net/dccp/options.c
index b82b7ee9a1d2..74d29c56c367 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -253,7 +253,7 @@ out_nonsensical_length:
return 0;
 
 out_invalid_option:
-   __DCCP_INC_STATS(DCCP_MIB_INVALIDOPT);
+   DCCP_INC_STATS(DCCP_MIB_INVALIDOPT);
rc = DCCP_RESET_CODE_OPTION_ERROR;
 out_featneg_failed:
DCCP_WARN("DCCP(%p): Option %d (len=%d) error=%u\n", sk, opt, len, rc);
-- 
2.8.0.rc3.226.g39d4020



[PATCH net-next 1/6] tcp: do not assume TCP code is non preemptible

2016-04-27 Thread Eric Dumazet
We want to make TCP stack preemptible, as draining prequeue
and backlog queues can take a lot of time.

Many SNMP updates were assuming that BH (and preemption) was disabled.

Need to convert some __NET_INC_STATS() calls to NET_INC_STATS()
and some __TCP_INC_STATS() to TCP_INC_STATS()

Before using this_cpu_ptr(net->ipv4.tcp_sk) in tcp_v4_send_reset()
and tcp_v4_send_ack(), we add an explicit preempt disabled section.

Signed-off-by: Eric Dumazet 
---
 net/ipv4/tcp.c   |  2 +-
 net/ipv4/tcp_cdg.c   | 20 +-
 net/ipv4/tcp_cubic.c | 20 +-
 net/ipv4/tcp_fastopen.c  | 12 +++---
 net/ipv4/tcp_input.c | 96 
 net/ipv4/tcp_ipv4.c  | 14 ---
 net/ipv4/tcp_minisocks.c |  2 +-
 net/ipv4/tcp_output.c|  7 ++--
 net/ipv4/tcp_recovery.c  |  4 +-
 net/ipv4/tcp_timer.c | 10 +++--
 net/ipv6/tcp_ipv6.c  | 12 +++---
 11 files changed, 102 insertions(+), 97 deletions(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 040f35e7efe0..7f51389814e6 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3091,7 +3091,7 @@ void tcp_done(struct sock *sk)
struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
 
if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
-   __TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
+   TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
 
tcp_set_state(sk, TCP_CLOSE);
tcp_clear_xmit_timers(sk);
diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c
index 3c00208c37f4..4e3007845888 100644
--- a/net/ipv4/tcp_cdg.c
+++ b/net/ipv4/tcp_cdg.c
@@ -155,11 +155,11 @@ static void tcp_cdg_hystart_update(struct sock *sk)
 
ca->last_ack = now_us;
if (after(now_us, ca->round_start + base_owd)) {
-   __NET_INC_STATS(sock_net(sk),
-   
LINUX_MIB_TCPHYSTARTTRAINDETECT);
-   __NET_ADD_STATS(sock_net(sk),
-   LINUX_MIB_TCPHYSTARTTRAINCWND,
-   tp->snd_cwnd);
+   NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPHYSTARTTRAINDETECT);
+   NET_ADD_STATS(sock_net(sk),
+ LINUX_MIB_TCPHYSTARTTRAINCWND,
+ tp->snd_cwnd);
tp->snd_ssthresh = tp->snd_cwnd;
return;
}
@@ -174,11 +174,11 @@ static void tcp_cdg_hystart_update(struct sock *sk)
 125U);
 
if (ca->rtt.min > thresh) {
-   __NET_INC_STATS(sock_net(sk),
-   
LINUX_MIB_TCPHYSTARTDELAYDETECT);
-   __NET_ADD_STATS(sock_net(sk),
-   LINUX_MIB_TCPHYSTARTDELAYCWND,
-   tp->snd_cwnd);
+   NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPHYSTARTDELAYDETECT);
+   NET_ADD_STATS(sock_net(sk),
+ LINUX_MIB_TCPHYSTARTDELAYCWND,
+ tp->snd_cwnd);
tp->snd_ssthresh = tp->snd_cwnd;
}
}
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 59155af9de5d..0ce946e395e1 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -402,11 +402,11 @@ static void hystart_update(struct sock *sk, u32 delay)
ca->last_ack = now;
if ((s32)(now - ca->round_start) > ca->delay_min >> 4) {
ca->found |= HYSTART_ACK_TRAIN;
-   __NET_INC_STATS(sock_net(sk),
-   
LINUX_MIB_TCPHYSTARTTRAINDETECT);
-   __NET_ADD_STATS(sock_net(sk),
-   LINUX_MIB_TCPHYSTARTTRAINCWND,
-   tp->snd_cwnd);
+   NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPHYSTARTTRAINDETECT);
+   NET_ADD_STATS(sock_net(sk),
+ LINUX_MIB_TCPHYSTARTTRAINCWND,
+ tp->snd_cwnd);
tp->snd_ssthresh = tp->snd_cwnd;
}
}
@@ -423,11 +423,11 @@ static void hystart_update(struct sock *sk, u32 delay)
if (ca->curr_rtt > ca->delay_min +
  

[PATCH net-next 5/6] sctp: prepare for socket backlog behavior change

2016-04-27 Thread Eric Dumazet
sctp_inq_push() will soon be called without BH being blocked
when generic socket code flushes the socket backlog.

It is very possible SCTP can be converted to not rely on BH,
but this needs to be done by SCTP experts.

Signed-off-by: Eric Dumazet 
---
 net/sctp/inqueue.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index b335ffcef0b9..9d87bba0ff1d 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -89,10 +89,12 @@ void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk 
*chunk)
 * Eventually, we should clean up inqueue to not rely
 * on the BH related data structures.
 */
+   local_bh_disable();
list_add_tail(&chunk->list, &q->in_chunk_list);
if (chunk->asoc)
chunk->asoc->stats.ipackets++;
q->immediate.func(&q->immediate);
+   local_bh_enable();
 }
 
 /* Peek at the next chunk on the inqeue. */
-- 
2.8.0.rc3.226.g39d4020



[PATCH net-next 0/6] net: make TCP preemptible

2016-04-27 Thread Eric Dumazet
Most of TCP stack assumed it was running from BH handler.

This is great for most things, as TCP behavior is very sensitive
to scheduling artifacts.

However, the prequeue and backlog processing are problematic,
as they need to be flushed with BH being blocked.

To cope with modern needs, TCP sockets have big sk_rcvbuf values,
in the order of 16 MB.
This means that backlog can hold thousands of packets, and things
like TCP coalescing or collapsing on this amount of packets can
lead to insane latency spikes, since BH are blocked for too long.

It is time to make UDP/TCP stacks preemptible.

Note that fast path still runs from BH handler.

Eric Dumazet (6):
  tcp: do not assume TCP code is non preemptible
  tcp: do not block bh during prequeue processing
  dccp: do not assume DCCP code is non preemptible
  udp: prepare for non BH masking at backlog processing
  sctp: prepare for socket backlog behavior change
  net: do not block BH while processing socket backlog

 net/core/sock.c  |  22 +++--
 net/dccp/input.c |   2 +-
 net/dccp/ipv4.c  |   4 +-
 net/dccp/ipv6.c  |   4 +-
 net/dccp/options.c   |   2 +-
 net/ipv4/tcp.c   |   6 +--
 net/ipv4/tcp_cdg.c   |  20 
 net/ipv4/tcp_cubic.c |  20 
 net/ipv4/tcp_fastopen.c  |  12 ++---
 net/ipv4/tcp_input.c | 126 +++
 net/ipv4/tcp_ipv4.c  |  14 --
 net/ipv4/tcp_minisocks.c |   2 +-
 net/ipv4/tcp_output.c|   7 ++-
 net/ipv4/tcp_recovery.c  |   4 +-
 net/ipv4/tcp_timer.c |  10 ++--
 net/ipv4/udp.c   |   4 +-
 net/ipv6/tcp_ipv6.c  |  12 ++---
 net/ipv6/udp.c   |   4 +-
 net/sctp/inqueue.c   |   2 +
 19 files changed, 124 insertions(+), 153 deletions(-)

-- 
2.8.0.rc3.226.g39d4020



Re: [PATCH net-next 2/7] net: rtnetlink: allow only one idx saving stats attribute

2016-04-27 Thread Roopa Prabhu
On 4/27/16, 9:18 AM, Nikolay Aleksandrov wrote:
> We can't allow more than one stats attribute which uses the local idx
> since the result will be a mess. This is a simple check to make sure
> only one is being used at a time. Later when the filter_mask's 32 bits
> are over we can switch to a bitmap.
>
> Signed-off-by: Nikolay Aleksandrov 
> ---
>  include/net/rtnetlink.h |  6 ++
>  net/core/rtnetlink.c| 17 +++--
>  2 files changed, 21 insertions(+), 2 deletions(-)
>
> diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
> index 2f87c1ba13de..3f3b0b1b8722 100644
> --- a/include/net/rtnetlink.h
> +++ b/include/net/rtnetlink.h
> @@ -150,4 +150,10 @@ int rtnl_nla_parse_ifla(struct nlattr **tb, const struct 
> nlattr *head, int len);
>  
>  #define MODULE_ALIAS_RTNL_LINK(kind) MODULE_ALIAS("rtnl-link-" kind)
>  
> +/* at most one attribute which can save a local idx is allowed to be set
> + * IFLA_STATS_IDX_ATTR_MASK has all the idx saving attributes set and is
> + * used to check if more than one is being requested
> + */
> +#define IFLA_STATS_IDX_ATTR_MASK 0
> +
>  #endif
> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
> index aeb2fa9b1cda..ea03b6cd3d3c 100644
> --- a/net/core/rtnetlink.c
> +++ b/net/core/rtnetlink.c
> @@ -3512,7 +3512,7 @@ static int rtnl_stats_get(struct sk_buff *skb, struct 
> nlmsghdr *nlh)
>   struct if_stats_msg *ifsm;
>   struct net_device *dev = NULL;
>   struct sk_buff *nskb;
> - u32 filter_mask;
> + u32 filter_mask, lidx_filter;
>   int lidx = 0;
>   int err;
>  
> @@ -3529,6 +3529,14 @@ static int rtnl_stats_get(struct sk_buff *skb, struct 
> nlmsghdr *nlh)
>   if (!filter_mask)
>   return -EINVAL;
>  
> + /* only one attribute which can save a local idx is allowed at a time
> +  * even though rtnl_stats_get doesn't save the lidx, we need to be
> +  * consistent with the dump side and error out
> +  */
> + lidx_filter = filter_mask & IFLA_STATS_IDX_ATTR_MASK;
> + if (lidx_filter && !is_power_of_2(lidx_filter))
> + return -EINVAL;
> +
>   nskb = nlmsg_new(if_nlmsg_stats_size(dev, filter_mask), GFP_KERNEL);
>   if (!nskb)
>   return -ENOBUFS;
> @@ -3556,7 +3564,7 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct 
> netlink_callback *cb)
>   struct net_device *dev;
>   struct hlist_head *head;
>   unsigned int flags = NLM_F_MULTI;
> - u32 filter_mask = 0;
> + u32 filter_mask = 0, lidx_filter;
>   int err;
>  
>   s_h = cb->args[0];
> @@ -3570,6 +3578,11 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct 
> netlink_callback *cb)
>   if (!filter_mask)
>   return -EINVAL;
>  
> + /* only one attribute which can save a local idx is allowed at a time */
> + lidx_filter = filter_mask & IFLA_STATS_IDX_ATTR_MASK;
> + if (lidx_filter && !is_power_of_2(lidx_filter))
> + return -EINVAL;
> +
>   
instead of introducing the restriction at this level, is it possible to use two 
args for this
like below and avoid the restriction ?
cb->args[2] = current filter being processed
cb->args[3] = private filter idx (your lidx)


Re: iproute2: bash completion function for tc

2016-04-27 Thread Stephen Hemminger
On Wed, 27 Apr 2016 20:19:26 -0700
Alexei Starovoitov  wrote:

> On Tue, Apr 26, 2016 at 09:28:17AM +0200, Quentin Monnet wrote:
> > Hi Jamal, Stephen,
> > 
> > I searched for a function providing auto-completion for `tc` utility in
> > bash, but I found none. So I have created one, and I would like to share it
> > with the community. It is available here:
> > https://github.com/6WIND/tc_bash-completion/blob/master/tc
> > I would like to make it easily available to tc users, so here is a
> > twofold request:
> > 
> > * I do not know where to submit the code. Should I submit here on netdev
> > for inclusion in iproute2 package, or rather to the bash-completion
> > repository on GitHub? I feel like it would receive better feedback and
> > updates if pushed to iproute2. Could you please provide some advice here?
> > * The completion for `tc` seems to work well; I have tested it with many
> > commands, but I am no tc expert, and there are probably some cases where
> > the completion fails to propose the correct choices. I would be really
> > interested in any feedback/bug reports that you, or anyone on this list
> > who uses tc, could provide.
> 
> that looks very interesting.
> I think making it a part of iproute2 is a good thing.
> How about installing it into /etc/iproute2/ ?
> Stephen, any comments?
> 

I am ok with keeping it in the repository.
But it would need to be installed in the standard bash directory,
is that distro dependent?


Re: [PATCH v2 net-next 2/2] tcp: remove SKBTX_ACK_TSTAMP since it is redundant

2016-04-27 Thread Eric Dumazet
On Wed, 2016-04-27 at 23:39 -0400, Soheil Hassas Yeganeh wrote:
> From: Soheil Hassas Yeganeh 
> 
> The SKBTX_ACK_TSTAMP flag is set in skb_shinfo->tx_flags when
> the timestamp of the TCP acknowledgement should be reported on
> error queue. Since accessing skb_shinfo is likely to incur a
> cache-line miss at the time of receiving the ack, the
> txstamp_ack bit was added in tcp_skb_cb, which is set iff
> the SKBTX_ACK_TSTAMP flag is set for an skb. This makes
> SKBTX_ACK_TSTAMP flag redundant.
> 
> Remove the SKBTX_ACK_TSTAMP and instead use the txstamp_ack bit
> everywhere.
> 
> Note that this frees one bit in shinfo->tx_flags.
> 
> Signed-off-by: Soheil Hassas Yeganeh 
> Acked-by: Martin KaFai Lau 
> Suggested-by: Willem de Bruijn 
> ---

Acked-by: Eric Dumazet 




Re: [PATCH v2 net-next 1/2] tcp: remove an unnecessary check in tcp_tx_timestamp

2016-04-27 Thread Eric Dumazet
On Wed, 2016-04-27 at 23:39 -0400, Soheil Hassas Yeganeh wrote:
> From: Soheil Hassas Yeganeh 
> 
> Remove the redundant check for sk->sk_tsflags in tcp_tx_timestamp.
> 
> tcp_tx_timestamp() receives the tsflags as a parameter. As a
> result the "sk->sk_tsflags || tsflags" is redundant, since
> tsflags already includes sk->sk_tsflags plus overrides from
> control messages.
> 
> Signed-off-by: Soheil Hassas Yeganeh 
> ---

Acked-by: Eric Dumazet 





Re: [RFC PATCH 4/5] bnxt: Add support for segmentation of tunnels with outer checksums

2016-04-27 Thread Michael Chan
On Wed, Apr 27, 2016 at 8:21 AM, Alexander Duyck
 wrote:
> On Tue, Apr 26, 2016 at 10:55 PM, Michael Chan
>  wrote:
>> On Tue, Apr 19, 2016 at 12:06 PM, Alexander Duyck  
>> wrote:
>>> This patch assumes that the bnxt hardware will ignore existing IPv4/v6
>>> header fields for length and checksum as well as the length and checksum
>>> fields for outer UDP and GRE headers.
>>>
>>> I have no means of testing this as I do not have any bnx2x hardware but
>>> thought I would submit it as an RFC to see if anyone out there wants to
>>> test this and see if this does in fact enable this functionality allowing
>>> us to segment tunneled frames that have an outer checksum.
>>>
>>> Signed-off-by: Alexander Duyck 
>>
>> Hi Alex, I just did a very quick test of this patch on our bnxt
>> hardware and it seemed to work.
>>
>> I created a vxlan endpoint with udpcsum enabled and I saw TSO packets
>> getting through.  I've verified that our hardware can be programmed to
>> either ignore outer UDP checksum or to calculate it.  Current default
>> is to ignore ipv4 UDP checksum and calculate ipv6 UDP checksum.
>> Thanks.
>
> Are you saying you can natively support UDP tunnel with outer checksum
> offload then?

Yes.  Calculate or ignore the outer UDP checksum.

>
> I'm just trying to sort out if you actually need to have the partial
> segmentation offload support or if we can handle it in hardware.  Also
> is there any documentation you could point me to that might help to
> clarify what the hardware does/doesn't support so that I could improve
> upon this patch in order to make sure we are getting the most bang for
> the buck in terms of the features that can be offloaded by hardware?

No public documentation yet.  I think the plan is to publish the
programmer's reference on our website at some point in the future.


[PATCH v2 net-next 2/2] tcp: remove SKBTX_ACK_TSTAMP since it is redundant

2016-04-27 Thread Soheil Hassas Yeganeh
From: Soheil Hassas Yeganeh 

The SKBTX_ACK_TSTAMP flag is set in skb_shinfo->tx_flags when
the timestamp of the TCP acknowledgement should be reported on
error queue. Since accessing skb_shinfo is likely to incur a
cache-line miss at the time of receiving the ack, the
txstamp_ack bit was added in tcp_skb_cb, which is set iff
the SKBTX_ACK_TSTAMP flag is set for an skb. This makes
SKBTX_ACK_TSTAMP flag redundant.

Remove the SKBTX_ACK_TSTAMP and instead use the txstamp_ack bit
everywhere.

Note that this frees one bit in shinfo->tx_flags.

Signed-off-by: Soheil Hassas Yeganeh 
Acked-by: Martin KaFai Lau 
Suggested-by: Willem de Bruijn 
---
 include/linux/skbuff.h |  6 +-
 net/ipv4/tcp.c |  5 +++--
 net/ipv4/tcp_input.c   |  3 +--
 net/ipv4/tcp_output.c  | 17 +++--
 net/socket.c   |  3 ---
 5 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index da0ace3..ae30555 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -382,14 +382,10 @@ enum {
 
/* generate software time stamp when entering packet scheduling */
SKBTX_SCHED_TSTAMP = 1 << 6,
-
-   /* generate software timestamp on peer data acknowledgment */
-   SKBTX_ACK_TSTAMP = 1 << 7,
 };
 
 #define SKBTX_ANY_SW_TSTAMP(SKBTX_SW_TSTAMP| \
-SKBTX_SCHED_TSTAMP | \
-SKBTX_ACK_TSTAMP)
+SKBTX_SCHED_TSTAMP)
 #define SKBTX_ANY_TSTAMP   (SKBTX_HW_TSTAMP | SKBTX_ANY_SW_TSTAMP)
 
 /*
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3c542dc..8e05eb6 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -435,9 +435,10 @@ static void tcp_tx_timestamp(struct sock *sk, u16 tsflags, 
struct sk_buff *skb)
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
 
sock_tx_timestamp(sk, tsflags, &shinfo->tx_flags);
-   if (shinfo->tx_flags & SKBTX_ANY_TSTAMP)
+   if (tsflags & SOF_TIMESTAMPING_TX_ACK)
+   tcb->txstamp_ack = 1;
+   if (tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK)
shinfo->tskey = TCP_SKB_CB(skb)->seq + skb->len - 1;
-   tcb->txstamp_ack = !!(shinfo->tx_flags & SKBTX_ACK_TSTAMP);
}
 }
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 967520d..2f3fd92 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3087,8 +3087,7 @@ static void tcp_ack_tstamp(struct sock *sk, struct 
sk_buff *skb,
return;
 
shinfo = skb_shinfo(skb);
-   if ((shinfo->tx_flags & SKBTX_ACK_TSTAMP) &&
-   !before(shinfo->tskey, prior_snd_una) &&
+   if (!before(shinfo->tskey, prior_snd_una) &&
before(shinfo->tskey, tcp_sk(sk)->snd_una))
__skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
 }
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9d3b4b3..ace183c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -,11 +,17 @@ static void tcp_adjust_pcount(struct sock *sk, const 
struct sk_buff *skb, int de
tcp_verify_left_out(tp);
 }
 
+static bool tcp_has_tx_tstamp(const struct sk_buff *skb)
+{
+   return TCP_SKB_CB(skb)->txstamp_ack ||
+   (skb_shinfo(skb)->tx_flags & SKBTX_ANY_TSTAMP);
+}
+
 static void tcp_fragment_tstamp(struct sk_buff *skb, struct sk_buff *skb2)
 {
struct skb_shared_info *shinfo = skb_shinfo(skb);
 
-   if (unlikely(shinfo->tx_flags & SKBTX_ANY_TSTAMP) &&
+   if (unlikely(tcp_has_tx_tstamp(skb)) &&
!before(shinfo->tskey, TCP_SKB_CB(skb2)->seq)) {
struct skb_shared_info *shinfo2 = skb_shinfo(skb2);
u8 tsflags = shinfo->tx_flags & SKBTX_ANY_TSTAMP;
@@ -2446,13 +2452,12 @@ u32 __tcp_select_window(struct sock *sk)
 void tcp_skb_collapse_tstamp(struct sk_buff *skb,
 const struct sk_buff *next_skb)
 {
-   const struct skb_shared_info *next_shinfo = skb_shinfo(next_skb);
-   u8 tsflags = next_shinfo->tx_flags & SKBTX_ANY_TSTAMP;
-
-   if (unlikely(tsflags)) {
+   if (unlikely(tcp_has_tx_tstamp(next_skb))) {
+   const struct skb_shared_info *next_shinfo =
+   skb_shinfo(next_skb);
struct skb_shared_info *shinfo = skb_shinfo(skb);
 
-   shinfo->tx_flags |= tsflags;
+   shinfo->tx_flags |= next_shinfo->tx_flags & SKBTX_ANY_TSTAMP;
shinfo->tskey = next_shinfo->tskey;
TCP_SKB_CB(skb)->txstamp_ack |=
TCP_SKB_CB(next_skb)->txstamp_ack;
diff --git a/net/socket.c b/net/socket.c
index 5dbb0bb..7789d79 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -600,9 +600,6 @@ void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
flags |= 

[PATCH v2 net-next 1/2] tcp: remove an unnecessary check in tcp_tx_timestamp

2016-04-27 Thread Soheil Hassas Yeganeh
From: Soheil Hassas Yeganeh 

Remove the redundant check for sk->sk_tsflags in tcp_tx_timestamp.

tcp_tx_timestamp() receives the tsflags as a parameter. As a
result the "sk->sk_tsflags || tsflags" is redundant, since
tsflags already includes sk->sk_tsflags plus overrides from
control messages.

Signed-off-by: Soheil Hassas Yeganeh 
---
 net/ipv4/tcp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4d73858..3c542dc 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -430,7 +430,7 @@ EXPORT_SYMBOL(tcp_init_sock);
 
 static void tcp_tx_timestamp(struct sock *sk, u16 tsflags, struct sk_buff *skb)
 {
-   if (sk->sk_tsflags || tsflags) {
+   if (tsflags) {
struct skb_shared_info *shinfo = skb_shinfo(skb);
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
 
-- 
2.8.0.rc3.226.g39d4020



[PATCH v2 net-next 0/2] tcp: simplify ack tx timestamps

2016-04-27 Thread Soheil Hassas Yeganeh
From: Soheil Hassas Yeganeh 

v2:
- Fully remove SKBTX_ACK_TSTAMP, as suggested by Willem de Bruijn.

This patch series aims at removing redundant checks and fields
for ack timestamps for TCP.

Soheil Hassas Yeganeh (2):
  tcp: remove an unnecessary check in tcp_tx_timestamp
  tcp: remove SKBTX_ACK_TSTAMP since it is redundant

 include/linux/skbuff.h |  6 +-
 net/ipv4/tcp.c |  7 ---
 net/ipv4/tcp_input.c   |  3 +--
 net/ipv4/tcp_output.c  | 17 +++--
 net/socket.c   |  3 ---
 5 files changed, 17 insertions(+), 19 deletions(-)

-- 
2.8.0.rc3.226.g39d4020



Re: iproute2: bash completion function for tc

2016-04-27 Thread Alexei Starovoitov
On Tue, Apr 26, 2016 at 09:28:17AM +0200, Quentin Monnet wrote:
> Hi Jamal, Stephen,
> 
> I searched for a function providing auto-completion for `tc` utility in
> bash, but I found none. So I have created one, and I would like to share it
> with the community. It is available here:
> https://github.com/6WIND/tc_bash-completion/blob/master/tc
> I would like to make it easily available to tc users, so here is a
> twofold request:
> 
> * I do not know where to submit the code. Should I submit here on netdev
> for inclusion in iproute2 package, or rather to the bash-completion
> repository on GitHub? I feel like it would receive better feedback and
> updates if pushed to iproute2. Could you please provide some advice here?
> * The completion for `tc` seems to work well; I have tested it with many
> commands, but I am no tc expert, and there are probably some cases where
> the completion fails to propose the correct choices. I would be really
> interested in any feedback/bug reports that you, or anyone on this list
> who uses tc, could provide.

that looks very interesting.
I think making it a part of iproute2 is a good thing.
How about installing it into /etc/iproute2/ ?
Stephen, any comments?



Re: [PATCH net-next 0/2] net: avoid some atomic ops when FASYNC is not used

2016-04-27 Thread David Miller
From: Eric Dumazet 
Date: Mon, 25 Apr 2016 10:39:31 -0700

> We can avoid some atomic operations on sockets not using FASYNC

I guess a user can do weird things and set/clear the FASYNC bit in the
middle of the SOCKWQ_ASYNC_ bit being set, and reset the FASYNC bit
later and the SOCKWQ_* state is stale.

However, that's probably not worth handling explicitly.

Series applied, thanks.


Re: [net-next PATCH V3 0/5] samples/bpf: Improve user experience

2016-04-27 Thread David Miller
From: Jesper Dangaard Brouer 
Date: Wed, 27 Apr 2016 09:30:08 +0200

> It is a steep learning curve getting started with using the eBPF
> examples in samples/bpf/.  There are several dependencies, and
> specific versions of these dependencies.  Invoking make in the correct
> manner is also slightly obscure.
> 
> This patchset cleans up, documents and hopefully improves the first time
> user experience with the eBPF samples directory by auto-detecting
> certain scenarios.
> 
> V3:
>  - Add Alexei's ACKs
>  - Remove README paragraph about LLVM experimental BPF target
>as it only existed between LLVM version 3.6 to 3.7.
> 
> V2:
>  - Adjusted recommend minimum versions to 3.7.1
>  - Included clang build instructions
>  - New patch adding CLANG variable and validation of command

Please respin addressing Naveen's feedback, thanks.


Re: [PATCH net-next 00/17] net: snmp: update SNMP methods

2016-04-27 Thread David Miller
From: Eric Dumazet 
Date: Wed, 27 Apr 2016 16:44:26 -0700

> In the old days (before linux-3.0), SNMP counters were duplicated,
> one set for user context, and another one for BH context.
> 
> After commit 8f0ea0fe3a03 ("snmp: reduce percpu needs by 50%")
> we have a single copy, and what really matters is preemption being
> enabled or disabled, since we use this_cpu_inc() or __this_cpu_inc()
> respectively.
> 
> This patch series kills the obsolete STATS_USER() helpers,
> and rename all XXX_BH() helpers to __XXX() ones, to more
> closely match conventions used to update per cpu variables.
> 
> This is probably going to hurt maintainers job for a while,
> since cherry-picks will not be clean, but this had to be
> cleaned at one point. I am so sorry guys.

Looks good to me, series applied, thanks Eric.


Re: [net-next v2 00/14][pull request] 40GbE Intel Wired LAN Driver Updates 2016-04-27

2016-04-27 Thread David Miller
From: Jeff Kirsher 
Date: Wed, 27 Apr 2016 13:15:39 -0700

> This series contains updates to i40e and i40evf.

Pulled, thanks Jeff.


[PATCH net 3/3] samples/bpf: fix trace_output example

2016-04-27 Thread Alexei Starovoitov
llvm cannot always recognize memset as builtin function and optimize
it away, so just delete it. It was a leftover from testing
of bpf_perf_event_output() with large data structures.

Fixes: 39111695b1b8 ("samples: bpf: add bpf_perf_event_output example")
Signed-off-by: Alexei Starovoitov 
---
 samples/bpf/trace_output_kern.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/samples/bpf/trace_output_kern.c b/samples/bpf/trace_output_kern.c
index 8d8d1ec429eb..9b96f4fb8cea 100644
--- a/samples/bpf/trace_output_kern.c
+++ b/samples/bpf/trace_output_kern.c
@@ -18,7 +18,6 @@ int bpf_prog1(struct pt_regs *ctx)
u64 cookie;
} data;
 
-   memset(&data, 0, sizeof(data));
data.pid = bpf_get_current_pid_tgid();
data.cookie = 0x12345678;
 
-- 
2.8.0



[PATCH net 1/3] bpf: fix refcnt overflow

2016-04-27 Thread Alexei Starovoitov
On a system with >32Gbyte of physical memory and infinite RLIMIT_MEMLOCK,
the malicious application may overflow 32-bit bpf program refcnt.
It's also possible to overflow map refcnt on 1Tb system.
Impose 32k hard limit which means that the same bpf program or
map cannot be shared by more than 32k processes.

Fixes: 1be7f75d1668 ("bpf: enable non-root eBPF programs")
Reported-by: Jann Horn 
Signed-off-by: Alexei Starovoitov 
Acked-by: Daniel Borkmann 
---
 include/linux/bpf.h   |  3 ++-
 kernel/bpf/inode.c|  7 ---
 kernel/bpf/syscall.c  | 24 
 kernel/bpf/verifier.c | 11 +++
 4 files changed, 33 insertions(+), 12 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 21ee41b92e8a..f1d5c5acc8dd 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -171,12 +171,13 @@ void bpf_register_prog_type(struct bpf_prog_type_list 
*tl);
 void bpf_register_map_type(struct bpf_map_type_list *tl);
 
 struct bpf_prog *bpf_prog_get(u32 ufd);
+struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog);
 void bpf_prog_put(struct bpf_prog *prog);
 void bpf_prog_put_rcu(struct bpf_prog *prog);
 
 struct bpf_map *bpf_map_get_with_uref(u32 ufd);
 struct bpf_map *__bpf_map_get(struct fd f);
-void bpf_map_inc(struct bpf_map *map, bool uref);
+struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref);
 void bpf_map_put_with_uref(struct bpf_map *map);
 void bpf_map_put(struct bpf_map *map);
 int bpf_map_precharge_memlock(u32 pages);
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index f2ece3c174a5..8f94ca1860cf 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -31,10 +31,10 @@ static void *bpf_any_get(void *raw, enum bpf_type type)
 {
switch (type) {
case BPF_TYPE_PROG:
-   atomic_inc(&((struct bpf_prog *)raw)->aux->refcnt);
+   raw = bpf_prog_inc(raw);
break;
case BPF_TYPE_MAP:
-   bpf_map_inc(raw, true);
+   raw = bpf_map_inc(raw, true);
break;
default:
WARN_ON_ONCE(1);
@@ -297,7 +297,8 @@ static void *bpf_obj_do_get(const struct filename *pathname,
goto out;
 
raw = bpf_any_get(inode->i_private, *type);
-   touch_atime(&path);
+   if (!IS_ERR(raw))
+   touch_atime(&path);
 
path_put(&path);
return raw;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index adc5e4bd74f8..cf5e9f7ad13a 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -218,11 +218,18 @@ struct bpf_map *__bpf_map_get(struct fd f)
return f.file->private_data;
 }
 
-void bpf_map_inc(struct bpf_map *map, bool uref)
+/* prog's and map's refcnt limit */
+#define BPF_MAX_REFCNT 32768
+
+struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
 {
-   atomic_inc(&map->refcnt);
+   if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
+   atomic_dec(&map->refcnt);
+   return ERR_PTR(-EBUSY);
+   }
if (uref)
atomic_inc(&map->usercnt);
+   return map;
 }
 
 struct bpf_map *bpf_map_get_with_uref(u32 ufd)
@@ -234,7 +241,7 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd)
if (IS_ERR(map))
return map;
 
-   bpf_map_inc(map, true);
+   map = bpf_map_inc(map, true);
fdput(f);
 
return map;
@@ -658,6 +665,15 @@ static struct bpf_prog *__bpf_prog_get(struct fd f)
return f.file->private_data;
 }
 
+struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
+{
+   if (atomic_inc_return(&prog->aux->refcnt) > BPF_MAX_REFCNT) {
+   atomic_dec(&prog->aux->refcnt);
+   return ERR_PTR(-EBUSY);
+   }
+   return prog;
+}
+
 /* called by sockets/tracing/seccomp before attaching program to an event
  * pairs with bpf_prog_put()
  */
@@ -670,7 +686,7 @@ struct bpf_prog *bpf_prog_get(u32 ufd)
if (IS_ERR(prog))
return prog;
 
-   atomic_inc(&prog->aux->refcnt);
+   prog = bpf_prog_inc(prog);
fdput(f);
 
return prog;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index db2574e7b8b0..89bcaa0966da 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2049,15 +2049,18 @@ static int replace_map_fd_with_map_ptr(struct 
verifier_env *env)
return -E2BIG;
}
 
-   /* remember this map */
-   env->used_maps[env->used_map_cnt++] = map;
-
/* hold the map. If the program is rejected by verifier,
 * the map will be released by release_maps() or it
 * will be used by the valid program until it's unloaded
 * and all maps are released in free_bpf_prog_info()
 */
-   bpf_map_inc(map, false);
+   map = bpf_map_inc(map, false);
+   

[PATCH net 0/3] bpf: fix several bugs

2016-04-27 Thread Alexei Starovoitov
First two patches address bugs found by Jann Horn.
Last patch is a minor samples fix spotted during the testing.

Alexei Starovoitov (3):
  bpf: fix refcnt overflow
  bpf: fix check_map_func_compatibility logic
  samples/bpf: fix trace_output example

 include/linux/bpf.h |  3 +-
 kernel/bpf/inode.c  |  7 ++--
 kernel/bpf/syscall.c| 24 ++---
 kernel/bpf/verifier.c   | 76 +
 samples/bpf/trace_output_kern.c |  1 -
 5 files changed, 73 insertions(+), 38 deletions(-)

-- 
2.8.0



[PATCH net 2/3] bpf: fix check_map_func_compatibility logic

2016-04-27 Thread Alexei Starovoitov
The commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that 
get the selected hardware PMU conuter")
introduced clever way to check bpf_helper<->map_type compatibility.
Later on commit a43eec304259 ("bpf: introduce bpf_perf_event_output() helper") 
adjusted
the logic and inadvertently broke it.
Get rid of the clever bool compare and go back to two-way check
from map and from helper perspective.

Fixes: a43eec304259 ("bpf: introduce bpf_perf_event_output() helper")
Reported-by: Jann Horn 
Signed-off-by: Alexei Starovoitov 
Signed-off-by: Daniel Borkmann 
---
 kernel/bpf/verifier.c | 65 +++
 1 file changed, 40 insertions(+), 25 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 89bcaa0966da..c5c17a62f509 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -239,16 +239,6 @@ static const char * const reg_type_str[] = {
[CONST_IMM] = "imm",
 };
 
-static const struct {
-   int map_type;
-   int func_id;
-} func_limit[] = {
-   {BPF_MAP_TYPE_PROG_ARRAY, BPF_FUNC_tail_call},
-   {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_read},
-   {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_output},
-   {BPF_MAP_TYPE_STACK_TRACE, BPF_FUNC_get_stackid},
-};
-
 static void print_verifier_state(struct verifier_env *env)
 {
enum bpf_reg_type t;
@@ -921,27 +911,52 @@ static int check_func_arg(struct verifier_env *env, u32 
regno,
 
 static int check_map_func_compatibility(struct bpf_map *map, int func_id)
 {
-   bool bool_map, bool_func;
-   int i;
-
if (!map)
return 0;
 
-   for (i = 0; i < ARRAY_SIZE(func_limit); i++) {
-   bool_map = (map->map_type == func_limit[i].map_type);
-   bool_func = (func_id == func_limit[i].func_id);
-   /* only when map & func pair match it can continue.
-* don't allow any other map type to be passed into
-* the special func;
-*/
-   if (bool_func && bool_map != bool_func) {
-   verbose("cannot pass map_type %d into func %d\n",
-   map->map_type, func_id);
-   return -EINVAL;
-   }
+   /* We need a two way check, first is from map perspective ... */
+   switch (map->map_type) {
+   case BPF_MAP_TYPE_PROG_ARRAY:
+   if (func_id != BPF_FUNC_tail_call)
+   goto error;
+   break;
+   case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
+   if (func_id != BPF_FUNC_perf_event_read &&
+   func_id != BPF_FUNC_perf_event_output)
+   goto error;
+   break;
+   case BPF_MAP_TYPE_STACK_TRACE:
+   if (func_id != BPF_FUNC_get_stackid)
+   goto error;
+   break;
+   default:
+   break;
+   }
+
+   /* ... and second from the function itself. */
+   switch (func_id) {
+   case BPF_FUNC_tail_call:
+   if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
+   goto error;
+   break;
+   case BPF_FUNC_perf_event_read:
+   case BPF_FUNC_perf_event_output:
+   if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
+   goto error;
+   break;
+   case BPF_FUNC_get_stackid:
+   if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
+   goto error;
+   break;
+   default:
+   break;
}
 
return 0;
+error:
+   verbose("cannot pass map_type %d into func %d\n",
+   map->map_type, func_id);
+   return -EINVAL;
 }
 
 static int check_call(struct verifier_env *env, int func_id)
-- 
2.8.0



Re: [PATCH v2 net-next 11/13] Documentation: Bindings: Update DT binding for separating dsaf dev support

2016-04-27 Thread Yisen Zhuang
Hi Rob,

Thanks for your comments.

在 2016/4/27 23:25, Rob Herring 写道:
> On Tue, Apr 26, 2016 at 10:33 PM, Yisen Zhuang  
> wrote:
>> Hi Rob and David,
>>
>> Please see my comments inline.
>>
>> David have merged this series to net-next, but we need to modify some codes 
>> according
>> to Rob's comments. I am not sure if i need to send V3 for this series, or 
>> separate
>> patches of documentation to independent series and generate a new patch for 
>> hns base
>> on current net-next?
> 
> That's David's call. I'm guessing he wants follow-up patches on top of these.

Okay, I will send a new series base on current net-next.

> 
>> 在 2016/4/26 20:48, Rob Herring 写道:
>>> On Sat, Apr 23, 2016 at 05:05:15PM +0800, Yisen Zhuang wrote:
 Because debug dsaf port was separated from service dsaf port, this patch
 updates the related information of DT binding.
>>>
>>> Separated when? New version of the h/w? If so, where's the new
>>> compatible string? This is quite a big binding change.
>>
>> There isn't any change of h/w. I separated debug dsaf port from service dsaf
>> port to make the code simpler and more readable.
> 
> Okay.
> 
> [...]
> 
 +  serdes-syscon rather than this address.
The third region is the PPE register base and size.
 -  The fourth region is dsa fabric base register and size.
 -  The fifth region is cpld base register and size, it is not required if 
 do not use cpld.
 -- phy-handle: phy handle of physicl port, 0 if not any phy device. see 
 ethernet.txt [1].
 +  The fourth region is dsa fabric base register and size. It is not 
 required for
 +  single-port mode.
 +- reg-names: may be ppe-base and(or) dsaf-base. It is used to find the
 +  corresponding reg's index.
>>>
>>> But you have up to 5 regions.
>>>
>>> The variable nature of what regions you have tells me you need more
>>> specific compatible strings for each chip.
>>
>> we didn't add support of new h/w. We added these regions to make code simple 
>> and readability.
>> If we need to add support of next h/w version next time, we don't need to 
>> add many branches
>> for these attributes. So we didn't add a new compatible here.
> 
> Not sure what you mean by branches. It's fine to put properties for
> things that vary among h/w versions, but new compatible strings will
> be needed for any new versions.

I mean that we put properties for things that vary among h/w versions. If we 
add support for
new h/w versions next time, we will add new compatible strings.

> 
> 
 +- port: subnodes of dsaf. A dsaf node may contain several port 
 nodes(Depending
 +  on mode of dsaf). Port node contain some attributes listed below:
 +- port-id: is physical port index in one dsaf.
>>>
>>> Indexes should generally be avoided. What does the number correspond
>>> to in h/w (if anything)?
>>
>> port-id is index for a port in dsaf, it is correspond to index of PHY showed 
>> below.
> 
> Okay, you should use reg property here instead.

Agree, thanks.

> 
>>
>>  CPU
>>   |
>> ---
>> | |   |
>> ---   -
>> | |  ||   |   |   |   |
>> |PPE ||  PPE  |   |  PPE  |
>> | |  ||   |   |   |   |   |
>> | |  ||   |   |   |   |   |
>> |  crossbar  ||   |   |   |   |   |
>> | |  ||   |   |   |   |   |
>> |   --   ||   |   |   |   |   |
>> |   | | |  |  |  |   ||   |   |   |   |   |
>> |   | | |  |  |  |   ||   |   |   |   |   |
>> |  MAC   MAC   MACMACMACMAC  ||  MAC  |   |  MAC  |
>> |   | | |  |  |  |   ||   |   |   |   |   |
>> ---   -
>> | | |  |  |  |\/  |/  |
>>PHY   PHY   PHYPHYPHYPHY\  /  PHY  /  PHY
>> \/   /
>>  \  /   /
>>  DSAF(three platform device)
>>
>>>
 +- phy-handle: phy handle of physicl port. It is not required if there 
 isn't
> 
> Another typo here.

Agree, thanks.

> 
> Rob
> 
> .
> 



[PATCH net v3 5/5] drivers: net: cpsw: use of_phy_connect() in fixed-link case

2016-04-27 Thread David Rivshin (Allworx)
From: David Rivshin 

If a fixed-link DT subnode is used, the phy_device was looked up so
that a PHY ID string could be constructed and passed to phy_connect().
This is not necessary, as the device_node can be passed directly to
of_phy_connect() instead. This reuses the same codepath as if the
phy-handle DT property was used.

Signed-off-by: David Rivshin 
Tested-by: Nicolas Chauvet 
Tested-by: Andrew Goodbody 
Reviewed-by: Mugunthan V N 
Reviewed-by: Grygorii Strashko 
---

Changes since v2 [1]:
- Added Tested-by from Andrew Goodbody [3]
- Added Reviewed-by from Mugunthan V N [4]
- Added Reviewed-by from Grygorii Strashko [5]

Changes since v1 [2]:
- Rebased (trivial conflict, e5a03bfd modified the deleted snprintf)
- Added Tested-by from Nicolas Chauvet

[1] http://patchwork.ozlabs.org/patch/613276/
[2] http://patchwork.ozlabs.org/patch/560327/
[3] https://lkml.org/lkml/2016/4/22/537
[4] https://lkml.org/lkml/2016/4/22/63
[5] https://lkml.org/lkml/2016/4/22/529


 drivers/net/ethernet/ti/cpsw.c | 11 +--
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 712bc6d..e2fcdf1 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -2044,30 +2044,21 @@ static int cpsw_probe_dt(struct cpsw_platform_data 
*data,
"phy-handle", 0);
parp = of_get_property(slave_node, "phy_id", &lenp);
if (slave_data->phy_node) {
dev_dbg(&pdev->dev,
"slave[%d] using phy-handle=\"%s\"\n",
i, slave_data->phy_node->full_name);
} else if (of_phy_is_fixed_link(slave_node)) {
-   struct device_node *phy_node;
-   struct phy_device *phy_dev;
-
/* In the case of a fixed PHY, the DT node associated
 * to the PHY is the Ethernet MAC DT node.
 */
ret = of_phy_register_fixed_link(slave_node);
if (ret)
return ret;
-   phy_node = of_node_get(slave_node);
-   phy_dev = of_phy_find_device(phy_node);
-   if (!phy_dev)
-   return -ENODEV;
-   snprintf(slave_data->phy_id, sizeof(slave_data->phy_id),
-PHY_ID_FMT, phy_dev->mdio.bus->id,
-phy_dev->mdio.addr);
+   slave_data->phy_node = of_node_get(slave_node);
} else if (parp) {
u32 phyid;
struct device_node *mdio_node;
struct platform_device *mdio;
 
if (lenp != (sizeof(__be32) * 2)) {
dev_err(&pdev->dev, "Invalid slave[%d] phy_id 
property\n", i);
-- 
2.5.5



[PATCH net v3 4/5] dt: cpsw: phy-handle, phy_id, and fixed-link are mutually exclusive

2016-04-27 Thread David Rivshin (Allworx)
From: David Rivshin 

The phy-handle, phy_id, and fixed-link properties are mutually exclusive,
and only one need be specified. Make this clear in the binding doc.

Also mark the phy_id property as deprecated, as phy-handle should be
used instead.

Signed-off-by: David Rivshin 
---

Changes since v2 [1]:
- split from previous patch 2
- marked the phy_id property as deprecated [3]
- removed Rob Herring's Acked-by due to above change

Changes since v1 [2]:
- Rebased (no conflicts)
- Added Tested-by from Nicolas Chauvet
- Added Acked-by from Rob Herring for the binding change

[1] http://patchwork.ozlabs.org/patch/613260/
[2] http://patchwork.ozlabs.org/patch/560324/
[3] https://lkml.org/lkml/2016/4/22/494


 Documentation/devicetree/bindings/net/cpsw.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/cpsw.txt 
b/Documentation/devicetree/bindings/net/cpsw.txt
index 28a4781..0ae0649 100644
--- a/Documentation/devicetree/bindings/net/cpsw.txt
+++ b/Documentation/devicetree/bindings/net/cpsw.txt
@@ -41,21 +41,21 @@ Optional properties:
 Slave Properties:
 Required properties:
 - phy-mode : See ethernet.txt file in the same directory
 
 Optional properties:
 - dual_emac_res_vlan   : Specifies VID to be used to segregate the ports
 - mac-address  : See ethernet.txt file in the same directory
-- phy_id   : Specifies slave phy id
+- phy_id   : Specifies slave phy id (deprecated, use phy-handle)
 - phy-handle   : See ethernet.txt file in the same directory
 
 Slave sub-nodes:
 - fixed-link   : See fixed-link.txt file in the same directory
- Either the property phy_id, or the sub-node
- fixed-link can be specified
+
+Note: Exactly one of phy_id, phy-handle, or fixed-link must be specified.
 
 Note: "ti,hwmods" field is used to fetch the base address and irq
 resources from TI, omap hwmod data base during device registration.
 Future plan is to migrate hwmod data base contents into device tree
 blob so that, all the required data will be used from device tree dts
 file.
 
-- 
2.5.5



[PATCH net v3 3/5] drivers: net: cpsw: don't ignore phy-mode if phy-handle is used

2016-04-27 Thread David Rivshin (Allworx)
From: David Rivshin 

The phy-mode emac property was only being processed in the phy_id
or fixed-link cases. However if phy-handle was specified instead,
an error message would complain about the lack of phy_id or
fixed-link, and then jump past the of_get_phy_mode(). This would
result in the PHY mode defaulting to MII, regardless of what the
devicetree specified.

Fixes: 9e42f715264f ("drivers: net: cpsw: add phy-handle parsing")
Signed-off-by: David Rivshin 
Tested-by: Nicolas Chauvet 
Tested-by: Andrew Goodbody 
Reviewed-by: Mugunthan V N 
---
I would suggest this for -stable. It should apply cleanly as far back
as 4.4.

Changes since v2 [1]:
- split from previous patch 2
- Added Tested-by from Andrew Goodbody [3]
- Added Reviewed-by from Mugunthan V N [4]
- rewrote commit log to focus on the functional bug fixed, rather
  than the bogus error message

Changes since v1 [2]:
- Rebased (no conflicts)
- Added Tested-by from Nicolas Chauvet
- Added Acked-by from Rob Herring for the binding change

[1] http://patchwork.ozlabs.org/patch/613260/
[2] http://patchwork.ozlabs.org/patch/560324/
[3] https://lkml.org/lkml/2016/4/22/537
[4] https://lkml.org/lkml/2016/4/22/63


 drivers/net/ethernet/ti/cpsw.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 5903448..712bc6d 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -2039,15 +2039,19 @@ static int cpsw_probe_dt(struct cpsw_platform_data 
*data,
/* This is no slave child node, continue */
if (strcmp(slave_node->name, "slave"))
continue;
 
slave_data->phy_node = of_parse_phandle(slave_node,
"phy-handle", 0);
parp = of_get_property(slave_node, "phy_id", &lenp);
-   if (of_phy_is_fixed_link(slave_node)) {
+   if (slave_data->phy_node) {
+   dev_dbg(&pdev->dev,
+   "slave[%d] using phy-handle=\"%s\"\n",
+   i, slave_data->phy_node->full_name);
+   } else if (of_phy_is_fixed_link(slave_node)) {
struct device_node *phy_node;
struct phy_device *phy_dev;
 
/* In the case of a fixed PHY, the DT node associated
 * to the PHY is the Ethernet MAC DT node.
 */
ret = of_phy_register_fixed_link(slave_node);
@@ -2076,15 +2080,17 @@ static int cpsw_probe_dt(struct cpsw_platform_data 
*data,
if (!mdio) {
dev_err(&pdev->dev, "Missing mdio platform 
device\n");
return -EINVAL;
}
snprintf(slave_data->phy_id, sizeof(slave_data->phy_id),
 PHY_ID_FMT, mdio->name, phyid);
} else {
-   dev_err(&pdev->dev, "No slave[%d] phy_id or fixed-link 
property\n", i);
+   dev_err(&pdev->dev,
+   "No slave[%d] phy_id, phy-handle, or fixed-link 
property\n",
+   i);
goto no_phy_slave;
}
slave_data->phy_if = of_get_phy_mode(slave_node);
if (slave_data->phy_if < 0) {
dev_err(&pdev->dev, "Missing or malformed slave[%d] 
phy-mode property\n",
i);
return slave_data->phy_if;
-- 
2.5.5



[PATCH net v3 2/5] drivers: net: cpsw: fix segfault in case of bad phy-handle

2016-04-27 Thread David Rivshin (Allworx)
From: David Rivshin 

If an emac node has a phy-handle property that points to something
which is not a phy, then a segmentation fault will occur when the
interface is brought up. This is because while phy_connect() will
return ERR_PTR() on failure, of_phy_connect() will return NULL.
The common error check uses IS_ERR(), and so missed when
of_phy_connect() fails. The NULL pointer is then dereferenced.

Also, the common error message referenced slave->data->phy_id,
which would be empty in the case of phy-handle. Instead, use the
name of the device_node as a useful identifier. And in the phy_id
case add the error code for completeness.

Fixes: 9e42f715264f ("drivers: net: cpsw: add phy-handle parsing")
Signed-off-by: David Rivshin 
---
I would suggest this for -stable. It should apply cleanly as far back
as 4.5, although there is a trivial conflict in 4.4. I can produce a
separate patch against linux-4.4.y if preferred.

Changes since v2:
- new patch, although fixing part of previous patch 2 [1]

Changes since v1 [2]:
- Rebased (no conflicts)
- Added Tested-by from Nicolas Chauvet
- Added Acked-by from Rob Herring for the binding change

[1] http://patchwork.ozlabs.org/patch/613260/
[2] http://patchwork.ozlabs.org/patch/560324/


 drivers/net/ethernet/ti/cpsw.c | 37 +++--
 1 file changed, 23 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index ce0b0ca..5903448 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1143,33 +1143,42 @@ static void cpsw_slave_open(struct cpsw_slave *slave, 
struct cpsw_priv *priv)
 
if (priv->data.dual_emac)
cpsw_add_dual_emac_def_ale_entries(priv, slave, slave_port);
else
cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
   1 << slave_port, 0, 0, ALE_MCAST_FWD_2);
 
-   if (slave->data->phy_node)
+   if (slave->data->phy_node) {
slave->phy = of_phy_connect(priv->ndev, slave->data->phy_node,
 &cpsw_adjust_link, 0, slave->data->phy_if);
-   else
+   if (!slave->phy) {
+   dev_err(priv->dev, "phy \"%s\" not found on slave %d\n",
+   slave->data->phy_node->full_name,
+   slave->slave_num);
+   return;
+   }
+   } else {
slave->phy = phy_connect(priv->ndev, slave->data->phy_id,
 &cpsw_adjust_link, slave->data->phy_if);
-   if (IS_ERR(slave->phy)) {
-   dev_err(priv->dev, "phy %s not found on slave %d\n",
-   slave->data->phy_id, slave->slave_num);
-   slave->phy = NULL;
-   } else {
-   phy_attached_info(slave->phy);
-
-   phy_start(slave->phy);
-
-   /* Configure GMII_SEL register */
-   cpsw_phy_sel(&priv->pdev->dev, slave->phy->interface,
-slave->slave_num);
+   if (IS_ERR(slave->phy)) {
+   dev_err(priv->dev,
+   "phy \"%s\" not found on slave %d, err %ld\n",
+   slave->data->phy_id, slave->slave_num,
+   PTR_ERR(slave->phy));
+   slave->phy = NULL;
+   return;
+   }
}
+
+   phy_attached_info(slave->phy);
+
+   phy_start(slave->phy);
+
+   /* Configure GMII_SEL register */
+   cpsw_phy_sel(&priv->pdev->dev, slave->phy->interface, slave->slave_num);
 }
 
 static inline void cpsw_add_default_vlan(struct cpsw_priv *priv)
 {
const int vlan = priv->data.default_vlan;
const int port = priv->host_port;
u32 reg;
-- 
2.5.5



[PATCH net v3 1/5] drivers: net: cpsw: fix parsing of phy-handle DT property in dual_emac config

2016-04-27 Thread David Rivshin (Allworx)
From: David Rivshin 

Commit 9e42f715264ff158478fa30eaed847f6e131366b ("drivers: net: cpsw: add
phy-handle parsing") saved the "phy-handle" phandle into a new cpsw_priv
field. However, phy connections are per-slave, so the phy_node field should
be in cpsw_slave_data rather than cpsw_priv.

This would go unnoticed in a single emac configuration. But in dual_emac
mode, the last "phy-handle" property parsed for either slave would be used
by both of them, causing them both to refer to the same phy_device.

Fixes: 9e42f715264f ("drivers: net: cpsw: add phy-handle parsing")
Signed-off-by: David Rivshin 
Tested-by: Nicolas Chauvet 
Tested-by: Andrew Goodbody 
Reviewed-by: Mugunthan V N 
Reviewed-by: Grygorii Strashko 
---
I would suggest this for -stable. It should apply cleanly as far back
as 4.4.

Changes since v2 [1]:
- Added Tested-by from Andrew Goodbody [3]
- Added Reviewed-by from Mugunthan V N [4]
- Added Reviewed-by from Grygorii Strashko [5]

Changes since v1 [2]:
- Rebased (no conflicts)
- Added Tested-by from Nicolas Chauvet

[1] http://patchwork.ozlabs.org/patch/613237/
[2] http://patchwork.ozlabs.org/patch/560326/
[3] https://lkml.org/lkml/2016/4/22/537
[4] https://lkml.org/lkml/2016/4/22/63
[5] https://lkml.org/lkml/2016/4/22/496


 drivers/net/ethernet/ti/cpsw.c | 13 ++---
 drivers/net/ethernet/ti/cpsw.h |  1 +
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index bbb77cd..ce0b0ca 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -363,15 +363,14 @@ static inline void slave_write(struct cpsw_slave *slave, 
u32 val, u32 offset)
__raw_writel(val, slave->regs + offset);
 }
 
 struct cpsw_priv {
spinlock_t  lock;
struct platform_device  *pdev;
struct net_device   *ndev;
-   struct device_node  *phy_node;
struct napi_struct  napi_rx;
struct napi_struct  napi_tx;
struct device   *dev;
struct cpsw_platform_data   data;
struct cpsw_ss_regs __iomem *regs;
struct cpsw_wr_regs __iomem *wr_regs;
u8 __iomem  *hw_stats;
@@ -1144,16 +1143,16 @@ static void cpsw_slave_open(struct cpsw_slave *slave, 
struct cpsw_priv *priv)
 
if (priv->data.dual_emac)
cpsw_add_dual_emac_def_ale_entries(priv, slave, slave_port);
else
cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
   1 << slave_port, 0, 0, ALE_MCAST_FWD_2);
 
-   if (priv->phy_node)
-   slave->phy = of_phy_connect(priv->ndev, priv->phy_node,
+   if (slave->data->phy_node)
+   slave->phy = of_phy_connect(priv->ndev, slave->data->phy_node,
 &cpsw_adjust_link, 0, slave->data->phy_if);
else
slave->phy = phy_connect(priv->ndev, slave->data->phy_id,
 &cpsw_adjust_link, slave->data->phy_if);
if (IS_ERR(slave->phy)) {
dev_err(priv->dev, "phy %s not found on slave %d\n",
slave->data->phy_id, slave->slave_num);
@@ -1936,20 +1935,19 @@ static void cpsw_slave_init(struct cpsw_slave *slave, 
struct cpsw_priv *priv,
 
slave->data = data;
slave->regs = regs + slave_reg_ofs;
slave->sliver   = regs + sliver_reg_ofs;
slave->port_vlan = data->dual_emac_res_vlan;
 }
 
-static int cpsw_probe_dt(struct cpsw_priv *priv,
+static int cpsw_probe_dt(struct cpsw_platform_data *data,
 struct platform_device *pdev)
 {
struct device_node *node = pdev->dev.of_node;
struct device_node *slave_node;
-   struct cpsw_platform_data *data = &priv->data;
int i = 0, ret;
u32 prop;
 
if (!node)
return -EINVAL;
 
if (of_property_read_u32(node, "slaves", &prop)) {
@@ -2029,15 +2027,16 @@ static int cpsw_probe_dt(struct cpsw_priv *priv,
int lenp;
const __be32 *parp;
 
/* This is no slave child node, continue */
if (strcmp(slave_node->name, "slave"))
continue;
 
-   priv->phy_node = of_parse_phandle(slave_node, "phy-handle", 0);
+   slave_data->phy_node = of_parse_phandle(slave_node,
+   "phy-handle", 0);
parp = of_get_property(slave_node, "phy_id", &lenp);
if (of_phy_is_fixed_link(slave_node)) {
struct device_node *phy_node;
struct phy_device *phy_dev;
 
/* In the case of a fixed PHY, the DT node associated
 

[PATCH net v3 0/5] drivers: net: cpsw: phy-handle fixes

2016-04-27 Thread David Rivshin (Allworx)
From: David Rivshin 

This series fixes a number of related issues around using phy-handle
properties in cpsw emac nodes.

Patch 1 fixes a bug if more than one slave is used, and either
slave uses the phy-handle property in the devicetree.

Patch 2 fixes a NULL pointer dereference which can occur if a
phy-handle property is used and of_phy_connect() return NULL,
such as with a bad devicetree.

Patch 3 fixes an issue where the phy-mode property would be ignored
if a phy-handle property was used. This also fixes a bogus error
message that would be emitted.

Patch 4 makes the binding documentation more explicit that
exactly one PHY property should be used, and also marks phy_id as
deprecated.

Patch 5 cleans up the fixed-link case to work like the now-fixed
phy-handle case.

I have tested on the following hardware configurations:
 - (EVMSK) dual emac, phy_id property in both slaves
 - (EVMSK) dual emac, phy-handle property in both slaves
 - (EVMSK) a bad phy-handle property pointing to 
 - (EVMSK) phy_id property with incorrect PHY address
 - (BeagleBoneBlack) single emac, phy_id property
 - (custom) single emac, fixed-link subnode

Andrew Goodbody reported testing v2 on a board that doesn't use
dual_emac mode, but with 2 PHYs using phy-handle properties [1].

Nicolas Chauvet reported testing v2 on an HP t410 (dm8148).

Markus Brunner reported testing v1 on the following [2]:
 - emac0 with phy_id and emac1 with fixed phy
 - emac0 with phy-handle and emac1 with fixed phy
 - emac0 with fixed phy and emac1 with fixed phy

[1] https://lkml.org/lkml/2016/4/22/537
[2] http://www.spinics.net/lists/netdev/msg357890.html

David Rivshin (5):
  drivers: net: cpsw: fix parsing of phy-handle DT property in dual_emac
config
  drivers: net: cpsw: fix segfault in case of bad phy-handle
  drivers: net: cpsw: don't ignore phy-mode if phy-handle is used
  dt: cpsw: phy-handle, phy_id, and fixed-link are mutually exclusive
  drivers: net: cpsw: use of_phy_connect() in fixed-link case

 Documentation/devicetree/bindings/net/cpsw.txt |  6 +--
 drivers/net/ethernet/ti/cpsw.c | 69 ++
 drivers/net/ethernet/ti/cpsw.h |  1 +
 3 files changed, 41 insertions(+), 35 deletions(-)

-- 
2.5.5



Re: [PATCH 3.2 085/115] veth: don’t modify ip_summed; doing so treats packets with bad checksums as good.

2016-04-27 Thread Ben Greear

On 04/27/2016 05:00 PM, Hannes Frederic Sowa wrote:

Hi Ben,

On Wed, Apr 27, 2016, at 20:07, Ben Hutchings wrote:

On Wed, 2016-04-27 at 08:59 -0700, Ben Greear wrote:

On 04/26/2016 04:02 PM, Ben Hutchings wrote:


3.2.80-rc1 review patch.  If anyone has any objections, please let me know.

I would be careful about this.  It causes regressions when sending
PACKET_SOCKET buffers from user-space to veth devices.

There was a proposed upstream fix for the regression, but it has not gone
into the tree as far as I know.

http://www.spinics.net/lists/netdev/msg370436.html

[...]

OK, I'll drop this for now.


The fallout from not having this patch is, in my opinion, bigger
than the fallout from having it. This patch fixes silent data
corruption vs. the problem Ben Greear is talking about, which might not
be that common a usage.

What do others think?

Bye,
Hannes



This patch from Cong Wang seems to fix the regression for me, I think it should 
be added and
tested in the main tree, and then apply them to stable as a pair.

http://dmz2.candelatech.com/?p=linux-4.4.dev.y/.git;a=commitdiff;h=8153e983c0e5eba1aafe1fc296248ed2a553f1ac;hp=454b07405d694dad52e7f41af5816eed0190da8a



diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index da1ae0e..f8cc758 100644 (file)
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1926,6 +1926,7 @@ retry:
goto out_unlock;
}

+   skb->ip_summed = CHECKSUM_UNNECESSARY;
skb->protocol = proto;
skb->dev = dev;
skb->priority = sk->sk_priority;
@@ -2352,6 +2353,7 @@ static int tpacket_fill_skb(struct packet_sock *po, 
struct sk_buff *skb,

ph.raw = frame;

+   skb->ip_summed = CHECKSUM_UNNECESSARY;
skb->protocol = proto;
skb->dev = dev;
skb->priority = po->sk.sk_priority;
@@ -2776,6 +2778,7 @@ static int packet_snd(struct socket *sock, struct msghdr 
*msg, size_t len)
goto out_free;
}

+   skb->ip_summed = CHECKSUM_UNNECESSARY;
skb->protocol = proto;
skb->dev = dev;
skb->priority = sk->sk_priority;

Thanks,
Ben

--
Ben Greear 
Candela Technologies Inc  http://www.candelatech.com



Re: [PATCH 3.2 085/115] veth: don’t modify ip_summed; doing so treats packets with bad checksums as good.

2016-04-27 Thread Hannes Frederic Sowa
Hi Ben,

On Wed, Apr 27, 2016, at 20:07, Ben Hutchings wrote:
> On Wed, 2016-04-27 at 08:59 -0700, Ben Greear wrote:
> > On 04/26/2016 04:02 PM, Ben Hutchings wrote:
> > > 
> > > 3.2.80-rc1 review patch.  If anyone has any objections, please let me 
> > > know.
> > I would be careful about this.  It causes regressions when sending
> > PACKET_SOCKET buffers from user-space to veth devices.
> > 
> > There was a proposed upstream fix for the regression, but it has not gone
> > into the tree as far as I know.
> > 
> > http://www.spinics.net/lists/netdev/msg370436.html
> [...]
> 
> OK, I'll drop this for now.

The fallout from not having this patch is, in my opinion, bigger than
the fallout from having it. This patch fixes silent data corruption,
whereas the problem Ben Greear is talking about might not be that
common a usage.

What do others think?

Bye,
Hannes


[PATCH net-next 13/17] net: rename NET_{ADD|INC}_STATS_BH()

2016-04-27 Thread Eric Dumazet
Rename NET_INC_STATS_BH() to __NET_INC_STATS()
and NET_ADD_STATS_BH() to __NET_ADD_STATS()

Signed-off-by: Eric Dumazet 
---
 include/net/ip.h  |   4 +-
 include/net/tcp.h |   4 +-
 net/core/dev.c|   4 +-
 net/dccp/ipv4.c   |  10 ++---
 net/dccp/ipv6.c   |   8 ++--
 net/dccp/timer.c  |   4 +-
 net/ipv4/arp.c|   2 +-
 net/ipv4/inet_hashtables.c|   2 +-
 net/ipv4/inet_timewait_sock.c |   4 +-
 net/ipv4/ip_input.c   |   2 +-
 net/ipv4/syncookies.c |   4 +-
 net/ipv4/tcp.c|   4 +-
 net/ipv4/tcp_cdg.c|  20 -
 net/ipv4/tcp_cubic.c  |  20 -
 net/ipv4/tcp_fastopen.c   |  14 +++---
 net/ipv4/tcp_input.c  | 100 ++
 net/ipv4/tcp_ipv4.c   |  22 +-
 net/ipv4/tcp_minisocks.c  |  10 ++---
 net/ipv4/tcp_output.c |  14 +++---
 net/ipv4/tcp_recovery.c   |   4 +-
 net/ipv4/tcp_timer.c  |  22 +-
 net/ipv6/inet6_hashtables.c   |   2 +-
 net/ipv6/syncookies.c |   4 +-
 net/ipv6/tcp_ipv6.c   |  16 +++
 net/sctp/input.c  |   2 +-
 25 files changed, 153 insertions(+), 149 deletions(-)

diff --git a/include/net/ip.h b/include/net/ip.h
index 55f5de50a564..fb3b766ca1c7 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -193,9 +193,9 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff 
*skb,
 #define IP_UPD_PO_STATS(net, field, val) 
SNMP_UPD_PO_STATS64((net)->mib.ip_statistics, field, val)
 #define __IP_UPD_PO_STATS(net, field, val) 
SNMP_UPD_PO_STATS64_BH((net)->mib.ip_statistics, field, val)
 #define NET_INC_STATS(net, field)  
SNMP_INC_STATS((net)->mib.net_statistics, field)
-#define NET_INC_STATS_BH(net, field)   
SNMP_INC_STATS_BH((net)->mib.net_statistics, field)
+#define __NET_INC_STATS(net, field)
SNMP_INC_STATS_BH((net)->mib.net_statistics, field)
 #define NET_ADD_STATS(net, field, adnd)
SNMP_ADD_STATS((net)->mib.net_statistics, field, adnd)
-#define NET_ADD_STATS_BH(net, field, adnd) 
SNMP_ADD_STATS_BH((net)->mib.net_statistics, field, adnd)
+#define __NET_ADD_STATS(net, field, adnd) 
SNMP_ADD_STATS_BH((net)->mib.net_statistics, field, adnd)
 
 u64 snmp_get_cpu_field(void __percpu *mib, int cpu, int offct);
 unsigned long snmp_fold_field(void __percpu *mib, int offt);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 939ebd5320a9..ff8b4265cb2b 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1743,7 +1743,7 @@ static inline __u32 cookie_init_sequence(const struct 
tcp_request_sock_ops *ops,
 __u16 *mss)
 {
tcp_synq_overflow(sk);
-   NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
+   __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
return ops->cookie_init_seq(skb, mss);
 }
 #else
@@ -1852,7 +1852,7 @@ static inline void tcp_segs_in(struct tcp_sock *tp, const 
struct sk_buff *skb)
 static inline void tcp_listendrop(const struct sock *sk)
 {
atomic_inc(&((struct sock *)sk)->sk_drops);
-   NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
+   __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS);
 }
 
 #endif /* _TCP_H */
diff --git a/net/core/dev.c b/net/core/dev.c
index 6324bc9267f7..e96a3bc2c634 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4982,8 +4982,8 @@ bool sk_busy_loop(struct sock *sk, int nonblock)
netpoll_poll_unlock(have);
}
if (rc > 0)
-   NET_ADD_STATS_BH(sock_net(sk),
-LINUX_MIB_BUSYPOLLRXPACKETS, rc);
+   __NET_ADD_STATS(sock_net(sk),
+   LINUX_MIB_BUSYPOLLRXPACKETS, rc);
local_bh_enable();
 
if (rc == LL_FLUSH_FAILED)
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index a9c75e79ba99..a8164272e0f4 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -205,7 +205,7 @@ void dccp_req_err(struct sock *sk, u64 seq)
 * socket here.
 */
if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) {
-   NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
+   __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
} else {
/*
 * Still in RESPOND, just remove it silently.
@@ -273,7 +273,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
 * servers this needs to be solved differently.
 */
if (sock_owned_by_user(sk))
-   NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
+   __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
 
if (sk->sk_state == DCCP_CLOSED)
goto out;
@@ -281,7 +281,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
dp = dccp_sk(sk);
if ((1 << 

[PATCH net-next 11/17] net: rename IP_ADD_STATS_BH()

2016-04-27 Thread Eric Dumazet
Rename IP_ADD_STATS_BH() to __IP_ADD_STATS()

Signed-off-by: Eric Dumazet 
---
 include/net/ip.h  | 2 +-
 net/ipv4/ip_forward.c | 2 +-
 net/ipv4/ip_input.c   | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/net/ip.h b/include/net/ip.h
index 0be0af3017ba..0df4809bc68a 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -189,7 +189,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff 
*skb,
 #define IP_INC_STATS(net, field)   
SNMP_INC_STATS64((net)->mib.ip_statistics, field)
 #define __IP_INC_STATS(net, field) 
SNMP_INC_STATS64_BH((net)->mib.ip_statistics, field)
 #define IP_ADD_STATS(net, field, val)  
SNMP_ADD_STATS64((net)->mib.ip_statistics, field, val)
-#define IP_ADD_STATS_BH(net, field, val) 
SNMP_ADD_STATS64_BH((net)->mib.ip_statistics, field, val)
+#define __IP_ADD_STATS(net, field, val) 
SNMP_ADD_STATS64_BH((net)->mib.ip_statistics, field, val)
 #define IP_UPD_PO_STATS(net, field, val) 
SNMP_UPD_PO_STATS64((net)->mib.ip_statistics, field, val)
 #define IP_UPD_PO_STATS_BH(net, field, val) 
SNMP_UPD_PO_STATS64_BH((net)->mib.ip_statistics, field, val)
 #define NET_INC_STATS(net, field)  
SNMP_INC_STATS((net)->mib.net_statistics, field)
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 42fbd59b0ba8..cbfb1808fcc4 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -66,7 +66,7 @@ static int ip_forward_finish(struct net *net, struct sock 
*sk, struct sk_buff *s
struct ip_options *opt  = &(IPCB(skb)->opt);
 
__IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
-   IP_ADD_STATS_BH(net, IPSTATS_MIB_OUTOCTETS, skb->len);
+   __IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len);
 
if (unlikely(opt->optlen))
ip_forward_options(skb);
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index cca6729cd6ee..11f34e421270 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -439,9 +439,9 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, 
struct packet_type *pt,
BUILD_BUG_ON(IPSTATS_MIB_ECT1PKTS != IPSTATS_MIB_NOECTPKTS + 
INET_ECN_ECT_1);
BUILD_BUG_ON(IPSTATS_MIB_ECT0PKTS != IPSTATS_MIB_NOECTPKTS + 
INET_ECN_ECT_0);
BUILD_BUG_ON(IPSTATS_MIB_CEPKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_CE);
-   IP_ADD_STATS_BH(net,
-   IPSTATS_MIB_NOECTPKTS + (iph->tos & INET_ECN_MASK),
-   max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
+   __IP_ADD_STATS(net,
+  IPSTATS_MIB_NOECTPKTS + (iph->tos & INET_ECN_MASK),
+  max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
 
if (!pskb_may_pull(skb, iph->ihl*4))
goto inhdr_error;
-- 
2.8.0.rc3.226.g39d4020



[PATCH net-next 17/17] net: snmp: kill STATS_BH macros

2016-04-27 Thread Eric Dumazet
There is nothing related to BH in SNMP counters anymore,
since linux-3.0.

Rename helpers to use __ prefix instead of _BH prefix,
for contexts where preemption is disabled.

This more closely matches convention used to update
percpu variables.

Signed-off-by: Eric Dumazet 
---
 include/net/icmp.h  |  2 +-
 include/net/ip.h| 10 +-
 include/net/ipv6.h  | 36 ++--
 include/net/sctp/sctp.h |  6 +++---
 include/net/snmp.h  | 24 
 include/net/tcp.h   |  2 +-
 include/net/udp.h   |  8 
 net/dccp/dccp.h |  2 +-
 8 files changed, 45 insertions(+), 45 deletions(-)

diff --git a/include/net/icmp.h b/include/net/icmp.h
index 25edb740c648..3ef2743a8eec 100644
--- a/include/net/icmp.h
+++ b/include/net/icmp.h
@@ -30,7 +30,7 @@ struct icmp_err {
 
 extern const struct icmp_err icmp_err_convert[];
 #define ICMP_INC_STATS(net, field) 
SNMP_INC_STATS((net)->mib.icmp_statistics, field)
-#define __ICMP_INC_STATS(net, field)   
SNMP_INC_STATS_BH((net)->mib.icmp_statistics, field)
+#define __ICMP_INC_STATS(net, field)   
__SNMP_INC_STATS((net)->mib.icmp_statistics, field)
 #define ICMPMSGOUT_INC_STATS(net, field)   
SNMP_INC_STATS_ATOMIC_LONG((net)->mib.icmpmsg_statistics, field+256)
 #define ICMPMSGIN_INC_STATS(net, field)
SNMP_INC_STATS_ATOMIC_LONG((net)->mib.icmpmsg_statistics, field)
 
diff --git a/include/net/ip.h b/include/net/ip.h
index fb3b766ca1c7..247ac82e9cf2 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -187,15 +187,15 @@ void ip_send_unicast_reply(struct sock *sk, struct 
sk_buff *skb,
   unsigned int len);
 
 #define IP_INC_STATS(net, field)   
SNMP_INC_STATS64((net)->mib.ip_statistics, field)
-#define __IP_INC_STATS(net, field) 
SNMP_INC_STATS64_BH((net)->mib.ip_statistics, field)
+#define __IP_INC_STATS(net, field) 
__SNMP_INC_STATS64((net)->mib.ip_statistics, field)
 #define IP_ADD_STATS(net, field, val)  
SNMP_ADD_STATS64((net)->mib.ip_statistics, field, val)
-#define __IP_ADD_STATS(net, field, val) 
SNMP_ADD_STATS64_BH((net)->mib.ip_statistics, field, val)
+#define __IP_ADD_STATS(net, field, val) 
__SNMP_ADD_STATS64((net)->mib.ip_statistics, field, val)
 #define IP_UPD_PO_STATS(net, field, val) 
SNMP_UPD_PO_STATS64((net)->mib.ip_statistics, field, val)
-#define __IP_UPD_PO_STATS(net, field, val) 
SNMP_UPD_PO_STATS64_BH((net)->mib.ip_statistics, field, val)
+#define __IP_UPD_PO_STATS(net, field, val) 
__SNMP_UPD_PO_STATS64((net)->mib.ip_statistics, field, val)
 #define NET_INC_STATS(net, field)  
SNMP_INC_STATS((net)->mib.net_statistics, field)
-#define __NET_INC_STATS(net, field)
SNMP_INC_STATS_BH((net)->mib.net_statistics, field)
+#define __NET_INC_STATS(net, field)
__SNMP_INC_STATS((net)->mib.net_statistics, field)
 #define NET_ADD_STATS(net, field, adnd)
SNMP_ADD_STATS((net)->mib.net_statistics, field, adnd)
-#define __NET_ADD_STATS(net, field, adnd) 
SNMP_ADD_STATS_BH((net)->mib.net_statistics, field, adnd)
+#define __NET_ADD_STATS(net, field, adnd) 
__SNMP_ADD_STATS((net)->mib.net_statistics, field, adnd)
 
 u64 snmp_get_cpu_field(void __percpu *mib, int cpu, int offct);
 unsigned long snmp_fold_field(void __percpu *mib, int offt);
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 64ce3670d40a..415213da5be3 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -121,21 +121,21 @@ struct frag_hdr {
 extern int sysctl_mld_max_msf;
 extern int sysctl_mld_qrv;
 
-#define _DEVINC(net, statname, modifier, idev, field)  \
+#define _DEVINC(net, statname, mod, idev, field)   \
 ({ \
struct inet6_dev *_idev = (idev);   \
if (likely(_idev != NULL))  \
-   SNMP_INC_STATS##modifier((_idev)->stats.statname, (field)); \
-   SNMP_INC_STATS##modifier((net)->mib.statname##_statistics, (field));\
+   mod##SNMP_INC_STATS64((_idev)->stats.statname, (field));\
+   mod##SNMP_INC_STATS64((net)->mib.statname##_statistics, (field));\
 })
 
 /* per device counters are atomic_long_t */
-#define _DEVINCATOMIC(net, statname, modifier, idev, field)\
+#define _DEVINCATOMIC(net, statname, mod, idev, field) \
 ({ \
struct inet6_dev *_idev = (idev);   \
if (likely(_idev != NULL))  \
SNMP_INC_STATS_ATOMIC_LONG((_idev)->stats.statname##dev, 
(field)); \
-   SNMP_INC_STATS##modifier((net)->mib.statname##_statistics, (field));\
+   mod##SNMP_INC_STATS((net)->mib.statname##_statistics, (field));\
 })
 
 /* per device and per net counters are atomic_long_t */
@@ -147,40 +147,40 @@ extern int sysctl_mld_qrv;
 

[PATCH net-next 10/17] net: rename ICMP6_INC_STATS_BH()

2016-04-27 Thread Eric Dumazet
Rename ICMP6_INC_STATS_BH() to __ICMP6_INC_STATS()

Signed-off-by: Eric Dumazet 
---
 include/net/ipv6.h  |  2 +-
 net/dccp/ipv6.c |  8 
 net/ipv6/icmp.c | 10 +-
 net/ipv6/tcp_ipv6.c |  4 ++--
 net/ipv6/udp.c  |  4 ++--
 net/sctp/ipv6.c |  2 +-
 6 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index e93e947d04ff..a620fc56e2f5 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -179,7 +179,7 @@ extern int sysctl_mld_qrv;
_DEVUPD(net, ipv6, 64_BH, idev, field, val)
 #define ICMP6_INC_STATS(net, idev, field)  \
_DEVINCATOMIC(net, icmpv6, , idev, field)
-#define ICMP6_INC_STATS_BH(net, idev, field)   \
+#define __ICMP6_INC_STATS(net, idev, field)\
_DEVINCATOMIC(net, icmpv6, _BH, idev, field)
 
 #define ICMP6MSGOUT_INC_STATS(net, idev, field)\
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index e175b8fe1a87..323c6b595e31 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -80,8 +80,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct 
inet6_skb_parm *opt,
 
if (skb->len < offset + sizeof(*dh) ||
skb->len < offset + __dccp_basic_hdr_len(dh)) {
-   ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
-  ICMP6_MIB_INERRORS);
+   __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
+ ICMP6_MIB_INERRORS);
return;
}
 
@@ -91,8 +91,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct 
inet6_skb_parm *opt,
inet6_iif(skb));
 
if (!sk) {
-   ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
-  ICMP6_MIB_INERRORS);
+   __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
+ ICMP6_MIB_INERRORS);
return;
}
 
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 6b573ebe49de..823a1fc576e3 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -622,7 +622,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
np->dontfrag, _unused);
 
if (err) {
-   ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
+   __ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
} else {
err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
@@ -674,7 +674,7 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, 
__be32 info)
return;
 
 out:
-   ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
+   __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
 }
 
 /*
@@ -710,7 +710,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
skb_set_network_header(skb, nh);
}
 
-   ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INMSGS);
+   __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
 
saddr = &ipv6_hdr(skb)->saddr;
daddr = &ipv6_hdr(skb)->daddr;
@@ -812,9 +812,9 @@ static int icmpv6_rcv(struct sk_buff *skb)
return 0;
 
 csum_error:
-   ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
+   __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
 discard_it:
-   ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS);
+   __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
 drop_no_count:
kfree_skb(skb);
return 0;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 52ca8fac7429..78c45c027acc 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -336,8 +336,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct 
inet6_skb_parm *opt,
skb->dev->ifindex);
 
if (!sk) {
-   ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
-  ICMP6_MIB_INERRORS);
+   __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
+ ICMP6_MIB_INERRORS);
return;
}
 
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 1243d22e2b1d..1ba5a74ac18f 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -521,8 +521,8 @@ void __udp6_lib_err(struct sk_buff *skb, struct 
inet6_skb_parm *opt,
sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
   inet6_iif(skb), udptable, skb);
if (!sk) {
-   ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
-  ICMP6_MIB_INERRORS);
+   __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
+ ICMP6_MIB_INERRORS);
return;
}
 
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index ce46f1c7f133..0657d18a85bf 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -162,7 +162,7 @@ static void 

[PATCH net-next 06/17] net: tcp: rename TCP_INC_STATS_BH

2016-04-27 Thread Eric Dumazet
Rename TCP_INC_STATS_BH() to __TCP_INC_STATS()

Signed-off-by: Eric Dumazet 
---
 include/net/tcp.h|  2 +-
 net/ipv4/tcp.c   |  2 +-
 net/ipv4/tcp_input.c |  8 
 net/ipv4/tcp_ipv4.c  | 16 
 net/ipv4/tcp_minisocks.c |  4 ++--
 net/ipv4/tcp_output.c|  4 ++--
 net/ipv6/tcp_ipv6.c  | 14 +++---
 7 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index cfe15f712164..939ebd5320a9 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -332,7 +332,7 @@ bool tcp_check_oom(struct sock *sk, int shift);
 extern struct proto tcp_prot;
 
 #define TCP_INC_STATS(net, field)  
SNMP_INC_STATS((net)->mib.tcp_statistics, field)
-#define TCP_INC_STATS_BH(net, field)   
SNMP_INC_STATS_BH((net)->mib.tcp_statistics, field)
+#define __TCP_INC_STATS(net, field)
SNMP_INC_STATS_BH((net)->mib.tcp_statistics, field)
 #define TCP_DEC_STATS(net, field)  
SNMP_DEC_STATS((net)->mib.tcp_statistics, field)
 #define TCP_ADD_STATS(net, field, val) 
SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val)
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 55ef55ac9e38..96833433c2c3 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3091,7 +3091,7 @@ void tcp_done(struct sock *sk)
struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
 
if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
-   TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
+   __TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
 
tcp_set_state(sk, TCP_CLOSE);
tcp_clear_xmit_timers(sk);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 967520dbe0bf..dad8d93262ed 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5233,7 +5233,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct 
sk_buff *skb,
if (th->syn) {
 syn_challenge:
if (syn_inerr)
-   TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
+   __TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE);
tcp_send_challenge_ack(sk, skb);
goto discard;
@@ -5349,7 +5349,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff 
*skb,
tcp_data_snd_check(sk);
return;
} else { /* Header too small */
-   TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
+   __TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
goto discard;
}
} else {
@@ -5456,8 +5456,8 @@ step5:
return;
 
 csum_error:
-   TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
-   TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
+   __TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
+   __TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
 
 discard:
tcp_drop(sk, skb);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ebd8f3b9e61b..378e92d41c6c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -697,8 +697,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct 
sk_buff *skb)
  ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
  &arg, arg.iov[0].iov_len);
 
-   TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
-   TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
+   __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
+   __TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
 
 #ifdef CONFIG_TCP_MD5SIG
 out:
@@ -779,7 +779,7 @@ static void tcp_v4_send_ack(struct net *net,
  ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
  &arg, arg.iov[0].iov_len);
 
-   TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
+   __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
 }
 
 static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
@@ -1432,8 +1432,8 @@ discard:
return 0;
 
 csum_err:
-   TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
-   TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
+   __TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
+   __TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
goto discard;
 }
 EXPORT_SYMBOL(tcp_v4_do_rcv);
@@ -1547,7 +1547,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
goto discard_it;
 
/* Count it even if it's bad */
-   TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
+   __TCP_INC_STATS(net, TCP_MIB_INSEGS);
 
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
goto discard_it;
@@ -1679,9 +1679,9 @@ no_tcp_socket:
 
if (tcp_checksum_complete(skb)) {
 csum_error:
-   TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
+   __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
 bad_packet:
-   TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
+   

[PATCH net-next 02/17] dccp: rename DCCP_INC_STATS_BH()

2016-04-27 Thread Eric Dumazet
Rename DCCP_INC_STATS_BH() to __DCCP_INC_STATS()

Signed-off-by: Eric Dumazet 
---
 net/dccp/dccp.h  | 6 +++---
 net/dccp/input.c | 2 +-
 net/dccp/ipv4.c  | 8 
 net/dccp/ipv6.c  | 8 
 net/dccp/minisocks.c | 2 +-
 net/dccp/options.c   | 2 +-
 net/dccp/timer.c | 4 ++--
 7 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index b0e28d24e1a7..a4c6e2fed91c 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -198,9 +198,9 @@ struct dccp_mib {
 };
 
 DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics);
-#define DCCP_INC_STATS(field)  SNMP_INC_STATS(dccp_statistics, field)
-#define DCCP_INC_STATS_BH(field)SNMP_INC_STATS_BH(dccp_statistics, field)
-#define DCCP_DEC_STATS(field)  SNMP_DEC_STATS(dccp_statistics, field)
+#define DCCP_INC_STATS(field)  SNMP_INC_STATS(dccp_statistics, field)
+#define __DCCP_INC_STATS(field)SNMP_INC_STATS_BH(dccp_statistics, 
field)
+#define DCCP_DEC_STATS(field)  SNMP_DEC_STATS(dccp_statistics, field)
 
 /*
  * Checksumming routines
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 3bd14e885396..2437ecc13b82 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -359,7 +359,7 @@ send_sync:
goto discard;
}
 
-   DCCP_INC_STATS_BH(DCCP_MIB_INERRS);
+   __DCCP_INC_STATS(DCCP_MIB_INERRS);
 discard:
__kfree_skb(skb);
return 0;
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index f6d183f8f332..4b78067669d6 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -318,7 +318,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
case DCCP_REQUESTING:
case DCCP_RESPOND:
if (!sock_owned_by_user(sk)) {
-   DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
+   __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS);
sk->sk_err = err;
 
sk->sk_error_report(sk);
@@ -533,8 +533,8 @@ static void dccp_v4_ctl_send_reset(const struct sock *sk, 
struct sk_buff *rxskb)
bh_unlock_sock(ctl_sk);
 
if (net_xmit_eval(err) == 0) {
-   DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
-   DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
+   __DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
+   __DCCP_INC_STATS(DCCP_MIB_OUTRSTS);
}
 out:
 dst_release(dst);
@@ -637,7 +637,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff 
*skb)
 drop_and_free:
reqsk_free(req);
 drop:
-   DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
+   __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS);
return -1;
 }
 EXPORT_SYMBOL_GPL(dccp_v4_conn_request);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 8ceb3cebcad4..e175b8fe1a87 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -156,7 +156,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct 
inet6_skb_parm *opt,
case DCCP_RESPOND:  /* Cannot happen.
   It can, it SYNs are crossed. --ANK */
if (!sock_owned_by_user(sk)) {
-   DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
+   __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS);
sk->sk_err = err;
/*
 * Wake people up to see the error
@@ -277,8 +277,8 @@ static void dccp_v6_ctl_send_reset(const struct sock *sk, 
struct sk_buff *rxskb)
if (!IS_ERR(dst)) {
skb_dst_set(skb, dst);
ip6_xmit(ctl_sk, skb, &fl6, NULL, 0);
-   DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
-   DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
+   __DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
+   __DCCP_INC_STATS(DCCP_MIB_OUTRSTS);
return;
}
 
@@ -378,7 +378,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct 
sk_buff *skb)
 drop_and_free:
reqsk_free(req);
 drop:
-   DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
+   __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS);
return -1;
 }
 
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 1994f8af646b..53eddf99e4f6 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -127,7 +127,7 @@ struct sock *dccp_create_openreq_child(const struct sock 
*sk,
}
dccp_init_xmit_timers(newsk);
 
-   DCCP_INC_STATS_BH(DCCP_MIB_PASSIVEOPENS);
+   __DCCP_INC_STATS(DCCP_MIB_PASSIVEOPENS);
}
return newsk;
 }
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 9bce31886bda..b82b7ee9a1d2 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -253,7 +253,7 @@ out_nonsensical_length:
return 0;
 
 out_invalid_option:
-   DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT);
+   __DCCP_INC_STATS(DCCP_MIB_INVALIDOPT);
rc = DCCP_RESET_CODE_OPTION_ERROR;
 out_featneg_failed:
DCCP_WARN("DCCP(%p): Option %d (len=%d) error=%u\n", sk, opt, len, rc);
diff 

[PATCH net-next 07/17] net: icmp: rename ICMPMSGIN_INC_STATS_BH()

2016-04-27 Thread Eric Dumazet
Remove misleading _BH suffix.

Signed-off-by: Eric Dumazet 
---
 include/net/icmp.h | 2 +-
 net/ipv4/icmp.c| 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/net/icmp.h b/include/net/icmp.h
index 5a60ce819078..25edb740c648 100644
--- a/include/net/icmp.h
+++ b/include/net/icmp.h
@@ -32,7 +32,7 @@ extern const struct icmp_err icmp_err_convert[];
 #define ICMP_INC_STATS(net, field) 
SNMP_INC_STATS((net)->mib.icmp_statistics, field)
 #define __ICMP_INC_STATS(net, field)   
SNMP_INC_STATS_BH((net)->mib.icmp_statistics, field)
 #define ICMPMSGOUT_INC_STATS(net, field)   
SNMP_INC_STATS_ATOMIC_LONG((net)->mib.icmpmsg_statistics, field+256)
-#define ICMPMSGIN_INC_STATS_BH(net, field) 
SNMP_INC_STATS_ATOMIC_LONG((net)->mib.icmpmsg_statistics, field)
+#define ICMPMSGIN_INC_STATS(net, field)
SNMP_INC_STATS_ATOMIC_LONG((net)->mib.icmpmsg_statistics, field)
 
 struct dst_entry;
 struct net_proto_family;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 995fef9c5099..38abe70e595f 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1006,7 +1006,7 @@ int icmp_rcv(struct sk_buff *skb)
 
icmph = icmp_hdr(skb);
 
-   ICMPMSGIN_INC_STATS_BH(net, icmph->type);
+   ICMPMSGIN_INC_STATS(net, icmph->type);
/*
 *  18 is the highest 'known' ICMP type. Anything else is a mystery
 *
-- 
2.8.0.rc3.226.g39d4020



[PATCH net-next 16/17] ipv6: kill ICMP6MSGIN_INC_STATS_BH()

2016-04-27 Thread Eric Dumazet
IPv6 ICMP stats are atomics anyway.

Signed-off-by: Eric Dumazet 
---
 include/net/ipv6.h | 4 +---
 net/ipv6/icmp.c| 2 +-
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 9f3b53f2819b..64ce3670d40a 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -184,9 +184,7 @@ extern int sysctl_mld_qrv;
 
 #define ICMP6MSGOUT_INC_STATS(net, idev, field)\
_DEVINC_ATOMIC_ATOMIC(net, icmpv6msg, idev, field +256)
-#define ICMP6MSGOUT_INC_STATS_BH(net, idev, field) \
-   _DEVINC_ATOMIC_ATOMIC(net, icmpv6msg, idev, field +256)
-#define ICMP6MSGIN_INC_STATS_BH(net, idev, field)  \
+#define ICMP6MSGIN_INC_STATS(net, idev, field) \
_DEVINC_ATOMIC_ATOMIC(net, icmpv6msg, idev, field)
 
 struct ip6_ra_chain {
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 823a1fc576e3..23b9a4cc418e 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -728,7 +728,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
 
type = hdr->icmp6_type;
 
-   ICMP6MSGIN_INC_STATS_BH(dev_net(dev), idev, type);
+   ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
 
switch (type) {
case ICMPV6_ECHO_REQUEST:
-- 
2.8.0.rc3.226.g39d4020



[PATCH net-next 14/17] ipv6: rename IP6_INC_STATS_BH()

2016-04-27 Thread Eric Dumazet
Rename IP6_INC_STATS_BH() to __IP6_INC_STATS()
and IP6_ADD_STATS_BH() to __IP6_ADD_STATS()

Signed-off-by: Eric Dumazet 
---
 include/net/ipv6.h |  4 +--
 net/bridge/br_netfilter_ipv6.c | 10 +++
 net/ipv6/exthdrs.c | 66 +-
 net/ipv6/ip6_input.c   | 28 +-
 net/ipv6/ip6_output.c  | 34 +++---
 net/ipv6/ip6mr.c   |  8 ++---
 net/ipv6/reassembly.c  | 32 ++--
 7 files changed, 91 insertions(+), 91 deletions(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index a620fc56e2f5..aba8760dd108 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -167,11 +167,11 @@ extern int sysctl_mld_qrv;
 
 #define IP6_INC_STATS(net, idev,field) \
_DEVINC(net, ipv6, 64, idev, field)
-#define IP6_INC_STATS_BH(net, idev,field)  \
+#define __IP6_INC_STATS(net, idev,field)   \
_DEVINC(net, ipv6, 64_BH, idev, field)
 #define IP6_ADD_STATS(net, idev,field,val) \
_DEVADD(net, ipv6, 64, idev, field, val)
-#define IP6_ADD_STATS_BH(net, idev,field,val)  \
+#define __IP6_ADD_STATS(net, idev,field,val)   \
_DEVADD(net, ipv6, 64_BH, idev, field, val)
 #define IP6_UPD_PO_STATS(net, idev,field,val)   \
_DEVUPD(net, ipv6, 64, idev, field, val)
diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c
index d61f56efc8dc..5e59a8457e7b 100644
--- a/net/bridge/br_netfilter_ipv6.c
+++ b/net/bridge/br_netfilter_ipv6.c
@@ -122,13 +122,13 @@ int br_validate_ipv6(struct net *net, struct sk_buff *skb)
 
if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
if (pkt_len + ip6h_len > skb->len) {
-   IP6_INC_STATS_BH(net, idev,
-IPSTATS_MIB_INTRUNCATEDPKTS);
+   __IP6_INC_STATS(net, idev,
+   IPSTATS_MIB_INTRUNCATEDPKTS);
goto drop;
}
if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) {
-   IP6_INC_STATS_BH(net, idev,
-IPSTATS_MIB_INDISCARDS);
+   __IP6_INC_STATS(net, idev,
+   IPSTATS_MIB_INDISCARDS);
goto drop;
}
}
@@ -142,7 +142,7 @@ int br_validate_ipv6(struct net *net, struct sk_buff *skb)
return 0;
 
 inhdr_error:
-   IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS);
+   __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
 drop:
return -1;
 }
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index ea7c4d64a00a..8de5dd7aaa05 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -258,8 +258,8 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
!pskb_may_pull(skb, (skb_transport_offset(skb) +
 ((skb_transport_header(skb)[1] + 1) << 3)))) {
-   IP6_INC_STATS_BH(dev_net(dst->dev), ip6_dst_idev(dst),
-IPSTATS_MIB_INHDRERRORS);
+   __IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
+   IPSTATS_MIB_INHDRERRORS);
kfree_skb(skb);
return -1;
}
@@ -280,8 +280,8 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
return 1;
}
 
-   IP6_INC_STATS_BH(dev_net(dst->dev),
-ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
+   __IP6_INC_STATS(dev_net(dst->dev),
+   ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
return -1;
 }
 
@@ -309,8 +309,8 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
!pskb_may_pull(skb, (skb_transport_offset(skb) +
 ((skb_transport_header(skb)[1] + 1) << 3)))) {
-   IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
-IPSTATS_MIB_INHDRERRORS);
+   __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+   IPSTATS_MIB_INHDRERRORS);
kfree_skb(skb);
return -1;
}
@@ -319,8 +319,8 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
 
if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) ||
skb->pkt_type != PACKET_HOST) {
-   IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
-IPSTATS_MIB_INADDRERRORS);
+   __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+   IPSTATS_MIB_INADDRERRORS);
kfree_skb(skb);
return -1;
}
@@ -334,8 +334,8 @@ looped_back:
 * processed by own
 */

[PATCH net-next 15/17] ipv6: rename IP6_UPD_PO_STATS_BH()

2016-04-27 Thread Eric Dumazet
Rename IP6_UPD_PO_STATS_BH() to __IP6_UPD_PO_STATS()

Signed-off-by: Eric Dumazet 
---
 include/net/ipv6.h   | 2 +-
 net/ipv6/ip6_input.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index aba8760dd108..9f3b53f2819b 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -175,7 +175,7 @@ extern int sysctl_mld_qrv;
_DEVADD(net, ipv6, 64_BH, idev, field, val)
 #define IP6_UPD_PO_STATS(net, idev,field,val)   \
_DEVUPD(net, ipv6, 64, idev, field, val)
-#define IP6_UPD_PO_STATS_BH(net, idev,field,val)   \
+#define __IP6_UPD_PO_STATS(net, idev,field,val)   \
_DEVUPD(net, ipv6, 64_BH, idev, field, val)
 #define ICMP6_INC_STATS(net, idev, field)  \
_DEVINCATOMIC(net, icmpv6, , idev, field)
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 218bb906c620..6ed56012005d 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -78,7 +78,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, 
struct packet_type *pt
 
idev = __in6_dev_get(skb->dev);
 
-   IP6_UPD_PO_STATS_BH(net, idev, IPSTATS_MIB_IN, skb->len);
+   __IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_IN, skb->len);
 
if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL ||
!idev || unlikely(idev->cnf.disable_ipv6)) {
@@ -297,7 +297,7 @@ int ip6_mc_input(struct sk_buff *skb)
const struct ipv6hdr *hdr;
bool deliver;
 
-   IP6_UPD_PO_STATS_BH(dev_net(skb_dst(skb)->dev),
+   __IP6_UPD_PO_STATS(dev_net(skb_dst(skb)->dev),
 ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INMCAST,
 skb->len);
 
-- 
2.8.0.rc3.226.g39d4020



[PATCH net-next 09/17] net: rename IP_INC_STATS_BH()

2016-04-27 Thread Eric Dumazet
Rename IP_INC_STATS_BH() to __IP_INC_STATS(), to
better express that this is used in a non-preemptible context.

Signed-off-by: Eric Dumazet 
---
 include/net/ip.h|  2 +-
 net/bridge/br_netfilter_hooks.c |  6 +++---
 net/dccp/ipv4.c |  2 +-
 net/ipv4/inet_connection_sock.c |  4 ++--
 net/ipv4/ip_forward.c   |  4 ++--
 net/ipv4/ip_fragment.c  | 14 +++---
 net/ipv4/ip_input.c | 20 ++--
 net/ipv4/route.c|  6 +++---
 8 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/include/net/ip.h b/include/net/ip.h
index ae0e85d018e8..0be0af3017ba 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -187,7 +187,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff 
*skb,
   unsigned int len);
 
 #define IP_INC_STATS(net, field)   
SNMP_INC_STATS64((net)->mib.ip_statistics, field)
-#define IP_INC_STATS_BH(net, field)
SNMP_INC_STATS64_BH((net)->mib.ip_statistics, field)
+#define __IP_INC_STATS(net, field) 
SNMP_INC_STATS64_BH((net)->mib.ip_statistics, field)
 #define IP_ADD_STATS(net, field, val)  
SNMP_ADD_STATS64((net)->mib.ip_statistics, field, val)
 #define IP_ADD_STATS_BH(net, field, val) 
SNMP_ADD_STATS64_BH((net)->mib.ip_statistics, field, val)
 #define IP_UPD_PO_STATS(net, field, val) 
SNMP_UPD_PO_STATS64((net)->mib.ip_statistics, field, val)
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 44114a94c576..2d25979273a6 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -217,13 +217,13 @@ static int br_validate_ipv4(struct net *net, struct 
sk_buff *skb)
 
len = ntohs(iph->tot_len);
if (skb->len < len) {
-   IP_INC_STATS_BH(net, IPSTATS_MIB_INTRUNCATEDPKTS);
+   __IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS);
goto drop;
} else if (len < (iph->ihl*4))
goto inhdr_error;
 
if (pskb_trim_rcsum(skb, len)) {
-   IP_INC_STATS_BH(net, IPSTATS_MIB_INDISCARDS);
+   __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS);
goto drop;
}
 
@@ -236,7 +236,7 @@ static int br_validate_ipv4(struct net *net, struct sk_buff 
*skb)
return 0;
 
 inhdr_error:
-   IP_INC_STATS_BH(net, IPSTATS_MIB_INHDRERRORS);
+   __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
 drop:
return -1;
 }
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 14e30584e59d..a9c75e79ba99 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -462,7 +462,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, 
struct sock *sk,
security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
rt = ip_route_output_flow(net, &fl4, sk);
if (IS_ERR(rt)) {
-   IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
+   __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
return NULL;
}
 
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index ab69da2d2a77..7ce112aa3a7b 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -427,7 +427,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk,
 route_err:
ip_rt_put(rt);
 no_route:
-   IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
+   __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
return NULL;
 }
 EXPORT_SYMBOL_GPL(inet_csk_route_req);
@@ -466,7 +466,7 @@ route_err:
ip_rt_put(rt);
 no_route:
rcu_read_unlock();
-   IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
+   __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
return NULL;
 }
 EXPORT_SYMBOL_GPL(inet_csk_route_child_sock);
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index af18f1e4889e..42fbd59b0ba8 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -65,7 +65,7 @@ static int ip_forward_finish(struct net *net, struct sock 
*sk, struct sk_buff *s
 {
struct ip_options *opt  = &(IPCB(skb)->opt);
 
-   IP_INC_STATS_BH(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
+   __IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
IP_ADD_STATS_BH(net, IPSTATS_MIB_OUTOCTETS, skb->len);
 
if (unlikely(opt->optlen))
@@ -157,7 +157,7 @@ sr_failed:
 
 too_many_hops:
/* Tell the sender its packet died... */
-   IP_INC_STATS_BH(net, IPSTATS_MIB_INHDRERRORS);
+   __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0);
 drop:
kfree_skb(skb);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index efbd47d1a531..bbe7f72db9c1 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -204,14 +204,14 @@ static void ip_expire(unsigned long arg)
goto out;
 
ipq_kill(qp);
-   IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
+   __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
 
if 

[PATCH net-next 12/17] net: rename IP_UPD_PO_STATS_BH()

2016-04-27 Thread Eric Dumazet
Rename IP_UPD_PO_STATS_BH() to __IP_UPD_PO_STATS()

Signed-off-by: Eric Dumazet 
---
 include/net/ip.h| 2 +-
 net/ipv4/ip_input.c | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/net/ip.h b/include/net/ip.h
index 0df4809bc68a..55f5de50a564 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -191,7 +191,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff 
*skb,
 #define IP_ADD_STATS(net, field, val)  
SNMP_ADD_STATS64((net)->mib.ip_statistics, field, val)
 #define __IP_ADD_STATS(net, field, val) 
SNMP_ADD_STATS64_BH((net)->mib.ip_statistics, field, val)
 #define IP_UPD_PO_STATS(net, field, val) 
SNMP_UPD_PO_STATS64((net)->mib.ip_statistics, field, val)
-#define IP_UPD_PO_STATS_BH(net, field, val) 
SNMP_UPD_PO_STATS64_BH((net)->mib.ip_statistics, field, val)
+#define __IP_UPD_PO_STATS(net, field, val) 
SNMP_UPD_PO_STATS64_BH((net)->mib.ip_statistics, field, val)
 #define NET_INC_STATS(net, field)  
SNMP_INC_STATS((net)->mib.net_statistics, field)
 #define NET_INC_STATS_BH(net, field)   
SNMP_INC_STATS_BH((net)->mib.net_statistics, field)
 #define NET_ADD_STATS(net, field, adnd)
SNMP_ADD_STATS((net)->mib.net_statistics, field, adnd)
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 11f34e421270..8fda63d78435 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -358,9 +358,9 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, 
struct sk_buff *skb)
 
rt = skb_rtable(skb);
if (rt->rt_type == RTN_MULTICAST) {
-   IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INMCAST, skb->len);
+   __IP_UPD_PO_STATS(net, IPSTATS_MIB_INMCAST, skb->len);
} else if (rt->rt_type == RTN_BROADCAST) {
-   IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INBCAST, skb->len);
+   __IP_UPD_PO_STATS(net, IPSTATS_MIB_INBCAST, skb->len);
} else if (skb->pkt_type == PACKET_BROADCAST ||
   skb->pkt_type == PACKET_MULTICAST) {
struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
@@ -409,7 +409,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, 
struct packet_type *pt,
 
 
net = dev_net(dev);
-   IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_IN, skb->len);
+   __IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len);
 
skb = skb_share_check(skb, GFP_ATOMIC);
if (!skb) {
-- 
2.8.0.rc3.226.g39d4020



[PATCH net-next 08/17] net: sctp: rename SCTP_INC_STATS_BH()

2016-04-27 Thread Eric Dumazet
Rename SCTP_INC_STATS_BH() to __SCTP_INC_STATS()

Signed-off-by: Eric Dumazet 
---
 include/net/sctp/sctp.h |  2 +-
 net/sctp/input.c| 12 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 5a2c4c3307a7..5607c009f738 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -206,7 +206,7 @@ extern int sysctl_sctp_wmem[3];
 
 /* SCTP SNMP MIB stats handlers */
 #define SCTP_INC_STATS(net, field)  
SNMP_INC_STATS((net)->sctp.sctp_statistics, field)
-#define SCTP_INC_STATS_BH(net, field)   
SNMP_INC_STATS_BH((net)->sctp.sctp_statistics, field)
+#define __SCTP_INC_STATS(net, field)   
SNMP_INC_STATS_BH((net)->sctp.sctp_statistics, field)
 #define SCTP_DEC_STATS(net, field)  
SNMP_DEC_STATS((net)->sctp.sctp_statistics, field)
 
 /* sctp mib definitions */
diff --git a/net/sctp/input.c b/net/sctp/input.c
index f8eca792dbcf..12332fc3eb44 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -84,7 +84,7 @@ static inline int sctp_rcv_checksum(struct net *net, struct 
sk_buff *skb)
 
if (val != cmp) {
/* CRC failure, dump it. */
-   SCTP_INC_STATS_BH(net, SCTP_MIB_CHECKSUMERRORS);
+   __SCTP_INC_STATS(net, SCTP_MIB_CHECKSUMERRORS);
return -1;
}
return 0;
@@ -122,7 +122,7 @@ int sctp_rcv(struct sk_buff *skb)
if (skb->pkt_type != PACKET_HOST)
goto discard_it;
 
-   SCTP_INC_STATS_BH(net, SCTP_MIB_INSCTPPACKS);
+   __SCTP_INC_STATS(net, SCTP_MIB_INSCTPPACKS);
 
if (skb_linearize(skb))
goto discard_it;
@@ -208,7 +208,7 @@ int sctp_rcv(struct sk_buff *skb)
 */
if (!asoc) {
if (sctp_rcv_ootb(skb)) {
-   SCTP_INC_STATS_BH(net, SCTP_MIB_OUTOFBLUES);
+   __SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES);
goto discard_release;
}
}
@@ -264,9 +264,9 @@ int sctp_rcv(struct sk_buff *skb)
skb = NULL; /* sctp_chunk_free already freed the skb */
goto discard_release;
}
-   SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_BACKLOG);
+   __SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_BACKLOG);
} else {
-   SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_SOFTIRQ);
+   __SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_SOFTIRQ);
sctp_inq_push(&chunk->rcvr->inqueue, chunk);
}
 
@@ -281,7 +281,7 @@ int sctp_rcv(struct sk_buff *skb)
return 0;
 
 discard_it:
-   SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_DISCARDS);
+   __SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_DISCARDS);
kfree_skb(skb);
return 0;
 
-- 
2.8.0.rc3.226.g39d4020



[PATCH net-next 01/17] net: snmp: kill various STATS_USER() helpers

2016-04-27 Thread Eric Dumazet
In the old days (before linux-3.0), SNMP counters were duplicated,
one for user context, and one for BH context.

After commit 8f0ea0fe3a03 ("snmp: reduce percpu needs by 50%")
we have a single copy, and what really matters is preemption being
enabled or disabled, since we use this_cpu_inc() or __this_cpu_inc()
respectively.

We therefore kill SNMP_INC_STATS_USER(), SNMP_ADD_STATS_USER(),
NET_INC_STATS_USER(), NET_ADD_STATS_USER(), SCTP_INC_STATS_USER(),
SNMP_INC_STATS64_USER(), SNMP_ADD_STATS64_USER(), TCP_ADD_STATS_USER(),
UDP_INC_STATS_USER(), UDP6_INC_STATS_USER(), and XFRM_INC_STATS_USER()

Following patches will rename __BH helpers to make clear their
usage is not tied to BH being disabled.

Signed-off-by: Eric Dumazet 
---
 include/net/ip.h|  2 --
 include/net/sctp/sctp.h |  1 -
 include/net/snmp.h  | 22 +-
 include/net/tcp.h   |  9 -
 include/net/udp.h   | 14 +++---
 include/net/xfrm.h  |  2 --
 net/ipv4/tcp.c  | 12 ++--
 net/ipv4/udp.c  | 24 
 net/ipv6/udp.c  | 49 -
 net/sctp/chunk.c|  2 +-
 10 files changed, 59 insertions(+), 78 deletions(-)

diff --git a/include/net/ip.h b/include/net/ip.h
index 93725e546758..ae0e85d018e8 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -194,10 +194,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff 
*skb,
 #define IP_UPD_PO_STATS_BH(net, field, val) 
SNMP_UPD_PO_STATS64_BH((net)->mib.ip_statistics, field, val)
 #define NET_INC_STATS(net, field)  
SNMP_INC_STATS((net)->mib.net_statistics, field)
 #define NET_INC_STATS_BH(net, field)   
SNMP_INC_STATS_BH((net)->mib.net_statistics, field)
-#define NET_INC_STATS_USER(net, field) 
SNMP_INC_STATS_USER((net)->mib.net_statistics, field)
 #define NET_ADD_STATS(net, field, adnd)
SNMP_ADD_STATS((net)->mib.net_statistics, field, adnd)
 #define NET_ADD_STATS_BH(net, field, adnd) 
SNMP_ADD_STATS_BH((net)->mib.net_statistics, field, adnd)
-#define NET_ADD_STATS_USER(net, field, adnd) 
SNMP_ADD_STATS_USER((net)->mib.net_statistics, field, adnd)
 
 u64 snmp_get_cpu_field(void __percpu *mib, int cpu, int offct);
 unsigned long snmp_fold_field(void __percpu *mib, int offt);
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 3f1c0ff7d4b6..5a2c4c3307a7 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -207,7 +207,6 @@ extern int sysctl_sctp_wmem[3];
 /* SCTP SNMP MIB stats handlers */
 #define SCTP_INC_STATS(net, field)  
SNMP_INC_STATS((net)->sctp.sctp_statistics, field)
 #define SCTP_INC_STATS_BH(net, field)   
SNMP_INC_STATS_BH((net)->sctp.sctp_statistics, field)
-#define SCTP_INC_STATS_USER(net, field) 
SNMP_INC_STATS_USER((net)->sctp.sctp_statistics, field)
 #define SCTP_DEC_STATS(net, field)  
SNMP_DEC_STATS((net)->sctp.sctp_statistics, field)
 
 /* sctp mib definitions */
diff --git a/include/net/snmp.h b/include/net/snmp.h
index 35512ac6dcfb..56239fc05c51 100644
--- a/include/net/snmp.h
+++ b/include/net/snmp.h
@@ -126,9 +126,6 @@ struct linux_xfrm_mib {
 #define SNMP_INC_STATS_BH(mib, field)  \
__this_cpu_inc(mib->mibs[field])
 
-#define SNMP_INC_STATS_USER(mib, field)\
-   this_cpu_inc(mib->mibs[field])
-
 #define SNMP_INC_STATS_ATOMIC_LONG(mib, field) \
atomic_long_inc(&mib->mibs[field])
 
@@ -141,9 +138,6 @@ struct linux_xfrm_mib {
 #define SNMP_ADD_STATS_BH(mib, field, addend)  \
__this_cpu_add(mib->mibs[field], addend)
 
-#define SNMP_ADD_STATS_USER(mib, field, addend)\
-   this_cpu_add(mib->mibs[field], addend)
-
 #define SNMP_ADD_STATS(mib, field, addend) \
this_cpu_add(mib->mibs[field], addend)
 #define SNMP_UPD_PO_STATS(mib, basefield, addend)  \
@@ -170,18 +164,14 @@ struct linux_xfrm_mib {
u64_stats_update_end(&ptr->syncp);  \
} while (0)
 
-#define SNMP_ADD_STATS64_USER(mib, field, addend)  \
+#define SNMP_ADD_STATS64(mib, field, addend)   \
do {\
-   local_bh_disable(); \
+   preempt_disable();  \
SNMP_ADD_STATS64_BH(mib, field, addend);\
-   local_bh_enable();  \
+   preempt_enable();   \
} while (0)
 
-#define SNMP_ADD_STATS64(mib, field, addend)   \
-   SNMP_ADD_STATS64_USER(mib, field, addend)
-
 #define SNMP_INC_STATS64_BH(mib, field) SNMP_ADD_STATS64_BH(mib, field, 1)
-#define SNMP_INC_STATS64_USER(mib, field) SNMP_ADD_STATS64_USER(mib, field, 1)
 #define 

[PATCH net-next 04/17] net: udp: rename UDP_INC_STATS_BH()

2016-04-27 Thread Eric Dumazet
Rename UDP_INC_STATS_BH() to __UDP_INC_STATS(),
and UDP6_INC_STATS_BH() to __UDP6_INC_STATS()

Signed-off-by: Eric Dumazet 
---
 include/net/udp.h | 12 ++--
 net/ipv4/udp.c| 46 +++---
 net/ipv6/udp.c| 38 +++---
 net/rxrpc/ar-input.c  |  4 ++--
 net/sunrpc/xprtsock.c |  4 ++--
 5 files changed, 52 insertions(+), 52 deletions(-)

diff --git a/include/net/udp.h b/include/net/udp.h
index 2f37f689d85a..bf6a7c29cf6a 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -292,11 +292,11 @@ struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
 #define UDP_INC_STATS(net, field, is_udplite)do { \
if (is_udplite) SNMP_INC_STATS((net)->mib.udplite_statistics, field);   
\
elseSNMP_INC_STATS((net)->mib.udp_statistics, field);  }  
while(0)
-#define UDP_INC_STATS_BH(net, field, is_udplite) do { \
+#define __UDP_INC_STATS(net, field, is_udplite)  do { \
if (is_udplite) SNMP_INC_STATS_BH((net)->mib.udplite_statistics, 
field); \
elseSNMP_INC_STATS_BH((net)->mib.udp_statistics, field);
}  while(0)
 
-#define UDP6_INC_STATS_BH(net, field, is_udplite)  do { \
+#define __UDP6_INC_STATS(net, field, is_udplite)   do { \
if (is_udplite) SNMP_INC_STATS_BH((net)->mib.udplite_stats_in6, field);\
elseSNMP_INC_STATS_BH((net)->mib.udp_stats_in6, field);  \
 } while(0)
@@ -306,15 +306,15 @@ struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
 } while(0)
 
 #if IS_ENABLED(CONFIG_IPV6)
-#define UDPX_INC_STATS_BH(sk, field)   \
+#define __UDPX_INC_STATS(sk, field)\
 do {   \
if ((sk)->sk_family == AF_INET) \
-   UDP_INC_STATS_BH(sock_net(sk), field, 0);   \
+   __UDP_INC_STATS(sock_net(sk), field, 0);\
else\
-   UDP6_INC_STATS_BH(sock_net(sk), field, 0);  \
+   __UDP6_INC_STATS(sock_net(sk), field, 0);   \
 } while (0)
 #else
-#define UDPX_INC_STATS_BH(sk, field) UDP_INC_STATS_BH(sock_net(sk), field, 0)
+#define __UDPX_INC_STATS(sk, field) __UDP_INC_STATS(sock_net(sk), field, 0)
 #endif
 
 /* /proc */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 6b004b838966..093284c5c03b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1242,10 +1242,10 @@ static unsigned int first_packet_length(struct sock *sk)
spin_lock_bh(&rcvq->lock);
while ((skb = skb_peek(rcvq)) != NULL &&
udp_lib_checksum_complete(skb)) {
-   UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS,
-IS_UDPLITE(sk));
-   UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS,
-IS_UDPLITE(sk));
+   __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS,
+   IS_UDPLITE(sk));
+   __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
+   IS_UDPLITE(sk));
atomic_inc(&sk->sk_drops);
__skb_unlink(skb, rcvq);
__skb_queue_tail(&list_kill, skb);
@@ -1514,9 +1514,9 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct 
sk_buff *skb)
 
/* Note that an ENOMEM error is charged twice */
if (rc == -ENOMEM)
-   UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
-is_udplite);
-   UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
+   __UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS,
+   is_udplite);
+   __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
kfree_skb(skb);
trace_udp_fail_queue_rcv_skb(rc, sk);
return -1;
@@ -1580,9 +1580,9 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff 
*skb)
 
ret = encap_rcv(sk, skb);
if (ret <= 0) {
-   UDP_INC_STATS_BH(sock_net(sk),
-UDP_MIB_INDATAGRAMS,
-is_udplite);
+   __UDP_INC_STATS(sock_net(sk),
+   UDP_MIB_INDATAGRAMS,
+   is_udplite);
return -ret;
}
}
@@ -1633,8 +1633,8 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff 
*skb)
 
udp_csum_pull_header(skb);
if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
-  

[PATCH net-next 05/17] net: xfrm: kill XFRM_INC_STATS_BH()

2016-04-27 Thread Eric Dumazet
Not used anymore.

Signed-off-by: Eric Dumazet 
---
 include/net/xfrm.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index dab9e1b82963..adfebd6f243c 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -45,10 +45,8 @@
 
 #ifdef CONFIG_XFRM_STATISTICS
 #define XFRM_INC_STATS(net, field) 
SNMP_INC_STATS((net)->mib.xfrm_statistics, field)
-#define XFRM_INC_STATS_BH(net, field)  
SNMP_INC_STATS_BH((net)->mib.xfrm_statistics, field)
 #else
 #define XFRM_INC_STATS(net, field) ((void)(net))
-#define XFRM_INC_STATS_BH(net, field)  ((void)(net))
 #endif
 
 
-- 
2.8.0.rc3.226.g39d4020



[PATCH net-next 03/17] net: rename ICMP_INC_STATS_BH()

2016-04-27 Thread Eric Dumazet
Rename ICMP_INC_STATS_BH() to __ICMP_INC_STATS()

Signed-off-by: Eric Dumazet 
---
 include/net/icmp.h  |  2 +-
 net/dccp/ipv4.c |  4 ++--
 net/ipv4/icmp.c | 16 
 net/ipv4/tcp_ipv4.c |  2 +-
 net/ipv4/udp.c  |  2 +-
 net/sctp/input.c|  2 +-
 6 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/include/net/icmp.h b/include/net/icmp.h
index 970028e13382..5a60ce819078 100644
--- a/include/net/icmp.h
+++ b/include/net/icmp.h
@@ -30,7 +30,7 @@ struct icmp_err {
 
 extern const struct icmp_err icmp_err_convert[];
 #define ICMP_INC_STATS(net, field) 
SNMP_INC_STATS((net)->mib.icmp_statistics, field)
-#define ICMP_INC_STATS_BH(net, field)  
SNMP_INC_STATS_BH((net)->mib.icmp_statistics, field)
+#define __ICMP_INC_STATS(net, field)   
SNMP_INC_STATS_BH((net)->mib.icmp_statistics, field)
 #define ICMPMSGOUT_INC_STATS(net, field)   
SNMP_INC_STATS_ATOMIC_LONG((net)->mib.icmpmsg_statistics, field+256)
 #define ICMPMSGIN_INC_STATS_BH(net, field) 
SNMP_INC_STATS_ATOMIC_LONG((net)->mib.icmpmsg_statistics, field)
 
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 4b78067669d6..14e30584e59d 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -247,7 +247,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
 
if (skb->len < offset + sizeof(*dh) ||
skb->len < offset + __dccp_basic_hdr_len(dh)) {
-   ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
+   __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
return;
}
 
@@ -256,7 +256,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
   iph->saddr, ntohs(dh->dccph_sport),
   inet_iif(skb));
if (!sk) {
-   ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
+   __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
return;
}
 
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 6333489771ed..995fef9c5099 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -363,7 +363,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
   icmp_param->data_len+icmp_param->head_len,
   icmp_param->head_len,
   ipc, rt, MSG_DONTWAIT) < 0) {
-   ICMP_INC_STATS_BH(sock_net(sk), ICMP_MIB_OUTERRORS);
+   __ICMP_INC_STATS(sock_net(sk), ICMP_MIB_OUTERRORS);
ip_flush_pending_frames(sk);
} else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
struct icmphdr *icmph = icmp_hdr(skb);
@@ -744,7 +744,7 @@ static void icmp_socket_deliver(struct sk_buff *skb, u32 
info)
 * avoid additional coding at protocol handlers.
 */
if (!pskb_may_pull(skb, iph->ihl * 4 + 8)) {
-   ICMP_INC_STATS_BH(dev_net(skb->dev), ICMP_MIB_INERRORS);
+   __ICMP_INC_STATS(dev_net(skb->dev), ICMP_MIB_INERRORS);
return;
}
 
@@ -865,7 +865,7 @@ static bool icmp_unreach(struct sk_buff *skb)
 out:
return true;
 out_err:
-   ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
+   __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
return false;
 }
 
@@ -877,7 +877,7 @@ out_err:
 static bool icmp_redirect(struct sk_buff *skb)
 {
if (skb->len < sizeof(struct iphdr)) {
-   ICMP_INC_STATS_BH(dev_net(skb->dev), ICMP_MIB_INERRORS);
+   __ICMP_INC_STATS(dev_net(skb->dev), ICMP_MIB_INERRORS);
return false;
}
 
@@ -956,7 +956,7 @@ static bool icmp_timestamp(struct sk_buff *skb)
return true;
 
 out_err:
-   ICMP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ICMP_MIB_INERRORS);
+   __ICMP_INC_STATS(dev_net(skb_dst(skb)->dev), ICMP_MIB_INERRORS);
return false;
 }
 
@@ -996,7 +996,7 @@ int icmp_rcv(struct sk_buff *skb)
skb_set_network_header(skb, nh);
}
 
-   ICMP_INC_STATS_BH(net, ICMP_MIB_INMSGS);
+   __ICMP_INC_STATS(net, ICMP_MIB_INMSGS);
 
if (skb_checksum_simple_validate(skb))
goto csum_error;
@@ -1052,9 +1052,9 @@ drop:
kfree_skb(skb);
return 0;
 csum_error:
-   ICMP_INC_STATS_BH(net, ICMP_MIB_CSUMERRORS);
+   __ICMP_INC_STATS(net, ICMP_MIB_CSUMERRORS);
 error:
-   ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
+   __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
goto drop;
 }
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index d2a5763e5abc..ebd8f3b9e61b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -372,7 +372,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
   th->dest, iph->saddr, ntohs(th->source),
   inet_iif(icmp_skb));
if (!sk) {
-   ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
+   __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
return;
}
if (sk->sk_state 

[PATCH net-next 00/17] net: snmp: update SNMP methods

2016-04-27 Thread Eric Dumazet
In the old days (before linux-3.0), SNMP counters were duplicated,
one set for user context, and another one for BH context.

After commit 8f0ea0fe3a03 ("snmp: reduce percpu needs by 50%")
we have a single copy, and what really matters is preemption being
enabled or disabled, since we use this_cpu_inc() or __this_cpu_inc()
respectively.

This patch series kills the obsolete STATS_USER() helpers,
and rename all XXX_BH() helpers to __XXX() ones, to more
closely match conventions used to update per cpu variables.

This is probably going to hurt maintainers' job for a while,
since cherry-picks will not be clean, but this had to be
cleaned at one point. I am so sorry guys.

Eric Dumazet (17):
  net: snmp: kill various STATS_USER() helpers
  dccp: rename DCCP_INC_STATS_BH()
  net: rename ICMP_INC_STATS_BH()
  net: udp: rename UDP_INC_STATS_BH()
  net: xfrm: kill XFRM_INC_STATS_BH()
  net: tcp: rename TCP_INC_STATS_BH
  net: icmp: rename ICMPMSGIN_INC_STATS_BH()
  net: sctp: rename SCTP_INC_STATS_BH()
  net: rename IP_INC_STATS_BH()
  net: rename ICMP6_INC_STATS_BH()
  net: rename IP_ADD_STATS_BH()
  net: rename IP_UPD_PO_STATS_BH()
  net: rename NET_{ADD|INC}_STATS_BH()
  ipv6: rename IP6_INC_STATS_BH()
  ipv6: rename IP6_UPD_PO_STATS_BH()
  ipv6: kill ICMP6MSGIN_INC_STATS_BH()
  net: snmp: kill STATS_BH macros

 include/net/icmp.h  |   4 +-
 include/net/ip.h|  12 ++---
 include/net/ipv6.h  |  48 +-
 include/net/sctp/sctp.h |   7 ++-
 include/net/snmp.h  |  44 ++--
 include/net/tcp.h   |  15 +++---
 include/net/udp.h   |  34 ++---
 include/net/xfrm.h  |   4 --
 net/bridge/br_netfilter_hooks.c |   6 +--
 net/bridge/br_netfilter_ipv6.c  |  10 ++--
 net/core/dev.c  |   4 +-
 net/dccp/dccp.h |   6 +--
 net/dccp/input.c|   2 +-
 net/dccp/ipv4.c |  24 -
 net/dccp/ipv6.c |  24 -
 net/dccp/minisocks.c|   2 +-
 net/dccp/options.c  |   2 +-
 net/dccp/timer.c|   8 +--
 net/ipv4/arp.c  |   2 +-
 net/ipv4/icmp.c |  18 +++
 net/ipv4/inet_connection_sock.c |   4 +-
 net/ipv4/inet_hashtables.c  |   2 +-
 net/ipv4/inet_timewait_sock.c   |   4 +-
 net/ipv4/ip_forward.c   |   6 +--
 net/ipv4/ip_fragment.c  |  14 +++---
 net/ipv4/ip_input.c |  34 ++---
 net/ipv4/route.c|   6 +--
 net/ipv4/syncookies.c   |   4 +-
 net/ipv4/tcp.c  |  18 +++
 net/ipv4/tcp_cdg.c  |  20 
 net/ipv4/tcp_cubic.c|  20 
 net/ipv4/tcp_fastopen.c |  14 +++---
 net/ipv4/tcp_input.c| 108 +---
 net/ipv4/tcp_ipv4.c |  40 +++
 net/ipv4/tcp_minisocks.c|  14 +++---
 net/ipv4/tcp_output.c   |  18 +++
 net/ipv4/tcp_recovery.c |   4 +-
 net/ipv4/tcp_timer.c|  22 
 net/ipv4/udp.c  |  72 +--
 net/ipv6/exthdrs.c  |  66 
 net/ipv6/icmp.c |  12 ++---
 net/ipv6/inet6_hashtables.c |   2 +-
 net/ipv6/ip6_input.c|  32 ++--
 net/ipv6/ip6_output.c   |  34 ++---
 net/ipv6/ip6mr.c|   8 +--
 net/ipv6/reassembly.c   |  32 ++--
 net/ipv6/syncookies.c   |   4 +-
 net/ipv6/tcp_ipv6.c |  34 ++---
 net/ipv6/udp.c  |  91 +
 net/rxrpc/ar-input.c|   4 +-
 net/sctp/chunk.c|   2 +-
 net/sctp/input.c|  16 +++---
 net/sctp/ipv6.c |   2 +-
 net/sunrpc/xprtsock.c   |   4 +-
 54 files changed, 512 insertions(+), 531 deletions(-)

-- 
2.8.0.rc3.226.g39d4020



Re: [PATCH net-next v2 0/7] net: unify dst caching for tunnel devices

2016-04-27 Thread Eric Dumazet
On Tue, 2016-02-16 at 20:22 -0500, David Miller wrote:
> From: Paolo Abeni 
> Date: Fri, 12 Feb 2016 15:43:52 +0100
> 
> > This patch series tries to unify the dst cache implementations currently
> > present in the kernel, namely in ip_tunnel.c and ip6_tunnel.c, introducing a
> > new generic implementation, replacing the existing ones, and then using
> > the new implementation in other tunnel devices which currently lack it.
> > 
> > The new dst implementation is compiled, as built-in, only if any device 
> > using
> > it is enabled.
> > 
> > Caching the dst for the tunnel remote address gives small, but measurable,
> > performance improvement when tunneling over ipv4 (in the 2%-4% range) and
> > significant ones when tunneling over ipv6 (roughly 60% when no
> > fragmentation/segmentation takes place and the tunnel local address
> > is not specified).
> > 
> > v2:
> > - move the vxlan dst_cache usage inside the device lookup functions
> > - fix usage after free for lwt tunnel moving the dst cache storage inside
> >   the dst_metadata,
> > - sparse coding style cleanup
> 
> Series applied, thanks for doing this work as it is a major improvement.

Paolo, please check the following warning:

This might be caused by e09acddf873bf775b208b452a4c3a3fd26fa9427
("ip_tunnel: replace dst_cache with generic implementation")


[   73.982267] BUG: using smp_processor_id() in preemptible [] code: 
ip/10604
[   73.990978] caller is debug_smp_processor_id+0x17/0x20
[   73.990981] CPU: 26 PID: 10604 Comm: ip Not tainted 4.6.0-dbx-DEV #1075
[   73.990982] Hardware name: ...
[   73.990983]   881fc11d3b98 8140a51f 
001a
[   73.990987]  81a585c5 881fc11d3bc8 8142700f 
60bfa000e0c0
[   73.990989]  881fcb6b0f00 9807f60a 881fcb6b0f00 
881fc11d3bd8
[   73.990992] Call Trace:
[   73.990996]  [] dump_stack+0x67/0x98
[   73.990998]  [] check_preemption_disabled+0xef/0x100
[   73.991000]  [] debug_smp_processor_id+0x17/0x20
[   73.991003]  [] dst_cache_set_ip4+0x2c/0x70
[   73.991006]  [] ip_tunnel_bind_dev+0x101/0x170
[   73.991008]  [] ip_tunnel_ioctl+0x330/0x430
[   73.991010]  [] ? ip_tunnel_ioctl+0x5/0x430
[   73.991012]  [] ipgre_tunnel_ioctl+0xdb/0x160
[   73.991015]  [] ? rtnl_lock+0x17/0x20
[   73.991017]  [] dev_ifsioc+0x325/0x370
[   73.991018]  [] dev_ioctl+0xd2/0x630
[   73.991022]  [] sock_ioctl+0xd3/0x270
[   73.991025]  [] do_vfs_ioctl+0x93/0x6f0
[   73.991026]  [] ? sock_alloc_file+0x91/0x120
[   73.991029]  [] ? __fget_light+0x6c/0x90
[   73.991031]  [] SyS_ioctl+0x8b/0xa0
[   73.991042]  [] entry_SYSCALL_64_fastpath+0x18/0xa8

Thanks




Re: [RFC 12/20] net: dsa: rename dst->ds to dst->switches

2016-04-27 Thread Andrew Lunn
On Wed, Apr 27, 2016 at 06:30:09PM -0400, Vivien Didelot wrote:
> dsa_switch stores the net_device pointers in a "ports" member. Be
> consistent and store the dsa_switch pointer in a "switches" member of
> the dsa_switch_tree structure.
> 
> This frees up the "ds" member for a future dsa_switch list.

NACK.

Or you need to change ds absolutely everywhere, in all drivers and
APIs. We cannot have ds meaning two different things.

   Andrew

> 
> Signed-off-by: Vivien Didelot 
> ---
>  include/net/dsa.h | 2 +-
>  net/dsa/dsa.c | 8 
>  net/dsa/tag_brcm.c| 2 +-
>  net/dsa/tag_dsa.c | 2 +-
>  net/dsa/tag_edsa.c| 2 +-
>  net/dsa/tag_trailer.c | 2 +-
>  6 files changed, 9 insertions(+), 9 deletions(-)
> 
> diff --git a/include/net/dsa.h b/include/net/dsa.h
> index 5f2e7df..389227d 100644
> --- a/include/net/dsa.h
> +++ b/include/net/dsa.h
> @@ -124,7 +124,7 @@ struct dsa_switch_tree {
>   /*
>* Data for the individual switch chips.
>*/
> - struct dsa_switch   *ds[DSA_MAX_SWITCHES];
> + struct dsa_switch   *switches[DSA_MAX_SWITCHES];
>  };
>  
>  struct dsa_port {
> diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
> index 3daffb6..aa4a61a 100644
> --- a/net/dsa/dsa.c
> +++ b/net/dsa/dsa.c
> @@ -857,7 +857,7 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, 
> struct net_device *dev,
>   continue;
>   }
>  
> - dst->ds[i] = ds;
> + dst->switches[i] = ds;
>  
>   ++configured;
>   }
> @@ -953,7 +953,7 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst)
>   wmb();
>  
>   for (i = 0; i < dst->pd->nr_chips; i++) {
> - struct dsa_switch *ds = dst->ds[i];
> + struct dsa_switch *ds = dst->switches[i];
>  
>   if (ds)
>   dsa_switch_destroy(ds);
> @@ -1006,7 +1006,7 @@ static int dsa_suspend(struct device *d)
>   int i, ret = 0;
>  
>   for (i = 0; i < dst->pd->nr_chips; i++) {
> - struct dsa_switch *ds = dst->ds[i];
> + struct dsa_switch *ds = dst->switches[i];
>  
>   if (ds != NULL)
>   ret = dsa_switch_suspend(ds);
> @@ -1022,7 +1022,7 @@ static int dsa_resume(struct device *d)
>   int i, ret = 0;
>  
>   for (i = 0; i < dst->pd->nr_chips; i++) {
> - struct dsa_switch *ds = dst->ds[i];
> + struct dsa_switch *ds = dst->switches[i];
>  
>   if (ds != NULL)
>   ret = dsa_switch_resume(ds);
> diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
> index 3d5aabc..35fc75b 100644
> --- a/net/dsa/tag_brcm.c
> +++ b/net/dsa/tag_brcm.c
> @@ -102,7 +102,7 @@ static int brcm_tag_rcv(struct sk_buff *skb, struct 
> net_device *dev,
>   if (unlikely(dst == NULL))
>   goto out_drop;
>  
> - ds = dst->ds[0];
> + ds = dst->switches[0];
>  
>   skb = skb_unshare(skb, GFP_ATOMIC);
>   if (skb == NULL)
> diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
> index c870cfa..bf3eebf8 100644
> --- a/net/dsa/tag_dsa.c
> +++ b/net/dsa/tag_dsa.c
> @@ -109,7 +109,7 @@ static int dsa_rcv(struct sk_buff *skb, struct net_device 
> *dev,
>*/
>   if (source_device >= dst->pd->nr_chips)
>   goto out_drop;
> - ds = dst->ds[source_device];
> + ds = dst->switches[source_device];
>   if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL)
>   goto out_drop;
>  
> diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
> index 898f949d..4ddbb85 100644
> --- a/net/dsa/tag_edsa.c
> +++ b/net/dsa/tag_edsa.c
> @@ -122,7 +122,7 @@ static int edsa_rcv(struct sk_buff *skb, struct 
> net_device *dev,
>*/
>   if (source_device >= dst->pd->nr_chips)
>   goto out_drop;
> - ds = dst->ds[source_device];
> + ds = dst->switches[source_device];
>   if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL)
>   goto out_drop;
>  
> diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
> index eaa3440..ade0bbf 100644
> --- a/net/dsa/tag_trailer.c
> +++ b/net/dsa/tag_trailer.c
> @@ -67,7 +67,7 @@ static int trailer_rcv(struct sk_buff *skb, struct 
> net_device *dev,
>  
>   if (unlikely(dst == NULL))
>   goto out_drop;
> - ds = dst->ds[0];
> + ds = dst->switches[0];
>  
>   skb = skb_unshare(skb, GFP_ATOMIC);
>   if (skb == NULL)
> -- 
> 2.8.0
> 


Re: [RFC 07/20] net: dsa: list ports in switch

2016-04-27 Thread Andrew Lunn
On Wed, Apr 27, 2016 at 06:30:04PM -0400, Vivien Didelot wrote:
> List DSA port structures in their switch structure, so that drivers can
> iterate on them to retrieve information such as their ports membership.

And this would be so much easier using a plain array.

Andrew

> 
> Signed-off-by: Vivien Didelot 
> ---
>  include/net/dsa.h | 9 +
>  net/dsa/dsa.c | 4 
>  2 files changed, 13 insertions(+)
> 
> diff --git a/include/net/dsa.h b/include/net/dsa.h
> index 69e467c..5f2e7df 100644
> --- a/include/net/dsa.h
> +++ b/include/net/dsa.h
> @@ -32,6 +32,11 @@ enum dsa_tag_protocol {
>  #define DSA_MAX_SWITCHES 4
>  #define DSA_MAX_PORTS12
>  
> +#define dsa_switch_for_each_port(_ds, _dp, _num_ports)   
> \
> + for (_dp = list_first_entry(&_ds->dp, typeof(*_dp), list);  \
> +  &_dp->list != (&_ds->dp) && _dp->port < _num_ports;\
> +  _dp = list_next_entry(_dp, list))
> +
>  struct dsa_chip_data {
>   /*
>* How to access the switch configuration registers.
> @@ -123,6 +128,8 @@ struct dsa_switch_tree {
>  };
>  
>  struct dsa_port {
> + struct list_headlist;
> +
>   struct dsa_switch   *ds;
>   int port;
>  
> @@ -173,6 +180,8 @@ struct dsa_switch {
>   u32 phys_mii_mask;
>   struct mii_bus  *slave_mii_bus;
>   struct net_device   *ports[DSA_MAX_PORTS];
> +
> + struct list_headdp;
>  };
>  
>  static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p)
> diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
> index 222494c..3daffb6 100644
> --- a/net/dsa/dsa.c
> +++ b/net/dsa/dsa.c
> @@ -225,6 +225,8 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, 
> struct device *parent)
>   int index = ds->index;
>   int i, ret;
>  
> + INIT_LIST_HEAD(&ds->dp);
> +
>   /*
>* Validate supplied switch configuration.
>*/
> @@ -238,6 +240,8 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, 
> struct device *parent)
>   dp[i]->ds = ds;
>   dp[i]->port = i;
>  
> + list_add_tail(&dp[i]->list, &ds->dp);
> +
>   name = pd->port_names[i];
>   if (name == NULL)
>   continue;
> -- 
> 2.8.0
> 


Re: [RFC 03/20] net: dsa: pass dsa_port down to drivers bridge ops

2016-04-27 Thread Andrew Lunn
On Wed, Apr 27, 2016 at 06:30:00PM -0400, Vivien Didelot wrote:
> Now that DSA has a proper structure for DSA ports, pass it down to the
> port_bridge_join and port_bridge_leave driver functions.

I should look at the later patches, but this looks like a step
backwards.

If your ports array is a member of ds, you have no need for this patch
at all.

What advantage does this change bring?

  Andrew

> 
> Signed-off-by: Vivien Didelot 
> ---
>  drivers/net/dsa/bcm_sf2.c   | 28 ++--
>  drivers/net/dsa/mv88e6xxx.c | 10 +-
>  drivers/net/dsa/mv88e6xxx.h |  4 ++--
>  include/net/dsa.h   |  4 ++--
>  net/dsa/slave.c |  4 ++--
>  5 files changed, 25 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
> index f394ea9..2d7b297 100644
> --- a/drivers/net/dsa/bcm_sf2.c
> +++ b/drivers/net/dsa/bcm_sf2.c
> @@ -491,15 +491,15 @@ static int bcm_sf2_sw_fast_age_port(struct dsa_switch  
> *ds, int port)
>   return 0;
>  }
>  
> -static int bcm_sf2_sw_br_join(struct dsa_switch *ds, int port,
> +static int bcm_sf2_sw_br_join(struct dsa_switch *ds, struct dsa_port *dp,
> struct net_device *bridge)
>  {
>   struct bcm_sf2_priv *priv = ds_to_priv(ds);
>   unsigned int i;
>   u32 reg, p_ctl;
>  
> - priv->port_sts[port].bridge_dev = bridge;
> - p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(port));
> + priv->port_sts[dp->port].bridge_dev = bridge;
> + p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(dp->port));
>  
>   for (i = 0; i < priv->hw_params.num_ports; i++) {
>   if (priv->port_sts[i].bridge_dev != bridge)
> @@ -509,7 +509,7 @@ static int bcm_sf2_sw_br_join(struct dsa_switch *ds, int 
> port,
>* membership and update the remote port bitmask
>*/
>   reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(i));
> - reg |= 1 << port;
> + reg |= 1 << dp->port;
>   core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(i));
>   priv->port_sts[i].vlan_ctl_mask = reg;
>  
> @@ -519,20 +519,20 @@ static int bcm_sf2_sw_br_join(struct dsa_switch *ds, 
> int port,
>   /* Configure the local port VLAN control membership to include
>* remote ports and update the local port bitmask
>*/
> - core_writel(priv, p_ctl, CORE_PORT_VLAN_CTL_PORT(port));
> - priv->port_sts[port].vlan_ctl_mask = p_ctl;
> + core_writel(priv, p_ctl, CORE_PORT_VLAN_CTL_PORT(dp->port));
> + priv->port_sts[dp->port].vlan_ctl_mask = p_ctl;
>  
>   return 0;
>  }
>  
> -static void bcm_sf2_sw_br_leave(struct dsa_switch *ds, int port,
> +static void bcm_sf2_sw_br_leave(struct dsa_switch *ds, struct dsa_port *dp,
>   struct net_device *bridge)
>  {
>   struct bcm_sf2_priv *priv = ds_to_priv(ds);
>   unsigned int i;
>   u32 reg, p_ctl;
>  
> - p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(port));
> + p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(dp->port));
>  
>   for (i = 0; i < priv->hw_params.num_ports; i++) {
>   /* Don't touch the remaining ports */
> @@ -540,18 +540,18 @@ static void bcm_sf2_sw_br_leave(struct dsa_switch *ds, 
> int port,
>   continue;
>  
>   reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(i));
> - reg &= ~(1 << port);
> + reg &= ~(1 << dp->port);
>   core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(i));
> - priv->port_sts[port].vlan_ctl_mask = reg;
> + priv->port_sts[dp->port].vlan_ctl_mask = reg;
>  
>   /* Prevent self removal to preserve isolation */
> - if (port != i)
> + if (dp->port != i)
>   p_ctl &= ~(1 << i);
>   }
>  
> - core_writel(priv, p_ctl, CORE_PORT_VLAN_CTL_PORT(port));
> - priv->port_sts[port].vlan_ctl_mask = p_ctl;
> - priv->port_sts[port].bridge_dev = NULL;
> + core_writel(priv, p_ctl, CORE_PORT_VLAN_CTL_PORT(dp->port));
> + priv->port_sts[dp->port].vlan_ctl_mask = p_ctl;
> + priv->port_sts[dp->port].bridge_dev = NULL;
>  }
>  
>  static void bcm_sf2_sw_br_set_stp_state(struct dsa_switch *ds, int port,
> diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
> index 86f8f2f..3f78c73 100644
> --- a/drivers/net/dsa/mv88e6xxx.c
> +++ b/drivers/net/dsa/mv88e6xxx.c
> @@ -2203,7 +2203,7 @@ unlock:
>   return err;
>  }
>  
> -int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port,
> +int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, struct dsa_port *dp,
>  struct net_device *bridge)
>  {
>   struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
> @@ -2212,7 +2212,7 @@ int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, 
> int port,
>   mutex_lock(&ps->smi_mutex);
>  
>   /* Assign the bridge and remap 

Re: [RFC 01/20] net: dsa: introduce a dsa_port structure

2016-04-27 Thread Andrew Lunn
> @@ -230,6 +231,13 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, 
> struct device *parent)
>   for (i = 0; i < DSA_MAX_PORTS; i++) {
>   char *name;
>  
> + dp[i] = devm_kzalloc(parent, sizeof(*dp), GFP_KERNEL);
> + if (dp[i] == NULL)
> + return -ENOMEM;

You are not saving anything here by dynamically allocating the memory,
since you do it for all ports. So just make it a member of ds with
size DSA_MAX_PORTS. I would then call this array structure ports.

Humm, i also think keeping it in dsa_slave_priv is wrong, if you have
defined the structure in the global include/net/dsa.h. dsa_switch is a
better place for it.

 Andrew


[RFC 00/20] net: dsa: dsa_port structure and tree-wide ops

2016-04-27 Thread Vivien Didelot
In a previous RFC [1], I introduced the need to implement cross-chip operations
in the DSA layer.

Here's a summary. In a multiple switches setup such as the following, every
switch of the tree must be aware of its configuration in order to configure a
correct data path between chips.

      sw0             sw1             sw2
[ 0 1 2 3 4 5 ] [ 0 1 2 3 4 5 ] [ 0 1 2 3 4 5 ]
  |   '     ^     ^         ^     ^     '
  v   '     |     |         |     |     '
 CPU  '     `-DSA-'         `-DSA-'     '
      '                                 '
      + - - - - - - - br0 - - - - - - - +

For instance, bridging sw0p2 and sw2p3 together in a VLAN 42 requires both
chips to allow frames from the external port to egress its internal port, all
DSA ports between them must learn their address, and sw1 must also be aware of
the VLAN 42 in order to allow tagged packets to cross the chip.

To implement all that nicely, we need a way to propagate such notifications to
every switch of a DSA tree.

The patchset introduces a dsa_port structure to bundle port-centric info such
as its switch index, port number, bridge device, and changes the DSA driver
functions to take such a structure as parameter instead of an internal port
number.

The DSA layer then introduces tree-wide operations, which call every switch
driver when a port operation occurs. It is the responsibility of a switch
driver to check if the related port is internal or external to its chip, and
act accordingly.

See the patchset as different logical groups (that may be split later):

  * patches 1 to 5: introduce the dsa_port structure to DSA drivers

  * patches 6 to 11: put the bridge device in the dsa_port structure and allow
the DSA drivers to get rid of their private bridge_dev pointer

  * patches 12 to 16: introduce tree-wide operations. Drivers are now aware of
    cross-chip port operations

  * patches 17 to 20: implement cross-chip hardware bridging in mv88e6xxx

A branch is available here [2] and a debugfs patch is maintained here [3] in
order to inspect the Marvell switch's internal structures, such as the PVT.

Many things remain to be done after this, such as using dsa_port_is_{cpu,dsa}
helpers, getting rid of dst->switches and ds->ports in favor of their related
switch and port lists, and introduce dynamic number of switches and ports.

[1] https://lkml.org/lkml/2016/4/20/733
[2] https://github.com/vivien/linux/tree/dsa/dev
[3] 
https://github.com/vivien/linux/commit/da33b1a698fef3a66515a05e2b9f31d0279a89d4.patch

Cheers,

Vivien Didelot (20):
  net: dsa: introduce a dsa_port structure
  net: dsa: be consistent with NETDEV_CHANGEUPPER
  net: dsa: pass dsa_port down to drivers bridge ops
  net: dsa: pass dsa_port down to drivers FDB ops
  net: dsa: pass dsa_port down to drivers VLAN ops
  net: dsa: move bridge device in dsa_port
  net: dsa: list ports in switch
  net: dsa: bcm_sf2: use bridge device from dsa_port
  net: dsa: mv88e6xxx: check HW vlan with dsa_port
  net: dsa: mv88e6xxx: setup a dsa_port
  net: dsa: mv88e6xxx: use bridge from dsa_port
  net: dsa: rename dst->ds to dst->switches
  net: dsa: list switches in tree
  net: dsa: add tree-wide bridge ops
  net: dsa: add tree-wide FDB ops
  net: dsa: add tree-wide VLAN ops
  net: dsa: mv88e6xxx: factorize port bridge change
  net: dsa: mv88e6xxx: add flags to info
  net: dsa: mv88e6xxx: conditionally init PVT
  net: dsa: mv88e6xxx: setup PVT on cross-chip ops

 drivers/net/dsa/bcm_sf2.c   |  92 +-
 drivers/net/dsa/bcm_sf2.h   |   2 -
 drivers/net/dsa/mv88e6352.c |   1 +
 drivers/net/dsa/mv88e6xxx.c | 397 
 drivers/net/dsa/mv88e6xxx.h |  41 +++--
 include/net/dsa.h   |  57 +--
 net/dsa/Makefile|   2 +-
 net/dsa/dsa.c   |  25 ++-
 net/dsa/dsa_priv.h  |  37 +++--
 net/dsa/slave.c | 283 +--
 net/dsa/tag_brcm.c  |   6 +-
 net/dsa/tag_dsa.c   |  10 +-
 net/dsa/tag_edsa.c  |  10 +-
 net/dsa/tag_trailer.c   |   4 +-
 net/dsa/tree.c  | 187 +
 15 files changed, 751 insertions(+), 403 deletions(-)
 create mode 100644 net/dsa/tree.c

-- 
2.8.0



[RFC 05/20] net: dsa: pass dsa_port down to drivers VLAN ops

2016-04-27 Thread Vivien Didelot
Now that DSA has a proper structure for DSA ports, pass it down to the
port_vlan_{filtering,prepare,add,del,dump} driver functions.

Signed-off-by: Vivien Didelot 
---
 drivers/net/dsa/mv88e6xxx.c | 41 +
 drivers/net/dsa/mv88e6xxx.h | 10 +-
 include/net/dsa.h   | 11 ++-
 net/dsa/slave.c | 10 +-
 4 files changed, 37 insertions(+), 35 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index c1ff763..7e03f4c 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -1367,7 +1367,7 @@ static int _mv88e6xxx_vtu_getnext(struct dsa_switch *ds,
return 0;
 }
 
-int mv88e6xxx_port_vlan_dump(struct dsa_switch *ds, int port,
+int mv88e6xxx_port_vlan_dump(struct dsa_switch *ds, struct dsa_port *dp,
 struct switchdev_obj_port_vlan *vlan,
 int (*cb)(struct switchdev_obj *obj))
 {
@@ -1378,7 +1378,7 @@ int mv88e6xxx_port_vlan_dump(struct dsa_switch *ds, int 
port,
 
mutex_lock(&ps->smi_mutex);
 
-   err = _mv88e6xxx_port_pvid_get(ds, port, &pvid);
+   err = _mv88e6xxx_port_pvid_get(ds, dp->port, &pvid);
if (err)
goto unlock;
 
@@ -1394,14 +1394,15 @@ int mv88e6xxx_port_vlan_dump(struct dsa_switch *ds, int 
port,
if (!next.valid)
break;
 
-   if (next.data[port] == GLOBAL_VTU_DATA_MEMBER_TAG_NON_MEMBER)
+   if (next.data[dp->port] ==
+   GLOBAL_VTU_DATA_MEMBER_TAG_NON_MEMBER)
continue;
 
/* reinit and dump this VLAN obj */
vlan->vid_begin = vlan->vid_end = next.vid;
vlan->flags = 0;
 
-   if (next.data[port] == GLOBAL_VTU_DATA_MEMBER_TAG_UNTAGGED)
+   if (next.data[dp->port] == GLOBAL_VTU_DATA_MEMBER_TAG_UNTAGGED)
vlan->flags |= BRIDGE_VLAN_INFO_UNTAGGED;
 
if (next.vid == pvid)
@@ -1789,7 +1790,7 @@ static const char * const 
mv88e6xxx_port_8021q_mode_names[] = {
[PORT_CONTROL_2_8021Q_SECURE] = "Secure",
 };
 
-int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port,
+int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, struct dsa_port *dp,
  bool vlan_filtering)
 {
struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
@@ -1799,7 +1800,7 @@ int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, 
int port,
 
mutex_lock(&ps->smi_mutex);
 
-   ret = _mv88e6xxx_reg_read(ds, REG_PORT(port), PORT_CONTROL_2);
+   ret = _mv88e6xxx_reg_read(ds, REG_PORT(dp->port), PORT_CONTROL_2);
if (ret < 0)
goto unlock;
 
@@ -1809,12 +1810,12 @@ int mv88e6xxx_port_vlan_filtering(struct dsa_switch 
*ds, int port,
ret &= ~PORT_CONTROL_2_8021Q_MASK;
ret |= new & PORT_CONTROL_2_8021Q_MASK;
 
-   ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), PORT_CONTROL_2,
-  ret);
+   ret = _mv88e6xxx_reg_write(ds, REG_PORT(dp->port),
+  PORT_CONTROL_2, ret);
if (ret < 0)
goto unlock;
 
-   netdev_dbg(ds->ports[port], "802.1Q Mode %s (was %s)\n",
+   netdev_dbg(ds->ports[dp->port], "802.1Q Mode %s (was %s)\n",
   mv88e6xxx_port_8021q_mode_names[new],
   mv88e6xxx_port_8021q_mode_names[old]);
}
@@ -1826,7 +1827,7 @@ unlock:
return ret;
 }
 
-int mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, int port,
+int mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, struct dsa_port *dp,
const struct switchdev_obj_port_vlan *vlan,
struct switchdev_trans *trans)
 {
@@ -1835,7 +1836,7 @@ int mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, 
int port,
/* If the requested port doesn't belong to the same bridge as the VLAN
 * members, do not support it (yet) and fallback to software VLAN.
 */
-   err = mv88e6xxx_port_check_hw_vlan(ds, port, vlan->vid_begin,
+   err = mv88e6xxx_port_check_hw_vlan(ds, dp->port, vlan->vid_begin,
   vlan->vid_end);
if (err)
return err;
@@ -1863,7 +1864,7 @@ static int _mv88e6xxx_port_vlan_add(struct dsa_switch 
*ds, int port, u16 vid,
return _mv88e6xxx_vtu_loadpurge(ds, &vlan);
 }
 
-void mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port,
+void mv88e6xxx_port_vlan_add(struct dsa_switch *ds, struct dsa_port *dp,
 const struct switchdev_obj_port_vlan *vlan,
 struct switchdev_trans *trans)
 {
@@ -1875,12 +1876,12 @@ void mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int 
port,

[RFC 01/20] net: dsa: introduce a dsa_port structure

2016-04-27 Thread Vivien Didelot
Introduce a new dsa_port structure, used to store port-centric
information, such as a pointer to its DSA switch and its port number.
It will later contain further data, such as its bridge device.

This is a first step towards implementing cross-chip port operations.

Signed-off-by: Vivien Didelot 
---
 include/net/dsa.h |   5 ++
 net/dsa/dsa.c |  10 +++-
 net/dsa/dsa_priv.h|  13 ++---
 net/dsa/slave.c   | 147 +-
 net/dsa/tag_brcm.c|   4 +-
 net/dsa/tag_dsa.c |   8 +--
 net/dsa/tag_edsa.c|   8 +--
 net/dsa/tag_trailer.c |   2 +-
 8 files changed, 104 insertions(+), 93 deletions(-)

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 2d280ab..255c108 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -122,6 +122,11 @@ struct dsa_switch_tree {
struct dsa_switch   *ds[DSA_MAX_SWITCHES];
 };
 
+struct dsa_port {
+   struct dsa_switch   *ds;
+   int port;
+};
+
 struct dsa_switch {
/*
 * Parent switch tree, and switch index.
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index d61ceed..222494c 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -219,6 +219,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, 
struct device *parent)
 {
struct dsa_switch_driver *drv = ds->drv;
struct dsa_switch_tree *dst = ds->dst;
+   struct dsa_port *dp[DSA_MAX_PORTS];
struct dsa_chip_data *pd = ds->pd;
bool valid_name_found = false;
int index = ds->index;
@@ -230,6 +231,13 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, 
struct device *parent)
for (i = 0; i < DSA_MAX_PORTS; i++) {
char *name;
 
+   dp[i] = devm_kzalloc(parent, sizeof(*dp), GFP_KERNEL);
+   if (dp[i] == NULL)
+   return -ENOMEM;
+
+   dp[i]->ds = ds;
+   dp[i]->port = i;
+
name = pd->port_names[i];
if (name == NULL)
continue;
@@ -328,7 +336,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, 
struct device *parent)
if (!(ds->enabled_port_mask & (1 << i)))
continue;
 
-   ret = dsa_slave_create(ds, parent, i, pd->port_names[i]);
+   ret = dsa_slave_create(dp[i], parent, pd->port_names[i]);
if (ret < 0) {
netdev_err(dst->master_netdev, "[%d]: can't create dsa 
slave device for port %d(%s): %d\n",
   index, i, pd->port_names[i], ret);
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index dfa3377..c7d5df0 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -26,13 +26,6 @@ struct dsa_slave_priv {
struct net_device *dev);
 
/*
-* Which switch this port is a part of, and the port index
-* for this port.
-*/
-   struct dsa_switch   *parent;
-   u8  port;
-
-   /*
 * The phylib phy_device pointer for the PHY connected
 * to this port.
 */
@@ -46,6 +39,9 @@ struct dsa_slave_priv {
 #ifdef CONFIG_NET_POLL_CONTROLLER
struct netpoll  *netpoll;
 #endif
+
+   /* DSA specific data */
+   struct dsa_port *dp;
 };
 
 /* dsa.c */
@@ -54,8 +50,7 @@ extern char dsa_driver_version[];
 /* slave.c */
 extern const struct dsa_device_ops notag_netdev_ops;
 void dsa_slave_mii_bus_init(struct dsa_switch *ds);
-int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
-int port, char *name);
+int dsa_slave_create(struct dsa_port *dp, struct device *parent, char *name);
 void dsa_slave_destroy(struct net_device *slave_dev);
 int dsa_slave_suspend(struct net_device *slave_dev);
 int dsa_slave_resume(struct net_device *slave_dev);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 3b6750f..6115444 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -61,7 +61,7 @@ static int dsa_slave_get_iflink(const struct net_device *dev)
 {
struct dsa_slave_priv *p = netdev_priv(dev);
 
-   return p->parent->dst->master_netdev->ifindex;
+   return p->dp->ds->dst->master_netdev->ifindex;
 }
 
 static inline bool dsa_port_is_bridged(struct dsa_slave_priv *p)
@@ -72,8 +72,8 @@ static inline bool dsa_port_is_bridged(struct dsa_slave_priv 
*p)
 static int dsa_slave_open(struct net_device *dev)
 {
struct dsa_slave_priv *p = netdev_priv(dev);
-   struct net_device *master = p->parent->dst->master_netdev;
-   struct dsa_switch *ds = p->parent;
+   struct dsa_switch *ds = p->dp->ds;
+   struct net_device *master = ds->dst->master_netdev;
u8 stp_state = dsa_port_is_bridged(p) ?
BR_STATE_BLOCKING : BR_STATE_FORWARDING;
int err;
@@ -99,13 +99,13 @@ static int dsa_slave_open(struct net_device *dev)
}

[RFC 03/20] net: dsa: pass dsa_port down to drivers bridge ops

2016-04-27 Thread Vivien Didelot
Now that DSA has a proper structure for DSA ports, pass it down to the
port_bridge_join and port_bridge_leave driver functions.

Signed-off-by: Vivien Didelot 
---
 drivers/net/dsa/bcm_sf2.c   | 28 ++--
 drivers/net/dsa/mv88e6xxx.c | 10 +-
 drivers/net/dsa/mv88e6xxx.h |  4 ++--
 include/net/dsa.h   |  4 ++--
 net/dsa/slave.c |  4 ++--
 5 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index f394ea9..2d7b297 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -491,15 +491,15 @@ static int bcm_sf2_sw_fast_age_port(struct dsa_switch  
*ds, int port)
return 0;
 }
 
-static int bcm_sf2_sw_br_join(struct dsa_switch *ds, int port,
+static int bcm_sf2_sw_br_join(struct dsa_switch *ds, struct dsa_port *dp,
  struct net_device *bridge)
 {
struct bcm_sf2_priv *priv = ds_to_priv(ds);
unsigned int i;
u32 reg, p_ctl;
 
-   priv->port_sts[port].bridge_dev = bridge;
-   p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(port));
+   priv->port_sts[dp->port].bridge_dev = bridge;
+   p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(dp->port));
 
for (i = 0; i < priv->hw_params.num_ports; i++) {
if (priv->port_sts[i].bridge_dev != bridge)
@@ -509,7 +509,7 @@ static int bcm_sf2_sw_br_join(struct dsa_switch *ds, int 
port,
 * membership and update the remote port bitmask
 */
reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(i));
-   reg |= 1 << port;
+   reg |= 1 << dp->port;
core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(i));
priv->port_sts[i].vlan_ctl_mask = reg;
 
@@ -519,20 +519,20 @@ static int bcm_sf2_sw_br_join(struct dsa_switch *ds, int 
port,
/* Configure the local port VLAN control membership to include
 * remote ports and update the local port bitmask
 */
-   core_writel(priv, p_ctl, CORE_PORT_VLAN_CTL_PORT(port));
-   priv->port_sts[port].vlan_ctl_mask = p_ctl;
+   core_writel(priv, p_ctl, CORE_PORT_VLAN_CTL_PORT(dp->port));
+   priv->port_sts[dp->port].vlan_ctl_mask = p_ctl;
 
return 0;
 }
 
-static void bcm_sf2_sw_br_leave(struct dsa_switch *ds, int port,
+static void bcm_sf2_sw_br_leave(struct dsa_switch *ds, struct dsa_port *dp,
struct net_device *bridge)
 {
struct bcm_sf2_priv *priv = ds_to_priv(ds);
unsigned int i;
u32 reg, p_ctl;
 
-   p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(port));
+   p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(dp->port));
 
for (i = 0; i < priv->hw_params.num_ports; i++) {
/* Don't touch the remaining ports */
@@ -540,18 +540,18 @@ static void bcm_sf2_sw_br_leave(struct dsa_switch *ds, 
int port,
continue;
 
reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(i));
-   reg &= ~(1 << port);
+   reg &= ~(1 << dp->port);
core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(i));
-   priv->port_sts[port].vlan_ctl_mask = reg;
+   priv->port_sts[dp->port].vlan_ctl_mask = reg;
 
/* Prevent self removal to preserve isolation */
-   if (port != i)
+   if (dp->port != i)
p_ctl &= ~(1 << i);
}
 
-   core_writel(priv, p_ctl, CORE_PORT_VLAN_CTL_PORT(port));
-   priv->port_sts[port].vlan_ctl_mask = p_ctl;
-   priv->port_sts[port].bridge_dev = NULL;
+   core_writel(priv, p_ctl, CORE_PORT_VLAN_CTL_PORT(dp->port));
+   priv->port_sts[dp->port].vlan_ctl_mask = p_ctl;
+   priv->port_sts[dp->port].bridge_dev = NULL;
 }
 
 static void bcm_sf2_sw_br_set_stp_state(struct dsa_switch *ds, int port,
diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index 86f8f2f..3f78c73 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -2203,7 +2203,7 @@ unlock:
return err;
 }
 
-int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port,
+int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, struct dsa_port *dp,
   struct net_device *bridge)
 {
struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
@@ -2212,7 +2212,7 @@ int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int 
port,
mutex_lock(&ps->smi_mutex);
 
/* Assign the bridge and remap each port's VLANTable */
-   ps->ports[port].bridge_dev = bridge;
+   ps->ports[dp->port].bridge_dev = bridge;
 
for (i = 0; i < ps->info->num_ports; ++i) {
if (ps->ports[i].bridge_dev == bridge) {
@@ -2227,7 +2227,7 @@ int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int 
port,
return err;
 }
 
-void mv88e6xxx_port_bridge_leave(struct 

[RFC 02/20] net: dsa: be consistent with NETDEV_CHANGEUPPER

2016-04-27 Thread Vivien Didelot
Once NETDEV_CHANGEUPPER is emitted, the device is already (un)bridged.

If an error is returned on port_bridge_join, the bridge layer will
rollback the operation and unbridge the port.

Respect this by setting bridge_dev to NULL on error.

Also the DSA layer shouldn't assume that the drivers know about the
bridge device a port was previously bridged to. So pass the bridge
device to port_bridge_leave.

Signed-off-by: Vivien Didelot 
---
 drivers/net/dsa/bcm_sf2.c   |  4 ++--
 drivers/net/dsa/mv88e6xxx.c |  4 ++--
 drivers/net/dsa/mv88e6xxx.h |  3 ++-
 include/net/dsa.h   |  3 ++-
 net/dsa/slave.c | 13 +
 5 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 448deb5..f394ea9 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -525,10 +525,10 @@ static int bcm_sf2_sw_br_join(struct dsa_switch *ds, int 
port,
return 0;
 }
 
-static void bcm_sf2_sw_br_leave(struct dsa_switch *ds, int port)
+static void bcm_sf2_sw_br_leave(struct dsa_switch *ds, int port,
+   struct net_device *bridge)
 {
struct bcm_sf2_priv *priv = ds_to_priv(ds);
-   struct net_device *bridge = priv->port_sts[port].bridge_dev;
unsigned int i;
u32 reg, p_ctl;
 
diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index 028f92f..86f8f2f 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -2227,10 +2227,10 @@ int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, 
int port,
return err;
 }
 
-void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port)
+void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port,
+struct net_device *bridge)
 {
struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
-   struct net_device *bridge = ps->ports[port].bridge_dev;
int i;
 
mutex_lock(&ps->smi_mutex);
diff --git a/drivers/net/dsa/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx.h
index 0dbe2d1..2eb9a82 100644
--- a/drivers/net/dsa/mv88e6xxx.h
+++ b/drivers/net/dsa/mv88e6xxx.h
@@ -492,7 +492,8 @@ int mv88e6xxx_set_eee(struct dsa_switch *ds, int port,
  struct phy_device *phydev, struct ethtool_eee *e);
 int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port,
   struct net_device *bridge);
-void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port);
+void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port,
+struct net_device *bridge);
 void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port, u8 state);
 int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port,
  bool vlan_filtering);
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 255c108..ed33500 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -305,7 +305,8 @@ struct dsa_switch_driver {
 */
int (*port_bridge_join)(struct dsa_switch *ds, int port,
struct net_device *bridge);
-   void(*port_bridge_leave)(struct dsa_switch *ds, int port);
+   void(*port_bridge_leave)(struct dsa_switch *ds, int port,
+struct net_device *bridge);
void(*port_stp_state_set)(struct dsa_switch *ds, int port,
  u8 state);
 
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 6115444..f2ec13d 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -443,19 +443,24 @@ static int dsa_slave_bridge_port_join(struct net_device 
*dev,
if (ds->drv->port_bridge_join)
ret = ds->drv->port_bridge_join(ds, p->dp->port, br);
 
-   return ret == -EOPNOTSUPP ? 0 : ret;
+   if (ret && ret != -EOPNOTSUPP) {
+   p->bridge_dev = NULL;
+   return ret;
+   }
+
+   return 0;
 }
 
 static void dsa_slave_bridge_port_leave(struct net_device *dev)
 {
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->dp->ds;
+   struct net_device *br = p->bridge_dev;
 
+   p->bridge_dev = NULL;
 
if (ds->drv->port_bridge_leave)
-   ds->drv->port_bridge_leave(ds, p->dp->port);
-
-   p->bridge_dev = NULL;
+   ds->drv->port_bridge_leave(ds, p->dp->port, br);
 
/* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer,
 * so allow it to be in BR_STATE_FORWARDING to be kept functional
-- 
2.8.0



[RFC 12/20] net: dsa: rename dst->ds to dst->switches

2016-04-27 Thread Vivien Didelot
dsa_switch stores the net_device pointers in a "ports" member. Be
consistent and store the dsa_switch pointer in a "switches" member of
the dsa_switch_tree structure.

This frees up the "ds" member for a future dsa_switch list.

Signed-off-by: Vivien Didelot 
---
 include/net/dsa.h | 2 +-
 net/dsa/dsa.c | 8 
 net/dsa/tag_brcm.c| 2 +-
 net/dsa/tag_dsa.c | 2 +-
 net/dsa/tag_edsa.c| 2 +-
 net/dsa/tag_trailer.c | 2 +-
 6 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 5f2e7df..389227d 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -124,7 +124,7 @@ struct dsa_switch_tree {
/*
 * Data for the individual switch chips.
 */
-   struct dsa_switch   *ds[DSA_MAX_SWITCHES];
+   struct dsa_switch   *switches[DSA_MAX_SWITCHES];
 };
 
 struct dsa_port {
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 3daffb6..aa4a61a 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -857,7 +857,7 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, 
struct net_device *dev,
continue;
}
 
-   dst->ds[i] = ds;
+   dst->switches[i] = ds;
 
++configured;
}
@@ -953,7 +953,7 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst)
wmb();
 
for (i = 0; i < dst->pd->nr_chips; i++) {
-   struct dsa_switch *ds = dst->ds[i];
+   struct dsa_switch *ds = dst->switches[i];
 
if (ds)
dsa_switch_destroy(ds);
@@ -1006,7 +1006,7 @@ static int dsa_suspend(struct device *d)
int i, ret = 0;
 
for (i = 0; i < dst->pd->nr_chips; i++) {
-   struct dsa_switch *ds = dst->ds[i];
+   struct dsa_switch *ds = dst->switches[i];
 
if (ds != NULL)
ret = dsa_switch_suspend(ds);
@@ -1022,7 +1022,7 @@ static int dsa_resume(struct device *d)
int i, ret = 0;
 
for (i = 0; i < dst->pd->nr_chips; i++) {
-   struct dsa_switch *ds = dst->ds[i];
+   struct dsa_switch *ds = dst->switches[i];
 
if (ds != NULL)
ret = dsa_switch_resume(ds);
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index 3d5aabc..35fc75b 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -102,7 +102,7 @@ static int brcm_tag_rcv(struct sk_buff *skb, struct 
net_device *dev,
if (unlikely(dst == NULL))
goto out_drop;
 
-   ds = dst->ds[0];
+   ds = dst->switches[0];
 
skb = skb_unshare(skb, GFP_ATOMIC);
if (skb == NULL)
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index c870cfa..bf3eebf8 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -109,7 +109,7 @@ static int dsa_rcv(struct sk_buff *skb, struct net_device 
*dev,
 */
if (source_device >= dst->pd->nr_chips)
goto out_drop;
-   ds = dst->ds[source_device];
+   ds = dst->switches[source_device];
if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL)
goto out_drop;
 
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index 898f949d..4ddbb85 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -122,7 +122,7 @@ static int edsa_rcv(struct sk_buff *skb, struct net_device 
*dev,
 */
if (source_device >= dst->pd->nr_chips)
goto out_drop;
-   ds = dst->ds[source_device];
+   ds = dst->switches[source_device];
if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL)
goto out_drop;
 
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index eaa3440..ade0bbf 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -67,7 +67,7 @@ static int trailer_rcv(struct sk_buff *skb, struct net_device 
*dev,
 
if (unlikely(dst == NULL))
goto out_drop;
-   ds = dst->ds[0];
+   ds = dst->switches[0];
 
skb = skb_unshare(skb, GFP_ATOMIC);
if (skb == NULL)
-- 
2.8.0



[RFC 10/20] net: dsa: mv88e6xxx: setup a dsa_port

2016-04-27 Thread Vivien Didelot
Change the mv88e6xxx_setup_port function to take a dsa_port structure as
parameter instead of a port index. This will help us get rid of the
private bridge_dev pointer.

Signed-off-by: Vivien Didelot 
---
 drivers/net/dsa/mv88e6xxx.c | 64 -
 1 file changed, 34 insertions(+), 30 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index 00a0b92..0687894 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -2317,7 +2317,7 @@ static int mv88e6xxx_power_on_serdes(struct dsa_switch 
*ds)
return ret;
 }
 
-static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port)
+static int mv88e6xxx_setup_port(struct dsa_switch *ds, struct dsa_port *dp)
 {
struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
int ret;
@@ -2335,8 +2335,10 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, 
int port)
 * and all DSA ports to their maximum bandwidth and
 * full duplex.
 */
-   reg = _mv88e6xxx_reg_read(ds, REG_PORT(port), PORT_PCS_CTRL);
-   if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) {
+   reg = _mv88e6xxx_reg_read(ds, REG_PORT(dp->port),
+ PORT_PCS_CTRL);
+   if (dsa_is_cpu_port(ds, dp->port) ||
+   dsa_is_dsa_port(ds, dp->port)) {
reg &= ~PORT_PCS_CTRL_UNFORCED;
reg |= PORT_PCS_CTRL_FORCE_LINK |
PORT_PCS_CTRL_LINK_UP |
@@ -2350,7 +2352,7 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, 
int port)
reg |= PORT_PCS_CTRL_UNFORCED;
}
 
-   ret = _mv88e6xxx_reg_write(ds, REG_PORT(port),
+   ret = _mv88e6xxx_reg_write(ds, REG_PORT(dp->port),
   PORT_PCS_CTRL, reg);
if (ret)
goto abort;
@@ -2378,7 +2380,7 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, 
int port)
reg = PORT_CONTROL_IGMP_MLD_SNOOP |
PORT_CONTROL_USE_TAG | PORT_CONTROL_USE_IP |
PORT_CONTROL_STATE_FORWARDING;
-   if (dsa_is_cpu_port(ds, port)) {
+   if (dsa_is_cpu_port(ds, dp->port)) {
if (mv88e6xxx_6095_family(ds) || mv88e6xxx_6185_family(ds))
reg |= PORT_CONTROL_DSA_TAG;
if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) ||
@@ -2400,7 +2402,7 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, 
int port)
reg |= PORT_CONTROL_EGRESS_ADD_TAG;
}
}
-   if (dsa_is_dsa_port(ds, port)) {
+   if (dsa_is_dsa_port(ds, dp->port)) {
if (mv88e6xxx_6095_family(ds) || mv88e6xxx_6185_family(ds))
reg |= PORT_CONTROL_DSA_TAG;
if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) ||
@@ -2409,13 +2411,13 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, 
int port)
reg |= PORT_CONTROL_FRAME_MODE_DSA;
}
 
-   if (port == dsa_upstream_port(ds))
+   if (dp->port == dsa_upstream_port(ds))
reg |= PORT_CONTROL_FORWARD_UNKNOWN |
PORT_CONTROL_FORWARD_UNKNOWN_MC;
}
if (reg) {
-   ret = _mv88e6xxx_reg_write(ds, REG_PORT(port),
-  PORT_CONTROL, reg);
+   ret = _mv88e6xxx_reg_write(ds, REG_PORT(dp->port), PORT_CONTROL,
+  reg);
if (ret)
goto abort;
}
@@ -2424,7 +2426,7 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, 
int port)
 * powered down.
 */
if (mv88e6xxx_6352_family(ds)) {
-   ret = _mv88e6xxx_reg_read(ds, REG_PORT(port), PORT_STATUS);
+   ret = _mv88e6xxx_reg_read(ds, REG_PORT(dp->port), PORT_STATUS);
if (ret < 0)
goto abort;
ret &= PORT_STATUS_CMODE_MASK;
@@ -2460,14 +2462,14 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, 
int port)
/* enable forwarding of unknown multicast addresses to
 * the upstream port
 */
-   if (port == dsa_upstream_port(ds))
+   if (dp->port == dsa_upstream_port(ds))
reg |= PORT_CONTROL_2_FORWARD_UNKNOWN;
}
 
reg |= PORT_CONTROL_2_8021Q_DISABLED;
 
if (reg) {
-   ret = _mv88e6xxx_reg_write(ds, REG_PORT(port),
+   ret = _mv88e6xxx_reg_write(ds, REG_PORT(dp->port),
   PORT_CONTROL_2, reg);
if (ret)
goto abort;
@@ 

[RFC 07/20] net: dsa: list ports in switch

2016-04-27 Thread Vivien Didelot
List DSA port structures in their switch structure, so that drivers can
iterate over them to retrieve information such as their port membership.

Signed-off-by: Vivien Didelot 
---
 include/net/dsa.h | 9 +
 net/dsa/dsa.c | 4 
 2 files changed, 13 insertions(+)

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 69e467c..5f2e7df 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -32,6 +32,11 @@ enum dsa_tag_protocol {
 #define DSA_MAX_SWITCHES   4
 #define DSA_MAX_PORTS  12
 
+#define dsa_switch_for_each_port(_ds, _dp, _num_ports) \
+   for (_dp = list_first_entry(&_ds->dp, typeof(*_dp), list);  \
+&_dp->list != (&_ds->dp) && _dp->port < _num_ports;\
+_dp = list_next_entry(_dp, list))
+
 struct dsa_chip_data {
/*
 * How to access the switch configuration registers.
@@ -123,6 +128,8 @@ struct dsa_switch_tree {
 };
 
 struct dsa_port {
+   struct list_headlist;
+
struct dsa_switch   *ds;
int port;
 
@@ -173,6 +180,8 @@ struct dsa_switch {
u32 phys_mii_mask;
struct mii_bus  *slave_mii_bus;
struct net_device   *ports[DSA_MAX_PORTS];
+
+   struct list_headdp;
 };
 
 static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p)
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 222494c..3daffb6 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -225,6 +225,8 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, 
struct device *parent)
int index = ds->index;
int i, ret;
 
+   INIT_LIST_HEAD(>dp);
+
/*
 * Validate supplied switch configuration.
 */
@@ -238,6 +240,8 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, 
struct device *parent)
dp[i]->ds = ds;
dp[i]->port = i;
 
+   list_add_tail([i]->list, >dp);
+
name = pd->port_names[i];
if (name == NULL)
continue;
-- 
2.8.0



[RFC 11/20] net: dsa: mv88e6xxx: use bridge from dsa_port

2016-04-27 Thread Vivien Didelot
Replace the _mv88e6xxx_port_based_vlan_map function with a
_mv88e6xxx_port_map_vlantable which takes a dsa_port structure as
parameter. This allows us to iterate on dsa_port's bridge device pointer
and thus get rid of the private bridge_dev structure.

Signed-off-by: Vivien Didelot 
---
 drivers/net/dsa/mv88e6xxx.c | 48 ++---
 drivers/net/dsa/mv88e6xxx.h |  1 -
 2 files changed, 24 insertions(+), 25 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index 0687894..89d0206 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -,27 +,29 @@ static int _mv88e6xxx_port_state(struct dsa_switch *ds, 
int port, u8 state)
return ret;
 }
 
-static int _mv88e6xxx_port_based_vlan_map(struct dsa_switch *ds, int port)
+static int _mv88e6xxx_port_map_vlantable(struct dsa_switch *ds,
+struct dsa_port *dp)
 {
struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
-   struct net_device *bridge = ps->ports[port].bridge_dev;
const u16 mask = (1 << ps->info->num_ports) - 1;
u16 output_ports = 0;
+   int port = dp->port;
+   struct dsa_port *intp;
int reg;
-   int i;
 
/* allow CPU port or DSA link(s) to send frames to every port */
if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) {
output_ports = mask;
} else {
-   for (i = 0; i < ps->info->num_ports; ++i) {
+   dsa_switch_for_each_port(ds, intp, ps->info->num_ports) {
/* allow sending frames to every group member */
-   if (bridge && ps->ports[i].bridge_dev == bridge)
-   output_ports |= BIT(i);
+   if (intp->br && intp->br == dp->br)
+   output_ports |= BIT(intp->port);
 
/* allow sending frames to CPU port and DSA link(s) */
-   if (dsa_is_cpu_port(ds, i) || dsa_is_dsa_port(ds, i))
-   output_ports |= BIT(i);
+   if (dsa_is_cpu_port(ds, intp->port) ||
+   dsa_is_dsa_port(ds, intp->port))
+   output_ports |= BIT(intp->port);
}
}
 
@@ -2207,16 +2209,15 @@ int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, 
struct dsa_port *dp,
   struct net_device *bridge)
 {
struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
-   int i, err;
+   struct dsa_port *intp;
+   int err;
 
mutex_lock(>smi_mutex);
 
-   /* Assign the bridge and remap each port's VLANTable */
-   ps->ports[dp->port].bridge_dev = bridge;
-
-   for (i = 0; i < ps->info->num_ports; ++i) {
-   if (ps->ports[i].bridge_dev == bridge) {
-   err = _mv88e6xxx_port_based_vlan_map(ds, i);
+   /* Remap each port's VLANTable */
+   dsa_switch_for_each_port(ds, intp, ps->info->num_ports) {
+   if (intp->br == bridge) {
+   err = _mv88e6xxx_port_map_vlantable(ds, intp);
if (err)
break;
}
@@ -2231,17 +2232,16 @@ void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, 
struct dsa_port *dp,
 struct net_device *bridge)
 {
struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
-   int i;
+   struct dsa_port *intp;
 
mutex_lock(>smi_mutex);
 
-   /* Unassign the bridge and remap each port's VLANTable */
-   ps->ports[dp->port].bridge_dev = NULL;
-
-   for (i = 0; i < ps->info->num_ports; ++i)
-   if (i == dp->port || ps->ports[i].bridge_dev == bridge)
-   if (_mv88e6xxx_port_based_vlan_map(ds, i))
-   netdev_warn(ds->ports[i], "failed to remap\n");
+   /* Remap each port's VLANTable */
+   dsa_switch_for_each_port(ds, intp, ps->info->num_ports)
+   if (intp == dp || intp->br == bridge)
+   if (_mv88e6xxx_port_map_vlantable(ds, intp))
+   netdev_warn(ds->ports[intp->port],
+   "failed to remap\n");
 
mutex_unlock(>smi_mutex);
 }
@@ -2573,7 +2573,7 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, 
struct dsa_port *dp)
if (ret)
goto abort;
 
-   ret = _mv88e6xxx_port_based_vlan_map(ds, dp->port);
+   ret = _mv88e6xxx_port_map_vlantable(ds, dp);
if (ret)
goto abort;
 
diff --git a/drivers/net/dsa/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx.h
index c49a514..56e3347 100644
--- a/drivers/net/dsa/mv88e6xxx.h
+++ b/drivers/net/dsa/mv88e6xxx.h
@@ -378,7 +378,6 @@ struct mv88e6xxx_vtu_stu_entry {
 };
 
 struct mv88e6xxx_priv_port {
-   struct net_device 

[RFC 08/20] net: dsa: bcm_sf2: use bridge device from dsa_port

2016-04-27 Thread Vivien Didelot
Now that the DSA layer exposes the DSA port structures to drivers, use
that to retrieve the port bridge membership and thus get rid of the
private bridge_dev pointer.

Signed-off-by: Vivien Didelot 
---
 drivers/net/dsa/bcm_sf2.c | 30 ++
 drivers/net/dsa/bcm_sf2.h |  2 --
 2 files changed, 14 insertions(+), 18 deletions(-)

diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index f7b53fa..6e3b844 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -495,25 +495,24 @@ static int bcm_sf2_sw_br_join(struct dsa_switch *ds, 
struct dsa_port *dp,
  struct net_device *bridge)
 {
struct bcm_sf2_priv *priv = ds_to_priv(ds);
-   unsigned int i;
+   struct dsa_port *intp;
u32 reg, p_ctl;
 
-   priv->port_sts[dp->port].bridge_dev = bridge;
p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(dp->port));
 
-   for (i = 0; i < priv->hw_params.num_ports; i++) {
-   if (priv->port_sts[i].bridge_dev != bridge)
+   dsa_switch_for_each_port(ds, intp, priv->hw_params.num_ports) {
+   if (intp->br != bridge)
continue;
 
/* Add this local port to the remote port VLAN control
 * membership and update the remote port bitmask
 */
-   reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(i));
+   reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(intp->port));
reg |= 1 << dp->port;
-   core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(i));
-   priv->port_sts[i].vlan_ctl_mask = reg;
+   core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(intp->port));
+   priv->port_sts[intp->port].vlan_ctl_mask = reg;
 
-   p_ctl |= 1 << i;
+   p_ctl |= 1 << intp->port;
}
 
/* Configure the local port VLAN control membership to include
@@ -529,29 +528,28 @@ static void bcm_sf2_sw_br_leave(struct dsa_switch *ds, 
struct dsa_port *dp,
struct net_device *bridge)
 {
struct bcm_sf2_priv *priv = ds_to_priv(ds);
-   unsigned int i;
+   struct dsa_port *intp;
u32 reg, p_ctl;
 
p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(dp->port));
 
-   for (i = 0; i < priv->hw_params.num_ports; i++) {
+   dsa_switch_for_each_port(ds, intp, priv->hw_params.num_ports) {
/* Don't touch the remaining ports */
-   if (priv->port_sts[i].bridge_dev != bridge)
+   if (intp->br != bridge)
continue;
 
-   reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(i));
+   reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(intp->port));
reg &= ~(1 << dp->port);
-   core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(i));
+   core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(intp->port));
priv->port_sts[dp->port].vlan_ctl_mask = reg;
 
/* Prevent self removal to preserve isolation */
-   if (dp->port != i)
-   p_ctl &= ~(1 << i);
+   if (dp != intp)
+   p_ctl &= ~(1 << intp->port);
}
 
core_writel(priv, p_ctl, CORE_PORT_VLAN_CTL_PORT(dp->port));
priv->port_sts[dp->port].vlan_ctl_mask = p_ctl;
-   priv->port_sts[dp->port].bridge_dev = NULL;
 }
 
 static void bcm_sf2_sw_br_set_stp_state(struct dsa_switch *ds, int port,
diff --git a/drivers/net/dsa/bcm_sf2.h b/drivers/net/dsa/bcm_sf2.h
index 200b1f5..6bba1c9 100644
--- a/drivers/net/dsa/bcm_sf2.h
+++ b/drivers/net/dsa/bcm_sf2.h
@@ -50,8 +50,6 @@ struct bcm_sf2_port_status {
struct ethtool_eee eee;
 
u32 vlan_ctl_mask;
-
-   struct net_device *bridge_dev;
 };
 
 struct bcm_sf2_arl_entry {
-- 
2.8.0



[RFC 06/20] net: dsa: move bridge device in dsa_port

2016-04-27 Thread Vivien Didelot
Move the pointer to the bridge device in the DSA port structure instead
of cluttering the dsa_slave_priv structure.

This can later be used by drivers to help them configure their bridge
group port membership.

Signed-off-by: Vivien Didelot 
---
 include/net/dsa.h  |  2 ++
 net/dsa/dsa_priv.h |  1 -
 net/dsa/slave.c| 16 +---
 3 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 08a9536..69e467c 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -125,6 +125,8 @@ struct dsa_switch_tree {
 struct dsa_port {
struct dsa_switch   *ds;
int port;
+
+   struct net_device   *br;
 };
 
 struct dsa_switch {
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index c7d5df0..c5afddd 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -35,7 +35,6 @@ struct dsa_slave_priv {
int old_pause;
int old_duplex;
 
-   struct net_device   *bridge_dev;
 #ifdef CONFIG_NET_POLL_CONTROLLER
struct netpoll  *netpoll;
 #endif
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index d6b6019..b90caf8 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -64,18 +64,12 @@ static int dsa_slave_get_iflink(const struct net_device 
*dev)
return p->dp->ds->dst->master_netdev->ifindex;
 }
 
-static inline bool dsa_port_is_bridged(struct dsa_slave_priv *p)
-{
-   return !!p->bridge_dev;
-}
-
 static int dsa_slave_open(struct net_device *dev)
 {
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->dp->ds;
struct net_device *master = ds->dst->master_netdev;
-   u8 stp_state = dsa_port_is_bridged(p) ?
-   BR_STATE_BLOCKING : BR_STATE_FORWARDING;
+   u8 stp_state = p->dp->br ? BR_STATE_BLOCKING : BR_STATE_FORWARDING;
int err;
 
if (!(master->flags & IFF_UP))
@@ -438,13 +432,13 @@ static int dsa_slave_bridge_port_join(struct net_device 
*dev,
struct dsa_switch *ds = p->dp->ds;
int ret = -EOPNOTSUPP;
 
-   p->bridge_dev = br;
+   p->dp->br = br;
 
if (ds->drv->port_bridge_join)
ret = ds->drv->port_bridge_join(ds, p->dp, br);
 
if (ret && ret != -EOPNOTSUPP) {
-   p->bridge_dev = NULL;
+   p->dp->br = NULL;
return ret;
}
 
@@ -455,9 +449,9 @@ static void dsa_slave_bridge_port_leave(struct net_device 
*dev)
 {
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->dp->ds;
-   struct net_device *br = p->bridge_dev;
+   struct net_device *br = p->dp->br;
 
-   p->bridge_dev = NULL;
+   p->dp->br = NULL;
 
if (ds->drv->port_bridge_leave)
ds->drv->port_bridge_leave(ds, p->dp, br);
-- 
2.8.0



[RFC 09/20] net: dsa: mv88e6xxx: check HW vlan with dsa_port

2016-04-27 Thread Vivien Didelot
Replace the mv88e6xxx_port_check_hw_vlan function with a
mv88e6xxx_port_check_vtu which takes a dsa_port structure as parameter.
This will help us get rid of the bridge_dev pointer.

Signed-off-by: Vivien Didelot 
---
 drivers/net/dsa/mv88e6xxx.c | 25 -
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index 7e03f4c..00a0b92 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -1729,12 +1729,13 @@ static int _mv88e6xxx_vtu_get(struct dsa_switch *ds, 
u16 vid,
return err;
 }
 
-static int mv88e6xxx_port_check_hw_vlan(struct dsa_switch *ds, int port,
-   u16 vid_begin, u16 vid_end)
+static int mv88e6xxx_port_check_vtu(struct dsa_switch *ds, struct dsa_port *dp,
+   u16 vid_begin, u16 vid_end)
 {
struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
struct mv88e6xxx_vtu_stu_entry vlan;
-   int i, err;
+   struct dsa_port *intp;
+   int err;
 
if (!vid_begin)
return -EOPNOTSUPP;
@@ -1756,22 +1757,21 @@ static int mv88e6xxx_port_check_hw_vlan(struct 
dsa_switch *ds, int port,
if (vlan.vid > vid_end)
break;
 
-   for (i = 0; i < ps->info->num_ports; ++i) {
-   if (dsa_is_dsa_port(ds, i) || dsa_is_cpu_port(ds, i))
+   dsa_switch_for_each_port(ds, intp, ps->info->num_ports) {
+   if (dsa_is_dsa_port(ds, intp->port) ||
+   dsa_is_cpu_port(ds, intp->port))
continue;
 
-   if (vlan.data[i] ==
+   if (vlan.data[intp->port] ==
GLOBAL_VTU_DATA_MEMBER_TAG_NON_MEMBER)
continue;
 
-   if (ps->ports[i].bridge_dev ==
-   ps->ports[port].bridge_dev)
+   if (intp->br == dp->br)
break; /* same bridge, check next VLAN */
 
-   netdev_warn(ds->ports[port],
+   netdev_warn(ds->ports[dp->port],
"hardware VLAN %d already used by %s\n",
-   vlan.vid,
-   netdev_name(ps->ports[i].bridge_dev));
+   vlan.vid, netdev_name(intp->br));
err = -EOPNOTSUPP;
goto unlock;
}
@@ -1836,8 +1836,7 @@ int mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, 
struct dsa_port *dp,
/* If the requested port doesn't belong to the same bridge as the VLAN
 * members, do not support it (yet) and fallback to software VLAN.
 */
-   err = mv88e6xxx_port_check_hw_vlan(ds, dp->port, vlan->vid_begin,
-  vlan->vid_end);
+   err = mv88e6xxx_port_check_vtu(ds, dp, vlan->vid_begin, vlan->vid_end);
if (err)
return err;
 
-- 
2.8.0



[RFC 20/20] net: dsa: mv88e6xxx: setup PVT on cross-chip ops

2016-04-27 Thread Vivien Didelot
Switches with a Cross-chip Port VLAN Table are currently configured to
allow cross-chip frames to egress any internal ports. This means that
unbridged cross-chip ports can actually talk to each other, and this is
not what we want.

In order to restrict that, we need to set up the PVT entry for an
external port when it joins or leaves a bridge group crossing the switch.

Also initialize the PVT to forbid egressing of cross-chip frames to
internal user ports by default.

Note that a PVT-less switch cannot forbid such frames to egress its
internal ports, unless the kernel supports VLAN filtering. In such
systems, a bridge group is also implemented as a 802.1Q VLAN and thus a
global VTU-based logic can be used to correctly implement cross-chip
hardware bridging. Warn the user if the setup doesn't respect this.

Signed-off-by: Vivien Didelot 
---
 drivers/net/dsa/mv88e6xxx.c | 98 +++--
 1 file changed, 95 insertions(+), 3 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index 4341ffd..e0f9e93 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -2272,8 +2272,29 @@ static int _mv88e6xxx_pvt_cmd(struct dsa_switch *ds, int 
src_dev, int src_port,
return _mv88e6xxx_pvt_wait(ds);
 }
 
+static int _mv88e6xxx_pvt_write(struct dsa_switch *ds, int src_dev,
+   int src_port, u16 data)
+{
+   int err;
+
+   err = _mv88e6xxx_pvt_wait(ds);
+   if (err)
+   return err;
+
+   err = _mv88e6xxx_reg_write(ds, REG_GLOBAL2, GLOBAL2_PVT_DATA, data);
+   if (err)
+   return err;
+
+return _mv88e6xxx_pvt_cmd(ds, src_dev, src_port,
+ GLOBAL2_PVT_ADDR_OP_WRITE_PVLAN);
+}
+
 static int _mv88e6xxx_pvt_init(struct dsa_switch *ds)
 {
+   struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+   struct dsa_port *intp;
+   int src_dev, src_port;
+   u16 pv = 0;
int err;
 
/* Clear 5 Bit Port for usage with Marvell Link Street devices:
@@ -2284,8 +2305,60 @@ static int _mv88e6xxx_pvt_init(struct dsa_switch *ds)
if (err)
return err;
 
-   /* Allow any cross-chip frames to egress any internal ports */
-   return _mv88e6xxx_pvt_cmd(ds, 0, 0, GLOBAL2_PVT_ADDR_OP_INIT_ONES);
+   /* Forbid cross-chip frames to egress internal ports */
+   dsa_switch_for_each_port(ds, intp, ps->info->num_ports)
+   if (dsa_is_cpu_port(ds, intp->port) ||
+   dsa_is_dsa_port(ds, intp->port))
+   pv |= BIT(intp->port);
+
+   for (src_dev = 0; src_dev < 32; ++src_dev) {
+   for (src_port = 0; src_port < 16; ++src_port) {
+   err = _mv88e6xxx_pvt_write(ds, src_dev, src_port, pv);
+   if (err)
+   return err;
+   }
+   }
+
+   return 0;
+}
+
+static int _mv88e6xxx_port_map_pvt(struct dsa_switch *ds, struct dsa_port *dp)
+{
+   struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+   struct dsa_port *intp;
+   u16 pvlan = 0;
+
+   /* Cross-chip frames can egress CPU and DSA ports, and bridge members */
+   dsa_switch_for_each_port(ds, intp, ps->info->num_ports)
+   if (dsa_is_cpu_port(ds, intp->port) ||
+   dsa_is_dsa_port(ds, intp->port) ||
+   (intp->br && intp->br == dp->br))
+   pvlan |= BIT(intp->port);
+
+   return _mv88e6xxx_pvt_write(ds, dp->ds->index, dp->port, pvlan);
+}
+
+static int _mv88e6xxx_remap_pvt(struct dsa_switch *ds,
+   struct net_device *bridge)
+{
+   struct dsa_switch *dsa_sw;
+   struct dsa_port *dsa_p;
+   int err;
+
+   dsa_tree_for_each_switch(ds->dst, dsa_sw) {
+   if (dsa_sw == ds)
+   continue;
+
+   dsa_switch_for_each_port(dsa_sw, dsa_p, DSA_MAX_PORTS) {
+   if (dsa_p->br == bridge) {
+   err = _mv88e6xxx_port_map_pvt(ds, dsa_p);
+   if (err)
+   return err;
+   }
+   }
+   }
+
+   return 0;
 }
 
 int mv88e6xxx_port_bridge_change(struct dsa_switch *ds, struct dsa_port *dp,
@@ -2297,7 +2370,19 @@ int mv88e6xxx_port_bridge_change(struct dsa_switch *ds, 
struct dsa_port *dp,
mutex_lock(>smi_mutex);
 
if (dsa_port_is_external(dp, ds)) {
-   err = -EOPNOTSUPP;
+   /* Forbidding hardware bridging of cross-chip frames requires a
+* Cross-chip Port VLAN Table (PVT), unless VLAN filtering is
+* enabled, in which case a global VTU-based logic works.
+*/
+   if (mv88e6xxx_has(ps, MV88E6XXX_FLAG_PVT)) {
+   err = _mv88e6xxx_port_map_pvt(ds, dp);
+ 

[RFC 19/20] net: dsa: mv88e6xxx: conditionally init PVT

2016-04-27 Thread Vivien Didelot
The current code initializes the Cross-chip Port VLAN Table to all ones,
even though the switch model doesn't have one.

It also assumes that the switch is configured to support up to
32-switch/16-port cross-chip devices.

Implement the access to the PVT and initialize it only if the switch has
such a feature. Support only 88E6352 for the moment.

This commit brings no functional change for devices with a PVT.

Signed-off-by: Vivien Didelot 
---
 drivers/net/dsa/mv88e6352.c |  1 +
 drivers/net/dsa/mv88e6xxx.c | 54 +++--
 drivers/net/dsa/mv88e6xxx.h |  6 +
 3 files changed, 54 insertions(+), 7 deletions(-)

diff --git a/drivers/net/dsa/mv88e6352.c b/drivers/net/dsa/mv88e6352.c
index 4afc24d..29d9fd76 100644
--- a/drivers/net/dsa/mv88e6352.c
+++ b/drivers/net/dsa/mv88e6352.c
@@ -59,6 +59,7 @@ static const struct mv88e6xxx_info mv88e6352_table[] = {
.name = "Marvell 88E6352",
.num_databases = 4096,
.num_ports = 7,
+   .flags = BIT(MV88E6XXX_FLAG_PVT),
}
 };
 
diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index 25852ee..4341ffd 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -2247,6 +2247,47 @@ unlock:
return err;
 }
 
+static int _mv88e6xxx_pvt_wait(struct dsa_switch *ds)
+{
+   return _mv88e6xxx_wait(ds, REG_GLOBAL2, GLOBAL2_PVT_ADDR,
+  GLOBAL2_PVT_ADDR_BUSY);
+}
+
+static int _mv88e6xxx_pvt_cmd(struct dsa_switch *ds, int src_dev, int src_port,
+ u16 op)
+{
+   u16 reg = op;
+   int err;
+
+   /* 9-bit Cross-chip PVT pointer: with GLOBAL2_MISC_5_BIT_PORT cleared,
+* source device is 5-bit, source port is 4-bit.
+*/
+   reg |= (src_dev & 0x1f) << 4;
+   reg |= (src_port & 0xf);
+
+   err = _mv88e6xxx_reg_write(ds, REG_GLOBAL2, GLOBAL2_PVT_ADDR, reg);
+   if (err)
+   return err;
+
+   return _mv88e6xxx_pvt_wait(ds);
+}
+
+static int _mv88e6xxx_pvt_init(struct dsa_switch *ds)
+{
+   int err;
+
+   /* Clear 5 Bit Port for usage with Marvell Link Street devices:
+* use 4 bits for the Src_Port/Src_Trunk and 5 bits for the Src_Dev.
+*/
+   err = _mv88e6xxx_reg_write(ds, REG_GLOBAL2, GLOBAL2_MISC,
+  0 & ~GLOBAL2_MISC_5_BIT_PORT);
+   if (err)
+   return err;
+
+   /* Allow any cross-chip frames to egress any internal ports */
+   return _mv88e6xxx_pvt_cmd(ds, 0, 0, GLOBAL2_PVT_ADDR_OP_INIT_ONES);
+}
+
 int mv88e6xxx_port_bridge_change(struct dsa_switch *ds, struct dsa_port *dp,
 struct net_device *bridge)
 {
@@ -2770,13 +2811,12 @@ int mv88e6xxx_setup_global(struct dsa_switch *ds)
if (err)
goto unlock;
 
-   /* Initialise cross-chip port VLAN table to reset
-* defaults.
-*/
-   err = _mv88e6xxx_reg_write(ds, REG_GLOBAL2,
-  GLOBAL2_PVT_ADDR, 0x9000);
-   if (err)
-   goto unlock;
+   /* Initialize Cross-chip Port VLAN Table (PVT) */
+   if (mv88e6xxx_has(ps, MV88E6XXX_FLAG_PVT)) {
+   err = _mv88e6xxx_pvt_init(ds);
+   if (err)
+   goto unlock;
+   }
 
/* Clear the priority override table. */
for (i = 0; i < 16; i++) {
diff --git a/drivers/net/dsa/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx.h
index 325caf8..fbde8b4 100644
--- a/drivers/net/dsa/mv88e6xxx.h
+++ b/drivers/net/dsa/mv88e6xxx.h
@@ -298,6 +298,10 @@
 #define GLOBAL2_INGRESS_OP 0x09
 #define GLOBAL2_INGRESS_DATA   0x0a
 #define GLOBAL2_PVT_ADDR   0x0b
+#define GLOBAL2_PVT_ADDR_BUSY  BIT(15)
+#define GLOBAL2_PVT_ADDR_OP_INIT_ONES  ((0x01 << 12) | GLOBAL2_PVT_ADDR_BUSY)
+#define GLOBAL2_PVT_ADDR_OP_WRITE_PVLAN((0x03 << 12) | 
GLOBAL2_PVT_ADDR_BUSY)
+#define GLOBAL2_PVT_ADDR_OP_READ   ((0x04 << 12) | GLOBAL2_PVT_ADDR_BUSY)
 #define GLOBAL2_PVT_DATA   0x0c
 #define GLOBAL2_SWITCH_MAC 0x0d
 #define GLOBAL2_SWITCH_MAC_BUSY BIT(15)
@@ -335,10 +339,12 @@
 #define GLOBAL2_WDOG_CONTROL   0x1b
 #define GLOBAL2_QOS_WEIGHT 0x1c
 #define GLOBAL2_MISC   0x1d
+#define GLOBAL2_MISC_5_BIT_PORTBIT(14)
 
 #define MV88E6XXX_N_FID4096
 
 enum mv88e6xxx_flag {
+   MV88E6XXX_FLAG_PVT,
MV88E6XXX_NUM_FLAGS,
 };
 
-- 
2.8.0



[RFC 18/20] net: dsa: mv88e6xxx: add flags to info

2016-04-27 Thread Vivien Didelot
Add a flags bitmap to the mv88e6xxx_info structure to help describe
features supported or not by a switch model.

Signed-off-by: Vivien Didelot 
---
 drivers/net/dsa/mv88e6xxx.h | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/drivers/net/dsa/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx.h
index 56e3347..325caf8 100644
--- a/drivers/net/dsa/mv88e6xxx.h
+++ b/drivers/net/dsa/mv88e6xxx.h
@@ -338,6 +338,10 @@
 
 #define MV88E6XXX_N_FID4096
 
+enum mv88e6xxx_flag {
+   MV88E6XXX_NUM_FLAGS,
+};
+
 enum mv88e6xxx_family {
MV88E6XXX_FAMILY_NONE,
MV88E6XXX_FAMILY_6065,  /* 6031 6035 6061 6065 */
@@ -356,6 +360,7 @@ struct mv88e6xxx_info {
const char *name;
unsigned int num_databases;
unsigned int num_ports;
+   unsigned long flags;
 };
 
 struct mv88e6xxx_atu_entry {
@@ -445,6 +450,12 @@ struct mv88e6xxx_hw_stat {
enum stat_type type;
 };
 
+static inline bool mv88e6xxx_has(struct mv88e6xxx_priv_state *ps,
+enum mv88e6xxx_flag flag)
+{
+   return !!(ps->info->flags & BIT(flag));
+}
+
 int mv88e6xxx_switch_reset(struct dsa_switch *ds, bool ppu_active);
 const char *mv88e6xxx_drv_probe(struct device *dsa_dev, struct device 
*host_dev,
int sw_addr, void **priv,
-- 
2.8.0



[RFC 17/20] net: dsa: mv88e6xxx: factorize port bridge change

2016-04-27 Thread Vivien Didelot
Implement a mv88e6xxx_port_bridge_change function to factorize the
configuration needed when a port joins or leaves a bridge group.

This will simplify the implementation of cross-chip bridging.

Signed-off-by: Vivien Didelot 
---
 drivers/net/dsa/mv88e6xxx.c | 67 +++--
 1 file changed, 40 insertions(+), 27 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index 8004d00..25852ee 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -1150,6 +1150,24 @@ static int _mv88e6xxx_port_map_vlantable(struct 
dsa_switch *ds,
return _mv88e6xxx_reg_write(ds, REG_PORT(port), PORT_BASE_VLAN, reg);
 }
 
+static int _mv88e6xxx_remap_vlantable(struct dsa_switch *ds,
+ struct net_device *bridge)
+{
+   struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+   struct dsa_port *intp;
+   int err;
+
+   dsa_switch_for_each_port(ds, intp, ps->info->num_ports) {
+   if (intp->br == bridge) {
+   err = _mv88e6xxx_port_map_vlantable(ds, intp);
+   if (err)
+   return err;
+   }
+   }
+
+   return 0;
+}
+
 void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port, u8 state)
 {
struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
@@ -2229,51 +2247,46 @@ unlock:
return err;
 }
 
-int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, struct dsa_port *dp,
-  struct net_device *bridge)
+int mv88e6xxx_port_bridge_change(struct dsa_switch *ds, struct dsa_port *dp,
+struct net_device *bridge)
 {
struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
-   struct dsa_port *intp;
int err;
 
-   if (dsa_port_is_external(dp, ds))
-   return -EOPNOTSUPP;
-
mutex_lock(>smi_mutex);
 
-   /* Remap each port's VLANTable */
-   dsa_switch_for_each_port(ds, intp, ps->info->num_ports) {
-   if (intp->br == bridge) {
-   err = _mv88e6xxx_port_map_vlantable(ds, intp);
+   if (dsa_port_is_external(dp, ds)) {
+   err = -EOPNOTSUPP;
+   } else {
+   /* Remap VLANTable of concerned in-chip ports */
+   if (!dp->br) {
+   err = _mv88e6xxx_port_map_vlantable(ds, dp);
if (err)
-   break;
+   goto unlock;
}
+
+   err = _mv88e6xxx_remap_vlantable(ds, bridge);
+   if (err)
+   goto unlock;
}
 
+unlock:
mutex_unlock(>smi_mutex);
 
return err;
 }
 
+int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, struct dsa_port *dp,
+  struct net_device *bridge)
+{
+   return mv88e6xxx_port_bridge_change(ds, dp, bridge);
+}
+
 void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, struct dsa_port *dp,
 struct net_device *bridge)
 {
-   struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
-   struct dsa_port *intp;
-
-   if (dsa_port_is_external(dp, ds))
-   return;
-
-   mutex_lock(>smi_mutex);
-
-   /* Remap each port's VLANTable */
-   dsa_switch_for_each_port(ds, intp, ps->info->num_ports)
-   if (intp == dp || intp->br == bridge)
-   if (_mv88e6xxx_port_map_vlantable(ds, intp))
-   netdev_warn(ds->ports[intp->port],
-   "failed to remap\n");
-
-   mutex_unlock(>smi_mutex);
+   if (mv88e6xxx_port_bridge_change(ds, dp, bridge))
+   netdev_err(ds->ports[dp->port], "failed to unbridge\n");
 }
 
 static void mv88e6xxx_bridge_work(struct work_struct *work)
-- 
2.8.0



[RFC 13/20] net: dsa: list switches in tree

2016-04-27 Thread Vivien Didelot
List the registered dsa_switch structures in a "ds" member of the
dsa_switch_tree structure. This allows the drivers to easily iterate on
the DSA switch structures of their related DSA tree.

Signed-off-by: Vivien Didelot 
---
 include/net/dsa.h | 9 +
 net/dsa/dsa.c | 3 +++
 2 files changed, 12 insertions(+)

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 389227d..85fac8a 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -32,11 +32,16 @@ enum dsa_tag_protocol {
 #define DSA_MAX_SWITCHES   4
 #define DSA_MAX_PORTS  12
 
+
+#define dsa_tree_for_each_switch(_dst, _ds)\
+   list_for_each_entry(_ds, &_dst->ds, list)
+
 #define dsa_switch_for_each_port(_ds, _dp, _num_ports) \
for (_dp = list_first_entry(&_ds->dp, typeof(*_dp), list);  \
 &_dp->list != (&_ds->dp) && _dp->port < _num_ports;\
 _dp = list_next_entry(_dp, list))
 
+
 struct dsa_chip_data {
/*
 * How to access the switch configuration registers.
@@ -125,6 +130,8 @@ struct dsa_switch_tree {
 * Data for the individual switch chips.
 */
struct dsa_switch   *switches[DSA_MAX_SWITCHES];
+
+   struct list_headds;
 };
 
 struct dsa_port {
@@ -137,6 +144,8 @@ struct dsa_port {
 };
 
 struct dsa_switch {
+   struct list_headlist;
+
/*
 * Parent switch tree, and switch index.
 */
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index aa4a61a..b0055c7 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -842,6 +842,8 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, 
struct net_device *dev,
int i;
unsigned configured = 0;
 
+   INIT_LIST_HEAD(>ds);
+
dst->pd = pd;
dst->master_netdev = dev;
dst->cpu_switch = -1;
@@ -858,6 +860,7 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, 
struct net_device *dev,
}
 
dst->switches[i] = ds;
+   list_add_tail(>list, >ds);
 
++configured;
}
-- 
2.8.0



[RFC 14/20] net: dsa: add tree-wide bridge ops

2016-04-27 Thread Vivien Didelot
In order to support cross-chip operations, we need to inform each switch
driver when a port operation occurs in a DSA tree.

This allows drivers to configure cross-chip port-based VLAN table, VTU
or FDB entries on DSA links, in order to implement a correct hardware
switching of frames.

Add a new tree.c file to implement tree-wide operations, propagating a
port-based operation on each switch of a tree.

Implement tree-wide bridge operations.

Signed-off-by: Vivien Didelot 
---
 drivers/net/dsa/bcm_sf2.c   |  6 +
 drivers/net/dsa/mv88e6xxx.c |  6 +
 include/net/dsa.h   |  6 +
 net/dsa/Makefile|  2 +-
 net/dsa/dsa_priv.h  |  6 +
 net/dsa/slave.c | 46 ---
 net/dsa/tree.c  | 66 +
 7 files changed, 96 insertions(+), 42 deletions(-)
 create mode 100644 net/dsa/tree.c

diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 6e3b844..0a91ea9 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -498,6 +498,9 @@ static int bcm_sf2_sw_br_join(struct dsa_switch *ds, struct 
dsa_port *dp,
struct dsa_port *intp;
u32 reg, p_ctl;
 
+   if (dsa_port_is_external(dp, ds))
+   return -EOPNOTSUPP;
+
p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(dp->port));
 
dsa_switch_for_each_port(ds, intp, priv->hw_params.num_ports) {
@@ -531,6 +534,9 @@ static void bcm_sf2_sw_br_leave(struct dsa_switch *ds, 
struct dsa_port *dp,
struct dsa_port *intp;
u32 reg, p_ctl;
 
+   if (dsa_port_is_external(dp, ds))
+   return;
+
p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(dp->port));
 
dsa_switch_for_each_port(ds, intp, priv->hw_params.num_ports) {
diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index 89d0206..6fef29b 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -2212,6 +2212,9 @@ int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, 
struct dsa_port *dp,
struct dsa_port *intp;
int err;
 
+   if (dsa_port_is_external(dp, ds))
+   return -EOPNOTSUPP;
+
mutex_lock(>smi_mutex);
 
/* Remap each port's VLANTable */
@@ -2234,6 +2237,9 @@ void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, 
struct dsa_port *dp,
struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
struct dsa_port *intp;
 
+   if (dsa_port_is_external(dp, ds))
+   return;
+
mutex_lock(>smi_mutex);
 
/* Remap each port's VLANTable */
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 85fac8a..33172c9 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -193,6 +193,12 @@ struct dsa_switch {
struct list_headdp;
 };
 
+static inline bool dsa_port_is_external(struct dsa_port *dp,
+   struct dsa_switch *ds)
+{
+   return dp->ds != ds;
+}
+
 static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p)
 {
return !!(ds->index == ds->dst->cpu_switch && p == ds->dst->cpu_port);
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index da06ed1..bf8d12c 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -1,6 +1,6 @@
 # the core
 obj-$(CONFIG_NET_DSA) += dsa_core.o
-dsa_core-y += dsa.o slave.o
+dsa_core-y += dsa.o tree.o slave.o
 
 # tagging formats
 dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index c5afddd..6e08b3d 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -46,6 +46,12 @@ struct dsa_slave_priv {
 /* dsa.c */
 extern char dsa_driver_version[];
 
+/* tree.c */
+int dsa_tree_bridge_port_join(struct dsa_switch_tree *dst, struct dsa_port *dp,
+ struct net_device *br);
+void dsa_tree_bridge_port_leave(struct dsa_switch_tree *dst,
+   struct dsa_port *dp, struct net_device *br);
+
 /* slave.c */
 extern const struct dsa_device_ops notag_netdev_ops;
 void dsa_slave_mii_bus_init(struct dsa_switch *ds);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index b90caf8..7123ae2 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -425,45 +425,6 @@ static int dsa_slave_port_obj_dump(struct net_device *dev,
return err;
 }
 
-static int dsa_slave_bridge_port_join(struct net_device *dev,
- struct net_device *br)
-{
-   struct dsa_slave_priv *p = netdev_priv(dev);
-   struct dsa_switch *ds = p->dp->ds;
-   int ret = -EOPNOTSUPP;
-
-   p->dp->br = br;
-
-   if (ds->drv->port_bridge_join)
-   ret = ds->drv->port_bridge_join(ds, p->dp, br);
-
-   if (ret && ret != -EOPNOTSUPP) {
-   p->dp->br = NULL;
-   return ret;
-   }
-
-   return 0;
-}
-
-static void dsa_slave_bridge_port_leave(struct net_device *dev)
-{
-   struct dsa_slave_priv *p 

[RFC 16/20] net: dsa: add tree-wide VLAN ops

2016-04-27 Thread Vivien Didelot
In order to support cross-chip operations, we need to inform each switch
driver when a port operation occurs in a DSA tree.

Implement tree-wide VLAN operations.

Signed-off-by: Vivien Didelot 
---
 drivers/net/dsa/mv88e6xxx.c | 12 +
 net/dsa/dsa_priv.h  |  8 ++
 net/dsa/slave.c | 59 ++--
 net/dsa/tree.c  | 60 +
 4 files changed, 87 insertions(+), 52 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index 7d29de3..8004d00 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -1378,6 +1378,9 @@ int mv88e6xxx_port_vlan_dump(struct dsa_switch *ds, 
struct dsa_port *dp,
u16 pvid;
int err;
 
+   if (dsa_port_is_external(dp, ds))
+   return -EOPNOTSUPP;
+
mutex_lock(>smi_mutex);
 
err = _mv88e6xxx_port_pvid_get(ds, dp->port, );
@@ -1835,6 +1838,9 @@ int mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, 
struct dsa_port *dp,
 {
int err;
 
+   if (dsa_port_is_external(dp, ds))
+   return -EOPNOTSUPP;
+
/* If the requested port doesn't belong to the same bridge as the VLAN
 * members, do not support it (yet) and fallback to software VLAN.
 */
@@ -1874,6 +1880,9 @@ void mv88e6xxx_port_vlan_add(struct dsa_switch *ds, 
struct dsa_port *dp,
bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID;
u16 vid;
 
+   if (dsa_port_is_external(dp, ds))
+   return;
+
mutex_lock(>smi_mutex);
 
for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid)
@@ -1930,6 +1939,9 @@ int mv88e6xxx_port_vlan_del(struct dsa_switch *ds, struct 
dsa_port *dp,
u16 pvid, vid;
int err = 0;
 
+   if (dsa_port_is_external(dp, ds))
+   return -EOPNOTSUPP;
+
mutex_lock(>smi_mutex);
 
err = _mv88e6xxx_port_pvid_get(ds, dp->port, );
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index e8765c3..d743d6a 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -60,6 +60,14 @@ int dsa_tree_port_fdb_del(struct dsa_switch_tree *dst, 
struct dsa_port *dp,
 int dsa_tree_port_fdb_dump(struct dsa_switch_tree *dst, struct dsa_port *dp,
   struct switchdev_obj_port_fdb *fdb,
   switchdev_obj_dump_cb_t *cb);
+int dsa_tree_port_vlan_add(struct dsa_switch_tree *dst, struct dsa_port *dp,
+  const struct switchdev_obj_port_vlan *vlan,
+  struct switchdev_trans *trans);
+int dsa_tree_port_vlan_del(struct dsa_switch_tree *dst, struct dsa_port *dp,
+  const struct switchdev_obj_port_vlan *vlan);
+int dsa_tree_port_vlan_dump(struct dsa_switch_tree *dst, struct dsa_port *dp,
+   struct switchdev_obj_port_vlan *vlan,
+   switchdev_obj_dump_cb_t *cb);
 
 /* slave.c */
 extern const struct dsa_device_ops notag_netdev_ops;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 90bcf8a..19469dc 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -195,50 +195,6 @@ out:
return 0;
 }
 
-static int dsa_slave_port_vlan_add(struct net_device *dev,
-  const struct switchdev_obj_port_vlan *vlan,
-  struct switchdev_trans *trans)
-{
-   struct dsa_slave_priv *p = netdev_priv(dev);
-   struct dsa_switch *ds = p->dp->ds;
-
-   if (switchdev_trans_ph_prepare(trans)) {
-   if (!ds->drv->port_vlan_prepare || !ds->drv->port_vlan_add)
-   return -EOPNOTSUPP;
-
-   return ds->drv->port_vlan_prepare(ds, p->dp, vlan, trans);
-   }
-
-   ds->drv->port_vlan_add(ds, p->dp, vlan, trans);
-
-   return 0;
-}
-
-static int dsa_slave_port_vlan_del(struct net_device *dev,
-  const struct switchdev_obj_port_vlan *vlan)
-{
-   struct dsa_slave_priv *p = netdev_priv(dev);
-   struct dsa_switch *ds = p->dp->ds;
-
-   if (!ds->drv->port_vlan_del)
-   return -EOPNOTSUPP;
-
-   return ds->drv->port_vlan_del(ds, p->dp, vlan);
-}
-
-static int dsa_slave_port_vlan_dump(struct net_device *dev,
-   struct switchdev_obj_port_vlan *vlan,
-   switchdev_obj_dump_cb_t *cb)
-{
-   struct dsa_slave_priv *p = netdev_priv(dev);
-   struct dsa_switch *ds = p->dp->ds;
-
-   if (ds->drv->port_vlan_dump)
-   return ds->drv->port_vlan_dump(ds, p->dp, vlan, cb);
-
-   return -EOPNOTSUPP;
-}
-
 static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
struct dsa_slave_priv *p = netdev_priv(dev);
@@ -323,9 +279,9 @@ static int dsa_slave_port_obj_add(struct net_device *dev,

[RFC 04/20] net: dsa: pass dsa_port down to drivers FDB ops

2016-04-27 Thread Vivien Didelot
Now that DSA has a proper structure for DSA ports, pass it down to the
port_fdb_{prepare,add,del,dump} driver functions.

Signed-off-by: Vivien Didelot 
---
 drivers/net/dsa/bcm_sf2.c   | 20 +++-
 drivers/net/dsa/mv88e6xxx.c | 22 +++---
 drivers/net/dsa/mv88e6xxx.h |  8 
 include/net/dsa.h   |  8 
 net/dsa/slave.c |  8 
 5 files changed, 34 insertions(+), 32 deletions(-)

diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 2d7b297..f7b53fa 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -725,7 +725,7 @@ static int bcm_sf2_arl_op(struct bcm_sf2_priv *priv, int 
op, int port,
return bcm_sf2_arl_read(priv, mac, vid, , , is_valid);
 }
 
-static int bcm_sf2_sw_fdb_prepare(struct dsa_switch *ds, int port,
+static int bcm_sf2_sw_fdb_prepare(struct dsa_switch *ds, struct dsa_port *dp,
  const struct switchdev_obj_port_fdb *fdb,
  struct switchdev_trans *trans)
 {
@@ -733,22 +733,22 @@ static int bcm_sf2_sw_fdb_prepare(struct dsa_switch *ds, 
int port,
return 0;
 }
 
-static void bcm_sf2_sw_fdb_add(struct dsa_switch *ds, int port,
+static void bcm_sf2_sw_fdb_add(struct dsa_switch *ds, struct dsa_port *dp,
   const struct switchdev_obj_port_fdb *fdb,
   struct switchdev_trans *trans)
 {
struct bcm_sf2_priv *priv = ds_to_priv(ds);
 
-   if (bcm_sf2_arl_op(priv, 0, port, fdb->addr, fdb->vid, true))
+   if (bcm_sf2_arl_op(priv, 0, dp->port, fdb->addr, fdb->vid, true))
pr_err("%s: failed to add MAC address\n", __func__);
 }
 
-static int bcm_sf2_sw_fdb_del(struct dsa_switch *ds, int port,
+static int bcm_sf2_sw_fdb_del(struct dsa_switch *ds, struct dsa_port *dp,
  const struct switchdev_obj_port_fdb *fdb)
 {
struct bcm_sf2_priv *priv = ds_to_priv(ds);
 
-   return bcm_sf2_arl_op(priv, 0, port, fdb->addr, fdb->vid, false);
+   return bcm_sf2_arl_op(priv, 0, dp->port, fdb->addr, fdb->vid, false);
 }
 
 static int bcm_sf2_arl_search_wait(struct bcm_sf2_priv *priv)
@@ -799,16 +799,18 @@ static int bcm_sf2_sw_fdb_copy(struct net_device *dev, 
int port,
return cb(>obj);
 }
 
-static int bcm_sf2_sw_fdb_dump(struct dsa_switch *ds, int port,
+static int bcm_sf2_sw_fdb_dump(struct dsa_switch *ds, struct dsa_port *dp,
   struct switchdev_obj_port_fdb *fdb,
   int (*cb)(struct switchdev_obj *obj))
 {
struct bcm_sf2_priv *priv = ds_to_priv(ds);
-   struct net_device *dev = ds->ports[port];
+   struct net_device *dev;
struct bcm_sf2_arl_entry results[2];
unsigned int count = 0;
int ret;
 
+   dev = ds->ports[dp->port];
+
/* Start search operation */
core_writel(priv, ARLA_SRCH_STDN, CORE_ARLA_SRCH_CTL);
 
@@ -819,12 +821,12 @@ static int bcm_sf2_sw_fdb_dump(struct dsa_switch *ds, int 
port,
 
/* Read both entries, then return their values back */
bcm_sf2_arl_search_rd(priv, 0, [0]);
-   ret = bcm_sf2_sw_fdb_copy(dev, port, [0], fdb, cb);
+   ret = bcm_sf2_sw_fdb_copy(dev, dp->port, [0], fdb, cb);
if (ret)
return ret;
 
bcm_sf2_arl_search_rd(priv, 1, [1]);
-   ret = bcm_sf2_sw_fdb_copy(dev, port, [1], fdb, cb);
+   ret = bcm_sf2_sw_fdb_copy(dev, dp->port, [1], fdb, cb);
if (ret)
return ret;
 
diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index 3f78c73..c1ff763 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -2031,7 +2031,7 @@ static int _mv88e6xxx_port_fdb_load(struct dsa_switch 
*ds, int port,
return _mv88e6xxx_atu_load(ds, );
 }
 
-int mv88e6xxx_port_fdb_prepare(struct dsa_switch *ds, int port,
+int mv88e6xxx_port_fdb_prepare(struct dsa_switch *ds, struct dsa_port *dp,
   const struct switchdev_obj_port_fdb *fdb,
   struct switchdev_trans *trans)
 {
@@ -2041,7 +2041,7 @@ int mv88e6xxx_port_fdb_prepare(struct dsa_switch *ds, int 
port,
return 0;
 }
 
-void mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port,
+void mv88e6xxx_port_fdb_add(struct dsa_switch *ds, struct dsa_port *dp,
const struct switchdev_obj_port_fdb *fdb,
struct switchdev_trans *trans)
 {
@@ -2051,19 +2051,19 @@ void mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int 
port,
struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
 
mutex_lock(>smi_mutex);
-   if (_mv88e6xxx_port_fdb_load(ds, port, fdb->addr, fdb->vid, state))
-   netdev_err(ds->ports[port], "failed to load MAC address\n");
+   if 

[RFC 15/20] net: dsa: add tree-wide FDB ops

2016-04-27 Thread Vivien Didelot
In order to support cross-chip operations, we need to inform each switch
driver when a port operation occurs in a DSA tree.

Implement tree-wide FDB operations.

Signed-off-by: Vivien Didelot 
---
 drivers/net/dsa/bcm_sf2.c   | 12 
 drivers/net/dsa/mv88e6xxx.c | 12 
 net/dsa/dsa_priv.h  |  9 ++
 net/dsa/slave.c | 68 ++---
 net/dsa/tree.c  | 61 
 5 files changed, 109 insertions(+), 53 deletions(-)

diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 0a91ea9..6e634e5 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -733,6 +733,9 @@ static int bcm_sf2_sw_fdb_prepare(struct dsa_switch *ds, 
struct dsa_port *dp,
  const struct switchdev_obj_port_fdb *fdb,
  struct switchdev_trans *trans)
 {
+   if (dsa_port_is_external(dp, ds))
+   return -EOPNOTSUPP;
+
/* We do not need to do anything specific here yet */
return 0;
 }
@@ -743,6 +746,9 @@ static void bcm_sf2_sw_fdb_add(struct dsa_switch *ds, 
struct dsa_port *dp,
 {
struct bcm_sf2_priv *priv = ds_to_priv(ds);
 
+   if (dsa_port_is_external(dp, ds))
+   return;
+
if (bcm_sf2_arl_op(priv, 0, dp->port, fdb->addr, fdb->vid, true))
pr_err("%s: failed to add MAC address\n", __func__);
 }
@@ -752,6 +758,9 @@ static int bcm_sf2_sw_fdb_del(struct dsa_switch *ds, struct 
dsa_port *dp,
 {
struct bcm_sf2_priv *priv = ds_to_priv(ds);
 
+   if (dsa_port_is_external(dp, ds))
+   return -EOPNOTSUPP;
+
return bcm_sf2_arl_op(priv, 0, dp->port, fdb->addr, fdb->vid, false);
 }
 
@@ -813,6 +822,9 @@ static int bcm_sf2_sw_fdb_dump(struct dsa_switch *ds, 
struct dsa_port *dp,
unsigned int count = 0;
int ret;
 
+   if (dsa_port_is_external(dp, ds))
+   return -EOPNOTSUPP;
+
dev = ds->ports[dp->port];
 
/* Start search operation */
diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index 6fef29b..7d29de3 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -2037,6 +2037,9 @@ int mv88e6xxx_port_fdb_prepare(struct dsa_switch *ds, 
struct dsa_port *dp,
   const struct switchdev_obj_port_fdb *fdb,
   struct switchdev_trans *trans)
 {
+   if (dsa_port_is_external(dp, ds))
+   return -EOPNOTSUPP;
+
/* We don't need any dynamic resource from the kernel (yet),
 * so skip the prepare phase.
 */
@@ -2052,6 +2055,9 @@ void mv88e6xxx_port_fdb_add(struct dsa_switch *ds, struct 
dsa_port *dp,
GLOBAL_ATU_DATA_STATE_UC_STATIC;
struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
 
+   if (dsa_port_is_external(dp, ds))
+   return;
+
mutex_lock(>smi_mutex);
if (_mv88e6xxx_port_fdb_load(ds, dp->port, fdb->addr, fdb->vid, state))
netdev_err(ds->ports[dp->port], "failed to load MAC address\n");
@@ -2064,6 +2070,9 @@ int mv88e6xxx_port_fdb_del(struct dsa_switch *ds, struct 
dsa_port *dp,
struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
int ret;
 
+   if (dsa_port_is_external(dp, ds))
+   return -EOPNOTSUPP;
+
mutex_lock(>smi_mutex);
ret = _mv88e6xxx_port_fdb_load(ds, dp->port, fdb->addr, fdb->vid,
   GLOBAL_ATU_DATA_STATE_UNUSED);
@@ -2169,6 +2178,9 @@ int mv88e6xxx_port_fdb_dump(struct dsa_switch *ds, struct 
dsa_port *dp,
u16 fid;
int err;
 
+   if (dsa_port_is_external(dp, ds))
+   return -EOPNOTSUPP;
+
mutex_lock(>smi_mutex);
 
/* Dump port's default Filtering Information Database (VLAN ID 0) */
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 6e08b3d..e8765c3 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -14,6 +14,7 @@
 #include 
 #include 
 #include 
+#include 
 
 struct dsa_device_ops {
struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev);
@@ -51,6 +52,14 @@ int dsa_tree_bridge_port_join(struct dsa_switch_tree *dst, 
struct dsa_port *dp,
  struct net_device *br);
 void dsa_tree_bridge_port_leave(struct dsa_switch_tree *dst,
struct dsa_port *dp, struct net_device *br);
+int dsa_tree_port_fdb_add(struct dsa_switch_tree *dst, struct dsa_port *dp,
+ const struct switchdev_obj_port_fdb *fdb,
+ struct switchdev_trans *trans);
+int dsa_tree_port_fdb_del(struct dsa_switch_tree *dst, struct dsa_port *dp,
+ const struct switchdev_obj_port_fdb *fdb);
+int dsa_tree_port_fdb_dump(struct dsa_switch_tree *dst, struct dsa_port *dp,
+  struct 

Re: [PATCH 1/5] phylib: don't return NULL from get_phy_device()

2016-04-27 Thread Arnd Bergmann
On Wednesday 27 April 2016 14:47:29 Florian Fainelli wrote:
> diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
> index 499003ee8055..94a27b028dd8 100644
> --- a/drivers/net/phy/mdio_bus.c
> +++ b/drivers/net/phy/mdio_bus.c
> @@ -333,7 +333,7 @@ int __mdiobus_register(struct mii_bus *bus, struct
> module *owner)
> struct phy_device *phydev;
> 
> phydev = mdiobus_scan(bus, i);
> -   if (IS_ERR(phydev)) {
> +   if (IS_ERR(phydev) && PTR_ERR(phydev) != -ENODEV) {
> err = PTR_ERR(phydev);
> goto error;
> }
> 
> 

I think that is an improvement over the original code, and better than
reverting the series. Out of the three callers of mdiobus_scan, I already
commented on drivers/net/ethernet/marvell/pxa168_eth.c being wrong to
start with, and drivers/net/ethernet/cadence/macb.c seems to require
the same fix that you did here for mdio_bus.c

Arnd


Re: [PATCH 1/5] phylib: don't return NULL from get_phy_device()

2016-04-27 Thread Arnd Bergmann
On Wednesday 27 April 2016 23:09:37 Sergei Shtylyov wrote:
> Hello.
> 
> On 04/27/2016 10:49 PM, Andrew Lunn wrote:
> 
> >> Sergei Shtylyov  writes:
> >>
> >>> Arnd Bergmann asked that get_phy_device() returns either NULL or the error
> >>> value,  not both on error.  Do as he said, return ERR_PTR(-ENODEV) instead
> >>> of NULL when the PHY ID registers read as  all ones.
> >>>
> >>> Suggested-by: Arnd Bergmann 
> >>> Signed-off-by: Sergei Shtylyov 
> >>>
> >>> ---
> >>>   drivers/net/phy/phy_device.c |2 +-
> >>>   1 file changed, 1 insertion(+), 1 deletion(-)
> >>>
> >>> Index: net-next/drivers/net/phy/phy_device.c
> >>> ===
> >>> --- net-next.orig/drivers/net/phy/phy_device.c
> >>> +++ net-next/drivers/net/phy/phy_device.c
> >>> @@ -529,7 +529,7 @@ struct phy_device *get_phy_device(struct
> >>>
> >>>   /* If the phy_id is mostly Fs, there is no device there */
> >>>   if ((phy_id & 0x1fff) == 0x1fff)
> >>> - return NULL;
> >>> + return ERR_PTR(-ENODEV);
> >>>
> >>>   return phy_device_create(bus, addr, phy_id, is_c45, _ids);
> >>>   }
> >
> > > This change is wrong, it needs reverting, or the call sites need
> > fixing to expect ENODEV.
> 
> So this function had a good reason to return NULL, as it turned out... :-(
> 
> > The point is, the device not being there is not an error, with respect
> > to the code calling this function.
> >
> > It gets called by mdiobus_scan()
> >
> > struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr)
> > {
> >  struct phy_device *phydev;
> >  int err;
> >
> >  phydev = get_phy_device(bus, addr, false);
> >  if (IS_ERR(phydev) || phydev == NULL)
> >  return phydev;
> >
> > So before, we return NULL, if the device was not there. Now we return
> > ERR_PTR(-ENODEV).
> >
> > This is being called by:
> >
> > int __mdiobus_register(struct mii_bus *bus, struct module *owner)
> > {
> >  struct mdio_device *mdiodev;
> > ...
> >  for (i = 0; i < PHY_MAX_ADDR; i++) {
> >  if ((bus->phy_mask & (1 << i)) == 0) {
> >  struct phy_device *phydev;
> >
> >  phydev = mdiobus_scan(bus, i);
> >  if (IS_ERR(phydev)) {
> >  err = PTR_ERR(phydev);
> >  goto error;
> >  }
> >  }
> >  }
> >
> > > This is treating ERR_PTR(-ENODEV) as a fatal error, whereas before
> > IS_ERR(NULL) would be false and it would continue scanning other
> > addresses on the bus.
> 
> Thank you for the detailed analysis! (And shame on me for the lack of it.)
> 
> > Please revert this, or fix all the callsites such that ENODEV is not a
> > fatal error.
> 
> OK, I'll do what DaveM decides.

I found one other user that remains broken: pxa168_init_phy() looks 
wrong before and after the patch:

pep->phy = mdiobus_scan(pep->smi_bus, pep->phy_addr);
if (!pep->phy)
return -ENODEV;

err = phy_connect_direct(dev, pep->phy, pxa168_eth_adjust_link,
 pep->phy_intf);

as phy_connect_direct() will go on and dereference an error pointer. This
should check for IS_ERR(), and with the patches applied, we can drop the
!pep->phy check.

Arnd


[PATCH next v2] ipvlan: Fix failure path in dev registration during link creation

2016-04-27 Thread Mahesh Bandewar
From: Mahesh Bandewar 

When newlink creation fails at device-registration, the port->count
is decremented twice. Francesco Ruggeri (frugg...@arista.com) found
this issue in Macvlan and the same exists in IPvlan driver too.

While fixing this issue I noticed another issue of missing unregister
in case of failure, so adding it to the fix which is similar to the
macvlan fix by Francesco in commit 308379607548 ("macvlan: fix failure
during registration v3")

Reported-by: Francesco Ruggeri 
Signed-off-by: Mahesh Bandewar 
CC: Eric Dumazet 
CC: Eric W. Biederman 
---
 drivers/net/ipvlan/ipvlan_main.c | 19 ---
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 57941d3f4227..1c4d395fbd49 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -113,6 +113,7 @@ static int ipvlan_init(struct net_device *dev)
 {
struct ipvl_dev *ipvlan = netdev_priv(dev);
const struct net_device *phy_dev = ipvlan->phy_dev;
+   struct ipvl_port *port = ipvlan->port;
 
dev->state = (dev->state & ~IPVLAN_STATE_MASK) |
 (phy_dev->state & IPVLAN_STATE_MASK);
@@ -128,6 +129,8 @@ static int ipvlan_init(struct net_device *dev)
if (!ipvlan->pcpu_stats)
return -ENOMEM;
 
+   port->count += 1;
+
return 0;
 }
 
@@ -481,27 +484,21 @@ static int ipvlan_link_new(struct net *src_net, struct 
net_device *dev,
 
dev->priv_flags |= IFF_IPVLAN_SLAVE;
 
-   port->count += 1;
err = register_netdevice(dev);
if (err < 0)
-   goto ipvlan_destroy_port;
+   return err;
 
err = netdev_upper_dev_link(phy_dev, dev);
-   if (err)
-   goto ipvlan_destroy_port;
+   if (err) {
+   unregister_netdevice(dev);
+   return err;
+   }
 
list_add_tail_rcu(>pnode, >ipvlans);
ipvlan_set_port_mode(port, mode);
 
netif_stacked_transfer_operstate(phy_dev, dev);
return 0;
-
-ipvlan_destroy_port:
-   port->count -= 1;
-   if (!port->count)
-   ipvlan_port_destroy(phy_dev);
-
-   return err;
 }
 
 static void ipvlan_link_delete(struct net_device *dev, struct list_head *head)
-- 
2.8.0.rc3.226.g39d4020



Re: [PATCH 1/5] phylib: don't return NULL from get_phy_device()

2016-04-27 Thread Florian Fainelli
On 27/04/16 12:49, Andrew Lunn wrote:
> On Wed, Apr 27, 2016 at 03:30:57PM -0400, Vivien Didelot wrote:
>> Hi David, All,
>>
>> Sergei Shtylyov  writes:
>>
>>> Arnd Bergmann asked that get_phy_device() returns either NULL or the error
>>> value,  not both on error.  Do as he said, return ERR_PTR(-ENODEV) instead
>>> of NULL when the PHY ID registers read as  all ones.
>>>
>>> Suggested-by: Arnd Bergmann 
>>> Signed-off-by: Sergei Shtylyov 
>>>
>>> ---
>>>  drivers/net/phy/phy_device.c |2 +-
>>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>>
>>> Index: net-next/drivers/net/phy/phy_device.c
>>> ===
>>> --- net-next.orig/drivers/net/phy/phy_device.c
>>> +++ net-next/drivers/net/phy/phy_device.c
>>> @@ -529,7 +529,7 @@ struct phy_device *get_phy_device(struct
>>>  
>>> /* If the phy_id is mostly Fs, there is no device there */
>>> if ((phy_id & 0x1fff) == 0x1fff)
>>> -   return NULL;
>>> +   return ERR_PTR(-ENODEV);
>>>  
>>> return phy_device_create(bus, addr, phy_id, is_c45, _ids);
>>>  }
> 
> This change is wrong, it needs reverting, or the call sites need
> fixing to expect ENODEV.
> 
> The point is, the device not being there is not an error, with respect
> to the code calling this function.
> 
> It gets called by mdiobus_scan()
> 
> struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr)
> {
> struct phy_device *phydev;
> int err;
> 
> phydev = get_phy_device(bus, addr, false);
> if (IS_ERR(phydev) || phydev == NULL)
> return phydev;
> 
> So before, we return NULL, if the device was not there. Now we return
> ERR_PTR(-ENODEV).
> 
> This is being called by:
> 
> int __mdiobus_register(struct mii_bus *bus, struct module *owner)
> {
> struct mdio_device *mdiodev;
> ...
> for (i = 0; i < PHY_MAX_ADDR; i++) {
> if ((bus->phy_mask & (1 << i)) == 0) {
> struct phy_device *phydev;
> 
> phydev = mdiobus_scan(bus, i);
> if (IS_ERR(phydev)) {
> err = PTR_ERR(phydev);
> goto error;
> }
> }
> }
> 
> This is treating ERR_PTR(-ENODEV) as a fatal error, whereas before
> IS_ERR(NULL) would be false and it would continue scanning other
> addresses on the bus.
> 
> Please revert this, or fix all the callsites such that ENODEV is not a
> fatal error.

So the one you pointed out in __mdiobus_register() is definitely
needed, though I did get a different issue than Vivien's (-EBUSY vs.
-EINVAL). The get_phy_device() in drivers/of/of_mdio.c probably needs
something similar too, here is what I locally have for the moment:

diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 499003ee8055..94a27b028dd8 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -333,7 +333,7 @@ int __mdiobus_register(struct mii_bus *bus, struct
module *owner)
struct phy_device *phydev;

phydev = mdiobus_scan(bus, i);
-   if (IS_ERR(phydev)) {
+   if (IS_ERR(phydev) && PTR_ERR(phydev) != -ENODEV) {
err = PTR_ERR(phydev);
goto error;
}

-- 
Florian


Re: [PATCH net-next] net: dsa: Provide CPU port statistics to master netdev

2016-04-27 Thread Florian Fainelli
On 27/04/16 12:03, Andrew Lunn wrote:
>> +if (stringset == ETH_SS_STATS && ds->drv->get_strings) {
>> +ndata = data + mcount * len;
>> +/* This function copies ETH_GSTRINGS_LEN bytes, we will mangle
>> + * the output after to prepend our CPU port prefix we
>> + * constructed earlier
>> + */
>> +ds->drv->get_strings(ds, cpu_port, ndata);
>> +count = ds->drv->get_sset_count(ds);
>> +for (i = 0; i < count; i++) {
>> +memmove(ndata + (i * len + sizeof(pfx)),
>> +ndata + i * len, len - sizeof(pfx));
>> +memcpy(ndata + i * len, pfx, sizeof(pfx));
> 
> Hi Florian
> 
> Did you check what happens if this causes the NULL terminator to be
> discarded? Does ethtool handle that? As I said before, it is unclear
> if one is required.

I just did yes. So ethtool has a do_gstringset() function which
NULL-terminates every string set except the statistics kind
(ETH_SS_STATS or ETH_SS_PHY_STATS) but this is not much of a problem
because it limits the output to ETH_GSTRING_LEN anyway.

After injecting a bit of error in net/dsa/slave.c to have a much bigger
prefix making us push the stats names, the stats are correctly truncated
by ethtool. So we seem to be good to go with the current code in kernel
and user space.
-- 
Florian


[PATCH net-next #2 1/1] pch_gbe: replace private tx ring lock with common netif_tx_lock

2016-04-27 Thread Francois Romieu
pch_gbe_tx_ring.tx_lock is only used in the hard_xmit handler and
in the transmit completion reaper called from NAPI context.

Compile-tested only. Potential victims Cced.

Someone more knowledgeable may check if pch_gbe_tx_queue could
have some use for a mmiowb.

Signed-off-by: Francois Romieu 
Cc: Darren Hart 
Cc: Andy Cress 
Cc: br...@fossetcon.org

---
 Includes Nikolay's fix.

 drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h  |  2 --
 drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c | 10 ++
 2 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h 
b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h
index 2a55d6d..8d710a3 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h
@@ -481,7 +481,6 @@ struct pch_gbe_buffer {
 
 /**
  * struct pch_gbe_tx_ring - tx ring information
- * @tx_lock:   spinlock structs
  * @desc:  pointer to the descriptor ring memory
  * @dma:   physical address of the descriptor ring
  * @size:  length of descriptor ring in bytes
@@ -491,7 +490,6 @@ struct pch_gbe_buffer {
  * @buffer_info:   array of buffer information structs
  */
 struct pch_gbe_tx_ring {
-   spinlock_t tx_lock;
struct pch_gbe_tx_desc *desc;
dma_addr_t dma;
unsigned int size;
diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c 
b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
index ca4add7..3cd87a4 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
@@ -1640,7 +1640,7 @@ pch_gbe_clean_tx(struct pch_gbe_adapter *adapter,
   cleaned_count);
if (cleaned_count > 0)  { /*skip this if nothing cleaned*/
/* Recover from running out of Tx resources in xmit_frame */
-   spin_lock(_ring->tx_lock);
+   netif_tx_lock(adapter->netdev);
if (unlikely(cleaned && (netif_queue_stopped(adapter->netdev
{
netif_wake_queue(adapter->netdev);
@@ -1652,7 +1652,7 @@ pch_gbe_clean_tx(struct pch_gbe_adapter *adapter,
 
netdev_dbg(adapter->netdev, "next_to_clean : %d\n",
   tx_ring->next_to_clean);
-   spin_unlock(_ring->tx_lock);
+   netif_tx_unlock(adapter->netdev);
}
return cleaned;
 }
@@ -1805,7 +1805,6 @@ int pch_gbe_setup_tx_resources(struct pch_gbe_adapter 
*adapter,
 
tx_ring->next_to_use = 0;
tx_ring->next_to_clean = 0;
-   spin_lock_init(_ring->tx_lock);
 
for (desNo = 0; desNo < tx_ring->count; desNo++) {
tx_desc = PCH_GBE_TX_DESC(*tx_ring, desNo);
@@ -2135,13 +2134,9 @@ static int pch_gbe_xmit_frame(struct sk_buff *skb, 
struct net_device *netdev)
 {
struct pch_gbe_adapter *adapter = netdev_priv(netdev);
struct pch_gbe_tx_ring *tx_ring = adapter->tx_ring;
-   unsigned long flags;
-
-   spin_lock_irqsave(_ring->tx_lock, flags);
 
if (unlikely(!PCH_GBE_DESC_UNUSED(tx_ring))) {
netif_stop_queue(netdev);
-   spin_unlock_irqrestore(_ring->tx_lock, flags);
netdev_dbg(netdev,
   "Return : BUSY  next_to use : 0x%08x  next_to clean 
: 0x%08x\n",
   tx_ring->next_to_use, tx_ring->next_to_clean);
@@ -2150,7 +2145,6 @@ static int pch_gbe_xmit_frame(struct sk_buff *skb, struct 
net_device *netdev)
 
/* CRC,ITAG no support */
pch_gbe_tx_queue(adapter, tx_ring, skb);
-   spin_unlock_irqrestore(_ring->tx_lock, flags);
return NETDEV_TX_OK;
 }
 
-- 
2.5.5



Re: [PATCH net-next 1/1] pch_gbe: replace private tx ring lock with common netif_tx_lock

2016-04-27 Thread Francois Romieu
Nikolay Aleksandrov  :
> On 04/27/2016 12:49 AM, Francois Romieu wrote:
[...]
> > @@ -1652,7 +1652,7 @@ pch_gbe_clean_tx(struct pch_gbe_adapter *adapter,
> >  
> > netdev_dbg(adapter->netdev, "next_to_clean : %d\n",
> >tx_ring->next_to_clean);
> > -   spin_unlock(_ring->tx_lock);
> > +   netif_tx_lock(adapter->netdev);
> 
> Shouldn't this be netif_tx_unlock ?

It should. Thanks for reviewing.

-- 
Ueimor


Re: [PATCH next] ipvlan: Fix failure path in dev registration during link creation

2016-04-27 Thread Mahesh Bandewar
On Wed, Apr 27, 2016 at 11:57 AM, David Miller  wrote:
> From: Mahesh Bandewar 
> Date: Wed, 27 Apr 2016 11:37:39 -0700
>
>> While fixing this issue I noticed another issue of missing unregister
>> in case of failure, so adding it to the fix which is similar to the
>> macvlan fix by Francesco in SHA1:308379607548524b8d86dbf20134681024935e0b
>
> This is not the correct way to refer to commits.
>
> You should specify, exactly, 12 digits of the SHA1 value, followed by
> a space, followed by the header line text of that commit contained in
> parentheses and double quotes, like how Fixes: tags specify commits.
Ok, will fix that soon.


Re: [PATCH 1/5] phylib: don't return NULL from get_phy_device()

2016-04-27 Thread David Miller
From: Sergei Shtylyov 
Date: Wed, 27 Apr 2016 23:09:37 +0300

> On 04/27/2016 10:49 PM, Andrew Lunn wrote:
> 
>> Please revert this, or fix all the callsites such that ENODEV is not a
>> fatal error.
> 
>OK, I'll do what DaveM decides.

If you feel confident getting all the ENODEV checks right, please just do
that.

Thanks.


Re: [PATCH net 0/3] bnxt_en: Bug fixes for net.

2016-04-27 Thread David Miller
From: Michael Chan 
Date: Mon, 25 Apr 2016 02:30:48 -0400

> Only use MSIX on VF, and fix rx page buffers on architectures with
> PAGE_SIZE >= 64K.

Series applied, thanks Michael.


[net-next v2 11/14] i40e/i40evf: Only offload VLAN tag if enabled

2016-04-27 Thread Jeff Kirsher
From: Jesse Brandeburg 

The driver was offloading the VLAN tag into the skb
any time there was a VLAN tag and the hardware stripping was
enabled.  Just check that NETIF_F_HW_VLAN_CTAG_RX is set in the
netdev features before calling __vlan_hwaccel_put_tag().

Change-Id: Ife95290c06edd9a616393b38679923938b382241
Signed-off-by: Jesse Brandeburg 
Tested-by: Andrew Bowers 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/i40e/i40e_txrx.c   | 3 ++-
 drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c 
b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 6e44cf1..285efe9 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1370,7 +1370,8 @@ static void i40e_receive_skb(struct i40e_ring *rx_ring,
 {
struct i40e_q_vector *q_vector = rx_ring->q_vector;
 
-   if (vlan_tag & VLAN_VID_MASK)
+   if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
+   (vlan_tag & VLAN_VID_MASK))
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
 
napi_gro_receive(_vector->napi, skb);
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c 
b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index f101895..4633235 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -842,7 +842,8 @@ static void i40e_receive_skb(struct i40e_ring *rx_ring,
 {
struct i40e_q_vector *q_vector = rx_ring->q_vector;
 
-   if (vlan_tag & VLAN_VID_MASK)
+   if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
+   (vlan_tag & VLAN_VID_MASK))
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
 
napi_gro_receive(_vector->napi, skb);
-- 
2.5.5



[net-next v2 04/14] i40evf: Don't Panic

2016-04-27 Thread Jeff Kirsher
From: Mitch Williams 

Under some circumstances the driver remove function may be called before
the driver is fully initialized. So we can't assume that we know where
our towel is at, or that all of the data structures are initialized.

To ensure that we don't panic, check that the vsi_res pointer is valid
before dereferencing it. Then drink beer and eat peanuts.

Change-ID: If697b4db57348e39f9538793e16aa755e3e1af03
Signed-off-by: Mitch Williams 
Tested-by: Andrew Bowers 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/i40evf/i40evf.h | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h 
b/drivers/net/ethernet/intel/i40evf/i40evf.h
index e657ecc..017c83b 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf.h
+++ b/drivers/net/ethernet/intel/i40evf/i40evf.h
@@ -256,8 +256,10 @@ struct i40evf_adapter {
bool netdev_registered;
bool link_up;
enum i40e_virtchnl_ops current_op;
-#define CLIENT_ENABLED(_a) ((_a)->vf_res->vf_offload_flags & \
-   I40E_VIRTCHNL_VF_OFFLOAD_IWARP)
+#define CLIENT_ENABLED(_a) ((_a)->vf_res ? \
+   (_a)->vf_res->vf_offload_flags & \
+   I40E_VIRTCHNL_VF_OFFLOAD_IWARP : \
+   0)
 #define RSS_AQ(_a) ((_a)->vf_res->vf_offload_flags & \
I40E_VIRTCHNL_VF_OFFLOAD_RSS_AQ)
 #define VLAN_ALLOWED(_a) ((_a)->vf_res->vf_offload_flags & \
-- 
2.5.5



[net-next v2 01/14] i40e/i40evf: Clean up feature flags

2016-04-27 Thread Jeff Kirsher
From: Alexander Duyck 

The feature flags list for i40e and i40evf is beginning to become pretty
massive.  I plan to add another 4 or so features to these drivers and
duplicating the flags for each and every flags list is becoming a bit
repetitive.

The primary change here is that we now build our features list around
hw_enc_features.  After that we assign that to vlan_features,
hw_features, and finally map that onto features.  In addition we end up
throwing features onto hw_enc_features that end up having no effect such
as the Rx offloads and SCTP_CRC.  However that should have no impact and
makes things a bit easier for us as hw_enc_features is one of the less
updated features maps available.

For i40evf I went through and sanity checked a few features as well.
Specifically RXCSUM was being set as a read-only feature which didn't make
much sense.  I have updated things so we can clear the NETIF_F_RXCSUM flag
since that is really a software feature and not a hardware one anyway so
disabling it is just a matter of ignoring the result from the hardware.

Signed-off-by: Alexander Duyck 
Tested-by: Andrew Bowers 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 61 ++-
 drivers/net/ethernet/intel/i40evf/i40evf_main.c | 66 -
 2 files changed, 58 insertions(+), 69 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c 
b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 0b071ce..f2e83fe 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -9111,40 +9111,36 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
np = netdev_priv(netdev);
np->vsi = vsi;
 
-   netdev->hw_enc_features |= NETIF_F_IP_CSUM |
-  NETIF_F_IPV6_CSUM   |
-  NETIF_F_TSO |
-  NETIF_F_TSO6|
-  NETIF_F_TSO_ECN |
-  NETIF_F_GSO_GRE |
-  NETIF_F_GSO_UDP_TUNNEL  |
-  NETIF_F_GSO_UDP_TUNNEL_CSUM |
+   netdev->hw_enc_features |= NETIF_F_SG   |
+  NETIF_F_IP_CSUM  |
+  NETIF_F_IPV6_CSUM|
+  NETIF_F_HIGHDMA  |
+  NETIF_F_SOFT_FEATURES|
+  NETIF_F_TSO  |
+  NETIF_F_TSO_ECN  |
+  NETIF_F_TSO6 |
+  NETIF_F_GSO_GRE  |
+  NETIF_F_GSO_UDP_TUNNEL   |
+  NETIF_F_GSO_UDP_TUNNEL_CSUM  |
+  NETIF_F_SCTP_CRC |
+  NETIF_F_RXHASH   |
+  NETIF_F_RXCSUM   |
   0;
 
-   netdev->features = NETIF_F_SG  |
-  NETIF_F_IP_CSUM |
-  NETIF_F_SCTP_CRC|
-  NETIF_F_HIGHDMA |
-  NETIF_F_GSO_UDP_TUNNEL  |
-  NETIF_F_GSO_GRE |
-  NETIF_F_HW_VLAN_CTAG_TX |
-  NETIF_F_HW_VLAN_CTAG_RX |
-  NETIF_F_HW_VLAN_CTAG_FILTER |
-  NETIF_F_IPV6_CSUM   |
-  NETIF_F_TSO |
-  NETIF_F_TSO_ECN |
-  NETIF_F_TSO6|
-  NETIF_F_RXCSUM  |
-  NETIF_F_RXHASH  |
-  0;
+   if (!(pf->flags & I40E_FLAG_OUTER_UDP_CSUM_CAPABLE))
+   netdev->hw_enc_features ^= NETIF_F_GSO_UDP_TUNNEL_CSUM;
+
+   /* record features VLANs can make use of */
+   netdev->vlan_features |= netdev->hw_enc_features;
 
if (!(pf->flags & I40E_FLAG_MFP_ENABLED))
-   netdev->features |= NETIF_F_NTUPLE;
-   if (pf->flags & I40E_FLAG_OUTER_UDP_CSUM_CAPABLE)
-   netdev->features |= NETIF_F_GSO_UDP_TUNNEL_CSUM;
+   netdev->hw_features |= NETIF_F_NTUPLE;
+
+   netdev->hw_features |= netdev->hw_enc_features  |
+  NETIF_F_HW_VLAN_CTAG_TX  |
+  NETIF_F_HW_VLAN_CTAG_RX;
 
-   /* copy netdev features into list of user selectable features */
-   

[net-next v2 13/14] i40e: Add VF promiscuous mode driver support

2016-04-27 Thread Jeff Kirsher
From: Anjali Singhai Jain 

Add infrastructure for Network Function Virtualization VLAN tagged
packet steering feature.

Change-Id: I9b873d8fcc253858e6baba65ac68ec5b9363944e
Signed-off-by: Anjali Singhai Jain 
Signed-off-by: Greg Rose 
Tested-by: Andrew Bowers 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 153 -
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h |   2 +
 2 files changed, 149 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c 
b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index c364588..f47b0e8 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -1427,6 +1427,25 @@ static void i40e_vc_reset_vf_msg(struct i40e_vf *vf)
 }
 
 /**
+ * i40e_getnum_vf_vsi_vlan_filters
+ * @vsi: pointer to the vsi
+ *
+ * called to get the number of VLANs offloaded on this VF
+ **/
+static inline int i40e_getnum_vf_vsi_vlan_filters(struct i40e_vsi *vsi)
+{
+   struct i40e_mac_filter *f;
+   int num_vlans = 0;
+
+   list_for_each_entry(f, >mac_filter_list, list) {
+   if (f->vlan >= 0 && f->vlan <= I40E_MAX_VLANID)
+   num_vlans++;
+   }
+
+   return num_vlans;
+}
+
+/**
  * i40e_vc_config_promiscuous_mode_msg
  * @vf: pointer to the VF info
  * @msg: pointer to the msg buffer
@@ -1442,22 +1461,122 @@ static int i40e_vc_config_promiscuous_mode_msg(struct 
i40e_vf *vf,
(struct i40e_virtchnl_promisc_info *)msg;
struct i40e_pf *pf = vf->pf;
struct i40e_hw *hw = >hw;
-   struct i40e_vsi *vsi;
+   struct i40e_mac_filter *f;
+   i40e_status aq_ret = 0;
bool allmulti = false;
-   i40e_status aq_ret;
+   struct i40e_vsi *vsi;
+   bool alluni = false;
+   int aq_err = 0;
 
vsi = i40e_find_vsi_from_id(pf, info->vsi_id);
if (!test_bit(I40E_VF_STAT_ACTIVE, >vf_states) ||
!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, >vf_caps) ||
-   !i40e_vc_isvalid_vsi_id(vf, info->vsi_id) ||
-   (vsi->type != I40E_VSI_FCOE)) {
+   !i40e_vc_isvalid_vsi_id(vf, info->vsi_id)) {
+   dev_err(>pdev->dev,
+   "VF %d doesn't meet requirements to enter promiscuous 
mode\n",
+   vf->vf_id);
aq_ret = I40E_ERR_PARAM;
goto error_param;
}
+   /* Multicast promiscuous handling*/
if (info->flags & I40E_FLAG_VF_MULTICAST_PROMISC)
allmulti = true;
-   aq_ret = i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid,
-  allmulti, NULL);
+
+   if (vf->port_vlan_id) {
+   aq_ret = i40e_aq_set_vsi_mc_promisc_on_vlan(hw, vsi->seid,
+   allmulti,
+   vf->port_vlan_id,
+   NULL);
+   } else if (i40e_getnum_vf_vsi_vlan_filters(vsi)) {
+   list_for_each_entry(f, >mac_filter_list, list) {
+   if (f->vlan >= 0 && f->vlan <= I40E_MAX_VLANID)
+   aq_ret = i40e_aq_set_vsi_mc_promisc_on_vlan
+  (hw,
+  vsi->seid,
+  allmulti,
+  f->vlan,
+  NULL);
+   aq_err = pf->hw.aq.asq_last_status;
+   if (aq_ret) {
+   dev_err(>pdev->dev,
+   "Could not add VLAN %d to multicast 
promiscuous domain err %s aq_err %s\n",
+   f->vlan,
+   i40e_stat_str(>hw, aq_ret),
+   i40e_aq_str(>hw, aq_err));
+   break;
+   }
+   }
+   } else {
+   aq_ret = i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid,
+  allmulti, NULL);
+   aq_err = pf->hw.aq.asq_last_status;
+   if (aq_ret) {
+   dev_err(>pdev->dev,
+   "VF %d failed to set multicast promiscuous mode 
err %s aq_err %s\n",
+   vf->vf_id,
+   i40e_stat_str(>hw, aq_ret),
+   i40e_aq_str(>hw, aq_err));
+   goto error_param_int;
+

[net-next v2 03/14] i40e: Add support for configuring VF RSS

2016-04-27 Thread Jeff Kirsher
From: Mitch Williams 

Add support for configuring RSS on behalf of the VFs. This removes the
burden of dealing with different hardware interfaces from the VF
drivers, allowing for better future compatibility.

Change-ID: Icea75d3f37241ee8e447be5779e5abb53ddf04c0
Signed-off-by: Mitch Williams 
Tested-by: Andrew Bowers 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/i40e/i40e.h |   1 +
 drivers/net/ethernet/intel/i40e/i40e_main.c|  35 +++-
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 193 -
 3 files changed, 217 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h 
b/drivers/net/ethernet/intel/i40e/i40e.h
index d25b3be..e312adf 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -202,6 +202,7 @@ struct i40e_lump_tracking {
 
 #define I40E_HKEY_ARRAY_SIZE ((I40E_PFQF_HKEY_MAX_INDEX + 1) * 4)
 #define I40E_HLUT_ARRAY_SIZE ((I40E_PFQF_HLUT_MAX_INDEX + 1) * 4)
+#define I40E_VF_HLUT_ARRAY_SIZE ((I40E_VFQF_HLUT1_MAX_INDEX + 1) * 4)
 
 enum i40e_fd_stat_idx {
I40E_FD_STAT_ATR,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c 
b/drivers/net/ethernet/intel/i40e/i40e_main.c
index ec94ad6c..39b3b56 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -8082,24 +8082,45 @@ static int i40e_config_rss_reg(struct i40e_vsi *vsi, 
const u8 *seed,
 {
struct i40e_pf *pf = vsi->back;
struct i40e_hw *hw = >hw;
+   u16 vf_id = vsi->vf_id;
u8 i;
 
/* Fill out hash function seed */
if (seed) {
u32 *seed_dw = (u32 *)seed;
 
-   for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++)
-   i40e_write_rx_ctl(hw, I40E_PFQF_HKEY(i), seed_dw[i]);
+   if (vsi->type == I40E_VSI_MAIN) {
+   for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++)
+   i40e_write_rx_ctl(hw, I40E_PFQF_HKEY(i),
+ seed_dw[i]);
+   } else if (vsi->type == I40E_VSI_SRIOV) {
+   for (i = 0; i <= I40E_VFQF_HKEY1_MAX_INDEX; i++)
+   i40e_write_rx_ctl(hw,
+ I40E_VFQF_HKEY1(i, vf_id),
+ seed_dw[i]);
+   } else {
+   dev_err(>pdev->dev, "Cannot set RSS seed - invalid 
VSI type\n");
+   }
}
 
if (lut) {
u32 *lut_dw = (u32 *)lut;
 
-   if (lut_size != I40E_HLUT_ARRAY_SIZE)
-   return -EINVAL;
-
-   for (i = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++)
-   wr32(hw, I40E_PFQF_HLUT(i), lut_dw[i]);
+   if (vsi->type == I40E_VSI_MAIN) {
+   if (lut_size != I40E_HLUT_ARRAY_SIZE)
+   return -EINVAL;
+   for (i = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++)
+   wr32(hw, I40E_PFQF_HLUT(i), lut_dw[i]);
+   } else if (vsi->type == I40E_VSI_SRIOV) {
+   if (lut_size != I40E_VF_HLUT_ARRAY_SIZE)
+   return -EINVAL;
+   for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++)
+   i40e_write_rx_ctl(hw,
+ I40E_VFQF_HLUT1(i, vf_id),
+ lut_dw[i]);
+   } else {
+   dev_err(>pdev->dev, "Cannot set RSS LUT - invalid 
VSI type\n");
+   }
}
i40e_flush(hw);
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c 
b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 30f8cbe..c364588 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -1348,12 +1348,16 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf 
*vf, u8 *msg)
set_bit(I40E_VF_STAT_IWARPENA, >vf_states);
}
 
-   if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) {
-   if (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_RSS_AQ)
-   vfres->vf_offload_flags |=
-   I40E_VIRTCHNL_VF_OFFLOAD_RSS_AQ;
+   if (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF) {
+   vfres->vf_offload_flags |= I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF;
} else {
-   vfres->vf_offload_flags |= I40E_VIRTCHNL_VF_OFFLOAD_RSS_REG;
+   if ((pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) &&
+   (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_RSS_AQ))
+   vfres->vf_offload_flags |=
+   

[net-next v2 06/14] i40e: Specify AQ event opcode to wait for

2016-04-27 Thread Jeff Kirsher
From: Shannon Nelson 

To add a little flexibility to the nvmupdate facility, this code adds the
ability to specify an AQ event opcode to wait on after the Exec_AQ request.

Change-ID: Iddbfd63c3de8df3edb9d3e90678b08989bc4946e
Signed-off-by: Shannon Nelson 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/i40e/i40e_nvm.c| 49 +++
 drivers/net/ethernet/intel/i40e/i40e_type.h   |  1 +
 drivers/net/ethernet/intel/i40evf/i40e_type.h |  1 +
 3 files changed, 44 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c 
b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
index f2cea3d..954efe3 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
@@ -693,10 +693,10 @@ i40e_status i40e_nvmupd_command(struct i40e_hw *hw,
/* early check for status command and debug msgs */
upd_cmd = i40e_nvmupd_validate_command(hw, cmd, perrno);
 
-   i40e_debug(hw, I40E_DEBUG_NVM, "%s state %d nvm_release_on_hold %d cmd 
0x%08x config 0x%08x offset 0x%08x data_size 0x%08x\n",
+   i40e_debug(hw, I40E_DEBUG_NVM, "%s state %d nvm_release_on_hold %d opc 
0x%04x cmd 0x%08x config 0x%08x offset 0x%08x data_size 0x%08x\n",
   i40e_nvm_update_state_str[upd_cmd],
   hw->nvmupd_state,
-  hw->nvm_release_on_done,
+  hw->nvm_release_on_done, hw->nvm_wait_opcode,
   cmd->command, cmd->config, cmd->offset, cmd->data_size);
 
if (upd_cmd == I40E_NVMUPD_INVALID) {
@@ -710,7 +710,18 @@ i40e_status i40e_nvmupd_command(struct i40e_hw *hw,
 * going into the state machine
 */
if (upd_cmd == I40E_NVMUPD_STATUS) {
+   if (!cmd->data_size) {
+   *perrno = -EFAULT;
+   return I40E_ERR_BUF_TOO_SHORT;
+   }
+
bytes[0] = hw->nvmupd_state;
+
+   if (cmd->data_size >= 4) {
+   bytes[1] = 0;
+   *((u16 *)[2]) = hw->nvm_wait_opcode;
+   }
+
return 0;
}
 
@@ -729,6 +740,14 @@ i40e_status i40e_nvmupd_command(struct i40e_hw *hw,
 
case I40E_NVMUPD_STATE_INIT_WAIT:
case I40E_NVMUPD_STATE_WRITE_WAIT:
+   /* if we need to stop waiting for an event, clear
+* the wait info and return before doing anything else
+*/
+   if (cmd->offset == 0xffff) {
+   i40e_nvmupd_check_wait_event(hw, hw->nvm_wait_opcode);
+   return 0;
+   }
+
status = I40E_ERR_NOT_READY;
*perrno = -EBUSY;
break;
@@ -800,6 +819,7 @@ static i40e_status i40e_nvmupd_state_init(struct i40e_hw 
*hw,
i40e_release_nvm(hw);
} else {
hw->nvm_release_on_done = true;
+   hw->nvm_wait_opcode = i40e_aqc_opc_nvm_erase;
hw->nvmupd_state = I40E_NVMUPD_STATE_INIT_WAIT;
}
}
@@ -816,6 +836,7 @@ static i40e_status i40e_nvmupd_state_init(struct i40e_hw 
*hw,
i40e_release_nvm(hw);
} else {
hw->nvm_release_on_done = true;
+   hw->nvm_wait_opcode = i40e_aqc_opc_nvm_update;
hw->nvmupd_state = I40E_NVMUPD_STATE_INIT_WAIT;
}
}
@@ -828,10 +849,12 @@ static i40e_status i40e_nvmupd_state_init(struct i40e_hw 
*hw,
 hw->aq.asq_last_status);
} else {
status = i40e_nvmupd_nvm_write(hw, cmd, bytes, perrno);
-   if (status)
+   if (status) {
i40e_release_nvm(hw);
-   else
+   } else {
+   hw->nvm_wait_opcode = i40e_aqc_opc_nvm_update;
hw->nvmupd_state = I40E_NVMUPD_STATE_WRITE_WAIT;
+   }
}
break;
 
@@ -850,6 +873,7 @@ static i40e_status i40e_nvmupd_state_init(struct i40e_hw 
*hw,
i40e_release_nvm(hw);
} else {
hw->nvm_release_on_done = true;
+   hw->nvm_wait_opcode = i40e_aqc_opc_nvm_update;
hw->nvmupd_state = I40E_NVMUPD_STATE_INIT_WAIT;
}
}
@@ -940,8 +964,10 @@ retry:
switch (upd_cmd) {
case I40E_NVMUPD_WRITE_CON:
status = i40e_nvmupd_nvm_write(hw, cmd, bytes, perrno);
-   

[net-next v2 08/14] i40e: Add device capability which defines if update is available

2016-04-27 Thread Jeff Kirsher
From: Michal Kosiarz 

Add device capability which defines if update is available and security
check is needed during update process.

Change-ID: I380787c878275e1df18b39198df3ee3666342282
Signed-off-by: Michal Kosiarz 
Tested-by: Andrew Bowers 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h   | 1 +
 drivers/net/ethernet/intel/i40e/i40e_common.c   | 6 ++
 drivers/net/ethernet/intel/i40e/i40e_type.h | 5 +
 drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h | 1 +
 drivers/net/ethernet/intel/i40evf/i40e_type.h   | 5 +
 5 files changed, 18 insertions(+)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h 
b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
index 8d5c65a..5179b3b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
@@ -429,6 +429,7 @@ struct i40e_aqc_list_capabilities_element_resp {
 #define I40E_AQ_CAP_ID_SDP 0x0062
 #define I40E_AQ_CAP_ID_MDIO0x0063
 #define I40E_AQ_CAP_ID_WSR_PROT0x0064
+#define I40E_AQ_CAP_ID_NVM_MGMT0x0080
 #define I40E_AQ_CAP_ID_FLEX10  0x00F1
 #define I40E_AQ_CAP_ID_CEM 0x00F2
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c 
b/drivers/net/ethernet/intel/i40e/i40e_common.c
index f3c1d88..34e86f5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -3138,6 +3138,12 @@ static void i40e_parse_discover_capabilities(struct 
i40e_hw *hw, void *buff,
p->wr_csr_prot = (u64)number;
p->wr_csr_prot |= (u64)logical_id << 32;
break;
+   case I40E_AQ_CAP_ID_NVM_MGMT:
+   if (number & I40E_NVM_MGMT_SEC_REV_DISABLED)
+   p->sec_rev_disabled = true;
+   if (number & I40E_NVM_MGMT_UPDATE_DISABLED)
+   p->update_disabled = true;
+   break;
default:
break;
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h 
b/drivers/net/ethernet/intel/i40e/i40e_type.h
index bb57cd9..8aa14aa 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_type.h
@@ -275,6 +275,11 @@ struct i40e_hw_capabilities {
 #define I40E_FLEX10_STATUS_DCC_ERROR   0x1
 #define I40E_FLEX10_STATUS_VC_MODE 0x2
 
+   bool sec_rev_disabled;
+   bool update_disabled;
+#define I40E_NVM_MGMT_SEC_REV_DISABLED 0x1
+#define I40E_NVM_MGMT_UPDATE_DISABLED  0x2
+
bool mgmt_cem;
bool ieee_1588;
bool iwarp;
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h 
b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
index aad8d62..1bcb8cf 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
@@ -426,6 +426,7 @@ struct i40e_aqc_list_capabilities_element_resp {
 #define I40E_AQ_CAP_ID_SDP 0x0062
 #define I40E_AQ_CAP_ID_MDIO0x0063
 #define I40E_AQ_CAP_ID_WSR_PROT0x0064
+#define I40E_AQ_CAP_ID_NVM_MGMT0x0080
 #define I40E_AQ_CAP_ID_FLEX10  0x00F1
 #define I40E_AQ_CAP_ID_CEM 0x00F2
 
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_type.h 
b/drivers/net/ethernet/intel/i40evf/i40e_type.h
index b720713..bfc97c2 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_type.h
@@ -258,6 +258,11 @@ struct i40e_hw_capabilities {
 #define I40E_FLEX10_STATUS_DCC_ERROR   0x1
 #define I40E_FLEX10_STATUS_VC_MODE 0x2
 
+   bool sec_rev_disabled;
+   bool update_disabled;
+#define I40E_NVM_MGMT_SEC_REV_DISABLED 0x1
+#define I40E_NVM_MGMT_UPDATE_DISABLED  0x2
+
bool mgmt_cem;
bool ieee_1588;
bool iwarp;
-- 
2.5.5



[net-next v2 12/14] i40e: Add promiscuous on VLAN support

2016-04-27 Thread Jeff Kirsher
From: Greg Rose 

NFV use cases require the ability to steer packets to VSIs by VLAN tag
alone while being in promiscuous mode for multicast and unicast MAC
addresses.  These two new functions support that ability.

Signed-off-by: Greg Rose 
Tested-by: Andrew Bowers 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/i40e/i40e_common.c| 70 
 drivers/net/ethernet/intel/i40e/i40e_prototype.h |  8 +++
 2 files changed, 78 insertions(+)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c 
b/drivers/net/ethernet/intel/i40e/i40e_common.c
index 25872f2..0e8552b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -2039,6 +2039,76 @@ i40e_status i40e_aq_set_vsi_multicast_promiscuous(struct 
i40e_hw *hw,
 }
 
 /**
+ * i40e_aq_set_vsi_mc_promisc_on_vlan
+ * @hw: pointer to the hw struct
+ * @seid: vsi number
+ * @enable: set MAC L2 layer multicast promiscuous enable/disable for a given 
VLAN
+ * @vid: The VLAN tag filter - capture any multicast packet with this VLAN tag
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+enum i40e_status_code i40e_aq_set_vsi_mc_promisc_on_vlan(struct i40e_hw *hw,
+u16 seid, bool enable,
+u16 vid,
+   struct i40e_asq_cmd_details *cmd_details)
+{
+   struct i40e_aq_desc desc;
+   struct i40e_aqc_set_vsi_promiscuous_modes *cmd =
+   (struct i40e_aqc_set_vsi_promiscuous_modes *)
+   enum i40e_status_code status;
+   u16 flags = 0;
+
+   i40e_fill_default_direct_cmd_desc(,
+ 
i40e_aqc_opc_set_vsi_promiscuous_modes);
+
+   if (enable)
+   flags |= I40E_AQC_SET_VSI_PROMISC_MULTICAST;
+
+   cmd->promiscuous_flags = cpu_to_le16(flags);
+   cmd->valid_flags = cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_MULTICAST);
+   cmd->seid = cpu_to_le16(seid);
+   cmd->vlan_tag = cpu_to_le16(vid | I40E_AQC_SET_VSI_VLAN_VALID);
+
+   status = i40e_asq_send_command(hw, , NULL, 0, cmd_details);
+
+   return status;
+}
+
+/**
+ * i40e_aq_set_vsi_uc_promisc_on_vlan
+ * @hw: pointer to the hw struct
+ * @seid: vsi number
+ * @enable: set MAC L2 layer unicast promiscuous enable/disable for a given 
VLAN
+ * @vid: The VLAN tag filter - capture any unicast packet with this VLAN tag
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+enum i40e_status_code i40e_aq_set_vsi_uc_promisc_on_vlan(struct i40e_hw *hw,
+u16 seid, bool enable,
+u16 vid,
+   struct i40e_asq_cmd_details *cmd_details)
+{
+   struct i40e_aq_desc desc;
+   struct i40e_aqc_set_vsi_promiscuous_modes *cmd =
+   (struct i40e_aqc_set_vsi_promiscuous_modes *)
+   enum i40e_status_code status;
+   u16 flags = 0;
+
+   i40e_fill_default_direct_cmd_desc(,
+ 
i40e_aqc_opc_set_vsi_promiscuous_modes);
+
+   if (enable)
+   flags |= I40E_AQC_SET_VSI_PROMISC_UNICAST;
+
+   cmd->promiscuous_flags = cpu_to_le16(flags);
+   cmd->valid_flags = cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_UNICAST);
+   cmd->seid = cpu_to_le16(seid);
+   cmd->vlan_tag = cpu_to_le16(vid | I40E_AQC_SET_VSI_VLAN_VALID);
+
+   status = i40e_asq_send_command(hw, , NULL, 0, cmd_details);
+
+   return status;
+}
+
+/**
  * i40e_aq_set_vsi_broadcast
  * @hw: pointer to the hw struct
  * @seid: vsi number
diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h 
b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
index 134035f..8afb237 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
@@ -133,6 +133,14 @@ i40e_status i40e_aq_set_vsi_unicast_promiscuous(struct 
i40e_hw *hw,
u16 vsi_id, bool set, struct i40e_asq_cmd_details *cmd_details);
 i40e_status i40e_aq_set_vsi_multicast_promiscuous(struct i40e_hw *hw,
u16 vsi_id, bool set, struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_set_vsi_mc_promisc_on_vlan(struct i40e_hw *hw,
+u16 seid, bool enable,
+u16 vid,
+   struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_set_vsi_uc_promisc_on_vlan(struct i40e_hw *hw,
+u16 seid, bool enable,
+u16 vid,
+   struct i40e_asq_cmd_details *cmd_details);
 

[net-next v2 02/14] i40e/i40evf: Add support for IPIP and SIT offloads

2016-04-27 Thread Jeff Kirsher
From: Alexander Duyck 

Looking over the documentation it turns out enabling IPIP and SIT offloads
for i40e is pretty straightforward.  As such I decided to enable them with
this patch.  In my testing I am seeing an improvement of 8 to 10 Gb/s
for IPIP and SIT tunnels with this offload enabled.

Signed-off-by: Alexander Duyck 
Tested-by: Andrew Bowers 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/i40e/i40e_main.c |  2 ++
 drivers/net/ethernet/intel/i40e/i40e_txrx.c | 24 
 drivers/net/ethernet/intel/i40evf/i40e_txrx.c   | 24 
 drivers/net/ethernet/intel/i40evf/i40evf_main.c |  2 ++
 4 files changed, 36 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c 
b/drivers/net/ethernet/intel/i40e/i40e_main.c
index f2e83fe..ec94ad6c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -9120,6 +9120,8 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
   NETIF_F_TSO_ECN  |
   NETIF_F_TSO6 |
   NETIF_F_GSO_GRE  |
+  NETIF_F_GSO_IPIP |
+  NETIF_F_GSO_SIT  |
   NETIF_F_GSO_UDP_TUNNEL   |
   NETIF_F_GSO_UDP_TUNNEL_CSUM  |
   NETIF_F_SCTP_CRC |
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c 
b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 39efba0..6e44cf1 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -2299,7 +2299,10 @@ static int i40e_tso(struct sk_buff *skb, u8 *hdr_len, 
u64 *cd_type_cmd_tso_mss)
ip.v6->payload_len = 0;
}
 
-   if (skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL | SKB_GSO_GRE |
+   if (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE |
+SKB_GSO_IPIP |
+SKB_GSO_SIT |
+SKB_GSO_UDP_TUNNEL |
 SKB_GSO_UDP_TUNNEL_CSUM)) {
if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM) {
/* determine offset of outer transport header */
@@ -2442,13 +2445,6 @@ static int i40e_tx_enable_csum(struct sk_buff *skb, u32 
*tx_flags,
 _proto, _off);
}
 
-   /* compute outer L3 header size */
-   tunnel |= ((l4.hdr - ip.hdr) / 4) <<
- I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT;
-
-   /* switch IP header pointer from outer to inner header */
-   ip.hdr = skb_inner_network_header(skb);
-
/* define outer transport */
switch (l4_proto) {
case IPPROTO_UDP:
@@ -2459,6 +2455,11 @@ static int i40e_tx_enable_csum(struct sk_buff *skb, u32 
*tx_flags,
tunnel |= I40E_TXD_CTX_GRE_TUNNELING;
*tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL;
break;
+   case IPPROTO_IPIP:
+   case IPPROTO_IPV6:
+   *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL;
+   l4.hdr = skb_inner_network_header(skb);
+   break;
default:
if (*tx_flags & I40E_TX_FLAGS_TSO)
return -1;
@@ -2467,6 +2468,13 @@ static int i40e_tx_enable_csum(struct sk_buff *skb, u32 
*tx_flags,
return 0;
}
 
+   /* compute outer L3 header size */
+   tunnel |= ((l4.hdr - ip.hdr) / 4) <<
+ I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT;
+
+   /* switch IP header pointer from outer to inner header */
+   ip.hdr = skb_inner_network_header(skb);
+
/* compute tunnel header size */
tunnel |= ((ip.hdr - l4.hdr) / 2) <<
  I40E_TXD_CTX_QW0_NATLEN_SHIFT;
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c 
b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index fc22818..f101895 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -1564,7 +1564,10 @@ static int i40e_tso(struct sk_buff *skb, u8 *hdr_len, 
u64 *cd_type_cmd_tso_mss)
ip.v6->payload_len = 0;
}
 
-   if (skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL | SKB_GSO_GRE |
+   if (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE |
+SKB_GSO_IPIP |
+SKB_GSO_SIT |
+

[net-next v2 05/14] i40e: Code cleanup in i40e_add_fdir_ethtool

2016-04-27 Thread Jeff Kirsher
From: Shannon Nelson 

A little bit of code cleanup in prep for more cloud filter work.

Change-ID: I0dc33ce0d4c207944336a07437640fef920c100c
Signed-off-by: Shannon Nelson 
Tested-by: Andrew Bowers 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 12 +++-
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c 
b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 8a83d45..8e56c43 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -2506,7 +2506,6 @@ static int i40e_add_fdir_ethtool(struct i40e_vsi *vsi,
 
if (!vsi)
return -EINVAL;
-
pf = vsi->back;
 
if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED))
@@ -2564,15 +2563,18 @@ static int i40e_add_fdir_ethtool(struct i40e_vsi *vsi,
input->src_ip[0] = fsp->h_u.tcp_ip4_spec.ip4dst;
 
if (ntohl(fsp->m_ext.data[1])) {
-   if (ntohl(fsp->h_ext.data[1]) >= pf->num_alloc_vfs) {
-   netif_info(pf, drv, vsi->netdev, "Invalid VF id\n");
+   vf_id = ntohl(fsp->h_ext.data[1]);
+   if (vf_id >= pf->num_alloc_vfs) {
+   netif_info(pf, drv, vsi->netdev,
+  "Invalid VF id %d\n", vf_id);
goto free_input;
}
-   vf_id = ntohl(fsp->h_ext.data[1]);
/* Find vsi id from vf id and override dest vsi */
input->dest_vsi = pf->vf[vf_id].lan_vsi_id;
if (input->q_index >= pf->vf[vf_id].num_queue_pairs) {
-   netif_info(pf, drv, vsi->netdev, "Invalid queue id\n");
+   netif_info(pf, drv, vsi->netdev,
+  "Invalid queue id %d for VF %d\n",
+  input->q_index, vf_id);
goto free_input;
}
}
-- 
2.5.5



[net-next v2 14/14] i40evf: Add driver support for promiscuous mode

2016-04-27 Thread Jeff Kirsher
From: Anjali Singhai Jain 

Add necessary Linux Ethernet driver support for promiscuous mode
operation. Add a flag so the VF knows it is in promiscuous mode
and two state flags to discretely track multicast and unicast
promiscuous states.

Change-Id: Ib2f2dc7a7582304fec90fc917ebb7ded21ba1de4
Signed-off-by: Anjali Singhai Jain 
Signed-off-by: Greg Rose 
Signed-off-by: Jesse Brandeburg 
Tested-by: Andrew Bowers 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c  | 14 +++---
 drivers/net/ethernet/intel/i40evf/i40evf.h  |  3 +++
 drivers/net/ethernet/intel/i40evf/i40evf_main.c | 19 +++
 drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c | 11 +++
 4 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c 
b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index f47b0e8..c226c2d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -1489,13 +1489,13 @@ static int i40e_vc_config_promiscuous_mode_msg(struct 
i40e_vf *vf,
NULL);
} else if (i40e_getnum_vf_vsi_vlan_filters(vsi)) {
list_for_each_entry(f, &vsi->mac_filter_list, list) {
-   if (f->vlan >= 0 && f->vlan <= I40E_MAX_VLANID)
-   aq_ret = i40e_aq_set_vsi_mc_promisc_on_vlan
-  (hw,
-  vsi->seid,
-  allmulti,
-  f->vlan,
-  NULL);
+   if (f->vlan < 0 || f->vlan > I40E_MAX_VLANID)
+   continue;
+   aq_ret = i40e_aq_set_vsi_mc_promisc_on_vlan(hw,
+   vsi->seid,
+   allmulti,
+   f->vlan,
+   NULL);
aq_err = pf->hw.aq.asq_last_status;
if (aq_ret) {
dev_err(&pf->pdev->dev,
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h 
b/drivers/net/ethernet/intel/i40evf/i40evf.h
index 63f7aae..25afabf 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf.h
+++ b/drivers/net/ethernet/intel/i40evf/i40evf.h
@@ -220,6 +220,7 @@ struct i40evf_adapter {
 #define I40EVF_FLAG_WB_ON_ITR_CAPABLE  BIT(11)
 #define I40EVF_FLAG_OUTER_UDP_CSUM_CAPABLE BIT(12)
 #define I40EVF_FLAG_ADDR_SET_BY_PF BIT(13)
+#define I40EVF_FLAG_PROMISC_ON BIT(15)
 /* duplicates for common code */
 #define I40E_FLAG_FDIR_ATR_ENABLED  0
 #define I40E_FLAG_DCB_ENABLED   0
@@ -244,6 +245,8 @@ struct i40evf_adapter {
 #define I40EVF_FLAG_AQ_SET_HENA BIT(12)
 #define I40EVF_FLAG_AQ_SET_RSS_KEY BIT(13)
 #define I40EVF_FLAG_AQ_SET_RSS_LUT BIT(14)
+#define I40EVF_FLAG_AQ_REQUEST_PROMISC BIT(15)
+#define I40EVF_FLAG_AQ_RELEASE_PROMISC BIT(16)
 
/* OS defined structs */
struct net_device *netdev;
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c 
b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index af53159..d1c4afd 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -943,6 +943,14 @@ static void i40evf_set_rx_mode(struct net_device *netdev)
 bottom_of_search_loop:
continue;
}
+
+   if (netdev->flags & IFF_PROMISC &&
+   !(adapter->flags & I40EVF_FLAG_PROMISC_ON))
+   adapter->aq_required |= I40EVF_FLAG_AQ_REQUEST_PROMISC;
+   else if (!(netdev->flags & IFF_PROMISC) &&
+adapter->flags & I40EVF_FLAG_PROMISC_ON)
+   adapter->aq_required |= I40EVF_FLAG_AQ_RELEASE_PROMISC;
+
clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
 }
 
@@ -1622,6 +1630,17 @@ static void i40evf_watchdog_task(struct work_struct 
*work)
goto watchdog_done;
}
 
+   if (adapter->aq_required & I40EVF_FLAG_AQ_REQUEST_PROMISC) {
+   i40evf_set_promiscuous(adapter, I40E_FLAG_VF_UNICAST_PROMISC |
+  I40E_FLAG_VF_MULTICAST_PROMISC);
+   goto watchdog_done;
+   }
+
+   if (adapter->aq_required & I40EVF_FLAG_AQ_RELEASE_PROMISC) {
+   

[net-next v2 09/14] i40e: Add DeviceID for X722 QSFP+

2016-04-27 Thread Jeff Kirsher
From: Kamil Krawczyk 

Change-ID: I1370fbc7774e815ac1ad56561e97488e829592fc
Signed-off-by: Kamil Krawczyk 
Tested-by: Andrew Bowers 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/i40e/i40e_common.c   | 1 +
 drivers/net/ethernet/intel/i40e/i40e_devids.h   | 1 +
 drivers/net/ethernet/intel/i40evf/i40e_common.c | 1 +
 drivers/net/ethernet/intel/i40evf/i40e_devids.h | 1 +
 4 files changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c 
b/drivers/net/ethernet/intel/i40e/i40e_common.c
index 34e86f5..1db4790 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -61,6 +61,7 @@ static i40e_status i40e_set_mac_type(struct i40e_hw *hw)
case I40E_DEV_ID_1G_BASE_T_X722:
case I40E_DEV_ID_10G_BASE_T_X722:
case I40E_DEV_ID_SFP_I_X722:
+   case I40E_DEV_ID_QSFP_I_X722:
hw->mac.type = I40E_MAC_X722;
break;
default:
diff --git a/drivers/net/ethernet/intel/i40e/i40e_devids.h 
b/drivers/net/ethernet/intel/i40e/i40e_devids.h
index dd4457d..d701861 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_devids.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_devids.h
@@ -45,6 +45,7 @@
 #define I40E_DEV_ID_1G_BASE_T_X722 0x37D1
 #define I40E_DEV_ID_10G_BASE_T_X722 0x37D2
 #define I40E_DEV_ID_SFP_I_X722 0x37D3
+#define I40E_DEV_ID_QSFP_I_X722 0x37D4
 
 #define i40e_is_40G_device(d)  ((d) == I40E_DEV_ID_QSFP_A  || \
 (d) == I40E_DEV_ID_QSFP_B  || \
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_common.c 
b/drivers/net/ethernet/intel/i40evf/i40e_common.c
index 4db0c03..8f64204 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_common.c
@@ -59,6 +59,7 @@ i40e_status i40e_set_mac_type(struct i40e_hw *hw)
case I40E_DEV_ID_1G_BASE_T_X722:
case I40E_DEV_ID_10G_BASE_T_X722:
case I40E_DEV_ID_SFP_I_X722:
+   case I40E_DEV_ID_QSFP_I_X722:
hw->mac.type = I40E_MAC_X722;
break;
case I40E_DEV_ID_X722_VF:
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_devids.h 
b/drivers/net/ethernet/intel/i40evf/i40e_devids.h
index 7023570..d34972b 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_devids.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_devids.h
@@ -45,6 +45,7 @@
 #define I40E_DEV_ID_1G_BASE_T_X722 0x37D1
 #define I40E_DEV_ID_10G_BASE_T_X722 0x37D2
 #define I40E_DEV_ID_SFP_I_X722 0x37D3
+#define I40E_DEV_ID_QSFP_I_X722 0x37D4
 #define I40E_DEV_ID_X722_VF 0x37CD
 #define I40E_DEV_ID_X722_VF_HV 0x37D9
 
-- 
2.5.5



[net-next v2 00/14][pull request] 40GbE Intel Wired LAN Driver Updates 2016-04-27

2016-04-27 Thread Jeff Kirsher
This series contains updates to i40e and i40evf.

Alex Duyck cleans up the feature flags since they are becoming pretty
"massive", the primary change being that we now build our features list
around hw_encap_features.  Added support for IPIP and SIT offloads,
which should improve throughput for IPIP and SIT tunnels with
the offload enabled.

Mitch adds support for configuring RSS on behalf of the VFs, which removes
the burden of dealing with different hardware interfaces from the VF
drivers and improves future compatibility.  Fix to ensure that we do not
panic by checking that the vsi_res pointer is valid before dereferencing
it, after which we can drink beer and eat peanuts.

Shannon does some housekeeping in i40e_add_fdir_ethtool() in preparation
for more cloud filter work.  Added flexibility to the nvmupdate
facility by adding the ability to specify an AQ event opcode to wait on
after Exec_AQ request.

Michal adds device capability which defines if an update is available and
if a security check is needed during the update process.

Kamil just adds a device id to support X722 QSFP+ device.

Greg fixes an issue where a mirror rule ID may be zero, so do not return
invalid parameter when the user passes in a zero for a rule ID.  Adds
support to steer packets to VSIs by VLAN tag alone while being in
promiscuous mode for multicast and unicast MAC addresses.

Jesse fixes the driver, which was offloading the VLAN tag into the skb any
time there was a VLAN tag and the hardware stripping was enabled, by
making sure the offload is enabled before put_tag.

v2: Dropped patch 8 ("i40e: Allow user to change input set mask for flow
director") while Kiran reworks a more generalized solution based
on feedback from David Miller.

The following are changes since commit fab7b629a82da1b59620470d13152aff975239f6:
  Merge branch 'ila-csum-neutral'
and are available in the git repository at:
  git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue 40GbE

Alexander Duyck (2):
  i40e/i40evf: Clean up feature flags
  i40e/i40evf: Add support for IPIP and SIT offloads

Anjali Singhai Jain (2):
  i40e: Add VF promiscuous mode driver support
  i40evf: Add driver support for promiscuous mode

Greg Rose (2):
  i40e: Remove zero check
  i40e: Add promiscuous on VLAN support

Jesse Brandeburg (1):
  i40e/i40evf: Only offload VLAN tag if enabled

Kamil Krawczyk (1):
  i40e: Add DeviceID for X722 QSFP+

Michal Kosiarz (1):
  i40e: Add device capability which defines if update is available

Mitch Williams (3):
  i40e: Add support for configuring VF RSS
  i40evf: Don't Panic
  i40evf: Allow PF driver to configure RSS

Shannon Nelson (2):
  i40e: Code cleanup in i40e_add_fdir_ethtool
  i40e: Specify AQ event opcode to wait for

 drivers/net/ethernet/intel/i40e/i40e.h |   1 +
 drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h  |   1 +
 drivers/net/ethernet/intel/i40e/i40e_common.c  |  82 -
 drivers/net/ethernet/intel/i40e/i40e_devids.h  |   1 +
 drivers/net/ethernet/intel/i40e/i40e_ethtool.c |  12 +-
 drivers/net/ethernet/intel/i40e/i40e_main.c|  98 ++---
 drivers/net/ethernet/intel/i40e/i40e_nvm.c |  49 ++-
 drivers/net/ethernet/intel/i40e/i40e_prototype.h   |   8 +
 drivers/net/ethernet/intel/i40e/i40e_txrx.c|  27 +-
 drivers/net/ethernet/intel/i40e/i40e_type.h|   6 +
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 346 +-
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h |   2 +
 .../net/ethernet/intel/i40evf/i40e_adminq_cmd.h|   1 +
 drivers/net/ethernet/intel/i40evf/i40e_common.c|   1 +
 drivers/net/ethernet/intel/i40evf/i40e_devids.h|   1 +
 drivers/net/ethernet/intel/i40evf/i40e_txrx.c  |  27 +-
 drivers/net/ethernet/intel/i40evf/i40e_type.h  |   6 +
 drivers/net/ethernet/intel/i40evf/i40evf.h |  39 +-
 drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c | 121 +++
 drivers/net/ethernet/intel/i40evf/i40evf_main.c| 395 -
 .../net/ethernet/intel/i40evf/i40evf_virtchnl.c| 130 +++
 21 files changed, 947 insertions(+), 407 deletions(-)

-- 
2.5.5



  1   2   3   >