[PATCH net-next 6/6] net: do not block BH while processing socket backlog
Socket backlog processing is a major latency source. With current TCP socket sk_rcvbuf limits, I have sampled __release_sock() holding cpu for more than 5 ms, and packets being dropped by the NIC once ring buffer is filled. All users are now ready to be called from process context, so we can unblock BH and let interrupts be serviced faster. cond_resched_softirq() could be removed, as it has no more users. Signed-off-by: Eric Dumazet --- net/core/sock.c | 22 -- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index e16a5db853c6..70744dbb6c3f 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2019,33 +2019,27 @@ static void __release_sock(struct sock *sk) __releases(&sk->sk_lock.slock) __acquires(&sk->sk_lock.slock) { - struct sk_buff *skb = sk->sk_backlog.head; + struct sk_buff *skb, *next; - do { + while ((skb = sk->sk_backlog.head) != NULL) { sk->sk_backlog.head = sk->sk_backlog.tail = NULL; - bh_unlock_sock(sk); - do { - struct sk_buff *next = skb->next; + spin_unlock_bh(&sk->sk_lock.slock); + do { + next = skb->next; prefetch(next); WARN_ON_ONCE(skb_dst_is_noref(skb)); skb->next = NULL; sk_backlog_rcv(sk, skb); - /* -* We are in process context here with softirqs -* disabled, use cond_resched_softirq() to preempt. -* This is safe to do because we've taken the backlog -* queue private: -*/ - cond_resched_softirq(); + cond_resched(); skb = next; } while (skb != NULL); - bh_lock_sock(sk); - } while ((skb = sk->sk_backlog.head) != NULL); + spin_lock_bh(&sk->sk_lock.slock); + } /* * Doing the zeroing here guarantee we can not loop forever -- 2.8.0.rc3.226.g39d4020
[PATCH net-next 2/6] tcp: do not block bh during prequeue processing
AFAIK, nothing in current TCP stack absolutely wants BH being disabled once socket is owned by a thread running in process context. As mentioned in my prior patch ("tcp: give prequeue mode some care"), processing a batch of packets might take time, better not block BH at all. Signed-off-by: Eric Dumazet--- net/ipv4/tcp.c | 4 net/ipv4/tcp_input.c | 30 ++ 2 files changed, 2 insertions(+), 32 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7f51389814e6..f8856b76f941 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1445,12 +1445,8 @@ static void tcp_prequeue_process(struct sock *sk) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUED); - /* RX process wants to run with disabled BHs, though it is not -* necessary */ - local_bh_disable(); while ((skb = __skb_dequeue(>ucopy.prequeue)) != NULL) sk_backlog_rcv(sk, skb); - local_bh_enable(); /* Clear memory counter. */ tp->ucopy.memory = 0; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0eb31df8edfa..44e0f9f15f32 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4608,14 +4608,12 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) __set_current_state(TASK_RUNNING); - local_bh_enable(); if (!skb_copy_datagram_msg(skb, 0, tp->ucopy.msg, chunk)) { tp->ucopy.len -= chunk; tp->copied_seq += chunk; eaten = (chunk == skb->len); tcp_rcv_space_adjust(sk); } - local_bh_disable(); } if (eaten <= 0) { @@ -5131,7 +5129,6 @@ static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen) int chunk = skb->len - hlen; int err; - local_bh_enable(); if (skb_csum_unnecessary(skb)) err = skb_copy_datagram_msg(skb, hlen, tp->ucopy.msg, chunk); else @@ -5143,32 +5140,9 @@ static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen) tcp_rcv_space_adjust(sk); } - local_bh_disable(); return err; } -static __sum16 __tcp_checksum_complete_user(struct sock *sk, - struct sk_buff *skb) -{ - __sum16 result; - - if (sock_owned_by_user(sk)) { - local_bh_enable(); - 
result = __tcp_checksum_complete(skb); - local_bh_disable(); - } else { - result = __tcp_checksum_complete(skb); - } - return result; -} - -static inline bool tcp_checksum_complete_user(struct sock *sk, -struct sk_buff *skb) -{ - return !skb_csum_unnecessary(skb) && - __tcp_checksum_complete_user(sk, skb); -} - /* Does PAWS and seqno based validation of an incoming segment, flags will * play significant role here. */ @@ -5383,7 +5357,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, } } if (!eaten) { - if (tcp_checksum_complete_user(sk, skb)) + if (tcp_checksum_complete(skb)) goto csum_error; if ((int)skb->truesize > sk->sk_forward_alloc) @@ -5427,7 +5401,7 @@ no_ack: } slow_path: - if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb)) + if (len < (th->doff << 2) || tcp_checksum_complete(skb)) goto csum_error; if (!th->ack && !th->rst && !th->syn) -- 2.8.0.rc3.226.g39d4020
[PATCH net-next 4/6] udp: prepare for non BH masking at backlog processing
UDP uses the generic socket backlog code, and this will soon be changed to not disable BH when protocol is called back. We need to use appropriate SNMP accessors. Signed-off-by: Eric Dumazet--- net/ipv4/udp.c | 4 ++-- net/ipv6/udp.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 093284c5c03b..f67f52ba4809 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1514,9 +1514,9 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) /* Note that an ENOMEM error is charged twice */ if (rc == -ENOMEM) - __UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS, + UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS, is_udplite); - __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); kfree_skb(skb); trace_udp_fail_queue_rcv_skb(rc, sk); return -1; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1ba5a74ac18f..f911c63f79e6 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -570,9 +570,9 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) /* Note that an ENOMEM error is charged twice */ if (rc == -ENOMEM) - __UDP6_INC_STATS(sock_net(sk), + UDP6_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS, is_udplite); - __UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); kfree_skb(skb); return -1; } -- 2.8.0.rc3.226.g39d4020
[PATCH net-next 3/6] dccp: do not assume DCCP code is non preemptible
DCCP uses the generic backlog code, and this will soon be changed to not disable BH when protocol is called back. Signed-off-by: Eric Dumazet--- net/dccp/input.c | 2 +- net/dccp/ipv4.c| 4 ++-- net/dccp/ipv6.c| 4 ++-- net/dccp/options.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/net/dccp/input.c b/net/dccp/input.c index 2437ecc13b82..ba347184bda9 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -359,7 +359,7 @@ send_sync: goto discard; } - __DCCP_INC_STATS(DCCP_MIB_INERRS); + DCCP_INC_STATS(DCCP_MIB_INERRS); discard: __kfree_skb(skb); return 0; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index a8164272e0f4..5c7e413a3ae4 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -533,8 +533,8 @@ static void dccp_v4_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb) bh_unlock_sock(ctl_sk); if (net_xmit_eval(err) == 0) { - __DCCP_INC_STATS(DCCP_MIB_OUTSEGS); - __DCCP_INC_STATS(DCCP_MIB_OUTRSTS); + DCCP_INC_STATS(DCCP_MIB_OUTSEGS); + DCCP_INC_STATS(DCCP_MIB_OUTRSTS); } out: dst_release(dst); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 0f4eb4ea57a5..d176f4e66369 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -277,8 +277,8 @@ static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb) if (!IS_ERR(dst)) { skb_dst_set(skb, dst); ip6_xmit(ctl_sk, skb, , NULL, 0); - __DCCP_INC_STATS(DCCP_MIB_OUTSEGS); - __DCCP_INC_STATS(DCCP_MIB_OUTRSTS); + DCCP_INC_STATS(DCCP_MIB_OUTSEGS); + DCCP_INC_STATS(DCCP_MIB_OUTRSTS); return; } diff --git a/net/dccp/options.c b/net/dccp/options.c index b82b7ee9a1d2..74d29c56c367 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -253,7 +253,7 @@ out_nonsensical_length: return 0; out_invalid_option: - __DCCP_INC_STATS(DCCP_MIB_INVALIDOPT); + DCCP_INC_STATS(DCCP_MIB_INVALIDOPT); rc = DCCP_RESET_CODE_OPTION_ERROR; out_featneg_failed: DCCP_WARN("DCCP(%p): Option %d (len=%d) error=%u\n", sk, opt, len, rc); -- 2.8.0.rc3.226.g39d4020
[PATCH net-next 1/6] tcp: do not assume TCP code is non preemptible
We want to make TCP stack preemptible, as draining prequeue and backlog queues can take a lot of time. Many SNMP updates were assuming that BH (and preemption) was disabled. Need to convert some __NET_INC_STATS() calls to NET_INC_STATS() and some __TCP_INC_STATS() to TCP_INC_STATS(). Before using this_cpu_ptr(net->ipv4.tcp_sk) in tcp_v4_send_reset() and tcp_v4_send_ack(), we add an explicit preempt disabled section. Signed-off-by: Eric Dumazet --- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_cdg.c | 20 +- net/ipv4/tcp_cubic.c | 20 +- net/ipv4/tcp_fastopen.c | 12 +++--- net/ipv4/tcp_input.c | 96 net/ipv4/tcp_ipv4.c | 14 --- net/ipv4/tcp_minisocks.c | 2 +- net/ipv4/tcp_output.c| 7 ++-- net/ipv4/tcp_recovery.c | 4 +- net/ipv4/tcp_timer.c | 10 +++-- net/ipv6/tcp_ipv6.c | 12 +++--- 11 files changed, 102 insertions(+), 97 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 040f35e7efe0..7f51389814e6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3091,7 +3091,7 @@ void tcp_done(struct sock *sk) struct request_sock *req = tcp_sk(sk)->fastopen_rsk; if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) - __TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS); tcp_set_state(sk, TCP_CLOSE); tcp_clear_xmit_timers(sk); diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c index 3c00208c37f4..4e3007845888 100644 --- a/net/ipv4/tcp_cdg.c +++ b/net/ipv4/tcp_cdg.c @@ -155,11 +155,11 @@ static void tcp_cdg_hystart_update(struct sock *sk) ca->last_ack = now_us; if (after(now_us, ca->round_start + base_owd)) { - __NET_INC_STATS(sock_net(sk), - LINUX_MIB_TCPHYSTARTTRAINDETECT); - __NET_ADD_STATS(sock_net(sk), - LINUX_MIB_TCPHYSTARTTRAINCWND, - tp->snd_cwnd); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPHYSTARTTRAINDETECT); + NET_ADD_STATS(sock_net(sk), + LINUX_MIB_TCPHYSTARTTRAINCWND, + tp->snd_cwnd); tp->snd_ssthresh = tp->snd_cwnd; return; } @@ -174,11 +174,11 @@ static void tcp_cdg_hystart_update(struct sock *sk)
125U); if (ca->rtt.min > thresh) { - __NET_INC_STATS(sock_net(sk), - LINUX_MIB_TCPHYSTARTDELAYDETECT); - __NET_ADD_STATS(sock_net(sk), - LINUX_MIB_TCPHYSTARTDELAYCWND, - tp->snd_cwnd); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPHYSTARTDELAYDETECT); + NET_ADD_STATS(sock_net(sk), + LINUX_MIB_TCPHYSTARTDELAYCWND, + tp->snd_cwnd); tp->snd_ssthresh = tp->snd_cwnd; } } diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 59155af9de5d..0ce946e395e1 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -402,11 +402,11 @@ static void hystart_update(struct sock *sk, u32 delay) ca->last_ack = now; if ((s32)(now - ca->round_start) > ca->delay_min >> 4) { ca->found |= HYSTART_ACK_TRAIN; - __NET_INC_STATS(sock_net(sk), - LINUX_MIB_TCPHYSTARTTRAINDETECT); - __NET_ADD_STATS(sock_net(sk), - LINUX_MIB_TCPHYSTARTTRAINCWND, - tp->snd_cwnd); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPHYSTARTTRAINDETECT); + NET_ADD_STATS(sock_net(sk), + LINUX_MIB_TCPHYSTARTTRAINCWND, + tp->snd_cwnd); tp->snd_ssthresh = tp->snd_cwnd; } } @@ -423,11 +423,11 @@ static void hystart_update(struct sock *sk, u32 delay) if (ca->curr_rtt > ca->delay_min +
[PATCH net-next 5/6] sctp: prepare for socket backlog behavior change
sctp_inq_push() will soon be called without BH being blocked when generic socket code flushes the socket backlog. It is very possible SCTP can be converted to not rely on BH, but this needs to be done by SCTP experts. Signed-off-by: Eric Dumazet --- net/sctp/inqueue.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index b335ffcef0b9..9d87bba0ff1d 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -89,10 +89,12 @@ void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *chunk) * Eventually, we should clean up inqueue to not rely * on the BH related data structures. */ + local_bh_disable(); list_add_tail(&chunk->list, &q->in_chunk_list); if (chunk->asoc) chunk->asoc->stats.ipackets++; q->immediate.func(&q->immediate); + local_bh_enable(); } /* Peek at the next chunk on the inqeue. */ -- 2.8.0.rc3.226.g39d4020
[PATCH net-next 0/6] net: make TCP preemptible
Most of TCP stack assumed it was running from BH handler. This is great for most things, as TCP behavior is very sensitive to scheduling artifacts. However, the prequeue and backlog processing are problematic, as they need to be flushed with BH being blocked. To cope with modern needs, TCP sockets have big sk_rcvbuf values, in the order of 16 MB. This means that backlog can hold thousands of packets, and things like TCP coalescing or collapsing on this amount of packets can lead to insane latency spikes, since BH are blocked for too long. It is time to make UDP/TCP stacks preemptible. Note that fast path still runs from BH handler. Eric Dumazet (6): tcp: do not assume TCP code is non preemptible tcp: do not block bh during prequeue processing dccp: do not assume DCCP code is non preemptible udp: prepare for non BH masking at backlog processing sctp: prepare for socket backlog behavior change net: do not block BH while processing socket backlog net/core/sock.c | 22 +++-- net/dccp/input.c | 2 +- net/dccp/ipv4.c | 4 +- net/dccp/ipv6.c | 4 +- net/dccp/options.c | 2 +- net/ipv4/tcp.c | 6 +-- net/ipv4/tcp_cdg.c | 20 net/ipv4/tcp_cubic.c | 20 net/ipv4/tcp_fastopen.c | 12 ++--- net/ipv4/tcp_input.c | 126 +++ net/ipv4/tcp_ipv4.c | 14 -- net/ipv4/tcp_minisocks.c | 2 +- net/ipv4/tcp_output.c| 7 ++- net/ipv4/tcp_recovery.c | 4 +- net/ipv4/tcp_timer.c | 10 ++-- net/ipv4/udp.c | 4 +- net/ipv6/tcp_ipv6.c | 12 ++--- net/ipv6/udp.c | 4 +- net/sctp/inqueue.c | 2 + 19 files changed, 124 insertions(+), 153 deletions(-) -- 2.8.0.rc3.226.g39d4020
Re: [PATCH net-next 2/7] net: rtnetlink: allow only one idx saving stats attribute
On 4/27/16, 9:18 AM, Nikolay Aleksandrov wrote: > We can't allow more than one stats attribute which uses the local idx > since the result will be a mess. This is a simple check to make sure > only one is being used at a time. Later when the filter_mask's 32 bits > are over we can switch to a bitmap. > > Signed-off-by: Nikolay Aleksandrov> --- > include/net/rtnetlink.h | 6 ++ > net/core/rtnetlink.c| 17 +++-- > 2 files changed, 21 insertions(+), 2 deletions(-) > > diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h > index 2f87c1ba13de..3f3b0b1b8722 100644 > --- a/include/net/rtnetlink.h > +++ b/include/net/rtnetlink.h > @@ -150,4 +150,10 @@ int rtnl_nla_parse_ifla(struct nlattr **tb, const struct > nlattr *head, int len); > > #define MODULE_ALIAS_RTNL_LINK(kind) MODULE_ALIAS("rtnl-link-" kind) > > +/* at most one attribute which can save a local idx is allowed to be set > + * IFLA_STATS_IDX_ATTR_MASK has all the idx saving attributes set and is > + * used to check if more than one is being requested > + */ > +#define IFLA_STATS_IDX_ATTR_MASK 0 > + > #endif > diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c > index aeb2fa9b1cda..ea03b6cd3d3c 100644 > --- a/net/core/rtnetlink.c > +++ b/net/core/rtnetlink.c > @@ -3512,7 +3512,7 @@ static int rtnl_stats_get(struct sk_buff *skb, struct > nlmsghdr *nlh) > struct if_stats_msg *ifsm; > struct net_device *dev = NULL; > struct sk_buff *nskb; > - u32 filter_mask; > + u32 filter_mask, lidx_filter; > int lidx = 0; > int err; > > @@ -3529,6 +3529,14 @@ static int rtnl_stats_get(struct sk_buff *skb, struct > nlmsghdr *nlh) > if (!filter_mask) > return -EINVAL; > > + /* only one attribute which can save a local idx is allowed at a time > + * even though rtnl_stats_get doesn't save the lidx, we need to be > + * consistent with the dump side and error out > + */ > + lidx_filter = filter_mask & IFLA_STATS_IDX_ATTR_MASK; > + if (lidx_filter && !is_power_of_2(lidx_filter)) > + return -EINVAL; > + > nskb = 
nlmsg_new(if_nlmsg_stats_size(dev, filter_mask), GFP_KERNEL); > if (!nskb) > return -ENOBUFS; > @@ -3556,7 +3564,7 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct > netlink_callback *cb) > struct net_device *dev; > struct hlist_head *head; > unsigned int flags = NLM_F_MULTI; > - u32 filter_mask = 0; > + u32 filter_mask = 0, lidx_filter; > int err; > > s_h = cb->args[0]; > @@ -3570,6 +3578,11 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct > netlink_callback *cb) > if (!filter_mask) > return -EINVAL; > > + /* only one attribute which can save a local idx is allowed at a time */ > + lidx_filter = filter_mask & IFLA_STATS_IDX_ATTR_MASK; > + if (lidx_filter && !is_power_of_2(lidx_filter)) > + return -EINVAL; > + > instead of introducing the restriction at this level, is it possible to use two args for this like below and avoid the restriction ? cb->args[2] = current filter being processed cb->args[3] = private filter idx (your lidx)
Re: iproute2: bash completion function for tc
On Wed, 27 Apr 2016 20:19:26 -0700 Alexei Starovoitovwrote: > On Tue, Apr 26, 2016 at 09:28:17AM +0200, Quentin Monnet wrote: > > Hi Jamal, Stephen, > > > > I searched for a function providing auto-completion for `tc` utility in > > bash, but I found none. So I have created one, and I would like share it > > with the community. It is available here: > > https://github.com/6WIND/tc_bash-completion/blob/master/tc > > I would like to make it easily available to tc users, so here is a > > twofold request: > > > > * I do not know where to submit the code. Should I submit here on netdev > > for inclusion in iproute2 package, or rather to the bash-completion > > repository on GitHub? I feel like it would receive better feedback and > > updates if pushed to iproute2. Could you please provide some advice here? > > * The completion for `tc` seems to work well; I have tested it with many > > commands, but I am no tc expert, and there are probably some cases where > > the completion fails to propose the correct choices. I would be really > > interested in any feedback/bug reports that you, or anyone on this list > > who uses tc, could provide. > > that looks very interesting. > I think making it a part of iproute2 is a good thing. > How about installing it into /etc/iproute2/ ? > Stephen, any comments? > I am ok with keeping it in the repository. But it would need to be installed in the standard bash directory, is that distro dependent?
Re: [PATCH v2 net-next 2/2] tcp: remove SKBTX_ACK_TSTAMP since it is redundant
On Wed, 2016-04-27 at 23:39 -0400, Soheil Hassas Yeganeh wrote: > From: Soheil Hassas Yeganeh> > The SKBTX_ACK_TSTAMP flag is set in skb_shinfo->tx_flags when > the timestamp of the TCP acknowledgement should be reported on > error queue. Since accessing skb_shinfo is likely to incur a > cache-line miss at the time of receiving the ack, the > txstamp_ack bit was added in tcp_skb_cb, which is set iff > the SKBTX_ACK_TSTAMP flag is set for an skb. This makes > SKBTX_ACK_TSTAMP flag redundant. > > Remove the SKBTX_ACK_TSTAMP and instead use the txstamp_ack bit > everywhere. > > Note that this frees one bit in shinfo->tx_flags. > > Signed-off-by: Soheil Hassas Yeganeh > Acked-by: Martin KaFai Lau > Suggested-by: Willem de Bruijn > --- Acked-by: Eric Dumazet
Re: [PATCH v2 net-next 1/2] tcp: remove an unnecessary check in tcp_tx_timestamp
On Wed, 2016-04-27 at 23:39 -0400, Soheil Hassas Yeganeh wrote: > From: Soheil Hassas Yeganeh> > Remove the redundant check for sk->sk_tsflags in tcp_tx_timestamp. > > tcp_tx_timestamp() receives the tsflags as a parameter. As a > result the "sk->sk_tsflags || tsflags" is redundant, since > tsflags already includes sk->sk_tsflags plus overrides from > control messages. > > Signed-off-by: Soheil Hassas Yeganeh > --- Acked-by: Eric Dumazet
Re: [RFC PATCH 4/5] bnxt: Add support for segmentation of tunnels with outer checksums
On Wed, Apr 27, 2016 at 8:21 AM, Alexander Duyckwrote: > On Tue, Apr 26, 2016 at 10:55 PM, Michael Chan > wrote: >> On Tue, Apr 19, 2016 at 12:06 PM, Alexander Duyck >> wrote: >>> This patch assumes that the bnxt hardware will ignore existing IPv4/v6 >>> header fields for length and checksum as well as the length and checksum >>> fields for outer UDP and GRE headers. >>> >>> I have no means of testing this as I do not have any bnx2x hardware but >>> thought I would submit it as an RFC to see if anyone out there wants to >>> test this and see if this does in fact enable this functionality allowing >>> us to to segment tunneled frames that have an outer checksum. >>> >>> Signed-off-by: Alexander Duyck >> >> Hi Alex, I just did a very quick test of this patch on our bnxt >> hardware and it seemed to work. >> >> I created a vxlan endpoint with udpcsum enabled and I saw TSO packets >> getting through. I've verified that our hardware can be programmed to >> either ignore outer UDP checksum or to calculate it. Current default >> is to ignore ipv4 UDP checksum and calculate ipv6 UDP checksum. >> Thanks. > > Are you saying you can natively support UDP tunnel with outer checksum > offload then? Yes. Calculate or ignore the outer UDP checksum. > > I'm just trying to sort out if you actually need to have the partial > segmentation offload support or if we can handle it in hardware. Also > is there any documentation you could point me to that might help to > clarify what the hardware does/doesn't support so that I could improve > upon this patch in order to make sure we are getting the most bang for > the buck in terms of the features that can be offloaded by hardware? No public documentation yet. I think the plan is to publish the programmer's reference on our website at some point in the future.
[PATCH v2 net-next 2/2] tcp: remove SKBTX_ACK_TSTAMP since it is redundant
From: Soheil Hassas YeganehThe SKBTX_ACK_TSTAMP flag is set in skb_shinfo->tx_flags when the timestamp of the TCP acknowledgement should be reported on error queue. Since accessing skb_shinfo is likely to incur a cache-line miss at the time of receiving the ack, the txstamp_ack bit was added in tcp_skb_cb, which is set iff the SKBTX_ACK_TSTAMP flag is set for an skb. This makes SKBTX_ACK_TSTAMP flag redundant. Remove the SKBTX_ACK_TSTAMP and instead use the txstamp_ack bit everywhere. Note that this frees one bit in shinfo->tx_flags. Signed-off-by: Soheil Hassas Yeganeh Acked-by: Martin KaFai Lau Suggested-by: Willem de Bruijn --- include/linux/skbuff.h | 6 +- net/ipv4/tcp.c | 5 +++-- net/ipv4/tcp_input.c | 3 +-- net/ipv4/tcp_output.c | 17 +++-- net/socket.c | 3 --- 5 files changed, 16 insertions(+), 18 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index da0ace3..ae30555 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -382,14 +382,10 @@ enum { /* generate software time stamp when entering packet scheduling */ SKBTX_SCHED_TSTAMP = 1 << 6, - - /* generate software timestamp on peer data acknowledgment */ - SKBTX_ACK_TSTAMP = 1 << 7, }; #define SKBTX_ANY_SW_TSTAMP(SKBTX_SW_TSTAMP| \ -SKBTX_SCHED_TSTAMP | \ -SKBTX_ACK_TSTAMP) +SKBTX_SCHED_TSTAMP) #define SKBTX_ANY_TSTAMP (SKBTX_HW_TSTAMP | SKBTX_ANY_SW_TSTAMP) /* diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3c542dc..8e05eb6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -435,9 +435,10 @@ static void tcp_tx_timestamp(struct sock *sk, u16 tsflags, struct sk_buff *skb) struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); sock_tx_timestamp(sk, tsflags, >tx_flags); - if (shinfo->tx_flags & SKBTX_ANY_TSTAMP) + if (tsflags & SOF_TIMESTAMPING_TX_ACK) + tcb->txstamp_ack = 1; + if (tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK) shinfo->tskey = TCP_SKB_CB(skb)->seq + skb->len - 1; - tcb->txstamp_ack = !!(shinfo->tx_flags & SKBTX_ACK_TSTAMP); } } diff --git a/net/ipv4/tcp_input.c 
b/net/ipv4/tcp_input.c index 967520d..2f3fd92 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3087,8 +3087,7 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, return; shinfo = skb_shinfo(skb); - if ((shinfo->tx_flags & SKBTX_ACK_TSTAMP) && - !before(shinfo->tskey, prior_snd_una) && + if (!before(shinfo->tskey, prior_snd_una) && before(shinfo->tskey, tcp_sk(sk)->snd_una)) __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 9d3b4b3..ace183c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -,11 +,17 @@ static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int de tcp_verify_left_out(tp); } +static bool tcp_has_tx_tstamp(const struct sk_buff *skb) +{ + return TCP_SKB_CB(skb)->txstamp_ack || + (skb_shinfo(skb)->tx_flags & SKBTX_ANY_TSTAMP); +} + static void tcp_fragment_tstamp(struct sk_buff *skb, struct sk_buff *skb2) { struct skb_shared_info *shinfo = skb_shinfo(skb); - if (unlikely(shinfo->tx_flags & SKBTX_ANY_TSTAMP) && + if (unlikely(tcp_has_tx_tstamp(skb)) && !before(shinfo->tskey, TCP_SKB_CB(skb2)->seq)) { struct skb_shared_info *shinfo2 = skb_shinfo(skb2); u8 tsflags = shinfo->tx_flags & SKBTX_ANY_TSTAMP; @@ -2446,13 +2452,12 @@ u32 __tcp_select_window(struct sock *sk) void tcp_skb_collapse_tstamp(struct sk_buff *skb, const struct sk_buff *next_skb) { - const struct skb_shared_info *next_shinfo = skb_shinfo(next_skb); - u8 tsflags = next_shinfo->tx_flags & SKBTX_ANY_TSTAMP; - - if (unlikely(tsflags)) { + if (unlikely(tcp_has_tx_tstamp(next_skb))) { + const struct skb_shared_info *next_shinfo = + skb_shinfo(next_skb); struct skb_shared_info *shinfo = skb_shinfo(skb); - shinfo->tx_flags |= tsflags; + shinfo->tx_flags |= next_shinfo->tx_flags & SKBTX_ANY_TSTAMP; shinfo->tskey = next_shinfo->tskey; TCP_SKB_CB(skb)->txstamp_ack |= TCP_SKB_CB(next_skb)->txstamp_ack; diff --git a/net/socket.c b/net/socket.c index 5dbb0bb..7789d79 100644 
--- a/net/socket.c +++ b/net/socket.c @@ -600,9 +600,6 @@ void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags) if (tsflags & SOF_TIMESTAMPING_TX_SCHED) flags |=
[PATCH v2 net-next 1/2] tcp: remove an unnecessary check in tcp_tx_timestamp
From: Soheil Hassas YeganehRemove the redundant check for sk->sk_tsflags in tcp_tx_timestamp. tcp_tx_timestamp() receives the tsflags as a parameter. As a result the "sk->sk_tsflags || tsflags" is redundant, since tsflags already includes sk->sk_tsflags plus overrides from control messages. Signed-off-by: Soheil Hassas Yeganeh --- net/ipv4/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4d73858..3c542dc 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -430,7 +430,7 @@ EXPORT_SYMBOL(tcp_init_sock); static void tcp_tx_timestamp(struct sock *sk, u16 tsflags, struct sk_buff *skb) { - if (sk->sk_tsflags || tsflags) { + if (tsflags) { struct skb_shared_info *shinfo = skb_shinfo(skb); struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); -- 2.8.0.rc3.226.g39d4020
[PATCH v2 net-next 0/2] tcp: simplify ack tx timestamps
From: Soheil Hassas Yeganehv2: - Fully remove SKBTX_ACK_TSTAMP, as suggested by Willem de Bruijn. This patch series aims at removing redundant checks and fields for ack timestamps for TCP. Soheil Hassas Yeganeh (2): tcp: remove an unnecessary check in tcp_tx_timestamp tcp: remove SKBTX_ACK_TSTAMP since it is redundant include/linux/skbuff.h | 6 +- net/ipv4/tcp.c | 7 --- net/ipv4/tcp_input.c | 3 +-- net/ipv4/tcp_output.c | 17 +++-- net/socket.c | 3 --- 5 files changed, 17 insertions(+), 19 deletions(-) -- 2.8.0.rc3.226.g39d4020
Re: iproute2: bash completion function for tc
On Tue, Apr 26, 2016 at 09:28:17AM +0200, Quentin Monnet wrote: > Hi Jamal, Stephen, > > I searched for a function providing auto-completion for `tc` utility in > bash, but I found none. So I have created one, and I would like share it > with the community. It is available here: > https://github.com/6WIND/tc_bash-completion/blob/master/tc > I would like to make it easily available to tc users, so here is a > twofold request: > > * I do not know where to submit the code. Should I submit here on netdev > for inclusion in iproute2 package, or rather to the bash-completion > repository on GitHub? I feel like it would receive better feedback and > updates if pushed to iproute2. Could you please provide some advice here? > * The completion for `tc` seems to work well; I have tested it with many > commands, but I am no tc expert, and there are probably some cases where > the completion fails to propose the correct choices. I would be really > interested in any feedback/bug reports that you, or anyone on this list > who uses tc, could provide. that looks very interesting. I think making it a part of iproute2 is a good thing. How about installing it into /etc/iproute2/ ? Stephen, any comments?
Re: [PATCH net-next 0/2] net: avoid some atomic ops when FASYNC is not used
From: Eric DumazetDate: Mon, 25 Apr 2016 10:39:31 -0700 > We can avoid some atomic operations on sockets not using FASYNC I guess a user can do weird things and set/clear the FASYNC bit in the middle of the SOCKWQ_ASYNC_ bit being set, and reset the FASYNC bit later and the SOCKWQ_* state is stale. However, that's probably not worth handling explicitly. Series applied, thanks.
Re: [net-next PATCH V3 0/5] samples/bpf: Improve user experience
From: Jesper Dangaard Brouer Date: Wed, 27 Apr 2016 09:30:08 +0200 > It is a steep learning curve getting started with using the eBPF > examples in samples/bpf/. There are several dependencies, and > specific versions of these dependencies. Invoking make in the correct > manner is also slightly obscure. > > This patchset cleans up, documents and hopefully improves the first-time > user experience with the eBPF samples directory by auto-detecting > certain scenarios. > > V3: > - Add Alexei's ACKs > - Remove README paragraph about LLVM experimental BPF target > as it only existed between LLVM versions 3.6 and 3.7. > > V2: > - Adjusted recommended minimum versions to 3.7.1 > - Included clang build instructions > - New patch adding CLANG variable and validation of command Please respin addressing Naveen's feedback, thanks.
Re: [PATCH net-next 00/17] net: snmp: update SNMP methods
From: Eric Dumazet Date: Wed, 27 Apr 2016 16:44:26 -0700 > In the old days (before linux-3.0), SNMP counters were duplicated, > one set for user context, and another one for BH context. > > After commit 8f0ea0fe3a03 ("snmp: reduce percpu needs by 50%") > we have a single copy, and what really matters is preemption being > enabled or disabled, since we use this_cpu_inc() or __this_cpu_inc() > respectively. > > This patch series kills the obsolete STATS_USER() helpers, > and renames all XXX_BH() helpers to __XXX() ones, to more > closely match conventions used to update per cpu variables. > > This is probably going to hurt maintainers' job for a while, > since cherry-picks will not be clean, but this had to be > cleaned at one point. I am so sorry guys. Looks good to me, series applied, thanks Eric.
Re: [net-next v2 00/14][pull request] 40GbE Intel Wired LAN Driver Updates 2016-04-27
From: Jeff KirsherDate: Wed, 27 Apr 2016 13:15:39 -0700 > This series contains updates to i40e and i40evf. Pulled, thanks Jeff.
[PATCH net 3/3] samples/bpf: fix trace_output example
llvm cannot always recognize memset as a builtin function and optimize it away, so just delete it. It was a leftover from testing of bpf_perf_event_output() with large data structures. Fixes: 39111695b1b8 ("samples: bpf: add bpf_perf_event_output example") Signed-off-by: Alexei Starovoitov --- samples/bpf/trace_output_kern.c | 1 - 1 file changed, 1 deletion(-) diff --git a/samples/bpf/trace_output_kern.c b/samples/bpf/trace_output_kern.c index 8d8d1ec429eb..9b96f4fb8cea 100644 --- a/samples/bpf/trace_output_kern.c +++ b/samples/bpf/trace_output_kern.c @@ -18,7 +18,6 @@ int bpf_prog1(struct pt_regs *ctx) u64 cookie; } data; - memset(&data, 0, sizeof(data)); data.pid = bpf_get_current_pid_tgid(); data.cookie = 0x12345678; -- 2.8.0
[PATCH net 1/3] bpf: fix refcnt overflow
On a system with >32Gbyte of physical memory and infinite RLIMIT_MEMLOCK, a malicious application may overflow the 32-bit bpf program refcnt. It's also possible to overflow the map refcnt on a 1Tb system. Impose a 32k hard limit, which means that the same bpf program or map cannot be shared by more than 32k processes. Fixes: 1be7f75d1668 ("bpf: enable non-root eBPF programs") Reported-by: Jann Horn Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann --- include/linux/bpf.h | 3 ++- kernel/bpf/inode.c| 7 --- kernel/bpf/syscall.c | 24 kernel/bpf/verifier.c | 11 +++ 4 files changed, 33 insertions(+), 12 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 21ee41b92e8a..f1d5c5acc8dd 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -171,12 +171,13 @@ void bpf_register_prog_type(struct bpf_prog_type_list *tl); void bpf_register_map_type(struct bpf_map_type_list *tl); struct bpf_prog *bpf_prog_get(u32 ufd); +struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog); void bpf_prog_put_rcu(struct bpf_prog *prog); struct bpf_map *bpf_map_get_with_uref(u32 ufd); struct bpf_map *__bpf_map_get(struct fd f); -void bpf_map_inc(struct bpf_map *map, bool uref); +struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref); void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); int bpf_map_precharge_memlock(u32 pages); diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index f2ece3c174a5..8f94ca1860cf 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -31,10 +31,10 @@ static void *bpf_any_get(void *raw, enum bpf_type type) { switch (type) { case BPF_TYPE_PROG: - atomic_inc(&((struct bpf_prog *)raw)->aux->refcnt); + raw = bpf_prog_inc(raw); break; case BPF_TYPE_MAP: - bpf_map_inc(raw, true); + raw = bpf_map_inc(raw, true); break; default: WARN_ON_ONCE(1); @@ -297,7 +297,8 @@ static void *bpf_obj_do_get(const struct filename *pathname, goto out; raw =
bpf_any_get(inode->i_private, *type); - touch_atime(); + if (!IS_ERR(raw)) + touch_atime(); path_put(); return raw; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index adc5e4bd74f8..cf5e9f7ad13a 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -218,11 +218,18 @@ struct bpf_map *__bpf_map_get(struct fd f) return f.file->private_data; } -void bpf_map_inc(struct bpf_map *map, bool uref) +/* prog's and map's refcnt limit */ +#define BPF_MAX_REFCNT 32768 + +struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref) { - atomic_inc(>refcnt); + if (atomic_inc_return(>refcnt) > BPF_MAX_REFCNT) { + atomic_dec(>refcnt); + return ERR_PTR(-EBUSY); + } if (uref) atomic_inc(>usercnt); + return map; } struct bpf_map *bpf_map_get_with_uref(u32 ufd) @@ -234,7 +241,7 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd) if (IS_ERR(map)) return map; - bpf_map_inc(map, true); + map = bpf_map_inc(map, true); fdput(f); return map; @@ -658,6 +665,15 @@ static struct bpf_prog *__bpf_prog_get(struct fd f) return f.file->private_data; } +struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog) +{ + if (atomic_inc_return(>aux->refcnt) > BPF_MAX_REFCNT) { + atomic_dec(>aux->refcnt); + return ERR_PTR(-EBUSY); + } + return prog; +} + /* called by sockets/tracing/seccomp before attaching program to an event * pairs with bpf_prog_put() */ @@ -670,7 +686,7 @@ struct bpf_prog *bpf_prog_get(u32 ufd) if (IS_ERR(prog)) return prog; - atomic_inc(>aux->refcnt); + prog = bpf_prog_inc(prog); fdput(f); return prog; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index db2574e7b8b0..89bcaa0966da 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2049,15 +2049,18 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env) return -E2BIG; } - /* remember this map */ - env->used_maps[env->used_map_cnt++] = map; - /* hold the map. 
If the program is rejected by verifier, * the map will be released by release_maps() or it * will be used by the valid program until it's unloaded * and all maps are released in free_bpf_prog_info() */ - bpf_map_inc(map, false); + map = bpf_map_inc(map, false); +
[PATCH net 0/3] bpf: fix several bugs
The first two patches address bugs found by Jann Horn. The last patch is a minor samples fix spotted during testing. Alexei Starovoitov (3): bpf: fix refcnt overflow bpf: fix check_map_func_compatibility logic samples/bpf: fix trace_output example include/linux/bpf.h | 3 +- kernel/bpf/inode.c | 7 ++-- kernel/bpf/syscall.c| 24 ++--- kernel/bpf/verifier.c | 76 + samples/bpf/trace_output_kern.c | 1 - 5 files changed, 73 insertions(+), 38 deletions(-) -- 2.8.0
[PATCH net 2/3] bpf: fix check_map_func_compatibility logic
The commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU conuter") introduced clever way to check bpf_helper<->map_type compatibility. Later on commit a43eec304259 ("bpf: introduce bpf_perf_event_output() helper") adjusted the logic and inadvertently broke it. Get rid of the clever bool compare and go back to two-way check from map and from helper perspective. Fixes: a43eec304259 ("bpf: introduce bpf_perf_event_output() helper") Reported-by: Jann HornSigned-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 65 +++ 1 file changed, 40 insertions(+), 25 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 89bcaa0966da..c5c17a62f509 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -239,16 +239,6 @@ static const char * const reg_type_str[] = { [CONST_IMM] = "imm", }; -static const struct { - int map_type; - int func_id; -} func_limit[] = { - {BPF_MAP_TYPE_PROG_ARRAY, BPF_FUNC_tail_call}, - {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_read}, - {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_output}, - {BPF_MAP_TYPE_STACK_TRACE, BPF_FUNC_get_stackid}, -}; - static void print_verifier_state(struct verifier_env *env) { enum bpf_reg_type t; @@ -921,27 +911,52 @@ static int check_func_arg(struct verifier_env *env, u32 regno, static int check_map_func_compatibility(struct bpf_map *map, int func_id) { - bool bool_map, bool_func; - int i; - if (!map) return 0; - for (i = 0; i < ARRAY_SIZE(func_limit); i++) { - bool_map = (map->map_type == func_limit[i].map_type); - bool_func = (func_id == func_limit[i].func_id); - /* only when map & func pair match it can continue. 
-* don't allow any other map type to be passed into -* the special func; -*/ - if (bool_func && bool_map != bool_func) { - verbose("cannot pass map_type %d into func %d\n", - map->map_type, func_id); - return -EINVAL; - } + /* We need a two way check, first is from map perspective ... */ + switch (map->map_type) { + case BPF_MAP_TYPE_PROG_ARRAY: + if (func_id != BPF_FUNC_tail_call) + goto error; + break; + case BPF_MAP_TYPE_PERF_EVENT_ARRAY: + if (func_id != BPF_FUNC_perf_event_read && + func_id != BPF_FUNC_perf_event_output) + goto error; + break; + case BPF_MAP_TYPE_STACK_TRACE: + if (func_id != BPF_FUNC_get_stackid) + goto error; + break; + default: + break; + } + + /* ... and second from the function itself. */ + switch (func_id) { + case BPF_FUNC_tail_call: + if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY) + goto error; + break; + case BPF_FUNC_perf_event_read: + case BPF_FUNC_perf_event_output: + if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) + goto error; + break; + case BPF_FUNC_get_stackid: + if (map->map_type != BPF_MAP_TYPE_STACK_TRACE) + goto error; + break; + default: + break; } return 0; +error: + verbose("cannot pass map_type %d into func %d\n", + map->map_type, func_id); + return -EINVAL; } static int check_call(struct verifier_env *env, int func_id) -- 2.8.0
Re: [PATCH v2 net-next 11/13] Documentation: Bindings: Update DT binding for separating dsaf dev support
Hi Rob, Thanks for you comments. 在 2016/4/27 23:25, Rob Herring 写道: > On Tue, Apr 26, 2016 at 10:33 PM, Yisen Zhuang> wrote: >> Hi Rob and David, >> >> Please see my comments inline. >> >> David have merged this series to net-next, but we need to modify some codes >> according >> to Rob's comments. I am not sure if i need to send V3 for this series, or >> separate >> patches of documentation to independent series and generate a new patch for >> hns base >> on current net-next? > > That's David's call. I'm guessing he wants follow-up patches on top of these. Okay, I will send a new series base on current net-next. > >> 在 2016/4/26 20:48, Rob Herring 写道: >>> On Sat, Apr 23, 2016 at 05:05:15PM +0800, Yisen Zhuang wrote: Because debug dsaf port was separated from service dsaf port, this patch updates the related information of DT binding. >>> >>> Separated when? New version of the h/w? If so, where's the new >>> compatible string? This is quite a big binding change. >> >> There isn't any change of h/w. I separated debug dsaf port from sevice dsaf >> port to make the code more simple and readability. > > Okay. > > [...] > + serdes-syscon rather than this address. The third region is the PPE register base and size. - The fourth region is dsa fabric base register and size. - The fifth region is cpld base register and size, it is not required if do not use cpld. -- phy-handle: phy handle of physicl port, 0 if not any phy device. see ethernet.txt [1]. + The fourth region is dsa fabric base register and size. It is not required for + single-port mode. +- reg-names: may be ppe-base and(or) dsaf-base. It is used to find the + corresponding reg's index. >>> >>> But you have up to 5 regions. >>> >>> The variable nature of what regions you have tells me you need more >>> specific compatible strings for each chip. >> >> we didn't add support of new h/w. We added these regions to make code simple >> and readability. 
>> If we need to add support of next h/w version next time, we don't need to >> add many branches >> for these attributes. So we didn't add a new compatible here. > > Not sure what you mean by branches. It's fine to put properties for > things that vary among h/w versions, but new compatible strings will > be needed for any new versions. I mean than we put properties for things that vary among h/w versions. If we add support for new h/w versions next time, we will add new compatible strings. > > +- port: subnodes of dsaf. A dsaf node may contain several port nodes(Depending + on mode of dsaf). Port node contain some attributes listed below: +- port-id: is physical port index in one dsaf. >>> >>> Indexes should generally be avoided. What does the number correspond >>> to in h/w (if anything)? >> >> port-id is index for a port in dsaf, it is correspond to index of PHY showed >> below. > > Okay, you should use reg property here instead. Agree, thanks. > >> >> CPU >> | >> --- >> | | | >> --- - >> | | || | | | | >> |PPE || PPE | | PPE | >> | | || | | | | | >> | | || | | | | | >> | crossbar || | | | | | >> | | || | | | | | >> | -- || | | | | | >> | | | | | | | || | | | | | >> | | | | | | | || | | | | | >> | MAC MAC MACMACMACMAC || MAC | | MAC | >> | | | | | | | || | | | | | >> --- - >> | | | | | |\/ |/ | >>PHY PHY PHYPHYPHYPHY\ / PHY / PHY >> \/ / >> \ / / >> DSAF(three platform device) >> >>> +- phy-handle: phy handle of physicl port. It is not required if there isn't > > Another typo here. Agree, thanks. > > Rob > > . >
[PATCH net v3 5/5] drivers: net: cpsw: use of_phy_connect() in fixed-link case
From: David RivshinIf a fixed-link DT subnode is used, the phy_device was looked up so that a PHY ID string could be constructed and passed to phy_connect(). This is not necessary, as the device_node can be passed directly to of_phy_connect() instead. This reuses the same codepath as if the phy-handle DT property was used. Signed-off-by: David Rivshin Tested-by: Nicolas Chauvet Tested-by: Andrew Goodbody Reviewed-by: Mugunthan V N Reviewed-by: Grygorii Strashko --- Changes since v2 [1]: - Added Tested-by from Andrew Goodbody [3] - Added Reviewed-by from Mugunthan V N [4] - Added Reviewed-by from Grygorii Strashko [5] Changes since v1 [2]: - Rebased (trivial conflict, e5a03bfd modified the deleted snprintf) - Added Tested-by from Nicolas Chauvet [1] http://patchwork.ozlabs.org/patch/613276/ [2] http://patchwork.ozlabs.org/patch/560327/ [3] https://lkml.org/lkml/2016/4/22/537 [4] https://lkml.org/lkml/2016/4/22/63 [5] https://lkml.org/lkml/2016/4/22/529 drivers/net/ethernet/ti/cpsw.c | 11 +-- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 712bc6d..e2fcdf1 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2044,30 +2044,21 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, "phy-handle", 0); parp = of_get_property(slave_node, "phy_id", ); if (slave_data->phy_node) { dev_dbg(>dev, "slave[%d] using phy-handle=\"%s\"\n", i, slave_data->phy_node->full_name); } else if (of_phy_is_fixed_link(slave_node)) { - struct device_node *phy_node; - struct phy_device *phy_dev; - /* In the case of a fixed PHY, the DT node associated * to the PHY is the Ethernet MAC DT node. 
*/ ret = of_phy_register_fixed_link(slave_node); if (ret) return ret; - phy_node = of_node_get(slave_node); - phy_dev = of_phy_find_device(phy_node); - if (!phy_dev) - return -ENODEV; - snprintf(slave_data->phy_id, sizeof(slave_data->phy_id), -PHY_ID_FMT, phy_dev->mdio.bus->id, -phy_dev->mdio.addr); + slave_data->phy_node = of_node_get(slave_node); } else if (parp) { u32 phyid; struct device_node *mdio_node; struct platform_device *mdio; if (lenp != (sizeof(__be32) * 2)) { dev_err(>dev, "Invalid slave[%d] phy_id property\n", i); -- 2.5.5
[PATCH net v3 4/5] dt: cpsw: phy-handle, phy_id, and fixed-link are mutually exclusive
From: David RivshinThe phy-handle, phy_id, and fixed-link properties are mutually exclusive, and only one need be specified. Make this clear in the binding doc. Also mark the phy_id property as deprecated, as phy-handle should be used instead. Signed-off-by: David Rivshin --- Changes since v2 [1]: - split from previous patch 2 - marked the phy_id property as deprecated [3] - removed Rob Herring's Acked-by due to above change Changes since v1 [2]: - Rebased (no conflicts) - Added Tested-by from Nicolas Chauvet - Added Acked-by from Rob Herring for the binding change [1] http://patchwork.ozlabs.org/patch/613260/ [2] http://patchwork.ozlabs.org/patch/560324/ [3] https://lkml.org/lkml/2016/4/22/494 Documentation/devicetree/bindings/net/cpsw.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/net/cpsw.txt b/Documentation/devicetree/bindings/net/cpsw.txt index 28a4781..0ae0649 100644 --- a/Documentation/devicetree/bindings/net/cpsw.txt +++ b/Documentation/devicetree/bindings/net/cpsw.txt @@ -41,21 +41,21 @@ Optional properties: Slave Properties: Required properties: - phy-mode : See ethernet.txt file in the same directory Optional properties: - dual_emac_res_vlan : Specifies VID to be used to segregate the ports - mac-address : See ethernet.txt file in the same directory -- phy_id : Specifies slave phy id +- phy_id : Specifies slave phy id (deprecated, use phy-handle) - phy-handle : See ethernet.txt file in the same directory Slave sub-nodes: - fixed-link : See fixed-link.txt file in the same directory - Either the property phy_id, or the sub-node - fixed-link can be specified + +Note: Exactly one of phy_id, phy-handle, or fixed-link must be specified. Note: "ti,hwmods" field is used to fetch the base address and irq resources from TI, omap hwmod data base during device registration. 
Future plan is to migrate hwmod data base contents into device tree blob so that, all the required data will be used from device tree dts file. -- 2.5.5
[PATCH net v3 3/5] drivers: net: cpsw: don't ignore phy-mode if phy-handle is used
From: David RivshinThe phy-mode emac property was only being processed in the phy_id or fixed-link cases. However if phy-handle was specified instead, an error message would complain about the lack of phy_id or fixed-link, and then jump past the of_get_phy_mode(). This would result in the PHY mode defaulting to MII, regardless of what the devicetree specified. Fixes: 9e42f715264f ("drivers: net: cpsw: add phy-handle parsing") Signed-off-by: David Rivshin Tested-by: Nicolas Chauvet Tested-by: Andrew Goodbody Reviewed-by: Mugunthan V N --- I would suggest this for -stable. It should apply cleanly as far back as 4.4. Changes since v2 [1]: - split from previous patch 2 - Added Tested-by from Andrew Goodbody [3] - Added Reviewed-by from Mugunthan V N [4] - rewrote commit log to focus on the functional bug fixed, rather than the bogus error message Changes since v1 [2]: - Rebased (no conflicts) - Added Tested-by from Nicolas Chauvet - Added Acked-by from Rob Herring for the binding change [1] http://patchwork.ozlabs.org/patch/613260/ [2] http://patchwork.ozlabs.org/patch/560324/ [3] https://lkml.org/lkml/2016/4/22/537 [4] https://lkml.org/lkml/2016/4/22/63 drivers/net/ethernet/ti/cpsw.c | 10 -- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 5903448..712bc6d 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2039,15 +2039,19 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, /* This is no slave child node, continue */ if (strcmp(slave_node->name, "slave")) continue; slave_data->phy_node = of_parse_phandle(slave_node, "phy-handle", 0); parp = of_get_property(slave_node, "phy_id", ); - if (of_phy_is_fixed_link(slave_node)) { + if (slave_data->phy_node) { + dev_dbg(>dev, + "slave[%d] using phy-handle=\"%s\"\n", + i, slave_data->phy_node->full_name); + } else if (of_phy_is_fixed_link(slave_node)) { struct device_node *phy_node; struct phy_device 
*phy_dev; /* In the case of a fixed PHY, the DT node associated * to the PHY is the Ethernet MAC DT node. */ ret = of_phy_register_fixed_link(slave_node); @@ -2076,15 +2080,17 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, if (!mdio) { dev_err(>dev, "Missing mdio platform device\n"); return -EINVAL; } snprintf(slave_data->phy_id, sizeof(slave_data->phy_id), PHY_ID_FMT, mdio->name, phyid); } else { - dev_err(>dev, "No slave[%d] phy_id or fixed-link property\n", i); + dev_err(>dev, + "No slave[%d] phy_id, phy-handle, or fixed-link property\n", + i); goto no_phy_slave; } slave_data->phy_if = of_get_phy_mode(slave_node); if (slave_data->phy_if < 0) { dev_err(>dev, "Missing or malformed slave[%d] phy-mode property\n", i); return slave_data->phy_if; -- 2.5.5
[PATCH net v3 2/5] drivers: net: cpsw: fix segfault in case of bad phy-handle
From: David RivshinIf an emac node has a phy-handle property that points to something which is not a phy, then a segmentation fault will occur when the interface is brought up. This is because while phy_connect() will return ERR_PTR() on failure, of_phy_connect() will return NULL. The common error check uses IS_ERR(), and so missed when of_phy_connect() fails. The NULL pointer is then dereferenced. Also, the common error message referenced slave->data->phy_id, which would be empty in the case of phy-handle. Instead, use the name of the device_node as a useful identifier. And in the phy_id case add the error code for completeness. Fixes: 9e42f715264f ("drivers: net: cpsw: add phy-handle parsing") Signed-off-by: David Rivshin --- I would suggest this for -stable. It should apply cleanly as far back as 4.5, although there is a trivial conflict in 4.4. I can produce a separate patch against linux-4.4.y if preferred. Changes since v2: - new patch, although fixing part of previous patch 2 [1] Changes since v1 [2]: - Rebased (no conflicts) - Added Tested-by from Nicolas Chauvet - Added Acked-by from Rob Herring for the binding change [1] http://patchwork.ozlabs.org/patch/613260/ [2] http://patchwork.ozlabs.org/patch/560324/ drivers/net/ethernet/ti/cpsw.c | 37 +++-- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index ce0b0ca..5903448 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1143,33 +1143,42 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) if (priv->data.dual_emac) cpsw_add_dual_emac_def_ale_entries(priv, slave, slave_port); else cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast, 1 << slave_port, 0, 0, ALE_MCAST_FWD_2); - if (slave->data->phy_node) + if (slave->data->phy_node) { slave->phy = of_phy_connect(priv->ndev, slave->data->phy_node, _adjust_link, 0, slave->data->phy_if); - else + if (!slave->phy) { + 
dev_err(priv->dev, "phy \"%s\" not found on slave %d\n", + slave->data->phy_node->full_name, + slave->slave_num); + return; + } + } else { slave->phy = phy_connect(priv->ndev, slave->data->phy_id, _adjust_link, slave->data->phy_if); - if (IS_ERR(slave->phy)) { - dev_err(priv->dev, "phy %s not found on slave %d\n", - slave->data->phy_id, slave->slave_num); - slave->phy = NULL; - } else { - phy_attached_info(slave->phy); - - phy_start(slave->phy); - - /* Configure GMII_SEL register */ - cpsw_phy_sel(>pdev->dev, slave->phy->interface, -slave->slave_num); + if (IS_ERR(slave->phy)) { + dev_err(priv->dev, + "phy \"%s\" not found on slave %d, err %ld\n", + slave->data->phy_id, slave->slave_num, + PTR_ERR(slave->phy)); + slave->phy = NULL; + return; + } } + + phy_attached_info(slave->phy); + + phy_start(slave->phy); + + /* Configure GMII_SEL register */ + cpsw_phy_sel(>pdev->dev, slave->phy->interface, slave->slave_num); } static inline void cpsw_add_default_vlan(struct cpsw_priv *priv) { const int vlan = priv->data.default_vlan; const int port = priv->host_port; u32 reg; -- 2.5.5
[PATCH net v3 1/5] drivers: net: cpsw: fix parsing of phy-handle DT property in dual_emac config
From: David RivshinCommit 9e42f715264ff158478fa30eaed847f6e131366b ("drivers: net: cpsw: add phy-handle parsing") saved the "phy-handle" phandle into a new cpsw_priv field. However, phy connections are per-slave, so the phy_node field should be in cpsw_slave_data rather than cpsw_priv. This would go unnoticed in a single emac configuration. But in dual_emac mode, the last "phy-handle" property parsed for either slave would be used by both of them, causing them both to refer to the same phy_device. Fixes: 9e42f715264f ("drivers: net: cpsw: add phy-handle parsing") Signed-off-by: David Rivshin Tested-by: Nicolas Chauvet Tested-by: Andrew Goodbody Reviewed-by: Mugunthan V N Reviewed-by: Grygorii Strashko --- I would suggest this for -stable. It should apply cleanly as far back as 4.4. Changes since v2 [1]: - Added Tested-by from Andrew Goodbody [3] - Added Reviewed-by from Mugunthan V N [4] - Added Reviewed-by from Grygorii Strashko [5] Changes since v1 [2]: - Rebased (no conflicts) - Added Tested-by from Nicolas Chauvet [1] http://patchwork.ozlabs.org/patch/613237/ [2] http://patchwork.ozlabs.org/patch/560326/ [3] https://lkml.org/lkml/2016/4/22/537 [4] https://lkml.org/lkml/2016/4/22/63 [5] https://lkml.org/lkml/2016/4/22/496 drivers/net/ethernet/ti/cpsw.c | 13 ++--- drivers/net/ethernet/ti/cpsw.h | 1 + 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index bbb77cd..ce0b0ca 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -363,15 +363,14 @@ static inline void slave_write(struct cpsw_slave *slave, u32 val, u32 offset) __raw_writel(val, slave->regs + offset); } struct cpsw_priv { spinlock_t lock; struct platform_device *pdev; struct net_device *ndev; - struct device_node *phy_node; struct napi_struct napi_rx; struct napi_struct napi_tx; struct device *dev; struct cpsw_platform_data data; struct cpsw_ss_regs __iomem *regs; struct cpsw_wr_regs __iomem 
*wr_regs; u8 __iomem *hw_stats; @@ -1144,16 +1143,16 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) if (priv->data.dual_emac) cpsw_add_dual_emac_def_ale_entries(priv, slave, slave_port); else cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast, 1 << slave_port, 0, 0, ALE_MCAST_FWD_2); - if (priv->phy_node) - slave->phy = of_phy_connect(priv->ndev, priv->phy_node, + if (slave->data->phy_node) + slave->phy = of_phy_connect(priv->ndev, slave->data->phy_node, _adjust_link, 0, slave->data->phy_if); else slave->phy = phy_connect(priv->ndev, slave->data->phy_id, _adjust_link, slave->data->phy_if); if (IS_ERR(slave->phy)) { dev_err(priv->dev, "phy %s not found on slave %d\n", slave->data->phy_id, slave->slave_num); @@ -1936,20 +1935,19 @@ static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_priv *priv, slave->data = data; slave->regs = regs + slave_reg_ofs; slave->sliver = regs + sliver_reg_ofs; slave->port_vlan = data->dual_emac_res_vlan; } -static int cpsw_probe_dt(struct cpsw_priv *priv, +static int cpsw_probe_dt(struct cpsw_platform_data *data, struct platform_device *pdev) { struct device_node *node = pdev->dev.of_node; struct device_node *slave_node; - struct cpsw_platform_data *data = >data; int i = 0, ret; u32 prop; if (!node) return -EINVAL; if (of_property_read_u32(node, "slaves", )) { @@ -2029,15 +2027,16 @@ static int cpsw_probe_dt(struct cpsw_priv *priv, int lenp; const __be32 *parp; /* This is no slave child node, continue */ if (strcmp(slave_node->name, "slave")) continue; - priv->phy_node = of_parse_phandle(slave_node, "phy-handle", 0); + slave_data->phy_node = of_parse_phandle(slave_node, + "phy-handle", 0); parp = of_get_property(slave_node, "phy_id", ); if (of_phy_is_fixed_link(slave_node)) { struct device_node *phy_node; struct phy_device *phy_dev; /* In the case of a fixed PHY, the DT node associated
[PATCH net v3 0/5] drivers: net: cpsw: phy-handle fixes
From: David Rivshin This series fixes a number of related issues around using phy-handle properties in cpsw emac nodes. Patch 1 fixes a bug if more than one slave is used, and either slave uses the phy-handle property in the devicetree. Patch 2 fixes a NULL pointer dereference which can occur if a phy-handle property is used and of_phy_connect() returns NULL, such as with a bad devicetree. Patch 3 fixes an issue where the phy-mode property would be ignored if a phy-handle property was used. This also fixes a bogus error message that would be emitted. Patch 4 makes the binding documentation more explicit that exactly one PHY property should be used, and also marks phy_id as deprecated. Patch 5 cleans up the fixed-link case to work like the now-fixed phy-handle case. I have tested on the following hardware configurations: - (EVMSK) dual emac, phy_id property in both slaves - (EVMSK) dual emac, phy-handle property in both slaves - (EVMSK) a bad phy-handle property pointing to a non-PHY node - (EVMSK) phy_id property with incorrect PHY address - (BeagleBoneBlack) single emac, phy_id property - (custom) single emac, fixed-link subnode Andrew Goodbody reported testing v2 on a board that doesn't use dual_emac mode, but with 2 PHYs using phy-handle properties [1]. Nicolas Chauvet reported testing v2 on an HP t410 (dm8148). 
Markus Brunner reported testing v1 on the following [2]: - emac0 with phy_id and emac1 with fixed phy - emac0 with phy-handle and emac1 with fixed phy - emac0 with fixed phy and emac1 with fixed phy [1] https://lkml.org/lkml/2016/4/22/537 [2] http://www.spinics.net/lists/netdev/msg357890.html David Rivshin (5): drivers: net: cpsw: fix parsing of phy-handle DT property in dual_emac config drivers: net: cpsw: fix segfault in case of bad phy-handle drivers: net: cpsw: don't ignore phy-mode if phy-handle is used dt: cpsw: phy-handle, phy_id, and fixed-link are mutually exclusive drivers: net: cpsw: use of_phy_connect() in fixed-link case Documentation/devicetree/bindings/net/cpsw.txt | 6 +-- drivers/net/ethernet/ti/cpsw.c | 69 ++ drivers/net/ethernet/ti/cpsw.h | 1 + 3 files changed, 41 insertions(+), 35 deletions(-) -- 2.5.5
Re: [PATCH 3.2 085/115] veth: don’t modify ip_summed; doing so treats packets with bad checksums as good.
On 04/27/2016 05:00 PM, Hannes Frederic Sowa wrote: Hi Ben, On Wed, Apr 27, 2016, at 20:07, Ben Hutchings wrote: On Wed, 2016-04-27 at 08:59 -0700, Ben Greear wrote: On 04/26/2016 04:02 PM, Ben Hutchings wrote: 3.2.80-rc1 review patch. If anyone has any objections, please let me know. I would be careful about this. It causes regressions when sending PACKET_SOCKET buffers from user-space to veth devices. There was a proposed upstream fix for the regression, but it has not gone into the tree as far as I know. http://www.spinics.net/lists/netdev/msg370436.html [...] OK, I'll drop this for now. The fall out from not having this patch is in my opinion a bigger fallout than not having this patch. This patch fixes silent data corruption vs. the problem Ben Greear is talking about, which might not be that a common usage. What do others think? Bye, Hannes This patch from Cong Wang seems to fix the regression for me, I think it should be added and tested in the main tree, and then apply them to stable as a pair. 
http://dmz2.candelatech.com/?p=linux-4.4.dev.y/.git;a=commitdiff;h=8153e983c0e5eba1aafe1fc296248ed2a553f1ac;hp=454b07405d694dad52e7f41af5816eed0190da8a diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index da1ae0e..f8cc758 100644 (file) --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1926,6 +1926,7 @@ retry: goto out_unlock; } + skb->ip_summed = CHECKSUM_UNNECESSARY; skb->protocol = proto; skb->dev = dev; skb->priority = sk->sk_priority; @@ -2352,6 +2353,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, ph.raw = frame; + skb->ip_summed = CHECKSUM_UNNECESSARY; skb->protocol = proto; skb->dev = dev; skb->priority = po->sk.sk_priority; @@ -2776,6 +2778,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) goto out_free; } + skb->ip_summed = CHECKSUM_UNNECESSARY; skb->protocol = proto; skb->dev = dev; skb->priority = sk->sk_priority; Thanks, Ben -- Ben GreearCandela Technologies Inc http://www.candelatech.com
Re: [PATCH 3.2 085/115] veth: don’t modify ip_summed; doing so treats packets with bad checksums as good.
Hi Ben, On Wed, Apr 27, 2016, at 20:07, Ben Hutchings wrote: > On Wed, 2016-04-27 at 08:59 -0700, Ben Greear wrote: > > On 04/26/2016 04:02 PM, Ben Hutchings wrote: > > > > > > 3.2.80-rc1 review patch. If anyone has any objections, please let me > > > know. > > I would be careful about this. It causes regressions when sending > > PACKET_SOCKET buffers from user-space to veth devices. > > > > There was a proposed upstream fix for the regression, but it has not gone > > into the tree as far as I know. > > > > http://www.spinics.net/lists/netdev/msg370436.html > [...] > > OK, I'll drop this for now. The fallout from not having this patch is, in my opinion, bigger than the fallout from having it. This patch fixes silent data corruption, whereas the problem Ben Greear is talking about might not be that common a usage. What do others think? Bye, Hannes
[PATCH net-next 13/17] net: rename NET_{ADD|INC}_STATS_BH()
Rename NET_INC_STATS_BH() to __NET_INC_STATS() and NET_ADD_STATS_BH() to __NET_ADD_STATS() Signed-off-by: Eric Dumazet--- include/net/ip.h | 4 +- include/net/tcp.h | 4 +- net/core/dev.c| 4 +- net/dccp/ipv4.c | 10 ++--- net/dccp/ipv6.c | 8 ++-- net/dccp/timer.c | 4 +- net/ipv4/arp.c| 2 +- net/ipv4/inet_hashtables.c| 2 +- net/ipv4/inet_timewait_sock.c | 4 +- net/ipv4/ip_input.c | 2 +- net/ipv4/syncookies.c | 4 +- net/ipv4/tcp.c| 4 +- net/ipv4/tcp_cdg.c| 20 - net/ipv4/tcp_cubic.c | 20 - net/ipv4/tcp_fastopen.c | 14 +++--- net/ipv4/tcp_input.c | 100 ++ net/ipv4/tcp_ipv4.c | 22 +- net/ipv4/tcp_minisocks.c | 10 ++--- net/ipv4/tcp_output.c | 14 +++--- net/ipv4/tcp_recovery.c | 4 +- net/ipv4/tcp_timer.c | 22 +- net/ipv6/inet6_hashtables.c | 2 +- net/ipv6/syncookies.c | 4 +- net/ipv6/tcp_ipv6.c | 16 +++ net/sctp/input.c | 2 +- 25 files changed, 153 insertions(+), 149 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index 55f5de50a564..fb3b766ca1c7 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -193,9 +193,9 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, #define IP_UPD_PO_STATS(net, field, val) SNMP_UPD_PO_STATS64((net)->mib.ip_statistics, field, val) #define __IP_UPD_PO_STATS(net, field, val) SNMP_UPD_PO_STATS64_BH((net)->mib.ip_statistics, field, val) #define NET_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.net_statistics, field) -#define NET_INC_STATS_BH(net, field) SNMP_INC_STATS_BH((net)->mib.net_statistics, field) +#define __NET_INC_STATS(net, field) SNMP_INC_STATS_BH((net)->mib.net_statistics, field) #define NET_ADD_STATS(net, field, adnd) SNMP_ADD_STATS((net)->mib.net_statistics, field, adnd) -#define NET_ADD_STATS_BH(net, field, adnd) SNMP_ADD_STATS_BH((net)->mib.net_statistics, field, adnd) +#define __NET_ADD_STATS(net, field, adnd) SNMP_ADD_STATS_BH((net)->mib.net_statistics, field, adnd) u64 snmp_get_cpu_field(void __percpu *mib, int cpu, int offct); unsigned long snmp_fold_field(void __percpu *mib, int offt); diff 
--git a/include/net/tcp.h b/include/net/tcp.h index 939ebd5320a9..ff8b4265cb2b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1743,7 +1743,7 @@ static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops, __u16 *mss) { tcp_synq_overflow(sk); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); return ops->cookie_init_seq(skb, mss); } #else @@ -1852,7 +1852,7 @@ static inline void tcp_segs_in(struct tcp_sock *tp, const struct sk_buff *skb) static inline void tcp_listendrop(const struct sock *sk) { atomic_inc(&((struct sock *)sk)->sk_drops); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS); } #endif /* _TCP_H */ diff --git a/net/core/dev.c b/net/core/dev.c index 6324bc9267f7..e96a3bc2c634 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4982,8 +4982,8 @@ bool sk_busy_loop(struct sock *sk, int nonblock) netpoll_poll_unlock(have); } if (rc > 0) - NET_ADD_STATS_BH(sock_net(sk), -LINUX_MIB_BUSYPOLLRXPACKETS, rc); + __NET_ADD_STATS(sock_net(sk), + LINUX_MIB_BUSYPOLLRXPACKETS, rc); local_bh_enable(); if (rc == LL_FLUSH_FAILED) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index a9c75e79ba99..a8164272e0f4 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -205,7 +205,7 @@ void dccp_req_err(struct sock *sk, u64 seq) * socket here. */ if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) { - NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); + __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS); } else { /* * Still in RESPOND, just remove it silently. @@ -273,7 +273,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) * servers this needs to be solved differently. 
*/ if (sock_owned_by_user(sk)) - NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); + __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == DCCP_CLOSED) goto out; @@ -281,7 +281,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) dp = dccp_sk(sk); if ((1 <<
[PATCH net-next 11/17] net: rename IP_ADD_STATS_BH()
Rename IP_ADD_STATS_BH() to __IP_ADD_STATS() Signed-off-by: Eric Dumazet--- include/net/ip.h | 2 +- net/ipv4/ip_forward.c | 2 +- net/ipv4/ip_input.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index 0be0af3017ba..0df4809bc68a 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -189,7 +189,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, #define IP_INC_STATS(net, field) SNMP_INC_STATS64((net)->mib.ip_statistics, field) #define __IP_INC_STATS(net, field) SNMP_INC_STATS64_BH((net)->mib.ip_statistics, field) #define IP_ADD_STATS(net, field, val) SNMP_ADD_STATS64((net)->mib.ip_statistics, field, val) -#define IP_ADD_STATS_BH(net, field, val) SNMP_ADD_STATS64_BH((net)->mib.ip_statistics, field, val) +#define __IP_ADD_STATS(net, field, val) SNMP_ADD_STATS64_BH((net)->mib.ip_statistics, field, val) #define IP_UPD_PO_STATS(net, field, val) SNMP_UPD_PO_STATS64((net)->mib.ip_statistics, field, val) #define IP_UPD_PO_STATS_BH(net, field, val) SNMP_UPD_PO_STATS64_BH((net)->mib.ip_statistics, field, val) #define NET_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.net_statistics, field) diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 42fbd59b0ba8..cbfb1808fcc4 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -66,7 +66,7 @@ static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *s struct ip_options *opt = &(IPCB(skb)->opt); __IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS); - IP_ADD_STATS_BH(net, IPSTATS_MIB_OUTOCTETS, skb->len); + __IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len); if (unlikely(opt->optlen)) ip_forward_options(skb); diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index cca6729cd6ee..11f34e421270 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -439,9 +439,9 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, BUILD_BUG_ON(IPSTATS_MIB_ECT1PKTS != IPSTATS_MIB_NOECTPKTS 
+ INET_ECN_ECT_1); BUILD_BUG_ON(IPSTATS_MIB_ECT0PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_0); BUILD_BUG_ON(IPSTATS_MIB_CEPKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_CE); - IP_ADD_STATS_BH(net, - IPSTATS_MIB_NOECTPKTS + (iph->tos & INET_ECN_MASK), - max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs)); + __IP_ADD_STATS(net, + IPSTATS_MIB_NOECTPKTS + (iph->tos & INET_ECN_MASK), + max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs)); if (!pskb_may_pull(skb, iph->ihl*4)) goto inhdr_error; -- 2.8.0.rc3.226.g39d4020
[PATCH net-next 17/17] net: snmp: kill STATS_BH macros
There is nothing related to BH in SNMP counters anymore, since linux-3.0. Rename helpers to use a __ prefix instead of the _BH suffix, for contexts where preemption is disabled. This more closely matches the convention used to update percpu variables. Signed-off-by: Eric Dumazet--- include/net/icmp.h | 2 +- include/net/ip.h| 10 +- include/net/ipv6.h | 36 ++-- include/net/sctp/sctp.h | 6 +++--- include/net/snmp.h | 24 include/net/tcp.h | 2 +- include/net/udp.h | 8 net/dccp/dccp.h | 2 +- 8 files changed, 45 insertions(+), 45 deletions(-) diff --git a/include/net/icmp.h b/include/net/icmp.h index 25edb740c648..3ef2743a8eec 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h @@ -30,7 +30,7 @@ struct icmp_err { extern const struct icmp_err icmp_err_convert[]; #define ICMP_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.icmp_statistics, field) -#define __ICMP_INC_STATS(net, field) SNMP_INC_STATS_BH((net)->mib.icmp_statistics, field) +#define __ICMP_INC_STATS(net, field) __SNMP_INC_STATS((net)->mib.icmp_statistics, field) #define ICMPMSGOUT_INC_STATS(net, field) SNMP_INC_STATS_ATOMIC_LONG((net)->mib.icmpmsg_statistics, field+256) #define ICMPMSGIN_INC_STATS(net, field) SNMP_INC_STATS_ATOMIC_LONG((net)->mib.icmpmsg_statistics, field) diff --git a/include/net/ip.h b/include/net/ip.h index fb3b766ca1c7..247ac82e9cf2 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -187,15 +187,15 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, unsigned int len); #define IP_INC_STATS(net, field) SNMP_INC_STATS64((net)->mib.ip_statistics, field) -#define __IP_INC_STATS(net, field) SNMP_INC_STATS64_BH((net)->mib.ip_statistics, field) +#define __IP_INC_STATS(net, field) __SNMP_INC_STATS64((net)->mib.ip_statistics, field) #define IP_ADD_STATS(net, field, val) SNMP_ADD_STATS64((net)->mib.ip_statistics, field, val) -#define __IP_ADD_STATS(net, field, val) SNMP_ADD_STATS64_BH((net)->mib.ip_statistics, field, val) +#define __IP_ADD_STATS(net, field, val) 
__SNMP_ADD_STATS64((net)->mib.ip_statistics, field, val) #define IP_UPD_PO_STATS(net, field, val) SNMP_UPD_PO_STATS64((net)->mib.ip_statistics, field, val) -#define __IP_UPD_PO_STATS(net, field, val) SNMP_UPD_PO_STATS64_BH((net)->mib.ip_statistics, field, val) +#define __IP_UPD_PO_STATS(net, field, val) __SNMP_UPD_PO_STATS64((net)->mib.ip_statistics, field, val) #define NET_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.net_statistics, field) -#define __NET_INC_STATS(net, field) SNMP_INC_STATS_BH((net)->mib.net_statistics, field) +#define __NET_INC_STATS(net, field) __SNMP_INC_STATS((net)->mib.net_statistics, field) #define NET_ADD_STATS(net, field, adnd) SNMP_ADD_STATS((net)->mib.net_statistics, field, adnd) -#define __NET_ADD_STATS(net, field, adnd) SNMP_ADD_STATS_BH((net)->mib.net_statistics, field, adnd) +#define __NET_ADD_STATS(net, field, adnd) __SNMP_ADD_STATS((net)->mib.net_statistics, field, adnd) u64 snmp_get_cpu_field(void __percpu *mib, int cpu, int offct); unsigned long snmp_fold_field(void __percpu *mib, int offt); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 64ce3670d40a..415213da5be3 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -121,21 +121,21 @@ struct frag_hdr { extern int sysctl_mld_max_msf; extern int sysctl_mld_qrv; -#define _DEVINC(net, statname, modifier, idev, field) \ +#define _DEVINC(net, statname, mod, idev, field) \ ({ \ struct inet6_dev *_idev = (idev); \ if (likely(_idev != NULL)) \ - SNMP_INC_STATS##modifier((_idev)->stats.statname, (field)); \ - SNMP_INC_STATS##modifier((net)->mib.statname##_statistics, (field));\ + mod##SNMP_INC_STATS64((_idev)->stats.statname, (field));\ + mod##SNMP_INC_STATS64((net)->mib.statname##_statistics, (field));\ }) /* per device counters are atomic_long_t */ -#define _DEVINCATOMIC(net, statname, modifier, idev, field)\ +#define _DEVINCATOMIC(net, statname, mod, idev, field) \ ({ \ struct inet6_dev *_idev = (idev); \ if (likely(_idev != NULL)) \ 
SNMP_INC_STATS_ATOMIC_LONG((_idev)->stats.statname##dev, (field)); \ - SNMP_INC_STATS##modifier((net)->mib.statname##_statistics, (field));\ + mod##SNMP_INC_STATS((net)->mib.statname##_statistics, (field));\ }) /* per device and per net counters are atomic_long_t */ @@ -147,40 +147,40 @@ extern int sysctl_mld_qrv;
[PATCH net-next 10/17] net: rename ICMP6_INC_STATS_BH()
Rename ICMP6_INC_STATS_BH() to __ICMP6_INC_STATS() Signed-off-by: Eric Dumazet--- include/net/ipv6.h | 2 +- net/dccp/ipv6.c | 8 net/ipv6/icmp.c | 10 +- net/ipv6/tcp_ipv6.c | 4 ++-- net/ipv6/udp.c | 4 ++-- net/sctp/ipv6.c | 2 +- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index e93e947d04ff..a620fc56e2f5 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -179,7 +179,7 @@ extern int sysctl_mld_qrv; _DEVUPD(net, ipv6, 64_BH, idev, field, val) #define ICMP6_INC_STATS(net, idev, field) \ _DEVINCATOMIC(net, icmpv6, , idev, field) -#define ICMP6_INC_STATS_BH(net, idev, field) \ +#define __ICMP6_INC_STATS(net, idev, field)\ _DEVINCATOMIC(net, icmpv6, _BH, idev, field) #define ICMP6MSGOUT_INC_STATS(net, idev, field)\ diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index e175b8fe1a87..323c6b595e31 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -80,8 +80,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (skb->len < offset + sizeof(*dh) || skb->len < offset + __dccp_basic_hdr_len(dh)) { - ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), - ICMP6_MIB_INERRORS); + __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), + ICMP6_MIB_INERRORS); return; } @@ -91,8 +91,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, inet6_iif(skb)); if (!sk) { - ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), - ICMP6_MIB_INERRORS); + __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), + ICMP6_MIB_INERRORS); return; } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 6b573ebe49de..823a1fc576e3 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -622,7 +622,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) np->dontfrag, _unused); if (err) { - ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS); + __ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); } else { err = icmpv6_push_pending_frames(sk, , _hdr, @@ -674,7 +674,7 @@ void 
icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) return; out: - ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); + __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); } /* @@ -710,7 +710,7 @@ static int icmpv6_rcv(struct sk_buff *skb) skb_set_network_header(skb, nh); } - ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INMSGS); + __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS); saddr = _hdr(skb)->saddr; daddr = _hdr(skb)->daddr; @@ -812,9 +812,9 @@ static int icmpv6_rcv(struct sk_buff *skb) return 0; csum_error: - ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS); + __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS); discard_it: - ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS); + __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS); drop_no_count: kfree_skb(skb); return 0; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 52ca8fac7429..78c45c027acc 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -336,8 +336,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, skb->dev->ifindex); if (!sk) { - ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), - ICMP6_MIB_INERRORS); + __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), + ICMP6_MIB_INERRORS); return; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1243d22e2b1d..1ba5a74ac18f 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -521,8 +521,8 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, inet6_iif(skb), udptable, skb); if (!sk) { - ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), - ICMP6_MIB_INERRORS); + __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), + ICMP6_MIB_INERRORS); return; } diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index ce46f1c7f133..0657d18a85bf 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -162,7 +162,7 @@ static void
[PATCH net-next 06/17] net: tcp: rename TCP_INC_STATS_BH
Rename TCP_INC_STATS_BH() to __TCP_INC_STATS() Signed-off-by: Eric Dumazet--- include/net/tcp.h| 2 +- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_input.c | 8 net/ipv4/tcp_ipv4.c | 16 net/ipv4/tcp_minisocks.c | 4 ++-- net/ipv4/tcp_output.c| 4 ++-- net/ipv6/tcp_ipv6.c | 14 +++--- 7 files changed, 25 insertions(+), 25 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index cfe15f712164..939ebd5320a9 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -332,7 +332,7 @@ bool tcp_check_oom(struct sock *sk, int shift); extern struct proto tcp_prot; #define TCP_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.tcp_statistics, field) -#define TCP_INC_STATS_BH(net, field) SNMP_INC_STATS_BH((net)->mib.tcp_statistics, field) +#define __TCP_INC_STATS(net, field) SNMP_INC_STATS_BH((net)->mib.tcp_statistics, field) #define TCP_DEC_STATS(net, field) SNMP_DEC_STATS((net)->mib.tcp_statistics, field) #define TCP_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 55ef55ac9e38..96833433c2c3 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3091,7 +3091,7 @@ void tcp_done(struct sock *sk) struct request_sock *req = tcp_sk(sk)->fastopen_rsk; if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS); + __TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS); tcp_set_state(sk, TCP_CLOSE); tcp_clear_xmit_timers(sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 967520dbe0bf..dad8d93262ed 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5233,7 +5233,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, if (th->syn) { syn_challenge: if (syn_inerr) - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); + __TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE); tcp_send_challenge_ack(sk, skb); goto discard; @@ -5349,7 +5349,7 @@ void 
tcp_rcv_established(struct sock *sk, struct sk_buff *skb, tcp_data_snd_check(sk); return; } else { /* Header too small */ - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); + __TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); goto discard; } } else { @@ -5456,8 +5456,8 @@ step5: return; csum_error: - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS); - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); + __TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); + __TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); discard: tcp_drop(sk, skb); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ebd8f3b9e61b..378e92d41c6c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -697,8 +697,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, , arg.iov[0].iov_len); - TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); - TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); + __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); + __TCP_INC_STATS(net, TCP_MIB_OUTRSTS); #ifdef CONFIG_TCP_MD5SIG out: @@ -779,7 +779,7 @@ static void tcp_v4_send_ack(struct net *net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, , arg.iov[0].iov_len); - TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); + __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); } static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) @@ -1432,8 +1432,8 @@ discard: return 0; csum_err: - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS); - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); + __TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); + __TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); goto discard; } EXPORT_SYMBOL(tcp_v4_do_rcv); @@ -1547,7 +1547,7 @@ int tcp_v4_rcv(struct sk_buff *skb) goto discard_it; /* Count it even if it's bad */ - TCP_INC_STATS_BH(net, TCP_MIB_INSEGS); + __TCP_INC_STATS(net, TCP_MIB_INSEGS); if (!pskb_may_pull(skb, sizeof(struct tcphdr))) goto discard_it; @@ -1679,9 +1679,9 @@ no_tcp_socket: if (tcp_checksum_complete(skb)) { csum_error: - TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS); + 
__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); bad_packet: - TCP_INC_STATS_BH(net, TCP_MIB_INERRS); +
[PATCH net-next 02/17] dccp: rename DCCP_INC_STATS_BH()
Rename DCCP_INC_STATS_BH() to __DCCP_INC_STATS() Signed-off-by: Eric Dumazet--- net/dccp/dccp.h | 6 +++--- net/dccp/input.c | 2 +- net/dccp/ipv4.c | 8 net/dccp/ipv6.c | 8 net/dccp/minisocks.c | 2 +- net/dccp/options.c | 2 +- net/dccp/timer.c | 4 ++-- 7 files changed, 16 insertions(+), 16 deletions(-) diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index b0e28d24e1a7..a4c6e2fed91c 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -198,9 +198,9 @@ struct dccp_mib { }; DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics); -#define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field) -#define DCCP_INC_STATS_BH(field)SNMP_INC_STATS_BH(dccp_statistics, field) -#define DCCP_DEC_STATS(field) SNMP_DEC_STATS(dccp_statistics, field) +#define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field) +#define __DCCP_INC_STATS(field)SNMP_INC_STATS_BH(dccp_statistics, field) +#define DCCP_DEC_STATS(field) SNMP_DEC_STATS(dccp_statistics, field) /* * Checksumming routines diff --git a/net/dccp/input.c b/net/dccp/input.c index 3bd14e885396..2437ecc13b82 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -359,7 +359,7 @@ send_sync: goto discard; } - DCCP_INC_STATS_BH(DCCP_MIB_INERRS); + __DCCP_INC_STATS(DCCP_MIB_INERRS); discard: __kfree_skb(skb); return 0; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index f6d183f8f332..4b78067669d6 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -318,7 +318,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) case DCCP_REQUESTING: case DCCP_RESPOND: if (!sock_owned_by_user(sk)) { - DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); + __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS); sk->sk_err = err; sk->sk_error_report(sk); @@ -533,8 +533,8 @@ static void dccp_v4_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb) bh_unlock_sock(ctl_sk); if (net_xmit_eval(err) == 0) { - DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); - DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); + __DCCP_INC_STATS(DCCP_MIB_OUTSEGS); + __DCCP_INC_STATS(DCCP_MIB_OUTRSTS); } 
out: dst_release(dst); @@ -637,7 +637,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) drop_and_free: reqsk_free(req); drop: - DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); + __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS); return -1; } EXPORT_SYMBOL_GPL(dccp_v4_conn_request); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 8ceb3cebcad4..e175b8fe1a87 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -156,7 +156,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, case DCCP_RESPOND: /* Cannot happen. It can, it SYNs are crossed. --ANK */ if (!sock_owned_by_user(sk)) { - DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); + __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS); sk->sk_err = err; /* * Wake people up to see the error @@ -277,8 +277,8 @@ static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb) if (!IS_ERR(dst)) { skb_dst_set(skb, dst); ip6_xmit(ctl_sk, skb, , NULL, 0); - DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); - DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); + __DCCP_INC_STATS(DCCP_MIB_OUTSEGS); + __DCCP_INC_STATS(DCCP_MIB_OUTRSTS); return; } @@ -378,7 +378,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) drop_and_free: reqsk_free(req); drop: - DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); + __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS); return -1; } diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 1994f8af646b..53eddf99e4f6 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -127,7 +127,7 @@ struct sock *dccp_create_openreq_child(const struct sock *sk, } dccp_init_xmit_timers(newsk); - DCCP_INC_STATS_BH(DCCP_MIB_PASSIVEOPENS); + __DCCP_INC_STATS(DCCP_MIB_PASSIVEOPENS); } return newsk; } diff --git a/net/dccp/options.c b/net/dccp/options.c index 9bce31886bda..b82b7ee9a1d2 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -253,7 +253,7 @@ out_nonsensical_length: return 0; out_invalid_option: - DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT); + __DCCP_INC_STATS(DCCP_MIB_INVALIDOPT); rc 
= DCCP_RESET_CODE_OPTION_ERROR; out_featneg_failed: DCCP_WARN("DCCP(%p): Option %d (len=%d) error=%u\n", sk, opt, len, rc); diff
[PATCH net-next 07/17] net: icmp: rename ICMPMSGIN_INC_STATS_BH()
Remove the misleading _BH suffix: ICMPMSGIN_INC_STATS_BH() expands to SNMP_INC_STATS_ATOMIC_LONG(), the same atomic helper used by ICMPMSGOUT_INC_STATS(), so rename it to ICMPMSGIN_INC_STATS(). Signed-off-by: Eric Dumazet--- include/net/icmp.h | 2 +- net/ipv4/icmp.c| 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/icmp.h b/include/net/icmp.h index 5a60ce819078..25edb740c648 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h @@ -32,7 +32,7 @@ extern const struct icmp_err icmp_err_convert[]; #define ICMP_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.icmp_statistics, field) #define __ICMP_INC_STATS(net, field) SNMP_INC_STATS_BH((net)->mib.icmp_statistics, field) #define ICMPMSGOUT_INC_STATS(net, field) SNMP_INC_STATS_ATOMIC_LONG((net)->mib.icmpmsg_statistics, field+256) -#define ICMPMSGIN_INC_STATS_BH(net, field) SNMP_INC_STATS_ATOMIC_LONG((net)->mib.icmpmsg_statistics, field) +#define ICMPMSGIN_INC_STATS(net, field) SNMP_INC_STATS_ATOMIC_LONG((net)->mib.icmpmsg_statistics, field) struct dst_entry; struct net_proto_family; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 995fef9c5099..38abe70e595f 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1006,7 +1006,7 @@ int icmp_rcv(struct sk_buff *skb) icmph = icmp_hdr(skb); - ICMPMSGIN_INC_STATS_BH(net, icmph->type); + ICMPMSGIN_INC_STATS(net, icmph->type); /* * 18 is the highest 'known' ICMP type. Anything else is a mystery * -- 2.8.0.rc3.226.g39d4020
[PATCH net-next 16/17] ipv6: kill ICMP6MSGIN_INC_STATS_BH()
IPv6 ICMP message stats are atomic counters anyway, so the _BH variants add nothing: remove ICMP6MSGOUT_INC_STATS_BH() and rename ICMP6MSGIN_INC_STATS_BH() to ICMP6MSGIN_INC_STATS(). Signed-off-by: Eric Dumazet--- include/net/ipv6.h | 4 +--- net/ipv6/icmp.c| 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 9f3b53f2819b..64ce3670d40a 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -184,9 +184,7 @@ extern int sysctl_mld_qrv; #define ICMP6MSGOUT_INC_STATS(net, idev, field)\ _DEVINC_ATOMIC_ATOMIC(net, icmpv6msg, idev, field +256) -#define ICMP6MSGOUT_INC_STATS_BH(net, idev, field) \ - _DEVINC_ATOMIC_ATOMIC(net, icmpv6msg, idev, field +256) -#define ICMP6MSGIN_INC_STATS_BH(net, idev, field) \ +#define ICMP6MSGIN_INC_STATS(net, idev, field) \ _DEVINC_ATOMIC_ATOMIC(net, icmpv6msg, idev, field) struct ip6_ra_chain { diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 823a1fc576e3..23b9a4cc418e 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -728,7 +728,7 @@ static int icmpv6_rcv(struct sk_buff *skb) type = hdr->icmp6_type; - ICMP6MSGIN_INC_STATS_BH(dev_net(dev), idev, type); + ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type); switch (type) { case ICMPV6_ECHO_REQUEST: -- 2.8.0.rc3.226.g39d4020
[PATCH net-next 14/17] ipv6: rename IP6_INC_STATS_BH()
Rename IP6_INC_STATS_BH() to __IP6_INC_STATS() and IP6_ADD_STATS_BH() to __IP6_ADD_STATS() Signed-off-by: Eric Dumazet--- include/net/ipv6.h | 4 +-- net/bridge/br_netfilter_ipv6.c | 10 +++ net/ipv6/exthdrs.c | 66 +- net/ipv6/ip6_input.c | 28 +- net/ipv6/ip6_output.c | 34 +++--- net/ipv6/ip6mr.c | 8 ++--- net/ipv6/reassembly.c | 32 ++-- 7 files changed, 91 insertions(+), 91 deletions(-) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index a620fc56e2f5..aba8760dd108 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -167,11 +167,11 @@ extern int sysctl_mld_qrv; #define IP6_INC_STATS(net, idev,field) \ _DEVINC(net, ipv6, 64, idev, field) -#define IP6_INC_STATS_BH(net, idev,field) \ +#define __IP6_INC_STATS(net, idev,field) \ _DEVINC(net, ipv6, 64_BH, idev, field) #define IP6_ADD_STATS(net, idev,field,val) \ _DEVADD(net, ipv6, 64, idev, field, val) -#define IP6_ADD_STATS_BH(net, idev,field,val) \ +#define __IP6_ADD_STATS(net, idev,field,val) \ _DEVADD(net, ipv6, 64_BH, idev, field, val) #define IP6_UPD_PO_STATS(net, idev,field,val) \ _DEVUPD(net, ipv6, 64, idev, field, val) diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index d61f56efc8dc..5e59a8457e7b 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -122,13 +122,13 @@ int br_validate_ipv6(struct net *net, struct sk_buff *skb) if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { if (pkt_len + ip6h_len > skb->len) { - IP6_INC_STATS_BH(net, idev, -IPSTATS_MIB_INTRUNCATEDPKTS); + __IP6_INC_STATS(net, idev, + IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) { - IP6_INC_STATS_BH(net, idev, -IPSTATS_MIB_INDISCARDS); + __IP6_INC_STATS(net, idev, + IPSTATS_MIB_INDISCARDS); goto drop; } } @@ -142,7 +142,7 @@ int br_validate_ipv6(struct net *net, struct sk_buff *skb) return 0; inhdr_error: - IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); drop: return -1; } 
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index ea7c4d64a00a..8de5dd7aaa05 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -258,8 +258,8 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + ((skb_transport_header(skb)[1] + 1) << 3 { - IP6_INC_STATS_BH(dev_net(dst->dev), ip6_dst_idev(dst), -IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), + IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -1; } @@ -280,8 +280,8 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) return 1; } - IP6_INC_STATS_BH(dev_net(dst->dev), -ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(dev_net(dst->dev), + ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); return -1; } @@ -309,8 +309,8 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + ((skb_transport_header(skb)[1] + 1) << 3 { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), -IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -1; } @@ -319,8 +319,8 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) if (ipv6_addr_is_multicast(_hdr(skb)->daddr) || skb->pkt_type != PACKET_HOST) { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), -IPSTATS_MIB_INADDRERRORS); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; } @@ -334,8 +334,8 @@ looped_back: * processed by own */
[PATCH net-next 15/17] ipv6: rename IP6_UPD_PO_STATS_BH()
Rename IP6_UPD_PO_STATS_BH() to __IP6_UPD_PO_STATS() Signed-off-by: Eric Dumazet--- include/net/ipv6.h | 2 +- net/ipv6/ip6_input.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index aba8760dd108..9f3b53f2819b 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -175,7 +175,7 @@ extern int sysctl_mld_qrv; _DEVADD(net, ipv6, 64_BH, idev, field, val) #define IP6_UPD_PO_STATS(net, idev,field,val) \ _DEVUPD(net, ipv6, 64, idev, field, val) -#define IP6_UPD_PO_STATS_BH(net, idev,field,val) \ +#define __IP6_UPD_PO_STATS(net, idev,field,val) \ _DEVUPD(net, ipv6, 64_BH, idev, field, val) #define ICMP6_INC_STATS(net, idev, field) \ _DEVINCATOMIC(net, icmpv6, , idev, field) diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 218bb906c620..6ed56012005d 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -78,7 +78,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt idev = __in6_dev_get(skb->dev); - IP6_UPD_PO_STATS_BH(net, idev, IPSTATS_MIB_IN, skb->len); + __IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_IN, skb->len); if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL || !idev || unlikely(idev->cnf.disable_ipv6)) { @@ -297,7 +297,7 @@ int ip6_mc_input(struct sk_buff *skb) const struct ipv6hdr *hdr; bool deliver; - IP6_UPD_PO_STATS_BH(dev_net(skb_dst(skb)->dev), + __IP6_UPD_PO_STATS(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INMCAST, skb->len); -- 2.8.0.rc3.226.g39d4020
[PATCH net-next 09/17] net: rename IP_INC_STATS_BH()
Rename IP_INC_STATS_BH() to __IP_INC_STATS(), to better express this is used in non preemptible context. Signed-off-by: Eric Dumazet--- include/net/ip.h| 2 +- net/bridge/br_netfilter_hooks.c | 6 +++--- net/dccp/ipv4.c | 2 +- net/ipv4/inet_connection_sock.c | 4 ++-- net/ipv4/ip_forward.c | 4 ++-- net/ipv4/ip_fragment.c | 14 +++--- net/ipv4/ip_input.c | 20 ++-- net/ipv4/route.c| 6 +++--- 8 files changed, 29 insertions(+), 29 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index ae0e85d018e8..0be0af3017ba 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -187,7 +187,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, unsigned int len); #define IP_INC_STATS(net, field) SNMP_INC_STATS64((net)->mib.ip_statistics, field) -#define IP_INC_STATS_BH(net, field) SNMP_INC_STATS64_BH((net)->mib.ip_statistics, field) +#define __IP_INC_STATS(net, field) SNMP_INC_STATS64_BH((net)->mib.ip_statistics, field) #define IP_ADD_STATS(net, field, val) SNMP_ADD_STATS64((net)->mib.ip_statistics, field, val) #define IP_ADD_STATS_BH(net, field, val) SNMP_ADD_STATS64_BH((net)->mib.ip_statistics, field, val) #define IP_UPD_PO_STATS(net, field, val) SNMP_UPD_PO_STATS64((net)->mib.ip_statistics, field, val) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 44114a94c576..2d25979273a6 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -217,13 +217,13 @@ static int br_validate_ipv4(struct net *net, struct sk_buff *skb) len = ntohs(iph->tot_len); if (skb->len < len) { - IP_INC_STATS_BH(net, IPSTATS_MIB_INTRUNCATEDPKTS); + __IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } else if (len < (iph->ihl*4)) goto inhdr_error; if (pskb_trim_rcsum(skb, len)) { - IP_INC_STATS_BH(net, IPSTATS_MIB_INDISCARDS); + __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS); goto drop; } @@ -236,7 +236,7 @@ static int br_validate_ipv4(struct net *net, struct sk_buff *skb) return 0; inhdr_error: - 
IP_INC_STATS_BH(net, IPSTATS_MIB_INHDRERRORS); + __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); drop: return -1; } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 14e30584e59d..a9c75e79ba99 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -462,7 +462,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, security_skb_classify_flow(skb, flowi4_to_flowi()); rt = ip_route_output_flow(net, , sk); if (IS_ERR(rt)) { - IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); + __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index ab69da2d2a77..7ce112aa3a7b 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -427,7 +427,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, route_err: ip_rt_put(rt); no_route: - IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); + __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } EXPORT_SYMBOL_GPL(inet_csk_route_req); @@ -466,7 +466,7 @@ route_err: ip_rt_put(rt); no_route: rcu_read_unlock(); - IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); + __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } EXPORT_SYMBOL_GPL(inet_csk_route_child_sock); diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index af18f1e4889e..42fbd59b0ba8 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -65,7 +65,7 @@ static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *s { struct ip_options *opt = &(IPCB(skb)->opt); - IP_INC_STATS_BH(net, IPSTATS_MIB_OUTFORWDATAGRAMS); + __IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS); IP_ADD_STATS_BH(net, IPSTATS_MIB_OUTOCTETS, skb->len); if (unlikely(opt->optlen)) @@ -157,7 +157,7 @@ sr_failed: too_many_hops: /* Tell the sender its packet died... 
*/ - IP_INC_STATS_BH(net, IPSTATS_MIB_INHDRERRORS); + __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0); drop: kfree_skb(skb); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index efbd47d1a531..bbe7f72db9c1 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -204,14 +204,14 @@ static void ip_expire(unsigned long arg) goto out; ipq_kill(qp); - IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); + __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); if
[PATCH net-next 12/17] net: rename IP_UPD_PO_STATS_BH()
Rename IP_UPD_PO_STATS_BH() to __IP_UPD_PO_STATS() Signed-off-by: Eric Dumazet--- include/net/ip.h| 2 +- net/ipv4/ip_input.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index 0df4809bc68a..55f5de50a564 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -191,7 +191,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, #define IP_ADD_STATS(net, field, val) SNMP_ADD_STATS64((net)->mib.ip_statistics, field, val) #define __IP_ADD_STATS(net, field, val) SNMP_ADD_STATS64_BH((net)->mib.ip_statistics, field, val) #define IP_UPD_PO_STATS(net, field, val) SNMP_UPD_PO_STATS64((net)->mib.ip_statistics, field, val) -#define IP_UPD_PO_STATS_BH(net, field, val) SNMP_UPD_PO_STATS64_BH((net)->mib.ip_statistics, field, val) +#define __IP_UPD_PO_STATS(net, field, val) SNMP_UPD_PO_STATS64_BH((net)->mib.ip_statistics, field, val) #define NET_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.net_statistics, field) #define NET_INC_STATS_BH(net, field) SNMP_INC_STATS_BH((net)->mib.net_statistics, field) #define NET_ADD_STATS(net, field, adnd) SNMP_ADD_STATS((net)->mib.net_statistics, field, adnd) diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 11f34e421270..8fda63d78435 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -358,9 +358,9 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) rt = skb_rtable(skb); if (rt->rt_type == RTN_MULTICAST) { - IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INMCAST, skb->len); + __IP_UPD_PO_STATS(net, IPSTATS_MIB_INMCAST, skb->len); } else if (rt->rt_type == RTN_BROADCAST) { - IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INBCAST, skb->len); + __IP_UPD_PO_STATS(net, IPSTATS_MIB_INBCAST, skb->len); } else if (skb->pkt_type == PACKET_BROADCAST || skb->pkt_type == PACKET_MULTICAST) { struct in_device *in_dev = __in_dev_get_rcu(skb->dev); @@ -409,7 +409,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, net = 
dev_net(dev); - IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_IN, skb->len); + __IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len); skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) { -- 2.8.0.rc3.226.g39d4020
[PATCH net-next 08/17] net: sctp: rename SCTP_INC_STATS_BH()
Rename SCTP_INC_STATS_BH() to __SCTP_INC_STATS() Signed-off-by: Eric Dumazet--- include/net/sctp/sctp.h | 2 +- net/sctp/input.c| 12 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 5a2c4c3307a7..5607c009f738 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -206,7 +206,7 @@ extern int sysctl_sctp_wmem[3]; /* SCTP SNMP MIB stats handlers */ #define SCTP_INC_STATS(net, field) SNMP_INC_STATS((net)->sctp.sctp_statistics, field) -#define SCTP_INC_STATS_BH(net, field) SNMP_INC_STATS_BH((net)->sctp.sctp_statistics, field) +#define __SCTP_INC_STATS(net, field) SNMP_INC_STATS_BH((net)->sctp.sctp_statistics, field) #define SCTP_DEC_STATS(net, field) SNMP_DEC_STATS((net)->sctp.sctp_statistics, field) /* sctp mib definitions */ diff --git a/net/sctp/input.c b/net/sctp/input.c index f8eca792dbcf..12332fc3eb44 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -84,7 +84,7 @@ static inline int sctp_rcv_checksum(struct net *net, struct sk_buff *skb) if (val != cmp) { /* CRC failure, dump it. 
*/ - SCTP_INC_STATS_BH(net, SCTP_MIB_CHECKSUMERRORS); + __SCTP_INC_STATS(net, SCTP_MIB_CHECKSUMERRORS); return -1; } return 0; @@ -122,7 +122,7 @@ int sctp_rcv(struct sk_buff *skb) if (skb->pkt_type != PACKET_HOST) goto discard_it; - SCTP_INC_STATS_BH(net, SCTP_MIB_INSCTPPACKS); + __SCTP_INC_STATS(net, SCTP_MIB_INSCTPPACKS); if (skb_linearize(skb)) goto discard_it; @@ -208,7 +208,7 @@ int sctp_rcv(struct sk_buff *skb) */ if (!asoc) { if (sctp_rcv_ootb(skb)) { - SCTP_INC_STATS_BH(net, SCTP_MIB_OUTOFBLUES); + __SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES); goto discard_release; } } @@ -264,9 +264,9 @@ int sctp_rcv(struct sk_buff *skb) skb = NULL; /* sctp_chunk_free already freed the skb */ goto discard_release; } - SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_BACKLOG); + __SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_BACKLOG); } else { - SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_SOFTIRQ); + __SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_SOFTIRQ); sctp_inq_push(>rcvr->inqueue, chunk); } @@ -281,7 +281,7 @@ int sctp_rcv(struct sk_buff *skb) return 0; discard_it: - SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_DISCARDS); + __SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_DISCARDS); kfree_skb(skb); return 0; -- 2.8.0.rc3.226.g39d4020
[PATCH net-next 01/17] net: snmp: kill various STATS_USER() helpers
In the old days (before linux-3.0), SNMP counters were duplicated, one for user context, and one for BH context. After commit 8f0ea0fe3a03 ("snmp: reduce percpu needs by 50%") we have a single copy, and what really matters is preemption being enabled or disabled, since we use this_cpu_inc() or __this_cpu_inc() respectively. We therefore kill SNMP_INC_STATS_USER(), SNMP_ADD_STATS_USER(), NET_INC_STATS_USER(), NET_ADD_STATS_USER(), SCTP_INC_STATS_USER(), SNMP_INC_STATS64_USER(), SNMP_ADD_STATS64_USER(), TCP_ADD_STATS_USER(), UDP_INC_STATS_USER(), UDP6_INC_STATS_USER(), and XFRM_INC_STATS_USER() Following patches will rename __BH helpers to make clear their usage is not tied to BH being disabled. Signed-off-by: Eric Dumazet--- include/net/ip.h| 2 -- include/net/sctp/sctp.h | 1 - include/net/snmp.h | 22 +- include/net/tcp.h | 9 - include/net/udp.h | 14 +++--- include/net/xfrm.h | 2 -- net/ipv4/tcp.c | 12 ++-- net/ipv4/udp.c | 24 net/ipv6/udp.c | 49 - net/sctp/chunk.c| 2 +- 10 files changed, 59 insertions(+), 78 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index 93725e546758..ae0e85d018e8 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -194,10 +194,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, #define IP_UPD_PO_STATS_BH(net, field, val) SNMP_UPD_PO_STATS64_BH((net)->mib.ip_statistics, field, val) #define NET_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.net_statistics, field) #define NET_INC_STATS_BH(net, field) SNMP_INC_STATS_BH((net)->mib.net_statistics, field) -#define NET_INC_STATS_USER(net, field) SNMP_INC_STATS_USER((net)->mib.net_statistics, field) #define NET_ADD_STATS(net, field, adnd) SNMP_ADD_STATS((net)->mib.net_statistics, field, adnd) #define NET_ADD_STATS_BH(net, field, adnd) SNMP_ADD_STATS_BH((net)->mib.net_statistics, field, adnd) -#define NET_ADD_STATS_USER(net, field, adnd) SNMP_ADD_STATS_USER((net)->mib.net_statistics, field, adnd) u64 snmp_get_cpu_field(void __percpu *mib, int cpu, int offct); 
unsigned long snmp_fold_field(void __percpu *mib, int offt); diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 3f1c0ff7d4b6..5a2c4c3307a7 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -207,7 +207,6 @@ extern int sysctl_sctp_wmem[3]; /* SCTP SNMP MIB stats handlers */ #define SCTP_INC_STATS(net, field) SNMP_INC_STATS((net)->sctp.sctp_statistics, field) #define SCTP_INC_STATS_BH(net, field) SNMP_INC_STATS_BH((net)->sctp.sctp_statistics, field) -#define SCTP_INC_STATS_USER(net, field) SNMP_INC_STATS_USER((net)->sctp.sctp_statistics, field) #define SCTP_DEC_STATS(net, field) SNMP_DEC_STATS((net)->sctp.sctp_statistics, field) /* sctp mib definitions */ diff --git a/include/net/snmp.h b/include/net/snmp.h index 35512ac6dcfb..56239fc05c51 100644 --- a/include/net/snmp.h +++ b/include/net/snmp.h @@ -126,9 +126,6 @@ struct linux_xfrm_mib { #define SNMP_INC_STATS_BH(mib, field) \ __this_cpu_inc(mib->mibs[field]) -#define SNMP_INC_STATS_USER(mib, field)\ - this_cpu_inc(mib->mibs[field]) - #define SNMP_INC_STATS_ATOMIC_LONG(mib, field) \ atomic_long_inc(>mibs[field]) @@ -141,9 +138,6 @@ struct linux_xfrm_mib { #define SNMP_ADD_STATS_BH(mib, field, addend) \ __this_cpu_add(mib->mibs[field], addend) -#define SNMP_ADD_STATS_USER(mib, field, addend)\ - this_cpu_add(mib->mibs[field], addend) - #define SNMP_ADD_STATS(mib, field, addend) \ this_cpu_add(mib->mibs[field], addend) #define SNMP_UPD_PO_STATS(mib, basefield, addend) \ @@ -170,18 +164,14 @@ struct linux_xfrm_mib { u64_stats_update_end(>syncp); \ } while (0) -#define SNMP_ADD_STATS64_USER(mib, field, addend) \ +#define SNMP_ADD_STATS64(mib, field, addend) \ do {\ - local_bh_disable(); \ + preempt_disable(); \ SNMP_ADD_STATS64_BH(mib, field, addend);\ - local_bh_enable(); \ + preempt_enable(); \ } while (0) -#define SNMP_ADD_STATS64(mib, field, addend) \ - SNMP_ADD_STATS64_USER(mib, field, addend) - #define SNMP_INC_STATS64_BH(mib, field) SNMP_ADD_STATS64_BH(mib, field, 1) -#define 
SNMP_INC_STATS64_USER(mib, field) SNMP_ADD_STATS64_USER(mib, field, 1) #define
[PATCH net-next 04/17] net: udp: rename UDP_INC_STATS_BH()
Rename UDP_INC_STATS_BH() to __UDP_INC_STATS(), and UDP6_INC_STATS_BH() to __UDP6_INC_STATS() Signed-off-by: Eric Dumazet--- include/net/udp.h | 12 ++-- net/ipv4/udp.c| 46 +++--- net/ipv6/udp.c| 38 +++--- net/rxrpc/ar-input.c | 4 ++-- net/sunrpc/xprtsock.c | 4 ++-- 5 files changed, 52 insertions(+), 52 deletions(-) diff --git a/include/net/udp.h b/include/net/udp.h index 2f37f689d85a..bf6a7c29cf6a 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -292,11 +292,11 @@ struct sock *udp6_lib_lookup_skb(struct sk_buff *skb, #define UDP_INC_STATS(net, field, is_udplite)do { \ if (is_udplite) SNMP_INC_STATS((net)->mib.udplite_statistics, field); \ elseSNMP_INC_STATS((net)->mib.udp_statistics, field); } while(0) -#define UDP_INC_STATS_BH(net, field, is_udplite) do { \ +#define __UDP_INC_STATS(net, field, is_udplite) do { \ if (is_udplite) SNMP_INC_STATS_BH((net)->mib.udplite_statistics, field); \ elseSNMP_INC_STATS_BH((net)->mib.udp_statistics, field); } while(0) -#define UDP6_INC_STATS_BH(net, field, is_udplite) do { \ +#define __UDP6_INC_STATS(net, field, is_udplite) do { \ if (is_udplite) SNMP_INC_STATS_BH((net)->mib.udplite_stats_in6, field);\ elseSNMP_INC_STATS_BH((net)->mib.udp_stats_in6, field); \ } while(0) @@ -306,15 +306,15 @@ struct sock *udp6_lib_lookup_skb(struct sk_buff *skb, } while(0) #if IS_ENABLED(CONFIG_IPV6) -#define UDPX_INC_STATS_BH(sk, field) \ +#define __UDPX_INC_STATS(sk, field)\ do { \ if ((sk)->sk_family == AF_INET) \ - UDP_INC_STATS_BH(sock_net(sk), field, 0); \ + __UDP_INC_STATS(sock_net(sk), field, 0);\ else\ - UDP6_INC_STATS_BH(sock_net(sk), field, 0); \ + __UDP6_INC_STATS(sock_net(sk), field, 0); \ } while (0) #else -#define UDPX_INC_STATS_BH(sk, field) UDP_INC_STATS_BH(sock_net(sk), field, 0) +#define __UDPX_INC_STATS(sk, field) __UDP_INC_STATS(sock_net(sk), field, 0) #endif /* /proc */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 6b004b838966..093284c5c03b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1242,10 +1242,10 
@@ static unsigned int first_packet_length(struct sock *sk) spin_lock_bh(>lock); while ((skb = skb_peek(rcvq)) != NULL && udp_lib_checksum_complete(skb)) { - UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, -IS_UDPLITE(sk)); - UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, -IS_UDPLITE(sk)); + __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, + IS_UDPLITE(sk)); + __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, + IS_UDPLITE(sk)); atomic_inc(>sk_drops); __skb_unlink(skb, rcvq); __skb_queue_tail(_kill, skb); @@ -1514,9 +1514,9 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) /* Note that an ENOMEM error is charged twice */ if (rc == -ENOMEM) - UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, -is_udplite); - UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + __UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS, + is_udplite); + __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); kfree_skb(skb); trace_udp_fail_queue_rcv_skb(rc, sk); return -1; @@ -1580,9 +1580,9 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) ret = encap_rcv(sk, skb); if (ret <= 0) { - UDP_INC_STATS_BH(sock_net(sk), -UDP_MIB_INDATAGRAMS, -is_udplite); + __UDP_INC_STATS(sock_net(sk), + UDP_MIB_INDATAGRAMS, + is_udplite); return -ret; } } @@ -1633,8 +1633,8 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) udp_csum_pull_header(skb); if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { -
[PATCH net-next 05/17] net: xfrm: kill XFRM_INC_STATS_BH()
Not used anymore. Signed-off-by: Eric Dumazet--- include/net/xfrm.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index dab9e1b82963..adfebd6f243c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -45,10 +45,8 @@ #ifdef CONFIG_XFRM_STATISTICS #define XFRM_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.xfrm_statistics, field) -#define XFRM_INC_STATS_BH(net, field) SNMP_INC_STATS_BH((net)->mib.xfrm_statistics, field) #else #define XFRM_INC_STATS(net, field) ((void)(net)) -#define XFRM_INC_STATS_BH(net, field) ((void)(net)) #endif -- 2.8.0.rc3.226.g39d4020
[PATCH net-next 03/17] net: rename ICMP_INC_STATS_BH()
Rename ICMP_INC_STATS_BH() to __ICMP_INC_STATS() Signed-off-by: Eric Dumazet--- include/net/icmp.h | 2 +- net/dccp/ipv4.c | 4 ++-- net/ipv4/icmp.c | 16 net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/udp.c | 2 +- net/sctp/input.c| 2 +- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/include/net/icmp.h b/include/net/icmp.h index 970028e13382..5a60ce819078 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h @@ -30,7 +30,7 @@ struct icmp_err { extern const struct icmp_err icmp_err_convert[]; #define ICMP_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.icmp_statistics, field) -#define ICMP_INC_STATS_BH(net, field) SNMP_INC_STATS_BH((net)->mib.icmp_statistics, field) +#define __ICMP_INC_STATS(net, field) SNMP_INC_STATS_BH((net)->mib.icmp_statistics, field) #define ICMPMSGOUT_INC_STATS(net, field) SNMP_INC_STATS_ATOMIC_LONG((net)->mib.icmpmsg_statistics, field+256) #define ICMPMSGIN_INC_STATS_BH(net, field) SNMP_INC_STATS_ATOMIC_LONG((net)->mib.icmpmsg_statistics, field) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 4b78067669d6..14e30584e59d 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -247,7 +247,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) if (skb->len < offset + sizeof(*dh) || skb->len < offset + __dccp_basic_hdr_len(dh)) { - ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); + __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); return; } @@ -256,7 +256,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) iph->saddr, ntohs(dh->dccph_sport), inet_iif(skb)); if (!sk) { - ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); + __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); return; } diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 6333489771ed..995fef9c5099 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -363,7 +363,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, icmp_param->data_len+icmp_param->head_len, icmp_param->head_len, ipc, rt, MSG_DONTWAIT) < 0) { - ICMP_INC_STATS_BH(sock_net(sk), ICMP_MIB_OUTERRORS); + 
__ICMP_INC_STATS(sock_net(sk), ICMP_MIB_OUTERRORS); ip_flush_pending_frames(sk); } else if ((skb = skb_peek(>sk_write_queue)) != NULL) { struct icmphdr *icmph = icmp_hdr(skb); @@ -744,7 +744,7 @@ static void icmp_socket_deliver(struct sk_buff *skb, u32 info) * avoid additional coding at protocol handlers. */ if (!pskb_may_pull(skb, iph->ihl * 4 + 8)) { - ICMP_INC_STATS_BH(dev_net(skb->dev), ICMP_MIB_INERRORS); + __ICMP_INC_STATS(dev_net(skb->dev), ICMP_MIB_INERRORS); return; } @@ -865,7 +865,7 @@ static bool icmp_unreach(struct sk_buff *skb) out: return true; out_err: - ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); + __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); return false; } @@ -877,7 +877,7 @@ out_err: static bool icmp_redirect(struct sk_buff *skb) { if (skb->len < sizeof(struct iphdr)) { - ICMP_INC_STATS_BH(dev_net(skb->dev), ICMP_MIB_INERRORS); + __ICMP_INC_STATS(dev_net(skb->dev), ICMP_MIB_INERRORS); return false; } @@ -956,7 +956,7 @@ static bool icmp_timestamp(struct sk_buff *skb) return true; out_err: - ICMP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ICMP_MIB_INERRORS); + __ICMP_INC_STATS(dev_net(skb_dst(skb)->dev), ICMP_MIB_INERRORS); return false; } @@ -996,7 +996,7 @@ int icmp_rcv(struct sk_buff *skb) skb_set_network_header(skb, nh); } - ICMP_INC_STATS_BH(net, ICMP_MIB_INMSGS); + __ICMP_INC_STATS(net, ICMP_MIB_INMSGS); if (skb_checksum_simple_validate(skb)) goto csum_error; @@ -1052,9 +1052,9 @@ drop: kfree_skb(skb); return 0; csum_error: - ICMP_INC_STATS_BH(net, ICMP_MIB_CSUMERRORS); + __ICMP_INC_STATS(net, ICMP_MIB_CSUMERRORS); error: - ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); + __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); goto drop; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d2a5763e5abc..ebd8f3b9e61b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -372,7 +372,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) th->dest, iph->saddr, ntohs(th->source), inet_iif(icmp_skb)); if (!sk) { - ICMP_INC_STATS_BH(net, 
ICMP_MIB_INERRORS); + __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); return; } if (sk->sk_state
[PATCH net-next 00/17] net: snmp: update SNMP methods
In the old days (before linux-3.0), SNMP counters were duplicated, one set for user context, and another one for BH context. After commit 8f0ea0fe3a03 ("snmp: reduce percpu needs by 50%") we have a single copy, and what really matters is preemption being enabled or disabled, since we use this_cpu_inc() or __this_cpu_inc() respectively. This patch series kills the obsolete STATS_USER() helpers, and renames all XXX_BH() helpers to __XXX() ones, to more closely match conventions used to update per cpu variables. This is probably going to hurt maintainers' job for a while, since cherry-picks will not be clean, but this had to be cleaned at one point. I am so sorry guys. Eric Dumazet (17): net: snmp: kill various STATS_USER() helpers dccp: rename DCCP_INC_STATS_BH() net: rename ICMP_INC_STATS_BH() net: udp: rename UDP_INC_STATS_BH() net: xfrm: kill XFRM_INC_STATS_BH() net: tcp: rename TCP_INC_STATS_BH net: icmp: rename ICMPMSGIN_INC_STATS_BH() net: sctp: rename SCTP_INC_STATS_BH() net: rename IP_INC_STATS_BH() net: rename ICMP6_INC_STATS_BH() net: rename IP_ADD_STATS_BH() net: rename IP_UPD_PO_STATS_BH() net: rename NET_{ADD|INC}_STATS_BH() ipv6: rename IP6_INC_STATS_BH() ipv6: rename IP6_UPD_PO_STATS_BH() ipv6: kill ICMP6MSGIN_INC_STATS_BH() net: snmp: kill STATS_BH macros include/net/icmp.h | 4 +- include/net/ip.h| 12 ++--- include/net/ipv6.h | 48 +- include/net/sctp/sctp.h | 7 ++- include/net/snmp.h | 44 ++-- include/net/tcp.h | 15 +++--- include/net/udp.h | 34 ++--- include/net/xfrm.h | 4 -- net/bridge/br_netfilter_hooks.c | 6 +-- net/bridge/br_netfilter_ipv6.c | 10 ++-- net/core/dev.c | 4 +- net/dccp/dccp.h | 6 +-- net/dccp/input.c| 2 +- net/dccp/ipv4.c | 24 - net/dccp/ipv6.c | 24 - net/dccp/minisocks.c| 2 +- net/dccp/options.c | 2 +- net/dccp/timer.c| 8 +-- net/ipv4/arp.c | 2 +- net/ipv4/icmp.c | 18 +++ net/ipv4/inet_connection_sock.c | 4 +- net/ipv4/inet_hashtables.c | 2 +- net/ipv4/inet_timewait_sock.c | 4 +- net/ipv4/ip_forward.c | 6 +-- net/ipv4/ip_fragment.c | 
14 +++--- net/ipv4/ip_input.c | 34 ++--- net/ipv4/route.c| 6 +-- net/ipv4/syncookies.c | 4 +- net/ipv4/tcp.c | 18 +++ net/ipv4/tcp_cdg.c | 20 net/ipv4/tcp_cubic.c| 20 net/ipv4/tcp_fastopen.c | 14 +++--- net/ipv4/tcp_input.c| 108 +--- net/ipv4/tcp_ipv4.c | 40 +++ net/ipv4/tcp_minisocks.c| 14 +++--- net/ipv4/tcp_output.c | 18 +++ net/ipv4/tcp_recovery.c | 4 +- net/ipv4/tcp_timer.c| 22 net/ipv4/udp.c | 72 +-- net/ipv6/exthdrs.c | 66 net/ipv6/icmp.c | 12 ++--- net/ipv6/inet6_hashtables.c | 2 +- net/ipv6/ip6_input.c| 32 ++-- net/ipv6/ip6_output.c | 34 ++--- net/ipv6/ip6mr.c| 8 +-- net/ipv6/reassembly.c | 32 ++-- net/ipv6/syncookies.c | 4 +- net/ipv6/tcp_ipv6.c | 34 ++--- net/ipv6/udp.c | 91 + net/rxrpc/ar-input.c| 4 +- net/sctp/chunk.c| 2 +- net/sctp/input.c| 16 +++--- net/sctp/ipv6.c | 2 +- net/sunrpc/xprtsock.c | 4 +- 54 files changed, 512 insertions(+), 531 deletions(-) -- 2.8.0.rc3.226.g39d4020
Re: [PATCH net-next v2 0/7] net: unify dst caching for tunnel devices
On Tue, 2016-02-16 at 20:22 -0500, David Miller wrote: > From: Paolo Abeni> Date: Fri, 12 Feb 2016 15:43:52 +0100 > > > This patch series try to unify the dst cache implementations currently > > present in the kernel, namely in ip_tunnel.c and ip6_tunnel.c, introducing a > > new generic implementation, replacing the existing ones, and then using > > the new implementation in other tunnel devices which currently lack it. > > > > The new dst implementation is compiled, as built-in, only if any device > > using > > it is enabled. > > > > Caching the dst for the tunnel remote address gives small, but measurable, > > performance improvement when tunneling over ipv4 (in the 2%-4% range) and > > significant ones when tunneling over ipv6 (roughly 60% when no > > fragmentation/segmentation take place and the tunnel local address > > is not specified). > > > > v2: > > - move the vxlan dst_cache usage inside the device lookup functions > > - fix usage after free for lwt tunnel moving the dst cache storage inside > > the dst_metadata, > > - sparse codying style cleanup > > Series applied, thanks for doing this work as it is a major improvement. Paolo, please check following warning : This might be caused by e09acddf873bf775b208b452a4c3a3fd26fa9427 ("ip_tunnel: replace dst_cache with generic implementation") [ 73.982267] BUG: using smp_processor_id() in preemptible [] code: ip/10604 [ 73.990978] caller is debug_smp_processor_id+0x17/0x20 [ 73.990981] CPU: 26 PID: 10604 Comm: ip Not tainted 4.6.0-dbx-DEV #1075 [ 73.990982] Hardware name: ... 
[ 73.990983] 881fc11d3b98 8140a51f 001a [ 73.990987] 81a585c5 881fc11d3bc8 8142700f 60bfa000e0c0 [ 73.990989] 881fcb6b0f00 9807f60a 881fcb6b0f00 881fc11d3bd8 [ 73.990992] Call Trace: [ 73.990996] [] dump_stack+0x67/0x98 [ 73.990998] [] check_preemption_disabled+0xef/0x100 [ 73.991000] [] debug_smp_processor_id+0x17/0x20 [ 73.991003] [] dst_cache_set_ip4+0x2c/0x70 [ 73.991006] [] ip_tunnel_bind_dev+0x101/0x170 [ 73.991008] [] ip_tunnel_ioctl+0x330/0x430 [ 73.991010] [] ? ip_tunnel_ioctl+0x5/0x430 [ 73.991012] [] ipgre_tunnel_ioctl+0xdb/0x160 [ 73.991015] [] ? rtnl_lock+0x17/0x20 [ 73.991017] [] dev_ifsioc+0x325/0x370 [ 73.991018] [] dev_ioctl+0xd2/0x630 [ 73.991022] [] sock_ioctl+0xd3/0x270 [ 73.991025] [] do_vfs_ioctl+0x93/0x6f0 [ 73.991026] [] ? sock_alloc_file+0x91/0x120 [ 73.991029] [] ? __fget_light+0x6c/0x90 [ 73.991031] [] SyS_ioctl+0x8b/0xa0 [ 73.991042] [] entry_SYSCALL_64_fastpath+0x18/0xa8 Thanks
Re: [RFC 12/20] net: dsa: rename dst->ds to dst->switches
On Wed, Apr 27, 2016 at 06:30:09PM -0400, Vivien Didelot wrote: > dsa_switch stores the net_device pointers in a "ports" member. Be > consistent and store the dsa_switch pointer in a "switches" member of > the dsa_switch_tree structure. > > This free us the "ds" member for a future dsa_switch list. NACK. Or you need to change ds absolutely everywhere, in all drivers and APIs. We cannot have ds meaning two different things. Andrew > > Signed-off-by: Vivien Didelot> --- > include/net/dsa.h | 2 +- > net/dsa/dsa.c | 8 > net/dsa/tag_brcm.c| 2 +- > net/dsa/tag_dsa.c | 2 +- > net/dsa/tag_edsa.c| 2 +- > net/dsa/tag_trailer.c | 2 +- > 6 files changed, 9 insertions(+), 9 deletions(-) > > diff --git a/include/net/dsa.h b/include/net/dsa.h > index 5f2e7df..389227d 100644 > --- a/include/net/dsa.h > +++ b/include/net/dsa.h > @@ -124,7 +124,7 @@ struct dsa_switch_tree { > /* >* Data for the individual switch chips. >*/ > - struct dsa_switch *ds[DSA_MAX_SWITCHES]; > + struct dsa_switch *switches[DSA_MAX_SWITCHES]; > }; > > struct dsa_port { > diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c > index 3daffb6..aa4a61a 100644 > --- a/net/dsa/dsa.c > +++ b/net/dsa/dsa.c > @@ -857,7 +857,7 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, > struct net_device *dev, > continue; > } > > - dst->ds[i] = ds; > + dst->switches[i] = ds; > > ++configured; > } > @@ -953,7 +953,7 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst) > wmb(); > > for (i = 0; i < dst->pd->nr_chips; i++) { > - struct dsa_switch *ds = dst->ds[i]; > + struct dsa_switch *ds = dst->switches[i]; > > if (ds) > dsa_switch_destroy(ds); > @@ -1006,7 +1006,7 @@ static int dsa_suspend(struct device *d) > int i, ret = 0; > > for (i = 0; i < dst->pd->nr_chips; i++) { > - struct dsa_switch *ds = dst->ds[i]; > + struct dsa_switch *ds = dst->switches[i]; > > if (ds != NULL) > ret = dsa_switch_suspend(ds); > @@ -1022,7 +1022,7 @@ static int dsa_resume(struct device *d) > int i, ret = 0; > > for (i = 0; i < dst->pd->nr_chips; 
i++) { > - struct dsa_switch *ds = dst->ds[i]; > + struct dsa_switch *ds = dst->switches[i]; > > if (ds != NULL) > ret = dsa_switch_resume(ds); > diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c > index 3d5aabc..35fc75b 100644 > --- a/net/dsa/tag_brcm.c > +++ b/net/dsa/tag_brcm.c > @@ -102,7 +102,7 @@ static int brcm_tag_rcv(struct sk_buff *skb, struct > net_device *dev, > if (unlikely(dst == NULL)) > goto out_drop; > > - ds = dst->ds[0]; > + ds = dst->switches[0]; > > skb = skb_unshare(skb, GFP_ATOMIC); > if (skb == NULL) > diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c > index c870cfa..bf3eebf8 100644 > --- a/net/dsa/tag_dsa.c > +++ b/net/dsa/tag_dsa.c > @@ -109,7 +109,7 @@ static int dsa_rcv(struct sk_buff *skb, struct net_device > *dev, >*/ > if (source_device >= dst->pd->nr_chips) > goto out_drop; > - ds = dst->ds[source_device]; > + ds = dst->switches[source_device]; > if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL) > goto out_drop; > > diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c > index 898f949d..4ddbb85 100644 > --- a/net/dsa/tag_edsa.c > +++ b/net/dsa/tag_edsa.c > @@ -122,7 +122,7 @@ static int edsa_rcv(struct sk_buff *skb, struct > net_device *dev, >*/ > if (source_device >= dst->pd->nr_chips) > goto out_drop; > - ds = dst->ds[source_device]; > + ds = dst->switches[source_device]; > if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL) > goto out_drop; > > diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c > index eaa3440..ade0bbf 100644 > --- a/net/dsa/tag_trailer.c > +++ b/net/dsa/tag_trailer.c > @@ -67,7 +67,7 @@ static int trailer_rcv(struct sk_buff *skb, struct > net_device *dev, > > if (unlikely(dst == NULL)) > goto out_drop; > - ds = dst->ds[0]; > + ds = dst->switches[0]; > > skb = skb_unshare(skb, GFP_ATOMIC); > if (skb == NULL) > -- > 2.8.0 >
Re: [RFC 07/20] net: dsa: list ports in switch
On Wed, Apr 27, 2016 at 06:30:04PM -0400, Vivien Didelot wrote: > List DSA port structures in their switch structure, so that drivers can > iterate on them to retrieve information such as their ports membership. And this would be so much easier using a plain array. Andrew > > Signed-off-by: Vivien Didelot> --- > include/net/dsa.h | 9 + > net/dsa/dsa.c | 4 > 2 files changed, 13 insertions(+) > > diff --git a/include/net/dsa.h b/include/net/dsa.h > index 69e467c..5f2e7df 100644 > --- a/include/net/dsa.h > +++ b/include/net/dsa.h > @@ -32,6 +32,11 @@ enum dsa_tag_protocol { > #define DSA_MAX_SWITCHES 4 > #define DSA_MAX_PORTS12 > > +#define dsa_switch_for_each_port(_ds, _dp, _num_ports) > \ > + for (_dp = list_first_entry(&_ds->dp, typeof(*_dp), list); \ > + &_dp->list != (&_ds->dp) && _dp->port < _num_ports;\ > + _dp = list_next_entry(_dp, list)) > + > struct dsa_chip_data { > /* >* How to access the switch configuration registers. > @@ -123,6 +128,8 @@ struct dsa_switch_tree { > }; > > struct dsa_port { > + struct list_headlist; > + > struct dsa_switch *ds; > int port; > > @@ -173,6 +180,8 @@ struct dsa_switch { > u32 phys_mii_mask; > struct mii_bus *slave_mii_bus; > struct net_device *ports[DSA_MAX_PORTS]; > + > + struct list_headdp; > }; > > static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p) > diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c > index 222494c..3daffb6 100644 > --- a/net/dsa/dsa.c > +++ b/net/dsa/dsa.c > @@ -225,6 +225,8 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, > struct device *parent) > int index = ds->index; > int i, ret; > > + INIT_LIST_HEAD(>dp); > + > /* >* Validate supplied switch configuration. >*/ > @@ -238,6 +240,8 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, > struct device *parent) > dp[i]->ds = ds; > dp[i]->port = i; > > + list_add_tail([i]->list, >dp); > + > name = pd->port_names[i]; > if (name == NULL) > continue; > -- > 2.8.0 >
Re: [RFC 03/20] net: dsa: pass dsa_port down to drivers bridge ops
On Wed, Apr 27, 2016 at 06:30:00PM -0400, Vivien Didelot wrote: > Now that DSA as proper structure for DSA ports, pass it down to the > port_bridge_join and port_bridge_leave driver functions. I should look at the later patches, but this looks like a step backwards. If your ports array is a member of ds, you have no need for this patch at all. What advantage does this change bring? Andrew > > Signed-off-by: Vivien Didelot> --- > drivers/net/dsa/bcm_sf2.c | 28 ++-- > drivers/net/dsa/mv88e6xxx.c | 10 +- > drivers/net/dsa/mv88e6xxx.h | 4 ++-- > include/net/dsa.h | 4 ++-- > net/dsa/slave.c | 4 ++-- > 5 files changed, 25 insertions(+), 25 deletions(-) > > diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c > index f394ea9..2d7b297 100644 > --- a/drivers/net/dsa/bcm_sf2.c > +++ b/drivers/net/dsa/bcm_sf2.c > @@ -491,15 +491,15 @@ static int bcm_sf2_sw_fast_age_port(struct dsa_switch > *ds, int port) > return 0; > } > > -static int bcm_sf2_sw_br_join(struct dsa_switch *ds, int port, > +static int bcm_sf2_sw_br_join(struct dsa_switch *ds, struct dsa_port *dp, > struct net_device *bridge) > { > struct bcm_sf2_priv *priv = ds_to_priv(ds); > unsigned int i; > u32 reg, p_ctl; > > - priv->port_sts[port].bridge_dev = bridge; > - p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(port)); > + priv->port_sts[dp->port].bridge_dev = bridge; > + p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(dp->port)); > > for (i = 0; i < priv->hw_params.num_ports; i++) { > if (priv->port_sts[i].bridge_dev != bridge) > @@ -509,7 +509,7 @@ static int bcm_sf2_sw_br_join(struct dsa_switch *ds, int > port, >* membership and update the remote port bitmask >*/ > reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(i)); > - reg |= 1 << port; > + reg |= 1 << dp->port; > core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(i)); > priv->port_sts[i].vlan_ctl_mask = reg; > > @@ -519,20 +519,20 @@ static int bcm_sf2_sw_br_join(struct dsa_switch *ds, > int port, > /* Configure the local port VLAN control 
membership to include >* remote ports and update the local port bitmask >*/ > - core_writel(priv, p_ctl, CORE_PORT_VLAN_CTL_PORT(port)); > - priv->port_sts[port].vlan_ctl_mask = p_ctl; > + core_writel(priv, p_ctl, CORE_PORT_VLAN_CTL_PORT(dp->port)); > + priv->port_sts[dp->port].vlan_ctl_mask = p_ctl; > > return 0; > } > > -static void bcm_sf2_sw_br_leave(struct dsa_switch *ds, int port, > +static void bcm_sf2_sw_br_leave(struct dsa_switch *ds, struct dsa_port *dp, > struct net_device *bridge) > { > struct bcm_sf2_priv *priv = ds_to_priv(ds); > unsigned int i; > u32 reg, p_ctl; > > - p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(port)); > + p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(dp->port)); > > for (i = 0; i < priv->hw_params.num_ports; i++) { > /* Don't touch the remaining ports */ > @@ -540,18 +540,18 @@ static void bcm_sf2_sw_br_leave(struct dsa_switch *ds, > int port, > continue; > > reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(i)); > - reg &= ~(1 << port); > + reg &= ~(1 << dp->port); > core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(i)); > - priv->port_sts[port].vlan_ctl_mask = reg; > + priv->port_sts[dp->port].vlan_ctl_mask = reg; > > /* Prevent self removal to preserve isolation */ > - if (port != i) > + if (dp->port != i) > p_ctl &= ~(1 << i); > } > > - core_writel(priv, p_ctl, CORE_PORT_VLAN_CTL_PORT(port)); > - priv->port_sts[port].vlan_ctl_mask = p_ctl; > - priv->port_sts[port].bridge_dev = NULL; > + core_writel(priv, p_ctl, CORE_PORT_VLAN_CTL_PORT(dp->port)); > + priv->port_sts[dp->port].vlan_ctl_mask = p_ctl; > + priv->port_sts[dp->port].bridge_dev = NULL; > } > > static void bcm_sf2_sw_br_set_stp_state(struct dsa_switch *ds, int port, > diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c > index 86f8f2f..3f78c73 100644 > --- a/drivers/net/dsa/mv88e6xxx.c > +++ b/drivers/net/dsa/mv88e6xxx.c > @@ -2203,7 +2203,7 @@ unlock: > return err; > } > > -int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port, > +int 
mv88e6xxx_port_bridge_join(struct dsa_switch *ds, struct dsa_port *dp, > struct net_device *bridge) > { > struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); > @@ -2212,7 +2212,7 @@ int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, > int port, > mutex_lock(>smi_mutex); > > /* Assign the bridge and remap
Re: [RFC 01/20] net: dsa: introduce a dsa_port structure
> @@ -230,6 +231,13 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, > struct device *parent) > for (i = 0; i < DSA_MAX_PORTS; i++) { > char *name; > > + dp[i] = devm_kzalloc(parent, sizeof(*dp), GFP_KERNEL); > + if (dp[i] == NULL) > + return -ENOMEM; You are not saving anything here by dynamically allocating the memory, since you do it for all ports. So just make it a member of ds with size DSA_MAX_PORTS. I would then call this array structure ports. Hmm, I also think keeping it in dsa_slave_priv is wrong, if you have defined the structure in the global include/net/dsa.h. dsa_switch is a better place for it. Andrew
[RFC 00/20] net: dsa: dsa_port structure and tree-wide ops
In a previous RFC [1], I introduced the need to implement cross-chip operations in the DSA layer. Here's a summary. In a multiple switches setup such as the following, every switch of the tree must be aware of its configuration in order to configure a correct data path between chips. sw0 sw1 sw2 [ 0 1 2 3 4 5 ] [ 0 1 2 3 4 5 ] [ 0 1 2 3 4 5 ] | ' ^ ^ ^ ^ ' v ' | | | | ' CPU ' `-DSA-' `-DSA-' ' ' ' + - - - - - - - br0 - - - - - - - + For instance, bridging sw0p2 and sw2p3 together in a VLAN 42 requires both chips to allow frames from the external port to egress its internal port, all DSA ports between them must learn their address, and sw1 must also be aware of the VLAN 42 in order to allow tagged packets to cross the chip. To implement all that nicely, we need a way to propagate such a notification to every switch of a DSA tree. The patchset introduces a dsa_port structure to bundle port-centric info such as its switch index, port number, bridge device, and change the DSA driver functions to take such structure as parameter instead of an internal port number. The DSA layer then introduces tree-wide operations, which call every switch driver when a port operation occurs. It is the responsibility of a switch driver to check if the related port is internal or external to its chip, and behave accordingly. See the patchset as different logical groups (that may be split later): * patches 1 to 5: introduce the dsa_port structure to DSA drivers * patches 6 to 11: put the bridge device in the dsa_port structure and allow the DSA drivers to get rid of their private bridge_dev pointer * patches 12 to 16: introduce tree-wide operations. Drivers are now aware of cross-chip port operations * patches 17 to 20: implement cross-chip hardware bridging in mv88e6xxx A branch is available here [2] and a debugfs patch is maintained here [3] in order to inspect the Marvell switch's internal structures, such as the PVT. 
Many things remain to be done after this, such as using dsa_port_is_{cpu,dsa} helpers, getting rid of dst->switches and ds->ports in favor of their related switch and port lists, and introducing a dynamic number of switches and ports. [1] https://lkml.org/lkml/2016/4/20/733 [2] https://github.com/vivien/linux/tree/dsa/dev [3] https://github.com/vivien/linux/commit/da33b1a698fef3a66515a05e2b9f31d0279a89d4.patch Cheers, Vivien Didelot (20): net: dsa: introduce a dsa_port structure net: dsa: be consistent with NETDEV_CHANGEUPPER net: dsa: pass dsa_port down to drivers bridge ops net: dsa: pass dsa_port down to drivers FDB ops net: dsa: pass dsa_port down to drivers VLAN ops net: dsa: move bridge device in dsa_port net: dsa: list ports in switch net: dsa: bcm_sf2: use bridge device from dsa_port net: dsa: mv88e6xxx: check HW vlan with dsa_port net: dsa: mv88e6xxx: setup a dsa_port net: dsa: mv88e6xxx: use bridge from dsa_port net: dsa: rename dst->ds to dst->switches net: dsa: list switches in tree net: dsa: add tree-wide bridge ops net: dsa: add tree-wide FDB ops net: dsa: add tree-wide VLAN ops net: dsa: mv88e6xxx: factorize port bridge change net: dsa: mv88e6xxx: add flags to info net: dsa: mv88e6xxx: conditionally init PVT net: dsa: mv88e6xxx: setup PVT on cross-chip ops drivers/net/dsa/bcm_sf2.c | 92 +- drivers/net/dsa/bcm_sf2.h | 2 - drivers/net/dsa/mv88e6352.c | 1 + drivers/net/dsa/mv88e6xxx.c | 397 drivers/net/dsa/mv88e6xxx.h | 41 +++-- include/net/dsa.h | 57 +-- net/dsa/Makefile| 2 +- net/dsa/dsa.c | 25 ++- net/dsa/dsa_priv.h | 37 +++-- net/dsa/slave.c | 283 +-- net/dsa/tag_brcm.c | 6 +- net/dsa/tag_dsa.c | 10 +- net/dsa/tag_edsa.c | 10 +- net/dsa/tag_trailer.c | 4 +- net/dsa/tree.c | 187 + 15 files changed, 751 insertions(+), 403 deletions(-) create mode 100644 net/dsa/tree.c -- 2.8.0
[RFC 05/20] net: dsa: pass dsa_port down to drivers VLAN ops
Now that DSA as proper structure for DSA ports, pass it down to the port_vlan_{filtering,prepare,add,del,dump} driver functions. Signed-off-by: Vivien Didelot--- drivers/net/dsa/mv88e6xxx.c | 41 + drivers/net/dsa/mv88e6xxx.h | 10 +- include/net/dsa.h | 11 ++- net/dsa/slave.c | 10 +- 4 files changed, 37 insertions(+), 35 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index c1ff763..7e03f4c 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -1367,7 +1367,7 @@ static int _mv88e6xxx_vtu_getnext(struct dsa_switch *ds, return 0; } -int mv88e6xxx_port_vlan_dump(struct dsa_switch *ds, int port, +int mv88e6xxx_port_vlan_dump(struct dsa_switch *ds, struct dsa_port *dp, struct switchdev_obj_port_vlan *vlan, int (*cb)(struct switchdev_obj *obj)) { @@ -1378,7 +1378,7 @@ int mv88e6xxx_port_vlan_dump(struct dsa_switch *ds, int port, mutex_lock(>smi_mutex); - err = _mv88e6xxx_port_pvid_get(ds, port, ); + err = _mv88e6xxx_port_pvid_get(ds, dp->port, ); if (err) goto unlock; @@ -1394,14 +1394,15 @@ int mv88e6xxx_port_vlan_dump(struct dsa_switch *ds, int port, if (!next.valid) break; - if (next.data[port] == GLOBAL_VTU_DATA_MEMBER_TAG_NON_MEMBER) + if (next.data[dp->port] == + GLOBAL_VTU_DATA_MEMBER_TAG_NON_MEMBER) continue; /* reinit and dump this VLAN obj */ vlan->vid_begin = vlan->vid_end = next.vid; vlan->flags = 0; - if (next.data[port] == GLOBAL_VTU_DATA_MEMBER_TAG_UNTAGGED) + if (next.data[dp->port] == GLOBAL_VTU_DATA_MEMBER_TAG_UNTAGGED) vlan->flags |= BRIDGE_VLAN_INFO_UNTAGGED; if (next.vid == pvid) @@ -1789,7 +1790,7 @@ static const char * const mv88e6xxx_port_8021q_mode_names[] = { [PORT_CONTROL_2_8021Q_SECURE] = "Secure", }; -int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port, +int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, struct dsa_port *dp, bool vlan_filtering) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); @@ -1799,7 +1800,7 @@ int mv88e6xxx_port_vlan_filtering(struct 
dsa_switch *ds, int port, mutex_lock(>smi_mutex); - ret = _mv88e6xxx_reg_read(ds, REG_PORT(port), PORT_CONTROL_2); + ret = _mv88e6xxx_reg_read(ds, REG_PORT(dp->port), PORT_CONTROL_2); if (ret < 0) goto unlock; @@ -1809,12 +1810,12 @@ int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port, ret &= ~PORT_CONTROL_2_8021Q_MASK; ret |= new & PORT_CONTROL_2_8021Q_MASK; - ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), PORT_CONTROL_2, - ret); + ret = _mv88e6xxx_reg_write(ds, REG_PORT(dp->port), + PORT_CONTROL_2, ret); if (ret < 0) goto unlock; - netdev_dbg(ds->ports[port], "802.1Q Mode %s (was %s)\n", + netdev_dbg(ds->ports[dp->port], "802.1Q Mode %s (was %s)\n", mv88e6xxx_port_8021q_mode_names[new], mv88e6xxx_port_8021q_mode_names[old]); } @@ -1826,7 +1827,7 @@ unlock: return ret; } -int mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, int port, +int mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan, struct switchdev_trans *trans) { @@ -1835,7 +1836,7 @@ int mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, int port, /* If the requested port doesn't belong to the same bridge as the VLAN * members, do not support it (yet) and fallback to software VLAN. */ - err = mv88e6xxx_port_check_hw_vlan(ds, port, vlan->vid_begin, + err = mv88e6xxx_port_check_hw_vlan(ds, dp->port, vlan->vid_begin, vlan->vid_end); if (err) return err; @@ -1863,7 +1864,7 @@ static int _mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port, u16 vid, return _mv88e6xxx_vtu_loadpurge(ds, ); } -void mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port, +void mv88e6xxx_port_vlan_add(struct dsa_switch *ds, struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan, struct switchdev_trans *trans) { @@ -1875,12 +1876,12 @@ void mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port,
[RFC 01/20] net: dsa: introduce a dsa_port structure
Introduce a new dsa_port structure, used to store port-centric information, such as a pointer to its DSA switch and its port number. It will later contains further data, such as its bridge device. This is a first step towards implementing cross-chip port operations. Signed-off-by: Vivien Didelot--- include/net/dsa.h | 5 ++ net/dsa/dsa.c | 10 +++- net/dsa/dsa_priv.h| 13 ++--- net/dsa/slave.c | 147 +- net/dsa/tag_brcm.c| 4 +- net/dsa/tag_dsa.c | 8 +-- net/dsa/tag_edsa.c| 8 +-- net/dsa/tag_trailer.c | 2 +- 8 files changed, 104 insertions(+), 93 deletions(-) diff --git a/include/net/dsa.h b/include/net/dsa.h index 2d280ab..255c108 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -122,6 +122,11 @@ struct dsa_switch_tree { struct dsa_switch *ds[DSA_MAX_SWITCHES]; }; +struct dsa_port { + struct dsa_switch *ds; + int port; +}; + struct dsa_switch { /* * Parent switch tree, and switch index. diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index d61ceed..222494c 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -219,6 +219,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) { struct dsa_switch_driver *drv = ds->drv; struct dsa_switch_tree *dst = ds->dst; + struct dsa_port *dp[DSA_MAX_PORTS]; struct dsa_chip_data *pd = ds->pd; bool valid_name_found = false; int index = ds->index; @@ -230,6 +231,13 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) for (i = 0; i < DSA_MAX_PORTS; i++) { char *name; + dp[i] = devm_kzalloc(parent, sizeof(*dp), GFP_KERNEL); + if (dp[i] == NULL) + return -ENOMEM; + + dp[i]->ds = ds; + dp[i]->port = i; + name = pd->port_names[i]; if (name == NULL) continue; @@ -328,7 +336,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) if (!(ds->enabled_port_mask & (1 << i))) continue; - ret = dsa_slave_create(ds, parent, i, pd->port_names[i]); + ret = dsa_slave_create(dp[i], parent, pd->port_names[i]); if (ret < 0) { netdev_err(dst->master_netdev, "[%d]: can't create 
dsa slave device for port %d(%s): %d\n", index, i, pd->port_names[i], ret); diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index dfa3377..c7d5df0 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -26,13 +26,6 @@ struct dsa_slave_priv { struct net_device *dev); /* -* Which switch this port is a part of, and the port index -* for this port. -*/ - struct dsa_switch *parent; - u8 port; - - /* * The phylib phy_device pointer for the PHY connected * to this port. */ @@ -46,6 +39,9 @@ struct dsa_slave_priv { #ifdef CONFIG_NET_POLL_CONTROLLER struct netpoll *netpoll; #endif + + /* DSA specific data */ + struct dsa_port *dp; }; /* dsa.c */ @@ -54,8 +50,7 @@ extern char dsa_driver_version[]; /* slave.c */ extern const struct dsa_device_ops notag_netdev_ops; void dsa_slave_mii_bus_init(struct dsa_switch *ds); -int dsa_slave_create(struct dsa_switch *ds, struct device *parent, -int port, char *name); +int dsa_slave_create(struct dsa_port *dp, struct device *parent, char *name); void dsa_slave_destroy(struct net_device *slave_dev); int dsa_slave_suspend(struct net_device *slave_dev); int dsa_slave_resume(struct net_device *slave_dev); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 3b6750f..6115444 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -61,7 +61,7 @@ static int dsa_slave_get_iflink(const struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); - return p->parent->dst->master_netdev->ifindex; + return p->dp->ds->dst->master_netdev->ifindex; } static inline bool dsa_port_is_bridged(struct dsa_slave_priv *p) @@ -72,8 +72,8 @@ static inline bool dsa_port_is_bridged(struct dsa_slave_priv *p) static int dsa_slave_open(struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); - struct net_device *master = p->parent->dst->master_netdev; - struct dsa_switch *ds = p->parent; + struct dsa_switch *ds = p->dp->ds; + struct net_device *master = ds->dst->master_netdev; u8 stp_state = dsa_port_is_bridged(p) ? 
BR_STATE_BLOCKING : BR_STATE_FORWARDING; int err; @@ -99,13 +99,13 @@ static int dsa_slave_open(struct net_device *dev) }
[RFC 03/20] net: dsa: pass dsa_port down to drivers bridge ops
Now that DSA as proper structure for DSA ports, pass it down to the port_bridge_join and port_bridge_leave driver functions. Signed-off-by: Vivien Didelot--- drivers/net/dsa/bcm_sf2.c | 28 ++-- drivers/net/dsa/mv88e6xxx.c | 10 +- drivers/net/dsa/mv88e6xxx.h | 4 ++-- include/net/dsa.h | 4 ++-- net/dsa/slave.c | 4 ++-- 5 files changed, 25 insertions(+), 25 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index f394ea9..2d7b297 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -491,15 +491,15 @@ static int bcm_sf2_sw_fast_age_port(struct dsa_switch *ds, int port) return 0; } -static int bcm_sf2_sw_br_join(struct dsa_switch *ds, int port, +static int bcm_sf2_sw_br_join(struct dsa_switch *ds, struct dsa_port *dp, struct net_device *bridge) { struct bcm_sf2_priv *priv = ds_to_priv(ds); unsigned int i; u32 reg, p_ctl; - priv->port_sts[port].bridge_dev = bridge; - p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(port)); + priv->port_sts[dp->port].bridge_dev = bridge; + p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(dp->port)); for (i = 0; i < priv->hw_params.num_ports; i++) { if (priv->port_sts[i].bridge_dev != bridge) @@ -509,7 +509,7 @@ static int bcm_sf2_sw_br_join(struct dsa_switch *ds, int port, * membership and update the remote port bitmask */ reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(i)); - reg |= 1 << port; + reg |= 1 << dp->port; core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(i)); priv->port_sts[i].vlan_ctl_mask = reg; @@ -519,20 +519,20 @@ static int bcm_sf2_sw_br_join(struct dsa_switch *ds, int port, /* Configure the local port VLAN control membership to include * remote ports and update the local port bitmask */ - core_writel(priv, p_ctl, CORE_PORT_VLAN_CTL_PORT(port)); - priv->port_sts[port].vlan_ctl_mask = p_ctl; + core_writel(priv, p_ctl, CORE_PORT_VLAN_CTL_PORT(dp->port)); + priv->port_sts[dp->port].vlan_ctl_mask = p_ctl; return 0; } -static void bcm_sf2_sw_br_leave(struct dsa_switch *ds, int 
port, +static void bcm_sf2_sw_br_leave(struct dsa_switch *ds, struct dsa_port *dp, struct net_device *bridge) { struct bcm_sf2_priv *priv = ds_to_priv(ds); unsigned int i; u32 reg, p_ctl; - p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(port)); + p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(dp->port)); for (i = 0; i < priv->hw_params.num_ports; i++) { /* Don't touch the remaining ports */ @@ -540,18 +540,18 @@ static void bcm_sf2_sw_br_leave(struct dsa_switch *ds, int port, continue; reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(i)); - reg &= ~(1 << port); + reg &= ~(1 << dp->port); core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(i)); - priv->port_sts[port].vlan_ctl_mask = reg; + priv->port_sts[dp->port].vlan_ctl_mask = reg; /* Prevent self removal to preserve isolation */ - if (port != i) + if (dp->port != i) p_ctl &= ~(1 << i); } - core_writel(priv, p_ctl, CORE_PORT_VLAN_CTL_PORT(port)); - priv->port_sts[port].vlan_ctl_mask = p_ctl; - priv->port_sts[port].bridge_dev = NULL; + core_writel(priv, p_ctl, CORE_PORT_VLAN_CTL_PORT(dp->port)); + priv->port_sts[dp->port].vlan_ctl_mask = p_ctl; + priv->port_sts[dp->port].bridge_dev = NULL; } static void bcm_sf2_sw_br_set_stp_state(struct dsa_switch *ds, int port, diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index 86f8f2f..3f78c73 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -2203,7 +2203,7 @@ unlock: return err; } -int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port, +int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, struct dsa_port *dp, struct net_device *bridge) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); @@ -2212,7 +2212,7 @@ int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port, mutex_lock(>smi_mutex); /* Assign the bridge and remap each port's VLANTable */ - ps->ports[port].bridge_dev = bridge; + ps->ports[dp->port].bridge_dev = bridge; for (i = 0; i < ps->info->num_ports; ++i) { if (ps->ports[i].bridge_dev == bridge) { @@ 
-2227,7 +2227,7 @@ int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port, return err; } -void mv88e6xxx_port_bridge_leave(struct
[RFC 02/20] net: dsa: be consistent with NETDEV_CHANGEUPPER
Once NETDEV_CHANGEUPPER is emitted, the device is already (un)bridged. If an error is returned on port_bridge_join, the bridge layer will rollback the operation and unbridge the port. Respect this by setting bridge_dev to NULL on error. Also the DSA layer shouldn't assume that the drivers know about the bridge device a port was previously bridged to. So pass the bridge device to port_bridge_leave. Signed-off-by: Vivien Didelot--- drivers/net/dsa/bcm_sf2.c | 4 ++-- drivers/net/dsa/mv88e6xxx.c | 4 ++-- drivers/net/dsa/mv88e6xxx.h | 3 ++- include/net/dsa.h | 3 ++- net/dsa/slave.c | 13 + 5 files changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 448deb5..f394ea9 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -525,10 +525,10 @@ static int bcm_sf2_sw_br_join(struct dsa_switch *ds, int port, return 0; } -static void bcm_sf2_sw_br_leave(struct dsa_switch *ds, int port) +static void bcm_sf2_sw_br_leave(struct dsa_switch *ds, int port, + struct net_device *bridge) { struct bcm_sf2_priv *priv = ds_to_priv(ds); - struct net_device *bridge = priv->port_sts[port].bridge_dev; unsigned int i; u32 reg, p_ctl; diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index 028f92f..86f8f2f 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -2227,10 +2227,10 @@ int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port, return err; } -void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port) +void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port, +struct net_device *bridge) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - struct net_device *bridge = ps->ports[port].bridge_dev; int i; mutex_lock(>smi_mutex); diff --git a/drivers/net/dsa/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx.h index 0dbe2d1..2eb9a82 100644 --- a/drivers/net/dsa/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx.h @@ -492,7 +492,8 @@ int mv88e6xxx_set_eee(struct 
dsa_switch *ds, int port, struct phy_device *phydev, struct ethtool_eee *e); int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port, struct net_device *bridge); -void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port); +void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port, +struct net_device *bridge); void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port, u8 state); int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering); diff --git a/include/net/dsa.h b/include/net/dsa.h index 255c108..ed33500 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -305,7 +305,8 @@ struct dsa_switch_driver { */ int (*port_bridge_join)(struct dsa_switch *ds, int port, struct net_device *bridge); - void(*port_bridge_leave)(struct dsa_switch *ds, int port); + void(*port_bridge_leave)(struct dsa_switch *ds, int port, +struct net_device *bridge); void(*port_stp_state_set)(struct dsa_switch *ds, int port, u8 state); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 6115444..f2ec13d 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -443,19 +443,24 @@ static int dsa_slave_bridge_port_join(struct net_device *dev, if (ds->drv->port_bridge_join) ret = ds->drv->port_bridge_join(ds, p->dp->port, br); - return ret == -EOPNOTSUPP ? 0 : ret; + if (ret && ret != -EOPNOTSUPP) { + p->bridge_dev = NULL; + return ret; + } + + return 0; } static void dsa_slave_bridge_port_leave(struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->dp->ds; + struct net_device *br = p->bridge_dev; + p->bridge_dev = NULL; if (ds->drv->port_bridge_leave) - ds->drv->port_bridge_leave(ds, p->dp->port); - - p->bridge_dev = NULL; + ds->drv->port_bridge_leave(ds, p->dp->port, br); /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer, * so allow it to be in BR_STATE_FORWARDING to be kept functional -- 2.8.0
[RFC 12/20] net: dsa: rename dst->ds to dst->switches
dsa_switch stores the net_device pointers in a "ports" member. Be consistent and store the dsa_switch pointer in a "switches" member of the dsa_switch_tree structure. This free us the "ds" member for a future dsa_switch list. Signed-off-by: Vivien Didelot--- include/net/dsa.h | 2 +- net/dsa/dsa.c | 8 net/dsa/tag_brcm.c| 2 +- net/dsa/tag_dsa.c | 2 +- net/dsa/tag_edsa.c| 2 +- net/dsa/tag_trailer.c | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/net/dsa.h b/include/net/dsa.h index 5f2e7df..389227d 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -124,7 +124,7 @@ struct dsa_switch_tree { /* * Data for the individual switch chips. */ - struct dsa_switch *ds[DSA_MAX_SWITCHES]; + struct dsa_switch *switches[DSA_MAX_SWITCHES]; }; struct dsa_port { diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 3daffb6..aa4a61a 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -857,7 +857,7 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, continue; } - dst->ds[i] = ds; + dst->switches[i] = ds; ++configured; } @@ -953,7 +953,7 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst) wmb(); for (i = 0; i < dst->pd->nr_chips; i++) { - struct dsa_switch *ds = dst->ds[i]; + struct dsa_switch *ds = dst->switches[i]; if (ds) dsa_switch_destroy(ds); @@ -1006,7 +1006,7 @@ static int dsa_suspend(struct device *d) int i, ret = 0; for (i = 0; i < dst->pd->nr_chips; i++) { - struct dsa_switch *ds = dst->ds[i]; + struct dsa_switch *ds = dst->switches[i]; if (ds != NULL) ret = dsa_switch_suspend(ds); @@ -1022,7 +1022,7 @@ static int dsa_resume(struct device *d) int i, ret = 0; for (i = 0; i < dst->pd->nr_chips; i++) { - struct dsa_switch *ds = dst->ds[i]; + struct dsa_switch *ds = dst->switches[i]; if (ds != NULL) ret = dsa_switch_resume(ds); diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index 3d5aabc..35fc75b 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -102,7 +102,7 @@ static int brcm_tag_rcv(struct sk_buff 
*skb, struct net_device *dev, if (unlikely(dst == NULL)) goto out_drop; - ds = dst->ds[0]; + ds = dst->switches[0]; skb = skb_unshare(skb, GFP_ATOMIC); if (skb == NULL) diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index c870cfa..bf3eebf8 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -109,7 +109,7 @@ static int dsa_rcv(struct sk_buff *skb, struct net_device *dev, */ if (source_device >= dst->pd->nr_chips) goto out_drop; - ds = dst->ds[source_device]; + ds = dst->switches[source_device]; if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL) goto out_drop; diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c index 898f949d..4ddbb85 100644 --- a/net/dsa/tag_edsa.c +++ b/net/dsa/tag_edsa.c @@ -122,7 +122,7 @@ static int edsa_rcv(struct sk_buff *skb, struct net_device *dev, */ if (source_device >= dst->pd->nr_chips) goto out_drop; - ds = dst->ds[source_device]; + ds = dst->switches[source_device]; if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL) goto out_drop; diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c index eaa3440..ade0bbf 100644 --- a/net/dsa/tag_trailer.c +++ b/net/dsa/tag_trailer.c @@ -67,7 +67,7 @@ static int trailer_rcv(struct sk_buff *skb, struct net_device *dev, if (unlikely(dst == NULL)) goto out_drop; - ds = dst->ds[0]; + ds = dst->switches[0]; skb = skb_unshare(skb, GFP_ATOMIC); if (skb == NULL) -- 2.8.0
[RFC 10/20] net: dsa: mv88e6xxx: setup a dsa_port
Change the mv88e6xxx_setup_port function to take a dsa_port structure as parameter instead of a port index. This will help us get rid of the private bridge_dev pointer. Signed-off-by: Vivien Didelot--- drivers/net/dsa/mv88e6xxx.c | 64 - 1 file changed, 34 insertions(+), 30 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index 00a0b92..0687894 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -2317,7 +2317,7 @@ static int mv88e6xxx_power_on_serdes(struct dsa_switch *ds) return ret; } -static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) +static int mv88e6xxx_setup_port(struct dsa_switch *ds, struct dsa_port *dp) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int ret; @@ -2335,8 +2335,10 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) * and all DSA ports to their maximum bandwidth and * full duplex. */ - reg = _mv88e6xxx_reg_read(ds, REG_PORT(port), PORT_PCS_CTRL); - if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) { + reg = _mv88e6xxx_reg_read(ds, REG_PORT(dp->port), + PORT_PCS_CTRL); + if (dsa_is_cpu_port(ds, dp->port) || + dsa_is_dsa_port(ds, dp->port)) { reg &= ~PORT_PCS_CTRL_UNFORCED; reg |= PORT_PCS_CTRL_FORCE_LINK | PORT_PCS_CTRL_LINK_UP | @@ -2350,7 +2352,7 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) reg |= PORT_PCS_CTRL_UNFORCED; } - ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), + ret = _mv88e6xxx_reg_write(ds, REG_PORT(dp->port), PORT_PCS_CTRL, reg); if (ret) goto abort; @@ -2378,7 +2380,7 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) reg = PORT_CONTROL_IGMP_MLD_SNOOP | PORT_CONTROL_USE_TAG | PORT_CONTROL_USE_IP | PORT_CONTROL_STATE_FORWARDING; - if (dsa_is_cpu_port(ds, port)) { + if (dsa_is_cpu_port(ds, dp->port)) { if (mv88e6xxx_6095_family(ds) || mv88e6xxx_6185_family(ds)) reg |= PORT_CONTROL_DSA_TAG; if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || @@ -2400,7 +2402,7 @@ static int 
mv88e6xxx_setup_port(struct dsa_switch *ds, int port) reg |= PORT_CONTROL_EGRESS_ADD_TAG; } } - if (dsa_is_dsa_port(ds, port)) { + if (dsa_is_dsa_port(ds, dp->port)) { if (mv88e6xxx_6095_family(ds) || mv88e6xxx_6185_family(ds)) reg |= PORT_CONTROL_DSA_TAG; if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || @@ -2409,13 +2411,13 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) reg |= PORT_CONTROL_FRAME_MODE_DSA; } - if (port == dsa_upstream_port(ds)) + if (dp->port == dsa_upstream_port(ds)) reg |= PORT_CONTROL_FORWARD_UNKNOWN | PORT_CONTROL_FORWARD_UNKNOWN_MC; } if (reg) { - ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), - PORT_CONTROL, reg); + ret = _mv88e6xxx_reg_write(ds, REG_PORT(dp->port), PORT_CONTROL, + reg); if (ret) goto abort; } @@ -2424,7 +2426,7 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) * powered down. */ if (mv88e6xxx_6352_family(ds)) { - ret = _mv88e6xxx_reg_read(ds, REG_PORT(port), PORT_STATUS); + ret = _mv88e6xxx_reg_read(ds, REG_PORT(dp->port), PORT_STATUS); if (ret < 0) goto abort; ret &= PORT_STATUS_CMODE_MASK; @@ -2460,14 +2462,14 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) /* enable forwarding of unknown multicast addresses to * the upstream port */ - if (port == dsa_upstream_port(ds)) + if (dp->port == dsa_upstream_port(ds)) reg |= PORT_CONTROL_2_FORWARD_UNKNOWN; } reg |= PORT_CONTROL_2_8021Q_DISABLED; if (reg) { - ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), + ret = _mv88e6xxx_reg_write(ds, REG_PORT(dp->port), PORT_CONTROL_2, reg); if (ret) goto abort; @@
[RFC 07/20] net: dsa: list ports in switch
List DSA port structures in their switch structure, so that drivers can iterate on them to retrieve information such as their ports membership. Signed-off-by: Vivien Didelot--- include/net/dsa.h | 9 + net/dsa/dsa.c | 4 2 files changed, 13 insertions(+) diff --git a/include/net/dsa.h b/include/net/dsa.h index 69e467c..5f2e7df 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -32,6 +32,11 @@ enum dsa_tag_protocol { #define DSA_MAX_SWITCHES 4 #define DSA_MAX_PORTS 12 +#define dsa_switch_for_each_port(_ds, _dp, _num_ports) \ + for (_dp = list_first_entry(&_ds->dp, typeof(*_dp), list); \ +&_dp->list != (&_ds->dp) && _dp->port < _num_ports;\ +_dp = list_next_entry(_dp, list)) + struct dsa_chip_data { /* * How to access the switch configuration registers. @@ -123,6 +128,8 @@ struct dsa_switch_tree { }; struct dsa_port { + struct list_headlist; + struct dsa_switch *ds; int port; @@ -173,6 +180,8 @@ struct dsa_switch { u32 phys_mii_mask; struct mii_bus *slave_mii_bus; struct net_device *ports[DSA_MAX_PORTS]; + + struct list_headdp; }; static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 222494c..3daffb6 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -225,6 +225,8 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) int index = ds->index; int i, ret; + INIT_LIST_HEAD(>dp); + /* * Validate supplied switch configuration. */ @@ -238,6 +240,8 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) dp[i]->ds = ds; dp[i]->port = i; + list_add_tail([i]->list, >dp); + name = pd->port_names[i]; if (name == NULL) continue; -- 2.8.0
[RFC 11/20] net: dsa: mv88e6xxx: use bridge from dsa_port
Change the _mv88e6xxx_port_based_vlan_map function for a _mv88e6xxx_port_map_vlantable which takes a dsa_port structure as parameter. This allows us to iterate on dsa_port's bridge device pointer and thus get rid of the private bridge_dev structure. Signed-off-by: Vivien Didelot--- drivers/net/dsa/mv88e6xxx.c | 48 ++--- drivers/net/dsa/mv88e6xxx.h | 1 - 2 files changed, 24 insertions(+), 25 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index 0687894..89d0206 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -,27 +,29 @@ static int _mv88e6xxx_port_state(struct dsa_switch *ds, int port, u8 state) return ret; } -static int _mv88e6xxx_port_based_vlan_map(struct dsa_switch *ds, int port) +static int _mv88e6xxx_port_map_vlantable(struct dsa_switch *ds, +struct dsa_port *dp) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - struct net_device *bridge = ps->ports[port].bridge_dev; const u16 mask = (1 << ps->info->num_ports) - 1; u16 output_ports = 0; + int port = dp->port; + struct dsa_port *intp; int reg; - int i; /* allow CPU port or DSA link(s) to send frames to every port */ if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) { output_ports = mask; } else { - for (i = 0; i < ps->info->num_ports; ++i) { + dsa_switch_for_each_port(ds, intp, ps->info->num_ports) { /* allow sending frames to every group member */ - if (bridge && ps->ports[i].bridge_dev == bridge) - output_ports |= BIT(i); + if (intp->br && intp->br == dp->br) + output_ports |= BIT(intp->port); /* allow sending frames to CPU port and DSA link(s) */ - if (dsa_is_cpu_port(ds, i) || dsa_is_dsa_port(ds, i)) - output_ports |= BIT(i); + if (dsa_is_cpu_port(ds, intp->port) || + dsa_is_dsa_port(ds, intp->port)) + output_ports |= BIT(intp->port); } } @@ -2207,16 +2209,15 @@ int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, struct dsa_port *dp, struct net_device *bridge) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - int i, err; + 
struct dsa_port *intp; + int err; mutex_lock(>smi_mutex); - /* Assign the bridge and remap each port's VLANTable */ - ps->ports[dp->port].bridge_dev = bridge; - - for (i = 0; i < ps->info->num_ports; ++i) { - if (ps->ports[i].bridge_dev == bridge) { - err = _mv88e6xxx_port_based_vlan_map(ds, i); + /* Remap each port's VLANTable */ + dsa_switch_for_each_port(ds, intp, ps->info->num_ports) { + if (intp->br == bridge) { + err = _mv88e6xxx_port_map_vlantable(ds, intp); if (err) break; } @@ -2231,17 +2232,16 @@ void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, struct dsa_port *dp, struct net_device *bridge) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - int i; + struct dsa_port *intp; mutex_lock(>smi_mutex); - /* Unassign the bridge and remap each port's VLANTable */ - ps->ports[dp->port].bridge_dev = NULL; - - for (i = 0; i < ps->info->num_ports; ++i) - if (i == dp->port || ps->ports[i].bridge_dev == bridge) - if (_mv88e6xxx_port_based_vlan_map(ds, i)) - netdev_warn(ds->ports[i], "failed to remap\n"); + /* Remap each port's VLANTable */ + dsa_switch_for_each_port(ds, intp, ps->info->num_ports) + if (intp == dp || intp->br == bridge) + if (_mv88e6xxx_port_map_vlantable(ds, intp)) + netdev_warn(ds->ports[intp->port], + "failed to remap\n"); mutex_unlock(>smi_mutex); } @@ -2573,7 +2573,7 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, struct dsa_port *dp) if (ret) goto abort; - ret = _mv88e6xxx_port_based_vlan_map(ds, dp->port); + ret = _mv88e6xxx_port_map_vlantable(ds, dp); if (ret) goto abort; diff --git a/drivers/net/dsa/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx.h index c49a514..56e3347 100644 --- a/drivers/net/dsa/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx.h @@ -378,7 +378,6 @@ struct mv88e6xxx_vtu_stu_entry { }; struct mv88e6xxx_priv_port { - struct net_device
[RFC 08/20] net: dsa: bcm_sf2: use bridge device from dsa_port
Now that the DSA layer exposes the DSA port structures to drivers, use that to retrieve the port bridge membership and thus get rid of the private bridge_dev pointer. Signed-off-by: Vivien Didelot--- drivers/net/dsa/bcm_sf2.c | 30 ++ drivers/net/dsa/bcm_sf2.h | 2 -- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index f7b53fa..6e3b844 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -495,25 +495,24 @@ static int bcm_sf2_sw_br_join(struct dsa_switch *ds, struct dsa_port *dp, struct net_device *bridge) { struct bcm_sf2_priv *priv = ds_to_priv(ds); - unsigned int i; + struct dsa_port *intp; u32 reg, p_ctl; - priv->port_sts[dp->port].bridge_dev = bridge; p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(dp->port)); - for (i = 0; i < priv->hw_params.num_ports; i++) { - if (priv->port_sts[i].bridge_dev != bridge) + dsa_switch_for_each_port(ds, intp, priv->hw_params.num_ports) { + if (intp->br != bridge) continue; /* Add this local port to the remote port VLAN control * membership and update the remote port bitmask */ - reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(i)); + reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(intp->port)); reg |= 1 << dp->port; - core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(i)); - priv->port_sts[i].vlan_ctl_mask = reg; + core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(intp->port)); + priv->port_sts[intp->port].vlan_ctl_mask = reg; - p_ctl |= 1 << i; + p_ctl |= 1 << intp->port; } /* Configure the local port VLAN control membership to include @@ -529,29 +528,28 @@ static void bcm_sf2_sw_br_leave(struct dsa_switch *ds, struct dsa_port *dp, struct net_device *bridge) { struct bcm_sf2_priv *priv = ds_to_priv(ds); - unsigned int i; + struct dsa_port *intp; u32 reg, p_ctl; p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(dp->port)); - for (i = 0; i < priv->hw_params.num_ports; i++) { + dsa_switch_for_each_port(ds, intp, priv->hw_params.num_ports) { /* Don't 
touch the remaining ports */ - if (priv->port_sts[i].bridge_dev != bridge) + if (intp->br != bridge) continue; - reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(i)); + reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(intp->port)); reg &= ~(1 << dp->port); - core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(i)); + core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(intp->port)); priv->port_sts[dp->port].vlan_ctl_mask = reg; /* Prevent self removal to preserve isolation */ - if (dp->port != i) - p_ctl &= ~(1 << i); + if (dp != intp) + p_ctl &= ~(1 << intp->port); } core_writel(priv, p_ctl, CORE_PORT_VLAN_CTL_PORT(dp->port)); priv->port_sts[dp->port].vlan_ctl_mask = p_ctl; - priv->port_sts[dp->port].bridge_dev = NULL; } static void bcm_sf2_sw_br_set_stp_state(struct dsa_switch *ds, int port, diff --git a/drivers/net/dsa/bcm_sf2.h b/drivers/net/dsa/bcm_sf2.h index 200b1f5..6bba1c9 100644 --- a/drivers/net/dsa/bcm_sf2.h +++ b/drivers/net/dsa/bcm_sf2.h @@ -50,8 +50,6 @@ struct bcm_sf2_port_status { struct ethtool_eee eee; u32 vlan_ctl_mask; - - struct net_device *bridge_dev; }; struct bcm_sf2_arl_entry { -- 2.8.0
[RFC 06/20] net: dsa: move bridge device in dsa_port
Move the pointer to the bridge device in the DSA port structure instead of cluttering the dsa_slave_priv structure. This can later be used by drivers to help them configure their bridge group port membership. Signed-off-by: Vivien Didelot--- include/net/dsa.h | 2 ++ net/dsa/dsa_priv.h | 1 - net/dsa/slave.c| 16 +--- 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/include/net/dsa.h b/include/net/dsa.h index 08a9536..69e467c 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -125,6 +125,8 @@ struct dsa_switch_tree { struct dsa_port { struct dsa_switch *ds; int port; + + struct net_device *br; }; struct dsa_switch { diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index c7d5df0..c5afddd 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -35,7 +35,6 @@ struct dsa_slave_priv { int old_pause; int old_duplex; - struct net_device *bridge_dev; #ifdef CONFIG_NET_POLL_CONTROLLER struct netpoll *netpoll; #endif diff --git a/net/dsa/slave.c b/net/dsa/slave.c index d6b6019..b90caf8 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -64,18 +64,12 @@ static int dsa_slave_get_iflink(const struct net_device *dev) return p->dp->ds->dst->master_netdev->ifindex; } -static inline bool dsa_port_is_bridged(struct dsa_slave_priv *p) -{ - return !!p->bridge_dev; -} - static int dsa_slave_open(struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->dp->ds; struct net_device *master = ds->dst->master_netdev; - u8 stp_state = dsa_port_is_bridged(p) ? - BR_STATE_BLOCKING : BR_STATE_FORWARDING; + u8 stp_state = p->dp->br ? 
BR_STATE_BLOCKING : BR_STATE_FORWARDING; int err; if (!(master->flags & IFF_UP)) @@ -438,13 +432,13 @@ static int dsa_slave_bridge_port_join(struct net_device *dev, struct dsa_switch *ds = p->dp->ds; int ret = -EOPNOTSUPP; - p->bridge_dev = br; + p->dp->br = br; if (ds->drv->port_bridge_join) ret = ds->drv->port_bridge_join(ds, p->dp, br); if (ret && ret != -EOPNOTSUPP) { - p->bridge_dev = NULL; + p->dp->br = NULL; return ret; } @@ -455,9 +449,9 @@ static void dsa_slave_bridge_port_leave(struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->dp->ds; - struct net_device *br = p->bridge_dev; + struct net_device *br = p->dp->br; - p->bridge_dev = NULL; + p->dp->br = NULL; if (ds->drv->port_bridge_leave) ds->drv->port_bridge_leave(ds, p->dp, br); -- 2.8.0
[RFC 09/20] net: dsa: mv88e6xxx: check HW vlan with dsa_port
Change the mv88e6xxx_port_check_hw_vlan function for a mv88e6xxx_port_check_vtu which takes a dsa_port structure as parameter. This will help us get rid of the bridge_dev pointer. Signed-off-by: Vivien Didelot--- drivers/net/dsa/mv88e6xxx.c | 25 - 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index 7e03f4c..00a0b92 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -1729,12 +1729,13 @@ static int _mv88e6xxx_vtu_get(struct dsa_switch *ds, u16 vid, return err; } -static int mv88e6xxx_port_check_hw_vlan(struct dsa_switch *ds, int port, - u16 vid_begin, u16 vid_end) +static int mv88e6xxx_port_check_vtu(struct dsa_switch *ds, struct dsa_port *dp, + u16 vid_begin, u16 vid_end) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); struct mv88e6xxx_vtu_stu_entry vlan; - int i, err; + struct dsa_port *intp; + int err; if (!vid_begin) return -EOPNOTSUPP; @@ -1756,22 +1757,21 @@ static int mv88e6xxx_port_check_hw_vlan(struct dsa_switch *ds, int port, if (vlan.vid > vid_end) break; - for (i = 0; i < ps->info->num_ports; ++i) { - if (dsa_is_dsa_port(ds, i) || dsa_is_cpu_port(ds, i)) + dsa_switch_for_each_port(ds, intp, ps->info->num_ports) { + if (dsa_is_dsa_port(ds, intp->port) || + dsa_is_cpu_port(ds, intp->port)) continue; - if (vlan.data[i] == + if (vlan.data[intp->port] == GLOBAL_VTU_DATA_MEMBER_TAG_NON_MEMBER) continue; - if (ps->ports[i].bridge_dev == - ps->ports[port].bridge_dev) + if (intp->br == dp->br) break; /* same bridge, check next VLAN */ - netdev_warn(ds->ports[port], + netdev_warn(ds->ports[dp->port], "hardware VLAN %d already used by %s\n", - vlan.vid, - netdev_name(ps->ports[i].bridge_dev)); + vlan.vid, netdev_name(intp->br)); err = -EOPNOTSUPP; goto unlock; } @@ -1836,8 +1836,7 @@ int mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, struct dsa_port *dp, /* If the requested port doesn't belong to the same bridge as the VLAN * members, do not support it (yet) 
and fallback to software VLAN. */ - err = mv88e6xxx_port_check_hw_vlan(ds, dp->port, vlan->vid_begin, - vlan->vid_end); + err = mv88e6xxx_port_check_vtu(ds, dp, vlan->vid_begin, vlan->vid_end); if (err) return err; -- 2.8.0
[RFC 20/20] net: dsa: mv88e6xxx: setup PVT on cross-chip ops
Switches with a Cross-chip Port VLAN Table are currently configured to allow cross-chip frames to egress any internal ports. This means that unbridged cross-chip ports can actually talk to each other, and this is not what we want. In order to restrict that, we need to setup the PVT entry for an external port when it joins or leave a bridge group crossing the switch. Also initialize the PVT to forbid egressing of cross-chip frames to internal user ports by default. Note that a PVT-less switch cannot forbid such frames to egress its internal ports, unless the kernel supports VLAN filtering. In such systems, a bridge group is also implemented as a 802.1Q VLAN and thus a global VTU-based logic can be used to correctly implement cross-chip hardware bridging. Warn the user if the setup doesn't respect this. Signed-off-by: Vivien Didelot--- drivers/net/dsa/mv88e6xxx.c | 98 +++-- 1 file changed, 95 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index 4341ffd..e0f9e93 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -2272,8 +2272,29 @@ static int _mv88e6xxx_pvt_cmd(struct dsa_switch *ds, int src_dev, int src_port, return _mv88e6xxx_pvt_wait(ds); } +static int _mv88e6xxx_pvt_write(struct dsa_switch *ds, int src_dev, + int src_port, u16 data) +{ + int err; + + err = _mv88e6xxx_pvt_wait(ds); + if (err) + return err; + + err = _mv88e6xxx_reg_write(ds, REG_GLOBAL2, GLOBAL2_PVT_DATA, data); + if (err) + return err; + +return _mv88e6xxx_pvt_cmd(ds, src_dev, src_port, + GLOBAL2_PVT_ADDR_OP_WRITE_PVLAN); +} + static int _mv88e6xxx_pvt_init(struct dsa_switch *ds) { + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + struct dsa_port *intp; + int src_dev, src_port; + u16 pv = 0; int err; /* Clear 5 Bit Port for usage with Marvell Link Street devices: @@ -2284,8 +2305,60 @@ static int _mv88e6xxx_pvt_init(struct dsa_switch *ds) if (err) return err; - /* Allow any cross-chip frames to egress any internal 
ports */ - return _mv88e6xxx_pvt_cmd(ds, 0, 0, GLOBAL2_PVT_ADDR_OP_INIT_ONES); + /* Forbid cross-chip frames to egress internal ports */ + dsa_switch_for_each_port(ds, intp, ps->info->num_ports) + if (dsa_is_cpu_port(ds, intp->port) || + dsa_is_dsa_port(ds, intp->port)) + pv |= BIT(intp->port); + + for (src_dev = 0; src_dev < 32; ++src_dev) { + for (src_port = 0; src_port < 16; ++src_port) { + err = _mv88e6xxx_pvt_write(ds, src_dev, src_port, pv); + if (err) + return err; + } + } + + return 0; +} + +static int _mv88e6xxx_port_map_pvt(struct dsa_switch *ds, struct dsa_port *dp) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + struct dsa_port *intp; + u16 pvlan = 0; + + /* Cross-chip frames can egress CPU and DSA ports, and bridge members */ + dsa_switch_for_each_port(ds, intp, ps->info->num_ports) + if (dsa_is_cpu_port(ds, intp->port) || + dsa_is_dsa_port(ds, intp->port) || + (intp->br && intp->br == dp->br)) + pvlan |= BIT(intp->port); + + return _mv88e6xxx_pvt_write(ds, dp->ds->index, dp->port, pvlan); +} + +static int _mv88e6xxx_remap_pvt(struct dsa_switch *ds, + struct net_device *bridge) +{ + struct dsa_switch *dsa_sw; + struct dsa_port *dsa_p; + int err; + + dsa_tree_for_each_switch(ds->dst, dsa_sw) { + if (dsa_sw == ds) + continue; + + dsa_switch_for_each_port(dsa_sw, dsa_p, DSA_MAX_PORTS) { + if (dsa_p->br == bridge) { + err = _mv88e6xxx_port_map_pvt(ds, dsa_p); + if (err) + return err; + } + } + } + + return 0; } int mv88e6xxx_port_bridge_change(struct dsa_switch *ds, struct dsa_port *dp, @@ -2297,7 +2370,19 @@ int mv88e6xxx_port_bridge_change(struct dsa_switch *ds, struct dsa_port *dp, mutex_lock(>smi_mutex); if (dsa_port_is_external(dp, ds)) { - err = -EOPNOTSUPP; + /* Forbidding hardware bridging of cross-chip frames requires a +* Cross-chip Port VLAN Table (PVT), unless VLAN filtering is +* enabled, in which case a global VTU-based logic works. +*/ + if (mv88e6xxx_has(ps, MV88E6XXX_FLAG_PVT)) { + err = _mv88e6xxx_port_map_pvt(ds, dp); +
[RFC 19/20] net: dsa: mv88e6xxx: conditionally init PVT
The current code initializes the Cross-chip Port VLAN Table to all ones, even though the switch model doesn't have one. It also assumes that the switch is configured to support up to 32-switch/16-port cross-chip devices. Implement the access to the PVT and initialize it only if the switch has such a feature. Support only 88E6352 for the moment. This commit brings no functional change for devices with a PVT. Signed-off-by: Vivien Didelot--- drivers/net/dsa/mv88e6352.c | 1 + drivers/net/dsa/mv88e6xxx.c | 54 +++-- drivers/net/dsa/mv88e6xxx.h | 6 + 3 files changed, 54 insertions(+), 7 deletions(-) diff --git a/drivers/net/dsa/mv88e6352.c b/drivers/net/dsa/mv88e6352.c index 4afc24d..29d9fd76 100644 --- a/drivers/net/dsa/mv88e6352.c +++ b/drivers/net/dsa/mv88e6352.c @@ -59,6 +59,7 @@ static const struct mv88e6xxx_info mv88e6352_table[] = { .name = "Marvell 88E6352", .num_databases = 4096, .num_ports = 7, + .flags = BIT(MV88E6XXX_FLAG_PVT), } }; diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index 25852ee..4341ffd 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -2247,6 +2247,47 @@ unlock: return err; } +static int _mv88e6xxx_pvt_wait(struct dsa_switch *ds) +{ + return _mv88e6xxx_wait(ds, REG_GLOBAL2, GLOBAL2_PVT_ADDR, + GLOBAL2_PVT_ADDR_BUSY); +} + +static int _mv88e6xxx_pvt_cmd(struct dsa_switch *ds, int src_dev, int src_port, + u16 op) +{ + u16 reg = op; + int err; + + /* 9-bit Cross-chip PVT pointer: with GLOBAL2_MISC_5_BIT_PORT cleared, +* source device is 5-bit, source port is 4-bit. +*/ + reg |= (src_dev & 0x1f) << 4; + reg |= (src_port & 0xf); + + err = _mv88e6xxx_reg_write(ds, REG_GLOBAL2, GLOBAL2_PVT_ADDR, reg); + if (err) + return err; + + return _mv88e6xxx_pvt_wait(ds); +} + +static int _mv88e6xxx_pvt_init(struct dsa_switch *ds) +{ + int err; + + /* Clear 5 Bit Port for usage with Marvell Link Street devices: +* use 4 bits for the Src_Port/Src_Trunk and 5 bits for the Src_Dev. 
+*/ + err = _mv88e6xxx_reg_write(ds, REG_GLOBAL2, GLOBAL2_MISC, + 0 & ~GLOBAL2_MISC_5_BIT_PORT); + if (err) + return err; + + /* Allow any cross-chip frames to egress any internal ports */ + return _mv88e6xxx_pvt_cmd(ds, 0, 0, GLOBAL2_PVT_ADDR_OP_INIT_ONES); +} + int mv88e6xxx_port_bridge_change(struct dsa_switch *ds, struct dsa_port *dp, struct net_device *bridge) { @@ -2770,13 +2811,12 @@ int mv88e6xxx_setup_global(struct dsa_switch *ds) if (err) goto unlock; - /* Initialise cross-chip port VLAN table to reset -* defaults. -*/ - err = _mv88e6xxx_reg_write(ds, REG_GLOBAL2, - GLOBAL2_PVT_ADDR, 0x9000); - if (err) - goto unlock; + /* Initialize Cross-chip Port VLAN Table (PVT) */ + if (mv88e6xxx_has(ps, MV88E6XXX_FLAG_PVT)) { + err = _mv88e6xxx_pvt_init(ds); + if (err) + goto unlock; + } /* Clear the priority override table. */ for (i = 0; i < 16; i++) { diff --git a/drivers/net/dsa/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx.h index 325caf8..fbde8b4 100644 --- a/drivers/net/dsa/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx.h @@ -298,6 +298,10 @@ #define GLOBAL2_INGRESS_OP 0x09 #define GLOBAL2_INGRESS_DATA 0x0a #define GLOBAL2_PVT_ADDR 0x0b +#define GLOBAL2_PVT_ADDR_BUSY BIT(15) +#define GLOBAL2_PVT_ADDR_OP_INIT_ONES ((0x01 << 12) | GLOBAL2_PVT_ADDR_BUSY) +#define GLOBAL2_PVT_ADDR_OP_WRITE_PVLAN((0x03 << 12) | GLOBAL2_PVT_ADDR_BUSY) +#define GLOBAL2_PVT_ADDR_OP_READ ((0x04 << 12) | GLOBAL2_PVT_ADDR_BUSY) #define GLOBAL2_PVT_DATA 0x0c #define GLOBAL2_SWITCH_MAC 0x0d #define GLOBAL2_SWITCH_MAC_BUSY BIT(15) @@ -335,10 +339,12 @@ #define GLOBAL2_WDOG_CONTROL 0x1b #define GLOBAL2_QOS_WEIGHT 0x1c #define GLOBAL2_MISC 0x1d +#define GLOBAL2_MISC_5_BIT_PORTBIT(14) #define MV88E6XXX_N_FID4096 enum mv88e6xxx_flag { + MV88E6XXX_FLAG_PVT, MV88E6XXX_NUM_FLAGS, }; -- 2.8.0
[RFC 18/20] net: dsa: mv88e6xxx: add flags to info
Add a flags bitmap to the mv88e6xxx_info structure to help describe features supported or not by a switch model. Signed-off-by: Vivien Didelot--- drivers/net/dsa/mv88e6xxx.h | 11 +++ 1 file changed, 11 insertions(+) diff --git a/drivers/net/dsa/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx.h index 56e3347..325caf8 100644 --- a/drivers/net/dsa/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx.h @@ -338,6 +338,10 @@ #define MV88E6XXX_N_FID4096 +enum mv88e6xxx_flag { + MV88E6XXX_NUM_FLAGS, +}; + enum mv88e6xxx_family { MV88E6XXX_FAMILY_NONE, MV88E6XXX_FAMILY_6065, /* 6031 6035 6061 6065 */ @@ -356,6 +360,7 @@ struct mv88e6xxx_info { const char *name; unsigned int num_databases; unsigned int num_ports; + unsigned long flags; }; struct mv88e6xxx_atu_entry { @@ -445,6 +450,12 @@ struct mv88e6xxx_hw_stat { enum stat_type type; }; +static inline bool mv88e6xxx_has(struct mv88e6xxx_priv_state *ps, +enum mv88e6xxx_flag flag) +{ + return !!(ps->info->flags & BIT(flag)); +} + int mv88e6xxx_switch_reset(struct dsa_switch *ds, bool ppu_active); const char *mv88e6xxx_drv_probe(struct device *dsa_dev, struct device *host_dev, int sw_addr, void **priv, -- 2.8.0
[RFC 17/20] net: dsa: mv88e6xxx: factorize port bridge change
Implement a mv88e6xxx_port_bridge_change function to factorize the configuration needed when a port joins or leaves a bridge group. This will simplify the implementation of cross-chip bridging. Signed-off-by: Vivien Didelot--- drivers/net/dsa/mv88e6xxx.c | 67 +++-- 1 file changed, 40 insertions(+), 27 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index 8004d00..25852ee 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -1150,6 +1150,24 @@ static int _mv88e6xxx_port_map_vlantable(struct dsa_switch *ds, return _mv88e6xxx_reg_write(ds, REG_PORT(port), PORT_BASE_VLAN, reg); } +static int _mv88e6xxx_remap_vlantable(struct dsa_switch *ds, + struct net_device *bridge) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + struct dsa_port *intp; + int err; + + dsa_switch_for_each_port(ds, intp, ps->info->num_ports) { + if (intp->br == bridge) { + err = _mv88e6xxx_port_map_vlantable(ds, intp); + if (err) + return err; + } + } + + return 0; +} + void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port, u8 state) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); @@ -2229,51 +2247,46 @@ unlock: return err; } -int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, struct dsa_port *dp, - struct net_device *bridge) +int mv88e6xxx_port_bridge_change(struct dsa_switch *ds, struct dsa_port *dp, +struct net_device *bridge) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - struct dsa_port *intp; int err; - if (dsa_port_is_external(dp, ds)) - return -EOPNOTSUPP; - mutex_lock(>smi_mutex); - /* Remap each port's VLANTable */ - dsa_switch_for_each_port(ds, intp, ps->info->num_ports) { - if (intp->br == bridge) { - err = _mv88e6xxx_port_map_vlantable(ds, intp); + if (dsa_port_is_external(dp, ds)) { + err = -EOPNOTSUPP; + } else { + /* Remap VLANTable of concerned in-chip ports */ + if (!dp->br) { + err = _mv88e6xxx_port_map_vlantable(ds, dp); if (err) - break; + goto unlock; } + + err = 
_mv88e6xxx_remap_vlantable(ds, bridge); + if (err) + goto unlock; } +unlock: mutex_unlock(>smi_mutex); return err; } +int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, struct dsa_port *dp, + struct net_device *bridge) +{ + return mv88e6xxx_port_bridge_change(ds, dp, bridge); +} + void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, struct dsa_port *dp, struct net_device *bridge) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - struct dsa_port *intp; - - if (dsa_port_is_external(dp, ds)) - return; - - mutex_lock(>smi_mutex); - - /* Remap each port's VLANTable */ - dsa_switch_for_each_port(ds, intp, ps->info->num_ports) - if (intp == dp || intp->br == bridge) - if (_mv88e6xxx_port_map_vlantable(ds, intp)) - netdev_warn(ds->ports[intp->port], - "failed to remap\n"); - - mutex_unlock(>smi_mutex); + if (mv88e6xxx_port_bridge_change(ds, dp, bridge)) + netdev_err(ds->ports[dp->port], "failed to unbridge\n"); } static void mv88e6xxx_bridge_work(struct work_struct *work) -- 2.8.0
[RFC 13/20] net: dsa: list switches in tree
List the registered dsa_switch structures in a "ds" member of the dsa_switch_tree structure. This allows the drivers to easily iterate on the DSA switch structures of their related DSA tree. Signed-off-by: Vivien Didelot--- include/net/dsa.h | 9 + net/dsa/dsa.c | 3 +++ 2 files changed, 12 insertions(+) diff --git a/include/net/dsa.h b/include/net/dsa.h index 389227d..85fac8a 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -32,11 +32,16 @@ enum dsa_tag_protocol { #define DSA_MAX_SWITCHES 4 #define DSA_MAX_PORTS 12 + +#define dsa_tree_for_each_switch(_dst, _ds)\ + list_for_each_entry(_ds, &_dst->ds, list) + #define dsa_switch_for_each_port(_ds, _dp, _num_ports) \ for (_dp = list_first_entry(&_ds->dp, typeof(*_dp), list); \ &_dp->list != (&_ds->dp) && _dp->port < _num_ports;\ _dp = list_next_entry(_dp, list)) + struct dsa_chip_data { /* * How to access the switch configuration registers. @@ -125,6 +130,8 @@ struct dsa_switch_tree { * Data for the individual switch chips. */ struct dsa_switch *switches[DSA_MAX_SWITCHES]; + + struct list_headds; }; struct dsa_port { @@ -137,6 +144,8 @@ struct dsa_port { }; struct dsa_switch { + struct list_headlist; + /* * Parent switch tree, and switch index. */ diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index aa4a61a..b0055c7 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -842,6 +842,8 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, int i; unsigned configured = 0; + INIT_LIST_HEAD(>ds); + dst->pd = pd; dst->master_netdev = dev; dst->cpu_switch = -1; @@ -858,6 +860,7 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, } dst->switches[i] = ds; + list_add_tail(>list, >ds); ++configured; } -- 2.8.0
[RFC 14/20] net: dsa: add tree-wide bridge ops
In order to support cross-chip operations, we need to inform each switch driver when a port operation occurs in a DSA tree. This allows drivers to configure cross-chip port-based VLAN table, VTU or FDB entries on DSA links, in order to implement a correct hardware switching of frames. Add a new tree.c file to implement tree-wide operations, propagating a port-based operation on each switch of a tree. Implement tree-wide bridge operations. Signed-off-by: Vivien Didelot--- drivers/net/dsa/bcm_sf2.c | 6 + drivers/net/dsa/mv88e6xxx.c | 6 + include/net/dsa.h | 6 + net/dsa/Makefile| 2 +- net/dsa/dsa_priv.h | 6 + net/dsa/slave.c | 46 --- net/dsa/tree.c | 66 + 7 files changed, 96 insertions(+), 42 deletions(-) create mode 100644 net/dsa/tree.c diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 6e3b844..0a91ea9 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -498,6 +498,9 @@ static int bcm_sf2_sw_br_join(struct dsa_switch *ds, struct dsa_port *dp, struct dsa_port *intp; u32 reg, p_ctl; + if (dsa_port_is_external(dp, ds)) + return -EOPNOTSUPP; + p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(dp->port)); dsa_switch_for_each_port(ds, intp, priv->hw_params.num_ports) { @@ -531,6 +534,9 @@ static void bcm_sf2_sw_br_leave(struct dsa_switch *ds, struct dsa_port *dp, struct dsa_port *intp; u32 reg, p_ctl; + if (dsa_port_is_external(dp, ds)) + return; + p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(dp->port)); dsa_switch_for_each_port(ds, intp, priv->hw_params.num_ports) { diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index 89d0206..6fef29b 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -2212,6 +2212,9 @@ int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, struct dsa_port *dp, struct dsa_port *intp; int err; + if (dsa_port_is_external(dp, ds)) + return -EOPNOTSUPP; + mutex_lock(>smi_mutex); /* Remap each port's VLANTable */ @@ -2234,6 +2237,9 @@ void 
mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, struct dsa_port *dp, struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); struct dsa_port *intp; + if (dsa_port_is_external(dp, ds)) + return; + mutex_lock(>smi_mutex); /* Remap each port's VLANTable */ diff --git a/include/net/dsa.h b/include/net/dsa.h index 85fac8a..33172c9 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -193,6 +193,12 @@ struct dsa_switch { struct list_headdp; }; +static inline bool dsa_port_is_external(struct dsa_port *dp, + struct dsa_switch *ds) +{ + return dp->ds != ds; +} + static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p) { return !!(ds->index == ds->dst->cpu_switch && p == ds->dst->cpu_port); diff --git a/net/dsa/Makefile b/net/dsa/Makefile index da06ed1..bf8d12c 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -1,6 +1,6 @@ # the core obj-$(CONFIG_NET_DSA) += dsa_core.o -dsa_core-y += dsa.o slave.o +dsa_core-y += dsa.o tree.o slave.o # tagging formats dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index c5afddd..6e08b3d 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -46,6 +46,12 @@ struct dsa_slave_priv { /* dsa.c */ extern char dsa_driver_version[]; +/* tree.c */ +int dsa_tree_bridge_port_join(struct dsa_switch_tree *dst, struct dsa_port *dp, + struct net_device *br); +void dsa_tree_bridge_port_leave(struct dsa_switch_tree *dst, + struct dsa_port *dp, struct net_device *br); + /* slave.c */ extern const struct dsa_device_ops notag_netdev_ops; void dsa_slave_mii_bus_init(struct dsa_switch *ds); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index b90caf8..7123ae2 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -425,45 +425,6 @@ static int dsa_slave_port_obj_dump(struct net_device *dev, return err; } -static int dsa_slave_bridge_port_join(struct net_device *dev, - struct net_device *br) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; - int ret = 
-EOPNOTSUPP; - - p->dp->br = br; - - if (ds->drv->port_bridge_join) - ret = ds->drv->port_bridge_join(ds, p->dp, br); - - if (ret && ret != -EOPNOTSUPP) { - p->dp->br = NULL; - return ret; - } - - return 0; -} - -static void dsa_slave_bridge_port_leave(struct net_device *dev) -{ - struct dsa_slave_priv *p
[RFC 16/20] net: dsa: add tree-wide VLAN ops
In order to support cross-chip operations, we need to inform each switch driver when a port operation occurs in a DSA tree. Implement tree-wide VLAN operations. Signed-off-by: Vivien Didelot--- drivers/net/dsa/mv88e6xxx.c | 12 + net/dsa/dsa_priv.h | 8 ++ net/dsa/slave.c | 59 ++-- net/dsa/tree.c | 60 + 4 files changed, 87 insertions(+), 52 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index 7d29de3..8004d00 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -1378,6 +1378,9 @@ int mv88e6xxx_port_vlan_dump(struct dsa_switch *ds, struct dsa_port *dp, u16 pvid; int err; + if (dsa_port_is_external(dp, ds)) + return -EOPNOTSUPP; + mutex_lock(>smi_mutex); err = _mv88e6xxx_port_pvid_get(ds, dp->port, ); @@ -1835,6 +1838,9 @@ int mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, struct dsa_port *dp, { int err; + if (dsa_port_is_external(dp, ds)) + return -EOPNOTSUPP; + /* If the requested port doesn't belong to the same bridge as the VLAN * members, do not support it (yet) and fallback to software VLAN. 
*/ @@ -1874,6 +1880,9 @@ void mv88e6xxx_port_vlan_add(struct dsa_switch *ds, struct dsa_port *dp, bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; u16 vid; + if (dsa_port_is_external(dp, ds)) + return; + mutex_lock(>smi_mutex); for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) @@ -1930,6 +1939,9 @@ int mv88e6xxx_port_vlan_del(struct dsa_switch *ds, struct dsa_port *dp, u16 pvid, vid; int err = 0; + if (dsa_port_is_external(dp, ds)) + return -EOPNOTSUPP; + mutex_lock(>smi_mutex); err = _mv88e6xxx_port_pvid_get(ds, dp->port, ); diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index e8765c3..d743d6a 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -60,6 +60,14 @@ int dsa_tree_port_fdb_del(struct dsa_switch_tree *dst, struct dsa_port *dp, int dsa_tree_port_fdb_dump(struct dsa_switch_tree *dst, struct dsa_port *dp, struct switchdev_obj_port_fdb *fdb, switchdev_obj_dump_cb_t *cb); +int dsa_tree_port_vlan_add(struct dsa_switch_tree *dst, struct dsa_port *dp, + const struct switchdev_obj_port_vlan *vlan, + struct switchdev_trans *trans); +int dsa_tree_port_vlan_del(struct dsa_switch_tree *dst, struct dsa_port *dp, + const struct switchdev_obj_port_vlan *vlan); +int dsa_tree_port_vlan_dump(struct dsa_switch_tree *dst, struct dsa_port *dp, + struct switchdev_obj_port_vlan *vlan, + switchdev_obj_dump_cb_t *cb); /* slave.c */ extern const struct dsa_device_ops notag_netdev_ops; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 90bcf8a..19469dc 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -195,50 +195,6 @@ out: return 0; } -static int dsa_slave_port_vlan_add(struct net_device *dev, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; - - if (switchdev_trans_ph_prepare(trans)) { - if (!ds->drv->port_vlan_prepare || !ds->drv->port_vlan_add) - return -EOPNOTSUPP; - - return ds->drv->port_vlan_prepare(ds, p->dp, vlan, trans); - } - - 
ds->drv->port_vlan_add(ds, p->dp, vlan, trans); - - return 0; -} - -static int dsa_slave_port_vlan_del(struct net_device *dev, - const struct switchdev_obj_port_vlan *vlan) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; - - if (!ds->drv->port_vlan_del) - return -EOPNOTSUPP; - - return ds->drv->port_vlan_del(ds, p->dp, vlan); -} - -static int dsa_slave_port_vlan_dump(struct net_device *dev, - struct switchdev_obj_port_vlan *vlan, - switchdev_obj_dump_cb_t *cb) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; - - if (ds->drv->port_vlan_dump) - return ds->drv->port_vlan_dump(ds, p->dp, vlan, cb); - - return -EOPNOTSUPP; -} - static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct dsa_slave_priv *p = netdev_priv(dev); @@ -323,9 +279,9 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
[RFC 04/20] net: dsa: pass dsa_port down to drivers FDB ops
Now that DSA as proper structure for DSA ports, pass it down to the port_fdb_{prepare,add,del,dump} driver functions. Signed-off-by: Vivien Didelot--- drivers/net/dsa/bcm_sf2.c | 20 +++- drivers/net/dsa/mv88e6xxx.c | 22 +++--- drivers/net/dsa/mv88e6xxx.h | 8 include/net/dsa.h | 8 net/dsa/slave.c | 8 5 files changed, 34 insertions(+), 32 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 2d7b297..f7b53fa 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -725,7 +725,7 @@ static int bcm_sf2_arl_op(struct bcm_sf2_priv *priv, int op, int port, return bcm_sf2_arl_read(priv, mac, vid, , , is_valid); } -static int bcm_sf2_sw_fdb_prepare(struct dsa_switch *ds, int port, +static int bcm_sf2_sw_fdb_prepare(struct dsa_switch *ds, struct dsa_port *dp, const struct switchdev_obj_port_fdb *fdb, struct switchdev_trans *trans) { @@ -733,22 +733,22 @@ static int bcm_sf2_sw_fdb_prepare(struct dsa_switch *ds, int port, return 0; } -static void bcm_sf2_sw_fdb_add(struct dsa_switch *ds, int port, +static void bcm_sf2_sw_fdb_add(struct dsa_switch *ds, struct dsa_port *dp, const struct switchdev_obj_port_fdb *fdb, struct switchdev_trans *trans) { struct bcm_sf2_priv *priv = ds_to_priv(ds); - if (bcm_sf2_arl_op(priv, 0, port, fdb->addr, fdb->vid, true)) + if (bcm_sf2_arl_op(priv, 0, dp->port, fdb->addr, fdb->vid, true)) pr_err("%s: failed to add MAC address\n", __func__); } -static int bcm_sf2_sw_fdb_del(struct dsa_switch *ds, int port, +static int bcm_sf2_sw_fdb_del(struct dsa_switch *ds, struct dsa_port *dp, const struct switchdev_obj_port_fdb *fdb) { struct bcm_sf2_priv *priv = ds_to_priv(ds); - return bcm_sf2_arl_op(priv, 0, port, fdb->addr, fdb->vid, false); + return bcm_sf2_arl_op(priv, 0, dp->port, fdb->addr, fdb->vid, false); } static int bcm_sf2_arl_search_wait(struct bcm_sf2_priv *priv) @@ -799,16 +799,18 @@ static int bcm_sf2_sw_fdb_copy(struct net_device *dev, int port, return cb(>obj); } -static int 
bcm_sf2_sw_fdb_dump(struct dsa_switch *ds, int port, +static int bcm_sf2_sw_fdb_dump(struct dsa_switch *ds, struct dsa_port *dp, struct switchdev_obj_port_fdb *fdb, int (*cb)(struct switchdev_obj *obj)) { struct bcm_sf2_priv *priv = ds_to_priv(ds); - struct net_device *dev = ds->ports[port]; + struct net_device *dev; struct bcm_sf2_arl_entry results[2]; unsigned int count = 0; int ret; + dev = ds->ports[dp->port]; + /* Start search operation */ core_writel(priv, ARLA_SRCH_STDN, CORE_ARLA_SRCH_CTL); @@ -819,12 +821,12 @@ static int bcm_sf2_sw_fdb_dump(struct dsa_switch *ds, int port, /* Read both entries, then return their values back */ bcm_sf2_arl_search_rd(priv, 0, [0]); - ret = bcm_sf2_sw_fdb_copy(dev, port, [0], fdb, cb); + ret = bcm_sf2_sw_fdb_copy(dev, dp->port, [0], fdb, cb); if (ret) return ret; bcm_sf2_arl_search_rd(priv, 1, [1]); - ret = bcm_sf2_sw_fdb_copy(dev, port, [1], fdb, cb); + ret = bcm_sf2_sw_fdb_copy(dev, dp->port, [1], fdb, cb); if (ret) return ret; diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index 3f78c73..c1ff763 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -2031,7 +2031,7 @@ static int _mv88e6xxx_port_fdb_load(struct dsa_switch *ds, int port, return _mv88e6xxx_atu_load(ds, ); } -int mv88e6xxx_port_fdb_prepare(struct dsa_switch *ds, int port, +int mv88e6xxx_port_fdb_prepare(struct dsa_switch *ds, struct dsa_port *dp, const struct switchdev_obj_port_fdb *fdb, struct switchdev_trans *trans) { @@ -2041,7 +2041,7 @@ int mv88e6xxx_port_fdb_prepare(struct dsa_switch *ds, int port, return 0; } -void mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port, +void mv88e6xxx_port_fdb_add(struct dsa_switch *ds, struct dsa_port *dp, const struct switchdev_obj_port_fdb *fdb, struct switchdev_trans *trans) { @@ -2051,19 +2051,19 @@ void mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port, struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); mutex_lock(>smi_mutex); - if (_mv88e6xxx_port_fdb_load(ds, 
port, fdb->addr, fdb->vid, state)) - netdev_err(ds->ports[port], "failed to load MAC address\n"); + if
[RFC 15/20] net: dsa: add tree-wide FDB ops
In order to support cross-chip operations, we need to inform each switch driver when a port operation occurs in a DSA tree. Implement tree-wide FDB operations. Signed-off-by: Vivien Didelot--- drivers/net/dsa/bcm_sf2.c | 12 drivers/net/dsa/mv88e6xxx.c | 12 net/dsa/dsa_priv.h | 9 ++ net/dsa/slave.c | 68 ++--- net/dsa/tree.c | 61 5 files changed, 109 insertions(+), 53 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 0a91ea9..6e634e5 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -733,6 +733,9 @@ static int bcm_sf2_sw_fdb_prepare(struct dsa_switch *ds, struct dsa_port *dp, const struct switchdev_obj_port_fdb *fdb, struct switchdev_trans *trans) { + if (dsa_port_is_external(dp, ds)) + return -EOPNOTSUPP; + /* We do not need to do anything specific here yet */ return 0; } @@ -743,6 +746,9 @@ static void bcm_sf2_sw_fdb_add(struct dsa_switch *ds, struct dsa_port *dp, { struct bcm_sf2_priv *priv = ds_to_priv(ds); + if (dsa_port_is_external(dp, ds)) + return; + if (bcm_sf2_arl_op(priv, 0, dp->port, fdb->addr, fdb->vid, true)) pr_err("%s: failed to add MAC address\n", __func__); } @@ -752,6 +758,9 @@ static int bcm_sf2_sw_fdb_del(struct dsa_switch *ds, struct dsa_port *dp, { struct bcm_sf2_priv *priv = ds_to_priv(ds); + if (dsa_port_is_external(dp, ds)) + return -EOPNOTSUPP; + return bcm_sf2_arl_op(priv, 0, dp->port, fdb->addr, fdb->vid, false); } @@ -813,6 +822,9 @@ static int bcm_sf2_sw_fdb_dump(struct dsa_switch *ds, struct dsa_port *dp, unsigned int count = 0; int ret; + if (dsa_port_is_external(dp, ds)) + return -EOPNOTSUPP; + dev = ds->ports[dp->port]; /* Start search operation */ diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index 6fef29b..7d29de3 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -2037,6 +2037,9 @@ int mv88e6xxx_port_fdb_prepare(struct dsa_switch *ds, struct dsa_port *dp, const struct switchdev_obj_port_fdb *fdb, struct switchdev_trans 
*trans) { + if (dsa_port_is_external(dp, ds)) + return -EOPNOTSUPP; + /* We don't need any dynamic resource from the kernel (yet), * so skip the prepare phase. */ @@ -2052,6 +2055,9 @@ void mv88e6xxx_port_fdb_add(struct dsa_switch *ds, struct dsa_port *dp, GLOBAL_ATU_DATA_STATE_UC_STATIC; struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + if (dsa_port_is_external(dp, ds)) + return; + mutex_lock(>smi_mutex); if (_mv88e6xxx_port_fdb_load(ds, dp->port, fdb->addr, fdb->vid, state)) netdev_err(ds->ports[dp->port], "failed to load MAC address\n"); @@ -2064,6 +2070,9 @@ int mv88e6xxx_port_fdb_del(struct dsa_switch *ds, struct dsa_port *dp, struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int ret; + if (dsa_port_is_external(dp, ds)) + return -EOPNOTSUPP; + mutex_lock(>smi_mutex); ret = _mv88e6xxx_port_fdb_load(ds, dp->port, fdb->addr, fdb->vid, GLOBAL_ATU_DATA_STATE_UNUSED); @@ -2169,6 +2178,9 @@ int mv88e6xxx_port_fdb_dump(struct dsa_switch *ds, struct dsa_port *dp, u16 fid; int err; + if (dsa_port_is_external(dp, ds)) + return -EOPNOTSUPP; + mutex_lock(>smi_mutex); /* Dump port's default Filtering Information Database (VLAN ID 0) */ diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 6e08b3d..e8765c3 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -14,6 +14,7 @@ #include #include #include +#include struct dsa_device_ops { struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev); @@ -51,6 +52,14 @@ int dsa_tree_bridge_port_join(struct dsa_switch_tree *dst, struct dsa_port *dp, struct net_device *br); void dsa_tree_bridge_port_leave(struct dsa_switch_tree *dst, struct dsa_port *dp, struct net_device *br); +int dsa_tree_port_fdb_add(struct dsa_switch_tree *dst, struct dsa_port *dp, + const struct switchdev_obj_port_fdb *fdb, + struct switchdev_trans *trans); +int dsa_tree_port_fdb_del(struct dsa_switch_tree *dst, struct dsa_port *dp, + const struct switchdev_obj_port_fdb *fdb); +int dsa_tree_port_fdb_dump(struct dsa_switch_tree *dst, struct 
dsa_port *dp, + struct
Re: [PATCH 1/5] phylib: don't return NULL from get_phy_device()
On Wednesday 27 April 2016 14:47:29 Florian Fainelli wrote: > diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c > index 499003ee8055..94a27b028dd8 100644 > --- a/drivers/net/phy/mdio_bus.c > +++ b/drivers/net/phy/mdio_bus.c > @@ -333,7 +333,7 @@ int __mdiobus_register(struct mii_bus *bus, struct > module *owner) > struct phy_device *phydev; > > phydev = mdiobus_scan(bus, i); > - if (IS_ERR(phydev)) { > + if (IS_ERR(phydev) && PTR_ERR(phydev) != -ENODEV) { > err = PTR_ERR(phydev); > goto error; > } > > I think that is an improvement over the original code, and better than reverting the series. Out of the three callers of mdiobus_scan, I already commented on drivers/net/ethernet/marvell/pxa168_eth.c being wrong to start with, and drivers/net/ethernet/cadence/macb.c seems to require the same fix that you did here for mdio_bus.c Arnd
Re: [PATCH 1/5] phylib: don't return NULL from get_phy_device()
On Wednesday 27 April 2016 23:09:37 Sergei Shtylyov wrote: > Hello. > > On 04/27/2016 10:49 PM, Andrew Lunn wrote: > > >> Sergei Shtylyovwrites: > >> > >>> Arnd Bergmann asked that get_phy_device() returns either NULL or the error > >>> value, not both on error. Do as he said, return ERR_PTR(-ENODEV) instead > >>> of NULL when the PHY ID registers read as all ones. > >>> > >>> Suggested-by: Arnd Bergmann > >>> Signed-off-by: Sergei Shtylyov > >>> > >>> --- > >>> drivers/net/phy/phy_device.c |2 +- > >>> 1 file changed, 1 insertion(+), 1 deletion(-) > >>> > >>> Index: net-next/drivers/net/phy/phy_device.c > >>> === > >>> --- net-next.orig/drivers/net/phy/phy_device.c > >>> +++ net-next/drivers/net/phy/phy_device.c > >>> @@ -529,7 +529,7 @@ struct phy_device *get_phy_device(struct > >>> > >>> /* If the phy_id is mostly Fs, there is no device there */ > >>> if ((phy_id & 0x1fff) == 0x1fff) > >>> - return NULL; > >>> + return ERR_PTR(-ENODEV); > >>> > >>> return phy_device_create(bus, addr, phy_id, is_c45, _ids); > >>> } > > > > This change is wrong, it needs reverting, or the call sights need > > fixing to expect ENODEV. > > So this function had a good reason to return NULL, as it turned out... :-( > > > The point is, the device not being there is not an error, with respect > > to the code calling this function. > > > > It gets called by mdiobus_scan() > > > > struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr) > > { > > struct phy_device *phydev; > > int err; > > > > phydev = get_phy_device(bus, addr, false); > > if (IS_ERR(phydev) || phydev == NULL) > > return phydev; > > > > So before, we return NULL, if the device was not there. Now we return > > ERR_PTR(-ENODEV). > > > > This is being called by: > > > > int __mdiobus_register(struct mii_bus *bus, struct module *owner) > > { > > struct mdio_device *mdiodev; > > ... 
> > for (i = 0; i < PHY_MAX_ADDR; i++) { > > if ((bus->phy_mask & (1 << i)) == 0) { > > struct phy_device *phydev; > > > > phydev = mdiobus_scan(bus, i); > > if (IS_ERR(phydev)) { > > err = PTR_ERR(phydev); > > goto error; > > } > > } > > } > > > > This is treating ERR_PTR(-ENODEV) as a fatal error, where as before > > IS_ERR(NULL) would be false and it would continue scanning other > > addresses on the bus. > > Thank you for the detailed analysis! (And shame on me for the lack of it.) > > > Please revert this, or fix all the callsites such that ENODEV is not a > > fatal error. > > OK, I'll do what DaveM decides. I found one other user that remains broken: pxa168_init_phy() looks wrong before and after the patch: pep->phy = mdiobus_scan(pep->smi_bus, pep->phy_addr); if (!pep->phy) return -ENODEV; err = phy_connect_direct(dev, pep->phy, pxa168_eth_adjust_link, pep->phy_intf); as phy_connect_direct() will go on and dereference an error pointer. This should check for IS_ERR(), and with the patches applied, we can drop the !pep->phy check. Arnd
[PATCH next v2] ipvlan: Fix failure path in dev registration during link creation
From: Mahesh BandewarWhen newlink creation fails at device-registration, the port->count is decremented twice. Francesco Ruggeri (frugg...@arista.com) found this issue in Macvlan and the same exists in IPvlan driver too. While fixing this issue I noticed another issue of missing unregister in case of failure, so adding it to the fix which is similar to the macvlan fix by Francesco in commit 308379607548 ("macvlan: fix failure during registration v3") Reported-by: Francesco Ruggeri Signed-off-by: Mahesh Bandewar CC: Eric Dumazet CC: Eric W. Biederman --- drivers/net/ipvlan/ipvlan_main.c | 19 --- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 57941d3f4227..1c4d395fbd49 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -113,6 +113,7 @@ static int ipvlan_init(struct net_device *dev) { struct ipvl_dev *ipvlan = netdev_priv(dev); const struct net_device *phy_dev = ipvlan->phy_dev; + struct ipvl_port *port = ipvlan->port; dev->state = (dev->state & ~IPVLAN_STATE_MASK) | (phy_dev->state & IPVLAN_STATE_MASK); @@ -128,6 +129,8 @@ static int ipvlan_init(struct net_device *dev) if (!ipvlan->pcpu_stats) return -ENOMEM; + port->count += 1; + return 0; } @@ -481,27 +484,21 @@ static int ipvlan_link_new(struct net *src_net, struct net_device *dev, dev->priv_flags |= IFF_IPVLAN_SLAVE; - port->count += 1; err = register_netdevice(dev); if (err < 0) - goto ipvlan_destroy_port; + return err; err = netdev_upper_dev_link(phy_dev, dev); - if (err) - goto ipvlan_destroy_port; + if (err) { + unregister_netdevice(dev); + return err; + } list_add_tail_rcu(>pnode, >ipvlans); ipvlan_set_port_mode(port, mode); netif_stacked_transfer_operstate(phy_dev, dev); return 0; - -ipvlan_destroy_port: - port->count -= 1; - if (!port->count) - ipvlan_port_destroy(phy_dev); - - return err; } static void ipvlan_link_delete(struct net_device *dev, struct list_head *head) -- 
2.8.0.rc3.226.g39d4020
Re: [PATCH 1/5] phylib: don't return NULL from get_phy_device()
On 27/04/16 12:49, Andrew Lunn wrote: > On Wed, Apr 27, 2016 at 03:30:57PM -0400, Vivien Didelot wrote: >> Hi David, All, >> >> Sergei Shtylyovwrites: >> >>> Arnd Bergmann asked that get_phy_device() returns either NULL or the error >>> value, not both on error. Do as he said, return ERR_PTR(-ENODEV) instead >>> of NULL when the PHY ID registers read as all ones. >>> >>> Suggested-by: Arnd Bergmann >>> Signed-off-by: Sergei Shtylyov >>> >>> --- >>> drivers/net/phy/phy_device.c |2 +- >>> 1 file changed, 1 insertion(+), 1 deletion(-) >>> >>> Index: net-next/drivers/net/phy/phy_device.c >>> === >>> --- net-next.orig/drivers/net/phy/phy_device.c >>> +++ net-next/drivers/net/phy/phy_device.c >>> @@ -529,7 +529,7 @@ struct phy_device *get_phy_device(struct >>> >>> /* If the phy_id is mostly Fs, there is no device there */ >>> if ((phy_id & 0x1fff) == 0x1fff) >>> - return NULL; >>> + return ERR_PTR(-ENODEV); >>> >>> return phy_device_create(bus, addr, phy_id, is_c45, _ids); >>> } > > This change is wrong, it needs reverting, or the call sights need > fixing to expect ENODEV. > > The point is, the device not being there is not an error, with respect > to the code calling this function. > > It gets called by mdiobus_scan() > > struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr) > { > struct phy_device *phydev; > int err; > > phydev = get_phy_device(bus, addr, false); > if (IS_ERR(phydev) || phydev == NULL) > return phydev; > > So before, we return NULL, if the device was not there. Now we return > ERR_PTR(-ENODEV). > > This is being called by: > > int __mdiobus_register(struct mii_bus *bus, struct module *owner) > { > struct mdio_device *mdiodev; > ... 
> for (i = 0; i < PHY_MAX_ADDR; i++) { > if ((bus->phy_mask & (1 << i)) == 0) { > struct phy_device *phydev; > > phydev = mdiobus_scan(bus, i); > if (IS_ERR(phydev)) { > err = PTR_ERR(phydev); > goto error; > } > } > } > > This is treating ERR_PTR(-ENODEV) as a fatal error, where as before > IS_ERR(NULL) would be false and it would continue scanning other > addresses on the bus. > > Please revert this, or fix all the callsites such that ENODEV is not a > fatal error. So the one you pointed out in __mdiobus_register() is definitively needed, though I did get a different issue than Vivien's (-EBUSY vs. -EINVAL). The get_phy_device() in drivers/of/of_mdio.c probably needs something similar too, here is what I locally have for the moment: diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 499003ee8055..94a27b028dd8 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -333,7 +333,7 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner) struct phy_device *phydev; phydev = mdiobus_scan(bus, i); - if (IS_ERR(phydev)) { + if (IS_ERR(phydev) && PTR_ERR(phydev) != -ENODEV) { err = PTR_ERR(phydev); goto error; } -- Florian
Re: [PATCH net-next] net: dsa: Provide CPU port statistics to master netdev
On 27/04/16 12:03, Andrew Lunn wrote: >> +if (stringset == ETH_SS_STATS && ds->drv->get_strings) { >> +ndata = data + mcount * len; >> +/* This function copies ETH_GSTRINGS_LEN bytes, we will mangle >> + * the output after to prepend our CPU port prefix we >> + * constructed earlier >> + */ >> +ds->drv->get_strings(ds, cpu_port, ndata); >> +count = ds->drv->get_sset_count(ds); >> +for (i = 0; i < count; i++) { >> +memmove(ndata + (i * len + sizeof(pfx)), >> +ndata + i * len, len - sizeof(pfx)); >> +memcpy(ndata + i * len, pfx, sizeof(pfx)); > > Hi Florian > > Did you check what happens if this causes the NULL terminator to be > discarded? Does ethtool handle that? As i said before, it is unclear > if one is required. I just did, yes. So ethtool has a do_gstringset() function which NULL-terminates every string set except the statistics kind (ETH_SS_STATS or ETH_SS_PHY_STATS) but this is not much of a problem because it limits the output to ETH_GSTRING_LEN anyway. After injecting a bit of error in net/dsa/slave.c to have a much bigger prefix making us push the stats names, the stats are correctly truncated by ethtool. So we seem to be good to go with the current code in kernel and user space. -- Florian
[PATCH net-next #2 1/1] pch_gbe: replace private tx ring lock with common netif_tx_lock
pch_gbe_tx_ring.tx_lock is only used in the hard_xmit handler and in the transmit completion reaper called from NAPI context. Compile-tested only. Potential victims Cced. Someone more knowledgeable may check if pch_gbe_tx_queue could have some use for a mmiowb. Signed-off-by: Francois RomieuCc: Darren Hart Cc: Andy Cress Cc: br...@fossetcon.org --- Includes Nikolay's fix. drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h | 2 -- drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c | 10 ++ 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h index 2a55d6d..8d710a3 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h @@ -481,7 +481,6 @@ struct pch_gbe_buffer { /** * struct pch_gbe_tx_ring - tx ring information - * @tx_lock: spinlock structs * @desc: pointer to the descriptor ring memory * @dma: physical address of the descriptor ring * @size: length of descriptor ring in bytes @@ -491,7 +490,6 @@ struct pch_gbe_buffer { * @buffer_info: array of buffer information structs */ struct pch_gbe_tx_ring { - spinlock_t tx_lock; struct pch_gbe_tx_desc *desc; dma_addr_t dma; unsigned int size; diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index ca4add7..3cd87a4 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -1640,7 +1640,7 @@ pch_gbe_clean_tx(struct pch_gbe_adapter *adapter, cleaned_count); if (cleaned_count > 0) { /*skip this if nothing cleaned*/ /* Recover from running out of Tx resources in xmit_frame */ - spin_lock(_ring->tx_lock); + netif_tx_lock(adapter->netdev); if (unlikely(cleaned && (netif_queue_stopped(adapter->netdev { netif_wake_queue(adapter->netdev); @@ -1652,7 +1652,7 @@ pch_gbe_clean_tx(struct pch_gbe_adapter *adapter, 
netdev_dbg(adapter->netdev, "next_to_clean : %d\n", tx_ring->next_to_clean); - spin_unlock(_ring->tx_lock); + netif_tx_unlock(adapter->netdev); } return cleaned; } @@ -1805,7 +1805,6 @@ int pch_gbe_setup_tx_resources(struct pch_gbe_adapter *adapter, tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; - spin_lock_init(_ring->tx_lock); for (desNo = 0; desNo < tx_ring->count; desNo++) { tx_desc = PCH_GBE_TX_DESC(*tx_ring, desNo); @@ -2135,13 +2134,9 @@ static int pch_gbe_xmit_frame(struct sk_buff *skb, struct net_device *netdev) { struct pch_gbe_adapter *adapter = netdev_priv(netdev); struct pch_gbe_tx_ring *tx_ring = adapter->tx_ring; - unsigned long flags; - - spin_lock_irqsave(_ring->tx_lock, flags); if (unlikely(!PCH_GBE_DESC_UNUSED(tx_ring))) { netif_stop_queue(netdev); - spin_unlock_irqrestore(_ring->tx_lock, flags); netdev_dbg(netdev, "Return : BUSY next_to use : 0x%08x next_to clean : 0x%08x\n", tx_ring->next_to_use, tx_ring->next_to_clean); @@ -2150,7 +2145,6 @@ static int pch_gbe_xmit_frame(struct sk_buff *skb, struct net_device *netdev) /* CRC,ITAG no support */ pch_gbe_tx_queue(adapter, tx_ring, skb); - spin_unlock_irqrestore(_ring->tx_lock, flags); return NETDEV_TX_OK; } -- 2.5.5
Re: [PATCH net-next 1/1] pch_gbe: replace private tx ring lock with common netif_tx_lock
Nikolay Aleksandrov: > On 04/27/2016 12:49 AM, Francois Romieu wrote: [...] > > @@ -1652,7 +1652,7 @@ pch_gbe_clean_tx(struct pch_gbe_adapter *adapter, > > > > netdev_dbg(adapter->netdev, "next_to_clean : %d\n", > >tx_ring->next_to_clean); > > - spin_unlock(_ring->tx_lock); > > + netif_tx_lock(adapter->netdev); > > Shouldn't this be netif_tx_unlock ? It should. Thanks for reviewing. -- Ueimor
Re: [PATCH next] ipvlan: Fix failure path in dev registration during link creation
On Wed, Apr 27, 2016 at 11:57 AM, David Miller wrote: > From: Mahesh Bandewar > Date: Wed, 27 Apr 2016 11:37:39 -0700 > >> While fixing this issue I noticed another issue of missing unregister >> in case of failure, so adding it to the fix which is similar to the >> macvlan fix by Francesco in SHA1:308379607548524b8d86dbf20134681024935e0b > > This is not the correct way to refer to commits. > > You should specify, exactly, 12 digits of the SHA1 value, followed by > a space, followed by the header line text of that commit contained in > parenthesis and double quotes, like how Fixes: tags specify commits. Ok, will fix that soon.
Re: [PATCH 1/5] phylib: don't return NULL from get_phy_device()
From: Sergei Shtylyov Date: Wed, 27 Apr 2016 23:09:37 +0300 > On 04/27/2016 10:49 PM, Andrew Lunn wrote: > >> Please revert this, or fix all the callsites such that ENODEV is not a >> fatal error. > >OK, I'll do what DaveM decides. If you feel confident getting all the ENODEV checks right, please just do that. Thanks.
Re: [PATCH net 0/3] bnxt_en: Bug fixes for net.
From: Michael Chan Date: Mon, 25 Apr 2016 02:30:48 -0400 > Only use MSIX on VF, and fix rx page buffers on architectures with > PAGE_SIZE >= 64K. Series applied, thanks Michael.
[net-next v2 11/14] i40e/i40evf: Only offload VLAN tag if enabled
From: Jesse BrandeburgThe driver was offloading the VLAN tag into the skb any time there was a VLAN tag and the hardware stripping was enabled. Just check to make sure it's enabled before put_tag. Change-Id: Ife95290c06edd9a616393b38679923938b382241 Signed-off-by: Jesse Brandeburg Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 3 ++- drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 6e44cf1..285efe9 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1370,7 +1370,8 @@ static void i40e_receive_skb(struct i40e_ring *rx_ring, { struct i40e_q_vector *q_vector = rx_ring->q_vector; - if (vlan_tag & VLAN_VID_MASK) + if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) && + (vlan_tag & VLAN_VID_MASK)) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); napi_gro_receive(_vector->napi, skb); diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index f101895..4633235 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -842,7 +842,8 @@ static void i40e_receive_skb(struct i40e_ring *rx_ring, { struct i40e_q_vector *q_vector = rx_ring->q_vector; - if (vlan_tag & VLAN_VID_MASK) + if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) && + (vlan_tag & VLAN_VID_MASK)) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); napi_gro_receive(_vector->napi, skb); -- 2.5.5
[net-next v2 04/14] i40evf: Don't Panic
From: Mitch Williams Under some circumstances the driver remove function may be called before the driver is fully initialized. So we can't assume that we know where our towel is at, or that all of the data structures are initialized. To ensure that we don't panic, check that the vsi_res pointer is valid before dereferencing it. Then drink beer and eat peanuts. Change-ID: If697b4db57348e39f9538793e16aa755e3e1af03 Signed-off-by: Mitch Williams Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40evf/i40evf.h | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h index e657ecc..017c83b 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf.h +++ b/drivers/net/ethernet/intel/i40evf/i40evf.h @@ -256,8 +256,10 @@ struct i40evf_adapter { bool netdev_registered; bool link_up; enum i40e_virtchnl_ops current_op; -#define CLIENT_ENABLED(_a) ((_a)->vf_res->vf_offload_flags & \ - I40E_VIRTCHNL_VF_OFFLOAD_IWARP) +#define CLIENT_ENABLED(_a) ((_a)->vf_res ? \ + (_a)->vf_res->vf_offload_flags & \ + I40E_VIRTCHNL_VF_OFFLOAD_IWARP : \ + 0) #define RSS_AQ(_a) ((_a)->vf_res->vf_offload_flags & \ I40E_VIRTCHNL_VF_OFFLOAD_RSS_AQ) #define VLAN_ALLOWED(_a) ((_a)->vf_res->vf_offload_flags & \ -- 2.5.5
[net-next v2 01/14] i40e/i40evf: Clean up feature flags
From: Alexander Duyck The feature flags list for i40e and i40evf is beginning to become pretty massive. I plan to add another 4 or so features to these drivers and duplicating the flags for each and every flags list is becoming a bit repetitive. The primary change here is that we now build our features list around hw_encap_features. After that we assign that to vlan_features, hw_features, and finally map that onto features. In addition we end up throwing features onto hw_encap_features that end up having no effect such as the Rx offloads and SCTP_CRC. However that should have no impact and makes things a bit easier for us as hw_encap_features is one of the less updated features maps available. For i40evf I went through and sanity checked a few features as well. Specifically RXCSUM was being set as a read-only feature which didn't make much sense. I have updated things so we can clear the NETIF_F_RXCSUM flag since that is really a software feature and not a hardware one anyway so disabling it is just a matter of ignoring the result from the hardware.
Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 61 ++- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 66 - 2 files changed, 58 insertions(+), 69 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 0b071ce..f2e83fe 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -9111,40 +9111,36 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) np = netdev_priv(netdev); np->vsi = vsi; - netdev->hw_enc_features |= NETIF_F_IP_CSUM | - NETIF_F_IPV6_CSUM | - NETIF_F_TSO | - NETIF_F_TSO6| - NETIF_F_TSO_ECN | - NETIF_F_GSO_GRE | - NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_UDP_TUNNEL_CSUM | + netdev->hw_enc_features |= NETIF_F_SG | + NETIF_F_IP_CSUM | + NETIF_F_IPV6_CSUM| + NETIF_F_HIGHDMA | + NETIF_F_SOFT_FEATURES| + NETIF_F_TSO | + NETIF_F_TSO_ECN | + NETIF_F_TSO6 | + NETIF_F_GSO_GRE | + NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_SCTP_CRC | + NETIF_F_RXHASH | + NETIF_F_RXCSUM | 0; - netdev->features = NETIF_F_SG | - NETIF_F_IP_CSUM | - NETIF_F_SCTP_CRC| - NETIF_F_HIGHDMA | - NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_GRE | - NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_CTAG_RX | - NETIF_F_HW_VLAN_CTAG_FILTER | - NETIF_F_IPV6_CSUM | - NETIF_F_TSO | - NETIF_F_TSO_ECN | - NETIF_F_TSO6| - NETIF_F_RXCSUM | - NETIF_F_RXHASH | - 0; + if (!(pf->flags & I40E_FLAG_OUTER_UDP_CSUM_CAPABLE)) + netdev->hw_enc_features ^= NETIF_F_GSO_UDP_TUNNEL_CSUM; + + /* record features VLANs can make use of */ + netdev->vlan_features |= netdev->hw_enc_features; if (!(pf->flags & I40E_FLAG_MFP_ENABLED)) - netdev->features |= NETIF_F_NTUPLE; - if (pf->flags & I40E_FLAG_OUTER_UDP_CSUM_CAPABLE) - netdev->features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->hw_features |= NETIF_F_NTUPLE; + + netdev->hw_features |= netdev->hw_enc_features | + NETIF_F_HW_VLAN_CTAG_TX | + 
NETIF_F_HW_VLAN_CTAG_RX; - /* copy netdev features into list of user selectable features */ -
[net-next v2 13/14] i40e: Add VF promiscuous mode driver support
From: Anjali Singhai JainAdd infrastructure for Network Function Virtualization VLAN tagged packet steering feature. Change-Id: I9b873d8fcc253858e6baba65ac68ec5b9363944e Signed-off-by: Anjali Singhai Jain Signed-off-by: Greg Rose Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 153 - drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h | 2 + 2 files changed, 149 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index c364588..f47b0e8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1427,6 +1427,25 @@ static void i40e_vc_reset_vf_msg(struct i40e_vf *vf) } /** + * i40e_getnum_vf_vsi_vlan_filters + * @vsi: pointer to the vsi + * + * called to get the number of VLANs offloaded on this VF + **/ +static inline int i40e_getnum_vf_vsi_vlan_filters(struct i40e_vsi *vsi) +{ + struct i40e_mac_filter *f; + int num_vlans = 0; + + list_for_each_entry(f, >mac_filter_list, list) { + if (f->vlan >= 0 && f->vlan <= I40E_MAX_VLANID) + num_vlans++; + } + + return num_vlans; +} + +/** * i40e_vc_config_promiscuous_mode_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer @@ -1442,22 +1461,122 @@ static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf, (struct i40e_virtchnl_promisc_info *)msg; struct i40e_pf *pf = vf->pf; struct i40e_hw *hw = >hw; - struct i40e_vsi *vsi; + struct i40e_mac_filter *f; + i40e_status aq_ret = 0; bool allmulti = false; - i40e_status aq_ret; + struct i40e_vsi *vsi; + bool alluni = false; + int aq_err = 0; vsi = i40e_find_vsi_from_id(pf, info->vsi_id); if (!test_bit(I40E_VF_STAT_ACTIVE, >vf_states) || !test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, >vf_caps) || - !i40e_vc_isvalid_vsi_id(vf, info->vsi_id) || - (vsi->type != I40E_VSI_FCOE)) { + !i40e_vc_isvalid_vsi_id(vf, info->vsi_id)) { + dev_err(>pdev->dev, + "VF 
%d doesn't meet requirements to enter promiscuous mode\n", + vf->vf_id); aq_ret = I40E_ERR_PARAM; goto error_param; } + /* Multicast promiscuous handling*/ if (info->flags & I40E_FLAG_VF_MULTICAST_PROMISC) allmulti = true; - aq_ret = i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid, - allmulti, NULL); + + if (vf->port_vlan_id) { + aq_ret = i40e_aq_set_vsi_mc_promisc_on_vlan(hw, vsi->seid, + allmulti, + vf->port_vlan_id, + NULL); + } else if (i40e_getnum_vf_vsi_vlan_filters(vsi)) { + list_for_each_entry(f, >mac_filter_list, list) { + if (f->vlan >= 0 && f->vlan <= I40E_MAX_VLANID) + aq_ret = i40e_aq_set_vsi_mc_promisc_on_vlan + (hw, + vsi->seid, + allmulti, + f->vlan, + NULL); + aq_err = pf->hw.aq.asq_last_status; + if (aq_ret) { + dev_err(>pdev->dev, + "Could not add VLAN %d to multicast promiscuous domain err %s aq_err %s\n", + f->vlan, + i40e_stat_str(>hw, aq_ret), + i40e_aq_str(>hw, aq_err)); + break; + } + } + } else { + aq_ret = i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid, + allmulti, NULL); + aq_err = pf->hw.aq.asq_last_status; + if (aq_ret) { + dev_err(>pdev->dev, + "VF %d failed to set multicast promiscuous mode err %s aq_err %s\n", + vf->vf_id, + i40e_stat_str(>hw, aq_ret), + i40e_aq_str(>hw, aq_err)); + goto error_param_int; +
[net-next v2 03/14] i40e: Add support for configuring VF RSS
From: Mitch WilliamsAdd support for configuring RSS on behalf of the VFs. This removes the burden of dealing with different hardware interfaces from the VF drivers, allowing for better future compatibility. Change-ID: Icea75d3f37241ee8e447be5779e5abb53ddf04c0 Signed-off-by: Mitch Williams Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 1 + drivers/net/ethernet/intel/i40e/i40e_main.c| 35 +++- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 193 - 3 files changed, 217 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index d25b3be..e312adf 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -202,6 +202,7 @@ struct i40e_lump_tracking { #define I40E_HKEY_ARRAY_SIZE ((I40E_PFQF_HKEY_MAX_INDEX + 1) * 4) #define I40E_HLUT_ARRAY_SIZE ((I40E_PFQF_HLUT_MAX_INDEX + 1) * 4) +#define I40E_VF_HLUT_ARRAY_SIZE ((I40E_VFQF_HLUT1_MAX_INDEX + 1) * 4) enum i40e_fd_stat_idx { I40E_FD_STAT_ATR, diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index ec94ad6c..39b3b56 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -8082,24 +8082,45 @@ static int i40e_config_rss_reg(struct i40e_vsi *vsi, const u8 *seed, { struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = >hw; + u16 vf_id = vsi->vf_id; u8 i; /* Fill out hash function seed */ if (seed) { u32 *seed_dw = (u32 *)seed; - for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++) - i40e_write_rx_ctl(hw, I40E_PFQF_HKEY(i), seed_dw[i]); + if (vsi->type == I40E_VSI_MAIN) { + for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++) + i40e_write_rx_ctl(hw, I40E_PFQF_HKEY(i), + seed_dw[i]); + } else if (vsi->type == I40E_VSI_SRIOV) { + for (i = 0; i <= I40E_VFQF_HKEY1_MAX_INDEX; i++) + i40e_write_rx_ctl(hw, + I40E_VFQF_HKEY1(i, vf_id), + seed_dw[i]); + } else { + 
dev_err(>pdev->dev, "Cannot set RSS seed - invalid VSI type\n"); + } } if (lut) { u32 *lut_dw = (u32 *)lut; - if (lut_size != I40E_HLUT_ARRAY_SIZE) - return -EINVAL; - - for (i = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++) - wr32(hw, I40E_PFQF_HLUT(i), lut_dw[i]); + if (vsi->type == I40E_VSI_MAIN) { + if (lut_size != I40E_HLUT_ARRAY_SIZE) + return -EINVAL; + for (i = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++) + wr32(hw, I40E_PFQF_HLUT(i), lut_dw[i]); + } else if (vsi->type == I40E_VSI_SRIOV) { + if (lut_size != I40E_VF_HLUT_ARRAY_SIZE) + return -EINVAL; + for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++) + i40e_write_rx_ctl(hw, + I40E_VFQF_HLUT1(i, vf_id), + lut_dw[i]); + } else { + dev_err(>pdev->dev, "Cannot set RSS LUT - invalid VSI type\n"); + } } i40e_flush(hw); diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 30f8cbe..c364588 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1348,12 +1348,16 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) set_bit(I40E_VF_STAT_IWARPENA, >vf_states); } - if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) { - if (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_RSS_AQ) - vfres->vf_offload_flags |= - I40E_VIRTCHNL_VF_OFFLOAD_RSS_AQ; + if (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF) { + vfres->vf_offload_flags |= I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF; } else { - vfres->vf_offload_flags |= I40E_VIRTCHNL_VF_OFFLOAD_RSS_REG; + if ((pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) && + (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_RSS_AQ)) + vfres->vf_offload_flags |= +
[net-next v2 06/14] i40e: Specify AQ event opcode to wait for
From: Shannon NelsonTo add a little flexibility to the nvmupdate facility, this code adds the ability to specify an AQ event opcode to wait on after the Exec_AQ request. Change-ID: Iddbfd63c3de8df3edb9d3e90678b08989bc4946e Signed-off-by: Shannon Nelson Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_nvm.c| 49 +++ drivers/net/ethernet/intel/i40e/i40e_type.h | 1 + drivers/net/ethernet/intel/i40evf/i40e_type.h | 1 + 3 files changed, 44 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c index f2cea3d..954efe3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c +++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c @@ -693,10 +693,10 @@ i40e_status i40e_nvmupd_command(struct i40e_hw *hw, /* early check for status command and debug msgs */ upd_cmd = i40e_nvmupd_validate_command(hw, cmd, perrno); - i40e_debug(hw, I40E_DEBUG_NVM, "%s state %d nvm_release_on_hold %d cmd 0x%08x config 0x%08x offset 0x%08x data_size 0x%08x\n", + i40e_debug(hw, I40E_DEBUG_NVM, "%s state %d nvm_release_on_hold %d opc 0x%04x cmd 0x%08x config 0x%08x offset 0x%08x data_size 0x%08x\n", i40e_nvm_update_state_str[upd_cmd], hw->nvmupd_state, - hw->nvm_release_on_done, + hw->nvm_release_on_done, hw->nvm_wait_opcode, cmd->command, cmd->config, cmd->offset, cmd->data_size); if (upd_cmd == I40E_NVMUPD_INVALID) { @@ -710,7 +710,18 @@ i40e_status i40e_nvmupd_command(struct i40e_hw *hw, * going into the state machine */ if (upd_cmd == I40E_NVMUPD_STATUS) { + if (!cmd->data_size) { + *perrno = -EFAULT; + return I40E_ERR_BUF_TOO_SHORT; + } + bytes[0] = hw->nvmupd_state; + + if (cmd->data_size >= 4) { + bytes[1] = 0; + *((u16 *)[2]) = hw->nvm_wait_opcode; + } + return 0; } @@ -729,6 +740,14 @@ i40e_status i40e_nvmupd_command(struct i40e_hw *hw, case I40E_NVMUPD_STATE_INIT_WAIT: case I40E_NVMUPD_STATE_WRITE_WAIT: + /* if we need to stop waiting for an event, clear +* the wait info and return before doing anything 
else +*/ + if (cmd->offset == 0x) { + i40e_nvmupd_check_wait_event(hw, hw->nvm_wait_opcode); + return 0; + } + status = I40E_ERR_NOT_READY; *perrno = -EBUSY; break; @@ -800,6 +819,7 @@ static i40e_status i40e_nvmupd_state_init(struct i40e_hw *hw, i40e_release_nvm(hw); } else { hw->nvm_release_on_done = true; + hw->nvm_wait_opcode = i40e_aqc_opc_nvm_erase; hw->nvmupd_state = I40E_NVMUPD_STATE_INIT_WAIT; } } @@ -816,6 +836,7 @@ static i40e_status i40e_nvmupd_state_init(struct i40e_hw *hw, i40e_release_nvm(hw); } else { hw->nvm_release_on_done = true; + hw->nvm_wait_opcode = i40e_aqc_opc_nvm_update; hw->nvmupd_state = I40E_NVMUPD_STATE_INIT_WAIT; } } @@ -828,10 +849,12 @@ static i40e_status i40e_nvmupd_state_init(struct i40e_hw *hw, hw->aq.asq_last_status); } else { status = i40e_nvmupd_nvm_write(hw, cmd, bytes, perrno); - if (status) + if (status) { i40e_release_nvm(hw); - else + } else { + hw->nvm_wait_opcode = i40e_aqc_opc_nvm_update; hw->nvmupd_state = I40E_NVMUPD_STATE_WRITE_WAIT; + } } break; @@ -850,6 +873,7 @@ static i40e_status i40e_nvmupd_state_init(struct i40e_hw *hw, i40e_release_nvm(hw); } else { hw->nvm_release_on_done = true; + hw->nvm_wait_opcode = i40e_aqc_opc_nvm_update; hw->nvmupd_state = I40E_NVMUPD_STATE_INIT_WAIT; } } @@ -940,8 +964,10 @@ retry: switch (upd_cmd) { case I40E_NVMUPD_WRITE_CON: status = i40e_nvmupd_nvm_write(hw, cmd, bytes, perrno); -
[net-next v2 08/14] i40e: Add device capability which defines if update is available
From: Michal KosiarzAdd device capability which defines if update is available and security check is needed during update process. Change-ID: I380787c878275e1df18b39198df3ee3666342282 Signed-off-by: Michal Kosiarz Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h | 1 + drivers/net/ethernet/intel/i40e/i40e_common.c | 6 ++ drivers/net/ethernet/intel/i40e/i40e_type.h | 5 + drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h | 1 + drivers/net/ethernet/intel/i40evf/i40e_type.h | 5 + 5 files changed, 18 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index 8d5c65a..5179b3b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -429,6 +429,7 @@ struct i40e_aqc_list_capabilities_element_resp { #define I40E_AQ_CAP_ID_SDP 0x0062 #define I40E_AQ_CAP_ID_MDIO0x0063 #define I40E_AQ_CAP_ID_WSR_PROT0x0064 +#define I40E_AQ_CAP_ID_NVM_MGMT0x0080 #define I40E_AQ_CAP_ID_FLEX10 0x00F1 #define I40E_AQ_CAP_ID_CEM 0x00F2 diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index f3c1d88..34e86f5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -3138,6 +3138,12 @@ static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff, p->wr_csr_prot = (u64)number; p->wr_csr_prot |= (u64)logical_id << 32; break; + case I40E_AQ_CAP_ID_NVM_MGMT: + if (number & I40E_NVM_MGMT_SEC_REV_DISABLED) + p->sec_rev_disabled = true; + if (number & I40E_NVM_MGMT_UPDATE_DISABLED) + p->update_disabled = true; + break; default: break; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index bb57cd9..8aa14aa 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -275,6 +275,11 @@ 
struct i40e_hw_capabilities { #define I40E_FLEX10_STATUS_DCC_ERROR 0x1 #define I40E_FLEX10_STATUS_VC_MODE 0x2 + bool sec_rev_disabled; + bool update_disabled; +#define I40E_NVM_MGMT_SEC_REV_DISABLED 0x1 +#define I40E_NVM_MGMT_UPDATE_DISABLED 0x2 + bool mgmt_cem; bool ieee_1588; bool iwarp; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h index aad8d62..1bcb8cf 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h @@ -426,6 +426,7 @@ struct i40e_aqc_list_capabilities_element_resp { #define I40E_AQ_CAP_ID_SDP 0x0062 #define I40E_AQ_CAP_ID_MDIO0x0063 #define I40E_AQ_CAP_ID_WSR_PROT0x0064 +#define I40E_AQ_CAP_ID_NVM_MGMT0x0080 #define I40E_AQ_CAP_ID_FLEX10 0x00F1 #define I40E_AQ_CAP_ID_CEM 0x00F2 diff --git a/drivers/net/ethernet/intel/i40evf/i40e_type.h b/drivers/net/ethernet/intel/i40evf/i40e_type.h index b720713..bfc97c2 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_type.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_type.h @@ -258,6 +258,11 @@ struct i40e_hw_capabilities { #define I40E_FLEX10_STATUS_DCC_ERROR 0x1 #define I40E_FLEX10_STATUS_VC_MODE 0x2 + bool sec_rev_disabled; + bool update_disabled; +#define I40E_NVM_MGMT_SEC_REV_DISABLED 0x1 +#define I40E_NVM_MGMT_UPDATE_DISABLED 0x2 + bool mgmt_cem; bool ieee_1588; bool iwarp; -- 2.5.5
[net-next v2 12/14] i40e: Add promiscuous on VLAN support
From: Greg RoseNFV use cases require the ability to steer packets to VSIs by VLAN tag alone while being in promiscuous mode for multicast and unicast MAC addresses. These two new functions support that ability. Signed-off-by: Greg Rose Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_common.c| 70 drivers/net/ethernet/intel/i40e/i40e_prototype.h | 8 +++ 2 files changed, 78 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 25872f2..0e8552b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -2039,6 +2039,76 @@ i40e_status i40e_aq_set_vsi_multicast_promiscuous(struct i40e_hw *hw, } /** + * i40e_aq_set_vsi_mc_promisc_on_vlan + * @hw: pointer to the hw struct + * @seid: vsi number + * @enable: set MAC L2 layer unicast promiscuous enable/disable for a given VLAN + * @vid: The VLAN tag filter - capture any multicast packet with this VLAN tag + * @cmd_details: pointer to command details structure or NULL + **/ +enum i40e_status_code i40e_aq_set_vsi_mc_promisc_on_vlan(struct i40e_hw *hw, +u16 seid, bool enable, +u16 vid, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_set_vsi_promiscuous_modes *cmd = + (struct i40e_aqc_set_vsi_promiscuous_modes *) + enum i40e_status_code status; + u16 flags = 0; + + i40e_fill_default_direct_cmd_desc(, + i40e_aqc_opc_set_vsi_promiscuous_modes); + + if (enable) + flags |= I40E_AQC_SET_VSI_PROMISC_MULTICAST; + + cmd->promiscuous_flags = cpu_to_le16(flags); + cmd->valid_flags = cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_MULTICAST); + cmd->seid = cpu_to_le16(seid); + cmd->vlan_tag = cpu_to_le16(vid | I40E_AQC_SET_VSI_VLAN_VALID); + + status = i40e_asq_send_command(hw, , NULL, 0, cmd_details); + + return status; +} + +/** + * i40e_aq_set_vsi_uc_promisc_on_vlan + * @hw: pointer to the hw struct + * @seid: vsi number + * @enable: 
set MAC L2 layer unicast promiscuous enable/disable for a given VLAN + * @vid: The VLAN tag filter - capture any unicast packet with this VLAN tag + * @cmd_details: pointer to command details structure or NULL + **/ +enum i40e_status_code i40e_aq_set_vsi_uc_promisc_on_vlan(struct i40e_hw *hw, +u16 seid, bool enable, +u16 vid, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_set_vsi_promiscuous_modes *cmd = + (struct i40e_aqc_set_vsi_promiscuous_modes *) + enum i40e_status_code status; + u16 flags = 0; + + i40e_fill_default_direct_cmd_desc(, + i40e_aqc_opc_set_vsi_promiscuous_modes); + + if (enable) + flags |= I40E_AQC_SET_VSI_PROMISC_UNICAST; + + cmd->promiscuous_flags = cpu_to_le16(flags); + cmd->valid_flags = cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_UNICAST); + cmd->seid = cpu_to_le16(seid); + cmd->vlan_tag = cpu_to_le16(vid | I40E_AQC_SET_VSI_VLAN_VALID); + + status = i40e_asq_send_command(hw, , NULL, 0, cmd_details); + + return status; +} + +/** * i40e_aq_set_vsi_broadcast * @hw: pointer to the hw struct * @seid: vsi number diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index 134035f..8afb237 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -133,6 +133,14 @@ i40e_status i40e_aq_set_vsi_unicast_promiscuous(struct i40e_hw *hw, u16 vsi_id, bool set, struct i40e_asq_cmd_details *cmd_details); i40e_status i40e_aq_set_vsi_multicast_promiscuous(struct i40e_hw *hw, u16 vsi_id, bool set, struct i40e_asq_cmd_details *cmd_details); +enum i40e_status_code i40e_aq_set_vsi_mc_promisc_on_vlan(struct i40e_hw *hw, +u16 seid, bool enable, +u16 vid, + struct i40e_asq_cmd_details *cmd_details); +enum i40e_status_code i40e_aq_set_vsi_uc_promisc_on_vlan(struct i40e_hw *hw, +u16 seid, bool enable, +u16 vid, + struct i40e_asq_cmd_details *cmd_details);
[net-next v2 02/14] i40e/i40evf: Add support for IPIP and SIT offloads
From: Alexander DuyckLooking over the documentation it turns out enabling IPIP and SIT offloads for i40e is pretty straightforward. As such I decided to enable them with this patch. In my testing I am seeing an improvement of 8 to 10 Gb/s for IPIP and SIT tunnels with this offload enabled. Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 ++ drivers/net/ethernet/intel/i40e/i40e_txrx.c | 24 drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 24 drivers/net/ethernet/intel/i40evf/i40evf_main.c | 2 ++ 4 files changed, 36 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index f2e83fe..ec94ad6c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -9120,6 +9120,8 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GSO_GRE | + NETIF_F_GSO_IPIP | + NETIF_F_GSO_SIT | NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC | diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 39efba0..6e44cf1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2299,7 +2299,10 @@ static int i40e_tso(struct sk_buff *skb, u8 *hdr_len, u64 *cd_type_cmd_tso_mss) ip.v6->payload_len = 0; } - if (skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL | SKB_GSO_GRE | + if (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE | +SKB_GSO_IPIP | +SKB_GSO_SIT | +SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM)) { if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM) { /* determine offset of outer transport header */ @@ -2442,13 +2445,6 @@ static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, _proto, _off); } - /* compute outer L3 header size */ - tunnel |= ((l4.hdr - ip.hdr) / 4) << - I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT; 
- - /* switch IP header pointer from outer to inner header */ - ip.hdr = skb_inner_network_header(skb); - /* define outer transport */ switch (l4_proto) { case IPPROTO_UDP: @@ -2459,6 +2455,11 @@ static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, tunnel |= I40E_TXD_CTX_GRE_TUNNELING; *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL; break; + case IPPROTO_IPIP: + case IPPROTO_IPV6: + *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL; + l4.hdr = skb_inner_network_header(skb); + break; default: if (*tx_flags & I40E_TX_FLAGS_TSO) return -1; @@ -2467,6 +2468,13 @@ static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, return 0; } + /* compute outer L3 header size */ + tunnel |= ((l4.hdr - ip.hdr) / 4) << + I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT; + + /* switch IP header pointer from outer to inner header */ + ip.hdr = skb_inner_network_header(skb); + /* compute tunnel header size */ tunnel |= ((ip.hdr - l4.hdr) / 2) << I40E_TXD_CTX_QW0_NATLEN_SHIFT; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index fc22818..f101895 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -1564,7 +1564,10 @@ static int i40e_tso(struct sk_buff *skb, u8 *hdr_len, u64 *cd_type_cmd_tso_mss) ip.v6->payload_len = 0; } - if (skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL | SKB_GSO_GRE | + if (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE | +SKB_GSO_IPIP | +SKB_GSO_SIT | +
[net-next v2 05/14] i40e: Code cleanup in i40e_add_fdir_ethtool
From: Shannon NelsonA little bit of code cleanup in prep for more cloud filter work. Change-ID: I0dc33ce0d4c207944336a07437640fef920c100c Signed-off-by: Shannon Nelson Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 12 +++- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 8a83d45..8e56c43 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -2506,7 +2506,6 @@ static int i40e_add_fdir_ethtool(struct i40e_vsi *vsi, if (!vsi) return -EINVAL; - pf = vsi->back; if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED)) @@ -2564,15 +2563,18 @@ static int i40e_add_fdir_ethtool(struct i40e_vsi *vsi, input->src_ip[0] = fsp->h_u.tcp_ip4_spec.ip4dst; if (ntohl(fsp->m_ext.data[1])) { - if (ntohl(fsp->h_ext.data[1]) >= pf->num_alloc_vfs) { - netif_info(pf, drv, vsi->netdev, "Invalid VF id\n"); + vf_id = ntohl(fsp->h_ext.data[1]); + if (vf_id >= pf->num_alloc_vfs) { + netif_info(pf, drv, vsi->netdev, + "Invalid VF id %d\n", vf_id); goto free_input; } - vf_id = ntohl(fsp->h_ext.data[1]); /* Find vsi id from vf id and override dest vsi */ input->dest_vsi = pf->vf[vf_id].lan_vsi_id; if (input->q_index >= pf->vf[vf_id].num_queue_pairs) { - netif_info(pf, drv, vsi->netdev, "Invalid queue id\n"); + netif_info(pf, drv, vsi->netdev, + "Invalid queue id %d for VF %d\n", + input->q_index, vf_id); goto free_input; } } -- 2.5.5
[net-next v2 14/14] i40evf: Add driver support for promiscuous mode
From: Anjali Singhai JainAdd necessary Linux Ethernet driver support for promiscuous mode operation. Add a flag so the VF knows it is in promiscuous mode and two state flags to discreetly track multicast and unicast promiscuous states. Change-Id: Ib2f2dc7a7582304fec90fc917ebb7ded21ba1de4 Signed-off-by: Anjali Singhai Jain Signed-off-by: Greg Rose Signed-off-by: Jesse Brandeburg Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 14 +++--- drivers/net/ethernet/intel/i40evf/i40evf.h | 3 +++ drivers/net/ethernet/intel/i40evf/i40evf_main.c | 19 +++ drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c | 11 +++ 4 files changed, 40 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index f47b0e8..c226c2d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1489,13 +1489,13 @@ static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf, NULL); } else if (i40e_getnum_vf_vsi_vlan_filters(vsi)) { list_for_each_entry(f, >mac_filter_list, list) { - if (f->vlan >= 0 && f->vlan <= I40E_MAX_VLANID) - aq_ret = i40e_aq_set_vsi_mc_promisc_on_vlan - (hw, - vsi->seid, - allmulti, - f->vlan, - NULL); + if (f->vlan < 0 || f->vlan > I40E_MAX_VLANID) + continue; + aq_ret = i40e_aq_set_vsi_mc_promisc_on_vlan(hw, + vsi->seid, + allmulti, + f->vlan, + NULL); aq_err = pf->hw.aq.asq_last_status; if (aq_ret) { dev_err(>pdev->dev, diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h index 63f7aae..25afabf 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf.h +++ b/drivers/net/ethernet/intel/i40evf/i40evf.h @@ -220,6 +220,7 @@ struct i40evf_adapter { #define I40EVF_FLAG_WB_ON_ITR_CAPABLE BIT(11) #define I40EVF_FLAG_OUTER_UDP_CSUM_CAPABLE BIT(12) #define I40EVF_FLAG_ADDR_SET_BY_PF BIT(13) +#define I40EVF_FLAG_PROMISC_ON 
BIT(15) /* duplicates for common code */ #define I40E_FLAG_FDIR_ATR_ENABLED 0 #define I40E_FLAG_DCB_ENABLED 0 @@ -244,6 +245,8 @@ struct i40evf_adapter { #define I40EVF_FLAG_AQ_SET_HENABIT(12) #define I40EVF_FLAG_AQ_SET_RSS_KEY BIT(13) #define I40EVF_FLAG_AQ_SET_RSS_LUT BIT(14) +#define I40EVF_FLAG_AQ_REQUEST_PROMISC BIT(15) +#define I40EVF_FLAG_AQ_RELEASE_PROMISC BIT(16) /* OS defined structs */ struct net_device *netdev; diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index af53159..d1c4afd 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -943,6 +943,14 @@ static void i40evf_set_rx_mode(struct net_device *netdev) bottom_of_search_loop: continue; } + + if (netdev->flags & IFF_PROMISC && + !(adapter->flags & I40EVF_FLAG_PROMISC_ON)) + adapter->aq_required |= I40EVF_FLAG_AQ_REQUEST_PROMISC; + else if (!(netdev->flags & IFF_PROMISC) && +adapter->flags & I40EVF_FLAG_PROMISC_ON) + adapter->aq_required |= I40EVF_FLAG_AQ_RELEASE_PROMISC; + clear_bit(__I40EVF_IN_CRITICAL_TASK, >crit_section); } @@ -1622,6 +1630,17 @@ static void i40evf_watchdog_task(struct work_struct *work) goto watchdog_done; } + if (adapter->aq_required & I40EVF_FLAG_AQ_REQUEST_PROMISC) { + i40evf_set_promiscuous(adapter, I40E_FLAG_VF_UNICAST_PROMISC | + I40E_FLAG_VF_MULTICAST_PROMISC); + goto watchdog_done; + } + + if (adapter->aq_required & I40EVF_FLAG_AQ_RELEASE_PROMISC) { +
[net-next v2 09/14] i40e: Add DeviceID for X722 QSFP+
From: Kamil KrawczykChange-ID: I1370fbc7774e815ac1ad56561e97488e829592fc Signed-off-by: Kamil Krawczyk Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_common.c | 1 + drivers/net/ethernet/intel/i40e/i40e_devids.h | 1 + drivers/net/ethernet/intel/i40evf/i40e_common.c | 1 + drivers/net/ethernet/intel/i40evf/i40e_devids.h | 1 + 4 files changed, 4 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 34e86f5..1db4790 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -61,6 +61,7 @@ static i40e_status i40e_set_mac_type(struct i40e_hw *hw) case I40E_DEV_ID_1G_BASE_T_X722: case I40E_DEV_ID_10G_BASE_T_X722: case I40E_DEV_ID_SFP_I_X722: + case I40E_DEV_ID_QSFP_I_X722: hw->mac.type = I40E_MAC_X722; break; default: diff --git a/drivers/net/ethernet/intel/i40e/i40e_devids.h b/drivers/net/ethernet/intel/i40e/i40e_devids.h index dd4457d..d701861 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_devids.h +++ b/drivers/net/ethernet/intel/i40e/i40e_devids.h @@ -45,6 +45,7 @@ #define I40E_DEV_ID_1G_BASE_T_X722 0x37D1 #define I40E_DEV_ID_10G_BASE_T_X7220x37D2 #define I40E_DEV_ID_SFP_I_X722 0x37D3 +#define I40E_DEV_ID_QSFP_I_X7220x37D4 #define i40e_is_40G_device(d) ((d) == I40E_DEV_ID_QSFP_A || \ (d) == I40E_DEV_ID_QSFP_B || \ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_common.c b/drivers/net/ethernet/intel/i40evf/i40e_common.c index 4db0c03..8f64204 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_common.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_common.c @@ -59,6 +59,7 @@ i40e_status i40e_set_mac_type(struct i40e_hw *hw) case I40E_DEV_ID_1G_BASE_T_X722: case I40E_DEV_ID_10G_BASE_T_X722: case I40E_DEV_ID_SFP_I_X722: + case I40E_DEV_ID_QSFP_I_X722: hw->mac.type = I40E_MAC_X722; break; case I40E_DEV_ID_X722_VF: diff --git a/drivers/net/ethernet/intel/i40evf/i40e_devids.h 
b/drivers/net/ethernet/intel/i40evf/i40e_devids.h index 7023570..d34972b 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_devids.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_devids.h @@ -45,6 +45,7 @@ #define I40E_DEV_ID_1G_BASE_T_X722 0x37D1 #define I40E_DEV_ID_10G_BASE_T_X7220x37D2 #define I40E_DEV_ID_SFP_I_X722 0x37D3 +#define I40E_DEV_ID_QSFP_I_X7220x37D4 #define I40E_DEV_ID_X722_VF0x37CD #define I40E_DEV_ID_X722_VF_HV 0x37D9 -- 2.5.5
[net-next v2 00/14][pull request] 40GbE Intel Wired LAN Driver Updates 2016-04-27
This series contains updates to i40e and i40evf. Alex Duyck cleans up the feature flags since they are becoming pretty "massive", the primary change being that we now build our features list around hw_encap_features. Added support for IPIP and SIT offloads, which should improve throughput for IPIP and SIT tunnels with the offload enabled. Mitch adds support for configuring RSS on behalf of the VFs, which removes the burden of dealing with different hardware interfaces from the VF drivers and improves future compatibility. Fix to ensure that we do not panic by checking that the vsi_res pointer is valid before dereferencing it, after which we can drink beer and eat peanuts. Shannon does some housekeeping in i40e_add_fdir_ethtool() in preparation for more cloud filter work. Added flexibility to the nvmupdate facility by adding the ability to specify an AQ event opcode to wait on after the Exec_AQ request. Michal adds device capability which defines if an update is available and if a security check is needed during the update process. Kamil just adds a device id to support the X722 QSFP+ device. Greg fixes an issue where a mirror rule ID may be zero, so do not return invalid parameter when the user passes in a zero for a rule ID. Adds support to steer packets to VSIs by VLAN tag alone while being in promiscuous mode for multicast and unicast MAC addresses. Jesse changes the driver from offloading the VLAN tag into the skb any time there was a VLAN tag and the hardware stripping was enabled, to making sure it is enabled before put_tag. v2: Dropped patch 8 ("i40e: Allow user to change input set mask for flow director") while Kiran reworks a more generalized solution based on feedback from David Miller.
The following are changes since commit fab7b629a82da1b59620470d13152aff975239f6: Merge branch 'ila-csum-neutral' and are available in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue 40GbE Alexander Duyck (2): i40e/i40evf: Clean up feature flags i40e/i40evf: Add support for IPIP and SIT offloads Anjali Singhai Jain (2): i40e: Add VF promiscuous mode driver support i40evf: Add driver support for promiscuous mode Greg Rose (2): i40e: Remove zero check i40e: Add promiscuous on VLAN support Jesse Brandeburg (1): i40e/i40evf: Only offload VLAN tag if enabled Kamil Krawczyk (1): i40e: Add DeviceID for X722 QSFP+ Michal Kosiarz (1): i40e: Add device capability which defines if update is available Mitch Williams (3): i40e: Add support for configuring VF RSS i40evf: Don't Panic i40evf: Allow PF driver to configure RSS Shannon Nelson (2): i40e: Code cleanup in i40e_add_fdir_ethtool i40e: Specify AQ event opcode to wait for drivers/net/ethernet/intel/i40e/i40e.h | 1 + drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h | 1 + drivers/net/ethernet/intel/i40e/i40e_common.c | 82 - drivers/net/ethernet/intel/i40e/i40e_devids.h | 1 + drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 12 +- drivers/net/ethernet/intel/i40e/i40e_main.c| 98 ++--- drivers/net/ethernet/intel/i40e/i40e_nvm.c | 49 ++- drivers/net/ethernet/intel/i40e/i40e_prototype.h | 8 + drivers/net/ethernet/intel/i40e/i40e_txrx.c| 27 +- drivers/net/ethernet/intel/i40e/i40e_type.h| 6 + drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 346 +- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h | 2 + .../net/ethernet/intel/i40evf/i40e_adminq_cmd.h| 1 + drivers/net/ethernet/intel/i40evf/i40e_common.c| 1 + drivers/net/ethernet/intel/i40evf/i40e_devids.h| 1 + drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 27 +- drivers/net/ethernet/intel/i40evf/i40e_type.h | 6 + drivers/net/ethernet/intel/i40evf/i40evf.h | 39 +- drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c | 121 +++ 
drivers/net/ethernet/intel/i40evf/i40evf_main.c| 395 - .../net/ethernet/intel/i40evf/i40evf_virtchnl.c| 130 +++ 21 files changed, 947 insertions(+), 407 deletions(-) -- 2.5.5