Re: [PATCH 1/3] ipv4: initialize fib_trie prior to register_netdev_notifier call.

2017-07-19 Thread David Miller

Where are patches #2 and #3?


Re: [PATCH net] ipv6: avoid overflow of offset in ip6_find_1stfragopt

2017-07-19 Thread David Miller
From: Sabrina Dubroca 
Date: Wed, 19 Jul 2017 22:28:55 +0200

> In some cases, offset can overflow and can cause an infinite loop in
> ip6_find_1stfragopt(). Make it unsigned int to prevent the overflow, and
> cap it at IPV6_MAXPLEN, since packets larger than that should be invalid.
> 
> This problem has been here since before the beginning of git history.
> 
> Signed-off-by: Sabrina Dubroca 
> Acked-by: Hannes Frederic Sowa 

Applied and queued up for -stable, thanks.


Re: [PATCH] net: tehuti: don't process data if it has not been copied from userspace

2017-07-19 Thread David Miller
From: Colin King 
Date: Wed, 19 Jul 2017 18:46:59 +0100

> From: Colin Ian King 
> 
> The array data is only populated with valid information from userspace
> if cmd != SIOCDEVPRIVATE, other cases the array contains garbage on
> the stack. The subsequent switch statement acts on a subcommand in
> data[0] which could be any garbage value if cmd is SIOCDEVPRIVATE which
> seems incorrect to me.  Instead, just return EOPNOTSUPP for the case
> where cmd == SIOCDEVPRIVATE to avoid this issue.
> 
> As a side note, I suspect that the original intention of the code
> was for this ioctl to work just for cmd == SIOCDEVPRIVATE (and the
> current logic is reversed). However, I don't want to change the current
> semantics in case any userspace code relies on this existing behaviour.
> 
> Detected by CoverityScan, CID#139647 ("Uninitialized scalar variable")
> 
> Signed-off-by: Colin Ian King 

Yeah this is the safest change for now, applied.

Francois added the register address range checking a year after the
driver was added, so maybe someone used this facility.

It should have been done via ethtool getregs...


[PATCH v2 1/1] iroute2: Add support for GRE ignore-df knob

2017-07-19 Thread Philip Prindeville
From: Philip Prindeville 

In the presence of firewalls which improperly block ICMP Unreachable
(including Fragmentation Required) messages, Path MTU Discovery is
prevented from working.

The workaround is to handle IPv4 payloads opaquely, ignoring the DF bit.

Reviewed-by: Stephen Hemminger 
Signed-off-by: Philip Prindeville 
---
 doc/ip-cref.tex  |  7 +++
 ip/link_gre.c| 16 
 man/man8/ip-tunnel.8 |  9 +
 3 files changed, 32 insertions(+)

diff --git a/doc/ip-cref.tex b/doc/ip-cref.tex
index 
242cc266b7acd34adcf390e6272944e333d6160e..179baa2fb75e7013274f030d6ae47a0c64ff728a
 100644
--- a/doc/ip-cref.tex
+++ b/doc/ip-cref.tex
@@ -2524,6 +2524,13 @@ It must be an address on another interface of this host.
It is enabled by default. Note that a fixed ttl is incompatible
with this option: tunnelling with a fixed ttl always makes pmtu 
discovery.
 
+\item \verb|ignore-df|
+
+--- (only GRE tunnels) enable IPv4 DF flag suppression on this tunnel.
+   It is disabled by default. Enabling this option will cause IPv4
+   payloads to be handled like any other GRE payload,
+   regardless of the DF flag.
+
 \item \verb|key K|, \verb|ikey K|, \verb|okey K|
 
 --- (only GRE tunnels) use keyed GRE with key \verb|K|. \verb|K| is
diff --git a/ip/link_gre.c b/ip/link_gre.c
index 
82df900614bfdb1aa53985e1d36d4b99d4990291..ad3a7ad44c8b610ff6bf6148bb9160afecad968d
 100644
--- a/ip/link_gre.c
+++ b/ip/link_gre.c
@@ -34,6 +34,7 @@ static void print_usage(FILE *f)
"[ ttl TTL ]\n"
"[ tos TOS ]\n"
"[ [no]pmtudisc ]\n"
+   "[ [no]ignore-df ]\n"
"[ dev PHYS_DEV ]\n"
"[ noencap ]\n"
"[ encap { fou | gue | none } ]\n"
@@ -93,6 +94,7 @@ static int gre_parse_opt(struct link_util *lu, int argc, char 
**argv,
__u16 encapsport = 0;
__u16 encapdport = 0;
__u8 metadata = 0;
+   __u8 ignore_df = 0;
__u32 fwmark = 0;
 
if (!(n->nlmsg_flags & NLM_F_CREATE)) {
@@ -164,6 +166,9 @@ get_failed:
if (greinfo[IFLA_GRE_COLLECT_METADATA])
metadata = 1;
 
+   if (greinfo[IFLA_GRE_IGNORE_DF])
+   ignore_df = 
!!rta_getattr_u8(greinfo[IFLA_GRE_IGNORE_DF]);
+
if (greinfo[IFLA_GRE_FWMARK])
fwmark = rta_getattr_u32(greinfo[IFLA_GRE_FWMARK]);
}
@@ -311,6 +316,11 @@ get_failed:
encapflags |= ~TUNNEL_ENCAP_FLAG_REMCSUM;
} else if (strcmp(*argv, "external") == 0) {
metadata = 1;
+   } else if (strcmp(*argv, "ignore-df") == 0) {
+   ignore_df = 1;
+   } else if (strcmp(*argv, "noignore-df") == 0) {
+   // only the lsb is significant, use 2 for presence */
+   ignore_df = 2;
} else if (strcmp(*argv, "fwmark") == 0) {
NEXT_ARG();
if (get_u32(, *argv, 0))
@@ -355,6 +365,9 @@ get_failed:
addattr16(n, 1024, IFLA_GRE_ENCAP_SPORT, htons(encapsport));
addattr16(n, 1024, IFLA_GRE_ENCAP_DPORT, htons(encapdport));
 
+   if (ignore_df)
+   addattr8(n, 1024, IFLA_GRE_IGNORE_DF, ignore_df & 1);
+
return 0;
 }
 
@@ -454,6 +467,9 @@ static void gre_print_opt(struct link_util *lu, FILE *f, 
struct rtattr *tb[])
else
fputs("external ", f);
 
+   if (tb[IFLA_GRE_IGNORE_DF] && rta_getattr_u8(tb[IFLA_GRE_IGNORE_DF]))
+   fputs("ignore-df ", f);
+
if (tb[IFLA_GRE_ENCAP_TYPE] &&
rta_getattr_u16(tb[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE) {
__u16 type = rta_getattr_u16(tb[IFLA_GRE_ENCAP_TYPE]);
diff --git a/man/man8/ip-tunnel.8 b/man/man8/ip-tunnel.8
index 
4938c7405ce850c0774837b8654cfe236344319c..7ddbffb23cff1b01a348f3b664d578ccaa8a52cb
 100644
--- a/man/man8/ip-tunnel.8
+++ b/man/man8/ip-tunnel.8
@@ -49,6 +49,7 @@ ip-tunnel - tunnel configuration
 .BR 6rd-reset " ]"
 .br
 .RB "[ [" no "]" pmtudisc " ]"
+.RB "[ [" no "]" ignore-df " ]"
 .RB "[ " dev
 .IR PHYS_DEV " ]"
 
@@ -176,6 +177,14 @@ with this option: tunneling with a fixed ttl always makes 
pmtu
 discovery.
 
 .TP
+.B ignore-df
+enable IPv4 DF suppression on this tunnel.
+Normally datagrams that exceed the MTU will be fragmented; the presence
+of the DF flag inhibits this, resulting instead in an ICMP Unreachable
+(Fragmentation Required) message.  Enabling this attribute causes the
+DF flag to be ignored.
+
+.TP
 .BI key " K"
 .TP
 .BI ikey " K"
-- 
2.9.4



[PATCH] net: bonding: Fix transmit load balancing in balance-alb mode

2017-07-19 Thread Kosuke Tatsukawa
balance-alb mode used to have transmit dynamic load balancing feature
enabled by default.  However, transmit dynamic load balancing no longer
works in balance-alb after commit 8b426dc54cf4 ("bonding: remove
hardcoded value").

Both balance-tlb and balance-alb use the function bond_do_alb_xmit() to
send packets.  This function uses the parameter tlb_dynamic_lb.
tlb_dynamic_lb used to have the default value of 1 for balance-alb, but
now the value is set to 0 except in balance-tlb.

Re-enable transmit dynamic load balancing by initializing tlb_dynamic_lb
for balance-alb similar to balance-tlb.

Signed-off-by: Kosuke Tatsukawa 
Cc: sta...@vger.kernel.org
---
 drivers/net/bonding/bond_main.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 14ff622..181839d 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4596,7 +4596,7 @@ static int bond_check_params(struct bond_params *params)
}
ad_user_port_key = valptr->value;
 
-   if (bond_mode == BOND_MODE_TLB) {
+   if ((bond_mode == BOND_MODE_TLB) || (bond_mode == BOND_MODE_ALB)) {
bond_opt_initstr(, "default");
valptr = bond_opt_parse(bond_opt_get(BOND_OPT_TLB_DYNAMIC_LB),
);



Re: [PATCH] net: dsa: mv88e6xxx: Enable CMODE config support for 6390X

2017-07-19 Thread David Miller
From: Martin Hundebøll 
Date: Wed, 19 Jul 2017 08:17:02 +0200

> Commit f39908d3b1c45 ('net: dsa: mv88e6xxx: Set the CMODE for mv88e6390
> ports 9 & 10') added support for setting the CMODE for the 6390X family,
> but only enabled it for 9290 and 6390 - and left out 6390X.
> 
> Fix support for setting the CMODE on 6390X also by assigning
> mv88e6390x_port_set_cmode() to the .port_set_cmode function pointer in
> mv88e6390x_ops too.
> 
> Signed-off-by: Martin Hundebøll 

Applied and queued up for -stable, thanks.


Re: [PATCH v2 net-next] net: systemport: Support 64bit statistics

2017-07-19 Thread David Miller
From: "Jianming.qiao" 
Date: Wed, 19 Jul 2017 01:18:40 +0100

> Signed-off-by: Jianming.qiao 

Supporting both deprecated .ndo_get_stats and 64-bit .ndo_get_stats64
at the same time makes no sense.

.ndo_get_stats will never be called if .ndo_get_stats64 is non-NULL

The lack of a commit log message, explaining in detail, why you are
doing this and why you are doing it this way, concerns me as well.

This submission so far has been a bit of a mess.  You don't
communicate enough, your commit message is empty, and therefore we
have no idea why you are doing things, and in particular the reasons
for your decisions.

I'm not applying this, sorry.


Re: [PATCH net-next 0/8] netvsc: lockdep and related fixes

2017-07-19 Thread David Miller
From: Stephen Hemminger 
Date: Wed, 19 Jul 2017 11:53:11 -0700

> These fix sparse and lockdep warnings from netvsc driver.
> Targeting these at net-next since no actual related failures
> have been observed in non-debug kernels.

Series applied, thanks Stephen.


Re: [PATCH net-next v2 1/1] geneve: add rtnl changelink support

2017-07-19 Thread Pravin Shelar
On Tue, Jul 18, 2017 at 4:33 PM, Girish Moodalbail
 wrote:
> This patch adds changelink rtnl operation support for geneve devices
> and the code changes involve:
>
>   - add geneve_quiesce() which quiesces the geneve device data path
> for both TX and RX. This lets us perform the changelink operation
> atomically w.r.t data path. Also add geneve_unquiesce() to
> reverse the operation of geneve_quiesce().
>
>   - refactor geneve_newlink into geneve_nl2info to be used by both
> geneve_newlink and geneve_changelink
>
>   - geneve_nl2info takes a changelink boolean argument to isolate
> changelink checks.
>
>   - Allow changing only a few attributes (ttl, tos, and remote tunnel
> endpoint IP address (within the same address family)):
> - return -EOPNOTSUPP for attributes that cannot be changed for
>   now. Incremental patches can make the non-supported one
>   available in the future if needed.
>

> Signed-off-by: Girish Moodalbail 
> ---
> v0 -> v1:
>- added geneve_quiesce() and geneve_unquiesce() functions to
>  perform the changelink operation atomically w.r.t data path
> ---

>  drivers/net/geneve.c | 192 
> +--
>  1 file changed, 157 insertions(+), 35 deletions(-)
>
> diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
> index de8156c..829f541 100644
> --- a/drivers/net/geneve.c
> +++ b/drivers/net/geneve.c
...
...

> +/* Quiesces the geneve device data path for both TX and RX. */
> +static inline void geneve_quiesce(struct geneve_dev *geneve,
> + struct geneve_sock **gs4,
> + struct geneve_sock **gs6)
> +{
> +   *gs4 = rtnl_dereference(geneve->sock4);
> +   rcu_assign_pointer(geneve->sock4, NULL);
> +
> +#if IS_ENABLED(CONFIG_IPV6)
> +   *gs6 = rtnl_dereference(geneve->sock6);
> +   rcu_assign_pointer(geneve->sock6, NULL);
> +#else
> +   *gs6 = NULL;
> +#endif
> +   synchronize_net();
> +}
> +
> +/* Resumes the geneve device data path for both TX and RX. */
> +static inline void geneve_unquiesce(struct geneve_dev *geneve,
> +   struct geneve_sock *gs4,
> +   struct geneve_sock __maybe_unused *gs6)
> +{
> +   rcu_assign_pointer(geneve->sock4, gs4);
> +#if IS_ENABLED(CONFIG_IPV6)
> +   rcu_assign_pointer(geneve->sock6, gs6);
> +#endif
> +   synchronize_net();
> +}
> +
> +static int geneve_changelink(struct net_device *dev, struct nlattr *tb[],
> +struct nlattr *data[],
> +struct netlink_ext_ack *extack)
> +{
> +   struct geneve_dev *geneve = netdev_priv(dev);
> +   struct geneve_sock *gs4, *gs6;
> +   struct ip_tunnel_info info;
> +   bool metadata;
> +   bool use_udp6_rx_checksums;
> +   int err;
> +
> +   /* If the geneve device is configured for metadata (or externally
> +* controlled, for example, OVS), then nothing can be changed.
> +*/
> +   if (geneve->collect_md)
> +   return -EOPNOTSUPP;
> +
> +   /* Start with the existing info. */
> +   memcpy(, >info, sizeof(info));
> +   metadata = geneve->collect_md;
> +   use_udp6_rx_checksums = geneve->use_udp6_rx_checksums;
> +   err = geneve_nl2info(dev, tb, data, , ,
> +_udp6_rx_checksums, true);
> +   if (err)
> +   return err;
> +
> +   if (!geneve_dst_addr_equal(>info, ))
> +   dst_cache_reset(_cache);
> +
> +   geneve_quiesce(geneve, , );
> +   geneve->info = info;
> +   geneve->collect_md = metadata;
> +   geneve->use_udp6_rx_checksums = use_udp6_rx_checksums;
> +   geneve_unquiesce(geneve, gs4, gs6);
> +
This is a nice trick, but it adds a check for the socket in the datapath. Did
you explore updating the entire device state in a single atomic transaction?


> +   return 0;
> +}
> +
>  static void geneve_dellink(struct net_device *dev, struct list_head *head)
>  {
> struct geneve_dev *geneve = netdev_priv(dev);
> @@ -1375,6 +1496,7 @@ static int geneve_fill_info(struct sk_buff *skb, const 
> struct net_device *dev)
> .setup  = geneve_setup,
> .validate   = geneve_validate,
> .newlink= geneve_newlink,
> +   .changelink = geneve_changelink,
> .dellink= geneve_dellink,
> .get_size   = geneve_get_size,
> .fill_info  = geneve_fill_info,
> --
> 1.8.3.1
>


Re: [PATCH net-next v2 1/1] geneve: add rtnl changelink support

2017-07-19 Thread Girish Moodalbail

On 7/19/17 4:51 PM, David Miller wrote:

From: Girish Moodalbail 
Date: Tue, 18 Jul 2017 16:33:06 -0700


+static inline bool geneve_dst_addr_equal(struct ip_tunnel_info *a,

  ...

+static inline void geneve_quiesce(struct geneve_dev *geneve,

  ...

+static inline void geneve_unquiesce(struct geneve_dev *geneve,


Please no inline functions in foo.c files, let the compiler
decide.


Sure thing. Will do.

regards,
~Girish



Thanks.





Re: commit 16ecba59 breaks 82574L under heavy load.

2017-07-19 Thread Benjamin Poirier
On 2017/07/19 10:19, Lennart Sorensen wrote:
> On Tue, Jul 18, 2017 at 04:14:35PM -0700, Benjamin Poirier wrote:
> > Thanks for the detailed analysis.
> > 
> > Referring to the original discussion around this patch series, it seemed like
> > the IMS bit for a condition had to be set for the Other interrupt to be 
> > raised
> > for that condition.
> > 
> > https://lkml.org/lkml/2015/11/4/683
> > 
> > In this case however, E1000_ICR_RXT0 is not set in IMS so Other shouldn't be
> > raised for Receiver Overrun. Apparently something is going on...
> > 
> > I can reproduce the spurious Other interrupts with a simple mdelay()
> > With the debugging patch at the end of the mail I see stuff like this
> > while blasting with udp frames:
> >   -0 [086] d.h1 15338.742675: e1000_msix_other: got Other 
> > interrupt, count 15127
> ><...>-54504 [086] d.h. 15338.742724: e1000_msix_other: got Other 
> > interrupt, count 1
> ><...>-54504 [086] d.h. 15338.742774: e1000_msix_other: got Other 
> > interrupt, count 1
> ><...>-54504 [086] d.h. 15338.742824: e1000_msix_other: got Other 
> > interrupt, count 1
> >   -0 [086] d.h1 15340.745123: e1000_msix_other: got Other 
> > interrupt, count 27584
> ><...>-54504 [086] d.h. 15340.745172: e1000_msix_other: got Other 
> > interrupt, count 1
> ><...>-54504 [086] d.h. 15340.745222: e1000_msix_other: got Other 
> > interrupt, count 1
> ><...>-54504 [086] d.h. 15340.745272: e1000_msix_other: got Other 
> > interrupt, count 1
> > 
> > > hence sets the flag that (unfortunately) means both link is down and link
> > > state should be checked.  Since this now happens 3000 times per second,
> > > the chances of it happening while the watchdog_task is checking the link
> > > state becomes pretty high, and if it does happen to coincide, then the
> > > watchdog_task will reset the adapter, which causes a real loss of link.
> > 
> > Through which path does watchdog_task reset the adapter? I didn't
> > reproduce that.
> 
> The other interrupt happens and sets get_link_status to true.  At some
> point the watchdog_task runs on some core and calls e1000e_has_link,
> which then calls check_for_link to find out the current link status.
> While e1000e_check_for_copper_link is checking the link state and
> after updating get_link_status to false to indicate link is up, another
> interrupt occurs and another core handles it and changes get_link_status
> to true again.  So by the time e1000e_has_link goes to determine the
> return value, get_link_status has changed back again so now it returns
> link down, and as a result the watchdog_task calls reset, because we
> have packets in the transmit queue (we were busy forwarding over 10
> packets per second when it happened).

Ah I see. Thanks again.

In your previous mail,
On 2017/07/18 10:21, Lennart Sorensen wrote:
[...]
> I tried checking what the bits in the ICR actually were under these
> conditions, and it would appear that the only bit set is 24 (the Other
> Causes interrupt bit).  So I don't know what the real cause is although

Are you sure about this? In my testing, while triggering the overrun
with the msleep, I read ICR when entering e1000_msix_other() and RXO is
consistently set.

I'm working on a patch that uses that fact to handle the situation and
limit the interrupt.


Re: [PATCH net-next v2 1/1] geneve: add rtnl changelink support

2017-07-19 Thread David Miller
From: Girish Moodalbail 
Date: Tue, 18 Jul 2017 16:33:06 -0700

> +static inline bool geneve_dst_addr_equal(struct ip_tunnel_info *a,
 ...
> +static inline void geneve_quiesce(struct geneve_dev *geneve,
 ...
> +static inline void geneve_unquiesce(struct geneve_dev *geneve,

Please no inline functions in foo.c files, let the compiler
decide.

Thanks.


Re: [PATCH net-next 0/3] net: make dev_close void

2017-07-19 Thread David Miller
From: Stephen Hemminger 
Date: Tue, 18 Jul 2017 15:59:24 -0700

> Noticed while working on other changes. Why is dev_close()
> returning int, it should be void.  Should also change
> ndo_close to be void, but that requires more work and someone
> with more coccinelle foo (smpl) than me.

Series applied, thanks.


[PATCH 0/4] can: Add new binding to limit bit rate used

2017-07-19 Thread Franklin S Cooper Jr
Add a new generic binding that CAN drivers can use to specify the max
arbitration and data bit rate supported by a transceiver. This is
useful since in some instances the maximum speeds may be limited by
the transceiver used. However, transceivers may not provide a means
to determine this limitation at runtime. Therefore, create a new binding
that mimics "fixed-link" that allows a user to hardcode the max speeds
that can be used.

Also add support for this new binding in the MCAN driver.

Note this is an optional subnode so even if a driver adds support for
parsing fixed-transceiver the user does not have to define it in their
device tree.

Franklin S Cooper Jr (4):
  can: dev: Add support for limiting configured bitrate
  can: fixed-transceiver: Add documentation for CAN fixed transceiver
bindings
  can: m_can: Update documentation to mention new fixed transceiver
binding
  can: m_can: Add call to of_transceiver_is_fixed

 .../bindings/net/can/fixed-transceiver.txt | 31 ++
 .../devicetree/bindings/net/can/m_can.txt  | 10 +
 drivers/net/can/dev.c  | 48 ++
 drivers/net/can/m_can/m_can.c  |  2 +
 include/linux/can/dev.h|  5 +++
 5 files changed, 96 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/net/can/fixed-transceiver.txt

-- 
2.10.0



[PATCH 1/4] can: dev: Add support for limiting configured bitrate

2017-07-19 Thread Franklin S Cooper Jr
Various CAN or CAN-FD IP may be able to run at a faster rate than
what the transceiver the CAN node is connected to supports. This can lead to
unexpected errors. However, CAN transceivers typically have fixed
limitations and provide no means to discover these limitations at
runtime. Therefore, add support for a fixed-transceiver node that
can be reused by other CAN peripheral drivers to determine for both
CAN and CAN-FD the max bitrate that can be used. If the user
tries to configure CAN to exceed these maximum bitrates it will throw
an error.

Signed-off-by: Franklin S Cooper Jr 
---
 drivers/net/can/dev.c   | 48 
 include/linux/can/dev.h |  5 +
 2 files changed, 53 insertions(+)

diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 365a8cc..fbab87d 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -27,6 +27,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #define MOD_DESC "CAN device driver interface"
@@ -806,6 +807,21 @@ int open_candev(struct net_device *dev)
return -EINVAL;
}
 
+   if (priv->max_trans_arbitration_speed > 0 &&
+   priv->bittiming.bitrate > priv->max_trans_arbitration_speed) {
+   netdev_err(dev, "arbitration bitrate surpasses transceiver 
capabilities of %d bps\n",
+  priv->max_trans_arbitration_speed);
+   return -EINVAL;
+   }
+
+   if (priv->max_trans_data_speed  >= 0 &&
+   (priv->ctrlmode & CAN_CTRLMODE_FD) &&
+   (priv->data_bittiming.bitrate > priv->max_trans_data_speed)) {
+   netdev_err(dev, "canfd data bitrate surpasses transceiver 
capabilities of %d bps\n",
+  priv->max_trans_data_speed);
+   return -EINVAL;
+   }
+
/* Switch carrier on if device was stopped while in bus-off state */
if (!netif_carrier_ok(dev))
netif_carrier_on(dev);
@@ -814,6 +830,38 @@ int open_candev(struct net_device *dev)
 }
 EXPORT_SYMBOL_GPL(open_candev);
 
+#ifdef CONFIG_OF
+void of_transceiver_is_fixed(struct net_device *dev)
+{
+   struct device_node *dn;
+   struct can_priv *priv = netdev_priv(dev);
+   u32 max_frequency;
+   struct device_node *np;
+
+   np = dev->dev.parent->of_node;
+
+   /* New binding */
+   dn = of_get_child_by_name(np, "fixed-transceiver");
+   if (!dn)
+   return;
+
+   of_property_read_u32(dn, "max-arbitration-speed", _frequency);
+
+   if (max_frequency > 0)
+   priv->max_trans_arbitration_speed = max_frequency;
+   else
+   priv->max_trans_arbitration_speed = -1;
+
+   of_property_read_u32(dn, "max-data-speed", _frequency);
+
+   if (max_frequency >= 0)
+   priv->max_trans_data_speed = max_frequency;
+   else
+   priv->max_trans_data_speed = -1;
+}
+EXPORT_SYMBOL(of_transceiver_is_fixed);
+#endif
+
 /*
  * Common close function for cleanup before the device gets closed.
  *
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index 141b05a..aec72b5 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -69,6 +69,9 @@ struct can_priv {
unsigned int echo_skb_max;
struct sk_buff **echo_skb;
 
+   unsigned int max_trans_arbitration_speed;
+   unsigned int max_trans_data_speed;
+
 #ifdef CONFIG_CAN_LEDS
struct led_trigger *tx_led_trig;
char tx_led_trig_name[CAN_LED_NAME_SZ];
@@ -165,6 +168,8 @@ void can_put_echo_skb(struct sk_buff *skb, struct 
net_device *dev,
 unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx);
 void can_free_echo_skb(struct net_device *dev, unsigned int idx);
 
+void of_transceiver_is_fixed(struct net_device *dev);
+
 struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf);
 struct sk_buff *alloc_canfd_skb(struct net_device *dev,
struct canfd_frame **cfd);
-- 
2.10.0



[PATCH 4/4] can: m_can: Add call to of_transceiver_is_fixed

2017-07-19 Thread Franklin S Cooper Jr
Add call to the new generic function that provides support via a binding
to limit the arbitration rate and/or data rate imposed by the physical
transceiver connected to the MCAN peripheral.

Signed-off-by: Franklin S Cooper Jr 
---
 drivers/net/can/m_can/m_can.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index f4947a7..db1882c 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -1649,6 +1649,8 @@ static int m_can_plat_probe(struct platform_device *pdev)
 
devm_can_led_init(dev);
 
+   of_transceiver_is_fixed(dev);
+
dev_info(>dev, "%s device registered (irq=%d, version=%d)\n",
 KBUILD_MODNAME, dev->irq, priv->version);
 
-- 
2.10.0



[PATCH 3/4] can: m_can: Update documentation to mention new fixed transceiver binding

2017-07-19 Thread Franklin S Cooper Jr
Add information regarding fixed transceiver binding. This is especially
important for MCAN since the IP allows CAN FD mode to run significantly
faster than what most transceivers are capable of.

Signed-off-by: Franklin S Cooper Jr 
---
 Documentation/devicetree/bindings/net/can/m_can.txt | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/Documentation/devicetree/bindings/net/can/m_can.txt 
b/Documentation/devicetree/bindings/net/can/m_can.txt
index 9e33177..4440e4b 100644
--- a/Documentation/devicetree/bindings/net/can/m_can.txt
+++ b/Documentation/devicetree/bindings/net/can/m_can.txt
@@ -43,6 +43,11 @@ Required properties:
  Please refer to 2.4.1 Message RAM Configuration in
  Bosch M_CAN user manual for details.
 
+Optional properties:
+- fixed-transceiver: Fixed-transceiver subnode describing maximum speed
+ that can be used for CAN and/or CAN-FD modes.  See
+ 
Documentation/devicetree/bindings/net/can/fixed-transceiver.txt
+ for details.
 Example:
 SoC dtsi:
 m_can1: can@020e8000 {
@@ -64,4 +69,9 @@ Board dts:
pinctrl-names = "default";
pinctrl-0 = <_m_can1>;
status = "enabled";
+
+   fixed-transceiver@0 {
+   max-arbitration-speed = <100>;
+   max-data-speed = <500>;
+   };
 };
-- 
2.10.0



[PATCH 2/4] can: fixed-transceiver: Add documentation for CAN fixed transceiver bindings

2017-07-19 Thread Franklin S Cooper Jr
Add documentation to describe usage of the new fixed transceiver binding.
This new binding is applicable for any CAN device therefore it exists as
its own document.

Signed-off-by: Franklin S Cooper Jr 
---
 .../bindings/net/can/fixed-transceiver.txt | 31 ++
 1 file changed, 31 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/net/can/fixed-transceiver.txt

diff --git a/Documentation/devicetree/bindings/net/can/fixed-transceiver.txt 
b/Documentation/devicetree/bindings/net/can/fixed-transceiver.txt
new file mode 100644
index 000..7c093c3
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/can/fixed-transceiver.txt
@@ -0,0 +1,31 @@
+Fixed transceiver Device Tree binding
+--
+
+CAN transceiver typically limits the max speed in standard CAN and CAN FD
+modes. Typically these limitations are static and the transceivers themselves
+provide no way to detect this limitation at runtime. For this situation,
+the "fixed-transceiver" node can be used.
+
+Properties:
+
+Optional:
+ max-arbitration-speed: a positive non-zero value that determines the max
+speed CAN can run in non CAN-FD mode or during the
+arbitration phase in CAN-FD mode.
+
+ max-data-speed:a positive value that determines the max data rate
+that can be used in CAN-FD mode. A value of 0
+implies CAN-FD is not supported by the transceiver.
+
+Examples:
+
+Based on Texas Instrument's TCAN1042HGV CAN Transceiver
+
+m_can0 {
+   
+   fixed-transceiver@0 {
+   max-arbitration-speed = <100>;
+   max-data-speed = <500>;
+   };
+   ...
+};
-- 
2.10.0



Re: [PATCH] liquidio: lio_main: remove unnecessary static in setup_io_queues()

2017-07-19 Thread David Miller
From: "Gustavo A. R. Silva" 
Date: Tue, 18 Jul 2017 15:53:48 -0500

> Remove unnecessary static on local variables cpu_id_modulus and cpu_id.
> Such variables are initialized before being used, on every execution
> path throughout the function. The static has no benefit and, removing
> it reduces the object file size.
> 
> This issue was detected using Coccinelle and the following semantic patch:
...
> Signed-off-by: Gustavo A. R. Silva 

Applied.


Re: [PATCH] wireless: airo: remove unnecessary static in writerids()

2017-07-19 Thread David Miller
From: "Gustavo A. R. Silva" 
Date: Tue, 18 Jul 2017 15:37:11 -0500

> Remove unnecessary static on local function pointer _writer_.
> Such pointer is initialized before being used, on every
> execution path throughout the function. The static has no
> benefit and, removing it reduces the object file size.
> 
> This issue was detected using Coccinelle and the following semantic patch:
 ...
> Signed-off-by: Gustavo A. R. Silva 

Applied.


Re: [PATCH] net: tulip: remove useless code in tulip_init_one()

2017-07-19 Thread David Miller
From: "Gustavo A. R. Silva" 
Date: Tue, 18 Jul 2017 15:43:33 -0500

> Remove useless local variable multiport_cnt and the code related.
> 
> Signed-off-by: Gustavo A. R. Silva 

Applied.


Re: [PATCH] rtlwifi: remove useless code

2017-07-19 Thread David Miller
From: "Gustavo A. R. Silva" 
Date: Tue, 18 Jul 2017 15:41:06 -0500

> Remove useless local variables last_read_point and last_txw_point and
> the code related.
> 
> Signed-off-by: Gustavo A. R. Silva 

Applied.


Re: [PATCH] qlcnic: remove unnecessary static in qlcnic_dump_fw()

2017-07-19 Thread David Miller
From: "Gustavo A. R. Silva" 
Date: Tue, 18 Jul 2017 15:45:29 -0500

> Remove unnecessary static on local variable fw_dump_ops.
> Such variable is initialized before being used, on every
> execution path throughout the function. The static has no
> benefit and, removing it reduces the object file size.
> 
> This issue was detected using Coccinelle and the following semantic patch:
 ...
> Signed-off-by: Gustavo A. R. Silva 

Applied.


Re: [PATCH] liquidio: lio_vf_main: remove unnecessary static in setup_io_queues()

2017-07-19 Thread David Miller
From: "Gustavo A. R. Silva" 
Date: Tue, 18 Jul 2017 15:50:15 -0500

> Remove unnecessary static on local variables cpu_id_modulus and cpu_id.
> Such variables are initialized before being used, on every execution
> path throughout the function. The static has no benefit and, removing
> it reduces the object file size.
> 
> This issue was detected using Coccinelle and the following semantic patch:
 ...
> Signed-off-by: Gustavo A. R. Silva 

Applied.


Re: [PATCH] net: ethernet: mediatek: remove useless code in mtk_poll_tx()

2017-07-19 Thread David Miller
From: "Gustavo A. R. Silva" 
Date: Tue, 18 Jul 2017 15:48:06 -0500

> Remove useless local variable _condition_ and the code related.
> 
> Signed-off-by: Gustavo A. R. Silva 

Applied.


Re: [PATCH net-next] net: dsa: unexport dsa_is_port_initialized

2017-07-19 Thread David Miller
From: Vivien Didelot 
Date: Tue, 18 Jul 2017 16:23:56 -0400

> The dsa_is_port_initialized helper is only used by dsa_switch_resume and
> dsa_switch_suspend, if CONFIG_PM_SLEEP is enabled. Make it static to
> dsa.c.
> 
> Signed-off-by: Vivien Didelot 

Applied.


Re: [PATCH net-next] net/packet: remove unused PGV_FROM_VMALLOC definition.

2017-07-19 Thread David Miller
From: Rami Rosen 
Date: Tue, 18 Jul 2017 22:23:30 +0300

> This patch removes the definition of PGV_FROM_VMALLOC from af_packet.c.
> The PGV_FROM_VMALLOC definition was already removed by 
> commit 441c793a5650 ("net: cleanup unused macros in net directory"),
> and its usage was removed even before by commit c56b4d90123b 
> ("af_packet: remove pgv.flags"); but it was added back by mistake later on,
> in commit f6fb8f100b80 ("af-packet: TPACKET_V3 flexible buffer 
> implementation").
> 
> Signed-off-by: Rami Rosen 

Applied, thanks Rami.


Re: [PATCH v1 1/1] dt-binding: ptp: Add SoC compatibility strings for dte ptp clock

2017-07-19 Thread David Miller
From: Arun Parameswaran 
Date: Tue, 18 Jul 2017 10:14:16 -0700

> Hi David,
> 
> On 17-07-10 06:44 AM, Rob Herring wrote:
>> On Thu, Jul 06, 2017 at 10:37:57AM -0700, Arun Parameswaran wrote:
>>> Add SoC specific compatibility strings to the Broadcom DTE
>>> based PTP clock binding document.
>>>
>>> Fixed the document heading and node name.
>>>
>>> Fixes: 80d6076140b2 ("dt-binding: ptp: add bindings document for dte based 
>>> ptp clock")
>>> Signed-off-by: Arun Parameswaran 
>>> ---
>>>  Documentation/devicetree/bindings/ptp/brcm,ptp-dte.txt | 15 +++
>>>  1 file changed, 11 insertions(+), 4 deletions(-)
>> 
>> Acked-by: Rob Herring 
>>
> Will you be picking up this change ?

Sure, done.


Re: [Resend, PATCH v1] ISDN: eicon: switch to use native bitmaps

2017-07-19 Thread David Miller
From: Andy Shevchenko 
Date: Tue, 18 Jul 2017 18:49:26 +0300

> Two arrays are clearly bit maps, so, make that explicit by converting to
> bitmap API and remove custom helpers.
> 
> Note sig_ind() uses out of boundary bit to (looks like) protect against
> potential bitmap_empty() checks for the same bitmap.
> 
> This patch removes that since:
> 1) that didn't guarantee atomicity anyway;
> 2) the first operation inside the for-loop is set bit in the bitmap
>(which effectively makes it non-empty);
> 3) group_optimization() doesn't utilize possible emptiness of the bitmap
>in question.
> 
> Thus, if there is a protection needed it should be implemented properly.
> 
> Signed-off-by: Andy Shevchenko 

Applied, thanks.


Re: [PATCH net-next] sfc: Add ethtool -m support for QSFP modules

2017-07-19 Thread David Miller
From: Martin Habets 
Date: Tue, 18 Jul 2017 16:43:19 +0100

> This also adds support for non-QSFP modules attached to QSFP.
> 
> Signed-off-by: Martin Habets 

Applied, thanks!


Re: [PATCH net-next] tcp: adjust tail loss probe timeout

2017-07-19 Thread David Miller
From: Yuchung Cheng 
Date: Wed, 19 Jul 2017 15:41:26 -0700

> This patch adjusts the timeout formula to schedule the TCP loss probe
> (TLP). The previous formula uses 2*SRTT or 1.5*RTT + DelayACKMax if
> only one packet is in flight. It keeps a lower bound of 10 msec which
> is too large for short RTT connections (e.g. within a data-center).
> The new formula = 2*RTT + (inflight == 1 ? 200ms : 2ticks) which
> performs better for short and fast connections.
> 
> Signed-off-by: Yuchung Cheng 
> Signed-off-by: Neal Cardwell 

Applied, thanks!


Re: [PATCH V2 net-next 12/21] net-next/hinic: Add qp resources

2017-07-19 Thread David Miller
From: Aviad Krawczyk 
Date: Wed, 19 Jul 2017 17:19:10 +0800

> diff --git a/drivers/net/ethernet/huawei/hinic/Makefile 
> b/drivers/net/ethernet/huawei/hinic/Makefile
> index 519382b..24728f0 100644
> --- a/drivers/net/ethernet/huawei/hinic/Makefile
> +++ b/drivers/net/ethernet/huawei/hinic/Makefile
> @@ -1,5 +1,5 @@
>  obj-$(CONFIG_HINIC) += hinic.o
>  
>  hinic-y := hinic_main.o hinic_tx.o hinic_rx.o hinic_port.o hinic_hw_dev.o \
> -hinic_hw_io.o hinic_hw_wq.o hinic_hw_mgmt.o hinic_hw_api_cmd.o \
> -hinic_hw_eqs.o hinic_hw_if.o
> \ No newline at end of file
> +hinic_hw_io.o hinic_hw_qp.o hinic_hw_wq.o hinic_hw_mgmt.o \
> +hinic_hw_api_cmd.o hinic_hw_eqs.o hinic_hw_if.o
> \ No newline at end of file

Please add the missing newline to the end of this Makefile so that these patches
don't keep emitting this message.


[PATCH 1/3] can: m_can: Make hclk optional

2017-07-19 Thread Franklin S Cooper Jr
Hclk is the MCAN's interface clock. However, for OMAP based devices such as
DRA7 SoC family the interface clock is handled by hwmod. Therefore, this
interface clock is managed by hwmod driver via pm_runtime_get and
pm_runtime_put calls. Therefore, this interface clock isn't defined in DT
and thus the driver shouldn't fail if this clock isn't found.

Signed-off-by: Franklin S Cooper Jr 
---
 drivers/net/can/m_can/m_can.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index f4947a7..7fe9145 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -1568,8 +1568,13 @@ static int m_can_plat_probe(struct platform_device *pdev)
hclk = devm_clk_get(&pdev->dev, "hclk");
cclk = devm_clk_get(&pdev->dev, "cclk");
 
-   if (IS_ERR(hclk) || IS_ERR(cclk)) {
-   dev_err(&pdev->dev, "no clock found\n");
+   if (IS_ERR(hclk)) {
+   dev_warn(&pdev->dev, "can't find hclk\n");
+   hclk = 0;
+   }
+
+   if (IS_ERR(cclk)) {
+   dev_err(&pdev->dev, "cclk could not be found\n");
ret = -ENODEV;
goto failed_ret;
}
-- 
2.10.0



[PATCH 3/3] can: m_can: Add PM Runtime

2017-07-19 Thread Franklin S Cooper Jr
Add support for PM Runtime which is the new way to handle managing clocks.
However, to avoid breaking SoCs not using PM_RUNTIME leave the old clk
management approach in place.

PM_RUNTIME is required by OMAP based devices to handle clock management.
Therefore, this allows future Texas Instruments SoCs that have the MCAN IP
to work with this driver.

Signed-off-by: Franklin S Cooper Jr 
---
 drivers/net/can/m_can/m_can.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index 7fe9145..eb45cd5 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include <linux/pm_runtime.h>
 #include 
 #include 
 
@@ -633,11 +634,15 @@ static int m_can_clk_start(struct m_can_priv *priv)
if (err)
clk_disable_unprepare(priv->hclk);
 
+   pm_runtime_get_sync(priv->device);
+
return err;
 }
 
 static void m_can_clk_stop(struct m_can_priv *priv)
 {
+   pm_runtime_put_sync(priv->device);
+
clk_disable_unprepare(priv->cclk);
clk_disable_unprepare(priv->hclk);
 }
@@ -1582,6 +1587,8 @@ static int m_can_plat_probe(struct platform_device *pdev)
/* Enable clocks. Necessary to read Core Release in order to determine
 * M_CAN version
 */
+   pm_runtime_enable(&pdev->dev);
+
ret = clk_prepare_enable(hclk);
if (ret)
goto disable_hclk_ret;
@@ -1626,6 +1633,8 @@ static int m_can_plat_probe(struct platform_device *pdev)
 */
tx_fifo_size = mram_config_vals[7];
 
+   pm_runtime_get_sync(&pdev->dev);
+
/* allocate the m_can device */
dev = alloc_m_can_dev(pdev, addr, tx_fifo_size);
if (!dev) {
@@ -1670,6 +1679,7 @@ static int m_can_plat_probe(struct platform_device *pdev)
 disable_hclk_ret:
clk_disable_unprepare(hclk);
 failed_ret:
+   pm_runtime_put_sync(&pdev->dev);
return ret;
 }
 
@@ -1726,6 +1736,9 @@ static int m_can_plat_remove(struct platform_device *pdev)
struct net_device *dev = platform_get_drvdata(pdev);
 
unregister_m_can_dev(dev);
+
+   pm_runtime_disable(&pdev->dev);
+
platform_set_drvdata(pdev, NULL);
 
free_m_can_dev(dev);
-- 
2.10.0



[PATCH 2/3] can: m_can: Update documentation to indicate that hclk may be optional

2017-07-19 Thread Franklin S Cooper Jr
Update the documentation to reflect that hclk is now an optional clock.

Signed-off-by: Franklin S Cooper Jr 
---
 Documentation/devicetree/bindings/net/can/m_can.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/net/can/m_can.txt 
b/Documentation/devicetree/bindings/net/can/m_can.txt
index 9e33177..2a0fe5b 100644
--- a/Documentation/devicetree/bindings/net/can/m_can.txt
+++ b/Documentation/devicetree/bindings/net/can/m_can.txt
@@ -12,7 +12,8 @@ Required properties:
 - interrupt-names  : Should contain "int0" and "int1"
 - clocks   : Clocks used by controller, should be host clock
  and CAN clock.
-- clock-names  : Should contain "hclk" and "cclk"
+- clock-names  : Should contain "hclk" and "cclk". For some socs hclk
+ may be optional.
 - pinctrl-  : Pinctrl states as described in 
bindings/pinctrl/pinctrl-bindings.txt
 - pinctrl-names: Names corresponding to the numbered pinctrl states
 - bosch,mram-cfg   : Message RAM configuration data.
-- 
2.10.0



[PATCH 0/3] can: m_can: Add PM Runtime Support

2017-07-19 Thread Franklin S Cooper Jr
Add PM runtime support to the MCAN driver. To support devices that don't use
PM runtime leave the original clk calls in the driver. Perhaps in the future
when it makes sense we can remove these non pm runtime clk calls.

Franklin S Cooper Jr (3):
  can: m_can: Make hclk optional
  can: m_can: Update documentation to indicate that hclk may be optional
  can: m_can: Add PM Runtime

 .../devicetree/bindings/net/can/m_can.txt  |  3 ++-
 drivers/net/can/m_can/m_can.c  | 22 --
 2 files changed, 22 insertions(+), 3 deletions(-)

-- 
2.10.0



Re: [PATCH 1/3] ipv4: initialize fib_trie prior to register_netdev_notifier call.

2017-07-19 Thread Eric W. Biederman
Mahesh Bandewar  writes:

> From: Mahesh Bandewar 
>
> Net stack initialization currently initializes fib_trie after the
> first register_netdevice_notifier() call. In fact, fib_trie initialization
> needs to happen before the first rtnl_register(). This does not cause any
> problem since there are no devices UP at this moment, but trying to bring 'lo'
> UP at initialization would make this assumption wrong and expose the issue.
>
> Fixes following crash
>
>  Call Trace:
>   ? alternate_node_alloc+0x76/0xa0
>   fib_table_insert+0x1b7/0x4b0
>   fib_magic.isra.17+0xea/0x120
>   fib_add_ifaddr+0x7b/0x190
>   fib_netdev_event+0xc0/0x130
>   register_netdevice_notifier+0x1c1/0x1d0
>   ip_fib_init+0x72/0x85
>   ip_rt_init+0x187/0x1e9
>   ip_init+0xe/0x1a
>   inet_init+0x171/0x26c
>   ? ipv4_offload_init+0x66/0x66
>   do_one_initcall+0x43/0x160
>   kernel_init_freeable+0x191/0x219
>   ? rest_init+0x80/0x80
>   kernel_init+0xe/0x150
>   ret_from_fork+0x22/0x30
>  Code: f6 46 23 04 74 86 4c 89 f7 e8 ae 45 01 00 49 89 c7 4d 85 ff 0f 85 7b 
> ff ff ff 31 db eb 08 4c 89 ff e8 16 47 01 00 48 8b 44 24 38 <45> 8b 6e 14 4d 
> 63 76 74 48 89 04 24 0f 1f 44 00 00 48 83 c4 08
>  RIP: kmem_cache_alloc+0xcf/0x1c0 RSP: 9b1500017c28
>  CR2: 0014
>
> Fixes: 7b1a74fdbb9e ("[NETNS]: Refactor fib initialization so it can handle 
> multiple namespaces.")
> Fixes: 7f9b80529b8a ("[IPV4]: fib hash|trie initialization")

Acked-by: "Eric W. Biederman" 

>
> Signed-off-by: Mahesh Bandewar 
> ---
>  net/ipv4/fib_frontend.c | 9 +
>  1 file changed, 5 insertions(+), 4 deletions(-)
>
> diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
> index 4e678fa892dd..044d2a159a3c 100644
> --- a/net/ipv4/fib_frontend.c
> +++ b/net/ipv4/fib_frontend.c
> @@ -1334,13 +1334,14 @@ static struct pernet_operations fib_net_ops = {
>  
>  void __init ip_fib_init(void)
>  {
> - rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL);
> - rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL);
> - rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL);
> + fib_trie_init();
>  
>   register_pernet_subsys(&fib_net_ops);
> +
>   register_netdevice_notifier(&fib_netdev_notifier);
>   register_inetaddr_notifier(&fib_inetaddr_notifier);
>  
> - fib_trie_init();
> + rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL);
> + rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL);
> + rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL);
>  }


[PATCH net-next] tcp: adjust tail loss probe timeout

2017-07-19 Thread Yuchung Cheng
This patch adjusts the timeout formula to schedule the TCP loss probe
(TLP). The previous formula uses 2*SRTT or 1.5*RTT + DelayACKMax if
only one packet is in flight. It keeps a lower bound of 10 msec which
is too large for short RTT connections (e.g. within a data-center).
The new formula = 2*RTT + (inflight == 1 ? 200ms : 2ticks) which
performs better for short and fast connections.

Signed-off-by: Yuchung Cheng 
Signed-off-by: Neal Cardwell 
---
 include/net/tcp.h   |  3 +--
 net/ipv4/tcp_output.c   | 17 ++---
 net/ipv4/tcp_recovery.c |  2 +-
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 70483296157f..4f056ea79df2 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -139,6 +139,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 #endif
 #define TCP_RTO_MAX((unsigned)(120*HZ))
 #define TCP_RTO_MIN((unsigned)(HZ/5))
+#define TCP_TIMEOUT_MIN(2U) /* Min timeout for TCP timers in jiffies */
 #define TCP_TIMEOUT_INIT ((unsigned)(1*HZ))/* RFC6298 2.1 initial RTO 
value*/
 #define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ))/* RFC 1122 initial RTO 
value, now
 * used as a fallback RTO for 
the
@@ -150,8 +151,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval 
between probes
 * for local resources.
 */
-#define TCP_REO_TIMEOUT_MIN(2000) /* Min RACK reordering timeout in usec */
-
 #define TCP_KEEPALIVE_TIME (120*60*HZ) /* two hours */
 #define TCP_KEEPALIVE_PROBES   9   /* Max of 9 keepalive probes
*/
 #define TCP_KEEPALIVE_INTVL(75*HZ)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 4e985dea1dd2..886d874775df 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2377,7 +2377,6 @@ bool tcp_schedule_loss_probe(struct sock *sk)
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
u32 timeout, tlp_time_stamp, rto_time_stamp;
-   u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3);
 
/* No consecutive loss probes. */
if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) {
@@ -2406,15 +2405,19 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 tcp_send_head(sk))
return false;
 
-   /* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account
+   /* Probe timeout is 2*rtt. Add minimum RTO to account
 * for delayed ack when there's one outstanding packet. If no RTT
 * sample is available then probe after TCP_TIMEOUT_INIT.
 */
-   timeout = rtt << 1 ? : TCP_TIMEOUT_INIT;
-   if (tp->packets_out == 1)
-   timeout = max_t(u32, timeout,
-   (rtt + (rtt >> 1) + TCP_DELACK_MAX));
-   timeout = max_t(u32, timeout, msecs_to_jiffies(10));
+   if (tp->srtt_us) {
+   timeout = usecs_to_jiffies(tp->srtt_us >> 2);
+   if (tp->packets_out == 1)
+   timeout += TCP_RTO_MIN;
+   else
+   timeout += TCP_TIMEOUT_MIN;
+   } else {
+   timeout = TCP_TIMEOUT_INIT;
+   }
 
/* If RTO is shorter, just schedule TLP in its place. */
tlp_time_stamp = tcp_jiffies32 + timeout;
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index fe9a493d0208..449cd914d58e 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -113,7 +113,7 @@ void tcp_rack_mark_lost(struct sock *sk)
tp->rack.advanced = 0;
tcp_rack_detect_loss(sk, &timeout);
if (timeout) {
-   timeout = usecs_to_jiffies(timeout + TCP_REO_TIMEOUT_MIN);
+   timeout = usecs_to_jiffies(timeout) + TCP_TIMEOUT_MIN;
inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT,
  timeout, inet_csk(sk)->icsk_rto);
}
-- 
2.14.0.rc0.284.gd933b75aa4-goog



[PATCH 1/3] ipv4: initialize fib_trie prior to register_netdev_notifier call.

2017-07-19 Thread Mahesh Bandewar
From: Mahesh Bandewar 

Net stack initialization currently initializes fib_trie after the
first register_netdevice_notifier() call. In fact, fib_trie initialization
needs to happen before the first rtnl_register(). This does not cause any
problem since there are no devices UP at this moment, but trying to bring 'lo'
UP at initialization would make this assumption wrong and expose the issue.

Fixes following crash

 Call Trace:
  ? alternate_node_alloc+0x76/0xa0
  fib_table_insert+0x1b7/0x4b0
  fib_magic.isra.17+0xea/0x120
  fib_add_ifaddr+0x7b/0x190
  fib_netdev_event+0xc0/0x130
  register_netdevice_notifier+0x1c1/0x1d0
  ip_fib_init+0x72/0x85
  ip_rt_init+0x187/0x1e9
  ip_init+0xe/0x1a
  inet_init+0x171/0x26c
  ? ipv4_offload_init+0x66/0x66
  do_one_initcall+0x43/0x160
  kernel_init_freeable+0x191/0x219
  ? rest_init+0x80/0x80
  kernel_init+0xe/0x150
  ret_from_fork+0x22/0x30
 Code: f6 46 23 04 74 86 4c 89 f7 e8 ae 45 01 00 49 89 c7 4d 85 ff 0f 85 7b ff 
ff ff 31 db eb 08 4c 89 ff e8 16 47 01 00 48 8b 44 24 38 <45> 8b 6e 14 4d 63 76 
74 48 89 04 24 0f 1f 44 00 00 48 83 c4 08
 RIP: kmem_cache_alloc+0xcf/0x1c0 RSP: 9b1500017c28
 CR2: 0014

Fixes: 7b1a74fdbb9e ("[NETNS]: Refactor fib initialization so it can handle 
multiple namespaces.")
Fixes: 7f9b80529b8a ("[IPV4]: fib hash|trie initialization")

Signed-off-by: Mahesh Bandewar 
---
 net/ipv4/fib_frontend.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 4e678fa892dd..044d2a159a3c 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1334,13 +1334,14 @@ static struct pernet_operations fib_net_ops = {
 
 void __init ip_fib_init(void)
 {
-   rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL);
-   rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL);
-   rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL);
+   fib_trie_init();
 
register_pernet_subsys(&fib_net_ops);
+
register_netdevice_notifier(&fib_netdev_notifier);
register_inetaddr_notifier(&fib_inetaddr_notifier);
 
-   fib_trie_init();
+   rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL);
+   rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL);
+   rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL);
 }
-- 
2.14.0.rc0.284.gd933b75aa4-goog



Re: [PATCH net-next] mdio_bus: Remove unneeded gpiod NULL check

2017-07-19 Thread Fabio Estevam
Hi Andrew,

On Tue, Jul 18, 2017 at 10:32 AM, Andrew Lunn  wrote:

> http://elixir.free-electrons.com/linux/latest/source/include/linux/gpio/consumer.h#L345
> static inline void gpiod_set_value_cansleep(struct gpio_desc *desc, int value)
> {
> /* GPIO can never have been requested */
> WARN_ON(1);
> }
>
> But i would say this is a gpio problem. If GPIO enabled does not care,
> GPIO disabled should also not care.

Agreed.

Sergei, sorry for taking so long to understand your point.

> Adding Linus Walleij.

Just sent a RFC patch to linux-gpio.

Thanks


[PATCH v2] netns: avoid directory traversal (was: ip netns: Make sure netns name is sane)

2017-07-19 Thread Matteo Croce
v2: reword commit message

ip netns keeps track of created namespaces with bind mounts named
/var/run/netns/<name>. No input sanitization is done, allowing creation and
deletion of files relative to /var/run/netns or, if the path is nonexistent or
invalid, allowing creation of "untracked" namespaces (invisible to the tool).

This commit denies creation or deletion of namespaces with names containing
"/" or matching exactly "." or "..".

Signed-off-by: Matteo Croce 
---
 ip/ipnetns.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/ip/ipnetns.c b/ip/ipnetns.c
index 0b0378ab..42549944 100644
--- a/ip/ipnetns.c
+++ b/ip/ipnetns.c
@@ -766,6 +766,11 @@ static int netns_monitor(int argc, char **argv)
return 0;
 }
 
+static int invalid_name(const char *name)
+{
+   return strchr(name, '/') || !strcmp(name, ".") || !strcmp(name, "..");
+}
+
 int do_netns(int argc, char **argv)
 {
netns_nsid_socket_init();
@@ -775,6 +780,11 @@ int do_netns(int argc, char **argv)
return netns_list(0, NULL);
}
 
+   if (argc > 1 && invalid_name(argv[1])) {
+   fprintf(stderr, "Invalid netns name \"%s\"\n", argv[1]);
+   exit(-1);
+   }
+
if ((matches(*argv, "list") == 0) || (matches(*argv, "show") == 0) ||
(matches(*argv, "lst") == 0)) {
netns_map_init();
-- 
2.13.3



Re: [PATCH V2 net-next 01/21] net-next/hinic: Initialize hw interface

2017-07-19 Thread Francois Romieu
Aviad Krawczyk  :
[...]
> diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c 
> b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
> new file mode 100644
> index 000..fbc9de4
> --- /dev/null
> +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
[...]
> +/**
> + * hinic_init_hwdev - Initialize the NIC HW
> + * @hwdev: the NIC HW device that is returned from the initialization
> + * @pdev: the NIC pci device
> + *
> + * Return 0 - Success, negative - Failure
> + *
> + * Initialize the NIC HW device and return a pointer to it in the first arg
> + **/

Return a pointer and use ERR_PTR / IS_ERR ?

> +int hinic_init_hwdev(struct hinic_hwdev **hwdev, struct pci_dev *pdev)
> +{
> + struct hinic_pfhwdev *pfhwdev;
> + struct hinic_hwif *hwif;
> + int err;
> +
> + hwif = devm_kzalloc(&pdev->dev, sizeof(*hwif), GFP_KERNEL);
> + if (!hwif)
> + return -ENOMEM;
> +
> + err = hinic_init_hwif(hwif, pdev);
> + if (err) {
> + dev_err(&pdev->dev, "Failed to init HW interface\n");
> + return err;
> + }
> +
> + if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif)) {
> + dev_err(&pdev->dev, "Unsupported PCI Function type\n");
> + err = -EFAULT;
> + goto func_type_err;
> + }
> +
> + pfhwdev = devm_kzalloc(&pdev->dev, sizeof(*pfhwdev), GFP_KERNEL);
> + if (!pfhwdev) {
> + err = -ENOMEM;
> + goto pfhwdev_alloc_err;

Intel, Mellanox, Broadcom, Amazon and friends use "err_xyz" labels.

Please consider using the same style.

[...]
> +void hinic_free_hwdev(struct hinic_hwdev *hwdev)
> +{
> + struct hinic_hwif *hwif = hwdev->hwif;
> + struct pci_dev *pdev = hwif->pdev;
> + struct hinic_pfhwdev *pfhwdev;
> +
> + if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif)) {
> + dev_err(&pdev->dev, "unsupported PCI Function type\n");
> + return;
> + }

If it succeeded in hinic_init_hwdev, how could it fail here ?

If it failed in hinic_init_hwdev, hinic_free_hwdev should not even
be called.

-> remove ?

[...]
> diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c 
> b/drivers/net/ethernet/huawei/hinic/hinic_main.c
> new file mode 100644
> index 000..c61c769
> --- /dev/null
> +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c
[...]
> +static void hinic_remove(struct pci_dev *pdev)
> +{
> + struct net_device *netdev = pci_get_drvdata(pdev);
> + struct hinic_dev *nic_dev;
> +
> + if (!netdev)
> + return;

Your driver is flawed if this test can ever succeed.

[...]
> +static int __init hinic_init(void)
> +{
> + return pci_register_driver(&hinic_driver);
> +}
> +
> +static void __exit hinic_exit(void)
> +{
> + pci_unregister_driver(&hinic_driver);
> +}

Use module_pci_driver(hinic_driver).

Remove hinic_init() and hinic_exit().

> diff --git a/drivers/net/ethernet/huawei/hinic/hinic_pci_id_tbl.h 
> b/drivers/net/ethernet/huawei/hinic/hinic_pci_id_tbl.h
> new file mode 100644
> index 000..1d92617
> --- /dev/null
> +++ b/drivers/net/ethernet/huawei/hinic/hinic_pci_id_tbl.h
[...]
> +#ifndef HINIC_PCI_ID_TBL_H
> +#define HINIC_PCI_ID_TBL_H
> +
> +#ifndef PCI_VENDOR_ID_HUAWEI
> +#define PCI_VENDOR_ID_HUAWEI0x19e5
> +#endif

Useless: it duplicates include/linux/pci_ids.h

> +
> +#ifndef PCI_DEVICE_ID_HI1822_PF
> +#define PCI_DEVICE_ID_HI1822_PF 0x1822
> +#endif

Please move it to the .c file where it is actually used.


Extra:

grep -E 'void\ \*' drivers/net/ethernet/huawei/hinic/* makes me nervous.

At some point one function will be fed with a wrong pointer and the
compiler won't notice it.

For instance hinic_sq_read_wqe is only called with &skb. There's no
reason to declare it using a 'void **' argument.

-- 
Ueimor


Re: [PATCH 1/2] net: phy: at803x: Fix RGMII RX and TX clock delays setup

2017-07-19 Thread Florian Fainelli
On 07/19/2017 02:29 PM, Mason wrote:
> On 19/07/2017 21:30, Florian Fainelli wrote:
>> On 07/19/2017 12:24 PM, Grygorii Strashko wrote:
>>> Hi
>>>
>>> On 07/19/2017 10:31 AM, Marc Gonzalez wrote:
>>>> The current code supports enabling RGMII RX and TX clock delays.
>>>> The unstated assumption is that these settings are disabled by
>>>> default at reset, which is not the case.
>>>>
>>>> RX clock delay is enabled at reset. And TX clock delay "survives"
>>>> across SW resets. Thus, if the bootloader enables TX clock delay,
>>>> it will remain enabled at reset in Linux.
>>>>
>>>> Provide disable functions to configure the RGMII clock delays
>>>> exactly as specified in the fwspec.
>>>>
>>>> Signed-off-by: Marc Gonzalez 
>>>> ---
>>>>   drivers/net/phy/at803x.c | 32 
>>>>   1 file changed, 24 insertions(+), 8 deletions(-)
>>> This patch breaks am335x-evm networking.
>>>
>>> To restore it I've had to apply below diff:
>>> diff --git a/arch/arm/boot/dts/am335x-evm.dts 
>>> b/arch/arm/boot/dts/am335x-evm.dts
>>> index 200d6ab..9578bdf 100644
>>> --- a/arch/arm/boot/dts/am335x-evm.dts
>>> +++ b/arch/arm/boot/dts/am335x-evm.dts
>>> @@ -724,12 +724,12 @@
>>>  
>>>  _emac0 {
>>> phy_id = <_mdio>, <0>;
>>> -   phy-mode = "rgmii-txid";
>>> +   phy-mode = "rgmii-id";
>>>  };
>>>  
>>>  _emac1 {
>>> phy_id = <_mdio>, <1>;
>>> -   phy-mode = "rgmii-txid";
>>> +   phy-mode = "rgmii-id";
>>>  };
>>>  
>>>   {
>>>
>>> Sry, can't comment here to much - not E-PHY expert.
>>
>> It's useful feedback, since we had poorly defined "phy-mode" semantics
>> for too long, this is totally expected, Marc this is exactly why Mans is
>> suggesting additional MAC-specific properties to define delays.
> 
> In the current situation, it is impossible to configure
> the at803x to disable RX clock delay or TX clock delay
> (in case the boot loader enabled it).
> 
> Are you saying that, because no one has had a problem
> so far, it is not possible to fix it now, as it would
> break boards like am335x-evm.dts which didn't request
> RX clock delay, but got one anyway?

First it means that your patch as-is broke Grygorii's board, and you
need to at least integrate his patch if you plan on having your own
patch accepted. This will fix am335x-evm.dts, but we have no visibility
into the other DTSes out there that may be using an at803x PHY. If you
break something you need to fix it, and touching how PHY delays are

> 
> Does that mean we cannot support boards using AR8035
> that need the RX and TX clock delays disabled?

No, that is not what that means, it means that you cannot change how an
existing PHY driver with active and existing deployments is interpreting
the phy_interface_t value in a way that breaks people's setups, which
your patch just did. Yes this makes it non-conforming to the revised
definition of "phy-mode", but it is just how it is, people did not know
any better before.

See below for what you could do.

> 
> I'm not sure how the MAC-specific properties can save
> the day?

If you introduced PHY and/or MAC specific properties to configure the
delays in the appropriate unit of time (say ps), you could use a
non-compliant 'phy-mode' just to satisfy the driver/PHY library and
still override the delays you need.
-- 
Florian


Re: [PATCH 2/2] net: ethernet: nb8800: Fix RGMII TX clock delay setup

2017-07-19 Thread Florian Fainelli
On 07/19/2017 02:15 PM, Mason wrote:
> On 19/07/2017 20:30, Florian Fainelli wrote:
>> On 07/19/2017 10:36 AM, Mason wrote:
>>> On 19/07/2017 19:17, Måns Rullgård wrote:
>>>
 Marc Gonzalez writes:

> According to commit e5f3a4a56ce2a707b2fb8ce37e4414dcac89c672
> ("Documentation: devicetree: clarify usage of the RGMII phy-modes")
> there are 4 RGMII phy-modes to handle:
>
> "rgmii" (RX and TX delays are added by the MAC when required)
> "rgmii-id" (RGMII with internal RX and TX delays provided by the PHY,
>   the MAC should not add the RX or TX delays in this case)
> "rgmii-rxid" (RGMII with internal RX delay provided by the PHY,
>   the MAC should not add an RX delay in this case)
> "rgmii-txid" (RGMII with internal TX delay provided by the PHY,
>   the MAC should not add an TX delay in this case)
>
> Let the MAC handle TX clock delay for rgmii and rgmii-rxid.
>
> Signed-off-by: Marc Gonzalez 
> ---
>  drivers/net/ethernet/aurora/nb8800.c | 8 +---
>  1 file changed, 5 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/net/ethernet/aurora/nb8800.c 
> b/drivers/net/ethernet/aurora/nb8800.c
> index 041cfb7952f8..f3ed320eb4ad 100644
> --- a/drivers/net/ethernet/aurora/nb8800.c
> +++ b/drivers/net/ethernet/aurora/nb8800.c
> @@ -609,7 +609,7 @@ static void nb8800_mac_config(struct net_device *dev)
>   mac_mode |= HALF_DUPLEX;
>
>   if (gigabit) {
> - if (priv->phy_mode == PHY_INTERFACE_MODE_RGMII)
> + if (phy_interface_is_rgmii(dev->phydev))
>   mac_mode |= RGMII_MODE;
>
>   mac_mode |= GMAC_MODE;

 This is a separate issue, and the change is obviously correct.

> @@ -1268,11 +1268,13 @@ static int nb8800_tangox_init(struct net_device 
> *dev)
>   break;
>
>   case PHY_INTERFACE_MODE_RGMII:
> - pad_mode = PAD_MODE_RGMII;
> + case PHY_INTERFACE_MODE_RGMII_RXID:
> + pad_mode = PAD_MODE_RGMII | PAD_MODE_GTX_CLK_DELAY;
>   break;
>
> + case PHY_INTERFACE_MODE_RGMII_ID:
>   case PHY_INTERFACE_MODE_RGMII_TXID:
> - pad_mode = PAD_MODE_RGMII | PAD_MODE_GTX_CLK_DELAY;
> + pad_mode = PAD_MODE_RGMII;
>   break;

 Won't this just make it break in a different set of circumstances?
>>>
>>> I don't think so, and here's my reasoning:
>>>
>>> AFAIU, the HW block always requires a TX clock delay
>>> (I don't know what the "safe" interval is. PHY adds
>>> 2.4 ns, MAC adds ~1 ns, both work.)
>>
>> The nominal delay should be 2 ns because that's exactly what a 90-degree
>> shift at 125 MHz would be. The RGMII specification defines the following:
>>
>> TskewT - Data to Clock output Skew (At Transmitter) Min: -500 ps, Nom: 0,
>> Max: +500 ps
>> TskewR - Data to Clock input Skew (At Receiver) Min: 1ns, Nom: 0, Max:
>> 2.6ns (see note 1)
>>
>> note 1: This implies that PC board design will require clocks to be
>> routed such that an additional trace delay of greater than 1.5ns and
>> less than 2.0ns will be added to the associated clock signal. For 10/100
>> the Max value is unspecified.
>>
>> So it seems to me like you are borderline spec in both delays you gave
>> here and the "HW block always requires a TX clock delay" statement is
>> true for a given board design only.
> 
> I must confess that my understanding of clock delays,
> clock skew, routing, traces, etc is nil.
> 
> Is TskewT the TX clock delay?
> And TskewR the RX clock delay?
> 
> Doesn't wire delay factor in too?
> (So longer wires require more delay.)

How about you start reading the RGMII specification so we can at least,
if nothing else agree on the terminology? It's public:

http://web.archive.org/web/20160303171328/http://www.hp.com/rnd/pdfs/RGMIIv2_0_final_hp.pdf

> 
>>> RX clock delay seems to be "Don't Care" (tested both
>>> enabled and disabled by PHY)
>>> By "tested", I mean ability to ping remote system.
>>
>> Can you do something a bit more stressful than just a ping, also if you
>> have the ability to change the inter-packet gap, do it, and see if you
>> start seeing FCS or any other decoding errors.
> 
> Errr... "Inter-packet gap"?
> Is there supposed to be a HW knob to tweak how long
> the HW waits between sending two frames?

Some Ethernet controllers let you change it, some don't, if nb8800
allows it, it's good for testing in that it packs more frames per
quantum of time. If not, do you have at least a FCS error counter?

> 
>>> If phy-mode is RGMII or RGMII_RXID, then don't add
>>> TX clock delay from PHY, therefore add it from MAC.
>>>
>>> If phy_mode is RGMII_ID or RGMII_TXID, then do add
>>> TX clock delay from PHY, therefore don't add it from MAC.
>>>
>>> What set of circumstances would create an issue?
>>
>> Existing Device Tree sources that do not correspond to that description
>> you just 

Re: [PATCH 1/2] net: phy: at803x: Fix RGMII RX and TX clock delays setup

2017-07-19 Thread Mason
On 19/07/2017 21:30, Florian Fainelli wrote:
> On 07/19/2017 12:24 PM, Grygorii Strashko wrote:
>> Hi
>>
>> On 07/19/2017 10:31 AM, Marc Gonzalez wrote:
>>> The current code supports enabling RGMII RX and TX clock delays.
>>> The unstated assumption is that these settings are disabled by
>>> default at reset, which is not the case.
>>>
>>> RX clock delay is enabled at reset. And TX clock delay "survives"
>>> across SW resets. Thus, if the bootloader enables TX clock delay,
>>> it will remain enabled at reset in Linux.
>>>
>>> Provide disable functions to configure the RGMII clock delays
>>> exactly as specified in the fwspec.
>>>
>>> Signed-off-by: Marc Gonzalez 
>>> ---
>>>   drivers/net/phy/at803x.c | 32 
>>>   1 file changed, 24 insertions(+), 8 deletions(-)
>> This patch breaks am335x-evm networking.
>>
>> To restore it I've had to apply below diff:
>> diff --git a/arch/arm/boot/dts/am335x-evm.dts 
>> b/arch/arm/boot/dts/am335x-evm.dts
>> index 200d6ab..9578bdf 100644
>> --- a/arch/arm/boot/dts/am335x-evm.dts
>> +++ b/arch/arm/boot/dts/am335x-evm.dts
>> @@ -724,12 +724,12 @@
>>  
>>  _emac0 {
>> phy_id = <_mdio>, <0>;
>> -   phy-mode = "rgmii-txid";
>> +   phy-mode = "rgmii-id";
>>  };
>>  
>>  _emac1 {
>> phy_id = <_mdio>, <1>;
>> -   phy-mode = "rgmii-txid";
>> +   phy-mode = "rgmii-id";
>>  };
>>  
>>   {
>>
>> Sry, can't comment here to much - not E-PHY expert.
> 
> It's useful feedback, since we had poorly defined "phy-mode" semantics
> for too long, this is totally expected, Marc this is exactly why Mans is
> suggesting additional MAC-specific properties to define delays.

In the current situation, it is impossible to configure
the at803x to disable RX clock delay or TX clock delay
(in case the boot loader enabled it).

Are you saying that, because no one has had a problem
so far, it is not possible to fix it now, as it would
break boards like am335x-evm.dts which didn't request
RX clock delay, but got one anyway?

Does that mean we cannot support boards using AR8035
that need the RX and TX clock delays disabled?

I'm not sure how the MAC-specific properties can save
the day?

Regards.


Re: [PATCH 2/2] net: ethernet: nb8800: Fix RGMII TX clock delay setup

2017-07-19 Thread Mason
On 19/07/2017 20:30, Florian Fainelli wrote:
> On 07/19/2017 10:36 AM, Mason wrote:
>> On 19/07/2017 19:17, Måns Rullgård wrote:
>>
>>> Marc Gonzalez writes:
>>>
 According to commit e5f3a4a56ce2a707b2fb8ce37e4414dcac89c672
 ("Documentation: devicetree: clarify usage of the RGMII phy-modes")
 there are 4 RGMII phy-modes to handle:

 "rgmii" (RX and TX delays are added by the MAC when required)
 "rgmii-id" (RGMII with internal RX and TX delays provided by the PHY,
the MAC should not add the RX or TX delays in this case)
 "rgmii-rxid" (RGMII with internal RX delay provided by the PHY,
the MAC should not add an RX delay in this case)
 "rgmii-txid" (RGMII with internal TX delay provided by the PHY,
the MAC should not add an TX delay in this case)

 Let the MAC handle TX clock delay for rgmii and rgmii-rxid.

 Signed-off-by: Marc Gonzalez 
 ---
  drivers/net/ethernet/aurora/nb8800.c | 8 +---
  1 file changed, 5 insertions(+), 3 deletions(-)

 diff --git a/drivers/net/ethernet/aurora/nb8800.c 
 b/drivers/net/ethernet/aurora/nb8800.c
 index 041cfb7952f8..f3ed320eb4ad 100644
 --- a/drivers/net/ethernet/aurora/nb8800.c
 +++ b/drivers/net/ethernet/aurora/nb8800.c
 @@ -609,7 +609,7 @@ static void nb8800_mac_config(struct net_device *dev)
mac_mode |= HALF_DUPLEX;

if (gigabit) {
 -  if (priv->phy_mode == PHY_INTERFACE_MODE_RGMII)
 +  if (phy_interface_is_rgmii(dev->phydev))
mac_mode |= RGMII_MODE;

mac_mode |= GMAC_MODE;
>>>
>>> This is a separate issue, and the change is obviously correct.
>>>
 @@ -1268,11 +1268,13 @@ static int nb8800_tangox_init(struct net_device 
 *dev)
break;

case PHY_INTERFACE_MODE_RGMII:
 -  pad_mode = PAD_MODE_RGMII;
 +  case PHY_INTERFACE_MODE_RGMII_RXID:
 +  pad_mode = PAD_MODE_RGMII | PAD_MODE_GTX_CLK_DELAY;
break;

 +  case PHY_INTERFACE_MODE_RGMII_ID:
case PHY_INTERFACE_MODE_RGMII_TXID:
 -  pad_mode = PAD_MODE_RGMII | PAD_MODE_GTX_CLK_DELAY;
 +  pad_mode = PAD_MODE_RGMII;
break;
>>>
>>> Won't this just make it break in a different set of circumstances?
>>
>> I don't think so, and here's my reasoning:
>>
>> AFAIU, the HW block always requires a TX clock delay
>> (I don't know what the "safe" interval is. PHY adds
>> 2.4 ns, MAC adds ~1 ns, both work.)
> 
> The nominal delay should be 2 ns because that's exactly what a 90-degree
> shift at 125 MHz would be. The RGMII specification defines the following:
> 
> TskewT - Data to Clock output Skew (At Transmitter) Min: -500 ps, Nom: 0,
> Max: +500 ps
> TskewR - Data to Clock input Skew (At Receiver) Min: 1ns, Nom: 0, Max:
> 2.6ns (see note 1)
> 
> note 1: This implies that PC board design will require clocks to be
> routed such that an additional trace delay of greater than 1.5ns and
> less than 2.0ns will be added to the associated clock signal. For 10/100
> the Max value is unspecified.
> 
> So it seems to me like you are borderline spec in both delays you gave
> here and the "HW block always requires a TX clock delay" statement is
> true for a given board design only.

I must confess that my understanding of clock delays,
clock skew, routing, traces, etc is nil.

Is TskewT the TX clock delay?
And TskewR the RX clock delay?

Doesn't wire delay factor in too?
(So longer wires require more delay.)

>> RX clock delay seems to be "Don't Care" (tested both
>> enabled and disabled by PHY)
>> By "tested", I mean ability to ping remote system.
> 
> Can you do something a bit more stressful than just a ping, also if you
> have the ability to change the inter-packet gap, do it, and see if you
> start seeing FCS or any other decoding errors.

Errr... "Inter-packet gap"?
Is there supposed to be a HW knob to tweak how long
the HW waits between sending two frames?

>> If phy-mode is RGMII or RGMII_RXID, then don't add
>> TX clock delay from PHY, therefore add it from MAC.
>>
>> If phy_mode is RGMII_ID or RGMII_TXID, then do add
>> TX clock delay from PHY, therefore don't add it from MAC.
>>
>> What set of circumstances would create an issue?
> 
> Existing Device Tree sources that do not correspond to that description
> you just did, I suppose they are all out of tree?

The problem with PHY drivers is that there is no
simple compatible string to grep for.

The tango boards use "ethernet-phy-id004d.d072"
but not a single other DT uses that string.
For example, am335x-evm.dts doesn't seem to name the PHY.
Hmmm, how does the at803x probe function match for that
board?

How does one estimate the impact of driver changes in
the eth PHY layer?

Regards.


Re: [PATCH] NET: dwmac: Make dwmac reset unconditional

2017-07-19 Thread David Miller
From: Eugeniy Paltsev 
Date: Tue, 18 Jul 2017 17:07:15 +0300

> Unconditionally reset dwmac before HW init if reset controller is present.
> 
> In existing implementation we reset dwmac only after second module
> probing:
> (module load -> unload -> load again [reset happens])
> 
> Now we reset dwmac at every module load:
> (module load [reset happens] -> unload -> load again [reset happens])
> 
> Also some reset controllers have only reset callback instead of
> assert + deassert callbacks pair, so handle this case.
> 
> Signed-off-by: Eugeniy Paltsev 

Applied.


Re: [PATCH v2 2/2] openvswitch: Optimize operations for OvS flow_stats.

2017-07-19 Thread David Miller
From: Tonghao Zhang 
Date: Mon, 17 Jul 2017 23:28:06 -0700

> When calling the flow_free() to free the flow, we call many times
> (cpu_possible_mask, eg. 128 as default) cpumask_next(). That will
> take up our CPU usage if we call the flow_free() frequently.
> When we put all packets to userspace via upcall, and OvS will send
> them back via netlink to ovs_packet_cmd_execute(will call flow_free).
> 
> The test topology is shown below. VM01 sends TCP packets to VM02,
> and OvS forwards the packets. When testing, we use perf to report the
> system performance.
> 
> VM01 --- OvS-VM --- VM02
> 
> Without this patch, perf-top show as below: The flow_free() is
> 3.02% CPU usage.
> 
>   4.23%  [kernel][k] _raw_spin_unlock_irqrestore
>   3.62%  [kernel][k] __do_softirq
>   3.16%  [kernel][k] __memcpy
>   3.02%  [kernel][k] flow_free
>   2.42%  libc-2.17.so[.] __memcpy_ssse3_back
>   2.18%  [kernel][k] copy_user_generic_unrolled
>   2.17%  [kernel][k] find_next_bit
> 
> When applied this patch, perf-top show as below: Not shown on
> the list anymore.
> 
>   4.11%  [kernel][k] _raw_spin_unlock_irqrestore
>   3.79%  [kernel][k] __do_softirq
>   3.46%  [kernel][k] __memcpy
>   2.73%  libc-2.17.so[.] __memcpy_ssse3_back
>   2.25%  [kernel][k] copy_user_generic_unrolled
>   1.89%  libc-2.17.so[.] _int_malloc
>   1.53%  ovs-vswitchd[.] xlate_actions
> 
> With this patch, the TCP throughput (we don't use Megaflow Cache
> + Microflow Cache) between VMs goes from 1.18Gbs/sec up to 1.30Gbs/sec
> (roughly a 10% performance improvement).
> 
> This patch adds a cpumask struct; the cpu_used_mask stores the cpu_id
> that the flow used. We then only check the flow_stats on the CPUs that
> were used, since it is unnecessary to check all possible CPUs when getting,
> cleaning, and updating the flow_stats. Adding the cpu_used_mask to the
> sw_flow struct doesn't increase the cacheline number.
> 
> Signed-off-by: Tonghao Zhang 
> Acked-by: Pravin B Shelar 

Applied.


Re: [PATCH v2 1/2] openvswitch: Optimize updating for OvS flow_stats.

2017-07-19 Thread David Miller
From: Tonghao Zhang 
Date: Mon, 17 Jul 2017 23:28:05 -0700

> In the ovs_flow_stats_update(), we only use the node
> var to alloc flow_stats struct. But this is not a
> common case, it is unnecessary to call the numa_node_id()
> every time. This patch is not a bugfix, but there may be
> a small increase.
> 
> Signed-off-by: Tonghao Zhang 

Applied.


Re: [PATCH] wireless: wext: terminate ifr name coming from userspace

2017-07-19 Thread David Miller
From: "Levin, Alexander (Sasha Levin)" 
Date: Tue, 18 Jul 2017 04:23:16 +

> ifr name is assumed to be a valid string by the kernel, but nothing
> was forcing userspace to pass a valid string.
> 
> In turn, this would cause panics as we tried to access the string
> past its valid memory.
> 
> Signed-off-by: Sasha Levin 

Applied and queued up for -stable.

dev_ifname() has the same bug, I'll post a patch for that.


[PATCH] net: Zero terminate ifr_name in dev_ifname().

2017-07-19 Thread David Miller

The ifr.ifr_name is passed around and assumed to be NULL terminated.

Signed-off-by: David S. Miller 
---
 net/core/dev_ioctl.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 7657ad6bc13d..06b147d7d9e2 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -28,6 +28,7 @@ static int dev_ifname(struct net *net, struct ifreq __user 
*arg)
 
if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
return -EFAULT;
+   ifr.ifr_name[IFNAMSIZ-1] = 0;
 
error = netdev_get_name(net, ifr.ifr_name, ifr.ifr_ifindex);
if (error)
-- 
2.13.3



[PATCH net] ipv6: avoid overflow of offset in ip6_find_1stfragopt

2017-07-19 Thread Sabrina Dubroca
In some cases, offset can overflow and can cause an infinite loop in
ip6_find_1stfragopt(). Make it unsigned int to prevent the overflow, and
cap it at IPV6_MAXPLEN, since packets larger than that should be invalid.

This problem has been here since before the beginning of git history.

Signed-off-by: Sabrina Dubroca 
Acked-by: Hannes Frederic Sowa 
---
 net/ipv6/output_core.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index e9065b8d3af8..abb2c307fbe8 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -78,7 +78,7 @@ EXPORT_SYMBOL(ipv6_select_ident);
 
 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 {
-   u16 offset = sizeof(struct ipv6hdr);
+   unsigned int offset = sizeof(struct ipv6hdr);
unsigned int packet_len = skb_tail_pointer(skb) -
skb_network_header(skb);
int found_rhdr = 0;
@@ -86,6 +86,7 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 
while (offset <= packet_len) {
struct ipv6_opt_hdr *exthdr;
+   unsigned int len;
 
switch (**nexthdr) {
 
@@ -111,7 +112,10 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 
exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
 offset);
-   offset += ipv6_optlen(exthdr);
+   len = ipv6_optlen(exthdr);
+   if (len + offset >= IPV6_MAXPLEN)
+   return -EINVAL;
+   offset += len;
*nexthdr = &exthdr->nexthdr;
}
 
-- 
2.13.2



Re: [PATCH v2 net-next 0/3] liquidio: avoid vm low memory crashes

2017-07-19 Thread David Miller
From: Felix Manlunas 
Date: Mon, 17 Jul 2017 17:49:20 -0700

> From: Rick Farrington 
> 
> This patchset addresses issues brought about by low memory conditions
> in a VM.  These conditions were not seen when the driver was exercised
> normally.  Rather, they were brought about through manual fault injection.
> They are being included in the interest of hardening the driver against
> unforeseen circumstances.
> 
> 1. Fix GPF in octeon_init_droq(); zero the allocated block 'recv_buf_list'.
>This prevents a GPF trying to access an invalid 'recv_buf_list[i]' entry
>in octeon_droq_destroy_ring_buffers() if init didn't alloc all entries.
> 2. Don't dereference a NULL ptr in octeon_droq_destroy_ring_buffers().
> 3. For defensive programming, zero the allocated block 'oct->droq[0]' in
>octeon_setup_output_queues() and 'oct->instr_queue[0]' in
>octeon_setup_instr_queues().
> 
> change log:
> V1 -> V2:
> 1. Corrected syntax in 'Subject' lines; no functional or code changes.

Series applied.  I removed the "[0]" in this commit log message.

Thanks.


Re: [PATCH net-next v2 00/13] Change DSA's FDB API and perform switchdev cleanup

2017-07-19 Thread Vivien Didelot
Hi Arkadi,

I am testing your patch series and the behavior changes suspiciously:

# brctl show br0
bridge name bridge id   STP enabled interfaces
br0 8000.f6d5ef06ccdd   no  lan0
lan1
lan2
lan3
lan4
lan5
lan6
lan7
lan8
optical3
optical4

Without the patchset I have this behavior:

# bridge fdb add 00:11:22:33:44:55 dev lan4
# bridge fdb add 22:33:44:55:66:77 dev lan2 self
# bridge fdb show   
01:00:5e:00:00:01 dev eth0 self permanent
01:00:5e:00:00:01 dev eth1 self permanent
0a:3f:f6:06:a2:ee dev lan0 master br0 permanent
22:33:44:55:66:77 dev lan2 self static
00:11:22:33:44:55 dev lan4 self static
01:00:5e:00:00:01 dev br0 self permanent

And now with the patchset applied I have:

# bridge fdb add 00:11:22:33:44:55 dev lan4
# bridge fdb add 22:33:44:55:66:77 dev lan2 self
# bridge fdb show
01:00:5e:00:00:01 dev eth0 self permanent
00:11:22:33:44:55 dev eth1 self permanent
22:33:44:55:66:77 dev eth1 self permanent
01:00:5e:00:00:01 dev eth1 self permanent
0a:ca:c8:6b:05:65 dev lan0 master br0 permanent
01:00:5e:00:00:01 dev br0 self permanent


It looks like the FDB entries are reported to be associated with the
master net device (eth1). Is the dump broken or is it the whole add?

Thanks,

Vivien


Re: [PATCH net-next] liquidio: support new firmware statistic fw_err_pki

2017-07-19 Thread David Miller
From: Felix Manlunas 
Date: Mon, 17 Jul 2017 13:33:14 -0700

> From: Rick Farrington 
> 
> Added support for new firmware statistic 'tx_err_pki'.
> 
> Signed-off-by: Rick Farrington 
> Signed-off-by: Derek Chickles 
> Signed-off-by: Felix Manlunas 

Applied.


Re: [PATCH 1/2] net: phy: at803x: Fix RGMII RX and TX clock delays setup

2017-07-19 Thread Grygorii Strashko



On 07/19/2017 02:30 PM, Florian Fainelli wrote:

On 07/19/2017 12:24 PM, Grygorii Strashko wrote:

Hi

On 07/19/2017 10:31 AM, Marc Gonzalez wrote:

The current code supports enabling RGMII RX and TX clock delays.
The unstated assumption is that these settings are disabled by
default at reset, which is not the case.

RX clock delay is enabled at reset. And TX clock delay "survives"
across SW resets. Thus, if the bootloader enables TX clock delay,
it will remain enabled at reset in Linux.

Provide disable functions to configure the RGMII clock delays
exactly as specified in the fwspec.

Signed-off-by: Marc Gonzalez 
---
   drivers/net/phy/at803x.c | 32 
   1 file changed, 24 insertions(+), 8 deletions(-)

This patch breaks am335x-evm networking.

To restore it I've had to apply below diff:
diff --git a/arch/arm/boot/dts/am335x-evm.dts b/arch/arm/boot/dts/am335x-evm.dts
index 200d6ab..9578bdf 100644
--- a/arch/arm/boot/dts/am335x-evm.dts
+++ b/arch/arm/boot/dts/am335x-evm.dts
@@ -724,12 +724,12 @@
  
  _emac0 {

 phy_id = <_mdio>, <0>;
-   phy-mode = "rgmii-txid";
+   phy-mode = "rgmii-id";
  };
  
  _emac1 {

 phy_id = <_mdio>, <1>;
-   phy-mode = "rgmii-txid";
+   phy-mode = "rgmii-id";
  };
  
   {


Sorry, I can't comment here too much - not an E-PHY expert.


It's useful feedback, since we had poorly defined "phy-mode" semantics
for too long, this is totally expected, Marc this is exactly why Mans is
suggesting additional MAC-specific properties to define delays.



Yeah. original commit is pretty old and description is not very useful

commit 6d75afe2916adf9e9de6862275cdf89b9b7e4d0e
Author: Mugunthan V N 
Date:   Mon Jun 3 20:10:11 2013 +

ARM: dts: AM33XX: Add phy-mode to CPSW node


--
regards,
-grygorii


ATENCIÓN;

2017-07-19 Thread administrador
ATENCIÓN;

Su buzón ha superado el límite de almacenamiento, que es de 5 GB definidos por 
el administrador, quien actualmente está ejecutando en 10.9GB, no puede ser 
capaz de enviar o recibir correo nuevo hasta que vuelva a validar su buzón de 
correo electrónico. Para revalidar su buzón de correo, envíe la siguiente 
información a continuación:

nombre: 
Nombre de usuario: 
contraseña:
Confirmar contraseña:
E-mail: 
teléfono:
Si usted no puede revalidar su buzón, el buzón se deshabilitará!

Disculpa las molestias.
Código de verificación: es: 006524
Correo Soporte Técnico © 2017

¡gracias
Sistemas administrador


Re: A buggy behavior for Linux TCP Reno and HTCP

2017-07-19 Thread Yuchung Cheng
On Tue, Jul 18, 2017 at 2:36 PM, Wei Sun  wrote:
> Hi there,
>
> We find a buggy behavior when using Linux TCP Reno and HTCP in low
> bandwidth or highly congested network environments.
>
> In a simple word, their undo functions may mistakenly double the cwnd,
> leading to a more aggressive behavior in a highly congested scenario.
>
>
> The detailed reason:
>
> The current reno undo function assumes cwnd halving (and thus doubles
> the cwnd), but it doesn't consider a corner case condition that
> ssthresh is at least 2.
>
> e.g.,
>  cwnd  ssth
> An initial state: 25
> A spurious loss:   12
> Undo:   45
>
> Here the cwnd after undo is two times as that before undo. Attached is
> a simple script to reproduce it.
the packetdrill script is a bit confusing: it disables SACK but then
the client returns ACK w/ SACKs, also 3 dupacks happen after RTO so
the sender isn't technically going through a fast recovery...

could you provide a better test?

>
> A similar reason for HTCP, so we recommend to store the cwnd on loss
> in .ssthresh implementation and restore it again in .undo_cwnd for TCP
> Reno and HTCP implementations.
>
> Thanks


Re: [PATCH 1/2] net: phy: at803x: Fix RGMII RX and TX clock delays setup

2017-07-19 Thread Florian Fainelli
On 07/19/2017 12:24 PM, Grygorii Strashko wrote:
> Hi
> 
> On 07/19/2017 10:31 AM, Marc Gonzalez wrote:
>> The current code supports enabling RGMII RX and TX clock delays.
>> The unstated assumption is that these settings are disabled by
>> default at reset, which is not the case.
>>
>> RX clock delay is enabled at reset. And TX clock delay "survives"
>> across SW resets. Thus, if the bootloader enables TX clock delay,
>> it will remain enabled at reset in Linux.
>>
>> Provide disable functions to configure the RGMII clock delays
>> exactly as specified in the fwspec.
>>
>> Signed-off-by: Marc Gonzalez 
>> ---
>>   drivers/net/phy/at803x.c | 32 
>>   1 file changed, 24 insertions(+), 8 deletions(-)
> This patch breaks am335x-evm networking.
> 
> To restore it I've had to apply below diff:
> diff --git a/arch/arm/boot/dts/am335x-evm.dts 
> b/arch/arm/boot/dts/am335x-evm.dts
> index 200d6ab..9578bdf 100644
> --- a/arch/arm/boot/dts/am335x-evm.dts
> +++ b/arch/arm/boot/dts/am335x-evm.dts
> @@ -724,12 +724,12 @@
>  
>  &cpsw_emac0 {
> phy_id = <&davinci_mdio>, <0>;
> -   phy-mode = "rgmii-txid";
> +   phy-mode = "rgmii-id";
>  };
>  
>  &cpsw_emac1 {
> phy_id = <&davinci_mdio>, <1>;
> -   phy-mode = "rgmii-txid";
> +   phy-mode = "rgmii-id";
>  };
>  
>   {
> 
> Sorry, I can't comment here too much - not an E-PHY expert.

It's useful feedback, since we had poorly defined "phy-mode" semantics
for too long, this is totally expected, Marc this is exactly why Mans is
suggesting additional MAC-specific properties to define delays.
-- 
Florian


Re: [PATCH 1/2] net: phy: at803x: Fix RGMII RX and TX clock delays setup

2017-07-19 Thread Grygorii Strashko
Hi

On 07/19/2017 10:31 AM, Marc Gonzalez wrote:
> The current code supports enabling RGMII RX and TX clock delays.
> The unstated assumption is that these settings are disabled by
> default at reset, which is not the case.
> 
> RX clock delay is enabled at reset. And TX clock delay "survives"
> across SW resets. Thus, if the bootloader enables TX clock delay,
> it will remain enabled at reset in Linux.
> 
> Provide disable functions to configure the RGMII clock delays
> exactly as specified in the fwspec.
> 
> Signed-off-by: Marc Gonzalez 
> ---
>   drivers/net/phy/at803x.c | 32 
>   1 file changed, 24 insertions(+), 8 deletions(-)
This patch breaks am335x-evm networking.

To restore it I've had to apply below diff:
diff --git a/arch/arm/boot/dts/am335x-evm.dts b/arch/arm/boot/dts/am335x-evm.dts
index 200d6ab..9578bdf 100644
--- a/arch/arm/boot/dts/am335x-evm.dts
+++ b/arch/arm/boot/dts/am335x-evm.dts
@@ -724,12 +724,12 @@
 
 &cpsw_emac0 {
phy_id = <&davinci_mdio>, <0>;
-   phy-mode = "rgmii-txid";
+   phy-mode = "rgmii-id";
 };
 
 &cpsw_emac1 {
phy_id = <&davinci_mdio>, <1>;
-   phy-mode = "rgmii-txid";
+   phy-mode = "rgmii-id";
 };
 
  {

Sorry, I can't comment here too much - not an E-PHY expert.

-- 
regards,
-grygorii


[PATCH v3] ath10k: ath10k_htt_rx_amsdu_allowed() use ath10k_dbg()

2017-07-19 Thread Gabriel Craciunescu
Each time we get disconnected from the AP we get flooded with messages like:

...
ath10k_pci :03:00.0: no channel configured; ignoring frame(s)!

ath10k_warn: 155 callbacks suppressed
...

Use ath10k_dbg() here too

Signed-off-by: Gabriel Craciunescu 
---
 drivers/net/wireless/ath/ath10k/htt_rx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c 
b/drivers/net/wireless/ath/ath10k/htt_rx.c
index 398dda978d6e..75d9b59b7e63 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -1514,7 +1514,7 @@ static bool ath10k_htt_rx_amsdu_allowed(struct ath10k *ar,
 */
 
if (!rx_status->freq) {
-   ath10k_warn(ar, "no channel configured; ignoring frame(s)!\n");
+   ath10k_dbg(ar, ATH10K_DBG_HTT, "no channel configured; ignoring 
frame(s)!\n");
return false;
}
 
-- 
2.13.3



RE: [PATCH net-next 3/8] netvsc: change order of steps in setting queues

2017-07-19 Thread Haiyang Zhang


> -Original Message-
> From: Stephen Hemminger [mailto:step...@networkplumber.org]
> Sent: Wednesday, July 19, 2017 2:53 PM
> To: KY Srinivasan ; Haiyang Zhang
> ; Stephen Hemminger 
> Cc: de...@linuxdriverproject.org; netdev@vger.kernel.org
> Subject: [PATCH net-next 3/8] netvsc: change order of steps in setting
> queues
> 
> This fixes the error unwind logic for incorrect number of queues.
> If netif_set_real_num_XX_queues failed then rndis_filter_device_add
> would have been called twice. Since input arguments are already
> range checked, this is only a hypothetical problem, not possible
> in actual code.
> 
> Signed-off-by: Stephen Hemminger 
> ---
>  drivers/net/hyperv/netvsc_drv.c | 8 +++-
>  1 file changed, 3 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/net/hyperv/netvsc_drv.c
> b/drivers/net/hyperv/netvsc_drv.c
> index e8e82a6a4b1a..91637336d1fb 100644
> --- a/drivers/net/hyperv/netvsc_drv.c
> +++ b/drivers/net/hyperv/netvsc_drv.c
> @@ -724,17 +724,15 @@ static int netvsc_set_queues(struct net_device
> *net, struct hv_device *dev,
>   device_info.ring_size = ring_size;
>   device_info.max_num_vrss_chns = num_chn;
> 
> - ret = rndis_filter_device_add(dev, _info);
> - if (ret)
> - return ret;
> -
>   ret = netif_set_real_num_tx_queues(net, num_chn);
>   if (ret)
>   return ret;
> 
>   ret = netif_set_real_num_rx_queues(net, num_chn);
> + if (ret)
> + return ret;
> 
> - return ret;
> + return rndis_filter_device_add(dev, _info);
>  }

The existing code has a bug here. After rndis_filter_device_add(),
the number of queues granted by the host may change:
net_device->num_chn = 1 +
init_packet->msg.v5_msg.subchn_comp.num_subchannels;
So we should call rndis_filter_device_add() first, then
assign net_device->num_chn to "real number tx queues".

Thanks,
- Haiyang



[PATCH net-next 7/8] netvsc: save pointer to parent netvsc_device in channel table

2017-07-19 Thread Stephen Hemminger
Keep back pointer in the per-channel data structure to
avoid any possible RCU related issues when napi poll is
called but netvsc_device is in RCU limbo.

Signed-off-by: Stephen Hemminger 
---
 drivers/net/hyperv/hyperv_net.h | 1 +
 drivers/net/hyperv/netvsc.c | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 0054b6929f6e..d13572879e7e 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -727,6 +727,7 @@ struct net_device_context {
 /* Per channel data */
 struct netvsc_channel {
struct vmbus_channel *channel;
+   struct netvsc_device *net_device;
const struct vmpacket_descriptor *desc;
struct napi_struct napi;
struct multi_send_data msd;
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 3c6f3ae520d9..c15640c6fd83 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -1224,11 +1224,11 @@ int netvsc_poll(struct napi_struct *napi, int budget)
 {
struct netvsc_channel *nvchan
= container_of(napi, struct netvsc_channel, napi);
+   struct netvsc_device *net_device = nvchan->net_device;
struct vmbus_channel *channel = nvchan->channel;
struct hv_device *device = netvsc_channel_to_device(channel);
u16 q_idx = channel->offermsg.offer.sub_channel_index;
struct net_device *ndev = hv_get_drvdata(device);
-   struct netvsc_device *net_device = net_device_to_netvsc_device(ndev);
int work_done = 0;
 
/* If starting a new interval */
@@ -1307,6 +1307,7 @@ struct netvsc_device *netvsc_device_add(struct hv_device 
*device,
struct netvsc_channel *nvchan = &net_device->chan_table[i];
 
nvchan->channel = device->channel;
+   nvchan->net_device = net_device;
}
 
/* Enable NAPI handler before init callbacks */
-- 
2.11.0



[PATCH net-next 1/8] netvsc: force link update after MTU change

2017-07-19 Thread Stephen Hemminger
If two MTU changes are in less than update interval (2 seconds),
then the netvsc network device may get stuck with no carrier.

The netvsc driver debounces link status events which is fine
for unsolicited updates, but blocks getting the update after
down/up from MTU reinitialization.

Signed-off-by: Stephen Hemminger 
---
 drivers/net/hyperv/netvsc_drv.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 63c98bbbc596..09b07ca9e69a 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -783,6 +783,7 @@ static int netvsc_set_channels(struct net_device *net,
ret = netvsc_open(net);
 
/* We may have missed link change notifications */
+   net_device_ctx->last_reconfig = 0;
schedule_delayed_work(&net_device_ctx->dwork, 0);
 
return ret;
-- 
2.11.0



[PATCH net-next 2/8] netvsc: add some rtnl_dereference annotations

2017-07-19 Thread Stephen Hemminger
In a couple places RTNL is held, and the netvsc_device pointer
is acquired without annotation.

Signed-off-by: Stephen Hemminger 
---
 drivers/net/hyperv/netvsc.c | 5 +++--
 drivers/net/hyperv/netvsc_drv.c | 7 ---
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 0a9167dd72fb..e202ec5d6f63 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -41,7 +41,7 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf)
 {
struct net_device_context *net_device_ctx = netdev_priv(ndev);
struct hv_device *dev = net_device_ctx->device_ctx;
-   struct netvsc_device *nv_dev = net_device_ctx->nvdev;
+   struct netvsc_device *nv_dev = rtnl_dereference(net_device_ctx->nvdev);
struct nvsp_message *init_pkt = &nv_dev->channel_init_pkt;
 
memset(init_pkt, 0, sizeof(struct nvsp_message));
@@ -549,7 +549,8 @@ void netvsc_device_remove(struct hv_device *device)
 {
struct net_device *ndev = hv_get_drvdata(device);
struct net_device_context *net_device_ctx = netdev_priv(ndev);
-   struct netvsc_device *net_device = net_device_ctx->nvdev;
+   struct netvsc_device *net_device
+   = rtnl_dereference(net_device_ctx->nvdev);
int i;
 
netvsc_disconnect_vsp(device);
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 09b07ca9e69a..e8e82a6a4b1a 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -69,7 +69,7 @@ static void netvsc_set_multicast_list(struct net_device *net)
 static int netvsc_open(struct net_device *net)
 {
struct net_device_context *ndev_ctx = netdev_priv(net);
-   struct netvsc_device *nvdev = ndev_ctx->nvdev;
+   struct netvsc_device *nvdev = rtnl_dereference(ndev_ctx->nvdev);
struct rndis_device *rdev;
int ret = 0;
 
@@ -1364,7 +1364,7 @@ static struct net_device *get_netvsc_byref(struct 
net_device *vf_netdev)
continue;   /* not a netvsc device */
 
net_device_ctx = netdev_priv(dev);
-   if (net_device_ctx->nvdev == NULL)
+   if (!rtnl_dereference(net_device_ctx->nvdev))
continue;   /* device is removed */
 
if (rtnl_dereference(net_device_ctx->vf_netdev) == vf_netdev)
@@ -1589,7 +1589,8 @@ static int netvsc_remove(struct hv_device *dev)
 * removed. Also blocks mtu and channel changes.
 */
rtnl_lock();
-   rndis_filter_device_remove(dev, ndev_ctx->nvdev);
+   rndis_filter_device_remove(dev,
+  rtnl_dereference(ndev_ctx->nvdev));
rtnl_unlock();
 
unregister_netdev(net);
-- 
2.11.0



[PATCH net-next 6/8] netvsc: need rcu_dereference when accessing internal device info

2017-07-19 Thread Stephen Hemminger
The netvsc_device structure should be accessed by rcu_dereference
in the send path.  Change arguments to netvsc_send() to make
this easier to do correctly.

Remove no longer needed hv_device_to_netvsc_device.

Signed-off-by: Stephen Hemminger 
---
 drivers/net/hyperv/hyperv_net.h   | 10 +++---
 drivers/net/hyperv/netvsc.c   |  8 +---
 drivers/net/hyperv/netvsc_drv.c   |  4 ++--
 drivers/net/hyperv/rndis_filter.c |  2 +-
 4 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index e620374727c8..0054b6929f6e 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -183,10 +183,12 @@ struct rndis_device {
 /* Interface */
 struct rndis_message;
 struct netvsc_device;
+struct net_device_context;
+
 struct netvsc_device *netvsc_device_add(struct hv_device *device,
const struct netvsc_device_info *info);
 void netvsc_device_remove(struct hv_device *device);
-int netvsc_send(struct hv_device *device,
+int netvsc_send(struct net_device_context *ndc,
struct hv_netvsc_packet *packet,
struct rndis_message *rndis_msg,
struct hv_page_buffer **page_buffer,
@@ -790,12 +792,6 @@ net_device_to_netvsc_device(struct net_device *ndev)
return ((struct net_device_context *)netdev_priv(ndev))->nvdev;
 }
 
-static inline struct netvsc_device *
-hv_device_to_netvsc_device(struct hv_device *device)
-{
-   return net_device_to_netvsc_device(hv_get_drvdata(device));
-}
-
 /* NdisInitialize message */
 struct rndis_initialize_request {
u32 req_id;
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 4a2550559442..3c6f3ae520d9 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -822,13 +822,15 @@ static inline void move_pkt_msd(struct hv_netvsc_packet 
**msd_send,
msdp->count = 0;
 }
 
-int netvsc_send(struct hv_device *device,
+/* RCU already held by caller */
+int netvsc_send(struct net_device_context *ndev_ctx,
struct hv_netvsc_packet *packet,
struct rndis_message *rndis_msg,
struct hv_page_buffer **pb,
struct sk_buff *skb)
 {
-   struct netvsc_device *net_device = hv_device_to_netvsc_device(device);
+   struct netvsc_device *net_device = rcu_dereference(ndev_ctx->nvdev);
+   struct hv_device *device = ndev_ctx->device_ctx;
int ret = 0;
struct netvsc_channel *nvchan;
u32 pktlen = packet->total_data_buflen, msd_len = 0;
@@ -840,7 +842,7 @@ int netvsc_send(struct hv_device *device,
bool xmit_more = (skb != NULL) ? skb->xmit_more : false;
 
/* If device is rescinded, return error and packet will get dropped. */
-   if (unlikely(net_device->destroy))
+   if (unlikely(!net_device || net_device->destroy))
return -ENODEV;
 
/* We may race with netvsc_connect_vsp()/netvsc_init_buf() and get
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 0ca8c74143b4..1238600d717e 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -505,8 +505,8 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct 
net_device *net)
 
/* timestamp packet in software */
skb_tx_timestamp(skb);
-   ret = netvsc_send(net_device_ctx->device_ctx, packet,
- rndis_msg, &pb, skb);
+
+   ret = netvsc_send(net_device_ctx, packet, rndis_msg, &pb, skb);
if (likely(ret == 0))
return NETDEV_TX_OK;
 
diff --git a/drivers/net/hyperv/rndis_filter.c 
b/drivers/net/hyperv/rndis_filter.c
index cacf1e5536f7..9ab67c8309ff 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -243,7 +243,7 @@ static int rndis_filter_send_request(struct rndis_device 
*dev,
pb[0].len;
}
 
-   ret = netvsc_send(net_device_ctx->device_ctx, packet, NULL, &pb, NULL);
+   ret = netvsc_send(net_device_ctx, packet, NULL, &pb, NULL);
return ret;
 }
 
-- 
2.11.0



[PATCH net-next 5/8] netvsc: use ERR_PTR to avoid dereference issues

2017-07-19 Thread Stephen Hemminger
The rndis_filter_device_add function is called both in
probe context and RTNL context, and creates the netvsc_device
inner structure. It is easier to get the RTNL lock annotation
correct if it returns the object directly, rather than implicitly
by updating network device private data.

Signed-off-by: Stephen Hemminger 
---
 drivers/net/hyperv/hyperv_net.h   |  8 
 drivers/net/hyperv/netvsc.c   | 13 ++--
 drivers/net/hyperv/netvsc_drv.c   | 34 ++-
 drivers/net/hyperv/rndis_filter.c | 43 ---
 4 files changed, 49 insertions(+), 49 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 5d541a1462c2..e620374727c8 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -183,8 +183,8 @@ struct rndis_device {
 /* Interface */
 struct rndis_message;
 struct netvsc_device;
-int netvsc_device_add(struct hv_device *device,
- const struct netvsc_device_info *info);
+struct netvsc_device *netvsc_device_add(struct hv_device *device,
+   const struct netvsc_device_info *info);
 void netvsc_device_remove(struct hv_device *device);
 int netvsc_send(struct hv_device *device,
struct hv_netvsc_packet *packet,
@@ -203,8 +203,8 @@ int netvsc_poll(struct napi_struct *napi, int budget);
 bool rndis_filter_opened(const struct netvsc_device *nvdev);
 int rndis_filter_open(struct netvsc_device *nvdev);
 int rndis_filter_close(struct netvsc_device *nvdev);
-int rndis_filter_device_add(struct hv_device *dev,
-   struct netvsc_device_info *info);
+struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
+ struct netvsc_device_info *info);
 void rndis_filter_update(struct netvsc_device *nvdev);
 void rndis_filter_device_remove(struct hv_device *dev,
struct netvsc_device *nvdev);
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index e202ec5d6f63..4a2550559442 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -29,6 +29,8 @@
 #include 
 #include 
 #include 
+#include 
+
 #include 
 
 #include "hyperv_net.h"
@@ -1272,8 +1274,8 @@ void netvsc_channel_cb(void *context)
  * netvsc_device_add - Callback when the device belonging to this
  * driver is added
  */
-int netvsc_device_add(struct hv_device *device,
- const struct netvsc_device_info *device_info)
+struct netvsc_device *netvsc_device_add(struct hv_device *device,
+   const struct netvsc_device_info *device_info)
 {
int i, ret = 0;
int ring_size = device_info->ring_size;
@@ -1283,7 +1285,7 @@ int netvsc_device_add(struct hv_device *device,
 
net_device = alloc_net_device();
if (!net_device)
-   return -ENOMEM;
+   return ERR_PTR(-ENOMEM);
 
net_device->ring_size = ring_size;
 
@@ -1339,7 +1341,7 @@ int netvsc_device_add(struct hv_device *device,
goto close;
}
 
-   return ret;
+   return net_device;
 
 close:
netif_napi_del(&net_device->chan_table[0].napi);
@@ -1350,6 +1352,5 @@ int netvsc_device_add(struct hv_device *device,
 cleanup:
free_netvsc_device(&net_device->rcu);
 
-   return ret;
-
+   return ERR_PTR(ret);
 }
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 82e41c056e53..0ca8c74143b4 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -717,6 +717,7 @@ static int netvsc_set_queues(struct net_device *net, struct 
hv_device *dev,
 u32 num_chn)
 {
struct netvsc_device_info device_info;
+   struct netvsc_device *net_device;
int ret;
 
memset(&device_info, 0, sizeof(device_info));
@@ -732,7 +733,8 @@ static int netvsc_set_queues(struct net_device *net, struct 
hv_device *dev,
if (ret)
return ret;
 
-   return rndis_filter_device_add(dev, &device_info);
+   net_device = rndis_filter_device_add(dev, &device_info);
+   return IS_ERR(net_device) ? PTR_ERR(net_device) : 0;
 }
 
 static int netvsc_set_channels(struct net_device *net,
@@ -845,8 +847,10 @@ static int netvsc_change_mtu(struct net_device *ndev, int 
mtu)
struct net_device_context *ndevctx = netdev_priv(ndev);
struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
struct hv_device *hdev = ndevctx->device_ctx;
+   int orig_mtu = ndev->mtu;
struct netvsc_device_info device_info;
bool was_opened;
+   int ret = 0;
 
if (!nvdev || nvdev->destroy)
return -ENODEV;
@@ -863,16 +867,16 @@ static int netvsc_change_mtu(struct net_device *ndev, int 
mtu)
 
rndis_filter_device_remove(hdev, nvdev);
 
-   /* 'nvdev' has been freed in 

[PATCH net-next 4/8] netvsc: change logic for change mtu and set_queues

2017-07-19 Thread Stephen Hemminger
Use device detach/attach to ensure that no packets are handed
to device during state changes. Call rndis_filter_open/close
directly as part of later VF related changes.

Signed-off-by: Stephen Hemminger 
---
 drivers/net/hyperv/hyperv_net.h   |  1 +
 drivers/net/hyperv/netvsc_drv.c   | 38 ++
 drivers/net/hyperv/rndis_filter.c |  5 +
 3 files changed, 24 insertions(+), 20 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index d6c25580f8dd..5d541a1462c2 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -200,6 +200,7 @@ int netvsc_recv_callback(struct net_device *net,
 const struct ndis_pkt_8021q_info *vlan);
 void netvsc_channel_cb(void *context);
 int netvsc_poll(struct napi_struct *napi, int budget);
+bool rndis_filter_opened(const struct netvsc_device *nvdev);
 int rndis_filter_open(struct netvsc_device *nvdev);
 int rndis_filter_close(struct netvsc_device *nvdev);
 int rndis_filter_device_add(struct hv_device *dev,
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 91637336d1fb..82e41c056e53 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -742,7 +742,7 @@ static int netvsc_set_channels(struct net_device *net,
struct hv_device *dev = net_device_ctx->device_ctx;
struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
unsigned int count = channels->combined_count;
-   bool was_running;
+   bool was_opened;
int ret;
 
/* We do not support separate count for rx, tx, or other */
@@ -762,12 +762,9 @@ static int netvsc_set_channels(struct net_device *net,
if (count > nvdev->max_chn)
return -EINVAL;
 
-   was_running = netif_running(net);
-   if (was_running) {
-   ret = netvsc_close(net);
-   if (ret)
-   return ret;
-   }
+   was_opened = rndis_filter_opened(nvdev);
+   if (was_opened)
+   rndis_filter_close(nvdev);
 
rndis_filter_device_remove(dev, nvdev);
 
@@ -777,8 +774,9 @@ static int netvsc_set_channels(struct net_device *net,
else
netvsc_set_queues(net, dev, nvdev->num_chn);
 
-   if (was_running)
-   ret = netvsc_open(net);
+   nvdev = rtnl_dereference(net_device_ctx->nvdev);
+   if (was_opened)
+   rndis_filter_open(nvdev);
 
/* We may have missed link change notifications */
net_device_ctx->last_reconfig = 0;
@@ -848,18 +846,15 @@ static int netvsc_change_mtu(struct net_device *ndev, int 
mtu)
struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
struct hv_device *hdev = ndevctx->device_ctx;
struct netvsc_device_info device_info;
-   bool was_running;
-   int ret = 0;
+   bool was_opened;
 
if (!nvdev || nvdev->destroy)
return -ENODEV;
 
-   was_running = netif_running(ndev);
-   if (was_running) {
-   ret = netvsc_close(ndev);
-   if (ret)
-   return ret;
-   }
+   netif_device_detach(ndev);
+   was_opened = rndis_filter_opened(nvdev);
+   if (was_opened)
+   rndis_filter_close(nvdev);
 
memset(&device_info, 0, sizeof(device_info));
device_info.ring_size = ring_size;
@@ -877,14 +872,17 @@ static int netvsc_change_mtu(struct net_device *ndev, int 
mtu)
ndev->mtu = mtu;
 
rndis_filter_device_add(hdev, &device_info);
+   nvdev = rtnl_dereference(ndevctx->nvdev);
 
-   if (was_running)
-   ret = netvsc_open(ndev);
+   if (was_opened)
+   rndis_filter_open(nvdev);
+
+   netif_device_attach(ndev);
 
/* We may have missed link change notifications */
schedule_delayed_work(&ndevctx->dwork, 0);
 
-   return ret;
+   return 0;
 }
 
 static void netvsc_get_stats64(struct net_device *net,
diff --git a/drivers/net/hyperv/rndis_filter.c 
b/drivers/net/hyperv/rndis_filter.c
index 85c00e1c52b6..313c6d00d7d9 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -1302,3 +1302,8 @@ int rndis_filter_close(struct netvsc_device *nvdev)
 
return rndis_filter_close_device(nvdev->extension);
 }
+
+bool rndis_filter_opened(const struct netvsc_device *nvdev)
+{
+   return atomic_read(&nvdev->open_cnt) > 0;
+}
-- 
2.11.0



[PATCH net-next 8/8] netvsc: add rtnl annotations in rndis

2017-07-19 Thread Stephen Hemminger
The rndis functions are used when changing device state.
Therefore the references from network device to internal state
are protected by RTNL mutex.

Signed-off-by: Stephen Hemminger 
---
 drivers/net/hyperv/hyperv_net.h   |  6 --
 drivers/net/hyperv/netvsc.c   |  6 --
 drivers/net/hyperv/netvsc_drv.c   |  1 +
 drivers/net/hyperv/rndis_filter.c | 12 ++--
 4 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index d13572879e7e..afb65f753574 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -787,12 +787,6 @@ struct netvsc_device {
struct rcu_head rcu;
 };
 
-static inline struct netvsc_device *
-net_device_to_netvsc_device(struct net_device *ndev)
-{
-   return ((struct net_device_context *)netdev_priv(ndev))->nvdev;
-}
-
 /* NdisInitialize message */
 struct rndis_initialize_request {
u32 req_id;
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index c15640c6fd83..0a9d9feedc3f 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -105,7 +105,8 @@ static void netvsc_destroy_buf(struct hv_device *device)
 {
struct nvsp_message *revoke_packet;
struct net_device *ndev = hv_get_drvdata(device);
-   struct netvsc_device *net_device = net_device_to_netvsc_device(ndev);
+   struct net_device_context *ndc = netdev_priv(ndev);
+   struct netvsc_device *net_device = rtnl_dereference(ndc->nvdev);
int ret;
 
/*
@@ -829,7 +830,8 @@ int netvsc_send(struct net_device_context *ndev_ctx,
struct hv_page_buffer **pb,
struct sk_buff *skb)
 {
-   struct netvsc_device *net_device = rcu_dereference(ndev_ctx->nvdev);
+   struct netvsc_device *net_device
+   = rcu_dereference_rtnl(ndev_ctx->nvdev);
struct hv_device *device = ndev_ctx->device_ctx;
int ret = 0;
struct netvsc_channel *nvchan;
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 1238600d717e..a164981c15f7 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -1548,6 +1548,7 @@ static int netvsc_probe(struct hv_device *dev,
 
netif_set_real_num_tx_queues(net, nvdev->num_chn);
netif_set_real_num_rx_queues(net, nvdev->num_chn);
+   rtnl_unlock();
 
netdev_lockdep_set_classes(net);
 
diff --git a/drivers/net/hyperv/rndis_filter.c 
b/drivers/net/hyperv/rndis_filter.c
index 9ab67c8309ff..e439886f72c1 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -84,6 +84,14 @@ static struct rndis_device *get_rndis_device(void)
return device;
 }
 
+static struct netvsc_device *
+net_device_to_netvsc_device(struct net_device *ndev)
+{
+   struct net_device_context *net_device_ctx = netdev_priv(ndev);
+
+   return rtnl_dereference(net_device_ctx->nvdev);
+}
+
 static struct rndis_request *get_rndis_request(struct rndis_device *dev,
 u32 msg_type,
 u32 msg_len)
@@ -472,7 +480,7 @@ static int rndis_filter_query_device(struct rndis_device 
*dev, u32 oid,
 
if (oid == OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES) {
struct net_device_context *ndevctx = netdev_priv(dev->ndev);
-   struct netvsc_device *nvdev = ndevctx->nvdev;
+   struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
struct ndis_offload *hwcaps;
u32 nvsp_version = nvdev->nvsp_version;
u8 ndis_rev;
@@ -944,7 +952,7 @@ static void rndis_filter_halt_device(struct rndis_device 
*dev)
struct rndis_request *request;
struct rndis_halt_request *halt;
struct net_device_context *net_device_ctx = netdev_priv(dev->ndev);
-   struct netvsc_device *nvdev = net_device_ctx->nvdev;
+   struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
 
/* Attempt to do a rndis device halt */
request = get_rndis_request(dev, RNDIS_MSG_HALT,
-- 
2.11.0



[PATCH net-next 0/8] netvsc: lockdep and related fixes

2017-07-19 Thread Stephen Hemminger
These fix sparse and lockdep warnings from netvsc driver.
Targeting these at net-next since no actual related failures
have been observed in non-debug kernels.

Stephen Hemminger (8):
  netvsc: force link update after MTU change
  netvsc: add some rtnl_dereference annotations
  netvsc: change order of steps in setting queues
  netvsc: change logic for change mtu and set_queues
  netvsc: use ERR_PTR to avoid dereference issues
  netvsc: need rcu_derefence when accessing internal device info
  netvsc: save pointer to parent netvsc_device in channel table
  netvsc: add rtnl annotations in rndis

 drivers/net/hyperv/hyperv_net.h   | 26 +---
 drivers/net/hyperv/netvsc.c   | 33 ++--
 drivers/net/hyperv/netvsc_drv.c   | 83 +--
 drivers/net/hyperv/rndis_filter.c | 62 -
 4 files changed, 107 insertions(+), 97 deletions(-)

-- 
2.11.0



[PATCH net-next 3/8] netvsc: change order of steps in setting queues

2017-07-19 Thread Stephen Hemminger
This fixes the error unwind logic for incorrect number of queues.
If netif_set_real_num_XX_queues failed then rndis_filter_device_add
would have been called twice. Since input arguments are already
range checked, this is only a hypothetical problem, not possible
in actual code.

Signed-off-by: Stephen Hemminger 
---
 drivers/net/hyperv/netvsc_drv.c | 8 +++-
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index e8e82a6a4b1a..91637336d1fb 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -724,17 +724,15 @@ static int netvsc_set_queues(struct net_device *net, 
struct hv_device *dev,
device_info.ring_size = ring_size;
device_info.max_num_vrss_chns = num_chn;
 
-   ret = rndis_filter_device_add(dev, &device_info);
-   if (ret)
-   return ret;
-
ret = netif_set_real_num_tx_queues(net, num_chn);
if (ret)
return ret;
 
ret = netif_set_real_num_rx_queues(net, num_chn);
+   if (ret)
+   return ret;
 
-   return ret;
+   return rndis_filter_device_add(dev, &device_info);
 }
 
 static int netvsc_set_channels(struct net_device *net,
-- 
2.11.0



Re: [PATCH] ath10k_htt_rx_amsdu_allowed(): use ath10k_dbg()

2017-07-19 Thread Ryan Hsu
On 07/19/2017 11:24 AM, Gabriel Craciunescu wrote:

To make it consistent, maybe rename the patch title starting with ath10k:

> From: Gabriel Craciunescu 
>
>   Each time we get disconnected from AP we get flooded with messages like:
>
>   ...
>   ath10k_pci :03:00.0: no channel configured; ignoring frame(s)!
>   
>   ath10k_warn: 155 callbacks suppressed
>   ...
>
>   Use ath10k_dbg() here too.

You don't need an indentation on every line of the commit.

> Signed-off-by: Gabriel Craciunescu 
> ---
>  drivers/net/wireless/ath/ath10k/htt_rx.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c 
> b/drivers/net/wireless/ath/ath10k/htt_rx.c
> index 398dda978d6e..75d9b59b7e63 100644
> --- a/drivers/net/wireless/ath/ath10k/htt_rx.c
> +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
> @@ -1514,7 +1514,7 @@ static bool ath10k_htt_rx_amsdu_allowed(struct ath10k 
> *ar,
>*/
>  
>   if (!rx_status->freq) {
> - ath10k_warn(ar, "no channel configured; ignoring frame(s)!\n");
> + ath10k_dbg(ar, ATH10K_DBG_HTT, "no channel configured; ignoring 
> frame(s)!\n");
>   return false;
>   }
>  

-- 
Ryan Hsu


Re: out-of-bounds access with virtio-net

2017-07-19 Thread David Ahern
On 7/19/17 12:23 PM, Cong Wang wrote:
> On Wed, Jul 19, 2017 at 9:59 AM, David Ahern  wrote:
>> Changing the mac address on a virtio-net based nic is triggering an
>> out-of-bounds access. Nothing fancy with the command:
>>
>> ip li set dev eth2 addr 01:02:34:56:78:9a
>>
>> virtnet_set_mac_address is calling kmemdup for sizeof sockaddr, yet only
>> ETH_ALEN + sizeof(sa_family_t) bytes were malloc'ed.
> 
> Previously we have a dev->addr_len longer than sizeof sockaddr,
> now we have a shorter one... This mac addr thing is really a mess currently.
> 
> 
> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
> index d1ba90980be1..d13bebdf6465 100644
> --- a/net/core/rtnetlink.c
> +++ b/net/core/rtnetlink.c
> @@ -2031,7 +2031,7 @@ static int do_setlink(const struct sk_buff *skb,
> struct sockaddr *sa;
> int len;
> 
> -   len = sizeof(sa_family_t) + dev->addr_len;
> +   len = sizeof(sa_family_t) + max(dev->addr_len, sizeof(*sa));
> sa = kmalloc(len, GFP_KERNEL);
> if (!sa) {
> err = -ENOMEM;
> 

seems reasonable. I am heading to the mountains in a few hours; won't
have time to test until Friday. Please submit formally; no need to wait
for me.


Re: [PATCH 2/2] net: ethernet: nb8800: Fix RGMII TX clock delay setup

2017-07-19 Thread Florian Fainelli
On 07/19/2017 10:36 AM, Mason wrote:
> On 19/07/2017 19:17, Måns Rullgård wrote:
> 
>> Marc Gonzalez writes:
>>
>>> According to commit e5f3a4a56ce2a707b2fb8ce37e4414dcac89c672
>>> ("Documentation: devicetree: clarify usage of the RGMII phy-modes")
>>> there are 4 RGMII phy-modes to handle:
>>>
>>> "rgmii" (RX and TX delays are added by the MAC when required)
>>> "rgmii-id" (RGMII with internal RX and TX delays provided by the PHY,
>>> the MAC should not add the RX or TX delays in this case)
>>> "rgmii-rxid" (RGMII with internal RX delay provided by the PHY,
>>> the MAC should not add an RX delay in this case)
>>> "rgmii-txid" (RGMII with internal TX delay provided by the PHY,
>>> the MAC should not add an TX delay in this case)
>>>
>>> Let the MAC handle TX clock delay for rgmii and rgmii-rxid.
>>>
>>> Signed-off-by: Marc Gonzalez 
>>> ---
>>>  drivers/net/ethernet/aurora/nb8800.c | 8 +---
>>>  1 file changed, 5 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/drivers/net/ethernet/aurora/nb8800.c 
>>> b/drivers/net/ethernet/aurora/nb8800.c
>>> index 041cfb7952f8..f3ed320eb4ad 100644
>>> --- a/drivers/net/ethernet/aurora/nb8800.c
>>> +++ b/drivers/net/ethernet/aurora/nb8800.c
>>> @@ -609,7 +609,7 @@ static void nb8800_mac_config(struct net_device *dev)
>>> mac_mode |= HALF_DUPLEX;
>>>
>>> if (gigabit) {
>>> -   if (priv->phy_mode == PHY_INTERFACE_MODE_RGMII)
>>> +   if (phy_interface_is_rgmii(dev->phydev))
>>> mac_mode |= RGMII_MODE;
>>>
>>> mac_mode |= GMAC_MODE;
>>
>> This is a separate issue, and the change is obviously correct.
>>
>>> @@ -1268,11 +1268,13 @@ static int nb8800_tangox_init(struct net_device 
>>> *dev)
>>> break;
>>>
>>> case PHY_INTERFACE_MODE_RGMII:
>>> -   pad_mode = PAD_MODE_RGMII;
>>> +   case PHY_INTERFACE_MODE_RGMII_RXID:
>>> +   pad_mode = PAD_MODE_RGMII | PAD_MODE_GTX_CLK_DELAY;
>>> break;
>>>
>>> +   case PHY_INTERFACE_MODE_RGMII_ID:
>>> case PHY_INTERFACE_MODE_RGMII_TXID:
>>> -   pad_mode = PAD_MODE_RGMII | PAD_MODE_GTX_CLK_DELAY;
>>> +   pad_mode = PAD_MODE_RGMII;
>>> break;
>>
>> Won't this just make it break in a different set of circumstances?
> 
> I don't think so, and here's my reasoning:
> 
> AFAIU, the HW block always requires a TX clock delay
> (I don't know what the "safe" interval is. PHY adds
> 2.4 ns, MAC adds ~1 ns, both work.)

The nominal delay should be 2ns because that's exactly what a 90 degree
shift at 125 MHz would be. The RGMII specification defines the following:

TskewT - Data to Clock output Skew (At Transmitter) Min: -500 ps, Nom: 0,
Max: +500 ps
TskewR - Data to Clock input Skew (At Receiver) Min: 1ns, Nom: 0, Max:
2.6ns (see note 1)

note 1: This implies that PC board design will require clocks to be
routed such that an additional trace delay of greater than 1.5ns and
less than 2.0ns will be added to the associated clock signal. For 10/100
the Max value is unspecified.

So it seems to me like you are borderline spec in both delays you gave
here and the "HW block always requires a TX clock delay" statement is
true for a given board design only.


> RX clock delay seems to be "Don't Care" (tested both
> enabled and disabled by PHY)
> By "tested", I mean ability to ping remote system.

Can you do something a bit more stressful than just a ping, also if you
have the ability to change the inter-packet gap, do it, and see if you
start seeing FCS or any other decoding errors.

> 
> If phy-mode is RGMII or RGMII_RXID, then don't add
> TX clock delay from PHY, therefore add it from MAC.
> 
> If phy_mode is RGMII_ID or RGMII_TXID, then do add
> TX clock delay from PHY, therefore don't add it from MAC.
> 
> What set of circumstances would create an issue?

Existing Device Tree sources that do not correspond to the description
you just gave; I suppose they are all out of tree?
-- 
Florian


[PATCH] ath10k_htt_rx_amsdu_allowed(): use ath10k_dbg()

2017-07-19 Thread Gabriel Craciunescu
From: Gabriel Craciunescu 

Each time we get disconnected from AP we get flooded with messages like:

...
ath10k_pci :03:00.0: no channel configured; ignoring frame(s)!

ath10k_warn: 155 callbacks suppressed
...

Use ath10k_dbg() here too.

Signed-off-by: Gabriel Craciunescu 
---
 drivers/net/wireless/ath/ath10k/htt_rx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c 
b/drivers/net/wireless/ath/ath10k/htt_rx.c
index 398dda978d6e..75d9b59b7e63 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -1514,7 +1514,7 @@ static bool ath10k_htt_rx_amsdu_allowed(struct ath10k *ar,
 */
 
if (!rx_status->freq) {
-   ath10k_warn(ar, "no channel configured; ignoring frame(s)!\n");
+   ath10k_dbg(ar, ATH10K_DBG_HTT, "no channel configured; ignoring 
frame(s)!\n");
return false;
}
 
-- 
2.13.3



Re: out-of-bounds access with virtio-net

2017-07-19 Thread Cong Wang
On Wed, Jul 19, 2017 at 9:59 AM, David Ahern  wrote:
> Changing the mac address on a virtio-net based nic is triggering an
> out-of-bounds access. Nothing fancy with the command:
>
> ip li set dev eth2 addr 01:02:34:56:78:9a
>
> virtnet_set_mac_address is calling kmemdup for sizeof sockaddr, yet only
> ETH_ALEN + sizeof(sa_family_t) bytes were malloc'ed.

Previously we have a dev->addr_len longer than sizeof sockaddr,
now we have a shorter one... This mac addr thing is really a mess currently.


diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d1ba90980be1..d13bebdf6465 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2031,7 +2031,7 @@ static int do_setlink(const struct sk_buff *skb,
struct sockaddr *sa;
int len;

-   len = sizeof(sa_family_t) + dev->addr_len;
+   len = sizeof(sa_family_t) + max(dev->addr_len, sizeof(*sa));
sa = kmalloc(len, GFP_KERNEL);
if (!sa) {
err = -ENOMEM;


Re: [PATCH] ath10k_htt_rx_amsdu_allowed(): use ath10k_dbg()

2017-07-19 Thread Gabriel C

On 19.07.2017 18:26, Joe Perches wrote:

On Wed, 2017-07-19 at 17:37 +0200, Gabriel C wrote:

  Each time we get disconencted from AP we get flooded with messages 
like:

  ...
  ath10k_pci :03:00.0: no channel configured; ignoring frame(s)!
  
  ath10k_warn: 155 callbacks suppressed


[]


diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c 
b/drivers/net/wireless/ath/ath10k/htt_rx.c

[]

@@ -1514,7 +1514,7 @@ static bool ath10k_htt_rx_amsdu_allowed(struct ath10k *ar,
   */

  if (!rx_status->freq) {
-   ath10k_warn(ar, "no channel configured; ignoring frame(s)!\n");
+   ath10k_dbg(ar, ATH10K_DBG_HTT, "no channel configured; ignoring 
frame(s)!\n");
  return false;
  }


Hi.  This doesn't apply because of tab to space conversions.

Please use git send-email to send your patch.


Yes you are right .. I think newer thunderbird just doesn't work anymore to 
send patches.

Will have a look to setup git send-email and resend.



Maybe read Documentation/process/email-clients.rst



I have used tbird to send patches forever, but it seems the latest updates here broke it
in some way.






Re: [PATCH 1/2] net: phy: at803x: Fix RGMII RX and TX clock delays setup

2017-07-19 Thread Timur Tabi

On 07/19/2017 10:31 AM, Marc Gonzalez wrote:

The current code supports enabling RGMII RX and TX clock delays.
The unstated assumption is that these settings are disabled by
default at reset, which is not the case.

RX clock delay is enabled at reset. And TX clock delay "survives"
across SW resets. Thus, if the bootloader enables TX clock delay,
it will remain enabled at reset in Linux.

Provide disable functions to configure the RGMII clock delays
exactly as specified in the fwspec.


I only use SGMII mode, and I tested and can confirm that this patch does 
not break SGMII, so:


Acked-by: Timur Tabi 

--
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm
Technologies, Inc.  Qualcomm Technologies, Inc. is a member of the
Code Aurora Forum, a Linux Foundation Collaborative Project.


[PATCH] net: tehuti: don't process data if it has not been copied from userspace

2017-07-19 Thread Colin King
From: Colin Ian King 

The array data is only populated with valid information from userspace
if cmd != SIOCDEVPRIVATE; in other cases the array contains garbage on
the stack. The subsequent switch statement acts on a subcommand in
data[0] which could be any garbage value if cmd is SIOCDEVPRIVATE which
seems incorrect to me.  Instead, just return EOPNOTSUPP for the case
where cmd == SIOCDEVPRIVATE to avoid this issue.

As a side note, I suspect that the original intention of the code
was for this ioctl to work just for cmd == SIOCDEVPRIVATE (and the
current logic is reversed). However, I don't want to change the current
semantics in case any userspace code relies on this existing behaviour.

Detected by CoverityScan, CID#139647 ("Uninitialized scalar variable")

Signed-off-by: Colin Ian King 
---
 drivers/net/ethernet/tehuti/tehuti.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/tehuti/tehuti.c 
b/drivers/net/ethernet/tehuti/tehuti.c
index 711fbbbc4b1f..163d8d16bc24 100644
--- a/drivers/net/ethernet/tehuti/tehuti.c
+++ b/drivers/net/ethernet/tehuti/tehuti.c
@@ -654,6 +654,8 @@ static int bdx_ioctl_priv(struct net_device *ndev, struct 
ifreq *ifr, int cmd)
RET(-EFAULT);
}
DBG("%d 0x%x 0x%x\n", data[0], data[1], data[2]);
+   } else {
+   return -EOPNOTSUPP;
}
 
if (!capable(CAP_SYS_RAWIO))
-- 
2.11.0



Re: [PATCH 2/2] net: ethernet: nb8800: Fix RGMII TX clock delay setup

2017-07-19 Thread Mason
On 19/07/2017 19:17, Måns Rullgård wrote:

> Marc Gonzalez writes:
> 
>> According to commit e5f3a4a56ce2a707b2fb8ce37e4414dcac89c672
>> ("Documentation: devicetree: clarify usage of the RGMII phy-modes")
>> there are 4 RGMII phy-modes to handle:
>>
>> "rgmii" (RX and TX delays are added by the MAC when required)
>> "rgmii-id" (RGMII with internal RX and TX delays provided by the PHY,
>>  the MAC should not add the RX or TX delays in this case)
>> "rgmii-rxid" (RGMII with internal RX delay provided by the PHY,
>>  the MAC should not add an RX delay in this case)
>> "rgmii-txid" (RGMII with internal TX delay provided by the PHY,
>>  the MAC should not add an TX delay in this case)
>>
>> Let the MAC handle TX clock delay for rgmii and rgmii-rxid.
>>
>> Signed-off-by: Marc Gonzalez 
>> ---
>>  drivers/net/ethernet/aurora/nb8800.c | 8 +---
>>  1 file changed, 5 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/aurora/nb8800.c 
>> b/drivers/net/ethernet/aurora/nb8800.c
>> index 041cfb7952f8..f3ed320eb4ad 100644
>> --- a/drivers/net/ethernet/aurora/nb8800.c
>> +++ b/drivers/net/ethernet/aurora/nb8800.c
>> @@ -609,7 +609,7 @@ static void nb8800_mac_config(struct net_device *dev)
>>  mac_mode |= HALF_DUPLEX;
>>
>>  if (gigabit) {
>> -if (priv->phy_mode == PHY_INTERFACE_MODE_RGMII)
>> +if (phy_interface_is_rgmii(dev->phydev))
>>  mac_mode |= RGMII_MODE;
>>
>>  mac_mode |= GMAC_MODE;
> 
> This is a separate issue, and the change is obviously correct.
> 
>> @@ -1268,11 +1268,13 @@ static int nb8800_tangox_init(struct net_device *dev)
>>  break;
>>
>>  case PHY_INTERFACE_MODE_RGMII:
>> -pad_mode = PAD_MODE_RGMII;
>> +case PHY_INTERFACE_MODE_RGMII_RXID:
>> +pad_mode = PAD_MODE_RGMII | PAD_MODE_GTX_CLK_DELAY;
>>  break;
>>
>> +case PHY_INTERFACE_MODE_RGMII_ID:
>>  case PHY_INTERFACE_MODE_RGMII_TXID:
>> -pad_mode = PAD_MODE_RGMII | PAD_MODE_GTX_CLK_DELAY;
>> +pad_mode = PAD_MODE_RGMII;
>>  break;
> 
> Won't this just make it break in a different set of circumstances?

I don't think so, and here's my reasoning:

AFAIU, the HW block always requires a TX clock delay
(I don't know what the "safe" interval is. PHY adds
2.4 ns, MAC adds ~1 ns, both work.)
RX clock delay seems to be "Don't Care" (tested both
enabled and disabled by PHY)
By "tested", I mean ability to ping remote system.

If phy-mode is RGMII or RGMII_RXID, then don't add
TX clock delay from PHY, therefore add it from MAC.

If phy_mode is RGMII_ID or RGMII_TXID, then do add
TX clock delay from PHY, therefore don't add it from MAC.

What set of circumstances would create an issue?

Regards.


[PATCH net] Revert "rtnetlink: Do not generate notifications for CHANGEADDR event"

2017-07-19 Thread David Ahern
This reverts commit cd8966e75ed3c6b41a37047a904617bc44fa481f.

The duplicate CHANGEADDR event message is sent regardless of link
status whereas the setlink changes only generate a notification when
the link is up. Not sending a notification when the link is down breaks
dhcpcd which only processes hwaddr changes when the link is down.

Fixes reported regression:
https://bugzilla.kernel.org/show_bug.cgi?id=196355

Reported-by: Yaroslav Isakov 
Signed-off-by: David Ahern 
---
 net/core/rtnetlink.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d1ba90980be1..11b25fbf3dd2 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -4241,6 +4241,7 @@ static int rtnetlink_event(struct notifier_block *this, 
unsigned long event, voi
 
switch (event) {
case NETDEV_REBOOT:
+   case NETDEV_CHANGEADDR:
case NETDEV_CHANGENAME:
case NETDEV_FEAT_CHANGE:
case NETDEV_BONDING_FAILOVER:
-- 
2.1.4



Re: [PATCH 2/2] net: ethernet: nb8800: Fix RGMII TX clock delay setup

2017-07-19 Thread Måns Rullgård
Marc Gonzalez  writes:

> According to commit e5f3a4a56ce2a707b2fb8ce37e4414dcac89c672
> ("Documentation: devicetree: clarify usage of the RGMII phy-modes")
> there are 4 RGMII phy-modes to handle:
>
> "rgmii" (RX and TX delays are added by the MAC when required)
> "rgmii-id" (RGMII with internal RX and TX delays provided by the PHY,
>   the MAC should not add the RX or TX delays in this case)
> "rgmii-rxid" (RGMII with internal RX delay provided by the PHY,
>   the MAC should not add an RX delay in this case)
> "rgmii-txid" (RGMII with internal TX delay provided by the PHY,
>   the MAC should not add an TX delay in this case)
>
> Let the MAC handle TX clock delay for rgmii and rgmii-rxid.
>
> Signed-off-by: Marc Gonzalez 
> ---
>  drivers/net/ethernet/aurora/nb8800.c | 8 +---
>  1 file changed, 5 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/net/ethernet/aurora/nb8800.c 
> b/drivers/net/ethernet/aurora/nb8800.c
> index 041cfb7952f8..f3ed320eb4ad 100644
> --- a/drivers/net/ethernet/aurora/nb8800.c
> +++ b/drivers/net/ethernet/aurora/nb8800.c
> @@ -609,7 +609,7 @@ static void nb8800_mac_config(struct net_device *dev)
>   mac_mode |= HALF_DUPLEX;
>
>   if (gigabit) {
> - if (priv->phy_mode == PHY_INTERFACE_MODE_RGMII)
> + if (phy_interface_is_rgmii(dev->phydev))
>   mac_mode |= RGMII_MODE;
>
>   mac_mode |= GMAC_MODE;

This is a separate issue, and the change is obviously correct.

> @@ -1268,11 +1268,13 @@ static int nb8800_tangox_init(struct net_device *dev)
>   break;
>
>   case PHY_INTERFACE_MODE_RGMII:
> - pad_mode = PAD_MODE_RGMII;
> + case PHY_INTERFACE_MODE_RGMII_RXID:
> + pad_mode = PAD_MODE_RGMII | PAD_MODE_GTX_CLK_DELAY;
>   break;
>
> + case PHY_INTERFACE_MODE_RGMII_ID:
>   case PHY_INTERFACE_MODE_RGMII_TXID:
> - pad_mode = PAD_MODE_RGMII | PAD_MODE_GTX_CLK_DELAY;
> + pad_mode = PAD_MODE_RGMII;
>   break;

Won't this just make it break in a different set of circumstances?

I think the only sane solution to this mess is to never configure the
MAC delay based on the existing phy-connection-type property.  If some
board requires this delay (because the PHY can't do it), a new property
should probably be introduced for that.

-- 
Måns Rullgård


RE: [net-next v3 1/5] ixgbe: Ensure MAC filter was added before setting MACVLAN

2017-07-19 Thread Nguyen, Anthony L


> -Original Message-
> From: Joe Perches [mailto:j...@perches.com]
> Sent: Wednesday, July 19, 2017 3:55 AM
> Subject: Re: [net-next v3 1/5] ixgbe: Ensure MAC filter was added before 
> setting
> MACVLAN
> 
> On Tue, 2017-07-18 at 18:23 -0700, Jeff Kirsher wrote:
> > This patch adds a check to ensure that adding the MAC filter was
> > successful before setting the MACVLAN.  If it was unsuccessful,
> > propagate the error.
> []
> > diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
> > b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
> []
> > @@ -681,6 +681,7 @@ static int ixgbe_set_vf_macvlan(struct
> > ixgbe_adapter *adapter,  {
> > struct list_head *pos;
> > struct vf_macvlans *entry;
> > +   s32 retval = 0;
> 
> This function returns int, why use s32 here?
> 
> > if (index <= 1) {
> > list_for_each(pos, &adapter->vf_mvs.l) { @@ -721,14 +722,15
> @@
> > static int ixgbe_set_vf_macvlan(struct ixgbe_adapter *adapter,
> > if (!entry || !entry->free)
> > return -ENOSPC;
> >
> > -   entry->free = false;
> > -   entry->is_macvlan = true;
> > -   entry->vf = vf;
> > -   memcpy(entry->vf_macvlan, mac_addr, ETH_ALEN);
> > -
> > -   ixgbe_add_mac_filter(adapter, mac_addr, vf);
> > +   retval = ixgbe_add_mac_filter(adapter, mac_addr, vf);
> > +   if (retval >= 0) {
> > +   entry->free = false;
> > +   entry->is_macvlan = true;
> > +   entry->vf = vf;
> > +   memcpy(entry->vf_macvlan, mac_addr, ETH_ALEN);
> > +   }
> >
> > -   return 0;
> > +   return retval;
> 
> This is also backwards logic from typical style and unnecessarily indents 
> code.
> 
>   retval = ixgbe_add_mac_filter(adapter, mac_addr, vf);
>   if (retval < 0)
>   return retval;
> 
>   entry->free = false;
>   entry->is_macvlan = true;
>   entry->vf = vf;
>   memcpy(entry->vf_macvlan, mac_addr, ETH_ALEN);
> 
>   return 0;
> }
> 
> This patch also sets the return value to a possible positive value.
> 
> Is that really desired?
> 
> The only code that seems to use a possible positive value also limits its 
> return to
> 0
> 
> static int ixgbe_uc_sync(struct net_device *netdev, const unsigned char 
> *addr) {
>   struct ixgbe_adapter *adapter = netdev_priv(netdev);
>   int ret;
> 
>   ret = ixgbe_add_mac_filter(adapter, addr, VMDQ_P(0));
> 
>   return min_t(int, ret, 0);
> }
> 

Hi Joe,

Thanks for the review.  I'll make those changes and get a v2 resubmitted.

Thanks,
Tony



out-of-bounds access with virtio-net

2017-07-19 Thread David Ahern
Changing the mac address on a virtio-net based nic is triggering an
out-of-bounds access. Nothing fancy with the command:

ip li set dev eth2 addr 01:02:34:56:78:9a

virtnet_set_mac_address is calling kmemdup for sizeof sockaddr, yet only
ETH_ALEN + sizeof(sa_family_t) bytes were malloc'ed.


Full KASAN dump:

[  236.863289]
==
[  236.864790] BUG: KASAN: slab-out-of-bounds in kmemdup+0x35/0x55
[  236.865928] Read of size 16 at addr 880035df3540 by task ip/770

[  236.867441] CPU: 0 PID: 770 Comm: ip Not tainted 4.12.0+ #308
[  236.868549] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS 1.7.5-20140531_083030-gandalf 04/01/2014
[  236.870298] Call Trace:
[  236.870665]  dump_stack+0x81/0xb6
[  236.871146]  print_address_description+0x76/0x225
[  236.871818]  ? kmemdup+0x35/0x55
[  236.872286]  kasan_report+0x23e/0x269
[  236.872820]  ? _free_receive_bufs+0x127/0x127
[  236.873439]  check_memory_region+0x2d/0x13e
[  236.874060]  __asan_loadN+0xf/0x11
[  236.874551]  kmemdup+0x35/0x55
[  236.875005]  virtnet_set_mac_address+0x47/0x1a0
[  236.875653]  ? rcu_read_lock_sched_held+0x6b/0x75
[  236.876321]  ? trace_kmalloc+0xad/0x146
[  236.876870]  ? do_setlink+0x267/0x127f
[  236.877420]  ? _free_receive_bufs+0x127/0x127
[  236.878052]  dev_set_mac_address+0x8c/0xf0
[  236.878641]  do_setlink+0x300/0x127f
[  236.879160]  ? paravirt_sched_clock+0x9/0xd
[  236.879760]  ? sched_clock+0x9/0xb
[  236.880276]  ? lock_release+0x27f/0x444
[  236.880841]  ? validate_nla+0x127/0x1cf
[  236.881404]  ? nla_parse+0x124/0x145
[  236.881950]  rtnl_newlink+0x4cf/0x93f
[  236.882479]  ? __asan_storeN+0x12/0x14
[  236.883017]  ? rtnl_newlink+0x193/0x93f
[  236.883582]  ? paravirt_sched_clock+0x9/0xd
[  236.884179]  ? trace_event_raw_event_lock+0xc0/0xeb
[  236.884882]  ? hlock_class+0x67/0x85
[  236.885400]  ? __lock_acquire+0x1306/0x1534
[  236.886013]  ? paravirt_sched_clock+0x9/0xd
[  236.886705]  rtnetlink_rcv_msg+0x268/0x277
[  236.887312]  ? rtnl_newlink+0x93f/0x93f
[  236.887869]  netlink_rcv_skb+0xaf/0x123
[  236.888442]  rtnetlink_rcv+0x23/0x2a
[  236.888956]  netlink_unicast+0x1b5/0x240
[  236.889532]  netlink_sendmsg+0x42e/0x464
[  236.890124]  ? netlink_unicast+0x240/0x240
[  236.890709]  sock_sendmsg_nosec+0x47/0x57
[  236.891290]  ___sys_sendmsg+0x2c4/0x393
[  236.891860]  ? lock_release+0x27f/0x444
[  236.892430]  ? rcu_read_unlock+0x5d/0x5f
[  236.892998]  ? mntput_no_expire+0x4b/0x226
[  236.893601]  ? lock_is_held_type+0x104/0x113
[  236.894226]  ? __fget_light+0x91/0xb9
[  236.894773]  __sys_sendmsg+0x45/0x70
[  236.895291]  ? __sys_sendmsg+0x45/0x70
[  236.895858]  SyS_sendmsg+0x19/0x1b
[  236.896352]  entry_SYSCALL_64_fastpath+0x1f/0xbe
[  236.897007] RIP: 0033:0x7fdce38e30b0
[  236.897517] RSP: 002b:7ffc0d6625c8 EFLAGS: 0246 ORIG_RAX:
002e
[  236.898574] RAX: ffda RBX: 596f8dc9 RCX:
7fdce38e30b0
[  236.899560] RDX:  RSI: 7ffc0d662610 RDI:
0003
[  236.900544] RBP: 810c7b92 R08: 0001 R09:
0003
[  236.901529] R10: 7e60 R11: 0246 R12:
88002e8aff98
[  236.902527] R13: 0046 R14: 810012e0 R15:
811673a3
[  236.903520]  ? time_hardirqs_off+0x15/0x28
[  236.904099]  ? trace_hardirqs_off_thunk+0x1a/0x1c
[  236.904771]  ? trace_hardirqs_off_caller+0x123/0x131

[  236.905726] Allocated by task 770:
[  236.906224]  save_stack_trace+0x1b/0x1d
[  236.906770]  save_stack+0x46/0xce
[  236.907245]  kasan_kmalloc+0x97/0xa6
[  236.907754]  __kmalloc+0xf7/0x11f
[  236.908222]  do_setlink+0x267/0x127f
[  236.908728]  rtnl_newlink+0x4cf/0x93f
[  236.909249]  rtnetlink_rcv_msg+0x268/0x277
[  236.909819]  netlink_rcv_skb+0xaf/0x123
[  236.910359]  rtnetlink_rcv+0x23/0x2a
[  236.910863]  netlink_unicast+0x1b5/0x240
[  236.911417]  netlink_sendmsg+0x42e/0x464
[  236.911970]  sock_sendmsg_nosec+0x47/0x57
[  236.912541]  ___sys_sendmsg+0x2c4/0x393
[  236.913082]  __sys_sendmsg+0x45/0x70
[  236.913582]  SyS_sendmsg+0x19/0x1b
[  236.914092]  entry_SYSCALL_64_fastpath+0x1f/0xbe

[  236.914964] Freed by task 117:
[  236.915408]  save_stack_trace+0x1b/0x1d
[  236.915956]  save_stack+0x46/0xce
[  236.916422]  kasan_slab_free+0x87/0xa9
[  236.916952]  __cache_free+0x17/0x30
[  236.917450]  kfree+0x10b/0x18c
[  236.917904]  single_release+0x53/0x5e
[  236.918423]  __fput+0x1c1/0x2f4
[  236.918870]  fput+0xe/0x10
[  236.919309]  task_work_run+0x8d/0xbb
[  236.919814]  prepare_exit_to_usermode+0xe7/0x10b
[  236.920464]  syscall_return_slowpath+0x1a8/0x22b
[  236.921108]  entry_SYSCALL_64_fastpath+0xbc/0xbe

[  236.921994] The buggy address belongs to the object at 880035df3540
 which belongs to the cache kmalloc-32 of size 32
[  236.923659] The buggy address is located 0 bytes inside of
 32-byte region [880035df3540, 880035df3560)
[  236.925210] The buggy address belongs to the page:
[  

Re: [PATCH] net: dsa: mv88e6xxx: Enable CMODE config support for 6390X

2017-07-19 Thread Vivien Didelot
Martin Hundebøll  writes:

> Commit f39908d3b1c45 ('net: dsa: mv88e6xxx: Set the CMODE for mv88e6390
> ports 9 & 10') added support for setting the CMODE for the 6390X family,
> but only enabled it for 9290 and 6390 - and left out 6390X.
>
> Fix support for setting the CMODE on 6390X also by assigning
> mv88e6390x_port_set_cmode() to the .port_set_cmode function pointer in
> mv88e6390x_ops too.
>
> Signed-off-by: Martin Hundebøll 

Reviewed-by: Vivien Didelot 


Re: [patch net-next 14/17] mlxsw: spectrum_router: Add support for IPv6 routes addition / deletion

2017-07-19 Thread Ido Schimmel
On Wed, Jul 19, 2017 at 10:36:52AM -0600, David Ahern wrote:
> >> 2. How are routes with devices unrelated to ports owned by this driver
> >> handled?
> > 
> > They are handled just like any other route, but they don't have a valid
> > RIF (for directly connected routes) or an adjacency group (for
> > gatewayed routes), so the check in mlxsw_sp_fib_entry_should_offload()
> > will return false and they will be programmed to the device with trap
> > action, but using a trap ID (RTR_INGRESS0) with a lower traffic class
> > than IP2ME, so packets that actually need to be locally received by the
> > CPU have a better QoS.
> 
> so mlxsw keeps a copy of the complete FIB for IPv4 and IPv6, even routes
> unrelated to its ports?

If we don't reflect all the routes in the system to the ASIC, then we'll
have a broken routing table and a different behavior from what you would
get with plain NICs.


Re: [patch net-next 14/17] mlxsw: spectrum_router: Add support for IPv6 routes addition / deletion

2017-07-19 Thread David Ahern
On 7/19/17 10:30 AM, Ido Schimmel wrote:
>> rif == 0 means the dst device is not related to a port owned by this
>> driver?
> 
> Yes.
> 
>>
>>
>> A lot to process so I am sure I missed the answer to these:
>>
>> 1. How do you handle host routes for local addresses? IPv6 inserts the
>> host and anycast routes with the device set to 'lo' (or VRF device)
>> instead of the device with the address. I have a patch to change this,
>> but needs more testing
> 
> In mlxsw_sp_fib6_entry_type_set() we check for RTF_LOCAL and set the
> FIB entry type to MLXSW_SP_FIB_ENTRY_TYPE_TRAP. Packets hitting these
> routes will be trapped with IP2ME trap ID towards the CPU.

got it. thanks.

> 
>> 2. How are routes with devices unrelated to ports owned by this driver
>> handled?
> 
> They are handled just like any other route, but they don't have a valid
> RIF (for directly connected routes) or an adjacency group (for
> gatewayed routes), so the check in mlxsw_sp_fib_entry_should_offload()
> will return false and they will be programmed to the device with trap
> action, but using a trap ID (RTR_INGRESS0) with a lower traffic class
> than IP2ME, so packets that actually need to be locally received by the
> CPU have a better QoS.

so mlxsw keeps a copy of the complete FIB for IPv4 and IPv6, even routes
unrelated to its ports?


Re: [patch net-next 16/17] mlxsw: spectrum_router: Abort on source-specific routes

2017-07-19 Thread Ido Schimmel
On Wed, Jul 19, 2017 at 10:16:19AM -0600, David Ahern wrote:
> On 7/19/17 1:02 AM, Jiri Pirko wrote:
> > From: Ido Schimmel 
> > 
> > Without resorting to ACLs, the device performs route lookup solely based
> > on the destination IP address.
> > 
> > In case source-specific routing is needed, an error is returned and the
> > abort mechanism is activated, thus allowing the kernel to take over
> > forwarding decisions.
> > 
> > Instead of aborting, we can trap specific destination prefixes where
> > source-specific routes are present, but this will result in a lot more
> > code that is unlikely to ever be used.
> 
> Do you have a document summarizing these for users?

As you know, we've a Wiki we maintain for the features covered by mlxsw.
Once these patches are applied to net-next I intend to extend it with
IPv6 documentation and mention the above there.

I did a similar thing with inter-VRF routes:
https://github.com/Mellanox/mlxsw/wiki/Virtual-Routing-and-Forwarding-(VRF)#inter-vrf-routing


Re: [patch net-next 14/17] mlxsw: spectrum_router: Add support for IPv6 routes addition / deletion

2017-07-19 Thread Ido Schimmel
On Wed, Jul 19, 2017 at 10:14:54AM -0600, David Ahern wrote:
> On 7/19/17 1:02 AM, Jiri Pirko wrote:
> > @@ -2094,6 +2106,40 @@ mlxsw_sp_fib_entry_should_offload(const struct 
> > mlxsw_sp_fib_entry *fib_entry)
> > }
> >  }
> >  
> > +static void
> > +mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
> > +{
> > +   struct mlxsw_sp_fib6_entry *fib6_entry;
> > +   struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
> > +
> > +   fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
> > + common);
> > +   list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
> > +   struct rt6_info *rt = mlxsw_sp_rt6->rt;
> > +
> > +   write_lock_bh(&rt->rt6i_table->tb6_lock);
> > +   rt->rt6i_flags |= RTF_OFFLOAD;
> > +   write_unlock_bh(&rt->rt6i_table->tb6_lock);
> 
> Seems wrong. A device driver should not be taking FIB table locks.

Will remove this in v2.

[...]

> > +static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
> > + struct mlxsw_sp_nexthop_group *nh_grp,
> > + struct mlxsw_sp_nexthop *nh,
> > + const struct rt6_info *rt)
> > +{
> > +   struct net_device *dev = rt->dst.dev;
> > +   struct mlxsw_sp_rif *rif;
> > +   int err;
> > +
> > +   nh->nh_grp = nh_grp;
> > +   memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr));
> > +
> > +   if (!dev)
> > +   return 0;
> > +
> > +   rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
> > +   if (!rif)
> > +   return 0;
> 
> rif == 0 means the dst device is not related to a port owned by this
> driver?

Yes.

> 
> 
> A lot to process so I am sure I missed the answer to these:
> 
> 1. How do you handle host routes for local addresses? IPv6 inserts the
> host and anycast routes with the device set to 'lo' (or VRF device)
> instead of the device with the address. I have a patch to change this,
> but needs more testing

In mlxsw_sp_fib6_entry_type_set() we check for RTF_LOCAL and set the
FIB entry type to MLXSW_SP_FIB_ENTRY_TYPE_TRAP. Packets hitting these
routes will be trapped with IP2ME trap ID towards the CPU.

> 2. How are routes with devices unrelated to ports owned by this driver
> handled?

They are handled just like any other route, but they don't have a valid
RIF (for directly connected routes) or an adjacency group (for
gatewayed routes), so the check in mlxsw_sp_fib_entry_should_offload()
will return false and they will be programmed to the device with trap
action, but using a trap ID (RTR_INGRESS0) with a lower traffic class
than IP2ME, so packets that actually need to be locally received by the
CPU have a better QoS.


Re: [patch net-next 11/17] ipv6: fib: Allow non-FIB users to take reference on route

2017-07-19 Thread David Ahern
On 7/19/17 10:17 AM, Ido Schimmel wrote:
> I did exactly that in the beginning, but it didn't sit right with me for
> the exact reason you mentioned - it can be a PITA to debug.
> 
> If we use rt6i_ref for something other than FIB references, then it
> breaks existing code that relies on rt6i_ref being 0 to indicate it's
> no longer used by the FIB. A non-zero value can now mean "not used by
> the FIB, but waiting for some module to drop the reference in its
> workqueue".
> 
> The BUG_ON() mentioned in the commit message is just one example.
> Another check was added by you in commit 8048ced9b.
> 
> So I think we both want the same thing, but I'm not sure how your
> approach is safer.

A single reference counter rt6i_ref is best.

There are 2 reads of that counter to determine if the rt is still in the
FIB. Both of those stem from side effects of using the 'lo' for the
device for host addresses. I think an explicit flag can be used for that
purpose instead of trying to deduce it from the reference counter. The
commit you referenced copied what is done in init_loopback for
consistency (both have same end goal).


Re: [PATCH] ath10k_htt_rx_amsdu_allowed(): use ath10k_dbg()

2017-07-19 Thread Joe Perches
On Wed, 2017-07-19 at 17:37 +0200, Gabriel C wrote:
>  Each time we get disconnected from AP we get flooded with messages 
> like:
> 
>  ...
>  ath10k_pci :03:00.0: no channel configured; ignoring frame(s)!
>  
>  ath10k_warn: 155 callbacks suppressed

[]

> diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c 
> b/drivers/net/wireless/ath/ath10k/htt_rx.c
[]
> @@ -1514,7 +1514,7 @@ static bool ath10k_htt_rx_amsdu_allowed(struct ath10k 
> *ar,
>   */
> 
>  if (!rx_status->freq) {
> -   ath10k_warn(ar, "no channel configured; ignoring 
> frame(s)!\n");
> +   ath10k_dbg(ar, ATH10K_DBG_HTT, "no channel configured; 
> ignoring frame(s)!\n");
>  return false;
>  }

Hi.  This doesn't apply because of tab to space conversions.

Please use git send-email to send your patch.

Maybe read Documentation/process/email-clients.rst


Re: [patch net-next 10/17] ipv6: fib: Add offload indication to routes

2017-07-19 Thread Ido Schimmel
On Wed, Jul 19, 2017 at 09:53:28AM -0600, David Ahern wrote:
> On 7/19/17 9:49 AM, Ido Schimmel wrote:
> > On Wed, Jul 19, 2017 at 09:27:30AM -0600, David Ahern wrote:
> >> On 7/19/17 1:02 AM, Jiri Pirko wrote:
> >>> Allow user space applications to see which routes are offloaded and
> >>> which aren't by setting the RTNH_F_OFFLOAD flag when dumping them.
> >>>
> >>> To be consistent with IPv4, a multipath route is marked as offloaded if
> >>> one of its nexthops is offloaded. Individual nexthops aren't marked with
> >>> the 'offload' flag.
> >>
> >> It is more user friendly to report the offload per nexthop especially
> >> given the implications. There are already flags per nexthop and those
> >> flags are pushed to userspace so not an API change at all.
> > 
> > I thought about it, but then just decided to be consistent with IPv4.
> 
> And the comment stems from just that. I was looking at IPv4 ECMP routes
> a few days ago and the existence / lack of offload flag was not intuitive.

Understood. I intend to change that.


Re: [patch net-next 11/17] ipv6: fib: Allow non-FIB users to take reference on route

2017-07-19 Thread Ido Schimmel
On Wed, Jul 19, 2017 at 09:49:37AM -0600, David Ahern wrote:
> On 7/19/17 1:02 AM, Jiri Pirko wrote:
> > From: Ido Schimmel 
> > 
> > Listeners of the FIB notification chain are expected to be able to take
> > and release a reference on notified IPv6 routes. This is needed in the
> > case of drivers capable of offloading these routes to a capable device.
> > 
> > Since notifications are sent in an atomic context, these drivers need to
> > take a reference on the route, prepare a work item to offload the route
> > and release the reference at the end of the work.
> > 
> > Currently, rt6i_ref is used to indicate in how many FIB nodes a route
> > appears. Different code paths rely on rt6i_ref being 0 to indicate the
> > route is no longer used by the FIB.
> > 
> > For example, whenever a route is deleted or replaced, fib6_purge_rt() is
> > run to make sure the route is no longer present in intermediate nodes. A
> > BUG_ON() at the end of the function is executed in case the reference
> > count isn't 1, as it's only supposed to appear in the non-intermediate
> > node from which it's going to be deleted.
> > 
> > Instead of changing the semantics of rt6i_ref, a new reference count is
> > added, so that external users could also take a reference on routes
> > without modifying rt6i_ref.
> > 
> > To make sure external users don't release routes used by the FIB, the
> > reference count is set to 1 upon creation of a route and decremented by
> > the FIB upon rt6_release().
> > 
> > The reference count is atomic, as it's not protected by any locks and
> > placed in the 40 bytes hole after the existing rt6i_ref.
> 
> I'd rather not add another reference counter. Debugging reference leaks
> is a huge PITA now; adding another counter just makes it worse.
> 
> Why can't the BUG_ON in fib6_purge_rt be removed since there are other
> reference holders now?

I did exactly that in the beginning, but it didn't sit right with me for
the exact reason you mentioned - it can be a PITA to debug.

If we use rt6i_ref for something other than FIB references, then it
breaks existing code that relies on rt6i_ref being 0 to indicate it's
no longer used by the FIB. A non-zero value can now mean "not used by
the FIB, but waiting for some module to drop the reference in its
workqueue".

The BUG_ON() mentioned in the commit message is just one example.
Another check was added by you in commit 8048ced9b.

So I think we both want the same thing, but I'm not sure how your
approach is safer.

Thanks


Re: [patch net-next 16/17] mlxsw: spectrum_router: Abort on source-specific routes

2017-07-19 Thread David Ahern
On 7/19/17 1:02 AM, Jiri Pirko wrote:
> From: Ido Schimmel 
> 
> Without resorting to ACLs, the device performs route lookup solely based
> on the destination IP address.
> 
> In case source-specific routing is needed, an error is returned and the
> abort mechanism is activated, thus allowing the kernel to take over
> forwarding decisions.
> 
> Instead of aborting, we can trap specific destination prefixes where
> source-specific routes are present, but this will result in a lot more
> code that is unlikely to ever be used.


Do you have a document summarizing these for users?


Re: [patch net-next 14/17] mlxsw: spectrum_router: Add support for IPv6 routes addition / deletion

2017-07-19 Thread David Ahern
On 7/19/17 1:02 AM, Jiri Pirko wrote:
> @@ -2094,6 +2106,40 @@ mlxsw_sp_fib_entry_should_offload(const struct 
> mlxsw_sp_fib_entry *fib_entry)
>   }
>  }
>  
> +static void
> +mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
> +{
> + struct mlxsw_sp_fib6_entry *fib6_entry;
> + struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
> +
> + fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
> +   common);
> + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
> + struct rt6_info *rt = mlxsw_sp_rt6->rt;
> +
> + write_lock_bh(&rt->rt6i_table->tb6_lock);
> + rt->rt6i_flags |= RTF_OFFLOAD;
> + write_unlock_bh(&rt->rt6i_table->tb6_lock);

Seems wrong. A device driver should not be taking FIB table locks.


> + }
> +}
> +
> +static void
> +mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
> +{
> + struct mlxsw_sp_fib6_entry *fib6_entry;
> + struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
> +
> + fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
> +   common);
> + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
> + struct rt6_info *rt = mlxsw_sp_rt6->rt;
> +
> + write_lock_bh(&rt->rt6i_table->tb6_lock);
> + rt->rt6i_flags &= ~RTF_OFFLOAD;
> + write_unlock_bh(&rt->rt6i_table->tb6_lock);

same here.




> +static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
> +   struct mlxsw_sp_nexthop_group *nh_grp,
> +   struct mlxsw_sp_nexthop *nh,
> +   const struct rt6_info *rt)
> +{
> + struct net_device *dev = rt->dst.dev;
> + struct mlxsw_sp_rif *rif;
> + int err;
> +
> + nh->nh_grp = nh_grp;
> + memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr));
> +
> + if (!dev)
> + return 0;
> +
> + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
> + if (!rif)
> + return 0;

rif == 0 means the dst device is not related to a port owned by this
driver?


A lot to process so I am sure I missed the answer to these:

1. How do you handle host routes for local addresses? IPv6 inserts the
host and anycast routes with the device set to 'lo' (or VRF device)
instead of the device with the address. I have a patch to change this,
but needs more testing

2. How are routes with devices unrelated to ports owned by this driver
handled?


[PATCH net-next v2 00/13] Change DSA's FDB API and perform switchdev cleanup

2017-07-19 Thread Arkadi Sharshevsky
The patchset moves the DSA driver into learning static FDB entries via
the switchdev notification chain rather than by using bridge bypass SELF
flag. 

The DSA drivers cannot sync the software bridge with hardware learned
entries and use the switchdev's implementation of bypass FDB dumping.
Because they are the only ones using this functionality, the fdb_dump
implementation is moved from switchdev code into DSA.

Finally, after these changes, a major cleanup in switchdev can be done.
---
Please see individual patches for patch specific change logs.
v1->v2
- Split MDB/vlan dump removal into core/driver removal.

Arkadi Sharshevsky (13):
  net: dsa: Change DSA slave FDB API to be switchdev independent
  net: dsa: Remove prepare phase for FDB
  net: dsa: Remove switchdev dependency from DSA switch notifier chain
  net: dsa: Add support for learning FDB through notification
  net: dsa: Remove support for FDB add/del via SELF
  net: dsa: Add support for querying supported bridge flags
  net: dsa: Remove support for vlan dump from DSA's drivers
  net: dsa: Remove support for bypass bridge port attributes/vlan set
  net: dsa: Remove support for MDB dump from DSA's drivers
  net: dsa: Remove redundant MDB dump support
  net: dsa: Move FDB dump implementation inside DSA
  net: bridge: Remove FDB deletion through switchdev object
  net: switchdev: Remove bridge bypass support from switchdev

 drivers/net/dsa/b53/b53_common.c   |  83 +-
 drivers/net/dsa/b53/b53_priv.h |  16 +-
 drivers/net/dsa/bcm_sf2.c  |   2 -
 drivers/net/dsa/dsa_loop.c |  38 ---
 drivers/net/dsa/microchip/ksz_common.c | 124 ++--
 drivers/net/dsa/mt7530.c   |  41 +--
 drivers/net/dsa/mv88e6xxx/chip.c   | 147 ++
 drivers/net/dsa/qca8k.c|  42 +--
 include/net/dsa.h  |  23 +-
 include/net/switchdev.h|  87 --
 net/bridge/br_fdb.c|  18 --
 net/dsa/dsa.c  |  13 +
 net/dsa/dsa_priv.h |  22 +-
 net/dsa/port.c |  51 +---
 net/dsa/slave.c| 247 +---
 net/dsa/switch.c   |  21 +-
 net/switchdev/switchdev.c  | 519 -
 17 files changed, 329 insertions(+), 1165 deletions(-)

-- 
2.4.11



[PATCH net-next v2 08/13] net: dsa: Remove support for bypass bridge port attributes/vlan set

2017-07-19 Thread Arkadi Sharshevsky
The bridge port attributes/vlan for DSA devices should be set only
from bridge code. Furthermore, the VLANs are synced totally with the
bridge so there is no need for special dump support.

Signed-off-by: Arkadi Sharshevsky 
---
 include/net/dsa.h  |  4 
 net/dsa/dsa_priv.h |  4 
 net/dsa/port.c | 12 
 net/dsa/slave.c|  6 --
 4 files changed, 26 deletions(-)

diff --git a/include/net/dsa.h b/include/net/dsa.h
index f054d41..4b82647 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -384,10 +384,6 @@ struct dsa_switch_ops {
 struct switchdev_trans *trans);
int (*port_vlan_del)(struct dsa_switch *ds, int port,
 const struct switchdev_obj_port_vlan *vlan);
-   int (*port_vlan_dump)(struct dsa_switch *ds, int port,
- struct switchdev_obj_port_vlan *vlan,
- switchdev_obj_dump_cb_t *cb);
-
/*
 * Forwarding database
 */
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 3ad666a..cddcea2 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -139,10 +139,6 @@ int dsa_port_vlan_add(struct dsa_port *dp,
  struct switchdev_trans *trans);
 int dsa_port_vlan_del(struct dsa_port *dp,
  const struct switchdev_obj_port_vlan *vlan);
-int dsa_port_vlan_dump(struct dsa_port *dp,
-  struct switchdev_obj_port_vlan *vlan,
-  switchdev_obj_dump_cb_t *cb);
-
 /* slave.c */
 extern const struct dsa_device_ops notag_netdev_ops;
 void dsa_slave_mii_bus_init(struct dsa_switch *ds);
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 86e0585..ce19216 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -246,15 +246,3 @@ int dsa_port_vlan_del(struct dsa_port *dp,
 
return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info);
 }
-
-int dsa_port_vlan_dump(struct dsa_port *dp,
-  struct switchdev_obj_port_vlan *vlan,
-  switchdev_obj_dump_cb_t *cb)
-{
-   struct dsa_switch *ds = dp->ds;
-
-   if (ds->ops->port_vlan_dump)
-   return ds->ops->port_vlan_dump(ds, dp->index, vlan, cb);
-
-   return -EOPNOTSUPP;
-}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 3ad1f4d..f939d79 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -302,9 +302,6 @@ static int dsa_slave_port_obj_dump(struct net_device *dev,
case SWITCHDEV_OBJ_ID_PORT_MDB:
err = dsa_port_mdb_dump(dp, SWITCHDEV_OBJ_PORT_MDB(obj), cb);
break;
-   case SWITCHDEV_OBJ_ID_PORT_VLAN:
-   err = dsa_port_vlan_dump(dp, SWITCHDEV_OBJ_PORT_VLAN(obj), cb);
-   break;
default:
err = -EOPNOTSUPP;
break;
@@ -926,9 +923,6 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
.ndo_netpoll_cleanup= dsa_slave_netpoll_cleanup,
.ndo_poll_controller= dsa_slave_poll_controller,
 #endif
-   .ndo_bridge_getlink = switchdev_port_bridge_getlink,
-   .ndo_bridge_setlink = switchdev_port_bridge_setlink,
-   .ndo_bridge_dellink = switchdev_port_bridge_dellink,
.ndo_get_phys_port_name = dsa_slave_get_phys_port_name,
.ndo_setup_tc   = dsa_slave_setup_tc,
 };
-- 
2.4.11



[PATCH net-next v2 02/13] net: dsa: Remove prepare phase for FDB

2017-07-19 Thread Arkadi Sharshevsky
The prepare phase for FDB add is unneeded because most of DSA devices
can have failures during bus transactions (SPI, I2C, etc.), thus, the
prepare phase cannot guarantee success of the commit stage.

The support for learning FDB through notification chain, which will be
introduced in the following patches, will provide the ability to notify
back the bridge about successful offload.

Signed-off-by: Arkadi Sharshevsky 
Reviewed-by: Vivien Didelot 
Reviewed-by: Florian Fainelli 
---
 drivers/net/dsa/b53/b53_common.c   | 17 +++--
 drivers/net/dsa/b53/b53_priv.h |  6 ++
 drivers/net/dsa/bcm_sf2.c  |  1 -
 drivers/net/dsa/microchip/ksz_common.c | 24 ++--
 drivers/net/dsa/mt7530.c   | 25 -
 drivers/net/dsa/mv88e6xxx/chip.c   | 23 +++
 drivers/net/dsa/qca8k.c| 18 +-
 include/net/dsa.h  |  4 +---
 net/dsa/dsa_priv.h |  4 +---
 net/dsa/port.c |  4 +---
 net/dsa/slave.c|  4 +++-
 net/dsa/switch.c   | 14 +++---
 12 files changed, 36 insertions(+), 108 deletions(-)

diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index d0156dc..c414b43 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1213,8 +1213,8 @@ static int b53_arl_op(struct b53_device *dev, int op, int 
port,
return b53_arl_rw_op(dev, 0);
 }
 
-int b53_fdb_prepare(struct dsa_switch *ds, int port,
-   const unsigned char *addr, u16 vid)
+int b53_fdb_add(struct dsa_switch *ds, int port,
+   const unsigned char *addr, u16 vid)
 {
struct b53_device *priv = ds->priv;
 
@@ -1224,17 +1224,7 @@ int b53_fdb_prepare(struct dsa_switch *ds, int port,
if (is5325(priv) || is5365(priv))
return -EOPNOTSUPP;
 
-   return 0;
-}
-EXPORT_SYMBOL(b53_fdb_prepare);
-
-void b53_fdb_add(struct dsa_switch *ds, int port,
-const unsigned char *addr, u16 vid)
-{
-   struct b53_device *priv = ds->priv;
-
-   if (b53_arl_op(priv, 0, port, addr, vid, true))
-   pr_err("%s: failed to add MAC address\n", __func__);
+   return b53_arl_op(priv, 0, port, addr, vid, true);
 }
 EXPORT_SYMBOL(b53_fdb_add);
 
@@ -1563,7 +1553,6 @@ static const struct dsa_switch_ops b53_switch_ops = {
.port_vlan_add  = b53_vlan_add,
.port_vlan_del  = b53_vlan_del,
.port_vlan_dump = b53_vlan_dump,
-   .port_fdb_prepare   = b53_fdb_prepare,
.port_fdb_dump  = b53_fdb_dump,
.port_fdb_add   = b53_fdb_add,
.port_fdb_del   = b53_fdb_del,
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index d417bca..f29c892 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -396,10 +396,8 @@ int b53_vlan_del(struct dsa_switch *ds, int port,
 int b53_vlan_dump(struct dsa_switch *ds, int port,
  struct switchdev_obj_port_vlan *vlan,
  switchdev_obj_dump_cb_t *cb);
-int b53_fdb_prepare(struct dsa_switch *ds, int port,
-   const unsigned char *addr, u16 vid);
-void b53_fdb_add(struct dsa_switch *ds, int port,
-const unsigned char *addr, u16 vid);
+int b53_fdb_add(struct dsa_switch *ds, int port,
+   const unsigned char *addr, u16 vid);
 int b53_fdb_del(struct dsa_switch *ds, int port,
const unsigned char *addr, u16 vid);
 int b53_fdb_dump(struct dsa_switch *ds, int port,
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 648f91b..a26e99d 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -1034,7 +1034,6 @@ static const struct dsa_switch_ops bcm_sf2_ops = {
.port_vlan_add  = b53_vlan_add,
.port_vlan_del  = b53_vlan_del,
.port_vlan_dump = b53_vlan_dump,
-   .port_fdb_prepare   = b53_fdb_prepare,
.port_fdb_dump  = b53_fdb_dump,
.port_fdb_add   = b53_fdb_add,
.port_fdb_del   = b53_fdb_del,
diff --git a/drivers/net/dsa/microchip/ksz_common.c 
b/drivers/net/dsa/microchip/ksz_common.c
index db82808..b55f364 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -678,14 +678,6 @@ static int ksz_port_vlan_dump(struct dsa_switch *ds, int 
port,
return err;
 }
 
-static int ksz_port_fdb_prepare(struct dsa_switch *ds, int port,
-   const unsigned char *addr, u16 vid)
-{
-   /* nothing needed */
-
-   return 0;
-}
-
 struct alu_struct {
/* entry 1 */
u8  is_static:1;
@@ -705,12 +697,13 @@ struct alu_struct {
u8  

[PATCH net-next v2 07/13] net: dsa: Remove support for vlan dump from DSA's drivers

2017-07-19 Thread Arkadi Sharshevsky
This is done in preparation for removing support for VLAN dump from the
DSA core. The VLANs are synced with the bridge, and thus there is no
need for special dump operation support.

Signed-off-by: Arkadi Sharshevsky 
---
 drivers/net/dsa/b53/b53_common.c   | 44 --
 drivers/net/dsa/b53/b53_priv.h |  3 --
 drivers/net/dsa/bcm_sf2.c  |  1 -
 drivers/net/dsa/dsa_loop.c | 38 ---
 drivers/net/dsa/microchip/ksz_common.c | 41 -
 drivers/net/dsa/mv88e6xxx/chip.c   | 56 --
 6 files changed, 183 deletions(-)

diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index c414b43..6020e88 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1053,49 +1053,6 @@ int b53_vlan_del(struct dsa_switch *ds, int port,
 }
 EXPORT_SYMBOL(b53_vlan_del);
 
-int b53_vlan_dump(struct dsa_switch *ds, int port,
- struct switchdev_obj_port_vlan *vlan,
- switchdev_obj_dump_cb_t *cb)
-{
-   struct b53_device *dev = ds->priv;
-   u16 vid, vid_start = 0, pvid;
-   struct b53_vlan *vl;
-   int err = 0;
-
-   if (is5325(dev) || is5365(dev))
-   vid_start = 1;
-
-   b53_read16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), &pvid);
-
-   /* Use our software cache for dumps, since we do not have any HW
-* operation returning only the used/valid VLANs
-*/
-   for (vid = vid_start; vid < dev->num_vlans; vid++) {
-   vl = &dev->vlans[vid];
-
-   if (!vl->valid)
-   continue;
-
-   if (!(vl->members & BIT(port)))
-   continue;
-
-   vlan->vid_begin = vlan->vid_end = vid;
-   vlan->flags = 0;
-
-   if (vl->untag & BIT(port))
-   vlan->flags |= BRIDGE_VLAN_INFO_UNTAGGED;
-   if (pvid == vid)
-   vlan->flags |= BRIDGE_VLAN_INFO_PVID;
-
-   err = cb(&vlan->obj);
-   if (err)
-   break;
-   }
-
-   return err;
-}
-EXPORT_SYMBOL(b53_vlan_dump);
-
 /* Address Resolution Logic routines */
 static int b53_arl_op_wait(struct b53_device *dev)
 {
@@ -1552,7 +1509,6 @@ static const struct dsa_switch_ops b53_switch_ops = {
.port_vlan_prepare  = b53_vlan_prepare,
.port_vlan_add  = b53_vlan_add,
.port_vlan_del  = b53_vlan_del,
-   .port_vlan_dump = b53_vlan_dump,
.port_fdb_dump  = b53_fdb_dump,
.port_fdb_add   = b53_fdb_add,
.port_fdb_del   = b53_fdb_del,
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index f29c892..af5d6c1 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -393,9 +393,6 @@ void b53_vlan_add(struct dsa_switch *ds, int port,
  struct switchdev_trans *trans);
 int b53_vlan_del(struct dsa_switch *ds, int port,
 const struct switchdev_obj_port_vlan *vlan);
-int b53_vlan_dump(struct dsa_switch *ds, int port,
- struct switchdev_obj_port_vlan *vlan,
- switchdev_obj_dump_cb_t *cb);
 int b53_fdb_add(struct dsa_switch *ds, int port,
const unsigned char *addr, u16 vid);
 int b53_fdb_del(struct dsa_switch *ds, int port,
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index a26e99d..824a137 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -1033,7 +1033,6 @@ static const struct dsa_switch_ops bcm_sf2_ops = {
.port_vlan_prepare  = b53_vlan_prepare,
.port_vlan_add  = b53_vlan_add,
.port_vlan_del  = b53_vlan_del,
-   .port_vlan_dump = b53_vlan_dump,
.port_fdb_dump  = b53_fdb_dump,
.port_fdb_add   = b53_fdb_add,
.port_fdb_del   = b53_fdb_del,
diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c
index fdd8f38..76d6660 100644
--- a/drivers/net/dsa/dsa_loop.c
+++ b/drivers/net/dsa/dsa_loop.c
@@ -257,43 +257,6 @@ static int dsa_loop_port_vlan_del(struct dsa_switch *ds, 
int port,
return 0;
 }
 
-static int dsa_loop_port_vlan_dump(struct dsa_switch *ds, int port,
-  struct switchdev_obj_port_vlan *vlan,
-  switchdev_obj_dump_cb_t *cb)
-{
-   struct dsa_loop_priv *ps = ds->priv;
-   struct mii_bus *bus = ps->bus;
-   struct dsa_loop_vlan *vl;
-   u16 vid, vid_start = 0;
-   int err = 0;
-
-   dev_dbg(ds->dev, "%s\n", __func__);
-
-   /* Just do a sleeping operation to make lockdep checks effective */
-   mdiobus_read(bus, ps->port_base + port, MII_BMSR);
-
-   for (vid = vid_start; vid < DSA_LOOP_VLANS; vid++) {
-   vl = &ps->vlans[vid];
-
-   

[PATCH net-next v2 04/13] net: dsa: Add support for learning FDB through notification

2017-07-19 Thread Arkadi Sharshevsky
Add support for learning FDB entries through notification. The driver
defers the hardware update via an ordered workqueue. In case of a
successful FDB add, a notification is sent back to the bridge.

In case of a hw FDB del failure, the static FDB will be deleted from
the bridge; thus, the interface is moved to the down state in order to
indicate the inconsistent situation.

Signed-off-by: Arkadi Sharshevsky 
---
v1->v2
- Moved dsa_schedule_work declaration into net/dsa/dsa_priv.h.
- Fixed switchdev nb un-registration.
---
 net/dsa/dsa.c  |  13 ++
 net/dsa/dsa_priv.h |   1 +
 net/dsa/slave.c| 127 -
 3 files changed, 139 insertions(+), 2 deletions(-)

diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 416ac4e..9abe6dc 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -271,10 +271,22 @@ static struct packet_type dsa_pack_type __read_mostly = {
.func   = dsa_switch_rcv,
 };
 
+static struct workqueue_struct *dsa_owq;
+
+bool dsa_schedule_work(struct work_struct *work)
+{
+   return queue_work(dsa_owq, work);
+}
+
 static int __init dsa_init_module(void)
 {
int rc;
 
+   dsa_owq = alloc_ordered_workqueue("dsa_ordered",
+ WQ_MEM_RECLAIM);
+   if (!dsa_owq)
+   return -ENOMEM;
+
rc = dsa_slave_register_notifier();
if (rc)
return rc;
@@ -294,6 +306,7 @@ static void __exit dsa_cleanup_module(void)
dsa_slave_unregister_notifier();
dev_remove_pack(&dsa_pack_type);
dsa_legacy_unregister();
+   destroy_workqueue(dsa_owq);
 }
 module_exit(dsa_cleanup_module);
 
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 2b2f124..3ad666a 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -105,6 +105,7 @@ void dsa_cpu_dsa_destroy(struct dsa_port *dport);
 const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol);
 int dsa_cpu_port_ethtool_setup(struct dsa_port *cpu_dp);
 void dsa_cpu_port_ethtool_restore(struct dsa_port *cpu_dp);
+bool dsa_schedule_work(struct work_struct *work);
 
 /* legacy.c */
 int dsa_legacy_register(void);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 19395cc..f595133 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1263,19 +1263,142 @@ static int dsa_slave_netdevice_event(struct 
notifier_block *nb,
return NOTIFY_DONE;
 }
 
+struct dsa_switchdev_event_work {
+   struct work_struct work;
+   struct switchdev_notifier_fdb_info fdb_info;
+   struct net_device *dev;
+   unsigned long event;
+};
+
+static void dsa_slave_switchdev_event_work(struct work_struct *work)
+{
+   struct dsa_switchdev_event_work *switchdev_work =
+   container_of(work, struct dsa_switchdev_event_work, work);
+   struct net_device *dev = switchdev_work->dev;
+   struct switchdev_notifier_fdb_info *fdb_info;
+   struct dsa_slave_priv *p = netdev_priv(dev);
+   int err;
+
+   rtnl_lock();
+   switch (switchdev_work->event) {
+   case SWITCHDEV_FDB_ADD_TO_DEVICE:
+   fdb_info = &switchdev_work->fdb_info;
+   err = dsa_port_fdb_add(p->dp, fdb_info->addr, fdb_info->vid);
+   if (err) {
+   netdev_dbg(dev, "fdb add failed err=%d\n", err);
+   break;
+   }
+   call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, dev,
+&fdb_info->info);
+   break;
+
+   case SWITCHDEV_FDB_DEL_TO_DEVICE:
+   fdb_info = &switchdev_work->fdb_info;
+   err = dsa_port_fdb_del(p->dp, fdb_info->addr, fdb_info->vid);
+   if (err) {
+   netdev_dbg(dev, "fdb del failed err=%d\n", err);
+   dev_close(dev);
+   }
+   break;
+   }
+   rtnl_unlock();
+
+   kfree(switchdev_work->fdb_info.addr);
+   kfree(switchdev_work);
+   dev_put(dev);
+}
+
+static int
+dsa_slave_switchdev_fdb_work_init(struct dsa_switchdev_event_work *
+ switchdev_work,
+ const struct switchdev_notifier_fdb_info *
+ fdb_info)
+{
+   memcpy(&switchdev_work->fdb_info, fdb_info,
+  sizeof(switchdev_work->fdb_info));
+   switchdev_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC);
+   if (!switchdev_work->fdb_info.addr)
+   return -ENOMEM;
+   ether_addr_copy((u8 *)switchdev_work->fdb_info.addr,
+   fdb_info->addr);
+   return 0;
+}
+
+/* Called under rcu_read_lock() */
+static int dsa_slave_switchdev_event(struct notifier_block *unused,
+unsigned long event, void *ptr)
+{
+   struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+   struct dsa_switchdev_event_work *switchdev_work;
+
+   if (!dsa_slave_dev_check(dev))
+   return NOTIFY_DONE;
+
+ 

  1   2   >