[PATCH net-next V2 5/8] net: fec: add variable reg_desc_active to speed things up
There is no need for complex macros every time we need to activate a queue. Also, there is no need to call skb_get_queue_mapping when we already know which queue it is using.

Signed-off-by: Troy Kisky
---
 drivers/net/ethernet/freescale/fec.h      |  7 +
 drivers/net/ethernet/freescale/fec_main.c | 44 +--
 2 files changed, 25 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index 53ec04f..bedd28a 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -310,12 +310,6 @@ struct bufdesc_ex {
 #define FEC_R_BUFF_SIZE(X)	(((X) == 1) ? FEC_R_BUFF_SIZE_1 : \
				(((X) == 2) ? \
					FEC_R_BUFF_SIZE_2 : FEC_R_BUFF_SIZE_0))
-#define FEC_R_DES_ACTIVE(X)	(((X) == 1) ? FEC_R_DES_ACTIVE_1 : \
-				(((X) == 2) ? \
-					FEC_R_DES_ACTIVE_2 : FEC_R_DES_ACTIVE_0))
-#define FEC_X_DES_ACTIVE(X)	(((X) == 1) ? FEC_X_DES_ACTIVE_1 : \
-				(((X) == 2) ? \
-					FEC_X_DES_ACTIVE_2 : FEC_X_DES_ACTIVE_0))
 #define FEC_DMA_CFG(X)		(((X) == 2) ? FEC_DMA_CFG_2 : FEC_DMA_CFG_1)
@@ -454,6 +448,7 @@ struct bufdesc_prop {
 	struct bufdesc *base;
 	struct bufdesc *last;
 	struct bufdesc *cur;
+	void __iomem *reg_desc_active;
 	dma_addr_t dma;
 	unsigned short ring_size;
 	unsigned char dsize;
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index b039288..712e3bb 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -328,7 +328,6 @@ fec_enet_txq_submit_frag_skb(struct fec_enet_priv_tx_q *txq,
 	struct bufdesc *bdp = txq->bd.cur;
 	struct bufdesc_ex *ebdp;
 	int nr_frags = skb_shinfo(skb)->nr_frags;
-	unsigned short queue = skb_get_queue_mapping(skb);
 	int frag, frag_len;
 	unsigned short status;
 	unsigned int estatus = 0;
@@ -361,7 +360,7 @@ fec_enet_txq_submit_frag_skb(struct fec_enet_priv_tx_q *txq,
 		if (fep->bufdesc_ex) {
 			if (fep->quirks & FEC_QUIRK_HAS_AVB)
-				estatus |= FEC_TX_BD_FTYPE(queue);
+				estatus |= FEC_TX_BD_FTYPE(txq->bd.qid);
 			if (skb->ip_summed == CHECKSUM_PARTIAL)
 				estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
 			ebdp->cbd_bdu = 0;
@@ -415,7 +414,6 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq,
 	dma_addr_t addr;
 	unsigned short status;
 	unsigned short buflen;
-	unsigned short queue;
 	unsigned int estatus = 0;
 	unsigned int index;
 	int entries_free;
@@ -444,7 +442,6 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq,
 	bufaddr = skb->data;
 	buflen = skb_headlen(skb);
-	queue = skb_get_queue_mapping(skb);
 	index = fec_enet_get_bd_index(bdp, &txq->bd);
 	if (((unsigned long) bufaddr) & fep->tx_align ||
 	    fep->quirks & FEC_QUIRK_SWAP_FRAME) {
@@ -487,7 +484,7 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq,
 			skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
 		if (fep->quirks & FEC_QUIRK_HAS_AVB)
-			estatus |= FEC_TX_BD_FTYPE(queue);
+			estatus |= FEC_TX_BD_FTYPE(txq->bd.qid);
 		if (skb->ip_summed == CHECKSUM_PARTIAL)
 			estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
@@ -521,7 +518,7 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq,
 	txq->bd.cur = bdp;
 	/* Trigger transmission start */
-	writel(0, fep->hwp + FEC_X_DES_ACTIVE(queue));
+	writel(0, txq->bd.reg_desc_active);
 	return 0;
 }
@@ -534,7 +531,6 @@ fec_enet_txq_put_data_tso(struct fec_enet_priv_tx_q *txq, struct sk_buff *skb,
 {
 	struct fec_enet_private *fep = netdev_priv(ndev);
 	struct bufdesc_ex *ebdp = container_of(bdp, struct bufdesc_ex, desc);
-	unsigned short queue = skb_get_queue_mapping(skb);
 	unsigned short status;
 	unsigned int estatus = 0;
 	dma_addr_t addr;
@@ -566,7 +562,7 @@ fec_enet_txq_put_data_tso(struct fec_enet_priv_tx_q *txq, struct sk_buff *skb,
 	if (fep->bufdesc_ex) {
 		if (fep->quirks & FEC_QUIRK_HAS_AVB)
-			estatus |= FEC_TX_BD_FTYPE(queue);
+			estatus |= FEC_TX_BD_FTYPE(txq->bd.qid);
 		if (skb->ip_summed == CHECKSUM_PARTIAL)
 			estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
 		ebdp->cbd_bdu = 0;
@@ -595,7 +591,6 @@ fec_enet_txq_put_hdr_tso(struct
[PATCH net-next V2 3/8] net: fec: fix fec_enet_get_free_txdesc_num
When first initialized, cur_tx points to the first entry in the queue and dirty_tx points to the last. At this point, fec_enet_get_free_txdesc_num will return tx_ring_size - 2. If tx_ring_size - 2 entries are now queued, then fec_enet_get_free_txdesc_num should return 0, but it returns tx_ring_size instead.

Signed-off-by: Troy Kisky
---
 drivers/net/ethernet/freescale/fec_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 162fa59..adbddfd 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -296,7 +296,7 @@ static int fec_enet_get_free_txdesc_num(struct fec_enet_private *fep,
 	entries = ((const char *)txq->dirty_tx -
 		   (const char *)txq->cur_tx) / fep->bufdesc_size - 1;
 
-	return entries > 0 ? entries : entries + txq->tx_ring_size;
+	return entries >= 0 ? entries : entries + txq->tx_ring_size;
 }
 
 static void swap_buffer(void *bufaddr, int len)
--
2.5.0
[PATCH] net: phy: dp83848: Add support for TI TLK10x Ethernet PHYs
The TI TLK10x Ethernet PHYs are similar in the interrupt-relevant registers and so are compatible with the DP83848x devices already supported. Add these and re-order code to support additional PHYs.

Signed-off-by: Andrew F. Davis
---
 drivers/net/phy/dp83848.c | 89 ---
 1 file changed, 53 insertions(+), 36 deletions(-)

diff --git a/drivers/net/phy/dp83848.c b/drivers/net/phy/dp83848.c
index 5e14e62..bc88259 100644
--- a/drivers/net/phy/dp83848.c
+++ b/drivers/net/phy/dp83848.c
@@ -1,7 +1,8 @@
 /*
  * Driver for the Texas Instruments DP83848 PHY
  *
- * Copyright (C) 2015 Texas Instruments Inc.
+ * Copyright (C) 2015-2016 Texas Instruments Incorporated - http://www.ti.com/
+ * Andrew F. Davis
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -16,11 +17,13 @@
 #include
 #include
 
-#define DP83848_PHY_ID		0x20005c90
+#define DP83848C_PHY_ID		0x20005c90
+#define DP83848I_PHY_ID		0x20005ca0
+#define TLK10X_PHY_ID		0x2000a210
 
 /* Registers */
-#define DP83848_MICR		0x11
-#define DP83848_MISR		0x12
+#define DP83848_MICR		0x11 /* MII Interrupt Control Register */
+#define DP83848_MISR		0x12 /* MII Interrupt Status Register */
 
 /* MICR Register Fields */
 #define DP83848_MICR_INT_OE	BIT(0) /* Interrupt Output Enable */
@@ -36,6 +39,12 @@
 #define DP83848_MISR_ED_INT_EN	BIT(6) /* Energy detect */
 #define DP83848_MISR_LQM_INT_EN	BIT(7) /* Link Quality Monitor */
 
+#define DP83848_INT_EN_MASK		\
+	(DP83848_MISR_ANC_INT_EN |	\
+	 DP83848_MISR_DUP_INT_EN |	\
+	 DP83848_MISR_SPD_INT_EN |	\
+	 DP83848_MISR_LINK_INT_EN)
+
 static int dp83848_ack_interrupt(struct phy_device *phydev)
 {
 	int err = phy_read(phydev, DP83848_MISR);
@@ -45,50 +54,58 @@ static int dp83848_ack_interrupt(struct phy_device *phydev)
 
 static int dp83848_config_intr(struct phy_device *phydev)
 {
-	int err;
+	int control, ret;
+
+	control = phy_read(phydev, DP83848_MICR);
+	if (control < 0)
+		return control;
 
 	if (phydev->interrupts == PHY_INTERRUPT_ENABLED) {
-		err = phy_write(phydev, DP83848_MICR,
-				DP83848_MICR_INT_OE |
-				DP83848_MICR_INTEN);
-		if (err < 0)
-			return err;
-
-		return phy_write(phydev, DP83848_MISR,
-				 DP83848_MISR_ANC_INT_EN |
-				 DP83848_MISR_DUP_INT_EN |
-				 DP83848_MISR_SPD_INT_EN |
-				 DP83848_MISR_LINK_INT_EN);
+		control |= DP83848_MICR_INT_OE;
+		control |= DP83848_MICR_INTEN;
+
+		ret = phy_write(phydev, DP83848_MISR, DP83848_INT_EN_MASK);
+		if (ret < 0)
+			return ret;
+	} else {
+		control &= ~DP83848_MICR_INTEN;
 	}
 
-	return phy_write(phydev, DP83848_MICR, 0x0);
+	return phy_write(phydev, DP83848_MICR, control);
 }
 
 static struct mdio_device_id __maybe_unused dp83848_tbl[] = {
-	{ DP83848_PHY_ID, 0xfff0 },
+	{ DP83848C_PHY_ID, 0xfff0 },
+	{ DP83848I_PHY_ID, 0xfff0 },
+	{ TLK10X_PHY_ID, 0xfff0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(mdio, dp83848_tbl);
 
+#define DP83848_PHY_DRIVER(_id, _name)		\
+	{					\
+		.phy_id		= _id,		\
+		.phy_id_mask	= 0xfff0,	\
+		.name		= _name,	\
+		.features	= PHY_BASIC_FEATURES,	\
+		.flags		= PHY_HAS_INTERRUPT,	\
+						\
+		.soft_reset	= genphy_soft_reset,	\
+		.config_init	= genphy_config_init,	\
+		.suspend	= genphy_suspend,	\
+		.resume		= genphy_resume,	\
+		.config_aneg	= genphy_config_aneg,	\
+		.read_status	= genphy_read_status,	\
+						\
+		/* IRQ related */		\
+		.ack_interrupt	= dp83848_ack_interrupt, \
+		.config_intr	= dp83848_config_intr,	\
+	}
+
 static struct phy_driver dp83848_driver[] = {
-	{
-		.phy_id		= DP83848_PHY_ID,
-		.phy_id_mask	=
Re: [net-next] igb: assume MSI-X interrupts during initialization
On 02/05/2016 10:24 PM, Laine Stump wrote:
> Stefan,
>
> I have an AMD 990FX system with an Intel 82576 card that could not
> successfully boot with any kernel starting somewhere prior to 4.2, but
> does boot properly in 4.4+. After a lot of time bisecting, I found that
> this patch, when applied to kernel 4.3.0, solves the problem (applying
> to 4.2.0 has no effect, so there's some other patch/patches in the
> interim that were also part of the fix). Since I don't know the details
> of proposing this patch for 4.3 stable, would it be possible for you to
> do that? Thanks!

Hi Laine,

I took a quick look at 4.3 and the patch you mention should be
sufficient. For 4.2 I'll have to take a closer look. I'm currently
traveling but going to get back to you early next week. I'd like to
double check things before taking any action. Thanks!

Stefan

> The full saga of my problem and investigation is here:
> https://www.mail-archive.com/iommu@lists.linux-foundation.org/msg10687.html
>
> On 09/17/2015 08:46 AM, Stefan Assmann wrote:
>> In igb_sw_init() the sequence of calls was changed from
>>   igb_init_queue_configuration()
>>   igb_init_interrupt_scheme()
>>   igb_probe_vfs()
>> to
>>   igb_probe_vfs()
>>   igb_init_queue_configuration()
>>   igb_init_interrupt_scheme()
>>
>> This results in adapter->flags not having the IGB_FLAG_HAS_MSIX bit set
>> during igb_probe_vfs()->igb_enable_sriov(). Therefore SR-IOV does not
>> get enabled properly and we run into a NULL pointer if the max_vfs
>> module parameter is specified (adapter->vf_data does not get allocated,
>> crash on accessing the structure).
>>
>> [    7.419348] BUG: unable to handle kernel NULL pointer dereference at 0048
>> [    7.419367] IP: [] igb_reset+0xe6/0x5d0 [igb]
>> [    7.419370] PGD 0
>> [    7.419373] Oops: 0002 [#1] SMP
>> [    7.419381] Modules linked in: ahci(+) libahci igb(+) i40e(+) vxlan
>>                ip6_udp_tunnel udp_tunnel megaraid_sas(+) ixgbe(+) mdio
>> [    7.419385] CPU: 0 PID: 4 Comm: kworker/0:0 Not tainted 4.2.0+ #153
>> [    7.419387] Hardware name: Dell Inc. PowerEdge R720/0C4Y3R, BIOS 1.6.0 03/07/2013
>> [...]
>> [    7.419431] Call Trace:
>> [    7.419442] [] igb_probe+0x8b6/0x1340 [igb]
>> [    7.419447] [] local_pci_probe+0x45/0xa0
>>
>> Prevent this by setting the IGB_FLAG_HAS_MSIX bit before calling
>> igb_probe_vfs(). The real interrupt capabilities will be checked during
>> igb_init_interrupt_scheme() so this is safe to do.
>>
>> Signed-off-by: Stefan Assmann
>> ---
>>  drivers/net/ethernet/intel/igb/igb_main.c | 3 +++
>>  1 file changed, 3 insertions(+)
>>
>> diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
>> index e174fbb..ba019fc 100644
>> --- a/drivers/net/ethernet/intel/igb/igb_main.c
>> +++ b/drivers/net/ethernet/intel/igb/igb_main.c
>> @@ -2986,6 +2986,9 @@ static int igb_sw_init(struct igb_adapter *adapter)
>>  	}
>>  #endif /* CONFIG_PCI_IOV */
>>
>> +	/* Assume MSI-X interrupts, will be checked during IRQ allocation */
>> +	adapter->flags |= IGB_FLAG_HAS_MSIX;
>> +
>>  	igb_probe_vfs(adapter);
>>
>>  	igb_init_queue_configuration(adapter);
[PATCH net v4] r8169: Completion of enable BIOS support.
Signed-off-by: Corcodel Marian
---
 drivers/net/ethernet/realtek/r8169.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 2e83059..1d119a6 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -7918,7 +7918,9 @@ static void rtl_wol_shutdown_quirk(struct rtl8169_private *tp)
 	case RTL_GIGA_MAC_VER_11:
 	case RTL_GIGA_MAC_VER_12:
 	case RTL_GIGA_MAC_VER_17:
+#ifndef CONFIG_BIOS_SUPPORT
 		pci_clear_master(tp->pci_dev);
+#endif
 		RTL_W8(ChipCmd, CmdRxEnb);
 		/* PCI commit */
--
2.5.0
[PATCH net v4] r8169: Add bios support.
This patch avoids compiling the code that sets the latency timer and bus mastering, which the BIOS has already provided. Davem says "The chip can be used on systems without a BIOS and that should still work."

Signed-off-by: Corcodel Marian
---
 drivers/Kconfig                      | 4
 drivers/net/ethernet/realtek/r8169.c | 3 ++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/Kconfig b/drivers/Kconfig
index d2ac339..46bdd2b 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -1,5 +1,9 @@
 menu "Device Drivers"
 
+config BIOS_SUPPORT
+	bool "bios support for systems which have one"
+	def_bool y
+
 source "drivers/amba/Kconfig"
 
 source "drivers/base/Kconfig"
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 6a2b7bb..2e83059 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -8262,8 +8262,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	rtl_hw_reset(tp);
 
 	rtl_ack_events(tp, 0x);
-
+#ifndef CONFIG_BIOS_SUPPORT
 	pci_set_master(pdev);
+#endif
 
 	rtl_init_mdio_ops(tp);
 	rtl_init_pll_power_ops(tp);
--
2.5.0
Re: [PATCH 3/3] bonding: make device count build-time configurable
Hi Bjørn,

On Tue, 2016-01-12 at 22:40 +0100, Bjørn Mork wrote:
> David Miller writes:
> > From: Lubomir Rintel
> > Date: Tue, 12 Jan 2016 18:19:49 +0100
> >
> > > It's still an improvement to let the distributions decide if they're
> > > keeping "ip link add" broken or possibly affecting the scripts.
> >
> > That it is "broken" is your opinion.
> >
> > Document the behavior. It is not broken if the user is told to be
> > mindful of what devices are created by default.
> >
> > There is way too much downside to changing this.
>
> Besides, distributions or admins can already change that behaviour if
> they consider it "broken", using the existing module parameter:
>
>   # echo "options bonding max_bonds=0" >/etc/modprobe.d/bonding.conf
>   # rmmod bonding
>   # ip link add bond0 type bond
>   (no error here)
>
> This method should be well known and understood by most users, contrary
> to some odd CONFIG_ build time setting.

Yes, that's an alternative solution. We may end up shipping such a
configuration file, though it's not really clear which package should
ship it (probably systemd?).

I'd still prefer a kernel build-time option. It's more likely for
distributions to make the decision they prefer when running make
oldconfig. I'm assuming most distros would like to drop the legacy
behavior; at this point no one probably relies on it anyway, given that
NetworkManager works around this by loading the module with max_bonds=0.

Also, there's prior art for addressing this in the kernel: the loop
block device.

> Bjørn

Regards,
Lubo
Re: [PATCH 1/2] ethtool: add dynamic flag to ETHTOOL_{GS}RXFH commands
On Thu, 2016-02-04 at 19:30 -0500, David Miller wrote:
> From: "Keller, Jacob E"
> Date: Thu, 4 Feb 2016 23:09:56 +
>
> > So you're suggesting instead, to error when the second operation
> > (change number of queues) would fail with the current settings?
>
> Yes.
>
> This is absolutely required.

I will investigate this route. I think there needs to be some way for
the ETHTOOL_{GS}RXFH commands to pass enough information in a way the
driver can clearly see as "reset to default", but otherwise I think
this is pretty straightforward to implement.

Thanks,
Jake
Re: [PATCH v3] net: ethernet: support "fixed-link" DT node on nb8800 driver
On 02/05/2016 04:08 PM, Måns Rullgård wrote:
> Sebastian Frias writes:
>> On 02/05/2016 03:34 PM, Måns Rullgård wrote:
>>> Sebastian Frias writes:
>>>> Signed-off-by: Sebastian Frias
>>>
>>> Please change the subject to something like "net: ethernet: nb8800:
>>> support fixed-link DT node" and add a comment body.
>>
>> The subject is pretty explicit for such a simple patch, what else
>> could I add that wouldn't be unnecessary chat?
>
> It's customary to include a description body even if it's little more
> than a restatement of the subject. Also, while the subject usually only
> says _what_ the patch does, the body should additionally state _why_ it
> is needed.

I understand, but _why_ it is needed is also obvious in this case; I
mean, without the patch "fixed-link" cannot be used. Other patches may
not be as obvious/simple and thus justify and require more details.

Anyway, I added "Properly handles the case where the PHY is not
connected to the real MDIO bus", would that be ok?

---
 drivers/net/ethernet/aurora/nb8800.c | 14 +-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/aurora/nb8800.c b/drivers/net/ethernet/aurora/nb8800.c
index ecc4a33..e1fb071 100644
--- a/drivers/net/ethernet/aurora/nb8800.c
+++ b/drivers/net/ethernet/aurora/nb8800.c
@@ -1460,7 +1460,19 @@ static int nb8800_probe(struct platform_device *pdev)
 		goto err_disable_clk;
 	}
 
-	priv->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0);
+	if (of_phy_is_fixed_link(pdev->dev.of_node)) {
+		ret = of_phy_register_fixed_link(pdev->dev.of_node);
+		if (ret < 0) {
+			dev_err(&pdev->dev, "bad fixed-link spec\n");
+			goto err_free_bus;
+		}
+		priv->phy_node = of_node_get(pdev->dev.of_node);
+	}
+
+	if (!priv->phy_node)
+		priv->phy_node = of_parse_phandle(pdev->dev.of_node,
+						  "phy-handle", 0);
+
 	if (!priv->phy_node) {
 		dev_err(&pdev->dev, "no PHY specified\n");
 		ret = -ENODEV;
--
2.1.4
[PATCH v3] openvswitch: allow management from inside user namespaces
Operations with the GENL_ADMIN_PERM flag fail permissions checks because this flag means we call netlink_capable, which uses the init user ns. Instead, let's introduce a new flag, GENL_UNS_ADMIN_PERM, for operations which should be allowed inside a user namespace.

The motivation for this is to be able to run openvswitch in unprivileged containers. I've tested this and it seems to work, but I really have no idea about the security consequences of this patch, so thoughts would be much appreciated.

v2: use the GENL_UNS_ADMIN_PERM flag instead of a check in each function
v3: use separate ifs for UNS_ADMIN_PERM and ADMIN_PERM, instead of one massive one

Reported-by: James Page
Signed-off-by: Tycho Andersen
CC: Eric Biederman
CC: Pravin Shelar
CC: Justin Pettit
CC: "David S. Miller"
---
 include/uapi/linux/genetlink.h |  1 +
 net/netlink/genetlink.c        |  4
 net/openvswitch/datapath.c     | 20 ++--
 3 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/include/uapi/linux/genetlink.h b/include/uapi/linux/genetlink.h
index c3363ba..5512c90 100644
--- a/include/uapi/linux/genetlink.h
+++ b/include/uapi/linux/genetlink.h
@@ -21,6 +21,7 @@ struct genlmsghdr {
 #define GENL_CMD_CAP_DO		0x02
 #define GENL_CMD_CAP_DUMP	0x04
 #define GENL_CMD_CAP_HASPOL	0x08
+#define GENL_UNS_ADMIN_PERM	0x10
 
 /*
  * List of reserved static generic netlink identifiers:
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index f830326..0ffd721 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -580,6 +580,10 @@ static int genl_family_rcv_msg(struct genl_family *family,
 	    !netlink_capable(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
+	if ((ops->flags & GENL_UNS_ADMIN_PERM) &&
+	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
+		return -EPERM;
+
 	if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) {
 		int rc;
 
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index deadfda..d6f7fe9 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -654,7 +654,7 @@ static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
 static const struct genl_ops dp_packet_genl_ops[] = {
 	{ .cmd = OVS_PACKET_CMD_EXECUTE,
-	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
 	  .policy = packet_policy,
 	  .doit = ovs_packet_cmd_execute
 	}
@@ -1391,12 +1391,12 @@ static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
 static const struct genl_ops dp_flow_genl_ops[] = {
 	{ .cmd = OVS_FLOW_CMD_NEW,
-	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
 	  .policy = flow_policy,
 	  .doit = ovs_flow_cmd_new
 	},
 	{ .cmd = OVS_FLOW_CMD_DEL,
-	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
 	  .policy = flow_policy,
 	  .doit = ovs_flow_cmd_del
 	},
@@ -1407,7 +1407,7 @@ static const struct genl_ops dp_flow_genl_ops[] = {
 	  .dumpit = ovs_flow_cmd_dump
 	},
 	{ .cmd = OVS_FLOW_CMD_SET,
-	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
 	  .policy = flow_policy,
 	  .doit = ovs_flow_cmd_set,
 	},
@@ -1777,12 +1777,12 @@ static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
 static const struct genl_ops dp_datapath_genl_ops[] = {
 	{ .cmd = OVS_DP_CMD_NEW,
-	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
 	  .policy = datapath_policy,
 	  .doit = ovs_dp_cmd_new
 	},
 	{ .cmd = OVS_DP_CMD_DEL,
-	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
 	  .policy = datapath_policy,
 	  .doit = ovs_dp_cmd_del
 	},
@@ -1793,7 +1793,7 @@ static const struct genl_ops dp_datapath_genl_ops[] = {
 	  .dumpit = ovs_dp_cmd_dump
 	},
 	{ .cmd = OVS_DP_CMD_SET,
-	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
 	  .policy = datapath_policy,
 	  .doit = ovs_dp_cmd_set,
 	},
@@ -2158,12 +2158,12 @@ static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
RE: bonding reports interface up with 0 Mbps
>-----Original Message-----
>From: Jay Vosburgh [mailto:jay.vosbu...@canonical.com]
>Sent: Thursday, February 04, 2016 4:37 PM
>To: Tantilov, Emil S
>Cc: netdev@vger.kernel.org; go...@cumulusnetworks.com; zhuyj; j...@mellanox.com
>Subject: Re: bonding reports interface up with 0 Mbps
>
>Jay Vosburgh wrote:
>[...]
>>	Thinking about the trace again... Emil: what happens in the
>>trace before this?  Is there ever a call to the ixgbe_get_settings?
>>Does a NETDEV_UP or NETDEV_CHANGE event ever hit the bond_netdev_event
>>function?
>
>	Emil kindly sent me the trace offline, and I think I see what's
>going on.  It looks like the sequence of events is:
>
>bond_enslave ->
>	bond_update_speed_duplex (device is down, thus DUPLEX/SPEED_UNKNOWN)
>	[ do rest of enslavement, start miimon periodic work ]
>
>	[ time passes, device goes carrier up ]
>
>ixgbe_service_task: eth1: NIC Link is Up 10 Gbps ->
>	netif_carrier_on (arranges for NETDEV_CHANGE notifier out of line)
>
>	[ a few microseconds later ]
>
>bond_mii_monitor ->
>	bond_check_dev_link (now is carrier up)
>	bond_miimon_commit -> (emits "0 Mbps full duplex" message)
>		bond_lower_state_changed ->
>			bond_netdev_event (NETDEV_CHANGELOWERSTATE, is ignored)
>		bond_3ad_handle_link_change (sees DUPLEX/SPEED_UNKNOWN)
>
>	[ a few microseconds later, in response to ixgbe's netif_carrier_on ]
>
>notifier_call_chain ->
>	bond_netdev_event NETDEV_CHANGE ->
>		bond_update_speed_duplex (sees correct SPEED_1/FULL) ->
>			bond_3ad_adapter_speed_duplex_changed (updates 802.3ad)
>
>	Basically, the race is that the periodic bond_mii_monitor is
>squeezing in between the link going up and bonding's update of the speed
>and duplex in response to the NETDEV_CHANGE triggered by the driver's
>netif_carrier_on call.  bonding ends up using the stale duplex and speed
>information obtained at enslavement time.
>
>	I think that, nowadays, the initial speed and duplex will pretty
>much always be UNKNOWN, at least for real Ethernet devices, because it
>will take longer to autoneg than the time between the dev_open and
>bond_update_speed_duplex calls in bond_enslave.
>
>	Adding a case to bond_netdev_event for CHANGELOWERSTATE works
>because it's a synchronous call from bonding.  For purposes of fixing
>this, it's more or less equivalent to calling bond_update_speed_duplex
>from bond_miimon_commit (which is part of a test patch I posted earlier
>today).
>
>	If the above analysis is correct, then I would expect this patch
>to make the problem go away:
>
>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>index 56b560558884..cabaeb61333d 100644
>--- a/drivers/net/bonding/bond_main.c
>+++ b/drivers/net/bonding/bond_main.c
>@@ -2127,6 +2127,7 @@ static void bond_miimon_commit(struct bonding *bond)
> 			continue;
> 
> 		case BOND_LINK_UP:
>+			bond_update_speed_duplex(slave);
> 			bond_set_slave_link_state(slave, BOND_LINK_UP,
> 						  BOND_SLAVE_NOTIFY_NOW);
> 			slave->last_link_up = jiffies;
>
>	Emil, can you give just the above a test?

Test has been running all night and no failures so far. Looking at the
logs, the condition triggering the race occurred 5 times. I will leave
the test running over the weekend just in case and post a final update
on Monday.

Thanks,
Emil
Re: [PATCH 2/3] dummy: make device count build-time configurable
Hi Stephen,

On Tue, 2016-01-12 at 10:42 -0800, Stephen Hemminger wrote:
> On Tue, 12 Jan 2016 12:57:33 +0100
> Lubomir Rintel wrote:
>
> > The devices can be created at run-time for quite some time already,
> > and the load-time device creation collides with attempts to create
> > the device of the same name:
> >
> >   # rmmod dummy
> >   # ip link add dummy0 type dummy
> >   RTNETLINK answers: File exists
> >
> > This is pretty much the same situation as was with the loop block
> > devices, which was solved by adding a build-time configuration that
> > the distributions could use as they deem fit while keeping the
> > default for compatibility.
> >
> > Let's do that here as well.
> >
> > Signed-off-by: Lubomir Rintel
>
> There is already a module parameter for this, so making it a compile
> time option adds nothing.

This option changes the default for the parameter. When the module gets
autoloaded, the user doesn't get a chance to specify the module
parameter and unwanted devices pop up. Worse still, the automatically
created devices are likely to collide with what the user asked for.

Lubo
Re: [PATCH] net: ethernet: support "fixed-link" DT node on nb8800 driver
On Fri, Feb 5, 2016 at 3:39 PM, Måns Rullgård wrote:
>> +		if (ret < 0) {
>> +			dev_err(&pdev->dev, "broken fixed-link specification\n");
>
> Line is longer than 80 chars.

This is actually okay, though I would recommend moving the long string
literal to the next line.

-- 
With Best Regards,
Andy Shevchenko
[PATCH net] hv_netvsc: Restore needed_headroom request
Commit c0eb454034aa ("hv_netvsc: Don't ask for additional head room in the skb") got rid of the needed_headroom setting for the driver. With the change I hit the following issue trying to use the pktgen module:

[   57.522021] kernel BUG at net/core/skbuff.c:1128!
[   57.522021] invalid opcode: [#1] SMP DEBUG_PAGEALLOC
...
[   58.721068] Call Trace:
[   58.721068] [] netvsc_start_xmit+0x4c6/0x8e0 [hv_netvsc]
...
[   58.721068] [] ? pktgen_finalize_skb+0x25c/0x2a0 [pktgen]
[   58.721068] [] ? __netdev_alloc_skb+0xc0/0x100
[   58.721068] [] pktgen_thread_worker+0x257/0x1920 [pktgen]

Basically, we're calling skb_cow_head(skb, RNDIS_AND_PPI_SIZE) and crash on:

	if (skb_shared(skb))
		BUG();

We probably need to restore the needed_headroom setting (but shrunk to RNDIS_AND_PPI_SIZE, as we don't need more) to request the required headroom space. In theory, it should not give us a performance penalty.

Signed-off-by: Vitaly Kuznetsov
---
 drivers/net/hyperv/netvsc_drv.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 1d3a665..98e34fe 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -1089,6 +1089,9 @@ static int netvsc_probe(struct hv_device *dev,
 	net->ethtool_ops = &ethtool_ops;
 	SET_NETDEV_DEV(net, &dev->device);
 
+	/* We always need headroom for rndis header */
+	net->needed_headroom = RNDIS_AND_PPI_SIZE;
+
 	/* Notify the netvsc driver of the new device */
 	memset(&device_info, 0, sizeof(device_info));
 	device_info.ring_size = ring_size;
-- 
2.5.0
Re: [PATCH net-next 2/2] sfc: implement IPv6 NFC (and IPV4_USER_FLOW)
On Fri, 2016-02-05 at 11:16 +, Edward Cree wrote:
> Signed-off-by: Edward Cree

Reviewed-by: Ben Hutchings

> ---
>  drivers/net/ethernet/sfc/ethtool.c | 184 +
>  1 file changed, 184 insertions(+)
> 
> diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c
> index 0347976..445ccdb 100644
> --- a/drivers/net/ethernet/sfc/ethtool.c
> +++ b/drivers/net/ethernet/sfc/ethtool.c
> @@ -783,14 +783,26 @@ static int efx_ethtool_reset(struct net_device *net_dev, u32 *flags)
>  static const u8 mac_addr_ig_mask[ETH_ALEN] __aligned(2) = {0x01, 0, 0, 0, 0, 0};
>  
>  #define IP4_ADDR_FULL_MASK	((__force __be32)~0)
> +#define IP_PROTO_FULL_MASK	0xFF
>  #define PORT_FULL_MASK	((__force __be16)~0)
>  #define ETHER_TYPE_FULL_MASK	((__force __be16)~0)
>  
> +static inline void ip6_fill_mask(__be32 *mask)
> +{
> +	mask[0] = mask[1] = mask[2] = mask[3] = ~(__be32)0;
> +}
> +
>  static int efx_ethtool_get_class_rule(struct efx_nic *efx,
>  				      struct ethtool_rx_flow_spec *rule)
>  {
>  	struct ethtool_tcpip4_spec *ip_entry = &rule->h_u.tcp_ip4_spec;
>  	struct ethtool_tcpip4_spec *ip_mask = &rule->m_u.tcp_ip4_spec;
> +	struct ethtool_usrip4_spec *uip_entry = &rule->h_u.usr_ip4_spec;
> +	struct ethtool_usrip4_spec *uip_mask = &rule->m_u.usr_ip4_spec;
> +	struct ethtool_tcpip6_spec *ip6_entry = &rule->h_u.tcp_ip6_spec;
> +	struct ethtool_tcpip6_spec *ip6_mask = &rule->m_u.tcp_ip6_spec;
> +	struct ethtool_usrip6_spec *uip6_entry = &rule->h_u.usr_ip6_spec;
> +	struct ethtool_usrip6_spec *uip6_mask = &rule->m_u.usr_ip6_spec;
>  	struct ethhdr *mac_entry = &rule->h_u.ether_spec;
>  	struct ethhdr *mac_mask = &rule->m_u.ether_spec;
>  	struct efx_filter_spec spec;
> @@ -833,6 +845,35 @@ static int efx_ethtool_get_class_rule(struct efx_nic *efx,
>  			ip_entry->psrc = spec.rem_port;
>  			ip_mask->psrc = PORT_FULL_MASK;
>  		}
> +	} else if ((spec.match_flags & EFX_FILTER_MATCH_ETHER_TYPE) &&
> +	    spec.ether_type == htons(ETH_P_IPV6) &&
> +	    (spec.match_flags & EFX_FILTER_MATCH_IP_PROTO) &&
> +	    (spec.ip_proto == IPPROTO_TCP || spec.ip_proto == IPPROTO_UDP) &&
> +	    !(spec.match_flags &
> +	      ~(EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_OUTER_VID |
> +		EFX_FILTER_MATCH_LOC_HOST | EFX_FILTER_MATCH_REM_HOST |
> +		EFX_FILTER_MATCH_IP_PROTO |
> +		EFX_FILTER_MATCH_LOC_PORT | EFX_FILTER_MATCH_REM_PORT))) {
> +		rule->flow_type = ((spec.ip_proto == IPPROTO_TCP) ?
> +				   TCP_V6_FLOW : UDP_V6_FLOW);
> +		if (spec.match_flags & EFX_FILTER_MATCH_LOC_HOST) {
> +			memcpy(ip6_entry->ip6dst, spec.loc_host,
> +			       sizeof(ip6_entry->ip6dst));
> +			ip6_fill_mask(ip6_mask->ip6dst);
> +		}
> +		if (spec.match_flags & EFX_FILTER_MATCH_REM_HOST) {
> +			memcpy(ip6_entry->ip6src, spec.rem_host,
> +			       sizeof(ip6_entry->ip6src));
> +			ip6_fill_mask(ip6_mask->ip6src);
> +		}
> +		if (spec.match_flags & EFX_FILTER_MATCH_LOC_PORT) {
> +			ip6_entry->pdst = spec.loc_port;
> +			ip6_mask->pdst = PORT_FULL_MASK;
> +		}
> +		if (spec.match_flags & EFX_FILTER_MATCH_REM_PORT) {
> +			ip6_entry->psrc = spec.rem_port;
> +			ip6_mask->psrc = PORT_FULL_MASK;
> +		}
>  	} else if (!(spec.match_flags &
>  		     ~(EFX_FILTER_MATCH_LOC_MAC | EFX_FILTER_MATCH_LOC_MAC_IG |
>  		       EFX_FILTER_MATCH_REM_MAC | EFX_FILTER_MATCH_ETHER_TYPE |
> @@ -855,6 +896,47 @@ static int efx_ethtool_get_class_rule(struct efx_nic *efx,
>  		mac_entry->h_proto = spec.ether_type;
>  		mac_mask->h_proto = ETHER_TYPE_FULL_MASK;
>  	}
> +	} else if (spec.match_flags & EFX_FILTER_MATCH_ETHER_TYPE &&
> +	    spec.ether_type == htons(ETH_P_IP) &&
> +	    !(spec.match_flags &
> +	      ~(EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_OUTER_VID |
> +		EFX_FILTER_MATCH_LOC_HOST | EFX_FILTER_MATCH_REM_HOST |
> +		EFX_FILTER_MATCH_IP_PROTO))) {
> +		rule->flow_type = IPV4_USER_FLOW;
> +		uip_entry->ip_ver = ETH_RX_NFC_IP4;
> +		if (spec.match_flags & EFX_FILTER_MATCH_IP_PROTO) {
> +			uip_mask->proto = IP_PROTO_FULL_MASK;
> +			uip_entry->proto = spec.ip_proto;
> +		}
> +		if (spec.match_flags & EFX_FILTER_MATCH_LOC_HOST) {
> +			uip_entry->ip4dst = spec.loc_host[0];
> +			uip_mask->ip4dst = IP4_ADDR_FULL_MASK;
> +
Re: [PATCH v3] net: ethernet: support "fixed-link" DT node on nb8800 driver
Sebastian Frias writes:

> On 02/05/2016 04:08 PM, Måns Rullgård wrote:
>> Sebastian Frias writes:
>>
>>> On 02/05/2016 03:34 PM, Måns Rullgård wrote:
>>>> Sebastian Frias writes:
>>>>
>>>>> Signed-off-by: Sebastian Frias
>>>>
>>>> Please change the subject to something like "net: ethernet: nb8800:
>>>> support fixed-link DT node" and add a comment body.
>>>
>>> The subject is pretty explicit for such a simple patch, what else
>>> could I add that wouldn't be unnecessary chat?
>>
>> It's customary to include a description body even if it's little more
>> than a restatement of the subject. Also, while the subject usually only
>> says _what_ the patch does, the body should additionally state _why_ it
>> is needed.
>
> I understand, but _why_ it is needed is also obvious in this case; I
> mean, without the patch "fixed-link" cannot be used.

Then say so.

> Other patches may not be as obvious/simple and thus justify and
> require more details.
>
> Anyway, I added "Properly handles the case where the PHY is not connected
> to the real MDIO bus" would that be ok?

Have you read Documentation/SubmittingPatches? Do so (again) and pay
special attention to section 2 "Describe your changes."

>>>>> ---
>>>>> drivers/net/ethernet/aurora/nb8800.c | 14 +-
>>>>> 1 file changed, 13 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/drivers/net/ethernet/aurora/nb8800.c
>>>>> b/drivers/net/ethernet/aurora/nb8800.c
>>>>> index ecc4a33..e1fb071 100644
>>>>> --- a/drivers/net/ethernet/aurora/nb8800.c
>>>>> +++ b/drivers/net/ethernet/aurora/nb8800.c
>>>>> @@ -1460,7 +1460,19 @@ static int nb8800_probe(struct platform_device *pdev)
>>>>> 		goto err_disable_clk;
>>>>> 	}
>>>>>
>>>>> -	priv->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0);
>>>>> +	if (of_phy_is_fixed_link(pdev->dev.of_node)) {
>>>>> +		ret = of_phy_register_fixed_link(pdev->dev.of_node);
>>>>> +		if (ret < 0) {
>>>>> +			dev_err(&pdev->dev, "bad fixed-link spec\n");
>>>>> +			goto err_free_bus;
>>>>> +		}
>>>>> +		priv->phy_node = of_node_get(pdev->dev.of_node);
>>>>> +	}
>>>>> +
>>>>> +	if (!priv->phy_node)
>>>>> +		priv->phy_node = of_parse_phandle(pdev->dev.of_node,
>>>>> +						  "phy-handle", 0);
>>>>> +
>>>>> 	if (!priv->phy_node) {
>>>>> 		dev_err(&pdev->dev, "no PHY specified\n");
>>>>> 		ret = -ENODEV;
>>>>> --
>>>>> 2.1.4

-- 
Måns Rullgård
Re: [PATCH v2 net-next 4/4] net: fib: avoid calling fib_flush for each device when doing batch close and unregister
On 02/05/2016 02:35 AM, Salam Noureddine wrote:

> Call fib_flush at the end when closing or unregistering multiple
> devices. This can save walking the fib many times and greatly reduce
> rtnl_lock hold time when unregistering many devices with a fib having
> hundreds of thousands of routes.
>
> Signed-off-by: Salam Noureddine
> ---
> include/net/netns/ipv4.h | 1 +
> net/ipv4/fib_frontend.c  | 16 ++--
> 2 files changed, 15 insertions(+), 2 deletions(-)
>
> diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
> index d75be32..d59a078 100644
> --- a/include/net/netns/ipv4.h
> +++ b/include/net/netns/ipv4.h
[...]
> diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
> index 4734475..808426e 100644
> --- a/net/ipv4/fib_frontend.c
> +++ b/net/ipv4/fib_frontend.c
> @@ -1161,11 +1161,22 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
> 	unsigned int flags;
>
> 	if (event == NETDEV_UNREGISTER) {
> -		fib_disable_ip(dev, event, true);
> +		if (fib_sync_down_dev(dev, event, true))
> +			net->ipv4.needs_fib_flush = true;
> 		rt_flush_dev(dev);
> 		return NOTIFY_DONE;
> 	}
>
> +	if (event == NETDEV_UNREGISTER_BATCH || event == NETDEV_DOWN_BATCH) {
> +		if (net->ipv4.needs_fib_flush) {
> +			fib_flush(net);
> +			net->ipv4.needs_fib_flush = false;
> +		}
> +		rt_cache_flush(net);
> +		arp_ifdown_all();
> +		return NOTIFY_DONE;
> +	}
> +

I'd convert the above 2 *if*s to a *switch*...

[...]

MBR, Sergei
Re: Keystone 2 boards boot failure
On Thursday 04 February 2016 18:25:08 Grygorii Strashko wrote: > > > > I have another version for testing below. That removes the logic that > > splits and reassembles the 64-bit values, but leaves the other changes > > in place. Can you try this? > > > > Nop. It crashes kernel Ah. too bad. >50.28] IPv6: ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready > [ 50.266219] Unable to handle kernel NULL pointer dereference at virtual > address 0001 > [ 50.274287] pgd = c0003000 > [ 50.277007] [0001] *pgd=884003, *pmd= > [ 50.282412] Internal error: Oops: a07 [#1] PREEMPT SMP ARM > [ 50.287881] Modules linked in: > [ 50.290938] CPU: 0 PID: 0 Comm: swapper/0 Tainted: GW > 4.5.0-rc2-00179-gad2f022-dirty #30 > [ 50.300214] Hardware name: Keystone > [ 50.303693] task: c07476c0 ti: c0742000 task.ti: c0742000 > [ 50.309082] PC is at _test_and_set_bit+0x4/0x4c > [ 50.313607] LR is at __netif_schedule+0x1c/0x60 > [ 50.318127] pc : []lr : []psr: 2113 > [ 50.318127] sp : c0743d68 ip : 0001 fp : c0743d7c > [ 50.329568] r10: c0743e00 r9 : c0744100 r8 : 9e75 > [ 50.334775] r7 : r6 : 0040 r5 : de495b00 r4 : 6d3cdb51 > [ 50.341282] r3 : 0001 r2 : c07476c0 r1 : 6d3cdba9 r0 : > [ 50.347790] Flags: nzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment > kernel > [ 50.355077] Control: 30c5387d Table: 1878abc0 DAC: fffd > [ 50.360803] Process swapper/0 (pid: 0, stack limit = 0xc0742210) > [ 50.366790] Stack: (0xc0743d68 to 0xc0744000) > [ 50.371137] 3d60: d9a16a00 de495b00 c0743d94 c0743d80 > c04295a0 c04294e0 > [ 50.379291] 3d80: de7a9cc0 de495b00 c0743dbc c0743d98 c037396c c0429570 > c0061d34 0010 > [ 50.387445] 3da0: de7a9d80 de7a9d80 0040 012c c0743ddc c0743dc0 > c0375f58 c0373848 > [ 50.395599] 3dc0: de7a9d80 c0375f3c 0040 012c c0743e3c c0743de0 > c042a0cc c0375f48 > [ 50.403752] 3de0: c0743e9c c06635f8 c0744b1c c0744b1c c0773c46 1e46b000 > c0741000 debac000 > [ 50.411907] 3e00: c0743e00 c0743e00 c0743e08 c0743e08 de408000 > c074408c c0742000 > [ 50.420061] 3e20: 0101 0003 4003 c0744080 
c0743e9c c0743e40 > c0026670 c0429ed8 > [ 50.428215] 3e40: d8722360 c0744808 0020 c0744100 9e74 c054088c > 000a c07779c0 > [ 50.436369] 3e60: c073d2c8 c0744080 c0743e40 c0742000 c0744808 c073dddc > 004e > [ 50.444522] 3e80: de408000 c0773aa4 c07444fc c0743eb4 c0743ea0 > c0026a38 c0026548 > [ 50.452675] 3ea0: c073dddc 004e c0743edc c0743eb8 c0061d34 c00269c4 > c0744808 e080400c > [ 50.460828] 3ec0: c0743f08 e0804000 e0805000 c0773aa4 c0743f04 c0743ee0 > c0009438 c0061cd8 > [ 50.468981] 3ee0: c0010314 6013 c0743f3c c0773aa4 c0773aa4 > c0743f64 c0743f08 > [ 50.477136] 3f00: c0013a80 c0009404 deba8348 70c8 c001f880 > c0742000 c07444b0 > [ 50.485289] 3f20: c073d324 c0743f78 c0773aa4 c0773aa4 c07444fc c0743f64 > c0743f68 c0743f58 > [ 50.493442] 3f40: c0010310 c0010314 6013 c006d624 c006a15c > c0743f74 c0743f68 > [ 50.501595] 3f60: c00598c0 c00102e0 c0743f8c c0743f78 c00599dc c00598a4 > 0002 > [ 50.509749] 3f80: c0743fa4 c0743f90 c0538468 c00598d8 c0777050 > c0743ff4 c0743fa8 > [ 50.517902] 3fa0: c06fad60 c05383e4 c06fa6d8 > > [ 50.526056] 3fc0: c0731a30 c0777294 c0744484 c0731a2c > c0748878 80007000 > [ 50.534210] 3fe0: 412fc0f4 c0743ff8 80008090 c06fa964 > > [ 50.542357] Backtrace: > [ 50.544816] [] (__netif_schedule) from [] > (netif_wake_subqueue+0x3c/0x44) > [ 50.553312] r5:de495b00 r4:d9a16a00 > [ 50.556909] [] (netif_wake_subqueue) from [] > (netcp_process_tx_compl_packets+0x130/0x134) > [ 50.566789] r5:de495b00 r4:de7a9cc0 > [ 50.570381] [] (netcp_process_tx_compl_packets) from > [] (netcp_tx_poll+0x1c/0x4c) > [ 50.579570] r7:012c r6:0040 r5:de7a9d80 r4:de7a9d80 > [ 50.585258] [] (netcp_tx_poll) from [] > (net_rx_action+0x200/0x2f8) > [ 50.593148] r7:012c r6:0040 r5:c0375f3c r4:de7a9d80 > [ 50.598833] [] (net_rx_action) from [] > (__do_softirq+0x134/0x258) > [ 50.606637] r10:c0744080 r9:4003 r8:0003 r7:0101 r6:c0742000 > r5:c074408c > [ 50.614486] r4: > [ 50.617023] [] (__do_softirq) from [] > (irq_exit+0x80/0xb8) > [ 50.624221] r10:c07444fc r9:c0773aa4 
r8:de408000 r7: r6: > r5:004e > [ 50.632069] r4:c073dddc > [ 50.634608] [] (irq_exit) from [] > (__handle_domain_irq+0x68/0xbc) > [ 50.642410] r5:004e r4:c073dddc > [ 50.645996] [] (__handle_domain_irq) from [] > (gic_handle_irq+0x40/0x78) > This is a different bug now, something is corrupting the skb pointer, probably as a result of the patch below (which is a subset of what is now applied compared to the last
[PATCH] net: cavium: liquidio: fix check for in progress flag
From: Colin Ian King

smatch detected a suspicious looking bitop condition:

drivers/net/ethernet/cavium/liquidio/lio_main.c:2529 handle_timestamp()
warn: suspicious bitop condition

(skb_shinfo(skb)->tx_flags | SKBTX_IN_PROGRESS) is always non-zero, so
the logic is definitely not correct. Use & to mask the correct bit.

Signed-off-by: Colin Ian King
---
 drivers/net/ethernet/cavium/liquidio/lio_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 8727655..06b6be8 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -2524,7 +2524,7 @@ static void handle_timestamp(struct octeon_device *oct,
 	octeon_swap_8B_data(&resp->timestamp, 1);
 
-	if (unlikely((skb_shinfo(skb)->tx_flags | SKBTX_IN_PROGRESS) != 0)) {
+	if (unlikely((skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS) != 0)) {
 		struct skb_shared_hwtstamps ts;
 		u64 ns = resp->timestamp;
-- 
2.7.0
Re: [PATCH v3] net: ethernet: support "fixed-link" DT node on nb8800 driver
Sebastian Frias writes:

> On 02/05/2016 03:34 PM, Måns Rullgård wrote:
>> Sebastian Frias writes:
>>
>>> Signed-off-by: Sebastian Frias
>>
>> Please change the subject to something like "net: ethernet: nb8800:
>> support fixed-link DT node" and add a comment body.
>
> The subject is pretty explicit for such a simple patch, what else
> could I add that wouldn't be unnecessary chat?

It's customary to include a description body even if it's little more
than a restatement of the subject. Also, while the subject usually only
says _what_ the patch does, the body should additionally state _why_ it
is needed.

>>> ---
>>> drivers/net/ethernet/aurora/nb8800.c | 14 +-
>>> 1 file changed, 13 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/net/ethernet/aurora/nb8800.c
>>> b/drivers/net/ethernet/aurora/nb8800.c
>>> index ecc4a33..e1fb071 100644
>>> --- a/drivers/net/ethernet/aurora/nb8800.c
>>> +++ b/drivers/net/ethernet/aurora/nb8800.c
>>> @@ -1460,7 +1460,19 @@ static int nb8800_probe(struct platform_device *pdev)
>>> 		goto err_disable_clk;
>>> 	}
>>>
>>> -	priv->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0);
>>> +	if (of_phy_is_fixed_link(pdev->dev.of_node)) {
>>> +		ret = of_phy_register_fixed_link(pdev->dev.of_node);
>>> +		if (ret < 0) {
>>> +			dev_err(&pdev->dev, "bad fixed-link spec\n");
>>> +			goto err_free_bus;
>>> +		}
>>> +		priv->phy_node = of_node_get(pdev->dev.of_node);
>>> +	}
>>> +
>>> +	if (!priv->phy_node)
>>> +		priv->phy_node = of_parse_phandle(pdev->dev.of_node,
>>> +						  "phy-handle", 0);
>>> +
>>> 	if (!priv->phy_node) {
>>> 		dev_err(&pdev->dev, "no PHY specified\n");
>>> 		ret = -ENODEV;
>>> --
>>> 2.1.4

-- 
Måns Rullgård
Re: [PATCH v3] net: ethernet: support "fixed-link" DT node on nb8800 driver
On 02/05/2016 03:34 PM, Måns Rullgård wrote:
> Sebastian Frias writes:
>
>> Signed-off-by: Sebastian Frias
>
> Please change the subject to something like "net: ethernet: nb8800:
> support fixed-link DT node" and add a comment body.

The subject is pretty explicit for such a simple patch, what else could
I add that wouldn't be unnecessary chat?

>> ---
>> drivers/net/ethernet/aurora/nb8800.c | 14 +-
>> 1 file changed, 13 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/net/ethernet/aurora/nb8800.c
>> b/drivers/net/ethernet/aurora/nb8800.c
>> index ecc4a33..e1fb071 100644
>> --- a/drivers/net/ethernet/aurora/nb8800.c
>> +++ b/drivers/net/ethernet/aurora/nb8800.c
>> @@ -1460,7 +1460,19 @@ static int nb8800_probe(struct platform_device *pdev)
>> 		goto err_disable_clk;
>> 	}
>>
>> -	priv->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0);
>> +	if (of_phy_is_fixed_link(pdev->dev.of_node)) {
>> +		ret = of_phy_register_fixed_link(pdev->dev.of_node);
>> +		if (ret < 0) {
>> +			dev_err(&pdev->dev, "bad fixed-link spec\n");
>> +			goto err_free_bus;
>> +		}
>> +		priv->phy_node = of_node_get(pdev->dev.of_node);
>> +	}
>> +
>> +	if (!priv->phy_node)
>> +		priv->phy_node = of_parse_phandle(pdev->dev.of_node,
>> +						  "phy-handle", 0);
>> +
>> 	if (!priv->phy_node) {
>> 		dev_err(&pdev->dev, "no PHY specified\n");
>> 		ret = -ENODEV;
>> --
>> 2.1.4
[PATCH v4] net: ethernet: nb8800: support fixed-link DT node
Properly handles the case where the PHY is not connected to the real
MDIO bus.

Signed-off-by: Sebastian Frias
---
 drivers/net/ethernet/aurora/nb8800.c | 14 +-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/aurora/nb8800.c b/drivers/net/ethernet/aurora/nb8800.c
index ecc4a33..e1fb071 100644
--- a/drivers/net/ethernet/aurora/nb8800.c
+++ b/drivers/net/ethernet/aurora/nb8800.c
@@ -1460,7 +1460,19 @@ static int nb8800_probe(struct platform_device *pdev)
 		goto err_disable_clk;
 	}
 
-	priv->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0);
+	if (of_phy_is_fixed_link(pdev->dev.of_node)) {
+		ret = of_phy_register_fixed_link(pdev->dev.of_node);
+		if (ret < 0) {
+			dev_err(&pdev->dev, "bad fixed-link spec\n");
+			goto err_free_bus;
+		}
+		priv->phy_node = of_node_get(pdev->dev.of_node);
+	}
+
+	if (!priv->phy_node)
+		priv->phy_node = of_parse_phandle(pdev->dev.of_node,
+						  "phy-handle", 0);
+
 	if (!priv->phy_node) {
 		dev_err(&pdev->dev, "no PHY specified\n");
 		ret = -ENODEV;
-- 
2.1.4
Re: gigaset: memory leak in gigaset_initcshw
Hi Dmitry,

(If anyone is confused by this conversation: Dmitry replied to an off
list message.)

On Fri, 2016-02-05 at 14:28 +0100, Dmitry Vyukov wrote:
> I wonder why you don't see the leak I am seeing...

So do I, for a few days now.

> are you using qemu or real hardware? I am using qemu.

Real hardware (a ThinkPad). Probably less powerful than your VM.

What is the rate you're seeing leakage of a struct ser_cardstate? I'm
running your latest test at about 2,000 TIOCSETD's per second - which is
by itself not very useful for our driver - and notice no _obvious_
leakage when I do that for a few minutes. I do note the hardware
screaming to just keep up with the abuse, though.

> I've added the following change:
>
> --- a/drivers/isdn/gigaset/ser-gigaset.c
> +++ b/drivers/isdn/gigaset/ser-gigaset.c
> @@ -396,6 +396,7 @@ static int gigaset_initcshw(struct cardstate *cs)
> 		pr_err("out of memory\n");
> 		return -ENOMEM;
> 	}
> +	WARN_ON(cs->hw.ser != NULL);
> 	cs->hw.ser = scs;
>
> 	cs->hw.ser->dev.name = GIGASET_MODULENAME;
>
> and it does fire.
> Can it be a case that free_cs() runs before gigaset_device_release()?

gigaset_device_release() is the release operation that is run when our
struct device goes away. The core code is responsible for calling it; we
can't be certain when that will happen. At least, we should not expect
it to happen directly after calling platform_device_unregister(). (It
was actually syzkaller that warned us that we did just that until
recently. See commit 4c5e354a9742 ("ser_gigaset: fix deallocation of
platform device structure").)

> If that would happen, then cs can be reused while the previous
> cs->hw.ser is not freed yet. Just a guess.

I'll have to ponder on that a bit, sorry.

Paul Bolle
Re: Keystone 2 boards boot failure
hi Arnd, On 02/05/2016 06:18 PM, Arnd Bergmann wrote: > On Thursday 04 February 2016 18:25:08 Grygorii Strashko wrote: >>> >>> I have another version for testing below. That removes the logic that >>> splits and reassembles the 64-bit values, but leaves the other changes >>> in place. Can you try this? >>> >> >> Nop. It crashes kernel > > Ah. too bad. > >> 50.28] IPv6: ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready >> [ 50.266219] Unable to handle kernel NULL pointer dereference at virtual >> address 0001 >> [ 50.274287] pgd = c0003000 >> [ 50.277007] [0001] *pgd=884003, *pmd= >> [ 50.282412] Internal error: Oops: a07 [#1] PREEMPT SMP ARM >> [ 50.287881] Modules linked in: >> [ 50.290938] CPU: 0 PID: 0 Comm: swapper/0 Tainted: GW >> 4.5.0-rc2-00179-gad2f022-dirty #30 >> [ 50.300214] Hardware name: Keystone >> [ 50.303693] task: c07476c0 ti: c0742000 task.ti: c0742000 >> [ 50.309082] PC is at _test_and_set_bit+0x4/0x4c >> [ 50.313607] LR is at __netif_schedule+0x1c/0x60 >> [ 50.318127] pc : []lr : []psr: 2113 >> [ 50.318127] sp : c0743d68 ip : 0001 fp : c0743d7c >> [ 50.329568] r10: c0743e00 r9 : c0744100 r8 : 9e75 >> [ 50.334775] r7 : r6 : 0040 r5 : de495b00 r4 : 6d3cdb51 >> [ 50.341282] r3 : 0001 r2 : c07476c0 r1 : 6d3cdba9 r0 : >> [ 50.347790] Flags: nzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment >> kernel >> [ 50.355077] Control: 30c5387d Table: 1878abc0 DAC: fffd >> [ 50.360803] Process swapper/0 (pid: 0, stack limit = 0xc0742210) >> [ 50.366790] Stack: (0xc0743d68 to 0xc0744000) >> [ 50.371137] 3d60: d9a16a00 de495b00 c0743d94 c0743d80 >> c04295a0 c04294e0 >> [ 50.379291] 3d80: de7a9cc0 de495b00 c0743dbc c0743d98 c037396c c0429570 >> c0061d34 0010 >> [ 50.387445] 3da0: de7a9d80 de7a9d80 0040 012c c0743ddc c0743dc0 >> c0375f58 c0373848 >> [ 50.395599] 3dc0: de7a9d80 c0375f3c 0040 012c c0743e3c c0743de0 >> c042a0cc c0375f48 >> [ 50.403752] 3de0: c0743e9c c06635f8 c0744b1c c0744b1c c0773c46 1e46b000 >> c0741000 debac000 >> [ 50.411907] 3e00: c0743e00 
c0743e00 c0743e08 c0743e08 de408000 >> c074408c c0742000 >> [ 50.420061] 3e20: 0101 0003 4003 c0744080 c0743e9c c0743e40 >> c0026670 c0429ed8 >> [ 50.428215] 3e40: d8722360 c0744808 0020 c0744100 9e74 c054088c >> 000a c07779c0 >> [ 50.436369] 3e60: c073d2c8 c0744080 c0743e40 c0742000 c0744808 c073dddc >> 004e >> [ 50.444522] 3e80: de408000 c0773aa4 c07444fc c0743eb4 c0743ea0 >> c0026a38 c0026548 >> [ 50.452675] 3ea0: c073dddc 004e c0743edc c0743eb8 c0061d34 c00269c4 >> c0744808 e080400c >> [ 50.460828] 3ec0: c0743f08 e0804000 e0805000 c0773aa4 c0743f04 c0743ee0 >> c0009438 c0061cd8 >> [ 50.468981] 3ee0: c0010314 6013 c0743f3c c0773aa4 c0773aa4 >> c0743f64 c0743f08 >> [ 50.477136] 3f00: c0013a80 c0009404 deba8348 70c8 c001f880 >> c0742000 c07444b0 >> [ 50.485289] 3f20: c073d324 c0743f78 c0773aa4 c0773aa4 c07444fc c0743f64 >> c0743f68 c0743f58 >> [ 50.493442] 3f40: c0010310 c0010314 6013 c006d624 c006a15c >> c0743f74 c0743f68 >> [ 50.501595] 3f60: c00598c0 c00102e0 c0743f8c c0743f78 c00599dc c00598a4 >> 0002 >> [ 50.509749] 3f80: c0743fa4 c0743f90 c0538468 c00598d8 c0777050 >> c0743ff4 c0743fa8 >> [ 50.517902] 3fa0: c06fad60 c05383e4 c06fa6d8 >> >> [ 50.526056] 3fc0: c0731a30 c0777294 c0744484 c0731a2c >> c0748878 80007000 >> [ 50.534210] 3fe0: 412fc0f4 c0743ff8 80008090 c06fa964 >> >> [ 50.542357] Backtrace: >> [ 50.544816] [] (__netif_schedule) from [] >> (netif_wake_subqueue+0x3c/0x44) >> [ 50.553312] r5:de495b00 r4:d9a16a00 >> [ 50.556909] [] (netif_wake_subqueue) from [] >> (netcp_process_tx_compl_packets+0x130/0x134) >> [ 50.566789] r5:de495b00 r4:de7a9cc0 >> [ 50.570381] [] (netcp_process_tx_compl_packets) from >> [] (netcp_tx_poll+0x1c/0x4c) >> [ 50.579570] r7:012c r6:0040 r5:de7a9d80 r4:de7a9d80 >> [ 50.585258] [] (netcp_tx_poll) from [] >> (net_rx_action+0x200/0x2f8) >> [ 50.593148] r7:012c r6:0040 r5:c0375f3c r4:de7a9d80 >> [ 50.598833] [] (net_rx_action) from [] >> (__do_softirq+0x134/0x258) >> [ 50.606637] r10:c0744080 r9:4003 r8:0003 r7:0101 r6:c0742000 
>> r5:c074408c >> [ 50.614486] r4: >> [ 50.617023] [] (__do_softirq) from [] >> (irq_exit+0x80/0xb8) >> [ 50.624221] r10:c07444fc r9:c0773aa4 r8:de408000 r7: r6: >> r5:004e >> [ 50.632069] r4:c073dddc >> [ 50.634608] [] (irq_exit) from [] >> (__handle_domain_irq+0x68/0xbc) >> [ 50.642410] r5:004e r4:c073dddc >> [ 50.645996] [] (__handle_domain_irq) from [] >> (gic_handle_irq+0x40/0x78) >> > > This is a
Re: [RFC v2] iwlwifi: pcie: transmit queue auto-sizing
> A bursted txop can be as big as 5-10ms. If you consider you want to > queue 5-10ms worth of data for *each* station at any given time you > obviously introduce a lot of lag. If you have 10 stations you might > end up with service period at 10*10ms = 100ms. This gets even worse if > you consider MU-MIMO because you need to do an expensive sounding > procedure before transmitting. So while SU aggregation can probably > still work reasonably well with shorter bursts (1-2ms) MU needs at > least 3ms to get *any* gain when compared to SU (which obviously means > you want more to actually make MU pay off). I am not sure where you get these numbers. Got a spreadsheet? Gradually reducing the maximum sized txop as a function of the number of stations makes sense. If you have 10 stations pending delivery and reduced the max txop to 1ms, you hurt bandwidth at that instant, but by offering more service to more stations, in less time, they will converge on a reasonable share of the bandwidth for each, faster[1]. And I'm sure that the person videoconferencing on a link like that would appreciate getting some service inside of a 10ms interval, rather than a 100ms. yes, there's overhead, and that's not the right number, which would vary as to g,n,ac and successors. You will also get more opportunities to use mu-mimo with shorter bursts extant and more stations being regularly serviced. [1] https://www.youtube.com/watch?v=Rb-UnHDw02o at about 13:50 > The rule of thumb is the > longer you wait the bigger capacity you can get. This is not strictly true as the "fountain" of packets is regulated by acks on the other side of the link, and ramp up or down as a function of service time and loss. > > Apparently there's interest in maximizing throughput but it stands in > direct opposition of keeping the latency down so I've been thinking > how to satisfy both. 
> The current approach ath10k is taking (patches in review [1][2]) is to
> use mac80211 software queues for per-station queuing, exposing queue
> state to firmware (it decides where frames should be dequeued from)
> and making it possible to stop/wake per-station tx subqueue with fake
> netdev queues. I'm starting to think this is not the right way though
> because it's inherently hard to control latency and there's a huge
> memory overhead associated with the fake netdev queues.

What is this overhead? Applying things like codel tends to dramatically
shorten the number of skbs extant... modern 802.11ac capable hardware
has tons more memory...

> Also fq_codel is less effective with this kind of setup.

fq_codel's principal problems with working with wifi are long and
documented in the talk above.

> My current thinking is that the entire problem should be solved via
> (per-AC) qdiscs, e.g. fq_codel. I guess one could use
> limit/target/interval/quantum knobs to tune it for higher latency of
> aggregation-oriented Wi-Fi links where long service time (think
> 100-200ms) is acceptable. However fq_codel is oblivious to how Wi-Fi
> works in the first place, i.e. Wi-Fi gets better throughput if you
> deliver bursts of packets destined to the same station. Moreover this
> gets even more complicated with MU-MIMO where you may want to consider
> spatial location (which influences signal quality when grouped) of
> each station when you decide which set of stations you're going to
> aggregate to in parallel. Since drivers have a finite tx ring it is
> important to deliver bursts that can actually be aggregated
> efficiently. This means the driver would need to be able to tell the
> qdisc about per-flow conditions to influence the RR scheme in some way
> (assuming a qdisc even understands flows; do we need a unified way of
> talking about flows between qdiscs and drivers?).
This is a very good summary of the problems in layering fq_codel as it
exists today on top of wifi as it exists today. :/

Our conclusion several years ago was that, since the information needed
to do things more right lives in the mac80211 layer, we could not evolve
the qdisc layer to suit and needed to move the core ideas into the
mac80211 layer. Things have evolved since, but I still think we can't
get enough info up to the qdisc layer (locks and so on) to use it
sanely.

> [1]: https://www.spinics.net/lists/linux-wireless/msg146187.html
> [2]: https://www.spinics.net/lists/linux-wireless/msg146512.html

I will review!

>>>>> For reference, ath10k has around 1400 tx descriptors, though in
>>>>> practice not all are usable, and in stock firmware, I'm guessing
>>>>> the NIC will never be able to actually fill up its tx descriptors
>>>>> and stop traffic. Instead, it just allows the stack to try to TX,
>>>>> then drops the frame...
>>>
>>> 1400 descriptors, ok... but they are not organised in queues?
>>> (forgive my ignorance of athX drivers)
>>
>> I think all the details are in the firmware, at least for now.
>
> Yeah. Basically ath10k has a flat set of tx descriptors which are
> AC-agnostic.
Re: [RFC v2] iwlwifi: pcie: transmit queue auto-sizing
On 02/05/2016 12:44 AM, Michal Kazior wrote:
> Per-station queues sound tricky if you consider bufferbloat. To
> maximize use of airtime (i.e. txop) you need to send big aggregates.
> Since aggregates are per station-tid to maximize multi-station
> performance (in AP mode) you'll need to queue a lot of frames, per
> each station, depending on the chosen tx rate.
>
> A bursted txop can be as big as 5-10ms. If you consider you want to
> queue 5-10ms worth of data for *each* station at any given time you
> obviously introduce a lot of lag. If you have 10 stations you might
> end up with service period at 10*10ms = 100ms. This gets even worse if
> you consider MU-MIMO because you need to do an expensive sounding
> procedure before transmitting. So while SU aggregation can probably
> still work reasonably well with shorter bursts (1-2ms) MU needs at
> least 3ms to get *any* gain when compared to SU (which obviously means
> you want more to actually make MU pay off). The rule of thumb is the
> longer you wait the bigger capacity you can get.
>
> Apparently there's interest in maximizing throughput but it stands in
> direct opposition of keeping the latency down so I've been thinking
> how to satisfy both.

I really think this should be tunable. For instance, someone making an
AP that is mostly for letting lots of users stream movies would care a
lot more about throughput than someone making an AP that is mainly for
browsing the web and doing more latency-sensitive activities.

> The current approach ath10k is taking (patches in review [1][2]) is to
> use mac80211 software queues for per-station queuing, exposing queue
> state to firmware (it decides where frames should be dequeued from)
> and making it possible to stop/wake per-station tx subqueue with fake
> netdev queues. I'm starting to think this is not the right way though
> because it's inherently hard to control latency and there's a huge
> memory overhead associated with the fake netdev queues. Also fq_codel
> is less effective with this kind of setup.
>
> My current thinking is that the entire problem should be solved via
> (per-AC) qdiscs, e.g. fq_codel. I guess one could use
> limit/target/interval/quantum knobs to tune it for higher latency of
> aggregation-oriented Wi-Fi links where long service time (think
> 100-200ms) is acceptable. However fq_codel is oblivious to how Wi-Fi
> works in the first place, i.e. Wi-Fi gets better throughput if you
> deliver bursts of packets destined to the same station. Moreover this
> gets even more complicated with MU-MIMO where you may want to consider
> spatial location (which influences signal quality when grouped) of
> each station when you decide which set of stations you're going to
> aggregate to in parallel. Since drivers have a finite tx ring it is
> important to deliver bursts that can actually be aggregated
> efficiently. This means the driver would need to be able to tell the
> qdisc about per-flow conditions to influence the RR scheme in some way
> (assuming a qdisc even understands flows; do we need a unified way of
> talking about flows between qdiscs and drivers?).

I wonder if it would work better if we removed most of the tid handling
and aggregation logic in the firmware. Maybe just have the mgt Q and
best effort (and skip VO/VI). Let the OS tell (or suggest to) the
firmware when aggregation starts and stops. That might at least cut the
number of queues in half, saving memory and latency up and down the
stack.

Thanks,
Ben

-- 
Ben Greear
Candela Technologies Inc
http://www.candelatech.com
Re: [PATCH net-next 1/2] ethtool: add IPv6 to the NFC API
On Fri, 2016-02-05 at 11:16 +, Edward Cree wrote:
> Signed-off-by: Edward Cree

Reviewed-by: Ben Hutchings

> ---
> include/uapi/linux/ethtool.h | 70 
> 1 file changed, 64 insertions(+), 6 deletions(-)
>
> diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
> index 57fa390..7faaf36 100644
> --- a/include/uapi/linux/ethtool.h
> +++ b/include/uapi/linux/ethtool.h
> @@ -748,6 +748,56 @@ struct ethtool_usrip4_spec {
> 	__u8	proto;
> };
>
> +/**
> + * struct ethtool_tcpip6_spec - flow specification for TCP/IPv6 etc.
> + * @ip6src: Source host
> + * @ip6dst: Destination host
> + * @psrc: Source port
> + * @pdst: Destination port
> + * @tclass: Traffic Class
> + *
> + * This can be used to specify a TCP/IPv6, UDP/IPv6 or SCTP/IPv6 flow.
> + */
> +struct ethtool_tcpip6_spec {
> +	__be32	ip6src[4];
> +	__be32	ip6dst[4];
> +	__be16	psrc;
> +	__be16	pdst;
> +	__u8	tclass;
> +};
> +
> +/**
> + * struct ethtool_ah_espip6_spec - flow specification for IPsec/IPv6
> + * @ip6src: Source host
> + * @ip6dst: Destination host
> + * @spi: Security parameters index
> + * @tclass: Traffic Class
> + *
> + * This can be used to specify an IPsec transport or tunnel over IPv6.
> + */
> +struct ethtool_ah_espip6_spec {
> +	__be32	ip6src[4];
> +	__be32	ip6dst[4];
> +	__be32	spi;
> +	__u8	tclass;
> +};
> +
> +/**
> + * struct ethtool_usrip6_spec - general flow specification for IPv6
> + * @ip6src: Source host
> + * @ip6dst: Destination host
> + * @l4_4_bytes: First 4 bytes of transport (layer 4) header
> + * @tclass: Traffic Class
> + * @l4_proto: Transport protocol number (nexthdr after any Extension Headers)
> + */
> +struct ethtool_usrip6_spec {
> +	__be32	ip6src[4];
> +	__be32	ip6dst[4];
> +	__be32	l4_4_bytes;
> +	__u8	tclass;
> +	__u8	l4_proto;
> +};
> +
> union ethtool_flow_union {
> 	struct ethtool_tcpip4_spec	tcp_ip4_spec;
> 	struct ethtool_tcpip4_spec	udp_ip4_spec;
> @@ -755,6 +805,12 @@ union ethtool_flow_union {
> 	struct ethtool_ah_espip4_spec	ah_ip4_spec;
> 	struct ethtool_ah_espip4_spec	esp_ip4_spec;
> 	struct ethtool_usrip4_spec	usr_ip4_spec;
> +	struct ethtool_tcpip6_spec	tcp_ip6_spec;
> +	struct ethtool_tcpip6_spec	udp_ip6_spec;
> +	struct ethtool_tcpip6_spec	sctp_ip6_spec;
> +	struct ethtool_ah_espip6_spec	ah_ip6_spec;
> +	struct ethtool_ah_espip6_spec	esp_ip6_spec;
> +	struct ethtool_usrip6_spec	usr_ip6_spec;
> 	struct ethhdr			ether_spec;
> 	__u8				hdata[52];
> };
> @@ -1367,15 +1423,17 @@ enum ethtool_sfeatures_retval_bits {
> #define UDP_V4_FLOW	0x02	/* hash or spec (udp_ip4_spec) */
> #define SCTP_V4_FLOW	0x03	/* hash or spec (sctp_ip4_spec) */
> #define AH_ESP_V4_FLOW	0x04	/* hash only */
> -#define TCP_V6_FLOW	0x05	/* hash only */
> -#define UDP_V6_FLOW	0x06	/* hash only */
> -#define SCTP_V6_FLOW	0x07	/* hash only */
> +#define TCP_V6_FLOW	0x05	/* hash or spec (tcp_ip6_spec; nfc only) */
> +#define UDP_V6_FLOW	0x06	/* hash or spec (udp_ip6_spec; nfc only) */
> +#define SCTP_V6_FLOW	0x07	/* hash or spec (sctp_ip6_spec; nfc only) */
> #define AH_ESP_V6_FLOW	0x08	/* hash only */
> #define AH_V4_FLOW	0x09	/* hash or spec (ah_ip4_spec) */
> #define ESP_V4_FLOW	0x0a	/* hash or spec (esp_ip4_spec) */
> -#define AH_V6_FLOW	0x0b	/* hash only */
> -#define ESP_V6_FLOW	0x0c	/* hash only */
> -#define IP_USER_FLOW	0x0d	/* spec only (usr_ip4_spec) */
> +#define AH_V6_FLOW	0x0b	/* hash or spec (ah_ip6_spec; nfc only) */
> +#define ESP_V6_FLOW	0x0c	/* hash or spec (esp_ip6_spec; nfc only) */
> +#define IPV4_USER_FLOW	0x0d	/* spec only (usr_ip4_spec) */
> +#define IP_USER_FLOW	IPV4_USER_FLOW
> +#define IPV6_USER_FLOW	0x0e	/* spec only (usr_ip6_spec; nfc only) */
> #define IPV4_FLOW	0x10	/* hash only */
> #define IPV6_FLOW	0x11	/* hash only */
> #define ETHER_FLOW	0x12	/* spec only (ether_spec) */

-- 
Ben Hutchings
It is a miracle that curiosity survives formal education.
- Albert Einstein
Re: [PATCH net-next v1 2/2] tipc: fix link priority propagation
From: Richard Alpe Date: Mon, 1 Feb 2016 08:19:57 +0100 > Currently link priority changes aren't handled for active links. In > this patch we resolve this by changing our priority if the peer passes > a valid priority in a state message. > > Reviewed-by: Jon Maloy > Signed-off-by: Richard Alpe Applied.
Re: [PATCH net-next v1 1/2] tipc: fix link attribute propagation bug
From: Richard Alpe Date: Mon, 1 Feb 2016 08:19:56 +0100 > Changing certain link attributes (link tolerance and link priority) > from the TIPC management tool is supposed to automatically take > effect at both endpoints of the affected link. > > Currently the media address is not instantiated for the link and is > used uninstantiated when crafting protocol messages designated for the > peer endpoint. This means that changing a link property currently > results in the property being changed on the local machine but the > protocol message designated for the peer gets lost, resulting in a > property discrepancy between the endpoints. > > In this patch we resolve this by using the media address from the > link entry and using the bearer transmit function to send it. Hence, > we can now eliminate the redundant function tipc_link_prot_xmit() and > the redundant field tipc_link::media_addr. > > Fixes: 2af5ae372a4b (tipc: clean up unused code and structures) > Reviewed-by: Jon Maloy > Reported-by: Jason Hu > Signed-off-by: Richard Alpe Applied.
Re: [PATCH net 0/4] net: phy: bcm7xxx 40nm PHY fixes
From: Florian Fainelli Date: Fri, 5 Feb 2016 17:25:50 -0800 > On 03/02/16 13:13, Florian Fainelli wrote: >> Hi David, >> >> Here is a collection of fixes for the 40nm Ethernet PHY supported >> by the 7xxx PHY driver, please also queue these fixes for stable. >> >> Let me know if you think patch 4 is too much of a cleanup to be taken >> as a fix. > > David, if you have not applied these yet (patchwork says they are under > review), I have another fix coming your way, and patch 4 should probably > be targeted at net-next, and bundled with another cleanup. > > Let me know what's the outcome and I will re-submit where appropriate, > thanks! I haven't applied any of this so please feel free to resubmit updated versions as needed. Thanks.
Re: [net-next 00/20][pull request] 40GbE Intel Wired LAN Driver Updates 2016-02-03
From: Jeff Kirsher Date: Thu, 4 Feb 2016 03:48:51 -0800 > This series contains updates to i40e and i40evf only. Pulled, thanks Jeff.
Re: [PATCH net v4] r8169: Add bios support.
From: Corcodel Marian Date: Sat, 6 Feb 2016 08:31:26 +0200 > @@ -8262,8 +8262,9 @@ static int rtl_init_one(struct pci_dev *pdev, const > struct pci_device_id *ent) > rtl_hw_reset(tp); > > rtl_ack_events(tp, 0x); > - > +#ifndef CONFIG_BIOS_SUPPORT > pci_set_master(pdev); > +#endif I already made it clear to you that it is perfectly fine to unconditionally call pci_set_master() from the driver in any circumstance. Look across all of the drivers under drivers/net/ that invoke this function, it's meant to be used this way and it's fine. There is absolutely no reason to conditionalize it. Thanks.
Re: gigaset: memory leak in gigaset_initcshw
On Fri, 2016-02-05 at 17:06 +0100, Paul Bolle wrote: > If that would happen, then cs can be reused while the previous > > cs->hw.ser is not freed yet. Just a guess. > > I'll have to ponder on that a bit, sorry. This is from the hit-the-code-until-it-confesses department: --- a/drivers/isdn/gigaset/ser-gigaset.c +++ b/drivers/isdn/gigaset/ser-gigaset.c @@ -373,13 +373,9 @@ static void gigaset_freecshw(struct cardstate *cs) static void gigaset_device_release(struct device *dev) { - struct cardstate *cs = dev_get_drvdata(dev); - - if (!cs) - return; + struct ser_cardstate *scs = dev_get_drvdata(dev); dev_set_drvdata(dev, NULL); - kfree(cs->hw.ser); - cs->hw.ser = NULL; + kfree(scs); } /* @@ -408,7 +404,7 @@ static int gigaset_initcshw(struct cardstate *cs) cs->hw.ser = NULL; return rc; } - dev_set_drvdata(&cs->hw.ser->dev.dev, cs); + dev_set_drvdata(&cs->hw.ser->dev.dev, scs); tasklet_init(&cs->write_tasklet, gigaset_modem_fill, (unsigned long) cs); Does that make any difference? Paul Bolle
Re: [patch net-next RFC 0/6] Introduce devlink interface and first drivers to use it
On Fri, Feb 05, 2016 at 11:01:22AM +0100, Hannes Frederic Sowa wrote: > > Okay. I see it more as changing mode of operation of hardware and thus has > not really anything to do with networking. If you say you change ethernet to > infiniband it has something to do with networking, sure. But I am fine with > this, I just thought the code size could be reduced by adding this to sysfs > quite a lot. I don't have a strong opinion on this. there is already a way to change eth/ib via echo 'eth' > /sys/bus/pci/drivers/mlx4_core/0000:02:00.0/mlx4_port1 sounds like this is another way to achieve the same? Why not hide echo/cat in iproute2 instead of adding a parallel netlink api? Or is this for switches instead of nics? Then why is it not added to switchdev?
Re: [PATCH] flowi: add concept of "not_oif"
On Thu, Feb 4, 2016 at 4:44 AM, Eric Dumazet wrote: > On Wed, 2016-02-03 at 22:02 +0100, Jason A. Donenfeld wrote: > >> >> > I don't know about the particular problems with >> > tunnels but the scripts can use the route metric to order >> > the routes in a table. >> >> This unfortunately does not cut it with tunnels. > > ip rule show > > ip route show table 10 > > I am pretty sure that you could select/change skb mark when packets > traverse the tunnel : The second route lookup can then select a > completely different table. This doesn't work. Not to mention the fact that ip-rules aren't cleaned up when the interface is removed and the issues with having multiple routing tables, the following only works for very narrow cases: (212.47.239.81 is the IP of a VPN endpoint, for example below) $ ip rule add to 212.47.239.81 lookup main pref 30 $ ip rule add to all lookup 80 pref 40 $ ip route add default dev tun0 table 80 The problem is -- what happens when you have particular routes that you'd like specifically to go over your original network connection, not the tunnel? I am now not able to do this: $ ip route add 1.2.3.4/32 dev eth0 Because it will examine the second rule. Moving everything to the second routing table obviously isn't a solution either for all the reasons listed in my first email. Everything is complicated, partially broken, and altogether unrobust this way. Sorry, but the current routing facilities of Linux are woefully insufficient for extremely commonplace modern day tunneling. The solution I've offered here is extremely simple, easy, and non-intrusive to implement. You will be the best friend of many network administrators and ordinary daily users alike. Just imagine if authors of userspace tunneling utilities could write things like: setsockopt(fd, SO_NOTOIF, tun0_idx); Or kernel tunneling utilities being able to write: .flowi4_not_oif = geneve0_idx; And then never have to worry about routing loops or bizarre situations? 
And have everything nicely cleaned up when the interface goes away? And be able to continue using all the same old routing tools and schemes as before, with no need for enormous reimplementations and daemons and endless bloat? This is a simple feature that will go a very long way. There is no current solution that comes anywhere close to solving the real problem. Please consider it. Thanks, Jason
Re: Keystone 2 boards boot failure
On 02/03/2016 03:13 PM, santosh shilimkar wrote: > On 2/3/2016 10:47 AM, Murali Karicheri wrote: >> On 02/03/2016 12:08 PM, santosh shilimkar wrote: >>> On 2/3/2016 8:35 AM, Arnd Bergmann wrote: > > [..] > It would be nice to give this a go once the network driver problem is solved. >>> Big endian kernel has worked on Keystone in past. >> >> Yes, this was on a v3.10.x baseline, not in the upstream. >> > That's what I mean in past. That time upstream didn't have > ARM BE patches, otherwise there was no other dependency. > >>> Yes, above secondary hook needs to be modified along with >>> drivers endian macro conversion was what was needed IIRC. >>> >> >> To support BE, it may be more than Netcp driver. Do you recall, what >> changes you did to get BE working on Keystone? Is it just NetCP driver? >> > IIRC it was Navigator (QMSS, DMA), NETCP, SPI and a couple > of more drivers. Driver update was a massive patch done by Prabhu. > Ok. Thanks. Murali > Regards, > Santosh > > > -- Murali Karicheri Linux Kernel, Keystone
[PATCH net] net: dsa: mv88e6xxx: do not leave reserved VLANs
BRIDGE_VLAN_FILTERING automatically adds a newly bridged port to the VLAN with the bridge's default_pvid. The mv88e6xxx driver currently reserves VLANs 4000+ for unbridged ports isolation. When a port joins a bridge, it leaves its reserved VLAN. When a port leaves a bridge, it joins again its reserved VLAN. But if the VLAN filtering is disabled, or if this hardware VLAN is already in use, the bridged port ends up with no default VLAN, and the communication with the CPU is thus broken. To fix this, make a port join its reserved VLAN once on setup, never leave it, and restore its PVID after another one was eventually used. Signed-off-by: Vivien Didelot --- drivers/net/dsa/mv88e6xxx.c | 25 ++--- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index 1cb3d15..4196dd8 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -1572,6 +1572,7 @@ int mv88e6xxx_port_vlan_del(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + const u16 defpvid = 4000 + ds->index * DSA_MAX_PORTS + port; u16 pvid, vid; int err = 0; @@ -1587,7 +1588,8 @@ int mv88e6xxx_port_vlan_del(struct dsa_switch *ds, int port, goto unlock; if (vid == pvid) { - err = _mv88e6xxx_port_pvid_set(ds, port, 0); + /* restore reserved VLAN ID */ + err = _mv88e6xxx_port_pvid_set(ds, port, defpvid); if (err) goto unlock; } @@ -1879,26 +1881,20 @@ unlock: int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port, u32 members) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - const u16 pvid = 4000 + ds->index * DSA_MAX_PORTS + port; - int err; - - /* The port joined a bridge, so leave its reserved VLAN */ - mutex_lock(&ps->smi_mutex); - err = _mv88e6xxx_port_vlan_del(ds, port, pvid); - if (!err) - err = _mv88e6xxx_port_pvid_set(ds, port, 0); - mutex_unlock(&ps->smi_mutex); - return err; + return 0; } int mv88e6xxx_port_bridge_leave(struct dsa_switch 
*ds, int port, u32 members) { + return 0; +} + +static int mv88e6xxx_setup_port_default_vlan(struct dsa_switch *ds, int port) +{ struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); const u16 pvid = 4000 + ds->index * DSA_MAX_PORTS + port; int err; - /* The port left the bridge, so join its reserved VLAN */ mutex_lock(&ps->smi_mutex); err = _mv88e6xxx_port_vlan_add(ds, port, pvid, true); if (!err) @@ -2182,8 +2178,7 @@ int mv88e6xxx_setup_ports(struct dsa_switch *ds) if (dsa_is_cpu_port(ds, i) || dsa_is_dsa_port(ds, i)) continue; - /* setup the unbridged state */ - ret = mv88e6xxx_port_bridge_leave(ds, i, 0); + ret = mv88e6xxx_setup_port_default_vlan(ds, i); if (ret < 0) return ret; } -- 2.7.0
[RFC 0/2] mac80211: add support for ht_caps mcs rxmask override
This patchset allows the ht_caps mcs rxmask to be defined on the fly. It applies the given rxmask to all available bands. This is actually limited to radio cards without internal rc. Cedric DEBARGE (2): cfg80211: add support for ht_caps mcs rxmask override nl80211: add nl attribute to set ht_caps mcs rxmask override include/net/cfg80211.h | 7 ++ include/uapi/linux/nl80211.h | 5 + net/mac80211/cfg.c | 48 +++ net/mac80211/main.c | 53 +--- net/wireless/nl80211.c | 18 +++ net/wireless/rdev-ops.h | 11 + net/wireless/trace.h | 15 + 7 files changed, 149 insertions(+), 8 deletions(-) -- 1.9.1
[RFC 1/2] cfg80211: add support for ht_caps mcs rxmask override
Allows the ht_caps mcs rxmask to be defined on the fly. In this implementation, the given rxmask is applied to every band available. This is only applicable for radio cards without internal rc. Signed-off-by: Cedric Debarge --- include/net/cfg80211.h | 7 +++ net/mac80211/cfg.c | 48 + net/mac80211/main.c | 53 ++ 3 files changed, 100 insertions(+), 8 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 9e1b24c..257404b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2500,6 +2500,8 @@ struct cfg80211_qos_map { * and returning to the base channel for communication with the AP. * @tdls_cancel_channel_switch: Stop channel-switching with a TDLS peer. Both * peers must be on the base channel when the call completes. + * + * @set_htcap_rxmask: Override hardware capabilities for ht_caps mcs rxmask. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -2765,6 +2767,8 @@ struct cfg80211_ops { void (*tdls_cancel_channel_switch)(struct wiphy *wiphy, struct net_device *dev, const u8 *addr); + + int (*set_htcap_rxmask)(struct wiphy *wiphy, uint8_t *rxmask); }; /* @@ -3121,6 +3125,8 @@ struct wiphy_vendor_command { * wiphy is theirs, e.g. in global notifiers * @bands: information about bands/channels supported by this device * + * @init_bands: saved copy of the original information about bands. 
+ * * @mgmt_stypes: bitmasks of frame subtypes that can be subscribed to or * transmitted through nl80211, points to an array indexed by interface * type @@ -3266,6 +3272,7 @@ struct wiphy { const void *privid; struct ieee80211_supported_band *bands[IEEE80211_NUM_BANDS]; + struct ieee80211_supported_band *init_bands[IEEE80211_NUM_BANDS]; /* Lets us get back the wiphy on the callback */ void (*reg_notifier)(struct wiphy *wiphy, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 66d22de..daa415b 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3350,6 +3350,53 @@ static int ieee80211_del_tx_ts(struct wiphy *wiphy, struct net_device *dev, return -ENOENT; } +static int ieee80211_set_htcap_rxmask(struct wiphy *wiphy, uint8_t *rxmask) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + struct ieee80211_supported_band *sband; + struct ieee80211_supported_band *iband; + int blank = 1; + int empty; + int i; + + if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) + return -EINVAL; + + mutex_lock(&local->iflist_mtx); + empty = list_empty(&local->interfaces); + mutex_unlock(&local->iflist_mtx); + + if (!empty) + return -EBUSY; + + for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) + if (rxmask[i]) { + blank = 0; + break; + } + + for (i = 0; i < IEEE80211_NUM_BANDS; i++) { + int j; + + sband = wiphy->bands[i]; + iband = wiphy->init_bands[i]; + + if (!iband) + continue; + + for (j = 0; j < IEEE80211_HT_MCS_MASK_LEN; j++) { + if (blank) + sband->ht_cap.mcs.rx_mask[j] = + iband->ht_cap.mcs.rx_mask[j]; + else + sband->ht_cap.mcs.rx_mask[j] = rxmask[j] & + iband->ht_cap.mcs.rx_mask[j]; + } + } + + return 0; +} + const struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -3435,4 +3482,5 @@ const struct cfg80211_ops mac80211_config_ops = { .set_ap_chanwidth = ieee80211_set_ap_chanwidth, .add_tx_ts = ieee80211_add_tx_ts, .del_tx_ts = ieee80211_del_tx_ts, + .set_htcap_rxmask = ieee80211_set_htcap_rxmask, }; diff 
--git a/net/mac80211/main.c b/net/mac80211/main.c index 6bcf0fa..138f1e4 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -789,15 +789,37 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) return 0; } +static int ieee80211_alloc_init_bands(struct wiphy *wiphy) +{ + int i; + + memset(wiphy->init_bands, 0, IEEE80211_NUM_BANDS); + for (i = 0; i < IEEE80211_NUM_BANDS; i++) { + if (!wiphy->bands[i]) + continue; + + wiphy->init_bands[i] = kzalloc(sizeof(*wiphy->init_bands[i]), + GFP_KERNEL); + if (!wiphy->init_bands[i]) +
[RFC 2/2] nl80211: add nl attribute to set ht_caps mcs rxmask override
This adds the NL80211_ATTR_WIPHY_HTCAP_RXMASK attribute to NL80211_CMD_SET_WIPHY in order for the user to specify the ht_caps mcs rxmask. Signed-off-by: Cedric Debarge--- include/uapi/linux/nl80211.h | 5 + net/wireless/nl80211.c | 18 ++ net/wireless/rdev-ops.h | 11 +++ net/wireless/trace.h | 15 +++ 4 files changed, 49 insertions(+) diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 7758969..50a53d8 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1794,6 +1794,9 @@ enum nl80211_commands { * connecting to a PCP, and in %NL80211_CMD_START_AP to start * a PCP instead of AP. Relevant for DMG networks only. * + * @NL80211_ATTR_WIPHY_HTCAP_RXMASK: Override hardware capabilities for ht_caps + * mcs rxmask. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2170,6 +2173,8 @@ enum nl80211_attrs { NL80211_ATTR_PBSS, + NL80211_ATTR_WIPHY_HTCAP_RXMASK, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 268cb49..ef5ec8b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -402,6 +402,9 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_SCHED_SCAN_DELAY] = { .type = NLA_U32 }, [NL80211_ATTR_REG_INDOOR] = { .type = NLA_FLAG }, [NL80211_ATTR_PBSS] = { .type = NLA_FLAG }, + [NL80211_ATTR_WIPHY_HTCAP_RXMASK] = { .type = NLA_BINARY, + .len = IEEE80211_HT_MCS_MASK_LEN + }, }; /* policy for the key attributes */ @@ -2243,6 +2246,21 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) return result; } + if (info->attrs[NL80211_ATTR_WIPHY_HTCAP_RXMASK]) { + u8 rxmask[IEEE80211_HT_MCS_MASK_LEN]; + + if (wdev) + return -EOPNOTSUPP; + + memcpy(rxmask, + nla_data(info->attrs[NL80211_ATTR_WIPHY_HTCAP_RXMASK]), + IEEE80211_HT_MCS_MASK_LEN); 
+ + result = rdev_set_htcap_rxmask(rdev, rxmask); + if (result) + return result; + } + changed = 0; if (info->attrs[NL80211_ATTR_WIPHY_RETRY_SHORT]) { diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 8ae0c04..488adb9 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -1071,4 +1071,15 @@ rdev_set_coalesce(struct cfg80211_registered_device *rdev, trace_rdev_return_int(&rdev->wiphy, ret); return ret; } + +static inline int +rdev_set_htcap_rxmask(struct cfg80211_registered_device *rdev, uint8_t *rxmask) +{ + int ret; + + trace_rdev_set_htcap_rxmask(&rdev->wiphy, rxmask); + ret = rdev->ops->set_htcap_rxmask(&rdev->wiphy, rxmask); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 09b242b..d7c8c3c 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2120,6 +2120,21 @@ TRACE_EVENT(rdev_tdls_cancel_channel_switch, WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(addr)) ); +TRACE_EVENT(rdev_set_htcap_rxmask, + TP_PROTO(struct wiphy *wiphy, uint8_t *rxmask), + TP_ARGS(wiphy, rxmask), + TP_STRUCT__entry( + WIPHY_ENTRY + __array(uint8_t, rxmask, IEEE80211_HT_MCS_MASK_LEN) + ), + TP_fast_assign( + WIPHY_ASSIGN; + memcpy(__entry->rxmask, rxmask, IEEE80211_HT_MCS_MASK_LEN); + ), + TP_printk(WIPHY_PR_FMT ", %*ph", + WIPHY_PR_ARG, IEEE80211_HT_MCS_MASK_LEN, &__entry->rxmask[0]) +); + /* * cfg80211 exported functions traces * */ -- 1.9.1
[PATCH net] net: dsa: mv88e6xxx: fix software VLAN deletion
The current bridge code calls switchdev_port_obj_del on a VLAN port even if the corresponding switchdev_port_obj_add call returned -EOPNOTSUPP. If the DSA driver doesn't return -EOPNOTSUPP for a software port VLAN in its port_vlan_del function, the VLAN is not deleted. Unbridging the port also generates a stack trace for the same reason. This can be quickly tested on a VLAN filtering enabled system with: # brctl addbr br0 # brctl addif br0 lan0 # brctl addbr br1 # brctl addif br1 lan1 # brctl delif br1 lan1 Both bridges have a default default_pvid set to 1. lan0 uses the hardware VLAN 1 while lan1 falls back to the software VLAN 1. Unbridging lan1 does not delete its software VLAN, and thus generates the following stack trace: [ 2991.681705] device lan1 left promiscuous mode [ 2991.686237] br1: port 1(lan1) entered disabled state [ 2991.725094] [ cut here ] [ 2991.729761] WARNING: CPU: 0 PID: 869 at net/bridge/br_vlan.c:314 __vlan_group_free+0x4c/0x50() [ 2991.738437] Modules linked in: [ 2991.741546] CPU: 0 PID: 869 Comm: ip Not tainted 4.4.0 #16 [ 2991.747039] Hardware name: Freescale Vybrid VF5xx/VF6xx (Device Tree) [ 2991.753511] Backtrace: [ 2991.756008] [<80014450>] (dump_backtrace) from [<8001469c>] (show_stack+0x20/0x24) [ 2991.763604] r6:80512644 r5:0009 r4: r3: [ 2991.769343] [<8001467c>] (show_stack) from [<80268e44>] (dump_stack+0x24/0x28) [ 2991.776618] [<80268e20>] (dump_stack) from [<80025568>] (warn_slowpath_common+0x98/0xc4) [ 2991.784750] [<800254d0>] (warn_slowpath_common) from [<80025650>] (warn_slowpath_null+0x2c/0x34) [ 2991.793557] r8: r7:9f786a8c r6:9f76c440 r5:9f786a00 r4:9f68ac00 [ 2991.800366] [<80025624>] (warn_slowpath_null) from [<80512644>] (__vlan_group_free+0x4c/0x50) [ 2991.808946] [<805125f8>] (__vlan_group_free) from [<80514488>] (nbp_vlan_flush+0x44/0x68) [ 2991.817147] r4:9f68ac00 r3:9ec7 [ 2991.820772] [<8051>] (nbp_vlan_flush) from [<80506f08>] (del_nbp+0xac/0x130) [ 2991.828201] r5:9f56f800 r4:9f786a00 [ 2991.831841] 
[<80506e5c>] (del_nbp) from [<8050774c>] (br_del_if+0x40/0xbc) [ 2991.838724] r7:80590f68 r6: r5:9ec71c38 r4:9f76c440 [ 2991.844475] [<8050770c>] (br_del_if) from [<80503dc0>] (br_del_slave+0x1c/0x20) [ 2991.851802] r5:9ec71c38 r4:9f56f800 [ 2991.855428] [<80503da4>] (br_del_slave) from [<80484a34>] (do_setlink+0x324/0x7b8) [ 2991.863043] [<80484710>] (do_setlink) from [<80485e90>] (rtnl_newlink+0x508/0x6f4) [ 2991.870616] r10: r9:9ec71ba8 r8: r7: r6:9f6b0400 r5:9f56f800 [ 2991.878548] r4:8076278c [ 2991.881110] [<80485988>] (rtnl_newlink) from [<80484048>] (rtnetlink_rcv_msg+0x18c/0x22c) [ 2991.889315] r10:9f7d4e40 r9: r8: r7: r6:9f7d4e40 r5:9f6b0400 [ 2991.897250] r4: [ 2991.899814] [<80483ebc>] (rtnetlink_rcv_msg) from [<80497c74>] (netlink_rcv_skb+0xb0/0xcc) [ 2991.908104] r8: r7:9f7d4e40 r6:9f7d4e40 r5:80483ebc r4:9f6b0400 [ 2991.914928] [<80497bc4>] (netlink_rcv_skb) from [<80483eb4>] (rtnetlink_rcv+0x34/0x3c) [ 2991.922874] r6:9f5ea000 r5:0028 r4:9f7d4e40 r3:80483e80 [ 2991.928622] [<80483e80>] (rtnetlink_rcv) from [<80497604>] (netlink_unicast+0x180/0x200) [ 2991.936742] r4:9f4edc00 r3:80483e80 [ 2991.940362] [<80497484>] (netlink_unicast) from [<80497a88>] (netlink_sendmsg+0x33c/0x350) [ 2991.948648] r8: r7:0028 r6: r5:9f5ea000 r4:9ec71f4c [ 2991.955481] [<8049774c>] (netlink_sendmsg) from [<80457ff0>] (sock_sendmsg+0x24/0x34) [ 2991.963342] r10: r9:9ec71e28 r8: r7:9f1e2140 r6: r5: [ 2991.971276] r4:9ec71f4c [ 2991.973849] [<80457fcc>] (sock_sendmsg) from [<80458af0>] (___sys_sendmsg+0x1fc/0x204) [ 2991.981809] [<804588f4>] (___sys_sendmsg) from [<804598d0>] (__sys_sendmsg+0x4c/0x7c) [ 2991.989640] r10: r9:9ec7 r8:80010824 r7:0128 r6:7ee946c4 r5: [ 2991.997572] r4:9f1e2140 [ 2992.000128] [<80459884>] (__sys_sendmsg) from [<80459918>] (SyS_sendmsg+0x18/0x1c) [ 2992.007725] r6: r5:7ee9c7b8 r4:7ee946e0 [ 2992.012430] [<80459900>] (SyS_sendmsg) from [<80010660>] (ret_fast_syscall+0x0/0x3c) [ 2992.020182] ---[ end trace 5d4bc29f4da04280 ]--- To fix this, return 
-EOPNOTSUPP in _mv88e6xxx_port_vlan_del instead of -ENOENT if the hardware VLAN doesn't exist or the port is not a member. Signed-off-by: Vivien Didelot --- drivers/net/dsa/mv88e6xxx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index d365007..1cb3d15 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -1545,7 +1545,7 @@ static int _mv88e6xxx_port_vlan_del(struct
[V4.4-rc6 Regression] af_unix: Revert 'lock_interruptible' in stream receive code
Hi Rainer, A kernel bug report was opened against Ubuntu [0]. After a kernel bisect, it was found that reverting the following commit resolved this bug: commit 3822b5c2fc62e3de8a0f33806ff279fb7df92432 Author: Rainer Weikusat Date: Wed Dec 16 20:09:25 2015 + af_unix: Revert 'lock_interruptible' in stream receive code The regression was introduced as of v4.4-rc6. I was hoping to get your feedback, since you are the patch author. Do you think gathering any additional data will help diagnose this issue, or would it be best to submit a revert request? Thanks, Joe [0] http://pad.lv/1540731
My homepage is at
Your old friend invites you to join QQ group: 343257759
Re: [PATCH v3 net-next] net: Implement fast csum_partial for x86_64
* Tom Herbert wrote: > [] gcc turns these switch statements into jump tables (not function > tables > which is what Ingo's example code was using). [...] So to the extent this still matters, on most x86 microarchitectures that count, jump tables and function call tables (i.e. virtual functions that C++ uses) are generally optimized by the same branch predictor hardware mechanism. Indirect jumps (jump tables) and indirect calls (function pointer tables) are very similar conceptually. That is why I posted the indirect calls test code. ( The only branching variant that will perform badly even on the latest uarchs are indirect returns: to modify the return address on the stack. ) So my narrow performance point stands, if any sort of indirect jump is used. They should be avoided if possible, because it's pretty hard for the hardware to get it right. As Linus noticed, data lookup tables are the intelligent solution: if you manage to offload the logic into arithmetics and not affect the control flow then that's a big win. The inherent branching will be hidden by executing on massively parallel arithmetics units which effectively execute everything fed to them in a single cycle. In any case, when submitting such patches, please get into the habit of looking at and posting perf stat output - it will give us a good idea about the quality of an implementation. Thanks, Ingo
Hello There
I am Ms.Ella Golan, I am the Executive Vice President Banking Division with FIRST INTERNATIONAL BANK OF ISRAEL LTD (FIBI). I am getting in touch with you regarding an extremely important and urgent matter. If you would oblige me the opportunity, I shall provide you with details upon your response. Faithfully, Ms.Ella Golan
Re: [PATCH 1/3] ifb: make device count build-time configurable
On Tue, 2016-01-12 at 15:54 -0500, David Miller wrote: > From: Stephen Hemminger Date: Tue, 12 Jan 2016 10:44:37 -0800 > > > On Tue, 12 Jan 2016 07:55:22 -0500 > > Jamal Hadi Salim wrote: > > > >> On 16-01-12 06:56 AM, Lubomir Rintel wrote: > >> > The devices can be created at run-time for quite some time > already and the > >> > load-time device creation collides with attempts to create the > device of > >> > the same name: > >> > > >> > # rmmod ifb > >> > # ip link add ifb0 type ifb > >> > RTNETLINK answers: File exists > >> > > >> > This is pretty much the same situation as was with the block > loop devices > >> > which was solved by adding a build-time configuration that the > >> > distributions could use as they deem fit while keeping the > default for > >> > compatibility. > >> > > >> > Let's do that here as well. > >> > > >> > Signed-off-by: Lubomir Rintel > >> > >> I guess module options are frowned upon. so: > > > > I would prefer that this were done with a module parameter, the > same as dummy. > > Only developers build their own configured kernels. Having the > value set later > > at module load time is preferable. > > I like this even less, it means tools behave significantly > differently > based upon what module options were passed to the kernel. > > Module options really should not change kernel behavior like this.. The module option is already there. Its defaults (creating the devices no one asked for and that potentially collide with what the user tried to create) are what we find bothersome. Lubo
Re: [PATCH] net: ethernet: support "fixed-link" DT node on nb8800 driver
Andy Shevchenko writes: > On Fri, Feb 5, 2016 at 3:39 PM, Måns Rullgård wrote: >>> + if (ret < 0) { >>> + dev_err(&pdev->dev, "broken fixed-link >>> specification\n"); >> >> Line is longer than 80 chars. > > This is actually okay, though I would recommend to move long string > literal to the next line. I only mentioned it because fixing it was trivial. -- Måns Rullgård
[PATCH net-next 1/2] mpls: packet stats
Having MPLS packet stats is useful for observing network operation and for diagnosing network problems. In the absence of anything better, use RFCs for MIBs defining MPLS stats for guidance on the semantics of the stats to expose. RFC3813 details two per-interface packet stats that should be provided (label lookup failures and fragmented packets) and also provides interpretation of RFC2863 for other per-interface stats (in/out ucast, mcast and bcast, in/out discards and errors and in unknown protos). Multicast, fragment and broadcast packet counters are printed, but not stored to allow for future implementation of current standards or future standards without user-space having to change. All the introduced fields are 64-bit, even error ones, to ensure no overflow with long uptimes. Per-CPU counters are used to avoid cache-line contention on the commonly used fields. The other fields have also been made per-CPU for code to avoid performance problems in error conditions on the assumption that on some platforms the cost of atomic operations could be more expensive than sending the packet (which is what would be done in the success case). If that's not the case, we could instead not use per-CPU counters for these fields. The IPv6 proc code was used as an inspiration for the proc code here, both in terms of the implementation as well as the location of the per-device stats proc files: /proc/net/dev_snmp_mpls/. 
Signed-off-by: Robert Shearman--- include/net/netns/mpls.h | 1 + net/mpls/Makefile| 1 + net/mpls/af_mpls.c | 135 --- net/mpls/internal.h | 93 ++-- net/mpls/mpls_iptunnel.c | 11 +++- net/mpls/proc.c | 128 6 files changed, 334 insertions(+), 35 deletions(-) create mode 100644 net/mpls/proc.c diff --git a/include/net/netns/mpls.h b/include/net/netns/mpls.h index d29203651c01..3062b0aa3a08 100644 --- a/include/net/netns/mpls.h +++ b/include/net/netns/mpls.h @@ -12,6 +12,7 @@ struct netns_mpls { size_t platform_labels; struct mpls_route __rcu * __rcu *platform_label; struct ctl_table_header *ctl; + struct proc_dir_entry *proc_net_devsnmp; }; #endif /* __NETNS_MPLS_H__ */ diff --git a/net/mpls/Makefile b/net/mpls/Makefile index 9ca923625016..6fdd61b9eae3 100644 --- a/net/mpls/Makefile +++ b/net/mpls/Makefile @@ -6,3 +6,4 @@ obj-$(CONFIG_MPLS_ROUTING) += mpls_router.o obj-$(CONFIG_MPLS_IPTUNNEL) += mpls_iptunnel.o mpls_router-y := af_mpls.o +mpls_router-$(CONFIG_PROC_FS) += proc.o diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index b18c5ed42d95..6b3c96e2b21f 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -17,8 +18,8 @@ #include #if IS_ENABLED(CONFIG_IPV6) #include -#include #endif +#include #include #include "internal.h" @@ -48,11 +49,6 @@ static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index) return rt; } -static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev) -{ - return rcu_dereference_rtnl(dev->mpls_ptr); -} - bool mpls_output_possible(const struct net_device *dev) { return dev && (dev->flags & IFF_UP) && netif_carrier_ok(dev); @@ -98,6 +94,29 @@ bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) } EXPORT_SYMBOL_GPL(mpls_pkt_too_big); +void mpls_stats_inc_outucastpkts(struct net_device *dev, +const struct sk_buff *skb) +{ + struct mpls_dev *mdev; + struct inet6_dev *in6dev; + + if (skb->protocol == 
htons(ETH_P_MPLS_UC)) { + mdev = mpls_dev_get(dev); + if (mdev) + MPLS_INC_STATS_LEN(mdev, skb->len, + MPLS_IFSTATS_MIB_OUTUCASTPKTS, + MPLS_IFSTATS_MIB_OUTOCTETS); + } else if (skb->protocol == htons(ETH_P_IP)) { + IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len); + } else if (skb->protocol == htons(ETH_P_IPV6)) { + in6dev = __in6_dev_get(dev); + if (in6dev) + IP6_UPD_PO_STATS(dev_net(dev), in6dev, +IPSTATS_MIB_OUT, skb->len); + } +} +EXPORT_SYMBOL_GPL(mpls_stats_inc_outucastpkts); + static u32 mpls_multipath_hash(struct mpls_route *rt, struct sk_buff *skb, bool bos) { @@ -253,6 +272,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, struct mpls_nh *nh; struct mpls_entry_decoded dec; struct net_device *out_dev; + struct mpls_dev *out_mdev; struct mpls_dev *mdev; unsigned int hh_len; unsigned int new_header_size; @@ -262,17 +282,25 @@ static int
[PATCH net-next 2/2] mpls: allow TTL propagation to/from IP packets to be configured
It is sometimes desirable to present an MPLS transport network as a single hop to traffic transiting it because it prevents confusion when diagnosing failures. An example of where confusion can be generated is when addresses used in the provider network overlap with addresses in the overlay network and the addresses get exposed through ICMP errors generated as packets transit the provider network. Therefore, provide the ability to control whether the TTL value from an MPLS packet is propagated to an IPv4/IPv6 packet when the last label is popped through the addition of a new per-namespace sysctl: "net.mpls.ip_ttl_propagate" which defaults to enabled. Use the same sysctl to control whether the TTL is propagated from IP packets into the MPLS header. If the TTL isn't propagated then a default TTL value is used which can be configured via a new sysctl: "net.mpls.default_ttl". Signed-off-by: Robert Shearman--- Documentation/networking/mpls-sysctl.txt | 19 + include/net/netns/mpls.h | 3 ++ net/mpls/af_mpls.c | 70 net/mpls/mpls_iptunnel.c | 10 - 4 files changed, 83 insertions(+), 19 deletions(-) diff --git a/Documentation/networking/mpls-sysctl.txt b/Documentation/networking/mpls-sysctl.txt index 9ed15f86c17c..9e8cfa6d48d1 100644 --- a/Documentation/networking/mpls-sysctl.txt +++ b/Documentation/networking/mpls-sysctl.txt @@ -19,6 +19,25 @@ platform_labels - INTEGER Possible values: 0 - 1048575 Default: 0 +ip_ttl_propagate - BOOL + Control whether TTL is propagated from the IPv4/IPv6 header to + the MPLS header on imposing labels and propagated from the + MPLS header to the IPv4/IPv6 header on popping the last label. + + If disabled, the MPLS transport network will appear as a + single hop to transit traffic. + + 0 - disabled + 1 - enabled (default) + +default_ttl - BOOL + Default TTL value to use for MPLS packets where it cannot be + propagated from an IP header, either because one isn't present + or ip_ttl_propagate has been disabled. 
+ + Possible values: 1 - 255 + Default: 255 + conf//input - BOOL Control whether packets can be input on this interface. diff --git a/include/net/netns/mpls.h b/include/net/netns/mpls.h index 3062b0aa3a08..9bdc2bd8fcb8 100644 --- a/include/net/netns/mpls.h +++ b/include/net/netns/mpls.h @@ -10,7 +10,10 @@ struct ctl_table_header; struct netns_mpls { size_t platform_labels; + int ip_ttl_propagate; + int default_ttl; struct mpls_route __rcu * __rcu *platform_label; + struct ctl_table_header *ctl; struct proc_dir_entry *proc_net_devsnmp; }; diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 6b3c96e2b21f..a2a4f0a884a3 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -31,7 +31,9 @@ #define MPLS_NEIGH_TABLE_UNSPEC (NEIGH_LINK_TABLE + 1) static int zero = 0; +static int one = 1; static int label_limit = (1 << 20) - 1; +static int ttl_max = 255; static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt, struct nlmsghdr *nlh, struct net *net, u32 portid, @@ -215,8 +217,8 @@ out: return >rt_nh[nh_index]; } -static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb, - struct mpls_entry_decoded dec) +static bool mpls_egress(struct net *net, struct mpls_route *rt, + struct sk_buff *skb, struct mpls_entry_decoded dec) { enum mpls_payload_type payload_type; bool success = false; @@ -239,24 +241,29 @@ static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb, payload_type = ip_hdr(skb)->version; switch (payload_type) { - case MPT_IPV4: { - struct iphdr *hdr4 = ip_hdr(skb); - skb->protocol = htons(ETH_P_IP); - csum_replace2(>check, - htons(hdr4->ttl << 8), - htons(dec.ttl << 8)); - hdr4->ttl = dec.ttl; + case MPT_IPV4: + if (net->mpls.ip_ttl_propagate) { + struct iphdr *hdr4 = ip_hdr(skb); + + skb->protocol = htons(ETH_P_IP); + csum_replace2(>check, + htons(hdr4->ttl << 8), + htons(dec.ttl << 8)); + hdr4->ttl = dec.ttl; + } success = true; break; - } - case MPT_IPV6: { - struct ipv6hdr *hdr6 = ipv6_hdr(skb); - skb->protocol = 
htons(ETH_P_IPV6); - hdr6->hop_limit = dec.ttl; + case MPT_IPV6: + if (net->mpls.ip_ttl_propagate) { + struct ipv6hdr *hdr6 = ipv6_hdr(skb); + + skb->protocol =
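The two sysctls reduce to one decision made at label imposition (and mirrored when popping the last label). A minimal sketch of that decision; the function name is hypothetical, and the test values mirror the documented defaults (ip_ttl_propagate = 1, default_ttl = 255):

```c
#include <stdint.h>

/* On label imposition: copy the IP TTL into the MPLS header when
 * net.mpls.ip_ttl_propagate is enabled, otherwise stamp default_ttl.
 * The same switch governs the pop direction, where a disabled setting
 * leaves the IP TTL untouched so the LSP looks like a single hop. */
static uint8_t mpls_pick_ttl(int ip_ttl_propagate, uint8_t ip_ttl,
			     uint8_t default_ttl)
{
	return ip_ttl_propagate ? ip_ttl : default_ttl;
}
```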
Re: [V4.4-rc6 Regression] af_unix: Revert 'lock_interruptible' in stream receive code
Joseph Salisbury writes: > Hi Rainer, > > A kernel bug report was opened against Ubuntu [0]. After a kernel > bisect, it was found that reverting the following commit resolved this bug: > > commit 3822b5c2fc62e3de8a0f33806ff279fb7df92432 > Author: Rainer Weikusat > Date: Wed Dec 16 20:09:25 2015 + > > af_unix: Revert 'lock_interruptible' in stream receive code > > > The regression was introduced as of v4.4-rc6. > > I was hoping to get your feedback, since you are the patch author. Do > you think gathering any additional data will help diagnose this issue, > or would it be best to submit a revert request? Funny little problem :-). The code using the interruptible lock cleared err as a side effect, hence the out: return copied ? : err; at the end of unix_stream_read_generic didn't return the -ENOTSUP put into err at the start of the function if copied was zero after the loop because the size of the passed data buffer was zero. The following patch should fix this: - diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 49d5093..c3e1a08 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2300,6 +2300,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state) else skip = 0; + err = 0; do { int chunk; bool drop_skb; -- I was just about to go to the supermarket to buy an apple when I received the mail. I didn't even compile the change above yet; however, I'll do so once I'm back and then submit something formal.
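The `return copied ? : err;` tail uses GCC's two-operand conditional, equivalent to `copied ? copied : err`. A tiny model of why a clobbered err turns an error return into a bogus success for zero-length reads (the helper name is mine, not from af_unix.c):

```c
/* Equivalent of the GNU "copied ?: err" tail of
 * unix_stream_read_generic(): the error only reaches the caller when
 * nothing was copied, so err must still hold the original negative
 * value at that point.  If the loop body cleared err to 0 as a side
 * effect, a zero-byte read reports success instead of the error. */
static int ret_copied_or_err(int copied, int err)
{
	return copied ? copied : err;
}
```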
Here's a test program which can be compiled with a C compiler: #define _GNU_SOURCE #include <sys/types.h> #include <sys/socket.h> #include <assert.h> #include <errno.h> #include <stdio.h> #include <stdlib.h> #include <string.h> int main(void) { enum { server, client, size }; int socket_fd[size]; int const opt = 1; assert(socketpair(AF_LOCAL, SOCK_STREAM, 0, socket_fd) == 0); char const msg[] = "A random message"; send(socket_fd[client], msg, sizeof msg, MSG_DONTWAIT | MSG_NOSIGNAL); assert(setsockopt(socket_fd[server], SOL_SOCKET, SO_PASSCRED, &opt, sizeof(opt)) != -1); union { struct cmsghdr cmh; char control[CMSG_SPACE(sizeof(struct ucred))]; } control_un; control_un.cmh.cmsg_len = CMSG_LEN(sizeof(struct ucred)); control_un.cmh.cmsg_level = SOL_SOCKET; control_un.cmh.cmsg_type = SCM_CREDENTIALS; struct msghdr msgh; msgh.msg_name = NULL; msgh.msg_namelen = 0; msgh.msg_iov = NULL; msgh.msg_iovlen = 0; msgh.msg_control = control_un.control; msgh.msg_controllen = sizeof(control_un.control); errno = 0; if (recvmsg(socket_fd[server], &msgh, MSG_PEEK) == -1) { printf("Error: %s\n", strerror(errno)); exit(EXIT_FAILURE); } else { printf("Success!\n"); exit(EXIT_SUCCESS); } }
[PATCH net-next 2/8] net: udp: always set up for CHECKSUM_PARTIAL offload
If the dst device doesn't support it, it'll get fixed up later anyway by validate_xmit_skb(). Also, this allows us to take advantage of LCO to avoid summing the payload multiple times. Signed-off-by: Edward Cree--- net/ipv4/udp.c | 14 +- net/ipv6/ip6_checksum.c | 13 + 2 files changed, 2 insertions(+), 25 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 005280d..c6bca27 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -857,23 +857,11 @@ void udp_set_csum(bool nocheck, struct sk_buff *skb, uh->check = udp_v4_check(len, saddr, daddr, lco_csum(skb)); if (uh->check == 0) uh->check = CSUM_MANGLED_0; - } else if (skb_dst(skb) && skb_dst(skb)->dev && - (skb_dst(skb)->dev->features & - (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM))) { + } else { skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct udphdr, check); uh->check = ~udp_v4_check(len, saddr, daddr, 0); - } else { - __wsum csum; - - uh->check = 0; - csum = skb_checksum(skb, 0, len, 0); - uh->check = udp_v4_check(len, saddr, daddr, csum); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - - skb->ip_summed = CHECKSUM_UNNECESSARY; } } EXPORT_SYMBOL(udp_set_csum); diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c index 4924bd7..8f92058 100644 --- a/net/ipv6/ip6_checksum.c +++ b/net/ipv6/ip6_checksum.c @@ -103,22 +103,11 @@ void udp6_set_csum(bool nocheck, struct sk_buff *skb, uh->check = udp_v6_check(len, saddr, daddr, lco_csum(skb)); if (uh->check == 0) uh->check = CSUM_MANGLED_0; - } else if (skb_dst(skb) && skb_dst(skb)->dev && - (skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) { + } else { skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct udphdr, check); uh->check = ~udp_v6_check(len, saddr, daddr, 0); - } else { - __wsum csum; - - uh->check = 0; - csum = skb_checksum(skb, 0, len, 0); - uh->check = udp_v6_check(len, saddr, daddr, csum); - if 
(uh->check == 0) - uh->check = CSUM_MANGLED_0; - - skb->ip_summed = CHECKSUM_UNNECESSARY; } } EXPORT_SYMBOL(udp6_set_csum);
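The CSUM_MANGLED_0 branches kept by this patch rely on the UDP rule that a transmitted checksum field of zero means "no checksum", so a computed zero must be sent as 0xFFFF. A stand-alone sketch of the RFC 1071 fold plus that mangling; the helper names are mine, not the kernel's:

```c
#include <stddef.h>
#include <stdint.h>

/* RFC 1071 one's-complement sum over 16-bit big-endian words, folded
 * to 16 bits and inverted, as a UDP/TCP checksum would be. */
static uint16_t csum16(const uint8_t *data, size_t len)
{
	uint32_t sum = 0;

	while (len > 1) {
		sum += (uint32_t)data[0] << 8 | data[1];
		data += 2;
		len -= 2;
	}
	if (len)
		sum += (uint32_t)data[0] << 8;
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

/* UDP quirk: 0 on the wire means "no checksum", so a computed 0 is
 * transmitted as 0xFFFF (what the kernel calls CSUM_MANGLED_0). */
static uint16_t udp_final_csum(uint16_t c)
{
	return c == 0 ? 0xFFFF : c;
}
```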
Re: [PATCH v2 1/4] lib: move strtobool to kstrtobool
On Thu, Feb 4, 2016 at 3:55 PM, Rasmus Villemoeswrote: > On Thu, Feb 04 2016, Kees Cook wrote: > >> Create the kstrtobool_from_user helper and moves strtobool logic into >> the new kstrtobool (matching all the other kstrto* functions). Provides >> an inline wrapper for existing strtobool callers. >> >> Signed-off-by: Kees Cook >> --- >> include/linux/kernel.h | 3 +++ >> include/linux/string.h | 6 +- >> lib/kstrtox.c | 35 +++ >> lib/string.c | 29 - >> 4 files changed, 43 insertions(+), 30 deletions(-) >> >> diff --git a/include/linux/kernel.h b/include/linux/kernel.h >> index f31638c6e873..cdc25f47a23f 100644 >> --- a/include/linux/kernel.h >> +++ b/include/linux/kernel.h >> @@ -357,6 +357,7 @@ int __must_check kstrtou16(const char *s, unsigned int >> base, u16 *res); >> int __must_check kstrtos16(const char *s, unsigned int base, s16 *res); >> int __must_check kstrtou8(const char *s, unsigned int base, u8 *res); >> int __must_check kstrtos8(const char *s, unsigned int base, s8 *res); >> +int __must_check kstrtobool(const char *s, unsigned int base, bool *res); >> >> int __must_check kstrtoull_from_user(const char __user *s, size_t count, >> unsigned int base, unsigned long long *res); >> int __must_check kstrtoll_from_user(const char __user *s, size_t count, >> unsigned int base, long long *res); >> @@ -368,6 +369,8 @@ int __must_check kstrtou16_from_user(const char __user >> *s, size_t count, unsigne >> int __must_check kstrtos16_from_user(const char __user *s, size_t count, >> unsigned int base, s16 *res); >> int __must_check kstrtou8_from_user(const char __user *s, size_t count, >> unsigned int base, u8 *res); >> int __must_check kstrtos8_from_user(const char __user *s, size_t count, >> unsigned int base, s8 *res); >> +int __must_check kstrtobool_from_user(const char __user *s, size_t count, >> + unsigned int base, bool *res); >> >> static inline int __must_check kstrtou64_from_user(const char __user *s, >> size_t count, unsigned int base, u64 *res) >> { >> diff 
--git a/include/linux/string.h b/include/linux/string.h >> index 9eebc66d957a..d2fb21b1081d 100644 >> --- a/include/linux/string.h >> +++ b/include/linux/string.h >> @@ -128,7 +128,11 @@ extern char **argv_split(gfp_t gfp, const char *str, >> int *argcp); >> extern void argv_free(char **argv); >> >> extern bool sysfs_streq(const char *s1, const char *s2); >> -extern int strtobool(const char *s, bool *res); >> +extern int kstrtobool(const char *s, unsigned int base, bool *res); >> +static inline int strtobool(const char *s, bool *res) >> +{ >> + return kstrtobool(s, 0, res); >> +} >> >> #ifdef CONFIG_BINARY_PRINTF >> int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args); >> diff --git a/lib/kstrtox.c b/lib/kstrtox.c >> index 94be244e8441..e18f088704d7 100644 >> --- a/lib/kstrtox.c >> +++ b/lib/kstrtox.c >> @@ -321,6 +321,40 @@ int kstrtos8(const char *s, unsigned int base, s8 *res) >> } >> EXPORT_SYMBOL(kstrtos8); >> >> +/** >> + * kstrtobool - convert common user inputs into boolean values >> + * @s: input string >> + * @base: ignored >> + * @res: result >> + * >> + * This routine returns 0 iff the first character is one of 'Yy1Nn0'. >> + * Otherwise it will return -EINVAL. Value pointed to by res is >> + * updated upon finding a match. >> + */ >> +int kstrtobool(const char *s, unsigned int base, bool *res) >> +{ > > Being able to create the kstrtobool_from_user with a single macro > invocation is convenient, but I don't think that justifies the ugliness > of having an unused parameter. People reading this code or trying to use > the interface will wonder what it's doing there, and it will generate > slightly larger code for all the users of strtobool. > > So I'd just make a separate explicit definition of kstrtobool_from_user > (the stack buffer sizing doesn't apply to the strings we want to parse > anyway, though 11 is of course plenty). Okay, thanks. So many things were bothering me, but I feared code duplication would be seen as worse. 
I'm much happier to drop the unused argument. :) I'll send a v3 with all the changes. -Kees -- Kees Cook Chrome OS & Brillo Security
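For reference, the semantics being consolidated here: strtobool()/kstrtobool() examine only the first character of the input. A user-space sketch under that reading; `strtobool_sketch` and `parse01` are hypothetical names, not the kernel functions:

```c
#include <errno.h>
#include <stdbool.h>

/* First character decides: 'y'/'Y'/'1' => true, 'n'/'N'/'0' => false,
 * anything else is -EINVAL and *res is left untouched. */
static int strtobool_sketch(const char *s, bool *res)
{
	switch (s[0]) {
	case 'y': case 'Y': case '1':
		*res = true;
		return 0;
	case 'n': case 'N': case '0':
		*res = false;
		return 0;
	default:
		return -EINVAL;
	}
}

/* Convenience wrapper: 1 = true, 0 = false, -1 = parse error. */
static int parse01(const char *s)
{
	bool b;

	if (strtobool_sketch(s, &b))
		return -1;
	return b ? 1 : 0;
}
```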
[net-next PATCH 06/10] gre: Use GSO flags to determine csum need instead of GRE flags
This patch updates the gre checksum path to follow something much closer to the UDP checksum path. By doing this we can avoid needing to do as much header inspection and can just make use of the fields we were already reading in the sk_buff structure. Signed-off-by: Alexander Duyck--- net/ipv4/gre_offload.c | 64 +++- 1 file changed, 30 insertions(+), 34 deletions(-) diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 35a8dd35ed4e..c15441b5ff61 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -18,14 +18,14 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, netdev_features_t features) { + int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); struct sk_buff *segs = ERR_PTR(-EINVAL); - int ghl; struct gre_base_hdr *greh; u16 mac_offset = skb->mac_header; - int mac_len = skb->mac_len; __be16 protocol = skb->protocol; - int tnl_hlen; - bool csum; + u16 mac_len = skb->mac_len; + int gre_offset, outer_hlen; + bool need_csum; if (unlikely(skb_shinfo(skb)->gso_type & ~(SKB_GSO_TCPV4 | @@ -42,64 +42,60 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, if (!skb->encapsulation) goto out; - if (unlikely(!pskb_may_pull(skb, sizeof(*greh + if (unlikely(tnl_hlen < sizeof(struct gre_base_hdr))) goto out; - greh = (struct gre_base_hdr *)skb_transport_header(skb); - - ghl = skb_inner_mac_header(skb) - skb_transport_header(skb); - if (unlikely(ghl < sizeof(*greh))) + if (unlikely(!pskb_may_pull(skb, tnl_hlen))) goto out; - csum = !!(greh->flags & GRE_CSUM); - if (csum) - skb->encap_hdr_csum = 1; + greh = (struct gre_base_hdr *)skb_transport_header(skb); /* setup inner skb. 
*/ skb->protocol = greh->protocol; skb->encapsulation = 0; - - if (unlikely(!pskb_may_pull(skb, ghl))) - goto out; - - __skb_pull(skb, ghl); + __skb_pull(skb, tnl_hlen); skb_reset_mac_header(skb); skb_set_network_header(skb, skb_inner_network_offset(skb)); skb->mac_len = skb_inner_network_offset(skb); + need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE_CSUM); + skb->encap_hdr_csum = need_csum; + features &= skb->dev->hw_enc_features; /* segment inner packet. */ segs = skb_mac_gso_segment(skb, features); if (IS_ERR_OR_NULL(segs)) { - skb_gso_error_unwind(skb, protocol, ghl, mac_offset, mac_len); + skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset, +mac_len); goto out; } + outer_hlen = skb_tnl_header_len(skb); + gre_offset = outer_hlen - tnl_hlen; skb = segs; - tnl_hlen = skb_tnl_header_len(skb); do { - __skb_push(skb, ghl); - if (csum) { - __be32 *pcsum; - - skb_reset_transport_header(skb); - - greh = (struct gre_base_hdr *) - skb_transport_header(skb); - pcsum = (__be32 *)(greh + 1); - *pcsum = 0; - *(__sum16 *)pcsum = gso_make_checksum(skb, 0); - } - __skb_push(skb, tnl_hlen - ghl); + __be32 *pcsum; skb_reset_inner_headers(skb); skb->encapsulation = 1; - skb_reset_mac_header(skb); - skb_set_network_header(skb, mac_len); skb->mac_len = mac_len; skb->protocol = protocol; + + __skb_push(skb, outer_hlen); + skb_reset_mac_header(skb); + skb_set_network_header(skb, mac_len); + skb_set_transport_header(skb, gre_offset); + + if (!need_csum) + continue; + + greh = (struct gre_base_hdr *)skb_transport_header(skb); + pcsum = (__be32 *)(greh + 1); + + *pcsum = 0; + *(__sum16 *)pcsum = gso_make_checksum(skb, 0); } while ((skb = skb->next)); out: return segs;
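Before this change, the need for an outer checksum was read straight from the GRE header's C bit; the patch derives the same answer from SKB_GSO_GRE_CSUM in gso_type without touching the header. A sketch of the two checks the old path performed; the constants follow RFC 2784 and the helper names are hypothetical:

```c
#include <stdint.h>

#define GRE_CSUM_FLAG 0x8000	/* C bit in the first 16-bit GRE word */

/* Old gre_gso_segment() logic: checksum needed iff the C bit is set
 * (flags shown in host byte order for simplicity). */
static int gre_flags_need_csum(uint16_t flags)
{
	return !!(flags & GRE_CSUM_FLAG);
}

/* Sanity check kept by the patch: the tunnel header span (transport
 * header up to the inner MAC header) must cover at least the 4-byte
 * GRE base header. */
static int gre_tnl_hlen_ok(int tnl_hlen)
{
	return tnl_hlen >= 4;
}
```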
[net-next PATCH 07/10] gre: Use inner_proto to obtain inner header protocol
Instead of parsing headers to determine the inner protocol we can just pull the value from inner_proto. Signed-off-by: Alexander Duyck--- net/ipv4/gre_offload.c |6 ++ 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index c15441b5ff61..003b0ebbcfdd 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -20,7 +20,6 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, { int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); struct sk_buff *segs = ERR_PTR(-EINVAL); - struct gre_base_hdr *greh; u16 mac_offset = skb->mac_header; __be16 protocol = skb->protocol; u16 mac_len = skb->mac_len; @@ -48,15 +47,13 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, tnl_hlen))) goto out; - greh = (struct gre_base_hdr *)skb_transport_header(skb); - /* setup inner skb. */ - skb->protocol = greh->protocol; skb->encapsulation = 0; __skb_pull(skb, tnl_hlen); skb_reset_mac_header(skb); skb_set_network_header(skb, skb_inner_network_offset(skb)); skb->mac_len = skb_inner_network_offset(skb); + skb->protocol = skb->inner_protocol; need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE_CSUM); skb->encap_hdr_csum = need_csum; @@ -75,6 +72,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, gre_offset = outer_hlen - tnl_hlen; skb = segs; do { + struct gre_base_hdr *greh; __be32 *pcsum; skb_reset_inner_headers(skb);
[net-next PATCH 02/10] net: Move GSO csum into SKB_GSO_CB
This patch moves the checksum maintained by GSO out of skb->csum and into the GSO context block in order to allow for us to work on outer checksums while maintaining the inner checksum offsets in the case of the inner checksum being offloaded, while the outer checksums will be computed. While updating the code I also did a minor cleanu-up on gso_make_checksum. The change is mostly to make it so that we store the values and compute the checksum instead of computing the checksum and then storing the values we needed to update. Signed-off-by: Alexander Duyck--- include/linux/skbuff.h | 14 +++--- net/core/skbuff.c | 16 +--- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 61b8cef73296..33c3807b618a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3549,6 +3549,7 @@ static inline struct sec_path *skb_sec_path(struct sk_buff *skb) struct skb_gso_cb { int mac_offset; int encap_level; + __wsum csum; __u16 csum_start; }; #define SKB_SGO_CB_OFFSET 32 @@ -3585,15 +3586,14 @@ static inline int gso_pskb_expand_head(struct sk_buff *skb, int extra) */ static inline __sum16 gso_make_checksum(struct sk_buff *skb, __wsum res) { - int plen = SKB_GSO_CB(skb)->csum_start - skb_headroom(skb) - - skb_transport_offset(skb); - __wsum partial; + unsigned char *csum_start = skb_transport_header(skb); + int plen = (skb->head + SKB_GSO_CB(skb)->csum_start) - csum_start; + __wsum partial = SKB_GSO_CB(skb)->csum; - partial = csum_partial(skb_transport_header(skb), plen, skb->csum); - skb->csum = res; - SKB_GSO_CB(skb)->csum_start -= plen; + SKB_GSO_CB(skb)->csum = res; + SKB_GSO_CB(skb)->csum_start = csum_start - skb->head; - return csum_fold(partial); + return csum_fold(csum_partial(csum_start, plen, partial)); } static inline bool skb_is_gso(const struct sk_buff *skb) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b2df375ec9c2..02c638a643ea 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ 
-3100,11 +3100,12 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, if (!sg && !nskb->remcsum_offload) { nskb->ip_summed = CHECKSUM_NONE; - nskb->csum = skb_copy_and_csum_bits(head_skb, offset, - skb_put(nskb, len), - len, 0); + SKB_GSO_CB(nskb)->csum = + skb_copy_and_csum_bits(head_skb, offset, + skb_put(nskb, len), + len, 0); SKB_GSO_CB(nskb)->csum_start = - skb_headroom(nskb) + doffset; + skb_headroom(nskb) + doffset; continue; } @@ -3171,11 +3172,12 @@ skip_fraglist: perform_csum_check: if (!csum && !nskb->remcsum_offload) { - nskb->csum = skb_checksum(nskb, doffset, - nskb->len - doffset, 0); nskb->ip_summed = CHECKSUM_NONE; + SKB_GSO_CB(nskb)->csum = + skb_checksum(nskb, doffset, +nskb->len - doffset, 0); SKB_GSO_CB(nskb)->csum_start = - skb_headroom(nskb) + doffset; + skb_headroom(nskb) + doffset; } } while ((offset += len) < head_skb->len);
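Stashing the partial sum in SKB_GSO_CB works because a one's-complement checksum is resumable: feeding the running sum back in as a seed gives the same result as summing the whole buffer at once, which is exactly what gso_make_checksum() exploits when it combines the stored csum with the bytes in front of csum_start. A minimal model of csum_partial() showing that property (not the kernel's optimized implementation):

```c
#include <stddef.h>
#include <stdint.h>

/* One's-complement partial sum with a seed, folded to 16 bits.  Bytes
 * are paired big-endian; resuming is only valid at even offsets. */
static uint32_t csum_partial_sketch(const uint8_t *p, size_t len,
				    uint32_t sum)
{
	while (len > 1) {
		sum += (uint32_t)p[0] << 8 | p[1];
		p += 2;
		len -= 2;
	}
	if (len)
		sum += (uint32_t)p[0] << 8;
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return sum;
}
```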
[net-next PATCH 04/10] net: Store checksum result for offloaded GSO checksums
This patch makes it so that we can offload the checksums for a packet up to a certain point and then begin computing the checksums via software. Setting this up is fairly straight forward as all we need to do is reset the values stored in csum and csum_start for the GSO context block. One complication for this is remote checksum offload. In order to allow the inner checksums to be offloaded while computing the outer checksum manually we needed to have some way of indicating that the offload wasn't real. In order to do that I replaced CHECKSUM_PARTIAL with CHECKSUM_UNNECESSARY in the case of us computing checksums for the outer header while skipping computing checksums for the inner headers. We clean up the ip_summed flag and set it to either CHECKSUM_PARTIAL or CHECKSUM_NONE once we hand the packet off to the next lower level. Signed-off-by: Alexander Duyck--- include/linux/skbuff.h | 15 +++ net/ipv4/tcp_offload.c |8 ++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 33c3807b618a..77ebb61e2352 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2161,6 +2161,11 @@ static inline int skb_checksum_start_offset(const struct sk_buff *skb) return skb->csum_start - skb_headroom(skb); } +static inline unsigned char *skb_checksum_start(const struct sk_buff *skb) +{ + return skb->head + skb->csum_start; +} + static inline int skb_transport_offset(const struct sk_buff *skb) { return skb_transport_header(skb) - skb->data; @@ -3576,6 +3581,16 @@ static inline int gso_pskb_expand_head(struct sk_buff *skb, int extra) return 0; } +static inline void gso_reset_checksum(struct sk_buff *skb, __wsum res) +{ + /* Do not update partial checksums if remote checksum is enabled. */ + if (skb->remcsum_offload) + return; + + SKB_GSO_CB(skb)->csum = res; + SKB_GSO_CB(skb)->csum_start = skb_checksum_start(skb) - skb->head; +} + /* Compute the checksum for a gso segment. 
First compute the checksum value * from the start of transport header to SKB_GSO_CB(skb)->csum_start, and * then add in skb->csum (checksum from csum_start to end of packet). diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 9864a2dbadce..773083b7f1e9 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -135,7 +135,9 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, th->fin = th->psh = 0; th->check = newcheck; - if (skb->ip_summed != CHECKSUM_PARTIAL) + if (skb->ip_summed == CHECKSUM_PARTIAL) + gso_reset_checksum(skb, ~th->check); + else th->check = gso_make_checksum(skb, ~th->check); seq += mss; @@ -169,7 +171,9 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, skb->data_len); th->check = ~csum_fold((__force __wsum)((__force u32)th->check + (__force u32)delta)); - if (skb->ip_summed != CHECKSUM_PARTIAL) + if (skb->ip_summed == CHECKSUM_PARTIAL) + gso_reset_checksum(skb, ~th->check); + else th->check = gso_make_checksum(skb, ~th->check); out: return segs;
[net-next PATCH 03/10] net: Update remote checksum segmentation to support use of GSO checksum
This patch addresses two main issues. First in the case of remote checksum offload we were avoiding dealing with scatter-gather issues. As a result it would be possible to assemble a series of frames that used frags instead of being linearized as they should have if remote checksum offload was enabled. Second I have updated the code so that we now let GSO take care of doing the checksum on the data itself and drop the special case that was added for remote checksum offload. Signed-off-by: Alexander Duyck--- net/core/skbuff.c | 10 ++ net/ipv4/udp_offload.c | 22 ++ 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 02c638a643ea..9c065ac72e87 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3098,8 +3098,9 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, if (nskb->len == len + doffset) goto perform_csum_check; - if (!sg && !nskb->remcsum_offload) { - nskb->ip_summed = CHECKSUM_NONE; + if (!sg) { + if (!nskb->remcsum_offload) + nskb->ip_summed = CHECKSUM_NONE; SKB_GSO_CB(nskb)->csum = skb_copy_and_csum_bits(head_skb, offset, skb_put(nskb, len), @@ -3171,8 +3172,9 @@ skip_fraglist: nskb->truesize += nskb->data_len; perform_csum_check: - if (!csum && !nskb->remcsum_offload) { - nskb->ip_summed = CHECKSUM_NONE; + if (!csum) { + if (!nskb->remcsum_offload) + nskb->ip_summed = CHECKSUM_NONE; SKB_GSO_CB(nskb)->csum = skb_checksum(nskb, doffset, nskb->len - doffset, 0); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index ce64c2b7ba55..86687f58d613 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -66,6 +66,16 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, features &= skb->dev->hw_enc_features; + /* The only checksum offload we care about from here on out is the +* outer one so strip the existing checksum feature flags and +* instead set the flag based on our outer checksum offload value. 
+*/ + if (remcsum) { + features &= ~NETIF_F_CSUM_MASK; + if (offload_csum) + features |= NETIF_F_HW_CSUM; + } + /* segment inner packet. */ segs = gso_inner_segment(skb, features); if (IS_ERR_OR_NULL(segs)) { @@ -116,18 +126,6 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct udphdr, check); - } else if (remcsum) { - /* Need to calculate checksum from scratch, -* inner checksums are never when doing -* remote_checksum_offload. -*/ - - skb->csum = skb_checksum(skb, udp_offset, -skb->len - udp_offset, -0); - uh->check = csum_fold(skb->csum); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; } else { uh->check = gso_make_checksum(skb, ~uh->check);
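The feature rewrite above is plain bitmask surgery: with remote checksum offload active, strip every checksum feature and re-add hardware checksumming only when the outer checksum can really be offloaded. A sketch with stand-in feature bits; the F_* names are hypothetical, mirroring NETIF_F_*:

```c
#include <stdint.h>

/* Hypothetical stand-ins for the NETIF_F_* checksum feature bits. */
#define F_IP_CSUM   (1u << 0)
#define F_HW_CSUM   (1u << 1)
#define F_CSUM_MASK (F_IP_CSUM | F_HW_CSUM)

/* Mirrors the feature rewrite in __skb_udp_tunnel_segment(): when
 * remote checksum offload is in play, only the outer checksum matters,
 * so drop all checksum features and restore HW_CSUM conditionally. */
static uint32_t remcsum_features(uint32_t features, int remcsum,
				 int offload_csum)
{
	if (remcsum) {
		features &= ~F_CSUM_MASK;
		if (offload_csum)
			features |= F_HW_CSUM;
	}
	return features;
}
```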
[net-next PATCH 05/10] net: Move skb_has_shared_frag check out of GRE code and into segmentation
The call skb_has_shared_frag is used in the GRE path and skb_checksum_help to verify that no frags can be modified by an external entity. This check really doesn't belong in the GRE path but in the skb_segment function itself. This way any protocol that might be segmented will be performing this check before attempting to offload a checksum to software. Signed-off-by: Alexander Duyck--- net/core/skbuff.c |5 + net/ipv4/gre_offload.c | 11 --- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 9c065ac72e87..88262c82b96a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3173,6 +3173,11 @@ skip_fraglist: perform_csum_check: if (!csum) { + if (skb_has_shared_frag(nskb)) { + err = __skb_linearize(nskb); + if (err) + goto err; + } if (!nskb->remcsum_offload) nskb->ip_summed = CHECKSUM_NONE; SKB_GSO_CB(nskb)->csum = diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 02cb1a416c7d..35a8dd35ed4e 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -83,17 +83,6 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, if (csum) { __be32 *pcsum; - if (skb_has_shared_frag(skb)) { - int err; - - err = __skb_linearize(skb); - if (err) { - kfree_skb_list(segs); - segs = ERR_PTR(err); - goto out; - } - } - skb_reset_transport_header(skb); greh = (struct gre_base_hdr *)
[PATCH net-next V2 2/8] net: fec: fix rx error counts
On an overrun, the other flags are not valid, so don't check them. Also, don't pass bad frames up the stack. Signed-off-by: Troy Kisky--- drivers/net/ethernet/freescale/fec_main.c | 36 +-- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 3e5b24a..162fa59 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1408,37 +1408,31 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id) break; pkt_received++; - /* Since we have allocated space to hold a complete frame, -* the last indicator should be set. -*/ - if ((status & BD_ENET_RX_LAST) == 0) - netdev_err(ndev, "rcv is not +last\n"); - writel(FEC_ENET_RXF, fep->hwp + FEC_IEVENT); /* Check for errors. */ + status ^= BD_ENET_RX_LAST; if (status & (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_NO | - BD_ENET_RX_CR | BD_ENET_RX_OV)) { + BD_ENET_RX_CR | BD_ENET_RX_OV | BD_ENET_RX_LAST | + BD_ENET_RX_CL)) { ndev->stats.rx_errors++; - if (status & (BD_ENET_RX_LG | BD_ENET_RX_SH)) { + if (status & BD_ENET_RX_OV) { + /* FIFO overrun */ + ndev->stats.rx_fifo_errors++; + goto rx_processing_done; + } + if (status & (BD_ENET_RX_LG | BD_ENET_RX_SH + | BD_ENET_RX_LAST)) { /* Frame too long or too short. */ ndev->stats.rx_length_errors++; + if (status & BD_ENET_RX_LAST) + netdev_err(ndev, "rcv is not +last\n"); } - if (status & BD_ENET_RX_NO) /* Frame alignment */ - ndev->stats.rx_frame_errors++; if (status & BD_ENET_RX_CR) /* CRC Error */ ndev->stats.rx_crc_errors++; - if (status & BD_ENET_RX_OV) /* FIFO overrun */ - ndev->stats.rx_fifo_errors++; - } - - /* Report late collisions as a frame error. -* On this error, the BD is closed, but we don't know what we -* have in the buffer. So, just drop this frame on the floor. -*/ - if (status & BD_ENET_RX_CL) { - ndev->stats.rx_errors++; - ndev->stats.rx_frame_errors++; + /* Report late collisions as a frame error. 
*/ + if (status & (BD_ENET_RX_NO | BD_ENET_RX_CL)) + ndev->stats.rx_frame_errors++; goto rx_processing_done; } -- 2.5.0
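The `status ^= BD_ENET_RX_LAST;` line is the trick that lets one mask test cover a missing LAST bit: a well-terminated frame has the bit set, so the XOR clears it, while a frame without it comes out with the bit set and falls into the error path alongside the genuine error flags. A sketch, using the bit value as defined in fec.h:

```c
#include <stdint.h>

#define BD_ENET_RX_LAST 0x0800	/* "last buffer in frame" bit, per fec.h */

/* After this XOR, BD_ENET_RX_LAST behaves like an error flag: set
 * means the frame was NOT properly terminated.  All other bits in
 * status pass through unchanged. */
static uint16_t fold_last_into_errors(uint16_t status)
{
	return status ^ BD_ENET_RX_LAST;
}
```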
[PATCH net-next V2 8/8] net: fec: improve error handling
Unmap initial buffer on error. Don't free skb until it has been unmapped. Move cbd_bufaddr assignment closer to the mapping function. Signed-off-by: Troy Kisky--- drivers/net/ethernet/freescale/fec_main.c | 11 +++ 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 97ca72a..ef18ca5 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -382,7 +382,6 @@ fec_enet_txq_submit_frag_skb(struct fec_enet_priv_tx_q *txq, addr = dma_map_single(>pdev->dev, bufaddr, frag_len, DMA_TO_DEVICE); if (dma_mapping_error(>pdev->dev, addr)) { - dev_kfree_skb_any(skb); if (net_ratelimit()) netdev_err(ndev, "Tx DMA memory map failed\n"); goto dma_mapping_error; @@ -467,8 +466,12 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq, if (nr_frags) { last_bdp = fec_enet_txq_submit_frag_skb(txq, skb, ndev); - if (IS_ERR(last_bdp)) + if (IS_ERR(last_bdp)) { + dma_unmap_single(>pdev->dev, addr, +buflen, DMA_TO_DEVICE); + dev_kfree_skb_any(skb); return NETDEV_TX_OK; + } } else { status |= (BD_ENET_TX_INTR | BD_ENET_TX_LAST); if (fep->bufdesc_ex) { @@ -478,6 +481,8 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq, estatus |= BD_ENET_TX_TS; } } + bdp->cbd_bufaddr = cpu_to_fec32(addr); + bdp->cbd_datlen = cpu_to_fec16(buflen); if (fep->bufdesc_ex) { @@ -501,8 +506,6 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq, /* Save skb pointer */ txq->tx_skbuff[index] = skb; - bdp->cbd_datlen = cpu_to_fec16(buflen); - bdp->cbd_bufaddr = cpu_to_fec32(addr); /* Make sure the updates to rest of the descriptor are performed before * transferring ownership. */ -- 2.5.0
[PATCH net-next V2 4/8] net: fec: add struct bufdesc_prop
This reduces code and gains speed. Signed-off-by: Troy Kisky--- drivers/net/ethernet/freescale/fec.h | 29 ++- drivers/net/ethernet/freescale/fec_main.c | 288 -- 2 files changed, 132 insertions(+), 185 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index cc9677a..53ec04f 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -448,33 +448,34 @@ struct bufdesc_ex { /* Controller supports RACC register */ #define FEC_QUIRK_HAS_RACC (1 << 12) +struct bufdesc_prop { + int qid; + /* Address of Rx and Tx buffers */ + struct bufdesc *base; + struct bufdesc *last; + struct bufdesc *cur; + dma_addr_t dma; + unsigned short ring_size; + unsigned char dsize; + unsigned char dsize_log2; +}; + struct fec_enet_priv_tx_q { - int index; + struct bufdesc_prop bd; unsigned char *tx_bounce[TX_RING_SIZE]; struct sk_buff *tx_skbuff[TX_RING_SIZE]; - dma_addr_t bd_dma; - struct bufdesc *tx_bd_base; - uint tx_ring_size; - unsigned short tx_stop_threshold; unsigned short tx_wake_threshold; - struct bufdesc *cur_tx; struct bufdesc *dirty_tx; char *tso_hdrs; dma_addr_t tso_hdrs_dma; }; struct fec_enet_priv_rx_q { - int index; + struct bufdesc_prop bd; struct sk_buff *rx_skbuff[RX_RING_SIZE]; - - dma_addr_t bd_dma; - struct bufdesc *rx_bd_base; - uint rx_ring_size; - - struct bufdesc *cur_rx; }; /* The FEC buffer descriptors track the ring buffers. 
The rx_bd_base and @@ -514,8 +515,6 @@ struct fec_enet_private { unsigned long work_ts; unsigned long work_mdio; - unsigned short bufdesc_size; - struct platform_device *pdev; int dev_id; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index adbddfd..b039288 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -217,86 +217,38 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address"); #define IS_TSO_HEADER(txq, addr) \ ((addr >= txq->tso_hdrs_dma) && \ - (addr < txq->tso_hdrs_dma + txq->tx_ring_size * TSO_HEADER_SIZE)) + (addr < txq->tso_hdrs_dma + txq->bd.ring_size * TSO_HEADER_SIZE)) static int mii_cnt; -static inline -struct bufdesc *fec_enet_get_nextdesc(struct bufdesc *bdp, - struct fec_enet_private *fep, - int queue_id) -{ - struct bufdesc *new_bd = bdp + 1; - struct bufdesc_ex *ex_new_bd = (struct bufdesc_ex *)bdp + 1; - struct fec_enet_priv_tx_q *txq = fep->tx_queue[queue_id]; - struct fec_enet_priv_rx_q *rxq = fep->rx_queue[queue_id]; - struct bufdesc_ex *ex_base; - struct bufdesc *base; - int ring_size; - - if (bdp >= txq->tx_bd_base) { - base = txq->tx_bd_base; - ring_size = txq->tx_ring_size; - ex_base = (struct bufdesc_ex *)txq->tx_bd_base; - } else { - base = rxq->rx_bd_base; - ring_size = rxq->rx_ring_size; - ex_base = (struct bufdesc_ex *)rxq->rx_bd_base; - } - - if (fep->bufdesc_ex) - return (struct bufdesc *)((ex_new_bd >= (ex_base + ring_size)) ? - ex_base : ex_new_bd); - else - return (new_bd >= (base + ring_size)) ? 
- base : new_bd; -} - -static inline -struct bufdesc *fec_enet_get_prevdesc(struct bufdesc *bdp, - struct fec_enet_private *fep, - int queue_id) -{ - struct bufdesc *new_bd = bdp - 1; - struct bufdesc_ex *ex_new_bd = (struct bufdesc_ex *)bdp - 1; - struct fec_enet_priv_tx_q *txq = fep->tx_queue[queue_id]; - struct fec_enet_priv_rx_q *rxq = fep->rx_queue[queue_id]; - struct bufdesc_ex *ex_base; - struct bufdesc *base; - int ring_size; - - if (bdp >= txq->tx_bd_base) { - base = txq->tx_bd_base; - ring_size = txq->tx_ring_size; - ex_base = (struct bufdesc_ex *)txq->tx_bd_base; - } else { - base = rxq->rx_bd_base; - ring_size = rxq->rx_ring_size; - ex_base = (struct bufdesc_ex *)rxq->rx_bd_base; - } +static struct bufdesc *fec_enet_get_nextdesc(struct bufdesc *bdp, +struct bufdesc_prop *bd) +{ + return (bdp >= bd->last) ? bd->base + : (struct bufdesc *)(((unsigned)bdp) + bd->dsize); +} - if (fep->bufdesc_ex) - return (struct bufdesc *)((ex_new_bd < ex_base) ? -
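The replacement helper above turns the next-descriptor lookup into one compare plus one add, because base/last/dsize are cached in struct bufdesc_prop. A userspace sketch of the same pointer math (the struct and field names here are illustrative stand-ins, not the driver's types; dsize is the descriptor stride, e.g. 8 for struct bufdesc and 16 for the extended variant):

```c
#include <stddef.h>

/* Simplified stand-in for the patch's struct bufdesc_prop. */
struct bufdesc_prop_sketch {
	char *base;          /* first descriptor in the ring */
	char *last;          /* address of the final descriptor */
	unsigned char dsize; /* descriptor stride in bytes */
};

/* Same shape as the new fec_enet_get_nextdesc(): wrap at 'last',
 * otherwise step forward by the cached stride. */
static void *next_desc(void *bdp, const struct bufdesc_prop_sketch *bd)
{
	return ((char *)bdp >= bd->last) ? bd->base
					 : (char *)bdp + bd->dsize;
}
```

Compared with the deleted helper, nothing about the queue has to be re-derived per call; the ring geometry travels with the queue.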
[PATCH net-next V2 0/8] net: fec: cleanup/fixes
V2 is a rebase on top of Johannes' endian-safe patch and is only the first eight patches. The testing for this series was done on a nitrogen6x. The base commit was commit b45efa30a626e915192a6c548cd8642379cd47cc Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

Testing showed no change in performance. Testing used imx_v6_v7_defconfig + CONFIG_MICREL_PHY. The processor was running at 996 MHz. The following commands were used to get the transfer rates.

On an x86 Ubuntu system:
iperf -s -i.5 -u

On a nitrogen6x board, running via SD Card. I first stopped some background processes:
stop cron
stop upstart-file-bridge
stop upstart-socket-bridge
stop upstart-udev-bridge
stop rsyslog
stop dbus
killall dhclient
echo performance >/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
taskset 0x2 iperf -c 192.168.0.201 -u -t60 -b500M -r

There is a branch available on github with this series, and the rest of my fec patches, for those who would like to test it.
https://github.com:boundarydevices/linux-imx6.git branch net-next_master

Troy Kisky (8):
           SDCard TX/RX
  378/405  before any patches
  379/407  net: fec: stop the "rcv is not +last, " error messages
  378/405  net: fec: fix rx error counts
  379/396  net: fec: fix fec_enet_get_free_txdesc_num
  379/403  net: fec: add struct bufdesc_prop
  390/396  net: fec: add variable reg_desc_active to speed things up
  388/396  net: fec: don't disable FEC_ENET_TS_TIMER interrupt
  382/403  net: fec: don't transfer ownership until descriptor write is complete
  378/403  net: fec: improve error handling

 drivers/net/ethernet/freescale/fec.h      |  38 ++-
 drivers/net/ethernet/freescale/fec_main.c | 396 ++
 2 files changed, 196 insertions(+), 238 deletions(-)

--
2.5.0
[PATCH net-next V2 7/8] net: fec: don't transfer ownership until descriptor write is complete
If you don't own it, you shouldn't write to it.

Signed-off-by: Troy Kisky
---
 drivers/net/ethernet/freescale/fec_main.c | 14 +-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index ca2708d..97ca72a 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -390,6 +390,10 @@ fec_enet_txq_submit_frag_skb(struct fec_enet_priv_tx_q *txq,
 		bdp->cbd_bufaddr = cpu_to_fec32(addr);
 		bdp->cbd_datlen = cpu_to_fec16(frag_len);
+		/* Make sure the updates to rest of the descriptor are
+		 * performed before transferring ownership.
+		 */
+		wmb();
 		bdp->cbd_sc = cpu_to_fec16(status);
 	}

@@ -499,6 +503,10 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq,
 	bdp->cbd_datlen = cpu_to_fec16(buflen);
 	bdp->cbd_bufaddr = cpu_to_fec32(addr);
+	/* Make sure the updates to rest of the descriptor are performed before
+	 * transferring ownership.
+	 */
+	wmb();

 	/* Send it on its way.  Tell FEC it's ready, interrupt when done,
 	 * it's the last BD of the frame, and to put the CRC on the end.
@@ -1475,7 +1483,6 @@ rx_processing_done:
 		/* Mark the buffer empty */
 		status |= BD_ENET_RX_EMPTY;
-		bdp->cbd_sc = cpu_to_fec16(status);

 		if (fep->bufdesc_ex) {
 			struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
 			ebdp->cbd_prot = 0;
 			ebdp->cbd_bdu = 0;
 		}
+		/* Make sure the updates to rest of the descriptor are
+		 * performed before transferring ownership.
+		 */
+		wmb();
+		bdp->cbd_sc = cpu_to_fec16(status);

 		/* Update BD pointer to next entry */
 		bdp = fec_enet_get_nextdesc(bdp, &rxq->bd);
--
2.5.0
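The ordering rule this patch enforces is: fill every descriptor field first, issue a write barrier, and only then flip the ownership bit. A userspace sketch of that publish sequence, using a C11 release fence as a stand-in for the kernel's wmb() (the driver must use wmb() because the consumer is DMA hardware, not another CPU; field names mirror struct bufdesc and the EMPTY bit value is illustrative):

```c
#include <stdatomic.h>
#include <stdint.h>

#define BD_ENET_RX_EMPTY 0x8000 /* ownership bit, illustrative value */

/* Simplified stand-in for the driver's buffer descriptor. */
struct bufdesc {
	uint16_t cbd_sc;      /* status word holding the ownership bit */
	uint16_t cbd_datlen;
	uint32_t cbd_bufaddr;
};

static void publish_rx_desc(struct bufdesc *bdp, uint32_t addr, uint16_t len)
{
	/* Writes to the descriptor body... */
	bdp->cbd_bufaddr = addr;
	bdp->cbd_datlen = len;
	/* ...must be visible before the ownership transfer below. */
	atomic_thread_fence(memory_order_release);
	bdp->cbd_sc |= BD_ENET_RX_EMPTY;
}
```

Without the fence, a weakly ordered machine may make the ownership bit visible before the buffer address, and the consumer would read a half-written descriptor it legitimately owns.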
[PATCH net-next V2 6/8] net: fec: don't disable FEC_ENET_TS_TIMER interrupt
Only the interrupt routine processes this condition.

Signed-off-by: Troy Kisky
---
 drivers/net/ethernet/freescale/fec.h      | 1 +
 drivers/net/ethernet/freescale/fec_main.c | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index bedd28a..195122e 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -375,6 +375,7 @@ struct bufdesc_ex {
 #define FEC_ENET_TS_TIMER	((uint)0x8000)

 #define FEC_DEFAULT_IMASK (FEC_ENET_TXF | FEC_ENET_RXF | FEC_ENET_MII | FEC_ENET_TS_TIMER)
+#define FEC_NAPI_IMASK	(FEC_ENET_MII | FEC_ENET_TS_TIMER)
 #define FEC_RX_DISABLED_IMASK (FEC_DEFAULT_IMASK & (~FEC_ENET_RXF))

 /* ENET interrupt coalescing macro define */
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 712e3bb..ca2708d 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1553,7 +1553,7 @@ fec_enet_interrupt(int irq, void *dev_id)

 		if (napi_schedule_prep(&fep->napi)) {
 			/* Disable the NAPI interrupts */
-			writel(FEC_ENET_MII, fep->hwp + FEC_IMASK);
+			writel(FEC_NAPI_IMASK, fep->hwp + FEC_IMASK);
 			__napi_schedule(&fep->napi);
 		}
 	}
--
2.5.0
Re: [PATCH v2 net-next 0/4] batch calls to fib_flush and arp_ifdown
Forgot to mention the rtnl_lock hold time gain with these changes. I got the following benchmark results on one of our switches. Without this patch, deleting 1k interfaces with 100k routes in the fib held the rtnl_lock for 13 seconds. With the patch, rtnl_lock hold time went down to 5 seconds. The gain is even more pronounced with 512k routes in the FIB. In this case, without the patch, rtnl_lock was held for 36 seconds and with the patch it was held for 5.5 seconds.

On Thu, Feb 4, 2016 at 3:35 PM, Salam Noureddine wrote:
> Added changes suggested by Julian Anastasov in version 2.
>
> fib_flush walks the whole fib in a net_namespace and is called for
> each net_device being closed or unregistered. This can be very expensive
> when dealing with 100k or more routes in the fib and removal of a lot
> of interfaces. These four patches deal with this issue by calling fib_flush
> just once for each net namespace and introduce a new function arp_ifdown_all
> that does a similar optimization for the neighbour table.
>
> The benchmark tests were run on linux-3.18.
>
> Salam Noureddine (4):
>   net: add event_list to struct net and provide utility functions
>   net: dev: add batching to net_device notifiers
>   net: core: introduce neigh_ifdown_all for all down interfaces
>   net: fib: avoid calling fib_flush for each device when doing batch
>     close and unregister
>
>  include/linux/netdevice.h   |  2 ++
>  include/net/arp.h           |  1 +
>  include/net/neighbour.h     |  1 +
>  include/net/net_namespace.h | 22 +
>  include/net/netns/ipv4.h    |  1 +
>  net/core/dev.c              | 48 -
>  net/core/neighbour.c        | 38 ---
>  net/core/net_namespace.c    |  1 +
>  net/ipv4/arp.c              |  4
>  net/ipv4/fib_frontend.c     | 16 +--
>  10 files changed, 120 insertions(+), 14 deletions(-)
>
> --
> 1.8.1.4
>
Re: [V4.4-rc6 Regression] af_unix: Revert 'lock_interruptible' in stream receive code
On Fri, 2016-02-05 at 21:44 +, Rainer Weikusat wrote:
> The present unix_stream_read_generic contains various code sequences of
> the form
>
> 	err = -EDISASTER;
> 	if ()
> 		goto out;
>
> This has the unfortunate side effect of possibly causing the error code
> to bleed through to the final
>
> out:
> 	return copied ? : err;
>
> and then to be wrongly returned if no data was copied because the caller
> didn't supply a data buffer, as demonstrated by the program available at
>
> http://pad.lv/1540731
>
> Change it such that err is only set if an error condition was detected.

Well, if you replace the traditional flow

	err = -;
	if (test)
		goto out;

Then please add unlikely() to at least give a hint to the compiler.

	if (unlikely(test)) {
		err = -XXX;
		goto out;
	}

And please add a 'Fixes: ' tag for bug fixes.

Thanks.
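A minimal userspace sketch of the error-path style being asked for here, with likely()/unlikely() defined via __builtin_expect the way the kernel's <linux/compiler.h> does; EXDOOMED is a placeholder errno value for the sketch, not a real kernel code:

```c
#include <string.h>

/* Userspace stand-ins for the kernel's branch-prediction hints. */
#define likely(x)   __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)

#define EXDOOMED 101 /* placeholder errno value, illustrative only */

/* err is assigned inside the (unlikely) failure branch, not
 * speculatively before the test, so it cannot leak into the
 * success path. */
static int copy_data(const char *src, char *dst, unsigned long dstlen)
{
	int err = 0;
	unsigned long n = strlen(src);

	if (unlikely(n >= dstlen)) {
		err = -EXDOOMED; /* only set on the failure path */
		goto out;
	}
	memcpy(dst, src, n + 1);
out:
	return err;
}
```

The __builtin_expect hint tells the compiler to lay out the failure branch off the hot path, which is exactly what the error-handling goto idiom wants.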
[net-next PATCH 01/10] net: Drop unecessary enc_features variable from tunnel segmentation functions
The enc_features variable isn't necessary since features isn't used anywhere after we create enc_features so instead just use a destructive AND on features itself and save ourselves the variable declaration. Signed-off-by: Alexander Duyck--- net/ipv4/gre_offload.c |6 +++--- net/ipv4/udp_offload.c |6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 5a8ee3282550..02cb1a416c7d 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -19,7 +19,6 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); - netdev_features_t enc_features; int ghl; struct gre_base_hdr *greh; u16 mac_offset = skb->mac_header; @@ -68,9 +67,10 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, skb_set_network_header(skb, skb_inner_network_offset(skb)); skb->mac_len = skb_inner_network_offset(skb); + features &= skb->dev->hw_enc_features; + /* segment inner packet. */ - enc_features = skb->dev->hw_enc_features & features; - segs = skb_mac_gso_segment(skb, enc_features); + segs = skb_mac_gso_segment(skb, features); if (IS_ERR_OR_NULL(segs)) { skb_gso_error_unwind(skb, protocol, ghl, mac_offset, mac_len); goto out; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 4c519c1dc161..ce64c2b7ba55 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -37,7 +37,6 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, int mac_len = skb->mac_len; int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); __be16 protocol = skb->protocol; - netdev_features_t enc_features; int udp_offset, outer_hlen; unsigned int oldlen; bool need_csum = !!(skb_shinfo(skb)->gso_type & @@ -65,9 +64,10 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, (skb->dev->features & (is_ipv6 ? 
NETIF_F_IPV6_CSUM : NETIF_F_IP_CSUM; + features &= skb->dev->hw_enc_features; + /* segment inner packet. */ - enc_features = skb->dev->hw_enc_features & features; - segs = gso_inner_segment(skb, enc_features); + segs = gso_inner_segment(skb, features); if (IS_ERR_OR_NULL(segs)) { skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset, mac_len);
[net-next PATCH 00/10] Add GSO support for outer checksum w/ inner checksum offloads
This patch series updates the existing segmentation offload code for tunnels to make better use of existing and updated GSO checksum computation. This is done primarily through two mechanisms. First we maintain a separate checksum in the GSO context block of the sk_buff. This allows us to maintain two checksum values, one offloaded with values stored in csum_start and csum_offset, and one computed and tracked in SKB_GSO_CB(skb)->csum. By maintaining these two values we are able to take advantage of the same sort of math used in local checksum offload so that we can provide both inner and outer checksums with minimal overhead.

Below is the performance for a netperf session between an ixgbe PF and VF on the same host but in different namespaces. As can be seen a significant gain in performance can be had from allowing the use of Tx checksum offload on the inner headers while performing a software offload on the outer header computation:

Recv   Send    Send                          Utilization       Service Demand
Socket Socket  Message  Elapsed              Send     Recv     Send    Recv
Size   Size    Size     Time     Throughput  local    remote   local   remote
bytes  bytes   bytes    secs.    10^6bits/s  % S      % U      us/KB   us/KB

Before:
87380  16384   16384    10.00    12844.38    9.30     -1.00    0.712   -1.00

After:
87380  16384   16384    10.00    13216.63    6.78     -1.00    0.504   -1.000

Changes from v1:
* Dropped use of CHECKSUM_UNNECESSARY for remote checksum offload
* Left encap_hdr_csum as it will likely be needed in future for SCTP GSO
* Broke the changes out over many more patches
* Updated GRE segmentation to more closely match UDP tunnel segmentation

---

Alexander Duyck (10):
      net: Drop unecessary enc_features variable from tunnel segmentation
        functions
      net: Move GSO csum into SKB_GSO_CB
      net: Update remote checksum segmentation to support use of GSO checksum
      net: Store checksum result for offloaded GSO checksums
      net: Move skb_has_shared_frag check out of GRE code and into
        segmentation
      gre: Use GSO flags to determine csum need instead of GRE flags
      gre: Use inner_proto to obtain inner header protocol
      udp: Clean up the use of flags in UDP segmentation offload
      udp: Use uh->len instead of skb->len to compute checksum in segmentation
      net: Allow tunnels to use inner checksum offloads with outer checksums
        needed

 include/linux/skbuff.h |  29 +++
 net/core/skbuff.c      |  34 +++---
 net/ipv4/gre_offload.c |  85 ++--
 net/ipv4/tcp_offload.c |   8 +++-
 net/ipv4/udp_offload.c |  93 +++-
 5 files changed, 127 insertions(+), 122 deletions(-)

--
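The "same sort of math used in local checksum offload" works because the Internet checksum is a 1's-complement sum, which is associative: partial sums over disjoint byte ranges can be combined later without re-reading the data, so an outer checksum can be derived from a tracked running sum plus the inner result. A plain-C sketch of that arithmetic (this is not the kernel's csum_partial, just the same math):

```c
#include <stdint.h>
#include <stddef.h>

/* Add two running 1's-complement sums, folding the end-around carry. */
static uint32_t csum_add(uint32_t a, uint32_t b)
{
	uint32_t s = a + b;
	return s + (s < a); /* carry out of 32 bits wraps back in */
}

/* Accumulate big-endian 16-bit words of buf into sum. */
static uint32_t csum_partial(const uint8_t *buf, size_t len, uint32_t sum)
{
	size_t i;

	for (i = 0; i + 1 < len; i += 2)
		sum = csum_add(sum, (uint32_t)(buf[i] << 8 | buf[i + 1]));
	if (len & 1) /* odd trailing byte pads with zero */
		sum = csum_add(sum, (uint32_t)buf[len - 1] << 8);
	return sum;
}

/* Fold the 32-bit running sum to 16 bits and complement it. */
static uint16_t csum_fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}
```

Because the partial sums combine, checksumming a header range and a payload range separately and feeding one result into the other yields the same folded checksum as one pass over the whole buffer, which is what lets the GSO code keep one offloaded checksum and one software-tracked checksum cheaply.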
[PATCH net-next V2 1/8] net: fec: stop the "rcv is not +last, " error messages
Setting the FTRL register will stop the fec from trying to use multiple receive buffers.

Signed-off-by: Troy Kisky
---
 drivers/net/ethernet/freescale/fec.h      | 1 +
 drivers/net/ethernet/freescale/fec_main.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index 2106d72..cc9677a 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -64,6 +64,7 @@
 #define FEC_R_FIFO_RSEM		0x194 /* Receive FIFO section empty threshold */
 #define FEC_R_FIFO_RAEM		0x198 /* Receive FIFO almost empty threshold */
 #define FEC_R_FIFO_RAFL		0x19c /* Receive FIFO almost full threshold */
+#define FEC_FTRL		0x1b0 /* Frame truncation receive length */
 #define FEC_RACC		0x1c4 /* Receive Accelerator function */
 #define FEC_RCMR_1		0x1c8 /* Receive classification match ring 1 */
 #define FEC_RCMR_2		0x1cc /* Receive classification match ring 2 */
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 41c81f6..3e5b24a 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -988,6 +988,7 @@ fec_restart(struct net_device *ndev)
 		val &= ~FEC_RACC_OPTIONS;
 		writel(val, fep->hwp + FEC_RACC);
 	}
+	writel(PKT_MAXBUF_SIZE, fep->hwp + FEC_FTRL);
 #endif

 /*
--
2.5.0
Re: [PATCH] net: smc911x: convert pxa dma to dmaengine
From: Robert Jarzmik
Date: Fri, 05 Feb 2016 22:44:56 +0100

> Apart from Alberto who answered he cannot test it by lack of hardware, the
> others didn't answer.
>
> So how can I move forward? Would you want me to amend the KConfig to add a
> "&& !ARCH_PXA" on the "depend" line?

Please just keep pinging people to properly test this.
[PATCH] af_unix: Don't set err in unix_stream_read_generic unless there was an error
The present unix_stream_read_generic contains various code sequences of the form

	err = -EDISASTER;
	if ()
		goto out;

This has the unfortunate side effect of possibly causing the error code to bleed through to the final

out:
	return copied ? : err;

and then to be wrongly returned if no data was copied because the caller didn't supply a data buffer, as demonstrated by the program available at

http://pad.lv/1540731

Change it such that err is only set if an error condition was detected.

Signed-off-by: Rainer Weikusat
---
With proper subject this time (at least I hope so).

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 49d5093..138787d 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2277,13 +2277,15 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
 	size_t size = state->size;
 	unsigned int last_len;

-	err = -EINVAL;
-	if (sk->sk_state != TCP_ESTABLISHED)
+	if (sk->sk_state != TCP_ESTABLISHED) {
+		err = -EINVAL;
 		goto out;
+	}

-	err = -EOPNOTSUPP;
-	if (flags & MSG_OOB)
+	if (flags & MSG_OOB) {
+		err = -EOPNOTSUPP;
 		goto out;
+	}

 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
 	timeo = sock_rcvtimeo(sk, noblock);
@@ -2329,9 +2331,11 @@ again:
 			goto unlock;
 		unix_state_unlock(sk);

-		err = -EAGAIN;
-		if (!timeo)
+		if (!timeo) {
+			err = -EAGAIN;
 			break;
+		}
+
 		mutex_unlock(&u->readlock);

 		timeo = unix_stream_data_wait(sk, timeo, last,
[PATCH] af_unix: Don't use continue to re-execute unix_stream_read_generic loop
The unix_stream_read_generic function tries to use a continue statement to restart the receive loop after waiting for a message. This may not work as intended as the caller might use a recvmsg call to peek at control messages without specifying a message buffer. If this was the case, the continue will cause the function to return without an error and without the credential information if the function had to wait for a message, while it had returned with the credentials otherwise. Change to using goto to restart the loop without checking the condition first in this case so that credentials are returned either way.

Signed-off-by: Rainer Weikusat
---

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 49d5093..3b73bd7 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2305,6 +2305,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
 	bool drop_skb;
 	struct sk_buff *skb, *last;

+redo:
 	unix_state_lock(sk);
 	if (sock_flag(sk, SOCK_DEAD)) {
 		err = -ECONNRESET;
@@ -2344,7 +2345,7 @@ again:
 		}

 		mutex_lock(&u->readlock);
-		continue;
+		goto redo;
 unlock:
 		unix_state_unlock(sk);
 		break;
Re: [PATCH] net: smc911x: convert pxa dma to dmaengine
Robert Jarzmik writes:
> David Miller writes:
>
>> From: Robert Jarzmik
>> Date: Mon, 30 Nov 2015 22:40:28 +0100
>>
>>> Convert the dma transfers to be dmaengine based, now pxa has a dmaengine
>>> slave driver. This makes this driver a bit more PXA agnostic.
>>>
>>> The driver was only compile tested. The risk is quite small as no
>>> current PXA platform I'm aware of is using smc911x driver.
>>>
>>> Signed-off-by: Robert Jarzmik
>>
>> I've marked this 'deferred' in patchwork until someone tests
>> these changes and says they should be good on all platforms
>> this chip is used.
>
> Okay, so would any maintainer of non pxa boards give a feedback for this
> patch?
> The ones I have found are:
> - sh2007: Guennadi and Hitoshi
> - armadillo5x0: Alberto
> - imx v6 and imx v7: Fabio
> I've added the patch at the end of this mail for easier handling.
>
> Now, if no maintainer gives it a test, what do we do, David? I'm intending to
> remove "arch/arm/mach-pxa/include/mach/dma.h" in the near future, which will
> break this driver somehow (at least for PXA boards, even if none is identified
> so far).
> So could we agree on a deadline, and what you wish to do: either drop the
> patch or apply, or something else.

Hi David,

Apart from Alberto who answered he cannot test it by lack of hardware, the
others didn't answer.

So how can I move forward? Would you want me to amend the KConfig to add a
"&& !ARCH_PXA" on the "depend" line?

Cheers.

--
Robert
Re: [V4.4-rc6 Regression] af_unix: Revert 'lock_interruptible' in stream receive code
The present unix_stream_read_generic contains various code sequences of the form

	err = -EDISASTER;
	if ()
		goto out;

This has the unfortunate side effect of possibly causing the error code to bleed through to the final

out:
	return copied ? : err;

and then to be wrongly returned if no data was copied because the caller didn't supply a data buffer, as demonstrated by the program available at

http://pad.lv/1540731

Change it such that err is only set if an error condition was detected.

Signed-off-by: Rainer Weikusat
---

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 49d5093..138787d 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2277,13 +2277,15 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
 	size_t size = state->size;
 	unsigned int last_len;

-	err = -EINVAL;
-	if (sk->sk_state != TCP_ESTABLISHED)
+	if (sk->sk_state != TCP_ESTABLISHED) {
+		err = -EINVAL;
 		goto out;
+	}

-	err = -EOPNOTSUPP;
-	if (flags & MSG_OOB)
+	if (flags & MSG_OOB) {
+		err = -EOPNOTSUPP;
 		goto out;
+	}

 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
 	timeo = sock_rcvtimeo(sk, noblock);
@@ -2329,9 +2331,11 @@ again:
 			goto unlock;
 		unix_state_unlock(sk);

-		err = -EAGAIN;
-		if (!timeo)
+		if (!timeo) {
+			err = -EAGAIN;
 			break;
+		}
+
 		mutex_unlock(&u->readlock);

 		timeo = unix_stream_data_wait(sk, timeo, last,
Re: [V4.4-rc6 Regression] af_unix: Revert 'lock_interruptible' in stream receive code
Rainer Weikusat writes:
> Joseph Salisbury writes:
>> On 02/05/2016 02:59 PM, Rainer Weikusat wrote:
>
> [recvmsg w/o iovecs returning ENOTSUP for CMSG requests]

[...]

> There are more problems wrt handling control-message only reads in this
> code.

[...]

> it will return without an error but also without credentials if the

[...]

> because the following
>
> 	mutex_lock(&u->readlock);
> 	continue;
>
> will cause the
>
> 	do {
> 	} while (size)
>
> loop condition to be evaluated and since size is 0 (AIUI), the loop will
> terminate immediately.

As I suspected, the test program included below doesn't really receive the credentials (tested with a 4.5.0-rc2-net w/ the previous patch applied). As that's a minor, additional problem, I'll fix that, too.

---
#define _GNU_SOURCE
#include
#include
#include
#include
#include
#include
#include
#include

int main(void)
{
	enum { server, client, size };
	int socket_fd[size];
	int const opt = 1;

	assert(socketpair(AF_LOCAL, SOCK_STREAM, 0, socket_fd) == 0);
	assert(setsockopt(socket_fd[server], SOL_SOCKET, SO_PASSCRED,
			  &opt, sizeof(opt)) != -1);

	char const msg[] = "A random message";

	if (fork() == 0) {
		sleep(1);
		send(socket_fd[client], msg, sizeof msg,
		     MSG_DONTWAIT | MSG_NOSIGNAL);
		_exit(0);
	}

	union {
		struct cmsghdr cmh;
		char control[CMSG_SPACE(sizeof(struct ucred))];
	} control_un;

	control_un.cmh.cmsg_len = CMSG_LEN(sizeof(struct ucred));
	control_un.cmh.cmsg_level = SOL_SOCKET;
	control_un.cmh.cmsg_type = SCM_CREDENTIALS;

	struct msghdr msgh;
	msgh.msg_name = NULL;
	msgh.msg_namelen = 0;
	msgh.msg_iov = NULL;
	msgh.msg_iovlen = 0;
	msgh.msg_control = control_un.control;
	msgh.msg_controllen = sizeof(control_un.control);

	if (recvmsg(socket_fd[server], &msgh, MSG_PEEK) == -1) {
		printf("Error: %s\n", strerror(errno));
		exit(EXIT_FAILURE);
	} else {
		struct ucred *ucred;

		printf("Success?\n");

		ucred = (void *)CMSG_DATA(&control_un.cmh);
		printf("... pid %ld, uid %d, gid %d\n",
		       (long)ucred->pid, ucred->uid, ucred->gid);
	}

	return 0;
}
[net-next PATCH 01/10] net: Drop unecessary enc_features variable from tunnel segmentation functions
The enc_features variable isn't necessary since features isn't used anywhere after we create enc_features so instead just use a destructive AND on features itself and save ourselves the variable declaration. Signed-off-by: Alexander Duyck--- net/ipv4/gre_offload.c |6 +++--- net/ipv4/udp_offload.c |6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 5a8ee3282550..02cb1a416c7d 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -19,7 +19,6 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); - netdev_features_t enc_features; int ghl; struct gre_base_hdr *greh; u16 mac_offset = skb->mac_header; @@ -68,9 +67,10 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, skb_set_network_header(skb, skb_inner_network_offset(skb)); skb->mac_len = skb_inner_network_offset(skb); + features &= skb->dev->hw_enc_features; + /* segment inner packet. */ - enc_features = skb->dev->hw_enc_features & features; - segs = skb_mac_gso_segment(skb, enc_features); + segs = skb_mac_gso_segment(skb, features); if (IS_ERR_OR_NULL(segs)) { skb_gso_error_unwind(skb, protocol, ghl, mac_offset, mac_len); goto out; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 4c519c1dc161..ce64c2b7ba55 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -37,7 +37,6 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, int mac_len = skb->mac_len; int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); __be16 protocol = skb->protocol; - netdev_features_t enc_features; int udp_offset, outer_hlen; unsigned int oldlen; bool need_csum = !!(skb_shinfo(skb)->gso_type & @@ -65,9 +64,10 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, (skb->dev->features & (is_ipv6 ? 
NETIF_F_IPV6_CSUM : NETIF_F_IP_CSUM; + features &= skb->dev->hw_enc_features; + /* segment inner packet. */ - enc_features = skb->dev->hw_enc_features & features; - segs = gso_inner_segment(skb, enc_features); + segs = gso_inner_segment(skb, features); if (IS_ERR_OR_NULL(segs)) { skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset, mac_len);
Re: [net-next PATCH 01/10] net: Drop unecessary enc_features variable from tunnel segmentation functions
Please ignore this patch. I am re-sending series with cover page. Sorry for the noise. - Alex
[PATCH 18/30] rapidio/rionet: add mport removal handling
Add handling of a local mport device removal. RIONET driver registers itself as class interface that supports only removal notification, 'add_device' callback is not provided because RIONET network device can be initialized only after enumeration is completed and the existing method (using remote peer addition) satisfies this condition.

Signed-off-by: Alexandre Bounine
Cc: Matt Porter
Cc: Aurelien Jacquiot
Cc: Andre van Herk
Cc: linux-ker...@vger.kernel.org
Cc: netdev@vger.kernel.org
---
 drivers/net/rionet.c | 70 +++--
 1 files changed, 44 insertions(+), 26 deletions(-)

diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c
index c15d958..9cfe6ae 100644
--- a/drivers/net/rionet.c
+++ b/drivers/net/rionet.c
@@ -676,6 +676,34 @@ static int rionet_shutdown(struct notifier_block *nb, unsigned long code,
 	return NOTIFY_DONE;
 }

+static void rionet_remove_mport(struct device *dev,
+				struct class_interface *class_intf)
+{
+	struct rio_mport *mport = to_rio_mport(dev);
+	struct net_device *ndev;
+	int id = mport->id;
+
+	pr_debug("%s %s\n", __func__, mport->name);
+
+	WARN(nets[id].nact, "%s called when connected to %d peers\n",
+	     __func__, nets[id].nact);
+	WARN(!nets[id].ndev, "%s called for mport without NDEV\n",
+	     __func__);
+
+	if (nets[id].ndev) {
+		ndev = nets[id].ndev;
+		netif_stop_queue(ndev);
+		unregister_netdev(ndev);
+
+		free_pages((unsigned long)nets[id].active,
+			   get_order(sizeof(void *) *
+			   RIO_MAX_ROUTE_ENTRIES(mport->sys_size)));
+		nets[id].active = NULL;
+		free_netdev(ndev);
+		nets[id].ndev = NULL;
+	}
+}
+
 #ifdef MODULE
 static struct rio_device_id rionet_id_table[] = {
 	{RIO_DEVICE(RIO_ANY_ID, RIO_ANY_ID)},
@@ -696,6 +724,13 @@ static struct notifier_block rionet_notifier = {
 	.notifier_call = rionet_shutdown,
 };

+/* the rio_mport_interface is used to handle local mport devices */
+static struct class_interface rio_mport_interface __refdata = {
+	.class = &rio_mport_class,
+	.add_dev = NULL,
+	.remove_dev = rionet_remove_mport,
+};
+
 static int __init rionet_init(void)
 {
 	int ret;
@@ -706,39 +741,22 @@ static int __init rionet_init(void)
 		       DRV_NAME, ret);
 		return ret;
 	}
+
+	ret = class_interface_register(&rio_mport_interface);
+	if (ret) {
+		pr_err("%s: class_interface_register error: %d\n",
+		       DRV_NAME, ret);
+		return ret;
+	}
+
 	return subsys_interface_register(&rionet_interface);
 }

 static void __exit rionet_exit(void)
 {
-	struct rionet_private *rnet;
-	struct net_device *ndev;
-	struct rionet_peer *peer, *tmp;
-	int i;
-
-	for (i = 0; i < RIONET_MAX_NETS; i++) {
-		if (nets[i].ndev != NULL) {
-			ndev = nets[i].ndev;
-			rnet = netdev_priv(ndev);
-			unregister_netdev(ndev);
-
-			list_for_each_entry_safe(peer,
-						 tmp, &nets[i].peers, node) {
-				list_del(&peer->node);
-				kfree(peer);
-			}
-
-			free_pages((unsigned long)nets[i].active,
-				   get_order(sizeof(void *) *
-				   RIO_MAX_ROUTE_ENTRIES(rnet->mport->sys_size)));
-			nets[i].active = NULL;
-
-			free_netdev(ndev);
-		}
-	}
-
 	unregister_reboot_notifier(&rionet_notifier);
 	subsys_interface_unregister(&rionet_interface);
+	class_interface_unregister(&rio_mport_interface);
 }

 late_initcall(rionet_init);
--
1.7.8.4
[PATCH 0/2] ethtool: {SG}RXFH indirection deficiency
This patch set adds a new ethtool operation .reset_rxfh_indir which is used by the core ethtool stack to properly indicate to a driver that the default RSS indirection table has been requested. Current behavior for notifying the default settings is indistinguishable from an explicit request. There is no easy way to look at the indirection table and tell if it matches the default, either. To allow drivers the ability to correctly report -EINVAL when changing the number of channels, add the new operation suggested. I chose to use a new ethtool op instead of an additional flag since this has a lower impact and we already use NULL on the *indir variable in set_rxfh to indicate no change was requested. The second patch in the series is an example implementation of the .reset_rxfh_indir operation along with fixes to the fm10k_set_channels to prevent changing the number of channels if it would interfere with the current redirection table. Jacob Keller (2): ethtool: support notifying drivers when user requests default rxfh table fm10k: correctly report error when changing number of channels drivers/net/ethernet/intel/fm10k/fm10k.h | 2 ++ drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c | 45 drivers/net/ethernet/intel/fm10k/fm10k_main.c| 11 -- include/linux/ethtool.h | 3 ++ net/core/ethtool.c | 8 + 5 files changed, 66 insertions(+), 3 deletions(-) -- 2.7.0.236.gda096a0.dirty
[PATCH 1/2] ethtool: support notifying drivers when user requests default rxfh table
Currently, userspace ethtool supports requesting the default indirection table by passing 0 as the indir_size. However, ops->set_rxfh does not distinguish between the user requesting the default via indir_size=0 and the user explicitly requesting settings that happen to equal the default. This causes problems because other driver changes, such as the number of channels (queues), should fail when they would not work with other current settings such as the indirection table. If there is no way to tell whether the driver is in the default RSS table state or a user-set state, then we could never allow changing the number of queues at all. To fix this, drivers must be able to distinguish between requested settings and the configured defaults. We can't use a NULL *indir pointer for this, as that is already used to indicate no change to the indirection table. Instead, implement a new callback, ops->reset_rxfh_indir, which is invoked whenever the user requests a size of zero. This has lower impact than adding a new flag and can be implemented by drivers as necessary. This gives us the following scenarios:

(a) The driver is in its default configuration and is free to change RSS settings as necessary due to other changes such as the number of queues. This makes sense since we can essentially consider it as "RSS indirection has not been configured". This is the default state of a new device, and in this state I think the reasonable default is "always RSS to all enabled queues".

(b) The user has requested RSS indirection settings, which should change the driver state to "RSS indirection table has been configured". Now if the user requests a change in the number of queues, we can properly report an error when those settings would conflict with the requested RSS indirection table.

(c) The user can request default settings via the {GS}RXFH operations, and the driver will get a clear indication that it should reset to the default "RSS indirection table has not been configured" mode.
In this way it will be able to then change the number of queues and go about business. If we don't have a way to properly indicate that we've reset to default then we are not able to implement the proposed behavior, so this patch adds a new method to properly indicate that we have reset to the default indirection table. Signed-off-by: Jacob KellerCc: Dave Miller --- This is an alternative proposal to my previous patch. I do not believe it is possible to obtain desired behavior without this patch, as it is not possible for the driver to distinguish default settings from user configured RSS table. If we don't do that, then the user will never be able to reduce the number of queues without first modifying the RSS redirection table, which seems wrong to me. include/linux/ethtool.h | 3 +++ net/core/ethtool.c | 8 2 files changed, 11 insertions(+) diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 653dc9c4ebac..700ac5658d34 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -186,6 +186,8 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings) * will remain unchanged. * Returns a negative error code or zero. An error code must be returned * if at least one unsupported change was requested. + * @reset_rxfh_indir: Reset the contents of the RX flow hash indirection table + * to driver defaults. Returns a negative error code or zero. * @get_channels: Get number of channels. * @set_channels: Set number of channels. Returns a negative error code or * zero. 
@@ -262,6 +264,7 @@ struct ethtool_ops { u8 *hfunc); int (*set_rxfh)(struct net_device *, const u32 *indir, const u8 *key, const u8 hfunc); + int (*reset_rxfh_indir)(struct net_device *); void(*get_channels)(struct net_device *, struct ethtool_channels *); int (*set_channels)(struct net_device *, struct ethtool_channels *); int (*get_dump_flag)(struct net_device *, struct ethtool_dump *); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index daf04709dd3c..4c6a1c2b8b61 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -725,7 +725,13 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, if (ret) goto out; + /* user_size == 0 means reset the indir table to default. */ if (user_size == 0) { + if (ops->reset_rxfh_indir) { + err = ops->reset_rxfh_indir(dev); + goto out; + } + for (i = 0; i < dev_size; i++) indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data); } else { @@ -880,6 +886,8 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, rxfh.indir_size); if (ret)
[PATCH 2/2] fm10k: correctly report error when changing number of channels
Previously, the fm10k driver would incorrectly allow changing the number of combined channels even when doing so would have altered a user-configured RSS indirection table. With the new ethtool operation .reset_rxfh_indir, we can now handle changes to the number of channels correctly. This requires several changes:

(a) First, store whether or not the RSS redirection table has been manually configured, since there is no way to tell the default table from an explicitly requested one. Do this by implementing the reset_rxfh_indir ethtool op and storing a flag that indicates how the redirection table was set.

(b) Replace the fm10k_init_reta check of an initialized netdevice with a check of the new FM10K_FLAG_RETA_TABLE_CONFIGURED flag. This ensures the table is always repopulated while we are in the default (unconfigured) state. We must still repopulate if the reta table somehow ends up in a state made invalid by new RSS limits; since this should no longer happen, add a dev_err() call to clearly tell the user what happened.

(c) Modify fm10k_set_channels to check that the new count is within the bounds of the reta table. This check is only enforced when the user has manually configured the RSS indirection table, since the driver is otherwise free to repopulate its own default configuration.
Signed-off-by: Jacob Keller--- drivers/net/ethernet/intel/fm10k/fm10k.h | 2 ++ drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c | 45 drivers/net/ethernet/intel/fm10k/fm10k_main.c| 11 -- 3 files changed, 55 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h index 83f386714e87..983bdda9509b 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k.h @@ -268,6 +268,7 @@ struct fm10k_intfc { #define FM10K_FLAG_RX_TS_ENABLED (u32)(BIT(3)) #define FM10K_FLAG_SWPRI_CONFIG(u32)(BIT(4)) #define FM10K_FLAG_DEBUG_STATS (u32)(BIT(5)) +#define FM10K_FLAG_RETA_TABLE_CONFIGURED (u32)(BIT(6)) int xcast_mode; /* Tx fast path data */ @@ -475,6 +476,7 @@ netdev_tx_t fm10k_xmit_frame_ring(struct sk_buff *skb, void fm10k_tx_timeout_reset(struct fm10k_intfc *interface); bool fm10k_check_tx_hang(struct fm10k_ring *tx_ring); void fm10k_alloc_rx_buffers(struct fm10k_ring *rx_ring, u16 cleaned_count); +void fm10k_init_reta(struct fm10k_intfc *interface); /* PCI */ void fm10k_mbx_free_irq(struct fm10k_intfc *); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c index 6a9f9886cb98..febfa2b009ea 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c @@ -1060,6 +1060,8 @@ static int fm10k_set_reta(struct net_device *netdev, const u32 *indir) if (!indir) return 0; + interface->flags |= FM10K_FLAG_RETA_TABLE_CONFIGURED; + /* Verify user input. 
*/ rss_i = interface->ring_feature[RING_F_RSS].indices; for (i = fm10k_get_reta_size(netdev); i--;) { @@ -1137,6 +1139,25 @@ static int fm10k_set_rssh(struct net_device *netdev, const u32 *indir, return 0; } +static int fm10k_reset_rssh(struct net_device *netdev) +{ + struct fm10k_intfc *interface = netdev_priv(netdev); + struct fm10k_hw *hw = >hw; + int i; + + /* user has requested default configuration so clear configured flag */ + interface->flags &= ~FM10K_FLAG_RETA_TABLE_CONFIGURED; + + /* initialize the reta table to driver defaults */ + fm10k_init_reta(interface); + + /* write the new RETA table to hardware */ + for (i = 0; i < FM10K_RETA_SIZE; i++) + fm10k_write_reg(hw, FM10K_RETA(0, i), interface->reta[i]); + + return 0; +} + static unsigned int fm10k_max_channels(struct net_device *dev) { struct fm10k_intfc *interface = netdev_priv(dev); @@ -1173,6 +1194,8 @@ static int fm10k_set_channels(struct net_device *dev, struct fm10k_intfc *interface = netdev_priv(dev); unsigned int count = ch->combined_count; struct fm10k_hw *hw = >hw; + u32 reta0, reta1, reta2, reta3; + int i, rss_i = 0; /* verify they are not requesting separate vectors */ if (!count || ch->rx_count || ch->tx_count) @@ -1186,6 +1209,27 @@ static int fm10k_set_channels(struct net_device *dev, if (count > fm10k_max_channels(dev)) return -EINVAL; + /* determine the current number of queues used by the reta table */ + for (i = FM10K_RETA_SIZE; i--;) { + reta0 = (interface->reta[i] << 24) >> 24; + reta1 = (interface->reta[i] << 16) >> 24; + reta2 = (interface->reta[i] << 8) >> 24; +
[PATCH net-next 7/8] net: ip_tunnel: remove 'csum_help' argument to iptunnel_handle_offloads
All users now pass false, so we can remove it, and remove the code that was conditional upon it. Signed-off-by: Edward Cree--- drivers/net/vxlan.c | 4 ++-- include/net/ip_tunnels.h| 3 +-- include/net/udp_tunnel.h| 3 +-- net/ipv4/fou.c | 4 ++-- net/ipv4/ip_gre.c | 3 +-- net/ipv4/ip_tunnel_core.c | 18 ++ net/ipv4/ipip.c | 2 +- net/ipv6/sit.c | 4 ++-- net/netfilter/ipvs/ip_vs_xmit.c | 6 ++ 9 files changed, 18 insertions(+), 29 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 7299e5f..a5c0363 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1729,7 +1729,7 @@ static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk, goto err; } - skb = iptunnel_handle_offloads(skb, false, type); + skb = iptunnel_handle_offloads(skb, type); if (IS_ERR(skb)) { err = -EINVAL; goto err; @@ -1807,7 +1807,7 @@ static int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *sk if (WARN_ON(!skb)) return -ENOMEM; - skb = iptunnel_handle_offloads(skb, false, type); + skb = iptunnel_handle_offloads(skb, type); if (IS_ERR(skb)) return PTR_ERR(skb); diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 6db96ea..bc439f3 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -279,8 +279,7 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, gfp_t flags); -struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, bool gre_csum, -int gso_type_mask); +struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, int gso_type_mask); static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len) { diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 734c156..97f5adb 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -103,8 +103,7 @@ static inline struct sk_buff *udp_tunnel_handle_offloads(struct sk_buff *skb, { int type = udp_csum ? 
SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; - /* As we're a UDP tunnel, we support LCO, so don't need csum_help */ - return iptunnel_handle_offloads(skb, false, type); + return iptunnel_handle_offloads(skb, type); } static inline void udp_tunnel_gro_complete(struct sk_buff *skb, int nhoff) diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index dac1874..88dab0c 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -787,7 +787,7 @@ int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, SKB_GSO_UDP_TUNNEL; __be16 sport; - skb = iptunnel_handle_offloads(skb, false, type); + skb = iptunnel_handle_offloads(skb, type); if (IS_ERR(skb)) return PTR_ERR(skb); @@ -820,7 +820,7 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, optlen += need_priv ? GUE_LEN_PRIV : 0; - skb = iptunnel_handle_offloads(skb, false, type); + skb = iptunnel_handle_offloads(skb, type); if (IS_ERR(skb)) return PTR_ERR(skb); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 9b31532..65748db 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -503,8 +503,7 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, static struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool csum) { - return iptunnel_handle_offloads(skb, false, - csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE); + return iptunnel_handle_offloads(skb, csum ? 
SKB_GSO_GRE_CSUM : SKB_GSO_GRE); } static struct rtable *gre_get_rt(struct sk_buff *skb, diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index d74ce93..a6e58b6 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -148,7 +148,6 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, EXPORT_SYMBOL_GPL(iptunnel_metadata_reply); struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, -bool csum_help, int gso_type_mask) { int err; @@ -166,18 +165,13 @@ struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, return skb; } - /* If packet is not gso and we are not offloading inner checksum, -* clear encapsulation flag. This allows setting CHECKSUM_PARTIAL -* on the outer header without confusing devices that implement -* NETIF_F_IP_CSUM with
[PATCH net-next 4/8] net: vxlan: enable local checksum offload
Signed-off-by: Edward Cree--- drivers/net/vxlan.c | 18 ++ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 6543918..7299e5f 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1706,10 +1706,8 @@ static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk, if (csum_start <= VXLAN_MAX_REMCSUM_START && !(csum_start & VXLAN_RCO_SHIFT_MASK) && (skb->csum_offset == offsetof(struct udphdr, check) || -skb->csum_offset == offsetof(struct tcphdr, check))) { - udp_sum = false; +skb->csum_offset == offsetof(struct tcphdr, check))) type |= SKB_GSO_TUNNEL_REMCSUM; - } } skb_scrub_packet(skb, xnet); @@ -1731,7 +1729,7 @@ static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk, goto err; } - skb = iptunnel_handle_offloads(skb, udp_sum, type); + skb = iptunnel_handle_offloads(skb, false, type); if (IS_ERR(skb)) { err = -EINVAL; goto err; @@ -1763,8 +1761,7 @@ static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk, skb_set_inner_protocol(skb, htons(ETH_P_TEB)); udp_tunnel6_xmit_skb(dst, sk, skb, dev, saddr, daddr, prio, -ttl, src_port, dst_port, -!!(vxflags & VXLAN_F_UDP_ZERO_CSUM6_TX)); +ttl, src_port, dst_port, !udp_sum); return 0; err: dst_release(dst); @@ -1791,10 +1788,8 @@ static int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *sk if (csum_start <= VXLAN_MAX_REMCSUM_START && !(csum_start & VXLAN_RCO_SHIFT_MASK) && (skb->csum_offset == offsetof(struct udphdr, check) || -skb->csum_offset == offsetof(struct tcphdr, check))) { - udp_sum = false; +skb->csum_offset == offsetof(struct tcphdr, check))) type |= SKB_GSO_TUNNEL_REMCSUM; - } } min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len @@ -1812,7 +1807,7 @@ static int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *sk if (WARN_ON(!skb)) return -ENOMEM; - skb = iptunnel_handle_offloads(skb, udp_sum, type); + skb = iptunnel_handle_offloads(skb, false, type); if (IS_ERR(skb)) 
return PTR_ERR(skb); @@ -1842,8 +1837,7 @@ static int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *sk skb_set_inner_protocol(skb, htons(ETH_P_TEB)); udp_tunnel_xmit_skb(rt, sk, skb, src, dst, tos, ttl, df, - src_port, dst_port, xnet, - !(vxflags & VXLAN_F_UDP_CSUM)); + src_port, dst_port, xnet, !udp_sum); return 0; }
Re: [V4.4-rc6 Regression] af_unix: Revert 'lock_interruptible' in stream receive code
On 02/05/2016 02:59 PM, Rainer Weikusat wrote: > Joseph Salisburywrites: >> Hi Rainer, >> >> A kernel bug report was opened against Ubuntu [0]. After a kernel >> bisect, it was found that reverting the following commit resolved this bug: >> >> commit 3822b5c2fc62e3de8a0f33806ff279fb7df92432 >> Author: Rainer Weikusat >> Date: Wed Dec 16 20:09:25 2015 + >> >> af_unix: Revert 'lock_interruptible' in stream receive code >> >> >> The regression was introduced as of v4.4-rc6. >> >> I was hoping to get your feedback, since you are the patch author. Do >> you think gathering any additional data will help diagnose this issue, >> or would it be best to submit a revert request? > Funny little problem :-). The code using the interruptible lock cleared > err as side effect hence the > > out: > return copied ? : err; > > at the end of unix_stream_read_generic didn't return the -ENOTSUP put > into err at the start of the function if copied was zero after the loop > because the size of the passed data buffer was zero. > > The following patch should fix this: > > - > diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c > index 49d5093..c3e1a08 100644 > --- a/net/unix/af_unix.c > +++ b/net/unix/af_unix.c > @@ -2300,6 +2300,7 @@ static int unix_stream_read_generic(struct > unix_stream_read_state *state) > else > skip = 0; > > + err = 0; > do { > int chunk; > bool drop_skb; > -- > > I was just about to go the the supermarket to buy an apple when I > received the mail. I didn't even compile the change above yet, however, > I'll do so once I'm back and then submit something formal. 
> > Here's a test program which can be compiled with a C compiler: > > #define _GNU_SOURCE > > #include > #include > #include > #include > #include > #include > #include > > int main(void) > { > enum { server, client, size }; > int socket_fd[size]; > int const opt = 1; > > assert(socketpair(AF_LOCAL, SOCK_STREAM, 0, socket_fd) == 0); > > char const msg[] = "A random message"; > send(socket_fd[client], msg, sizeof msg, MSG_DONTWAIT | MSG_NOSIGNAL); > > assert(setsockopt(socket_fd[server], SOL_SOCKET, SO_PASSCRED, , > sizeof(opt)) != -1); > > union { > struct cmsghdr cmh; > char control[CMSG_SPACE(sizeof(struct ucred))]; > } control_un; > > control_un.cmh.cmsg_len = CMSG_LEN(sizeof(struct ucred)); > control_un.cmh.cmsg_level = SOL_SOCKET; > control_un.cmh.cmsg_type = SCM_CREDENTIALS; > > struct msghdr msgh; > msgh.msg_name = NULL; > msgh.msg_namelen = 0; > msgh.msg_iov = NULL; > msgh.msg_iovlen = 0; > msgh.msg_control = control_un.control; > msgh.msg_controllen = sizeof(control_un.control); > > errno = 0; > > if (recvmsg(socket_fd[server], , MSG_PEEK) == -1) > { > printf("Error: %s\n", strerror(errno)); > exit(EXIT_FAILURE); > } > else > { > printf("Success!\n"); > exit(EXIT_SUCCESS); > } > } Thanks for the feedback. Just curious, was it a green apple or a red apple? :-)
[PATCH net-next 5/8] fou: enable LCO in FOU and GUE
Signed-off-by: Edward Cree--- net/ipv4/fou.c | 14 ++ 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 976f0dc..dac1874 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -774,7 +774,6 @@ static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e, uh->dest = e->dport; uh->source = sport; uh->len = htons(skb->len); - uh->check = 0; udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb, fl4->saddr, fl4->daddr, skb->len); @@ -784,11 +783,11 @@ static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e, int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, u8 *protocol, struct flowi4 *fl4) { - bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM); - int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; + int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM : + SKB_GSO_UDP_TUNNEL; __be16 sport; - skb = iptunnel_handle_offloads(skb, csum, type); + skb = iptunnel_handle_offloads(skb, false, type); if (IS_ERR(skb)) return PTR_ERR(skb); @@ -804,8 +803,8 @@ EXPORT_SYMBOL(fou_build_header); int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, u8 *protocol, struct flowi4 *fl4) { - bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM); - int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; + int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM : + SKB_GSO_UDP_TUNNEL; struct guehdr *guehdr; size_t hdrlen, optlen = 0; __be16 sport; @@ -814,7 +813,6 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, if ((e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) && skb->ip_summed == CHECKSUM_PARTIAL) { - csum = false; optlen += GUE_PLEN_REMCSUM; type |= SKB_GSO_TUNNEL_REMCSUM; need_priv = true; @@ -822,7 +820,7 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, optlen += need_priv ? 
GUE_LEN_PRIV : 0; - skb = iptunnel_handle_offloads(skb, csum, type); + skb = iptunnel_handle_offloads(skb, false, type); if (IS_ERR(skb)) return PTR_ERR(skb);
[PATCH net-next 6/8] net: gre: Implement LCO for GRE over IPv4
Signed-off-by: Edward Cree--- net/ipv4/ip_gre.c | 16 +--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 7c51c4e..9b31532 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -440,6 +440,17 @@ drop: return 0; } +static __sum16 gre_checksum(struct sk_buff *skb) +{ + __wsum csum; + + if (skb->ip_summed == CHECKSUM_PARTIAL) + csum = lco_csum(skb); + else + csum = skb_checksum(skb, 0, skb->len, 0); + return csum_fold(csum); +} + static void build_header(struct sk_buff *skb, int hdr_len, __be16 flags, __be16 proto, __be32 key, __be32 seq) { @@ -467,8 +478,7 @@ static void build_header(struct sk_buff *skb, int hdr_len, __be16 flags, !(skb_shinfo(skb)->gso_type & (SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) { *ptr = 0; - *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0, -skb->len, 0)); + *(__sum16 *)ptr = gre_checksum(skb); } } } @@ -493,7 +503,7 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, static struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool csum) { - return iptunnel_handle_offloads(skb, csum, + return iptunnel_handle_offloads(skb, false, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE); }
[PATCH net-next 3/8] net: enable LCO for udp_tunnel_handle_offloads() users
The only protocol affected at present is Geneve.

Signed-off-by: Edward Cree
---
 include/net/udp_tunnel.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index cca2ad3..734c156 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -103,7 +103,8 @@ static inline struct sk_buff *udp_tunnel_handle_offloads(struct sk_buff *skb,
 {
 	int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
 
-	return iptunnel_handle_offloads(skb, udp_csum, type);
+	/* As we're a UDP tunnel, we support LCO, so don't need csum_help */
+	return iptunnel_handle_offloads(skb, false, type);
 }
 
 static inline void udp_tunnel_gro_complete(struct sk_buff *skb, int nhoff)
[PATCH v3 1/4] lib: move strtobool to kstrtobool
Create the kstrtobool_from_user helper and moves strtobool logic into the new kstrtobool (matching all the other kstrto* functions). Provides an inline wrapper for existing strtobool callers. Signed-off-by: Kees Cook--- v3: - drop needless "base" argument, rasmus --- include/linux/kernel.h | 2 ++ include/linux/string.h | 6 +- lib/kstrtox.c | 50 ++ lib/string.c | 29 - 4 files changed, 57 insertions(+), 30 deletions(-) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index f31638c6e873..f4fa2b29c38c 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -357,6 +357,7 @@ int __must_check kstrtou16(const char *s, unsigned int base, u16 *res); int __must_check kstrtos16(const char *s, unsigned int base, s16 *res); int __must_check kstrtou8(const char *s, unsigned int base, u8 *res); int __must_check kstrtos8(const char *s, unsigned int base, s8 *res); +int __must_check kstrtobool(const char *s, bool *res); int __must_check kstrtoull_from_user(const char __user *s, size_t count, unsigned int base, unsigned long long *res); int __must_check kstrtoll_from_user(const char __user *s, size_t count, unsigned int base, long long *res); @@ -368,6 +369,7 @@ int __must_check kstrtou16_from_user(const char __user *s, size_t count, unsigne int __must_check kstrtos16_from_user(const char __user *s, size_t count, unsigned int base, s16 *res); int __must_check kstrtou8_from_user(const char __user *s, size_t count, unsigned int base, u8 *res); int __must_check kstrtos8_from_user(const char __user *s, size_t count, unsigned int base, s8 *res); +int __must_check kstrtobool_from_user(const char __user *s, size_t count, bool *res); static inline int __must_check kstrtou64_from_user(const char __user *s, size_t count, unsigned int base, u64 *res) { diff --git a/include/linux/string.h b/include/linux/string.h index 9eebc66d957a..2217224684c9 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -128,7 +128,11 @@ extern char **argv_split(gfp_t gfp, const 
char *str, int *argcp); extern void argv_free(char **argv); extern bool sysfs_streq(const char *s1, const char *s2); -extern int strtobool(const char *s, bool *res); +extern int kstrtobool(const char *s, bool *res); +static inline int strtobool(const char *s, bool *res) +{ + return kstrtobool(s, res); +} #ifdef CONFIG_BINARY_PRINTF int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args); diff --git a/lib/kstrtox.c b/lib/kstrtox.c index 94be244e8441..e8ba4a013e82 100644 --- a/lib/kstrtox.c +++ b/lib/kstrtox.c @@ -321,6 +321,56 @@ int kstrtos8(const char *s, unsigned int base, s8 *res) } EXPORT_SYMBOL(kstrtos8); +/** + * kstrtobool - convert common user inputs into boolean values + * @s: input string + * @res: result + * + * This routine returns 0 iff the first character is one of 'Yy1Nn0'. + * Otherwise it will return -EINVAL. Value pointed to by res is + * updated upon finding a match. + */ +int kstrtobool(const char *s, bool *res) +{ + if (!s) + return -EINVAL; + + switch (s[0]) { + case 'y': + case 'Y': + case '1': + *res = true; + return 0; + case 'n': + case 'N': + case '0': + *res = false; + return 0; + default: + break; + } + + return -EINVAL; +} +EXPORT_SYMBOL(kstrtobool); + +/* + * Since "base" would be a nonsense argument, this open-codes the + * _from_user helper instead of using the helper macro below. 
+ */ +int kstrtobool_from_user(const char __user *s, size_t count, bool *res) +{ + /* Longest string needed to differentiate, newline, terminator */ + char buf[4]; + + count = min(count, sizeof(buf) - 1); + if (copy_from_user(buf, s, count)) + return -EFAULT; + buf[count] = '\0'; + return kstrtobool(buf, res); +} +EXPORT_SYMBOL(kstrtobool_from_user); + #define kstrto_from_user(f, g, type) \ int f(const char __user *s, size_t count, unsigned int base, type *res) \ { \ diff --git a/lib/string.c b/lib/string.c index 0323c0d5629a..1a90db9bc6e1 100644 --- a/lib/string.c +++ b/lib/string.c @@ -630,35 +630,6 @@ bool sysfs_streq(const char *s1, const char *s2) } EXPORT_SYMBOL(sysfs_streq); -/** - * strtobool - convert common user inputs into boolean values - * @s: input string - * @res: result - * - * This routine returns 0 iff the first character is one of 'Yy1Nn0'. - * Otherwise it will return -EINVAL. Value pointed to by res is - * updated upon finding a match. - */ -int strtobool(const char *s, bool *res) -{ - switch (s[0]) { - case 'y': - case 'Y': - case '1': - *res = true; - break; -
Re: gigaset: memory leak in gigaset_initcshw
On Fri, Feb 5, 2016 at 7:36 PM, Paul Bollewrote: > On vr, 2016-02-05 at 17:06 +0100, Paul Bolle wrote: >> If that would happen, then cs can be reused while the previous >> > cs->hw.ser is not freed yet. Just a guess. >> >> I'll have to ponder on that a bit, sorry. > > This is from the hit-the-code-until-it-confesses department: > --- a/drivers/isdn/gigaset/ser-gigaset.c > +++ b/drivers/isdn/gigaset/ser-gigaset.c > @@ -373,13 +373,9 @@ static void gigaset_freecshw(struct cardstate *cs) > > static void gigaset_device_release(struct device *dev) > { > - struct cardstate *cs = dev_get_drvdata(dev); > - > - if (!cs) > - return; > + struct ser_cardstate *scs = dev_get_drvdata(dev); > dev_set_drvdata(dev, NULL); > - kfree(cs->hw.ser); > - cs->hw.ser = NULL; > + kfree(scs); > } > > /* > @@ -408,7 +404,7 @@ static int gigaset_initcshw(struct cardstate *cs) > cs->hw.ser = NULL; > return rc; > } > - dev_set_drvdata(>hw.ser->dev.dev, cs); > + dev_set_drvdata(>hw.ser->dev.dev, scs); > > tasklet_init(>write_tasklet, > gigaset_modem_fill, (unsigned long) cs); > > Does that make any difference? Nope. 
Almost 500 objects leaked in less than 10 seconds:

# cat /proc/slabinfo | egrep "^kmalloc-2048"
kmalloc-2048    1992   2015   2520   13    8 : tunables    0    0    0 : slabdata    155    155      0
# cat /proc/slabinfo | egrep "^kmalloc-2048"
kmalloc-2048    2024   2041   2520   13    8 : tunables    0    0    0 : slabdata    157    157      0
# cat /proc/slabinfo | egrep "^kmalloc-2048"
kmalloc-2048    2061   2080   2520   13    8 : tunables    0    0    0 : slabdata    160    160      0
# cat /proc/slabinfo | egrep "^kmalloc-2048"
kmalloc-2048    2091   2119   2520   13    8 : tunables    0    0    0 : slabdata    163    163      0
# cat /proc/slabinfo | egrep "^kmalloc-2048"
kmalloc-2048    2147   2171   2520   13    8 : tunables    0    0    0 : slabdata    167    167      0
# cat /proc/slabinfo | egrep "^kmalloc-2048"
kmalloc-2048    2228   2236   2520   13    8 : tunables    0    0    0 : slabdata    172    172      0
# cat /proc/slabinfo | egrep "^kmalloc-2048"
kmalloc-2048    2261   2288   2520   13    8 : tunables    0    0    0 : slabdata    176    176      0
# cat /proc/slabinfo | egrep "^kmalloc-2048"
kmalloc-2048    2289   2301   2520   13    8 : tunables    0    0    0 : slabdata    177    177      0
# cat /proc/slabinfo | egrep "^kmalloc-2048"
kmalloc-2048    2316   2340   2520   13    8 : tunables    0    0    0 : slabdata    180    180      0
# cat /proc/slabinfo | egrep "^kmalloc-2048"
kmalloc-2048    2324   2366   2520   13    8 : tunables    0    0    0 : slabdata    182    182      0
# cat /proc/slabinfo | egrep "^kmalloc-2048"
kmalloc-2048    2356   2379   2520   13    8 : tunables    0    0    0 : slabdata    183    183      0
# cat /proc/slabinfo | egrep "^kmalloc-2048"
kmalloc-2048    2450   2509   2520   13    8 : tunables    0    0    0 : slabdata    193    193      0
# cat /proc/slabinfo | egrep "^kmalloc-2048"
kmalloc-2048    2450   2509   2520   13    8 : tunables    0    0    0 : slabdata    193    193      0
# cat /proc/slabinfo | egrep "^kmalloc-2048"
kmalloc-2048    2450   2509   2520   13    8 : tunables    0    0    0 : slabdata    193    193      0
[PATCH net-next 0/2] mpls: packet stats and ttl propagation config
This patch series implements new bits of mpls functionality: keeping statistics on packets as they pass through the box and allowing ttl propagation to be configured.

Robert Shearman (2):
  mpls: packet stats
  mpls: allow TTL propagation to/from IP packets to be configured

 Documentation/networking/mpls-sysctl.txt |  19 +++
 include/net/netns/mpls.h                 |   4 +
 net/mpls/Makefile                        |   1 +
 net/mpls/af_mpls.c                       | 205 ---
 net/mpls/internal.h                      |  93 +-
 net/mpls/mpls_iptunnel.c                 |  21 +++-
 net/mpls/proc.c                          | 128 +++
 7 files changed, 417 insertions(+), 54 deletions(-)
 create mode 100644 net/mpls/proc.c

--
2.1.4
[PATCH v4 net-next 0/8] Local Checksum Offload
Tested with VxLAN, GRE and FOU-IPIP tunnels. Not tested with GENEVE, because iproute2 doesn't support enabling checksums on GENEVE tunnels. Also tested VxLAN with IPv6 (as both inner and outer protocol).

Changes from v3:
 * Fixed inverted checksum values introduced in v3.
 * Don't mangle zero checksums in GRE.
 * Clear skb->encapsulation in iptunnel_handle_offloads when not using CHECKSUM_PARTIAL, lest drivers incorrectly interpret that as a request for inner checksum offload.

Changes from v2:
 * Added support for IPv4 GRE.
 * Split out 'always set up for checksum offload' into its own patch.
 * Removed csum_help from iptunnel_handle_offloads.
 * Rewrote LCO callers to only fold once.
 * Simplified nocheck handling.

Changes from v1:
 * Enabled support in more encapsulation protocols. I think it now covers everything except GRE.
 * Wrote up some documentation covering TX checksum offload, LCO and RCO.

Edward Cree (8):
  net: local checksum offload for encapsulation
  net: udp: always set up for CHECKSUM_PARTIAL offload
  net: enable LCO for udp_tunnel_handle_offloads() users
  net: vxlan: enable local checksum offload
  fou: enable LCO in FOU and GUE
  net: gre: Implement LCO for GRE over IPv4
  net: ip_tunnel: remove 'csum_help' argument to iptunnel_handle_offloads
  Documentation/networking: add checksum-offloads.txt to explain LCO

 Documentation/networking/00-INDEX              |   2 +
 Documentation/networking/checksum-offloads.txt | 119 +
 drivers/net/vxlan.c                            |  18 ++--
 include/linux/skbuff.h                         |  26 ++
 include/net/ip_tunnels.h                       |   3 +-
 include/net/udp_tunnel.h                       |   2 +-
 net/ipv4/fou.c                                 |  14 ++-
 net/ipv4/ip_gre.c                              |  17 +++-
 net/ipv4/ip_tunnel_core.c                      |  22 ++---
 net/ipv4/ipip.c                                |   2 +-
 net/ipv4/udp.c                                 |  28 ++
 net/ipv6/ip6_checksum.c                        |  23 ++---
 net/ipv6/sit.c                                 |   4 +-
 net/netfilter/ipvs/ip_vs_xmit.c                |   6 +-
 14 files changed, 201 insertions(+), 85 deletions(-)
 create mode 100644 Documentation/networking/checksum-offloads.txt
[PATCH net-next 8/8] Documentation/networking: add checksum-offloads.txt to explain LCO
Signed-off-by: Edward Cree
---
 Documentation/networking/00-INDEX              |   2 +
 Documentation/networking/checksum-offloads.txt | 119 +
 include/linux/skbuff.h                         |   2 +
 3 files changed, 123 insertions(+)
 create mode 100644 Documentation/networking/checksum-offloads.txt

diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX
index df27a1a..415154a 100644
--- a/Documentation/networking/00-INDEX
+++ b/Documentation/networking/00-INDEX
@@ -44,6 +44,8 @@ can.txt
 	- documentation on CAN protocol family.
 cdc_mbim.txt
 	- 3G/LTE USB modem (Mobile Broadband Interface Model)
+checksum-offloads.txt
+	- Explanation of checksum offloads; LCO, RCO
 cops.txt
 	- info on the COPS LocalTalk Linux driver
 cs89x0.txt
diff --git a/Documentation/networking/checksum-offloads.txt b/Documentation/networking/checksum-offloads.txt
new file mode 100644
index 000..de2a327
--- /dev/null
+++ b/Documentation/networking/checksum-offloads.txt
@@ -0,0 +1,119 @@
+Checksum Offloads in the Linux Networking Stack
+===
+
+
+Introduction
+===
+
+This document describes a set of techniques in the Linux networking stack
+ to take advantage of checksum offload capabilities of various NICs.
+
+The following technologies are described:
+ * TX Checksum Offload
+ * LCO: Local Checksum Offload
+ * RCO: Remote Checksum Offload
+
+Things that should be documented here but aren't yet:
+ * RX Checksum Offload
+ * CHECKSUM_UNNECESSARY conversion
+
+
+TX Checksum Offload
+===
+
+The interface for offloading a transmit checksum to a device is explained
+ in detail in comments near the top of include/linux/skbuff.h.
+In brief, it allows to request the device fill in a single ones-complement
+ checksum defined by the sk_buff fields skb->csum_start and
+ skb->csum_offset.  The device should compute the 16-bit ones-complement
+ checksum (i.e. the 'IP-style' checksum) from csum_start to the end of the
+ packet, and fill in the result at (csum_start + csum_offset).
+Because csum_offset cannot be negative, this ensures that the previous
+ value of the checksum field is included in the checksum computation, thus
+ it can be used to supply any needed corrections to the checksum (such as
+ the sum of the pseudo-header for UDP or TCP).
+This interface only allows a single checksum to be offloaded.  Where
+ encapsulation is used, the packet may have multiple checksum fields in
+ different header layers, and the rest will have to be handled by another
+ mechanism such as LCO or RCO.
+No offloading of the IP header checksum is performed; it is always done in
+ software.  This is OK because when we build the IP header, we obviously
+ have it in cache, so summing it isn't expensive.  It's also rather short.
+The requirements for GSO are more complicated, because when segmenting an
+ encapsulated packet both the inner and outer checksums may need to be
+ edited or recomputed for each resulting segment.  See the skbuff.h comment
+ (section 'E') for more details.
+
+A driver declares its offload capabilities in netdev->hw_features; see
+ Documentation/networking/netdev-features for more.  Note that a device
+ which only advertises NETIF_F_IP[V6]_CSUM must still obey the csum_start
+ and csum_offset given in the SKB; if it tries to deduce these itself in
+ hardware (as some NICs do) the driver should check that the values in the
+ SKB match those which the hardware will deduce, and if not, fall back to
+ checksumming in software instead (with skb_checksum_help or one of the
+ skb_csum_off_chk* functions as mentioned in include/linux/skbuff.h).  This
+ is a pain, but that's what you get when hardware tries to be clever.
+
+The stack should, for the most part, assume that checksum offload is
+ supported by the underlying device.  The only place that should check is
+ validate_xmit_skb(), and the functions it calls directly or indirectly.
+ That function compares the offload features requested by the SKB (which
+ may include other offloads besides TX Checksum Offload) and, if they are
+ not supported or enabled on the device (determined by netdev->features),
+ performs the corresponding offload in software.  In the case of TX
+ Checksum Offload, that means calling skb_checksum_help(skb).
+
+
+LCO: Local Checksum Offload
+===
+
+LCO is a technique for efficiently computing the outer checksum of an
+ encapsulated datagram when the inner checksum is due to be offloaded.
+The ones-complement sum of a correctly checksummed TCP or UDP packet is
+ equal to the complement of the sum of the pseudo header, because
+ everything else gets 'cancelled out' by the checksum field.  This is
+ because the sum was complemented before being written to the checksum
+ field.
+More generally, this holds in any case where the 'IP-style' ones complement
+ checksum is used, and thus any checksum that TX Checksum Offload supports.
+That is, if we
[PATCH v3 4/4] param: convert some "on"/"off" users to strtobool
This changes several users of manual "on"/"off" parsing to use strtobool.

Some side-effects:
- these uses will now parse y/n/1/0 meaningfully too
- the early_param uses will now bubble up parse errors

Signed-off-by: Kees Cook
Acked-by: Heiko Carstens
Acked-by: Michael Ellerman
Cc: x...@kernel.org
Cc: linuxppc-...@lists.ozlabs.org
Cc: linux-s...@vger.kernel.org
---
v3:
- retain __setup return values, andy.shevchenko
- remove unused "base" argument
---
 arch/powerpc/kernel/rtasd.c                  |  7 ++-
 arch/powerpc/platforms/pseries/hotplug-cpu.c | 10 ++
 arch/s390/kernel/time.c                      |  8 ++--
 arch/s390/kernel/topology.c                  |  7 ++-
 arch/x86/kernel/aperture_64.c                | 12 ++--
 include/linux/tick.h                         |  2 +-
 kernel/time/hrtimer.c                        | 10 ++
 kernel/time/tick-sched.c                     | 10 ++
 8 files changed, 15 insertions(+), 51 deletions(-)

diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index 5a2c049c1c61..0ae5cb84d4e2 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -49,7 +49,7 @@ static unsigned int rtas_error_log_buffer_max;
 static unsigned int event_scan;
 static unsigned int rtas_event_scan_rate;
 
-static int full_rtas_msgs = 0;
+static bool full_rtas_msgs;
 
 /* Stop logging to nvram after first fatal error */
 static int logging_enabled; /* Until we initialize everything,
@@ -592,10 +592,7 @@ __setup("surveillance=", surveillance_setup);
 
 static int __init rtasmsgs_setup(char *str)
 {
-	if (strcmp(str, "on") == 0)
-		full_rtas_msgs = 1;
-	else if (strcmp(str, "off") == 0)
-		full_rtas_msgs = 0;
+	kstrtobool(str, &full_rtas_msgs);
 
 	return 1;
 }
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 32274f72fe3f..282837a1d74b 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -47,20 +47,14 @@ static DEFINE_PER_CPU(enum cpu_state_vals, current_state) = CPU_STATE_OFFLINE;
 
 static enum cpu_state_vals default_offline_state = CPU_STATE_OFFLINE;
 
-static int cede_offline_enabled __read_mostly = 1;
+static bool cede_offline_enabled __read_mostly = true;
 
 /*
  * Enable/disable cede_offline when available.
  */
 static int __init setup_cede_offline(char *str)
 {
-	if (!strcmp(str, "off"))
-		cede_offline_enabled = 0;
-	else if (!strcmp(str, "on"))
-		cede_offline_enabled = 1;
-	else
-		return 0;
-	return 1;
+	return (kstrtobool(str, &cede_offline_enabled) == 0);
 }
 
 __setup("cede_offline=", setup_cede_offline);
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 99f84ac31307..580bc7299ec3 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -1433,7 +1433,7 @@ device_initcall(etr_init_sysfs);
 /*
  * Server Time Protocol (STP) code.
  */
-static int stp_online;
+static bool stp_online;
 static struct stp_sstpi stp_info;
 static void *stp_page;
@@ -1444,11 +1444,7 @@ static struct timer_list stp_timer;
 
 static int __init early_parse_stp(char *p)
 {
-	if (strncmp(p, "off", 3) == 0)
-		stp_online = 0;
-	else if (strncmp(p, "on", 2) == 0)
-		stp_online = 1;
-	return 0;
+	return kstrtobool(p, &stp_online);
 }
 early_param("stp", early_parse_stp);
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 40b8102fdadb..64298a867589 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -37,7 +37,7 @@ static void set_topology_timer(void);
 static void topology_work_fn(struct work_struct *work);
 static struct sysinfo_15_1_x *tl_info;
 
-static int topology_enabled = 1;
+static bool topology_enabled = true;
 static DECLARE_WORK(topology_work, topology_work_fn);
 
 /*
@@ -444,10 +444,7 @@ static const struct cpumask *cpu_book_mask(int cpu)
 
 static int __init early_parse_topology(char *p)
 {
-	if (strncmp(p, "off", 3))
-		return 0;
-	topology_enabled = 0;
-	return 0;
+	return kstrtobool(p, &topology_enabled);
 }
 early_param("topology", early_parse_topology);
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 6e85f713641d..0a2bb1f62e72 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -227,19 +227,11 @@ static u32 __init search_agp_bridge(u32 *order, int *valid_agp)
 	return 0;
 }
 
-static int gart_fix_e820 __initdata = 1;
+static bool gart_fix_e820 __initdata = true;
 
 static int __init parse_gart_mem(char *p)
 {
-	if (!p)
-		return -EINVAL;
-
-	if (!strncmp(p, "off", 3))
-		gart_fix_e820 = 0;
-	else if (!strncmp(p, "on", 2))
-
[PATCH net-next 1/8] net: local checksum offload for encapsulation
The arithmetic properties of the ones-complement checksum mean that a
correctly checksummed inner packet, including its checksum, has a ones
complement sum depending only on whatever value was used to initialise
the checksum field before checksumming (in the case of TCP and UDP,
this is the ones complement sum of the pseudo header, complemented).
Consequently, if we are going to offload the inner checksum with
CHECKSUM_PARTIAL, we can compute the outer checksum based only on the
packet data not covered by the inner checksum, and the initial value of
the inner checksum field.

Signed-off-by: Edward Cree
---
 include/linux/skbuff.h    | 24 
 net/ipv4/ip_tunnel_core.c | 10 +-
 net/ipv4/udp.c            | 20 ++--
 net/ipv6/ip6_checksum.c   | 14 +++---
 4 files changed, 46 insertions(+), 22 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 11f935c..3e9eb52 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3683,5 +3683,29 @@ static inline unsigned int skb_gso_network_seglen(const struct sk_buff *skb)
 	return hdr_len + skb_gso_transport_seglen(skb);
 }
 
+/* Local Checksum Offload.
+ * Compute outer checksum based on the assumption that the
+ * inner checksum will be offloaded later.
+ * Fill in outer checksum adjustment (e.g. with sum of outer
+ * pseudo-header) before calling.
+ * Also ensure that inner checksum is in linear data area.
+ */
+static inline __wsum lco_csum(struct sk_buff *skb)
+{
+	char *inner_csum_field;
+	__wsum csum;
+
+	/* Start with complement of inner checksum adjustment */
+	inner_csum_field = skb->data + skb_checksum_start_offset(skb) +
+			   skb->csum_offset;
+	csum = ~csum_unfold(*(__force __sum16 *)inner_csum_field);
+	/* Add in checksum of our headers (incl. outer checksum
+	 * adjustment filled in by caller)
+	 */
+	csum = skb_checksum(skb, 0, skb_checksum_start_offset(skb), csum);
+	/* The result is the checksum from skb->data to end of packet */
+	return csum;
+}
+
 #endif	/* __KERNEL__ */
 #endif	/* _LINUX_SKBUFF_H */
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 859d415..d74ce93 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -166,20 +166,20 @@ struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb,
 		return skb;
 	}
 
-	/* If packet is not gso and we are resolving any partial checksum,
+	/* If packet is not gso and we are not offloading inner checksum,
 	 * clear encapsulation flag. This allows setting CHECKSUM_PARTIAL
 	 * on the outer header without confusing devices that implement
 	 * NETIF_F_IP_CSUM with encapsulation.
 	 */
-	if (csum_help)
-		skb->encapsulation = 0;
-
 	if (skb->ip_summed == CHECKSUM_PARTIAL && csum_help) {
+		skb->encapsulation = 0;
 		err = skb_checksum_help(skb);
 		if (unlikely(err))
 			goto error;
-	} else if (skb->ip_summed != CHECKSUM_PARTIAL)
+	} else if (skb->ip_summed != CHECKSUM_PARTIAL) {
 		skb->ip_summed = CHECKSUM_NONE;
+		skb->encapsulation = 0;
+	}
 
 	return skb;
 error:
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index be0b218..005280d 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -848,16 +848,18 @@ void udp_set_csum(bool nocheck, struct sk_buff *skb,
 {
 	struct udphdr *uh = udp_hdr(skb);
 
-	if (nocheck)
+	if (nocheck) {
 		uh->check = 0;
-	else if (skb_is_gso(skb))
+	} else if (skb_is_gso(skb)) {
 		uh->check = ~udp_v4_check(len, saddr, daddr, 0);
-	else if (skb_dst(skb) && skb_dst(skb)->dev &&
-	 (skb_dst(skb)->dev->features &
-	  (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM))) {
-
-		BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
-
+	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		uh->check = 0;
+		uh->check = udp_v4_check(len, saddr, daddr, lco_csum(skb));
+		if (uh->check == 0)
+			uh->check = CSUM_MANGLED_0;
+	} else if (skb_dst(skb) && skb_dst(skb)->dev &&
+		   (skb_dst(skb)->dev->features &
+		    (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM))) {
 		skb->ip_summed = CHECKSUM_PARTIAL;
 		skb->csum_start = skb_transport_header(skb) - skb->head;
 		skb->csum_offset = offsetof(struct udphdr, check);
@@ -865,8 +867,6 @@ void udp_set_csum(bool nocheck, struct sk_buff *skb,
 	} else {
 		__wsum csum;
 
-		BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
-
 		uh->check = 0;
 		csum = skb_checksum(skb, 0, len, 0);
 		uh->check = udp_v4_check(len, saddr, daddr, csum);
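The identity that lco_csum() relies on can be exercised outside the kernel. The sketch below is illustrative userspace C, not kernel code: ones_sum() is a hypothetical RFC 1071-style helper, and the toy buffers stand in for the outer headers, the inner transport region (checksum field at bytes 2-3), and the pseudo-header sum that CHECKSUM_PARTIAL leaves seeded in the inner checksum field.

```c
#include <assert.h>
#include <stdint.h>
#include <stddef.h>

/* Fold a 32-bit accumulator into 16 bits with end-around carry. */
static uint16_t csum_fold16(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffffu) + (sum >> 16);
	return (uint16_t)sum;
}

/* RFC 1071 ones-complement sum of an even-length byte buffer. */
static uint16_t ones_sum(const uint8_t *buf, size_t len, uint16_t init)
{
	uint32_t sum = init;
	size_t i;

	for (i = 0; i < len; i += 2)
		sum += ((uint32_t)buf[i] << 8) | buf[i + 1];
	return csum_fold16(sum);
}

/* LCO-style outer sum: complement of the seed left in the inner
 * checksum field, plus the bytes the inner checksum does not cover
 * (a userspace analogue of lco_csum() in the patch). */
static uint16_t lco_outer_sum(const uint8_t *outer, size_t outer_len,
			      uint16_t inner_seed)
{
	return ones_sum(outer, outer_len, (uint16_t)~inner_seed);
}

/* Check the identity on a toy packet. */
static int lco_identity_holds(void)
{
	uint8_t outer[2] = { 0x12, 0x34 };
	uint8_t inner[4] = { 0x00, 0x02, 0x00, 0x00 }; /* check at [2..3] */
	uint16_t psum = 0x0001;	/* pretend pseudo-header sum */
	uint16_t check, direct, via_lco;

	/* Reference: finish the inner checksum in software. */
	check = (uint16_t)~ones_sum(inner, sizeof(inner), psum);
	inner[2] = check >> 8;
	inner[3] = check & 0xff;
	direct = ones_sum(inner, sizeof(inner),
			  ones_sum(outer, sizeof(outer), 0));

	/* LCO: never read the inner payload, use only the seed. */
	via_lco = lco_outer_sum(outer, sizeof(outer), psum);

	return direct == via_lco;
}
```

The point of the sketch is that `via_lco` reproduces the full outer sum without touching the bytes the inner checksum covers, which is exactly what lets udp_set_csum() compute the outer UDP checksum before the device has filled in the inner one.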
[PATCH v3 2/4] lib: update single-char callers of strtobool
Some callers of strtobool were passing a pointer to unterminated strings.
In preparation of adding multi-character processing to kstrtobool, update
the callers to not pass single-character pointers, and switch to using the
new kstrtobool_from_user helper where possible.

Signed-off-by: Kees Cook
Cc: Amitkumar Karwar
Cc: Nishant Sarmukadam
Cc: Kalle Valo
Cc: Steve French
Cc: linux-c...@vger.kernel.org
---
v3:
- drop needless buffer, andy.shevchenko
- drop unused "base" argument
---
 drivers/net/wireless/marvell/mwifiex/debugfs.c | 10 ++---
 fs/cifs/cifs_debug.c                           | 56 +++---
 fs/cifs/cifs_debug.h                           |  2 +-
 fs/cifs/cifsfs.c                               |  6 +--
 fs/cifs/cifsglob.h                             |  4 +-
 5 files changed, 24 insertions(+), 54 deletions(-)

diff --git a/drivers/net/wireless/marvell/mwifiex/debugfs.c b/drivers/net/wireless/marvell/mwifiex/debugfs.c
index 0b9c580af988..2eff989c6d9f 100644
--- a/drivers/net/wireless/marvell/mwifiex/debugfs.c
+++ b/drivers/net/wireless/marvell/mwifiex/debugfs.c
@@ -880,14 +880,12 @@ mwifiex_reset_write(struct file *file,
 {
 	struct mwifiex_private *priv = file->private_data;
 	struct mwifiex_adapter *adapter = priv->adapter;
-	char cmd;
 	bool result;
+	int rc;
 
-	if (copy_from_user(&cmd, ubuf, sizeof(cmd)))
-		return -EFAULT;
-
-	if (strtobool(&cmd, &result))
-		return -EINVAL;
+	rc = kstrtobool_from_user(ubuf, count, &result);
+	if (rc)
+		return rc;
 
 	if (!result)
 		return -EINVAL;
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 50b268483302..788e19195991 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -255,7 +255,6 @@ static const struct file_operations cifs_debug_data_proc_fops = {
 static ssize_t cifs_stats_proc_write(struct file *file,
 		const char __user *buffer, size_t count, loff_t *ppos)
 {
-	char c;
 	bool bv;
 	int rc;
 	struct list_head *tmp1, *tmp2, *tmp3;
@@ -263,11 +262,8 @@ static ssize_t cifs_stats_proc_write(struct file *file,
 	struct cifs_ses *ses;
 	struct cifs_tcon *tcon;
 
-	rc = get_user(c, buffer);
-	if (rc)
-		return rc;
-
-	if (strtobool(&c, &bv) == 0) {
+	rc = kstrtobool_from_user(buffer, count, &bv);
+	if (rc == 0) {
 #ifdef CONFIG_CIFS_STATS2
 		atomic_set(&totBufAllocCount, 0);
 		atomic_set(&totSmBufAllocCount, 0);
@@ -290,6 +286,8 @@ static ssize_t cifs_stats_proc_write(struct file *file,
 			}
 		}
 		spin_unlock(&cifs_tcp_ses_lock);
+	} else {
+		return rc;
 	}
 
 	return count;
@@ -433,17 +431,17 @@ static int cifsFYI_proc_open(struct inode *inode, struct file *file)
 static ssize_t cifsFYI_proc_write(struct file *file, const char __user *buffer,
 		size_t count, loff_t *ppos)
 {
-	char c;
+	char c[2] = { '\0' };
 	bool bv;
 	int rc;
 
-	rc = get_user(c, buffer);
+	rc = get_user(c[0], buffer);
 	if (rc)
 		return rc;
-	if (strtobool(&c, &bv) == 0)
+	if (strtobool(c, &bv) == 0)
 		cifsFYI = bv;
-	else if ((c > '1') && (c <= '9'))
-		cifsFYI = (int) (c - '0'); /* see cifs_debug.h for meanings */
+	else if ((c[0] > '1') && (c[0] <= '9'))
+		cifsFYI = (int) (c[0] - '0'); /* see cifs_debug.h for meanings */
 
 	return count;
 }
@@ -471,20 +469,12 @@ static int cifs_linux_ext_proc_open(struct inode *inode, struct file *file)
 static ssize_t cifs_linux_ext_proc_write(struct file *file,
 		const char __user *buffer, size_t count, loff_t *ppos)
 {
-	char c;
-	bool bv;
 	int rc;
 
-	rc = get_user(c, buffer);
+	rc = kstrtobool_from_user(buffer, count, &linuxExtEnabled);
 	if (rc)
 		return rc;
 
-	rc = strtobool(&c, &bv);
-	if (rc)
-		return rc;
-
-	linuxExtEnabled = bv;
-
 	return count;
 }
@@ -511,20 +501,12 @@ static int cifs_lookup_cache_proc_open(struct inode *inode, struct file *file)
 static ssize_t cifs_lookup_cache_proc_write(struct file *file,
 		const char __user *buffer, size_t count, loff_t *ppos)
 {
-	char c;
-	bool bv;
 	int rc;
 
-	rc = get_user(c, buffer);
+	rc = kstrtobool_from_user(buffer, count, &lookupCacheEnabled);
 	if (rc)
 		return rc;
 
-	rc = strtobool(&c, &bv);
-	if (rc)
-		return rc;
-
-	lookupCacheEnabled = bv;
-
 	return count;
 }
@@ -551,20 +533,12 @@ static int traceSMB_proc_open(struct inode *inode, struct file *file)
 static ssize_t traceSMB_proc_write(struct file *file, const char __user *buffer,
 		size_t count, loff_t *ppos)
 {
-
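The cifsFYI change above illustrates the underlying hazard: once kstrtobool learns to look at the second byte, a pointer to a lone char is no longer a valid string. A userspace sketch of the two-byte-array pattern (parse_bool() is a hypothetical stand-in, not the kernel implementation):

```c
#include <assert.h>
#include <stddef.h>

/* Userspace sketch of strtobool-style parsing (hypothetical, not the
 * kernel's code): 0 on success, -1 on unrecognized input. */
static int parse_bool(const char *s, int *res)
{
	if (s == NULL)
		return -1;
	switch (s[0]) {
	case 'y': case 'Y': case '1':
		*res = 1;
		return 0;
	case 'n': case 'N': case '0':
		*res = 0;
		return 0;
	default:
		return -1;
	}
}

/* The pattern the cifsFYI fix uses: copy the single user byte into a
 * two-byte, NUL-terminated array, so a parser that later learns to
 * examine s[1] never reads past the end of the buffer.
 * Returns 1/0 on a match, -2 when the input is not recognized. */
static int parsed_value(char user_byte)
{
	char c[2] = { '\0' };
	int v = 0;

	c[0] = user_byte;
	return parse_bool(c, &v) ? -2 : v;
}
```

Even though parse_bool() here only inspects s[0], sizing the array for the terminator is what makes the later "on"/"off" extension safe for these callers.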
[PATCH v3 0/4] lib: add "on" and "off" to strtobool
This consolidates logic for handling "on"/"off" parsing for bools into the
strtobool function, by way of moving it into kstrtobool (with helpers),
and updating various callers.

v3:
- removed unused "base" argument
- fixed missing description change
- retained inverted __setup return values
- removed needless extra buffer in cifs

v2:
- moved to kstrto* style

 arch/powerpc/kernel/rtasd.c                    |  7 --
 arch/powerpc/platforms/pseries/hotplug-cpu.c   | 10 ---
 arch/s390/kernel/time.c                        |  8 ---
 arch/s390/kernel/topology.c                    |  7 --
 arch/x86/kernel/aperture_64.c                  | 12 
 drivers/net/wireless/marvell/mwifiex/debugfs.c | 10 +--
 fs/cifs/cifs_debug.c                           | 56 +
 fs/cifs/cifs_debug.h                           |  2 
 fs/cifs/cifsfs.c                               |  6 +-
 fs/cifs/cifsglob.h                             |  4 -
 include/linux/kernel.h                         |  2 
 include/linux/string.h                         |  6 +-
 include/linux/tick.h                           |  2 
 kernel/time/hrtimer.c                          | 10 ---
 kernel/time/tick-sched.c                       | 10 ---
 lib/kstrtox.c                                  | 64 +
 lib/string.c                                   | 29 ---
 17 files changed, 110 insertions(+), 135 deletions(-)

-Kees
[PATCH v3 3/4] lib: add "on"/"off" support to kstrtobool
Add support for "on" and "off" when converting to boolean.

Signed-off-by: Kees Cook
---
v3:
- add dropped description change, andy.shevchenko
---
 lib/kstrtox.c | 20 +---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/lib/kstrtox.c b/lib/kstrtox.c
index e8ba4a013e82..d8a5cf66c316 100644
--- a/lib/kstrtox.c
+++ b/lib/kstrtox.c
@@ -326,9 +326,9 @@ EXPORT_SYMBOL(kstrtos8);
  * @s: input string
  * @res: result
  *
- * This routine returns 0 iff the first character is one of 'Yy1Nn0'.
- * Otherwise it will return -EINVAL. Value pointed to by res is
- * updated upon finding a match.
+ * This routine returns 0 iff the first character is one of 'Yy1Nn0', or
+ * [oO][NnFf] for "on" and "off". Otherwise it will return -EINVAL. Value
+ * pointed to by res is updated upon finding a match.
  */
 int kstrtobool(const char *s, bool *res)
 {
@@ -346,6 +346,20 @@ int kstrtobool(const char *s, bool *res)
 	case '0':
 		*res = false;
 		return 0;
+	case 'o':
+	case 'O':
+		switch (s[1]) {
+		case 'n':
+		case 'N':
+			*res = true;
+			return 0;
+		case 'f':
+		case 'F':
+			*res = false;
+			return 0;
+		default:
+			break;
+		}
 	default:
 		break;
 	}
-- 
2.6.3
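A userspace model of the extended parser (hypothetical names, not the kernel code itself) shows why the second byte is only examined after an 'o'/'O', which is what keeps the existing single-character callers safe:

```c
#include <assert.h>
#include <stddef.h>

/* Userspace sketch mirroring the switch added to kstrtobool(): the
 * first character decides, except 'o'/'O', where the second character
 * picks "on" vs "off".  0 on success, -1 on unrecognized input. */
static int parse_bool_onoff(const char *s, int *res)
{
	if (s == NULL)
		return -1;
	switch (s[0]) {
	case 'y': case 'Y': case '1':
		*res = 1;
		return 0;
	case 'n': case 'N': case '0':
		*res = 0;
		return 0;
	case 'o': case 'O':
		/* Only here is s[1] ever read. */
		switch (s[1]) {
		case 'n': case 'N':
			*res = 1;
			return 0;
		case 'f': case 'F':
			*res = 0;
			return 0;
		default:
			break;
		}
		break;
	default:
		break;
	}
	return -1;
}

/* Wrapper for easy spot checks: 1/0 on a match, -2 otherwise. */
static int onoff_value(const char *s)
{
	int v;

	return parse_bool_onoff(s, &v) ? -2 : v;
}
```

Note that, like the kernel patch, this accepts any string *starting* with the matched characters ("only" parses as true); the trade-off is discussed in the review thread below.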
Re: [V4.4-rc6 Regression] af_unix: Revert 'lock_interruptible' in stream receive code
Joseph Salisbury writes:
> On 02/05/2016 02:59 PM, Rainer Weikusat wrote:

[recvmsg w/o iovecs returning ENOTSUP for CMSG requests]

>> Funny little problem :-). The code using the interruptible lock cleared
>> err as side effect hence the
>>
>> out:
>> 	return copied ? : err;
>>
>> at the end of unix_stream_read_generic didn't return the -ENOTSUP put
>> into err at the start of the function if copied was zero after the loop
>> because the size of the passed data buffer was zero.

There are more problems wrt handling control-message only reads in this
code. In particular, changing the test program as follows:

	if (fork() == 0) {
		sleep(1);
		send(socket_fd[client], msg, sizeof msg,
		     MSG_DONTWAIT | MSG_NOSIGNAL);
		_exit(0);
	}

makes the recvmsg fail with EAGAIN and judging from the code (I didn't
test this yet), it will return without an error but also without
credentials if the

	err = -EAGAIN;
	if (!timeo)
		break;

is changed to

	if (!timeo) {
		err = -EAGAIN;
		break;
	}

because the following

	mutex_lock(&u->readlock);
	continue;

will cause the do { } while (size) loop condition to be evaluated and
since size is 0 (AIUI), the loop will terminate immediately.
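The control-flow argument can be modeled with a standalone toy loop. This is a sketch, not the af_unix code: TOY_EAGAIN stands in for the errno, and the wait/relock step is elided. Priming err before the timeout check reports the error on a zero-size read, while moving the assignment inside the break path lets the do/while exit with err still 0.

```c
#include <assert.h>

#define TOY_EAGAIN 11	/* stand-in for the kernel's EAGAIN */

/* Variant A: err is primed before the timeout check, as in the
 * current unix_stream_read_generic(). */
static int recv_loop_primed(int size, int timeo)
{
	int err = 0, copied = 0;

	do {
		err = -TOY_EAGAIN;
		if (!timeo)
			break;
		/* (wait for data, retake the lock) ... continue
		 * re-evaluates the while (size) condition below. */
		continue;
	} while (size);

	return copied ? copied : err;
}

/* Variant B: err is only set on the break path.  A zero-size
 * (control-message only) read that takes the continue path exits the
 * loop via the while (size) condition with err still 0, so the caller
 * sees "success" with neither data nor credentials. */
static int recv_loop_break_only(int size, int timeo)
{
	int err = 0, copied = 0;

	do {
		if (!timeo) {
			err = -TOY_EAGAIN;
			break;
		}
		continue;
	} while (size);

	return copied ? copied : err;
}
```

Calling both variants with size == 0 and a nonzero timeout shows the divergence the mail describes: variant A still reports -EAGAIN, variant B reports nothing.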
Re: [PATCH v2 2/4] lib: update single-char callers of strtobool
On Fri, Feb 5, 2016 at 2:46 AM, David Laight wrote:
> From: Kees Cook
>> Sent: 04 February 2016 21:01
>> Some callers of strtobool were passing a pointer to unterminated strings.
>> In preparation of adding multi-character processing to kstrtobool, update
>> the callers to not pass single-character pointers, and switch to using
>> the new kstrtobool_from_user helper where possible.
>
> Personally I think you should change the name of the function so that the
> compiler (and linker) will pick up places that have not been changed.
> Relying on people to make the required changes will cause problems.

After the single-character users were pointed out, I looked for others
and there aren't any.

> The current code (presumably) treats "no", "nyet" and "nkjkkrkjrkjterkj"
> as false.
> Changing that behaviour will break things.

There's no change there. All three of those will still be "false".
Perhaps my changelog shouldn't say "unterminated" but rather "character
array".

> If you want to support "on" and "off", then maybe check for the supplied
> string starting with the character sequences "on\0" and "off\0" (as well
> as any others).
> This doesn't need the input string be '\0' terminated - since you match
> y and n without looking at the 2nd byte.
> You'd have to be extremely unlucky to get a page fault in the 3 bytes
> following an 'o' if the caller supplied a single byte buffer.

I'd prefer to keep the switch statement as short as possible, and I don't
want to do full string compares. And as you say, even fixing the
single-byte callers seems like a needless exercise, but seeing as how it's
a net clean-up, I think it's good the way I've got the series.

-Kees

-- 
Kees Cook
Chrome OS & Brillo Security
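For contrast, the exact-sequence match being suggested in the quoted review could look like the sketch below (hypothetical userspace code, not a proposed kernel patch). Comparing a length that covers the terminator rejects prefixes like "only", at the cost of requiring a terminated (or large enough) buffer and exact lower-case spelling:

```c
#include <assert.h>
#include <string.h>

/* Match the exact byte sequences "on\0" / "off\0".  strncmp() stops
 * at the first differing byte, so "only" fails at s[2] ('l' vs '\0')
 * and "o" fails at s[1] ('\0' vs 'n'). */
static int parse_onoff_exact(const char *s, int *res)
{
	if (strncmp(s, "on", 3) == 0) {
		*res = 1;
		return 0;
	}
	if (strncmp(s, "off", 4) == 0) {
		*res = 0;
		return 0;
	}
	return -1;
}

/* Wrapper for easy spot checks: 1/0 on a match, -2 otherwise. */
static int exact_value(const char *s)
{
	int v;

	return parse_onoff_exact(s, &v) ? -2 : v;
}
```

Unlike the switch-based parser that was merged, this version is case-sensitive and never accepts a mere prefix, which is the design trade-off the two mails are debating.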
Re: Reply: [net] bonding: use return instead of goto
On Fri, Feb 05, 2016 at 09:42:24AM +0800, 张胜举 wrote:
> > On Wed, Feb 03, 2016 at 06:15:22AM +, Zhang Shengju wrote:
> > > Replace 'goto' with 'return' to remove unnecessary check at label:
> > > err_undo_flags.
> >
> > I think you're going to have to explain how you came to the conclusion
> > that the check isn't necessary.
...
> The reason is that 'err_undo_flags' does two things for the first slave
> device:
> 1. revert bond mac address if it is set by the slave device.
> 2. revert bond device type if it's not ARPHRD_ETHER.
>
> I think it's not necessary for the three places, they changed neither
> bond mac address nor type. It's straightforward to return directly.

I see what you're saying, and that does look to be true if you're only
adding a singular first device right now. But looking at the enslave and
release paths, I don't see anything preventing concurrent slave adds and
removes, which could mean there are situations where those checks really
are necessary. I don't actually know.

-- 
Jarod Wilson
ja...@redhat.com