date:20170128

[PATCH net-next v2 0/4] net: dsa: bcm_sf2: CFP support

2017-01-28 Thread Florian Fainelli

Hi all,

This patch series adds support for the Broadcom Compact Field Processor (CFP)
which is a classification and matching engine built into most Broadcom switches.

We support that using ethtool::rxnfc because it allows all known use cases from
the users I support to work and, more importantly, it allows the selection of a
target rule index, which is later used by e.g. offloading hardware. This is an
essential feature that I could not find being supported with cls_* for instance.

Thanks!

Changes in v2:

- fixed modular builds reported by kbuild test robot


Florian Fainelli (4):
  net: dsa: Hook {get,set}_rxnfc ethtool operations
  net: dsa: bcm_sf2: Configure traffic classes to queue mapping
  net: dsa: bcm_sf2: Add CFP registers definitions
  net: dsa: bcm_sf2: Add support for ethtool::rxnfc

 drivers/net/dsa/Makefile   |   1 +
 drivers/net/dsa/bcm_sf2.c  |  23 ++
 drivers/net/dsa/bcm_sf2.h  |  17 ++
 drivers/net/dsa/bcm_sf2_cfp.c  | 613 +
 drivers/net/dsa/bcm_sf2_regs.h | 150 ++
 include/net/dsa.h  |   8 +
 net/dsa/slave.c|  26 ++
 7 files changed, 838 insertions(+)
 create mode 100644 drivers/net/dsa/bcm_sf2_cfp.c

-- 
2.9.3

[PATCH net-next v2 1/2] qed: Add infrastructure for PTP support.

2017-01-28 Thread Sudarsana Kalluru

From: Sudarsana Reddy Kalluru 

The patch adds the required qed interfaces for configuring/reading
the PTP clock on the adapter.

Signed-off-by: Sudarsana Reddy Kalluru 
---
 drivers/net/ethernet/qlogic/qed/Makefile   |   2 +-
 drivers/net/ethernet/qlogic/qed/qed.h  |   2 +
 drivers/net/ethernet/qlogic/qed/qed_l2.c   |   5 +
 drivers/net/ethernet/qlogic/qed/qed_l2.h   |   1 +
 drivers/net/ethernet/qlogic/qed/qed_main.c |  15 ++
 drivers/net/ethernet/qlogic/qed/qed_ptp.c  | 319 +
 drivers/net/ethernet/qlogic/qed/qed_ptp.h  |  47 
 drivers/net/ethernet/qlogic/qed/qed_reg_addr.h |  31 +++
 include/linux/qed/qed_eth_if.h |  22 ++
 9 files changed, 443 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/qlogic/qed/qed_ptp.c
 create mode 100644 drivers/net/ethernet/qlogic/qed/qed_ptp.h

diff --git a/drivers/net/ethernet/qlogic/qed/Makefile 
b/drivers/net/ethernet/qlogic/qed/Makefile
index 729e437..1a7300f 100644
--- a/drivers/net/ethernet/qlogic/qed/Makefile
+++ b/drivers/net/ethernet/qlogic/qed/Makefile
@@ -2,7 +2,7 @@ obj-$(CONFIG_QED) := qed.o
 
 qed-y := qed_cxt.o qed_dev.o qed_hw.o qed_init_fw_funcs.o qed_init_ops.o \
 qed_int.o qed_main.o qed_mcp.o qed_sp_commands.o qed_spq.o qed_l2.o \
-qed_selftest.o qed_dcbx.o qed_debug.o
+qed_selftest.o qed_dcbx.o qed_debug.o qed_ptp.o
 qed-$(CONFIG_QED_SRIOV) += qed_sriov.o qed_vf.o
 qed-$(CONFIG_QED_LL2) += qed_ll2.o
 qed-$(CONFIG_QED_RDMA) += qed_roce.o
diff --git a/drivers/net/ethernet/qlogic/qed/qed.h 
b/drivers/net/ethernet/qlogic/qed/qed.h
index 1f61cf3..6557f94 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -456,6 +456,8 @@ struct qed_hwfn {
u8 dcbx_no_edpm;
u8 db_bar_no_edpm;
 
+   /* p_ptp_ptt is valid for leading HWFN only */
+   struct qed_ptt *p_ptp_ptt;
struct qed_simd_fp_handler  simd_proto_handler[64];
 
 #ifdef CONFIG_QED_SRIOV
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c 
b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index 7520eb3..df932be 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -214,6 +214,7 @@ int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn,
p_ramrod->vport_id  = abs_vport_id;
 
p_ramrod->mtu   = cpu_to_le16(p_params->mtu);
+   p_ramrod->handle_ptp_pkts   = p_params->handle_ptp_pkts;
p_ramrod->inner_vlan_removal_en = p_params->remove_inner_vlan;
p_ramrod->drop_ttl0_en  = p_params->drop_ttl0;
p_ramrod->untagged  = p_params->only_untagged;
@@ -1886,6 +1887,7 @@ static int qed_start_vport(struct qed_dev *cdev,
start.drop_ttl0 = params->drop_ttl0;
start.opaque_fid = p_hwfn->hw_info.opaque_fid;
start.concrete_fid = p_hwfn->hw_info.concrete_fid;
+   start.handle_ptp_pkts = params->handle_ptp_pkts;
start.vport_id = params->vport_id;
start.max_buffers_per_cqe = 16;
start.mtu = params->mtu;
@@ -2328,6 +2330,8 @@ static int qed_fp_cqe_completion(struct qed_dev *dev,
 extern const struct qed_eth_dcbnl_ops qed_dcbnl_ops_pass;
 #endif
 
+extern const struct qed_eth_ptp_ops qed_ptp_ops_pass;
+
 static const struct qed_eth_ops qed_eth_ops_pass = {
.common = &qed_common_ops_pass,
 #ifdef CONFIG_QED_SRIOV
@@ -2336,6 +2340,7 @@ static int qed_fp_cqe_completion(struct qed_dev *dev,
 #ifdef CONFIG_DCB
.dcb = &qed_dcbnl_ops_pass,
 #endif
+   .ptp = &qed_ptp_ops_pass,
.fill_dev_info = &qed_fill_eth_dev_info,
.register_ops = &qed_register_eth_ops,
.check_mac = &qed_check_mac,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.h 
b/drivers/net/ethernet/qlogic/qed/qed_l2.h
index 93cb932..e763abd 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.h
@@ -156,6 +156,7 @@ struct qed_sp_vport_start_params {
enum qed_tpa_mode tpa_mode;
bool remove_inner_vlan;
bool tx_switching;
+   bool handle_ptp_pkts;
bool only_untagged;
bool drop_ttl0;
u8 max_buffers_per_cqe;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c 
b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 93eee83..592e104 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -902,6 +902,7 @@ static int qed_slowpath_start(struct qed_dev *cdev,
struct qed_mcp_drv_version drv_version;
const u8 *data = NULL;
struct qed_hwfn *hwfn;
+   struct qed_ptt *p_ptt;
int rc = -EINVAL;
 
if (qed_iov_wq_start(cdev))
@@ -916,6 +917,14 @@ static int qed_slowpath_start(struct qed_dev *cdev,
  QED_FW_FILE_NAME);
goto err;
}
+
+

[PATCH net-next v2 2/2] qede: Add driver support for PTP.

2017-01-28 Thread Sudarsana Kalluru

From: Sudarsana Reddy Kalluru 

This patch adds driver support for:
  - Registering the ptp clock functionality with the OS.
  - Timestamping the Rx/Tx PTP packets.
  - Ethtool callbacks related to PTP.

Signed-off-by: Sudarsana Reddy Kalluru 
Signed-off-by: Yuval Mintz 
---
 drivers/net/ethernet/qlogic/Kconfig |   1 +
 drivers/net/ethernet/qlogic/qede/Makefile   |   2 +-
 drivers/net/ethernet/qlogic/qede/qede.h |   4 +
 drivers/net/ethernet/qlogic/qede/qede_ethtool.c |  10 +
 drivers/net/ethernet/qlogic/qede/qede_fp.c  |   5 +
 drivers/net/ethernet/qlogic/qede/qede_main.c|  39 ++
 drivers/net/ethernet/qlogic/qede/qede_ptp.c | 536 
 drivers/net/ethernet/qlogic/qede/qede_ptp.h |  65 +++
 8 files changed, 661 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/qlogic/qede/qede_ptp.c
 create mode 100644 drivers/net/ethernet/qlogic/qede/qede_ptp.h

diff --git a/drivers/net/ethernet/qlogic/Kconfig 
b/drivers/net/ethernet/qlogic/Kconfig
index 3cfd105..aaa1e85 100644
--- a/drivers/net/ethernet/qlogic/Kconfig
+++ b/drivers/net/ethernet/qlogic/Kconfig
@@ -104,6 +104,7 @@ config QED_SRIOV
 config QEDE
tristate "QLogic QED 25/40/100Gb Ethernet NIC"
depends on QED
+   imply PTP_1588_CLOCK
---help---
  This enables the support for ...
 
diff --git a/drivers/net/ethernet/qlogic/qede/Makefile 
b/drivers/net/ethernet/qlogic/qede/Makefile
index 38fbee6..bc5f7c3 100644
--- a/drivers/net/ethernet/qlogic/qede/Makefile
+++ b/drivers/net/ethernet/qlogic/qede/Makefile
@@ -1,5 +1,5 @@
 obj-$(CONFIG_QEDE) := qede.o
 
-qede-y := qede_main.o qede_fp.o qede_filter.o qede_ethtool.o
+qede-y := qede_main.o qede_fp.o qede_filter.o qede_ethtool.o qede_ptp.o
 qede-$(CONFIG_DCB) += qede_dcbnl.o
 qede-$(CONFIG_QED_RDMA) += qede_roce.o
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h 
b/drivers/net/ethernet/qlogic/qede/qede.h
index b423406..f2aaef2 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -137,6 +137,8 @@ struct qede_rdma_dev {
struct workqueue_struct *roce_wq;
 };
 
+struct qede_ptp;
+
 struct qede_dev {
struct qed_dev  *cdev;
struct net_device   *ndev;
@@ -148,8 +150,10 @@ struct qede_dev {
u32 flags;
 #define QEDE_FLAG_IS_VFBIT(0)
 #define IS_VF(edev)(!!((edev)->flags & QEDE_FLAG_IS_VF))
+#define QEDE_TX_TIMESTAMPING_ENBIT(1)
 
const struct qed_eth_ops*ops;
+   struct qede_ptp *ptp;
 
struct qed_dev_eth_info dev_info;
 #define QEDE_MAX_RSS_CNT(edev) ((edev)->dev_info.num_queues)
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c 
b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
index baf2642..c02754d 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
@@ -39,6 +39,7 @@
 #include 
 #include 
 #include "qede.h"
+#include "qede_ptp.h"
 
 #define QEDE_RQSTAT_OFFSET(stat_name) \
 (offsetof(struct qede_rx_queue, stat_name))
@@ -940,6 +941,14 @@ static int qede_set_channels(struct net_device *dev,
return 0;
 }
 
+/* ethtool get_ts_info callback: forwards the query to the qede PTP code. */
+static int qede_get_ts_info(struct net_device *dev,
+			    struct ethtool_ts_info *info)
+{
+	/* The netdev private area is the qede_dev instance. */
+	return qede_ptp_get_ts_info(netdev_priv(dev), info);
+}
+
 static int qede_set_phys_id(struct net_device *dev,
enum ethtool_phys_id_state state)
 {
@@ -1586,6 +1595,7 @@ static int qede_get_tunable(struct net_device *dev,
.get_rxfh_key_size = qede_get_rxfh_key_size,
.get_rxfh = qede_get_rxfh,
.set_rxfh = qede_set_rxfh,
+   .get_ts_info = qede_get_ts_info,
.get_channels = qede_get_channels,
.set_channels = qede_set_channels,
.self_test = qede_self_test,
diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c 
b/drivers/net/ethernet/qlogic/qede/qede_fp.c
index 445d4d2..11e67ea 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_fp.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c
@@ -40,6 +40,7 @@
 #include 
 #include 
 #include 
+#include "qede_ptp.h"
 
 #include 
 #include "qede.h"
@@ -1277,6 +1278,7 @@ static int qede_rx_process_cqe(struct qede_dev *edev,
qede_get_rxhash(skb, fp_cqe->bitfields, fp_cqe->rss_hash);
qede_set_skb_csum(skb, csum_flag);
skb_record_rx_queue(skb, rxq->rxq_id);
+   qede_ptp_record_rx_ts(edev, cqe, skb);
 
/* SKB is prepared - pass it to stack */
qede_skb_receive(edev, fp, rxq, skb, le16_to_cpu(fp_cqe->vlan_tag));
@@ -1451,6 +1453,9 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb, struct 
net_device *ndev)
first_bd->data.bd_flags.bitfields =
1 << ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT;
 
+   if

[PATCH net-next v2 0/2] qed*: Add support for PTP

2017-01-28 Thread Sudarsana Kalluru

From: Sudarsana Reddy Kalluru 

Hi David,
The patch series adds required changes for qed/qede drivers for
supporting the IEEE Precision Time Protocol (PTP).

Changes from previous versions:
---
v2: Use do_div for 64-bit divisions


Please consider applying this series to "net-next".

Thanks,
Sudarsana

Sudarsana Reddy Kalluru (2):
  qed: Add infrastructure for PTP support.
  qede: Add driver support for PTP.

 drivers/net/ethernet/qlogic/Kconfig |   1 +
 drivers/net/ethernet/qlogic/qed/Makefile|   2 +-
 drivers/net/ethernet/qlogic/qed/qed.h   |   2 +
 drivers/net/ethernet/qlogic/qed/qed_l2.c|   5 +
 drivers/net/ethernet/qlogic/qed/qed_l2.h|   1 +
 drivers/net/ethernet/qlogic/qed/qed_main.c  |  15 +
 drivers/net/ethernet/qlogic/qed/qed_ptp.c   | 319 ++
 drivers/net/ethernet/qlogic/qed/qed_ptp.h   |  47 +++
 drivers/net/ethernet/qlogic/qed/qed_reg_addr.h  |  31 ++
 drivers/net/ethernet/qlogic/qede/Makefile   |   2 +-
 drivers/net/ethernet/qlogic/qede/qede.h |   4 +
 drivers/net/ethernet/qlogic/qede/qede_ethtool.c |  10 +
 drivers/net/ethernet/qlogic/qede/qede_fp.c  |   5 +
 drivers/net/ethernet/qlogic/qede/qede_main.c|  39 ++
 drivers/net/ethernet/qlogic/qede/qede_ptp.c | 536 
 drivers/net/ethernet/qlogic/qede/qede_ptp.h |  65 +++
 include/linux/qed/qed_eth_if.h  |  22 +
 17 files changed, 1104 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/ethernet/qlogic/qed/qed_ptp.c
 create mode 100644 drivers/net/ethernet/qlogic/qed/qed_ptp.h
 create mode 100644 drivers/net/ethernet/qlogic/qede/qede_ptp.c
 create mode 100644 drivers/net/ethernet/qlogic/qede/qede_ptp.h

-- 
1.8.3.1

[PATCH stable v1 13/13] net: ethernet: aquantia: Integrate AQtion 2.5/5 GB

2017-01-28 Thread Alexander Loktionov

From: David VomLehn 

Modify the drivers/net/ethernet/{Makefile,Kconfig} files to make the aquantia
driver a part of the network drivers build.

Signed-off-by: Alexander Loktionov 
Signed-off-by: Dmitrii Tarakanov 
Signed-off-by: Pavel Belous 
Signed-off-by: Dmitry Bezrukov 
Signed-off-by: David M. VomLehn 
---
 drivers/net/ethernet/Kconfig  | 1 +
 drivers/net/ethernet/Makefile | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
index 8cc7467..d467c8b 100644
--- a/drivers/net/ethernet/Kconfig
+++ b/drivers/net/ethernet/Kconfig
@@ -28,6 +28,7 @@ source "drivers/net/ethernet/amazon/Kconfig"
 source "drivers/net/ethernet/amd/Kconfig"
 source "drivers/net/ethernet/apm/Kconfig"
 source "drivers/net/ethernet/apple/Kconfig"
+source "drivers/net/ethernet/aquantia/Kconfig"
 source "drivers/net/ethernet/arc/Kconfig"
 source "drivers/net/ethernet/atheros/Kconfig"
 source "drivers/net/ethernet/aurora/Kconfig"
diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile
index a09423d..123ef8e 100644
--- a/drivers/net/ethernet/Makefile
+++ b/drivers/net/ethernet/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_NET_VENDOR_AMAZON) += amazon/
 obj-$(CONFIG_NET_VENDOR_AMD) += amd/
 obj-$(CONFIG_NET_XGENE) += apm/
 obj-$(CONFIG_NET_VENDOR_APPLE) += apple/
+obj-$(CONFIG_NET_VENDOR_AQUANTIA) += aquantia/
 obj-$(CONFIG_NET_VENDOR_ARC) += arc/
 obj-$(CONFIG_NET_VENDOR_ATHEROS) += atheros/
 obj-$(CONFIG_NET_VENDOR_AURORA) += aurora/
-- 
2.9.3

[PATCH stable v1 00/13] net: ethernet: aquantia: Add AQtion 2.5/5 GB NIC driver

2017-01-28 Thread Alexander Loktionov

From: David VomLehn 

This series introduces the AQtion NIC driver for the aQuantia
AQC107/AQC108 network devices.

The commit for the first patch of the upstream patchset on which this
is based is:

665e17fb5a80e09a59f71e8264420c46810d0534.

The only difference with this patchset is how the Aquantia directory is
patched into the Makefile and Kconfig files.

Signed-off-by: Alexander Loktionov 
Signed-off-by: Dmitrii Tarakanov 
Signed-off-by: Pavel Belous 
Signed-off-by: David M. VomLehn 
---
v1: Initial version. 

Applying this to version 4.9 is a priority for Aquantia as customers
running Ubuntu 16.10 in environments requiring stability are likely to
stay on this kernel version rather than upgrading to the tip. This
patchset also works with version 4.4. We would appreciate it if it could
be applied there, as well, for customers running older distros.

David VomLehn (13):
  net: ethernet: aquantia: Make and configuration files.
  net: ethernet: aquantia: Common functions and definitions
  net: ethernet: aquantia: Add ring support code
  net: ethernet: aquantia: Low-level hardware interfaces
  net: ethernet: aquantia: Support for NIC-specific code
  net: ethernet: aquantia: Atlantic A0 and B0 specific functions.
  net: ethernet: aquantia: Vector operations
  net: ethernet: aquantia: PCI operations
  net: ethernet: aquantia: Atlantic hardware abstraction layer
  net: ethernet: aquantia: Hardware interface and utility functions
  net: ethernet: aquantia: Ethtool support
  net: ethernet: aquantia: Receive side scaling
  net: ethernet: aquantia: Integrate AQtion 2.5/5 GB

 drivers/net/ethernet/Kconfig   |1 +
 drivers/net/ethernet/Makefile  |1 +
 drivers/net/ethernet/aquantia/Kconfig  |   24 +
 drivers/net/ethernet/aquantia/Makefile |5 +
 drivers/net/ethernet/aquantia/atlantic/Makefile|   42 +
 drivers/net/ethernet/aquantia/atlantic/aq_cfg.h|   77 +
 drivers/net/ethernet/aquantia/atlantic/aq_common.h |   23 +
 .../net/ethernet/aquantia/atlantic/aq_ethtool.c|  261 +++
 .../net/ethernet/aquantia/atlantic/aq_ethtool.h|   19 +
 drivers/net/ethernet/aquantia/atlantic/aq_hw.h |  177 ++
 .../net/ethernet/aquantia/atlantic/aq_hw_utils.c   |   68 +
 .../net/ethernet/aquantia/atlantic/aq_hw_utils.h   |   47 +
 drivers/net/ethernet/aquantia/atlantic/aq_main.c   |  273 +++
 drivers/net/ethernet/aquantia/atlantic/aq_main.h   |   17 +
 drivers/net/ethernet/aquantia/atlantic/aq_nic.c|  937 
 drivers/net/ethernet/aquantia/atlantic/aq_nic.h|  108 +
 .../ethernet/aquantia/atlantic/aq_nic_internal.h   |   46 +
 .../net/ethernet/aquantia/atlantic/aq_pci_func.c   |  345 +++
 .../net/ethernet/aquantia/atlantic/aq_pci_func.h   |   34 +
 drivers/net/ethernet/aquantia/atlantic/aq_ring.c   |  376 
 drivers/net/ethernet/aquantia/atlantic/aq_ring.h   |  157 ++
 drivers/net/ethernet/aquantia/atlantic/aq_rss.h|   26 +
 drivers/net/ethernet/aquantia/atlantic/aq_utils.h  |   50 +
 drivers/net/ethernet/aquantia/atlantic/aq_vec.c|  392 
 drivers/net/ethernet/aquantia/atlantic/aq_vec.h|   42 +
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c  |  905 
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.h  |   34 +
 .../aquantia/atlantic/hw_atl/hw_atl_a0_internal.h  |  155 ++
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c  |  958 
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h  |   34 +
 .../aquantia/atlantic/hw_atl/hw_atl_b0_internal.h  |  207 ++
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c | 1394 
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h |  677 ++
 .../aquantia/atlantic/hw_atl/hw_atl_llh_internal.h | 2375 
 .../aquantia/atlantic/hw_atl/hw_atl_utils.c|  570 +
 .../aquantia/atlantic/hw_atl/hw_atl_utils.h|  210 ++
 drivers/net/ethernet/aquantia/atlantic/ver.h   |   18 +
 37 files changed, 11085 insertions(+)
 create mode 100644 drivers/net/ethernet/aquantia/Kconfig
 create mode 100644 drivers/net/ethernet/aquantia/Makefile
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/Makefile
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/aq_common.h
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/aq_ethtool.h
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/aq_hw.h
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.c
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.h
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/aq_main.c
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/aq_main.h
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/aq_nic.c
 create mode

[PATCH stable v1 05/13] net: ethernet: aquantia: Support for NIC-specific code

2017-01-28 Thread Alexander Loktionov

From: David VomLehn 

Add support for code specific to the Atlantic NIC.

Signed-off-by: Alexander Loktionov 
Signed-off-by: Dmitrii Tarakanov 
Signed-off-by: Pavel Belous 
Signed-off-by: Dmitry Bezrukov 
Signed-off-by: David M. VomLehn 
---
 drivers/net/ethernet/aquantia/atlantic/aq_main.c   | 273 ++
 drivers/net/ethernet/aquantia/atlantic/aq_main.h   |  17 +
 drivers/net/ethernet/aquantia/atlantic/aq_nic.c| 937 +
 drivers/net/ethernet/aquantia/atlantic/aq_nic.h| 108 +++
 .../ethernet/aquantia/atlantic/aq_nic_internal.h   |  46 +
 5 files changed, 1381 insertions(+)
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/aq_main.c
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/aq_main.h
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/aq_nic.c
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/aq_nic.h
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/aq_nic_internal.h

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c 
b/drivers/net/ethernet/aquantia/atlantic/aq_main.c
new file mode 100644
index 000..c17c70a
--- /dev/null
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c
@@ -0,0 +1,273 @@
+/*
+ * aQuantia Corporation Network Driver
+ * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ */
+
+/* File aq_main.c: Main file for aQuantia Linux driver. */
+
+#include "aq_main.h"
+#include "aq_nic.h"
+#include "aq_pci_func.h"
+#include "aq_ethtool.h"
+#include "hw_atl/hw_atl_a0.h"
+#include "hw_atl/hw_atl_b0.h"
+
+#include 
+#include 
+
+static const struct pci_device_id aq_pci_tbl[] = {
+   { PCI_VDEVICE(AQUANTIA, HW_ATL_DEVICE_ID_0001), },
+   { PCI_VDEVICE(AQUANTIA, HW_ATL_DEVICE_ID_D100), },
+   { PCI_VDEVICE(AQUANTIA, HW_ATL_DEVICE_ID_D107), },
+   { PCI_VDEVICE(AQUANTIA, HW_ATL_DEVICE_ID_D108), },
+   { PCI_VDEVICE(AQUANTIA, HW_ATL_DEVICE_ID_D109), },
+   {}
+};
+
+MODULE_DEVICE_TABLE(pci, aq_pci_tbl);
+
+MODULE_LICENSE("GPL v2");
+MODULE_VERSION(AQ_CFG_DRV_VERSION);
+MODULE_AUTHOR(AQ_CFG_DRV_AUTHOR);
+MODULE_DESCRIPTION(AQ_CFG_DRV_DESC);
+
+/* Look up the hardware ops table for @pdev.
+ *
+ * The A0 lookup is tried first; the B0 lookup is consulted only when the
+ * A0 lookup returns NULL.  Returns NULL when neither lookup yields a table.
+ */
+static struct aq_hw_ops *aq_pci_probe_get_hw_ops_by_id(struct pci_dev *pdev)
+{
+	struct aq_hw_ops *a0_ops = hw_atl_a0_get_ops_by_id(pdev);
+
+	return a0_ops ? a0_ops : hw_atl_b0_get_ops_by_id(pdev);
+}
+
+/* Open the network device.
+ *
+ * Allocates the NIC state with aq_nic_alloc_hot(), then initialises and
+ * starts it.  If initialisation or start fails, the NIC is de-initialised
+ * again before the error is returned.
+ *
+ * Returns the (non-negative) result of aq_nic_start() on success, -ENOMEM
+ * when the allocation fails, or the negative error reported by
+ * aq_nic_init()/aq_nic_start().
+ */
+static int aq_ndev_open(struct net_device *ndev)
+{
+	struct aq_nic_s *aq_nic;
+	int err;
+
+	aq_nic = aq_nic_alloc_hot(ndev);
+	if (!aq_nic)
+		return -ENOMEM;	/* nothing was set up: do not deinit NULL */
+
+	err = aq_nic_init(aq_nic);
+	if (err < 0)
+		goto err_deinit;
+
+	err = aq_nic_start(aq_nic);
+	if (err < 0)
+		goto err_deinit;
+
+	return err;
+
+err_deinit:
+	aq_nic_deinit(aq_nic);
+	return err;
+}
+
+/* Close the network device.
+ *
+ * Stops the NIC first; only when that succeeds is the NIC de-initialised
+ * and its hot resources freed.  A failing aq_nic_stop() is reported to the
+ * caller and nothing is torn down.
+ */
+static int aq_ndev_close(struct net_device *ndev)
+{
+	struct aq_nic_s *nic = netdev_priv(ndev);
+	int rc = aq_nic_stop(nic);
+
+	if (rc < 0)
+		return rc;
+
+	aq_nic_deinit(nic);
+	aq_nic_free_hot_resources(nic);
+	return rc;
+}
+
+/* Transmit entry point: hands @skb to aq_nic_xmit() for the NIC stored in
+ * the netdev private area and returns its result unchanged.
+ */
+static int aq_ndev_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+	struct aq_nic_s *nic = netdev_priv(ndev);
+
+	return aq_nic_xmit(nic, skb);
+}
+
+/* Change the interface MTU.
+ *
+ * A request equal to the current MTU is a successful no-op, and values
+ * below 68 are rejected with -EINVAL.  Otherwise the NIC is programmed with
+ * new_mtu + ETH_HLEN, ndev->mtu is updated and, if the interface is
+ * running, it is closed and reopened.
+ *
+ * Returns a non-negative value on success or a negative error code from
+ * aq_nic_set_mtu(), aq_ndev_close() or aq_ndev_open().
+ */
+static int aq_ndev_change_mtu(struct net_device *ndev, int new_mtu)
+{
+	struct aq_nic_s *aq_nic = netdev_priv(ndev);
+	int err;
+
+	if (new_mtu == ndev->mtu)
+		return 0;
+
+	if (new_mtu < 68)
+		return -EINVAL;
+
+	err = aq_nic_set_mtu(aq_nic, new_mtu + ETH_HLEN);
+	if (err < 0)
+		return err;
+	ndev->mtu = new_mtu;
+
+	if (netif_running(ndev)) {
+		/* A failed stop or restart must reach the caller instead of
+		 * being reported as a successful MTU change.
+		 */
+		err = aq_ndev_close(ndev);
+		if (err < 0)
+			return err;
+		err = aq_ndev_open(ndev);
+	}
+
+	return err;
+}
+
+static int aq_ndev_set_features(struct net_device *ndev,
+   netdev_features_t features)
+{
+   struct aq_nic_s *aq_nic = netdev_priv(ndev);
+   struct aq_nic_cfg_s *aq_cfg = aq_nic_get_cfg(aq_nic);
+   bool is_lro = false;
+
+   if (aq_cfg->hw_features & NETIF_F_LRO) {
+   is_lro = features & NETIF_F_LRO;
+
+   if (aq_cfg->is_lro != is_lro) {
+   aq_cfg->is_lro = is_lro;

[PATCH stable v1 07/13] net: ethernet: aquantia: Vector operations

2017-01-28 Thread Alexander Loktionov

From: David VomLehn 

Add functions to manipulate the vector of receive and transmit rings.

Signed-off-by: Alexander Loktionov 
Signed-off-by: Dmitrii Tarakanov 
Signed-off-by: Pavel.Belous 
Signed-off-by: Dmitry Bezrukov 
Signed-off-by: David M. VomLehn 
---
 drivers/net/ethernet/aquantia/atlantic/aq_vec.c | 392 
 drivers/net/ethernet/aquantia/atlantic/aq_vec.h |  42 +++
 2 files changed, 434 insertions(+)
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/aq_vec.c
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/aq_vec.h

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c 
b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
new file mode 100644
index 000..140962f
--- /dev/null
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
@@ -0,0 +1,392 @@
+/*
+ * aQuantia Corporation Network Driver
+ * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ */
+
+/* File aq_vec.c: Definition of common structure for vector of Rx and Tx rings.
+ * Definition of functions for Rx and Tx rings. Friendly module for aq_nic.
+ */
+
+#include "aq_vec.h"
+#include "aq_nic.h"
+#include "aq_ring.h"
+#include "aq_hw.h"
+
+#include 
+
+struct aq_vec_s { /* one vector: a NAPI context plus its Tx/Rx ring pairs */
+   struct aq_obj_s header; /* its lock member is released at the end of aq_vec_poll() */
+   struct aq_hw_ops *aq_hw_ops; /* hardware callbacks, e.g. hw_ring_rx_receive, hw_irq_enable */
+   struct aq_hw_s *aq_hw; /* handle passed as first argument to the aq_hw_ops callbacks */
+   struct aq_nic_s *aq_nic;
+   unsigned int tx_rings; /* upper bound of the ring[] walk in aq_vec_poll() */
+   unsigned int rx_rings; /* NOTE(review): presumably the Rx counterpart of tx_rings -- confirm */
+   struct aq_ring_param_s aq_ring_param; /* vec_idx selects the bit passed to hw_irq_enable */
+   struct napi_struct napi; /* embedded: aq_vec_poll() recovers the vector via container_of() */
+   struct aq_ring_s ring[AQ_CFG_TCS_MAX][2]; /* second index: AQ_VEC_TX_ID (0) or AQ_VEC_RX_ID (1) */
+};
+
+#define AQ_VEC_TX_ID 0
+#define AQ_VEC_RX_ID 1
+
+static int aq_vec_poll(struct napi_struct *napi, int budget)
+__releases(>lock)
+__acquires(>lock)
+{
+   struct aq_vec_s *self = container_of(napi, struct aq_vec_s, napi);
+   struct aq_ring_s *ring = NULL;
+   int work_done = 0;
+   int err = 0;
+   unsigned int i = 0U;
+   unsigned int sw_tail_old = 0U;
+   bool was_tx_cleaned = false;
+
+   if (!self) {
+   err = -EINVAL;
+   } else if (spin_trylock(>header.lock)) {
+   for (i = 0U, ring = self->ring[0];
+   self->tx_rings > i; ++i, ring = self->ring[i]) {
+   if (self->aq_hw_ops->hw_ring_tx_head_update) {
+   err = self->aq_hw_ops->hw_ring_tx_head_update(
+   self->aq_hw,
+   [AQ_VEC_TX_ID]);
+   if (err < 0)
+   goto err_exit;
+   }
+
+   if (ring[AQ_VEC_TX_ID].sw_head !=
+   ring[AQ_VEC_TX_ID].hw_head) {
+   err = aq_ring_tx_clean([AQ_VEC_TX_ID]);
+   if (err < 0)
+   goto err_exit;
+   was_tx_cleaned = true;
+   }
+
+   err = self->aq_hw_ops->hw_ring_rx_receive(self->aq_hw,
+   [AQ_VEC_RX_ID]);
+   if (err < 0)
+   goto err_exit;
+
+   if (ring[AQ_VEC_RX_ID].sw_head !=
+   ring[AQ_VEC_RX_ID].hw_head) {
+   err = aq_ring_rx_clean([AQ_VEC_RX_ID],
+  _done,
+  budget - work_done);
+   if (err < 0)
+   goto err_exit;
+
+   sw_tail_old = ring[AQ_VEC_RX_ID].sw_tail;
+
+   err = aq_ring_rx_fill([AQ_VEC_RX_ID]);
+   if (err < 0)
+   goto err_exit;
+
+   err = self->aq_hw_ops->hw_ring_rx_fill(
+   self->aq_hw,
+   [AQ_VEC_RX_ID], sw_tail_old);
+   if (err < 0)
+   goto err_exit;
+   }
+   }
+
+   if (was_tx_cleaned)
+   work_done = budget;
+
+   if (work_done < budget) {
+   napi_complete(napi);
+   self->aq_hw_ops->hw_irq_enable(self->aq_hw,
+   1U << self->aq_ring_param.vec_idx);
+   }
+
+err_exit:
+   spin_unlock(>header.lock);
+

[PATCH stable v1 03/13] net: ethernet: aquantia: Add ring support code

2017-01-28 Thread Alexander Loktionov

From: David VomLehn 

Add code to support the transmit and receive ring buffers.

Signed-off-by: Alexander Loktionov 
Signed-off-by: Dmitrii Tarakanov 
Signed-off-by: Pavel Belous 
Signed-off-by: Dmitry Bezrukov 
Signed-off-by: David M. VomLehn 
---
 drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 376 +++
 drivers/net/ethernet/aquantia/atlantic/aq_ring.h | 157 ++
 2 files changed, 533 insertions(+)
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/aq_ring.c
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/aq_ring.h

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c 
b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
new file mode 100644
index 000..b517b26
--- /dev/null
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
@@ -0,0 +1,376 @@
+/*
+ * aQuantia Corporation Network Driver
+ * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ */
+
+/* File aq_ring.c: Definition of functions for Rx/Tx rings. */
+
+#include "aq_ring.h"
+#include "aq_nic.h"
+#include "aq_hw.h"
+
+#include 
+#include 
+
+/* Allocate the backing storage of a ring.
+ *
+ * Expects self->size and self->dx_size to be set by the caller.  Allocates
+ * the zeroed buffer array (self->buff_ring) and the coherent DMA descriptor
+ * area (self->dx_ring, bus address stored in self->dx_ring_pa).
+ *
+ * Returns @self on success.  On any allocation failure the ring is handed
+ * to aq_ring_free() and NULL is returned.
+ */
+static struct aq_ring_s *aq_ring_alloc(struct aq_ring_s *self,
+				       struct aq_nic_s *aq_nic)
+{
+	self->buff_ring =
+		kcalloc(self->size, sizeof(struct aq_ring_buff_s), GFP_KERNEL);
+	if (!self->buff_ring)
+		goto err_free;
+
+	self->dx_ring = dma_alloc_coherent(aq_nic_get_dev(aq_nic),
+					   self->size * self->dx_size,
+					   &self->dx_ring_pa, GFP_KERNEL);
+	if (!self->dx_ring)
+		goto err_free;
+
+	return self;
+
+err_free:
+	aq_ring_free(self);
+	return NULL;
+}
+
+/* Set up and allocate a transmit ring.
+ *
+ * Records the owning NIC and ring index, takes the descriptor count from
+ * aq_nic_cfg->txds and the descriptor size from the hardware caps, then
+ * allocates the ring storage.
+ *
+ * Returns @self on success or NULL on allocation failure.
+ */
+struct aq_ring_s *aq_ring_tx_alloc(struct aq_ring_s *self,
+				   struct aq_nic_s *aq_nic,
+				   unsigned int idx,
+				   struct aq_nic_cfg_s *aq_nic_cfg)
+{
+	self->aq_nic = aq_nic;
+	self->idx = idx;
+	self->size = aq_nic_cfg->txds;
+	self->dx_size = aq_nic_cfg->aq_hw_caps->txd_size;
+
+	/* aq_ring_alloc() already frees the ring and returns NULL when it
+	 * fails, so there is nothing left to clean up here (and no valid
+	 * pointer to pass to aq_ring_free()).
+	 */
+	return aq_ring_alloc(self, aq_nic);
+}
+
+/* Set up and allocate a receive ring.
+ *
+ * Records the owning NIC and ring index, takes the descriptor count from
+ * aq_nic_cfg->rxds and the descriptor size from the hardware caps, then
+ * allocates the ring storage.
+ *
+ * Returns @self on success or NULL on allocation failure.
+ */
+struct aq_ring_s *aq_ring_rx_alloc(struct aq_ring_s *self,
+				   struct aq_nic_s *aq_nic,
+				   unsigned int idx,
+				   struct aq_nic_cfg_s *aq_nic_cfg)
+{
+	self->aq_nic = aq_nic;
+	self->idx = idx;
+	self->size = aq_nic_cfg->rxds;
+	self->dx_size = aq_nic_cfg->aq_hw_caps->rxd_size;
+
+	/* aq_ring_alloc() already frees the ring and returns NULL when it
+	 * fails, so there is nothing left to clean up here (and no valid
+	 * pointer to pass to aq_ring_free()).
+	 */
+	return aq_ring_alloc(self, aq_nic);
+}
+
+/* Reset the ring's position tracking: the hardware head and the software
+ * head/tail indices all go back to zero.  Always returns 0.
+ */
+int aq_ring_init(struct aq_ring_s *self)
+{
+	self->sw_tail = 0;
+	self->sw_head = 0;
+	self->hw_head = 0;
+
+	return 0;
+}
+
+/* Copy @buffers entries from @buffer into the ring's buffer array, starting
+ * at self->sw_tail.
+ *
+ * When the entries do not fit before the end of the array, the copy is
+ * split: the first part fills the array up to its end and the remainder
+ * continues from index 0.  This function does not modify self->sw_tail.
+ *
+ * NOTE(review): assumes @buffers does not exceed self->size -- confirm
+ * against the callers.
+ */
+void aq_ring_tx_append_buffs(struct aq_ring_s *self,
+			     struct aq_ring_buff_s *buffer,
+			     unsigned int buffers)
+{
+	if (likely(self->sw_tail + buffers < self->size)) {
+		memcpy(&self->buff_ring[self->sw_tail], buffer,
+		       sizeof(buffer[0]) * buffers);
+	} else {
+		/* Wrap-around: split the copy at the end of the array. */
+		unsigned int first_part = self->size - self->sw_tail;
+		unsigned int second_part = buffers - first_part;
+
+		memcpy(&self->buff_ring[self->sw_tail], buffer,
+		       sizeof(buffer[0]) * first_part);
+
+		memcpy(&self->buff_ring[0], &buffer[first_part],
+		       sizeof(buffer[0]) * second_part);
+	}
+}
+
+int aq_ring_tx_clean(struct aq_ring_s *self)
+{
+   struct device *dev = aq_nic_get_dev(self->aq_nic);
+
+   for (; self->sw_head != self->hw_head;
+   self->sw_head = aq_ring_next_dx(self, self->sw_head)) {
+   struct aq_ring_buff_s *buff = >buff_ring[self->sw_head];
+
+   if (likely(buff->is_mapped)) {
+   if (unlikely(buff->is_sop))
+

[PATCH stable v1 11/13] net: ethernet: aquantia: Ethtool support

2017-01-28 Thread Alexander Loktionov

From: David VomLehn 

Add the driver interfaces required for support by the ethtool utility.

Signed-off-by: Alexander Loktionov 
Signed-off-by: Dmitrii Tarakanov 
Signed-off-by: Pavel Belous 
Signed-off-by: Dmitry Bezrukov 
Signed-off-by: David M. VomLehn 
---
 .../net/ethernet/aquantia/atlantic/aq_ethtool.c| 261 +
 .../net/ethernet/aquantia/atlantic/aq_ethtool.h|  19 ++
 2 files changed, 280 insertions(+)
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/aq_ethtool.h

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c 
b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
new file mode 100644
index 000..c5b025e
--- /dev/null
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
@@ -0,0 +1,261 @@
+/*
+ * aQuantia Corporation Network Driver
+ * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ */
+
+/* File aq_ethtool.c: Definition of ethtool-related functions. */
+
+#include "aq_ethtool.h"
+#include "aq_nic.h"
+
+/* ethtool register-dump hook: clear the caller's buffer @p (regs_count u32
+ * words), then hand it to aq_nic_get_regs() to be filled.
+ */
+static void aq_ethtool_get_regs(struct net_device *ndev,
+				struct ethtool_regs *regs, void *p)
+{
+	struct aq_nic_s *nic = netdev_priv(ndev);
+	u32 count = aq_nic_get_regs_count(nic);
+
+	memset(p, 0, sizeof(u32) * count);
+	aq_nic_get_regs(nic, regs, p);
+}
+
+/* Size in bytes of the register dump: one u32 per register reported by
+ * aq_nic_get_regs_count().
+ */
+static int aq_ethtool_get_regs_len(struct net_device *ndev)
+{
+	struct aq_nic_s *nic = netdev_priv(ndev);
+	u32 count = aq_nic_get_regs_count(nic);
+
+	return sizeof(u32) * count;
+}
+
+/* Link-state hook: returns whatever ethtool_op_get_link() reports for @ndev. */
+static u32 aq_ethtool_get_link(struct net_device *ndev)
+{
+   return ethtool_op_get_link(ndev);
+}
+
+/* Fill @cmd from the NIC's link settings, then record the speed: the value
+ * from aq_nic_get_link_speed() while the carrier is up, 0 otherwise.
+ * Always returns 0.
+ */
+static int aq_ethtool_get_settings(struct net_device *ndev,
+				   struct ethtool_cmd *cmd)
+{
+	struct aq_nic_s *nic = netdev_priv(ndev);
+	u32 speed = 0U;
+
+	aq_nic_get_link_settings(nic, cmd);
+
+	if (netif_carrier_ok(ndev))
+		speed = aq_nic_get_link_speed(nic);
+
+	ethtool_cmd_speed_set(cmd, speed);
+
+	return 0;
+}
+
+/* Forward the requested link settings in @cmd to the NIC layer and return
+ * its result unchanged.
+ */
+static int aq_ethtool_set_settings(struct net_device *ndev,
+				   struct ethtool_cmd *cmd)
+{
+	return aq_nic_set_link_settings(netdev_priv(ndev), cmd);
+}
+
+/* "5U" is the number of per-queue stats lines in aq_ethtool_stat_names
+ * (InPackets, OutPackets, InJumboPackets, InLroPackets, InErrors);
+ * aq_ethtool_stat_queue_chars is the same count in bytes of name storage.
+ */
+static const unsigned int aq_ethtool_stat_queue_lines = 5U;
+static const unsigned int aq_ethtool_stat_queue_chars =
+   5U * ETH_GSTRING_LEN;
+/* Statistic names, each stored in an ETH_GSTRING_LEN-sized slot.  The global
+ * counters come first, followed by five lines for each of queues 0..7.
+ */
+static const char aq_ethtool_stat_names[][ETH_GSTRING_LEN] = {
+	"InPackets",
+	"InUCast",
+	"InMCast",
+	"InBCast",
+	"InErrors",
+	"OutPackets",
+	"OutUCast",
+	"OutMCast",
+	"OutBCast",
+	"InUCastOctets",
+	"OutUCastOctets",
+	"InMCastOctets",
+	"OutMCastOctets",
+	"InBCastOctets",
+	"OutBCastOctets",
+	"InOctets",
+	"OutOctets",
+	"InPacketsDma",
+	"OutPacketsDma",
+	"InOctetsDma",
+	"OutOctetsDma",
+	"InDroppedDma",
+	"Queue[0] InPackets",
+	"Queue[0] OutPackets",
+	"Queue[0] InJumboPackets",
+	"Queue[0] InLroPackets",
+	"Queue[0] InErrors",
+	"Queue[1] InPackets",
+	"Queue[1] OutPackets",
+	"Queue[1] InJumboPackets",
+	"Queue[1] InLroPackets",
+	"Queue[1] InErrors",
+	"Queue[2] InPackets",
+	"Queue[2] OutPackets",
+	"Queue[2] InJumboPackets",
+	"Queue[2] InLroPackets",
+	"Queue[2] InErrors",
+	"Queue[3] InPackets",
+	"Queue[3] OutPackets",
+	"Queue[3] InJumboPackets",
+	"Queue[3] InLroPackets",
+	"Queue[3] InErrors",
+	"Queue[4] InPackets",
+	"Queue[4] OutPackets",
+	"Queue[4] InJumboPackets",
+	"Queue[4] InLroPackets",
+	"Queue[4] InErrors",
+	"Queue[5] InPackets",
+	"Queue[5] OutPackets",
+	"Queue[5] InJumboPackets",
+	"Queue[5] InLroPackets",
+	"Queue[5] InErrors",
+	"Queue[6] InPackets",
+	"Queue[6] OutPackets",
+	"Queue[6] InJumboPackets",
+	"Queue[6] InLroPackets",
+	"Queue[6] InErrors",
+	"Queue[7] InPackets",
+	"Queue[7] OutPackets",
+	"Queue[7] InJumboPackets",
+	"Queue[7] InLroPackets",
+	"Queue[7] InErrors",
+};
+
+static void aq_ethtool_stats(struct net_device *ndev,
+struct ethtool_stats *stats, u64 *data)
+{
+   struct aq_nic_s *aq_nic = netdev_priv(ndev);
+
+/* ASSERT: Need add

[PATCH stable v1 06/13] net: ethernet: aquantia: Atlantic A0 and B0 specific functions.

2017-01-28 Thread Alexander Loktionov

From: David VomLehn 

Add Atlantic A0 and B0 specific functions.

Signed-off-by: Alexander Loktionov 
Signed-off-by: Dmitrii Tarakanov 
Signed-off-by: Pavel Belous 
Signed-off-by: Dmitry Bezrukov 
Signed-off-by: David M. VomLehn 
---
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c  | 905 +++
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.h  |  34 +
 .../aquantia/atlantic/hw_atl/hw_atl_a0_internal.h  | 155 
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c  | 958 +
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h  |  34 +
 .../aquantia/atlantic/hw_atl/hw_atl_b0_internal.h  | 207 +
 6 files changed, 2293 insertions(+)
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.h
 create mode 100644 
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0_internal.h
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h
 create mode 100644 
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h

diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c 
b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
new file mode 100644
index 000..1f38805
--- /dev/null
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
@@ -0,0 +1,905 @@
+/*
+ * aQuantia Corporation Network Driver
+ * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ */
+
+/* File hw_atl_a0.c: Definition of Atlantic hardware specific functions. */
+
+#include "../aq_hw.h"
+#include "../aq_hw_utils.h"
+#include "../aq_ring.h"
+#include "hw_atl_a0.h"
+#include "hw_atl_utils.h"
+#include "hw_atl_llh.h"
+#include "hw_atl_a0_internal.h"
+
+/* Copy the A0 capability table into @aq_hw_caps.  @self is not used.
+ * Always returns 0.
+ */
+static int hw_atl_a0_get_hw_caps(struct aq_hw_s *self,
+				 struct aq_hw_caps_s *aq_hw_caps)
+{
+	memcpy(aq_hw_caps, &hw_atl_a0_hw_caps_, sizeof(*aq_hw_caps));
+	return 0;
+}
+
+/* Allocate a zeroed hw_atl_s, record the owning PCI function and set
+ * not_ff_addr to 0x10.  Returns the object cast to its aq_hw_s base, or
+ * NULL when the allocation fails.  @port and @ops are not used here.
+ */
+static struct aq_hw_s *hw_atl_a0_create(struct aq_pci_func_s *aq_pci_func,
+					unsigned int port,
+					struct aq_hw_ops *ops)
+{
+	struct hw_atl_s *hw = kzalloc(sizeof(*hw), GFP_KERNEL);
+
+	if (!hw)
+		return NULL;
+
+	hw->base.not_ff_addr = 0x10U;
+	hw->base.aq_pci_func = aq_pci_func;
+
+	return (struct aq_hw_s *)hw;
+}
+
+/* Release the hardware object: @self is passed straight to kfree(). */
+static void hw_atl_a0_destroy(struct aq_hw_s *self)
+{
+   kfree(self);
+}
+
+/* Reset sequence for the A0 hardware.  Writes the *_reg_res_dis registers,
+ * requests the global soft reset and then the interrupt reset, waiting after
+ * each request for the corresponding bit to read back as 0, and finally
+ * calls hw_atl_utils_mpi_set() with MPI_RESET.
+ *
+ * Returns the result of aq_hw_err_from_flags() when the sequence completes,
+ * or the negative err observed after one of the waits.
+ */
+static int hw_atl_a0_hw_reset(struct aq_hw_s *self)
+{
+   int err = 0;
+
+   glb_glb_reg_res_dis_set(self, 1U);
+   pci_pci_reg_res_dis_set(self, 0U);
+   rx_rx_reg_res_dis_set(self, 0U);
+   tx_tx_reg_res_dis_set(self, 0U);
+
+   HW_ATL_FLUSH();
+   glb_soft_res_set(self, 1);
+
+   /* check 10 times by 1ms */
+   /* NOTE(review): err is never assigned explicitly before the checks below,
+    * so AQ_HW_WAIT_FOR presumably sets it on timeout - confirm against the
+    * macro definition.
+    */
+   AQ_HW_WAIT_FOR(glb_soft_res_get(self) == 0, 1000U, 10U);
+   if (err < 0)
+   goto err_exit;
+
+   itr_irq_reg_res_dis_set(self, 0U);
+   itr_res_irq_set(self, 1U);
+
+   /* check 10 times by 1ms */
+   AQ_HW_WAIT_FOR(itr_res_irq_get(self) == 0, 1000U, 10U);
+   if (err < 0)
+   goto err_exit;
+
+   hw_atl_utils_mpi_set(self, MPI_RESET, 0x0U);
+
+   err = aq_hw_err_from_flags(self);
+
+err_exit:
+   return err;
+}
+
+static int hw_atl_a0_hw_qos_set(struct aq_hw_s *self)
+{
+   u32 tc = 0U;
+   u32 buff_size = 0U;
+   unsigned int i_priority = 0U;
+   bool is_rx_flow_control = false;
+
+   /* TPS Descriptor rate init */
+   tps_tx_pkt_shed_desc_rate_curr_time_res_set(self, 0x0U);
+   tps_tx_pkt_shed_desc_rate_lim_set(self, 0xA);
+
+   /* TPS VM init */
+   tps_tx_pkt_shed_desc_vm_arb_mode_set(self, 0U);
+
+   /* TPS TC credits init */
+   tps_tx_pkt_shed_desc_tc_arb_mode_set(self, 0U);
+   tps_tx_pkt_shed_data_arb_mode_set(self, 0U);
+
+   tps_tx_pkt_shed_tc_data_max_credit_set(self, 0xFFF, 0U);
+   tps_tx_pkt_shed_tc_data_weight_set(self, 0x64, 0U);
+   tps_tx_pkt_shed_desc_tc_max_credit_set(self, 0x50, 0U);
+   tps_tx_pkt_shed_desc_tc_weight_set(self, 0x1E, 0U);
+
+   /* Tx buf size */
+   buff_size = HW_ATL_A0_TXBUF_MAX;
+
+   tpb_tx_pkt_buff_size_per_tc_set(self, buff_size, tc);
+   tpb_tx_buff_hi_threshold_per_tc_set(self,
+   (buff_size * (1024 / 32U) * 66U) /
+   100U, tc);
+

[PATCH stable v1 02/13] net: ethernet: aquantia: Common functions and definitions

2017-01-28 Thread Alexander Loktionov

From: David VomLehn 

Add files containing the functions and definitions used in common in
different functional areas.

Signed-off-by: Alexander Loktionov 
Signed-off-by: Dmitrii Tarakanov 
Signed-off-by: Pavel Belous 
Signed-off-by: Dmitry Bezrukov 
Signed-off-by: David M. VomLehn 
---
 drivers/net/ethernet/aquantia/atlantic/aq_cfg.h| 77 ++
 drivers/net/ethernet/aquantia/atlantic/aq_common.h | 23 +++
 drivers/net/ethernet/aquantia/atlantic/aq_utils.h  | 50 ++
 3 files changed, 150 insertions(+)
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/aq_common.h
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/aq_utils.h

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h 
b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
new file mode 100644
index 000..5f99237
--- /dev/null
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
@@ -0,0 +1,77 @@
+/*
+ * aQuantia Corporation Network Driver
+ * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ */
+
+/* File aq_cfg.h: Definition of configuration parameters and constants. */
+
+#ifndef AQ_CFG_H
+#define AQ_CFG_H
+
+/* default number of vectors and traffic classes */
+#define AQ_CFG_VECS_DEF   4U
+#define AQ_CFG_TCS_DEF    1U
+
+/* default number of Tx / Rx descriptors */
+#define AQ_CFG_TXDS_DEF    4096U
+#define AQ_CFG_RXDS_DEF    1024U
+
+#define AQ_CFG_IS_POLLING_DEF 0U
+
+#define AQ_CFG_FORCE_LEGACY_INT 0U
+
+#define AQ_CFG_IS_INTERRUPT_MODERATION_DEF   1U
+/* NOTE(review): the archived text reads "0xU", which is not a valid constant;
+ * 0xFFFFU is the presumed original value - confirm against the posted patch.
+ */
+#define AQ_CFG_INTERRUPT_MODERATION_RATE_DEF 0xFFFFU
+#define AQ_CFG_IRQ_MASK                      0x1FFU
+
+/* upper limits for vectors and traffic classes */
+#define AQ_CFG_VECS_MAX   8U
+#define AQ_CFG_TCS_MAX    8U
+
+#define AQ_CFG_TX_FRAME_MAX  (16U * 1024U)
+#define AQ_CFG_RX_FRAME_MAX  (4U * 1024U)
+
+/* LRO */
+#define AQ_CFG_IS_LRO_DEF   1U
+
+/* RSS */
+#define AQ_CFG_RSS_INDIRECTION_TABLE_MAX  128U
+#define AQ_CFG_RSS_HASHKEY_SIZE   320U
+
+#define AQ_CFG_IS_RSS_DEF   1U
+#define AQ_CFG_NUM_RSS_QUEUES_DEF   AQ_CFG_VECS_DEF
+#define AQ_CFG_RSS_BASE_CPU_NUM_DEF 0U
+
+#define AQ_CFG_PCI_FUNC_MSIX_IRQS   9U
+#define AQ_CFG_PCI_FUNC_PORTS   2U
+
+/* timer periods, both 2 * HZ jiffies */
+#define AQ_CFG_SERVICE_TIMER_INTERVAL    (2 * HZ)
+#define AQ_CFG_POLLING_TIMER_INTERVAL   ((unsigned int)(2 * HZ))
+
+#define AQ_CFG_SKB_FRAGS_MAX   32U
+
+#define AQ_CFG_NAPI_WEIGHT 64U
+
+#define AQ_CFG_MULTICAST_ADDRESS_MAX 32U
+
+/*#define AQ_CFG_MAC_ADDR_PERMANENT {0x30, 0x0E, 0xE3, 0x12, 0x34, 0x56}*/
+
+#define AQ_CFG_FC_MODE 3U
+
+/* NOTE(review): the archived text reads "0xU" in both places below;
+ * 0xFFFFU is the presumed original value - confirm against the posted patch.
+ */
+#define AQ_CFG_SPEED_MSK  0xFFFFU  /* 0xFFFFU==auto_neg */
+
+#define AQ_CFG_IS_AUTONEG_DEF   1U
+#define AQ_CFG_MTU_DEF  1514U
+
+#define AQ_CFG_LOCK_TRYS   100U
+
+/* driver identification strings */
+#define AQ_CFG_DRV_AUTHOR      "aQuantia"
+#define AQ_CFG_DRV_DESC        "aQuantia Corporation(R) Network Driver"
+#define AQ_CFG_DRV_NAME        "aquantia"
+#define AQ_CFG_DRV_VERSION __stringify(NIC_MAJOR_DRIVER_VERSION)"."\
+   __stringify(NIC_MINOR_DRIVER_VERSION)"."\
+   __stringify(NIC_BUILD_DRIVER_VERSION)"."\
+   __stringify(NIC_REVISION_DRIVER_VERSION)
+
+#endif /* AQ_CFG_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_common.h 
b/drivers/net/ethernet/aquantia/atlantic/aq_common.h
new file mode 100644
index 000..9eb5e22
--- /dev/null
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_common.h
@@ -0,0 +1,23 @@
+/*
+ * aQuantia Corporation Network Driver
+ * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ */
+
+/* File aq_common.h: Basic includes for all files in project. */
+
+#ifndef AQ_COMMON_H
+#define AQ_COMMON_H
+
+#include 
+#include 
+
+#include "ver.h"
+#include "aq_nic.h"
+#include "aq_cfg.h"
+#include "aq_utils.h"
+
+#endif /* AQ_COMMON_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_utils.h 
b/drivers/net/ethernet/aquantia/atlantic/aq_utils.h
new file mode 100644
index 000..4446bd9
--- /dev/null
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_utils.h
@@ -0,0 +1,50 @@
+/*
+ * aQuantia Corporation Network Driver
+ * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ */
+
+/* File aq_utils.h: Useful macro and structures used in all layers of driver.

[PATCH stable v1 01/13] net: ethernet: aquantia: Make and configuration files.

2017-01-28 Thread Alexander Loktionov

From: David VomLehn 

Patches to create the make and configuration files.

Signed-off-by: Alexander Loktionov 
Signed-off-by: Dmitrii Tarakanov 
Signed-off-by: Pavel Belous 
Signed-off-by: Dmitry Bezrukov 
Signed-off-by: David M. VomLehn 
---
 drivers/net/ethernet/aquantia/Kconfig   | 24 ++
 drivers/net/ethernet/aquantia/Makefile  |  5 +++
 drivers/net/ethernet/aquantia/atlantic/Makefile | 42 +
 drivers/net/ethernet/aquantia/atlantic/ver.h| 18 +++
 4 files changed, 89 insertions(+)
 create mode 100644 drivers/net/ethernet/aquantia/Kconfig
 create mode 100644 drivers/net/ethernet/aquantia/Makefile
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/Makefile
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/ver.h

diff --git a/drivers/net/ethernet/aquantia/Kconfig 
b/drivers/net/ethernet/aquantia/Kconfig
new file mode 100644
index 000..cdf78e0
--- /dev/null
+++ b/drivers/net/ethernet/aquantia/Kconfig
@@ -0,0 +1,24 @@
+#
+# aQuantia device configuration
+#
+
+config NET_VENDOR_AQUANTIA
+   bool "aQuantia devices"
+   default y
+   ---help---
+ Set this to y if you have an Ethernet network card that uses the aQuantia
+ AQC107/AQC108 chipset.
+
+ This option does not build any drivers; it causes the aQuantia
+ drivers that can be built to appear in the list of Ethernet drivers.
+
+
+if NET_VENDOR_AQUANTIA
+
+config AQTION
+   tristate "aQuantia AQtion(tm) Support"
+   depends on PCI && X86_64
+   ---help---
+ This enables the support for the aQuantia AQtion(tm) Ethernet card.
+
+endif # NET_VENDOR_AQUANTIA
diff --git a/drivers/net/ethernet/aquantia/Makefile 
b/drivers/net/ethernet/aquantia/Makefile
new file mode 100644
index 000..4f4897b
--- /dev/null
+++ b/drivers/net/ethernet/aquantia/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the aQuantia device drivers.
+#
+
+obj-$(CONFIG_AQTION) += atlantic/
diff --git a/drivers/net/ethernet/aquantia/atlantic/Makefile 
b/drivers/net/ethernet/aquantia/atlantic/Makefile
new file mode 100644
index 000..e4ae696
--- /dev/null
+++ b/drivers/net/ethernet/aquantia/atlantic/Makefile
@@ -0,0 +1,42 @@
+
+#
+# aQuantia Ethernet Controller AQtion Linux Driver
+# Copyright(c) 2014-2017 aQuantia Corporation.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program. If not, see .
+#
+# The full GNU General Public License is included in this distribution in
+# the file called "COPYING".
+#
+# Contact Information: 
+# aQuantia Corporation, 105 E. Tasman Dr. San Jose, CA 95134, USA
+#
+
+
+#
+# Makefile for the AQtion(tm) Ethernet driver
+#
+
+obj-$(CONFIG_AQTION) += atlantic.o
+
+atlantic-objs := aq_main.o \
+   aq_nic.o \
+   aq_pci_func.o \
+   aq_vec.o \
+   aq_ring.o \
+   aq_hw_utils.o \
+   aq_ethtool.o \
+   hw_atl/hw_atl_a0.o \
+   hw_atl/hw_atl_b0.o \
+   hw_atl/hw_atl_utils.o \
+   hw_atl/hw_atl_llh.o
diff --git a/drivers/net/ethernet/aquantia/atlantic/ver.h 
b/drivers/net/ethernet/aquantia/atlantic/ver.h
new file mode 100644
index 000..0de858d
--- /dev/null
+++ b/drivers/net/ethernet/aquantia/atlantic/ver.h
@@ -0,0 +1,18 @@
+/*
+ * aQuantia Corporation Network Driver
+ * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ */
+
+#ifndef VER_H
+#define VER_H
+
+/* driver version components: major.minor.build.revision */
+#define NIC_MAJOR_DRIVER_VERSION           1
+#define NIC_MINOR_DRIVER_VERSION           5
+#define NIC_BUILD_DRIVER_VERSION           345
+#define NIC_REVISION_DRIVER_VERSION        0
+
+#endif /* VER_H */
-- 
2.9.3

[PATCH stable v1 12/13] net: ethernet: aquantia: Receive side scaling

2017-01-28 Thread Alexander Loktionov

From: David VomLehn 

Add definitions that support receive side scaling.

Signed-off-by: Alexander Loktionov 
Signed-off-by: Dmitrii Tarakanov 
Signed-off-by: Pavel Belous 
Signed-off-by: Dmitry Bezrukov 
Signed-off-by: David M. VomLehn 
---
 drivers/net/ethernet/aquantia/atlantic/aq_rss.h | 26 +
 1 file changed, 26 insertions(+)
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/aq_rss.h

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_rss.h 
b/drivers/net/ethernet/aquantia/atlantic/aq_rss.h
new file mode 100644
index 000..1db6eb2
--- /dev/null
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_rss.h
@@ -0,0 +1,26 @@
+/*
+ * aQuantia Corporation Network Driver
+ * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ */
+
+/* File aq_rss.h: Receive Side Scaling definitions. */
+
+#ifndef AQ_RSS_H
+#define AQ_RSS_H
+
+#include "aq_common.h"
+#include "aq_cfg.h"
+
+/* Receive-side-scaling parameters. */
+struct aq_rss_parameters {
+   u16 base_cpu_number;
+   u16 indirection_table_size;
+   u16 hash_secret_key_size;
+   /* key storage: AQ_CFG_RSS_HASHKEY_SIZE / sizeof(u32) 32-bit words */
+   u32 hash_secret_key[AQ_CFG_RSS_HASHKEY_SIZE / sizeof(u32)];
+   /* one byte per entry, AQ_CFG_RSS_INDIRECTION_TABLE_MAX entries */
+   u8 indirection_table[AQ_CFG_RSS_INDIRECTION_TABLE_MAX];
+};
+
+#endif /* AQ_RSS_H */
-- 
2.9.3

[PATCH stable v1 09/13] net: ethernet: aquantia: Atlantic hardware abstraction layer

2017-01-28 Thread Alexander Loktionov

From: David VomLehn 

Add common functions for Atlantic hardware abstraction layer.

Signed-off-by: Alexander Loktionov 
Signed-off-by: Dmitrii Tarakanov 
Signed-off-by: Pavel Belous 
Signed-off-by: Dmitry Bezrukov 
Signed-off-by: David M. VomLehn 
---
 .../aquantia/atlantic/hw_atl/hw_atl_utils.c| 570 +
 .../aquantia/atlantic/hw_atl/hw_atl_utils.h| 210 
 2 files changed, 780 insertions(+)
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h

diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c 
b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
new file mode 100644
index 000..8d6d8f5
--- /dev/null
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
@@ -0,0 +1,570 @@
+/*
+ * aQuantia Corporation Network Driver
+ * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ */
+
+/* File hw_atl_utils.c: Definition of common functions for Atlantic hardware
+ * abstraction layer.
+ */
+
+#include "../aq_hw.h"
+#include "../aq_hw_utils.h"
+#include "../aq_pci_func.h"
+#include "../aq_ring.h"
+#include "../aq_vec.h"
+#include "hw_atl_utils.h"
+#include "hw_atl_llh.h"
+
+#include 
+
+/* register offsets and field layout used by the helpers in this file */
+#define HW_ATL_UCP_0X370_REG    0x0370U
+
+#define HW_ATL_FW_SM_RAM        0x2U
+#define HW_ATL_MPI_CONTROL_ADR  0x0368U
+#define HW_ATL_MPI_STATE_ADR    0x036CU
+
+#define HW_ATL_MPI_STATE_MSK    0x00FFU
+#define HW_ATL_MPI_STATE_SHIFT  0U
+/* NOTE(review): the archived text reads "0xU" for the mask, which is not a
+ * valid constant; 0xFFFFU is the presumed original value - confirm against
+ * the posted patch.
+ */
+#define HW_ATL_MPI_SPEED_MSK    0xFFFFU
+#define HW_ATL_MPI_SPEED_SHIFT  16U
+
+/* Read @cnt 32-bit words into @p.  The start address @a is written to
+ * register 0x0208; each word is then requested through register 0x0200 and
+ * fetched from register 0x020C.
+ *
+ * Returns 0, or -ETIME when the HW_ATL_FW_SM_RAM semaphore still reads as
+ * not locked after the retry below.
+ */
+static int hw_atl_utils_fw_downld_dwords(struct aq_hw_s *self, u32 a,
+u32 *p, u32 cnt)
+{
+   int err = 0;
+
+   /* NOTE(review): err is only tested, never assigned, around this wait, so
+    * AQ_HW_WAIT_FOR presumably sets it on timeout - confirm against the
+    * macro definition.
+    */
+   AQ_HW_WAIT_FOR(reg_glb_cpu_sem_get(self,
+  HW_ATL_FW_SM_RAM) == 1U,
+  1U, 1U);
+
+   if (err < 0) {
+   bool is_locked;
+
+   /* wait failed: write the semaphore once and read it back */
+   reg_glb_cpu_sem_set(self, 1U, HW_ATL_FW_SM_RAM);
+   is_locked = reg_glb_cpu_sem_get(self, HW_ATL_FW_SM_RAM);
+   if (!is_locked) {
+   err = -ETIME;
+   goto err_exit;
+   }
+   }
+
+   aq_hw_write_reg(self, 0x0208U, a);
+
+   /* pre-increment / pre-decrement pair: the body runs exactly cnt times */
+   for (++cnt; --cnt;) {
+   u32 i = 0U;
+
+   aq_hw_write_reg(self, 0x0200U, 0x8000U);
+
+   /* poll register 0x0200 until bit 0x100 clears, giving up once the
+    * 1024-step countdown reaches zero
+    */
+   for (i = 1024U;
+   (0x100U & aq_hw_read_reg(self, 0x0200U)) && --i;) {
+   }
+
+   *(p++) = aq_hw_read_reg(self, 0x020CU);
+   }
+
+   reg_glb_cpu_sem_set(self, 1U, HW_ATL_FW_SM_RAM);
+
+err_exit:
+   return err;
+}
+
+/* Write @cnt 32-bit words from @p.  The start address @a is written to
+ * register 0x0208; each word is placed in register 0x020C and then
+ * committed through register 0x0200.
+ *
+ * Returns 0, or -ETIME when the HW_ATL_FW_SM_RAM semaphore reads as 0 on
+ * entry.
+ */
+static int hw_atl_utils_fw_upload_dwords(struct aq_hw_s *self, u32 a, u32 *p,
+u32 cnt)
+{
+   int err = 0;
+   bool is_locked;
+
+   is_locked = reg_glb_cpu_sem_get(self, HW_ATL_FW_SM_RAM);
+   if (!is_locked) {
+   err = -ETIME;
+   goto err_exit;
+   }
+
+   aq_hw_write_reg(self, 0x0208U, a);
+
+   /* pre-increment / pre-decrement pair: the body runs exactly cnt times */
+   for (++cnt; --cnt;) {
+   u32 i = 0U;
+
+   aq_hw_write_reg(self, 0x020CU, *(p++));
+   aq_hw_write_reg(self, 0x0200U, 0xC000U);
+
+   /* poll register 0x0200 until bit 0x100 clears, giving up once the
+    * 1024-step countdown reaches zero
+    */
+   for (i = 1024U;
+   (0x100U & aq_hw_read_reg(self, 0x0200U)) && --i;) {
+   }
+   }
+
+   reg_glb_cpu_sem_set(self, 1U, HW_ATL_FW_SM_RAM);
+
+err_exit:
+   return err;
+}
+
+/* Compare an expected version word with the actual one.
+ *
+ * Returns -EOPNOTSUPP when the two differ anywhere inside the major mask, or
+ * when the expected minor field is greater than the actual minor field;
+ * returns 0 otherwise.
+ */
+static int hw_atl_utils_ver_match(u32 ver_expected, u32 ver_actual)
+{
+	const u32 major_bits = 0xff00U;
+	const u32 minor_bits = 0x00ffU;
+
+	/* any differing bit in the major field is a mismatch */
+	if ((ver_expected ^ ver_actual) & major_bits)
+		return -EOPNOTSUPP;
+
+	/* the actual minor must be at least the expected minor */
+	if ((ver_expected & minor_bits) > (ver_actual & minor_bits))
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+static int hw_atl_utils_init_ucp(struct aq_hw_s *self,
+struct aq_hw_caps_s *aq_hw_caps)
+{
+   int err = 0;
+
+   if (!aq_hw_read_reg(self, 0x370U)) {
+   unsigned int rnd = 0U;
+   unsigned int ucp_0x370 = 0U;
+
+   get_random_bytes(, sizeof(unsigned int));
+
+   ucp_0x370 = 0x02020202U | (0xFEFEFEFEU & rnd);
+   aq_hw_write_reg(self, HW_ATL_UCP_0X370_REG, ucp_0x370);
+   }
+
+   reg_glb_cpu_scratch_scp_set(self, 0xU, 25U);
+
+   /* check 10 times

[PATCH stable v1 08/13] net: ethernet: aquantia: PCI operations

2017-01-28 Thread Alexander Loktionov

From: David VomLehn 

Add functions that handle the PCI bus interface.

Signed-off-by: Alexander Loktionov 
Signed-off-by: Dmitrii Tarakanov 
Signed-off-by: Pavel Belous 
Signed-off-by: Dmitry Bezrukov 
Signed-off-by: David M. VomLehn 
---
 .../net/ethernet/aquantia/atlantic/aq_pci_func.c   | 345 +
 .../net/ethernet/aquantia/atlantic/aq_pci_func.h   |  34 ++
 2 files changed, 379 insertions(+)
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/aq_pci_func.h

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c 
b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
new file mode 100644
index 000..afcecdb
--- /dev/null
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
@@ -0,0 +1,345 @@
+/*
+ * aQuantia Corporation Network Driver
+ * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ */
+
+/* File aq_pci_func.c: Definition of PCI functions. */
+
+#include "aq_pci_func.h"
+#include "aq_nic.h"
+#include "aq_vec.h"
+#include "aq_hw.h"
+#include 
+
+/* Per-PCI-function driver state, allocated by aq_pci_func_alloc(). */
+struct aq_pci_func_s {
+   struct pci_dev *pdev;
+   /* one NIC object per port, created in aq_pci_func_alloc() */
+   struct aq_nic_s *port[AQ_CFG_PCI_FUNC_PORTS];
+   void __iomem *mmio;
+   void *aq_vec[AQ_CFG_PCI_FUNC_MSIX_IRQS];
+   /* taken from pci_resource_start() in aq_pci_func_init() */
+   resource_size_t mmio_pa;
+   unsigned int msix_entry_mask;
+   unsigned int irq_type;
+   /* copied from aq_hw_caps.ports */
+   unsigned int ports;
+   /* set once pci_enable_device() has succeeded */
+   bool is_pci_enabled;
+   /* set once pci_request_regions() has succeeded */
+   bool is_regions;
+   /* 1 after the 64-bit DMA mask is accepted, 0 on the 32-bit fallback */
+   bool is_pci_using_dac;
+   /* filled by aq_hw_ops->get_hw_caps() */
+   struct aq_hw_caps_s aq_hw_caps;
+   struct msix_entry msix_entry[AQ_CFG_PCI_FUNC_MSIX_IRQS];
+};
+
+/* Allocate the per-PCI-function state for @pdev, store it as the device's
+ * driver data, query the hardware capabilities through @aq_hw_ops and create
+ * one NIC object per reported port.
+ *
+ * Returns the new object, or NULL when @aq_hw_ops is NULL, an allocation
+ * fails or get_hw_caps() reports an error.  A partially built object is
+ * released with aq_pci_func_free() before returning NULL.
+ */
+struct aq_pci_func_s *aq_pci_func_alloc(struct aq_hw_ops *aq_hw_ops,
+					struct pci_dev *pdev,
+					const struct net_device_ops *ndev_ops,
+					const struct ethtool_ops *eth_ops)
+{
+	struct aq_pci_func_s *self = NULL;
+	int err = 0;
+	unsigned int port = 0U;
+
+	if (!aq_hw_ops) {
+		err = -EFAULT;
+		goto err_exit;
+	}
+	self = kzalloc(sizeof(*self), GFP_KERNEL);
+	if (!self) {
+		err = -ENOMEM;
+		goto err_exit;
+	}
+
+	pci_set_drvdata(pdev, self);
+	self->pdev = pdev;
+
+	/* the capability query is made with a NULL hardware object */
+	err = aq_hw_ops->get_hw_caps(NULL, &self->aq_hw_caps);
+	if (err < 0)
+		goto err_exit;
+
+	self->ports = self->aq_hw_caps.ports;
+
+	for (port = 0; port < self->ports; ++port) {
+		struct aq_nic_s *aq_nic = aq_nic_alloc_cold(ndev_ops, eth_ops,
+							    &pdev->dev, self,
+							    port, aq_hw_ops);
+
+		if (!aq_nic) {
+			err = -ENOMEM;
+			goto err_exit;
+		}
+		self->port[port] = aq_nic;
+	}
+
+err_exit:
+	if (err < 0) {
+		if (self)
+			aq_pci_func_free(self);
+		self = NULL;
+	}
+
+	return self;
+}
+
+int aq_pci_func_init(struct aq_pci_func_s *self)
+{
+   int err = 0;
+   unsigned int bar = 0U;
+   unsigned int port = 0U;
+   unsigned int i = 0U;
+
+   err = pci_enable_device(self->pdev);
+   if (err < 0)
+   goto err_exit;
+
+   self->is_pci_enabled = true;
+
+   err = pci_set_dma_mask(self->pdev, DMA_BIT_MASK(64));
+   if (!err) {
+   err = pci_set_consistent_dma_mask(self->pdev, DMA_BIT_MASK(64));
+   self->is_pci_using_dac = 1;
+   }
+   if (err) {
+   err = pci_set_dma_mask(self->pdev, DMA_BIT_MASK(32));
+   if (!err)
+   err = pci_set_consistent_dma_mask(self->pdev,
+ DMA_BIT_MASK(32));
+   self->is_pci_using_dac = 0;
+   }
+   if (err != 0) {
+   err = -ENOSR;
+   goto err_exit;
+   }
+
+   err = pci_request_regions(self->pdev, AQ_CFG_DRV_NAME "_mmio");
+   if (err < 0)
+   goto err_exit;
+
+   self->is_regions = true;
+
+   pci_set_master(self->pdev);
+
+   for (bar = 0; bar < 4; ++bar) {
+   if (IORESOURCE_MEM & pci_resource_flags(self->pdev, bar)) {
+   resource_size_t reg_sz;
+
+   self->mmio_pa = pci_resource_start(self->pdev, bar);
+   if (self->mmio_pa == 0U) {
+   err = -EIO;
+

[PATCH stable v1 10/13] net: ethernet: aquantia: Hardware interface and utility functions

2017-01-28 Thread Alexander Loktionov

From: David VomLehn 

Add functions to interface with the hardware and some utility functions.

Signed-off-by: Alexander Loktionov 
Signed-off-by: Dmitrii Tarakanov 
Signed-off-by: Pavel Belous 
Signed-off-by: Dmitry Bezrukov 
Signed-off-by: David M. VomLehn 
---
 drivers/net/ethernet/aquantia/atlantic/aq_hw.h | 177 +
 .../net/ethernet/aquantia/atlantic/aq_hw_utils.c   |  68 
 .../net/ethernet/aquantia/atlantic/aq_hw_utils.h   |  47 ++
 3 files changed, 292 insertions(+)
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/aq_hw.h
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.c
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.h

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h 
b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
new file mode 100644
index 000..fce0fd3
--- /dev/null
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
@@ -0,0 +1,177 @@
+/*
+ * aQuantia Corporation Network Driver
+ * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ */
+
+/* File aq_hw.h: Declaration of abstract interface for NIC hardware specific
+ * functions.
+ */
+
+#ifndef AQ_HW_H
+#define AQ_HW_H
+
+#include "aq_common.h"
+
+/* NIC H/W capabilities */
+/* Capability record filled in by aq_hw_ops->get_hw_caps(). */
+struct aq_hw_caps_s {
+   u64 hw_features;
+   u64 link_speed_msk;
+   unsigned int hw_priv_flags;
+   u32 rxds;
+   u32 txds;
+   u32 txhwb_alignment;
+   u32 irq_mask;
+   u32 vecs;
+   u32 mtu;
+   u32 mac_regs_count;
+   /* number of NIC objects aq_pci_func_alloc() creates */
+   u8 ports;
+   u8 msix_irqs;
+   u8 tcs;
+   u8 rxd_alignment;
+   u8 rxd_size;
+   u8 txd_alignment;
+   u8 txd_size;
+   u8 tx_rings;
+   u8 rx_rings;
+   bool flow_control;
+   bool is_64_dma;
+   /* NOTE(review): presumably the firmware version checked by
+    * hw_atl_utils_ver_match() - confirm against the caller.
+    */
+   u32 fw_ver_expected;
+};
+
+/* Link status as reported through aq_hw_ops->hw_get_link_status(). */
+struct aq_hw_link_status_s {
+   /* NOTE(review): presumably the link speed in Mbit/s - confirm */
+   unsigned int mbps;
+};
+
+/* interrupt types */
+#define AQ_HW_IRQ_INVALID 0U
+#define AQ_HW_IRQ_LEGACY  1U
+#define AQ_HW_IRQ_MSI     2U
+#define AQ_HW_IRQ_MSIX    3U
+
+/* power states */
+#define AQ_HW_POWER_STATE_D0   0U
+#define AQ_HW_POWER_STATE_D3   3U
+
+/* hardware state and error flag bits */
+#define AQ_HW_FLAG_STARTED     0x0004U
+#define AQ_HW_FLAG_STOPPING    0x0008U
+#define AQ_HW_FLAG_RESETTING   0x0010U
+#define AQ_HW_FLAG_CLOSING     0x0020U
+#define AQ_HW_LINK_DOWN        0x0400U
+#define AQ_HW_FLAG_ERR_UNPLUG  0x4000U
+#define AQ_HW_FLAG_ERR_HW      0x8000U
+
+/* every flag that marks a hardware error */
+#define AQ_HW_FLAG_ERRORS      (AQ_HW_FLAG_ERR_HW | AQ_HW_FLAG_ERR_UNPLUG)
+
+/* Base hardware object handed to every aq_hw_ops callback. */
+struct aq_hw_s {
+   struct aq_obj_s header;
+   struct aq_nic_cfg_s *aq_nic_cfg;
+   /* owning PCI function, recorded by the create() callback */
+   struct aq_pci_func_s *aq_pci_func;
+   void __iomem *mmio;
+   /* the A0 create() callback sets this to 0x10 */
+   unsigned int not_ff_addr;
+   struct aq_hw_link_status_s aq_link_status;
+};
+
+struct aq_ring_s;
+struct aq_ring_param_s;
+struct aq_nic_cfg_s;
+struct sk_buff;
+
+struct aq_hw_ops {
+   struct aq_hw_s *(*create)(struct aq_pci_func_s *aq_pci_func,
+ unsigned int port, struct aq_hw_ops *ops);
+
+   void (*destroy)(struct aq_hw_s *self);
+
+   int (*get_hw_caps)(struct aq_hw_s *self,
+  struct aq_hw_caps_s *aq_hw_caps);
+
+   int (*hw_ring_tx_xmit)(struct aq_hw_s *self, struct aq_ring_s *aq_ring,
+  unsigned int frags);
+
+   int (*hw_ring_rx_receive)(struct aq_hw_s *self,
+ struct aq_ring_s *aq_ring);
+
+   int (*hw_ring_rx_fill)(struct aq_hw_s *self, struct aq_ring_s *aq_ring,
+  unsigned int sw_tail_old);
+
+   int (*hw_ring_tx_head_update)(struct aq_hw_s *self,
+ struct aq_ring_s *aq_ring);
+
+   int (*hw_get_mac_permanent)(struct aq_hw_s *self,
+   struct aq_hw_caps_s *aq_hw_caps,
+   u8 *mac);
+
+   int (*hw_set_mac_address)(struct aq_hw_s *self, u8 *mac_addr);
+
+   int (*hw_get_link_status)(struct aq_hw_s *self,
+ struct aq_hw_link_status_s *link_status);
+
+   int (*hw_set_link_speed)(struct aq_hw_s *self, u32 speed);
+
+   int (*hw_reset)(struct aq_hw_s *self);
+
+   int (*hw_init)(struct aq_hw_s *self, struct aq_nic_cfg_s *aq_nic_cfg,
+  u8 *mac_addr);
+
+   int (*hw_start)(struct aq_hw_s *self);
+
+   int (*hw_stop)(struct aq_hw_s *self);
+
+   int (*hw_ring_tx_init)(struct aq_hw_s *self, struct aq_ring_s *aq_ring,
+  struct aq_ring_param_s *aq_ring_param);
+
+   int (*hw_ring_tx_start)(struct aq_hw_s *self,
+   struct aq_ring_s

[PATCH net-next v2 1/4] net: dsa: Hook {get,set}_rxnfc ethtool operations

2017-01-28 Thread Florian Fainelli

In preparation for adding support for CFP/TCAMP in the bcm_sf2 driver add the
plumbing to call into driver specific {get,set}_rxnfc operations.

Signed-off-by: Florian Fainelli 
---
 include/net/dsa.h |  8 
 net/dsa/slave.c   | 26 ++
 2 files changed, 34 insertions(+)

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 92fd795e9573..bcad7cc906d9 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -370,6 +370,14 @@ struct dsa_switch_ops {
int (*port_mdb_dump)(struct dsa_switch *ds, int port,
 struct switchdev_obj_port_mdb *mdb,
 int (*cb)(struct switchdev_obj *obj));
+
+   /*
+* RXNFC
+*/
+   int (*get_rxnfc)(struct dsa_switch *ds, int port,
+struct ethtool_rxnfc *nfc, u32 *rule_locs);
+   int (*set_rxnfc)(struct dsa_switch *ds, int port,
+struct ethtool_rxnfc *nfc);
 };
 
 struct dsa_switch_driver {
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 9750dd6f8c17..bdae70f40a8a 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1001,6 +1001,30 @@ void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops)
ops->get_strings = dsa_cpu_port_get_strings;
 }
 
+static int dsa_slave_get_rxnfc(struct net_device *dev,
+  struct ethtool_rxnfc *nfc, u32 *rule_locs)
+{
+   struct dsa_slave_priv *p = netdev_priv(dev);
+   struct dsa_switch *ds = p->parent;
+
+   if (!ds->ops->get_rxnfc)
+   return -EOPNOTSUPP;
+
+   return ds->ops->get_rxnfc(ds, p->port, nfc, rule_locs);
+}
+
+static int dsa_slave_set_rxnfc(struct net_device *dev,
+  struct ethtool_rxnfc *nfc)
+{
+   struct dsa_slave_priv *p = netdev_priv(dev);
+   struct dsa_switch *ds = p->parent;
+
+   if (!ds->ops->set_rxnfc)
+   return -EOPNOTSUPP;
+
+   return ds->ops->set_rxnfc(ds, p->port, nfc);
+}
+
 static const struct ethtool_ops dsa_slave_ethtool_ops = {
.get_drvinfo= dsa_slave_get_drvinfo,
.get_regs_len   = dsa_slave_get_regs_len,
@@ -1019,6 +1043,8 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
.get_eee= dsa_slave_get_eee,
.get_link_ksettings = dsa_slave_get_link_ksettings,
.set_link_ksettings = dsa_slave_set_link_ksettings,
+   .get_rxnfc  = dsa_slave_get_rxnfc,
+   .set_rxnfc  = dsa_slave_set_rxnfc,
 };
 
 static const struct net_device_ops dsa_slave_netdev_ops = {
-- 
2.9.3

[PATCH net-next v2 3/4] net: dsa: bcm_sf2: Add CFP registers definitions

2017-01-28 Thread Florian Fainelli

Add Compact Field Processor definitions for the Broadcom Starfighter 2
and compatible versions of the switch.

Signed-off-by: Florian Fainelli 
---
 drivers/net/dsa/bcm_sf2_regs.h | 146 +
 1 file changed, 146 insertions(+)

diff --git a/drivers/net/dsa/bcm_sf2_regs.h b/drivers/net/dsa/bcm_sf2_regs.h
index 6b63c00928ba..26052450091e 100644
--- a/drivers/net/dsa/bcm_sf2_regs.h
+++ b/drivers/net/dsa/bcm_sf2_regs.h
@@ -255,4 +255,150 @@ enum bcm_sf2_reg_offs {
 #define CORE_EEE_EN_CTRL   0x24800
 #define CORE_EEE_LPI_INDICATE  0x24810
 
+#define CORE_CFP_ACC   0x28000
+#define  OP_STR_DONE   (1 << 0)
+#define  OP_SEL_SHIFT  1
+#define  OP_SEL_READ   (1 << OP_SEL_SHIFT)
+#define  OP_SEL_WRITE  (2 << OP_SEL_SHIFT)
+#define  OP_SEL_SEARCH (4 << OP_SEL_SHIFT)
+#define  OP_SEL_MASK   (7 << OP_SEL_SHIFT)
+#define  CFP_RAM_CLEAR (1 << 4)
+#define  RAM_SEL_SHIFT 10
+#define  TCAM_SEL  (1 << RAM_SEL_SHIFT)
+#define  ACT_POL_RAM   (2 << RAM_SEL_SHIFT)
+#define  RATE_METER_RAM(4 << RAM_SEL_SHIFT)
+#define  GREEN_STAT_RAM(8 << RAM_SEL_SHIFT)
+#define  YELLOW_STAT_RAM   (16 << RAM_SEL_SHIFT)
+#define  RED_STAT_RAM  (24 << RAM_SEL_SHIFT)
+#define  RAM_SEL_MASK  (0x1f << RAM_SEL_SHIFT)
+#define  TCAM_RESET(1 << 15)
+#define  XCESS_ADDR_SHIFT  16
+#define  XCESS_ADDR_MASK   0xff
+#define  SEARCH_STS(1 << 27)
+#define  RD_STS_SHIFT  28
+#define  RD_STS_TCAM   (1 << RD_STS_SHIFT)
+#define  RD_STS_ACT_POL_RAM(2 << RD_STS_SHIFT)
+#define  RD_STS_RATE_METER_RAM (4 << RD_STS_SHIFT)
+#define  RD_STS_STAT_RAM   (8 << RD_STS_SHIFT)
+
+#define CORE_CFP_RATE_METER_GLOBAL_CTL 0x28010
+
+#define CORE_CFP_DATA_PORT_0   0x28040
+#define CORE_CFP_DATA_PORT(x)  (CORE_CFP_DATA_PORT_0 + \
+   (x) * 0x10)
+
+/* UDF_DATA7 */
+#define L3_FRAMING_SHIFT   24
+#define L3_FRAMING_MASK(0x3 << L3_FRAMING_SHIFT)
+#define IPPROTO_SHIFT  8
+#define IPPROTO_MASK   (0xff << IPPROTO_SHIFT)
+#define IP_FRAG(1 << 7)
+
+/* UDF_DATA0 */
+#define  SLICE_VALID   3
+#define  SLICE_NUM_SHIFT   2
+#define  SLICE_NUM(x)  ((x) << SLICE_NUM_SHIFT)
+
+#define CORE_CFP_MASK_PORT_0   0x280c0
+
+#define CORE_CFP_MASK_PORT(x)  (CORE_CFP_MASK_PORT_0 + \
+   (x) * 0x10)
+
+#define CORE_ACT_POL_DATA0 0x28140
+#define  VLAN_BYP  (1 << 0)
+#define  EAP_BYP   (1 << 1)
+#define  STP_BYP   (1 << 2)
+#define  REASON_CODE_SHIFT 3
+#define  REASON_CODE_MASK  0x3f
+#define  LOOP_BK_EN(1 << 9)
+#define  NEW_TC_SHIFT  10
+#define  NEW_TC_MASK   0x7
+#define  CHANGE_TC (1 << 13)
+#define  DST_MAP_IB_SHIFT  14
+#define  DST_MAP_IB_MASK   0x1ff
+#define  CHANGE_FWRD_MAP_IB_SHIFT  24
+#define  CHANGE_FWRD_MAP_IB_MASK   0x3
+#define  CHANGE_FWRD_MAP_IB_NO_DEST(0 << CHANGE_FWRD_MAP_IB_SHIFT)
+#define  CHANGE_FWRD_MAP_IB_REM_ARL(1 << CHANGE_FWRD_MAP_IB_SHIFT)
+#define  CHANGE_FWRD_MAP_IB_REP_ARL(2 << CHANGE_FWRD_MAP_IB_SHIFT)
+#define  CHANGE_FWRD_MAP_IB_ADD_DST(3 << CHANGE_FWRD_MAP_IB_SHIFT)
+#define  NEW_DSCP_IB_SHIFT 26
+#define  NEW_DSCP_IB_MASK  0x3f
+
+#define CORE_ACT_POL_DATA1 0x28150
+#define  CHANGE_DSCP_IB(1 << 0)
+#define  DST_MAP_OB_SHIFT  1
+#define  DST_MAP_OB_MASK   0x3ff
+#define  CHANGE_FWRD_MAP_OB_SHIT   11
+#define  CHANGE_FWRD_MAP_OB_MASK   0x3
+#define  NEW_DSCP_OB_SHIFT 13
+#define  NEW_DSCP_OB_MASK  0x3f
+#define  CHANGE_DSCP_OB(1 << 19)
+#define  CHAIN_ID_SHIFT20
+#define  CHAIN_ID_MASK 0xff
+#define  CHANGE_COLOR  (1 << 28)
+#define  NEW_COLOR_SHIFT   29
+#define  NEW_COLOR_MASK0x3
+#define  NEW_COLOR_GREEN   (0 << NEW_COLOR_SHIFT)
+#define  NEW_COLOR_YELLOW  (1 << NEW_COLOR_SHIFT)
+#define  NEW_COLOR_RED (2 << NEW_COLOR_SHIFT)
+#define  RED_DEFAULT   (1 << 31)
+
+#define CORE_ACT_POL_DATA2 0x28160
+#define  MAC_LIMIT_BYPASS  (1 << 0)
+#define  CHANGE_TC_O   (1 << 1)
+#define  NEW_TC_O_SHIFT2

[PATCH net-next v2 2/4] net: dsa: bcm_sf2: Configure traffic classes to queue mapping

2017-01-28 Thread Florian Fainelli

By default, all traffic goes to queue 0, re-configure the traffic
classes to quality of service mapping such that priority X maps to queue
X, where X is from 0 through 7.

Signed-off-by: Florian Fainelli 
---
 drivers/net/dsa/bcm_sf2.c  | 9 +
 drivers/net/dsa/bcm_sf2_regs.h | 4 
 2 files changed, 13 insertions(+)

diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 8eecfd227e06..637072da3acf 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -229,6 +229,7 @@ static int bcm_sf2_port_setup(struct dsa_switch *ds, int 
port,
 {
struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
s8 cpu_port = ds->dst[ds->index].cpu_port;
+   unsigned int i;
u32 reg;
 
/* Clear the memory power down */
@@ -240,6 +241,14 @@ static int bcm_sf2_port_setup(struct dsa_switch *ds, int 
port,
if (priv->brcm_tag_mask & BIT(port))
bcm_sf2_brcm_hdr_setup(priv, port);
 
+   /* Configure Traffic Class to QoS mapping, allow each priority to map
+* to a different queue number
+*/
+   reg = core_readl(priv, CORE_PORT_TC2_QOS_MAP_PORT(port));
+   for (i = 0; i < 8; i++)
+   reg |= i << (PRT_TO_QID_SHIFT * i);
+   core_writel(priv, reg, CORE_PORT_TC2_QOS_MAP_PORT(port));
+
/* Clear the Rx and Tx disable bits and set to no spanning tree */
core_writel(priv, 0, CORE_G_PCTL_PORT(port));
 
diff --git a/drivers/net/dsa/bcm_sf2_regs.h b/drivers/net/dsa/bcm_sf2_regs.h
index 3b33b8010cc8..6b63c00928ba 100644
--- a/drivers/net/dsa/bcm_sf2_regs.h
+++ b/drivers/net/dsa/bcm_sf2_regs.h
@@ -238,6 +238,10 @@ enum bcm_sf2_reg_offs {
 #define  P_TXQ_PSM_VDD(x)  (P_TXQ_PSM_VDD_MASK << \
((x) * P_TXQ_PSM_VDD_SHIFT))
 
+#define CORE_PORT_TC2_QOS_MAP_PORT(x)  (0xc1c0 + ((x) * 0x10))
+#define  PRT_TO_QID_MASK   0x3
+#define  PRT_TO_QID_SHIFT  3
+
 #define CORE_PORT_VLAN_CTL_PORT(x) (0xc400 + ((x) * 0x8))
 #define  PORT_VLAN_CTRL_MASK   0x1ff
 
-- 
2.9.3

[PATCH net-next v2 4/4] net: dsa: bcm_sf2: Add support for ethtool::rxnfc

2017-01-28 Thread Florian Fainelli

Add support for configuring classification rules using the
ethtool::rxnfc API.  This is useful to program the switch's CFP/TCAM to
redirect specific packets to specific ports/queues for instance. For
now, we allow any kind of IPv4 5-tuple matching.

Signed-off-by: Florian Fainelli 
---
 drivers/net/dsa/Makefile  |   1 +
 drivers/net/dsa/bcm_sf2.c |  14 +
 drivers/net/dsa/bcm_sf2.h |  17 ++
 drivers/net/dsa/bcm_sf2_cfp.c | 613 ++
 4 files changed, 645 insertions(+)
 create mode 100644 drivers/net/dsa/bcm_sf2_cfp.c

diff --git a/drivers/net/dsa/Makefile b/drivers/net/dsa/Makefile
index 8346e4f9737a..da9893478e21 100644
--- a/drivers/net/dsa/Makefile
+++ b/drivers/net/dsa/Makefile
@@ -1,5 +1,6 @@
 obj-$(CONFIG_NET_DSA_MV88E6060) += mv88e6060.o
 obj-$(CONFIG_NET_DSA_BCM_SF2)  += bcm_sf2.o
+bcm_sf2-objs   += bcm_sf2_cfp.o
 obj-$(CONFIG_NET_DSA_QCA8K)+= qca8k.o
 
 obj-y  += b53/
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 637072da3acf..be282b430c50 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -1045,6 +1045,8 @@ static const struct dsa_switch_ops bcm_sf2_ops = {
.port_fdb_dump  = b53_fdb_dump,
.port_fdb_add   = b53_fdb_add,
.port_fdb_del   = b53_fdb_del,
+   .get_rxnfc  = bcm_sf2_get_rxnfc,
+   .set_rxnfc  = bcm_sf2_set_rxnfc,
 };
 
 struct bcm_sf2_of_data {
@@ -1168,6 +1170,12 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev)
 
spin_lock_init(&priv->indir_lock);
mutex_init(&priv->stats_mutex);
+   mutex_init(&priv->cfp.lock);
+
+   /* CFP rule #0 cannot be used for specific classifications, flag it as
+* permanently used
+*/
+   set_bit(0, priv->cfp.used);
 
bcm_sf2_identify_ports(priv, dn->child);
 
@@ -1197,6 +1205,12 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev)
return ret;
}
 
+   ret = bcm_sf2_cfp_rst(priv);
+   if (ret) {
+   pr_err("failed to reset CFP\n");
+   goto out_mdio;
+   }
+
/* Disable all interrupts and request them */
bcm_sf2_intr_disable(priv);
 
diff --git a/drivers/net/dsa/bcm_sf2.h b/drivers/net/dsa/bcm_sf2.h
index 6e1f74e4d471..7d3030e04f11 100644
--- a/drivers/net/dsa/bcm_sf2.h
+++ b/drivers/net/dsa/bcm_sf2.h
@@ -52,6 +52,13 @@ struct bcm_sf2_port_status {
struct ethtool_eee eee;
 };
 
+struct bcm_sf2_cfp_priv {
+   /* Mutex protecting concurrent accesses to the CFP registers */
+   struct mutex lock;
+   DECLARE_BITMAP(used, CFP_NUM_RULES);
+   unsigned int rules_cnt;
+};
+
 struct bcm_sf2_priv {
/* Base registers, keep those in order with BCM_SF2_REGS_NAME */
void __iomem*core;
@@ -103,6 +110,9 @@ struct bcm_sf2_priv {
 
/* Bitmask of ports needing BRCM tags */
unsigned intbrcm_tag_mask;
+
+   /* CFP rules context */
+   struct bcm_sf2_cfp_priv cfp;
 };
 
 static inline struct bcm_sf2_priv *bcm_sf2_to_priv(struct dsa_switch *ds)
@@ -197,4 +207,11 @@ SF2_IO_MACRO(acb);
 SWITCH_INTR_L2(0);
 SWITCH_INTR_L2(1);
 
+/* RXNFC */
+int bcm_sf2_get_rxnfc(struct dsa_switch *ds, int port,
+ struct ethtool_rxnfc *nfc, u32 *rule_locs);
+int bcm_sf2_set_rxnfc(struct dsa_switch *ds, int port,
+ struct ethtool_rxnfc *nfc);
+int bcm_sf2_cfp_rst(struct bcm_sf2_priv *priv);
+
 #endif /* __BCM_SF2_H */
diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c
new file mode 100644
index ..c71be3e0dc2d
--- /dev/null
+++ b/drivers/net/dsa/bcm_sf2_cfp.c
@@ -0,0 +1,613 @@
+/*
+ * Broadcom Starfighter 2 DSA switch CFP support
+ *
+ * Copyright (C) 2016, Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "bcm_sf2.h"
+#include "bcm_sf2_regs.h"
+
+struct cfp_udf_layout {
+   u8 slices[UDF_NUM_SLICES];
+   u32 mask_value;
+
+};
+
+/* UDF slices layout for a TCPv4/UDPv4 specification */
+static const struct cfp_udf_layout udf_tcpip4_layout = {
+   .slices = {
+   /* End of L2, byte offset 12, src IP[0:15] */
+   CFG_UDF_EOL2 | 6,
+   /* End of L2, byte offset 14, src IP[16:31] */
+   CFG_UDF_EOL2 | 7,
+   /* End of L2, byte offset 16, dst IP[0:15] */
+   CFG_UDF_EOL2 | 8,
+   /* End of L2, byte offset 18, dst IP[16:31] */
+   CFG_UDF_EOL2 | 9,
+   /* End of L3, byte offset 0, src port */
+   CFG_UDF_EOL3 | 0,
+   /*

Re: [PATCH V2 3/3] net: bgmac: use PHY subsystem for initializing PHY

2017-01-28 Thread Florian Fainelli



On 01/28/2017 01:08 PM, Rafał Miłecki wrote:
> From: Rafał Miłecki 
> 
> This adds support for using bgmac with PHYs supported by standalone PHY
> drivers. Having any PHY initialization in bgmac is hacky and shouldn't
> be extended but rather removed if anyone has hardware to test it.
> 
> Signed-off-by: Rafał Miłecki 
> ---
>  drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c | 10 ++
>  1 file changed, 10 insertions(+)
> 
> diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c 
> b/drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c
> index 9d9984999dce..6ce80cbcb48e 100644
> --- a/drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c
> +++ b/drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c
> @@ -132,6 +132,10 @@ static void bcma_mdio_phy_init(struct bgmac *bgmac)
>   struct bcma_chipinfo *ci = &bgmac->bcma.core->bus->chipinfo;
>   u8 i;
>  
> + /* For some legacy hardware we do chipset-based PHY initialization here
> +  * without even detecting PHY ID. It's hacky and should be cleaned as
> +  * soon as someone can test it.
> +  */
>   if (ci->id == BCMA_CHIP_ID_BCM5356) {
>   for (i = 0; i < 5; i++) {
>   bcma_mdio_phy_write(bgmac, i, 0x1f, 0x008b);
> @@ -140,6 +144,7 @@ static void bcma_mdio_phy_init(struct bgmac *bgmac)
>   bcma_mdio_phy_write(bgmac, i, 0x12, 0x2aaa);
>   bcma_mdio_phy_write(bgmac, i, 0x1f, 0x000b);
>   }
> + return;

That part is clearly initializing the built-in Ethernet switch's PHYs,
and so the natural place for that would be to stick these init values
into the Broadcom PHY driver. When b53-srab/b53_common attaches the
switch, it will scan all of these port's builtin PHYs and bind to an
appropriate PHY driver which could have this initialization as part of
the config_init routine for instance. Right now, we are most likely
using the Generic PHY.

Here are the different PHY IDs you should read from these models if you
want to make a subsequent patch that moves this initialization down to
the Broadcom PHY driver:

5356: 0x03625DA0
5357/53572: 0x03625F00
4749: could either be 0x600D85F0 or the same as 53010 (0x600D8760),
unclear where that product came from... Jon, would you know by chance?
-- 
Florian

Re: [BUG/RFC] vhost: net: big endian viring access despite virtio 1

2017-01-28 Thread Michael S. Tsirkin

On Fri, Jan 27, 2017 at 02:37:47PM +0100, Greg Kurz wrote:
> On Fri, 27 Jan 2017 13:24:13 +0100
> Halil Pasic  wrote:
> 
> > On 01/26/2017 08:20 PM, Michael S. Tsirkin wrote:
> > > On Thu, Jan 26, 2017 at 06:39:14PM +0100, Halil Pasic wrote:  
> > >>
> > >> Hi!
> > >>
> > >> Recently I have been investigating some strange migration problems on
> > >> s390x.
> > >>
> > >> It turned out under certain circumstances vhost_net corrupts avail.idx by
> > >> using wrong endianness.  
> > 
> > [..]
> > 
> > >> -8<--  
> > >> >From b26e2bbdc03832a0204ee2b42967a1b49a277dc8 Mon Sep 17 00:00:00 2001  
> > >> From: Halil Pasic 
> > >> Date: Thu, 26 Jan 2017 00:06:15 +0100
> > >> Subject: [PATCH] vhost: remove useless/dangerous reset of is_le
> > >>
> > >> The reset of is_le does no good, but it contributes its fair share to a
> > >> bug in vhost_net, which occurs if we have some oldubufs when stopping and
> > >> setting a fd = -1 as a backend. Instead of doing something convoluted in
> > >> vhost_net, let's just get rid of the reset.
> > >>
> > >> Signed-off-by: Halil Pasic 
> > >> Fixes: commit 2751c9882b94 
> > >> ---
> > >>  drivers/vhost/vhost.c | 4 +---
> > >>  1 file changed, 1 insertion(+), 3 deletions(-)
> > >>
> > >> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> > >> index d643260..08072a2 100644
> > >> --- a/drivers/vhost/vhost.c
> > >> +++ b/drivers/vhost/vhost.c
> > >> @@ -1714,10 +1714,8 @@ int vhost_vq_init_access(struct vhost_virtqueue 
> > >> *vq)
> > >> int r;
> > >> bool is_le = vq->is_le;
> > >>
> > >> -   if (!vq->private_data) {
> > >> -   vhost_reset_is_le(vq);
> > >> +   if (!vq->private_data)
> > >> return 0;
> > >> -   }
> > >>
> > >> vhost_init_is_le(vq);  
> > > 
> > > 
> > > I think you do need to reset it, just maybe within vhost_init_is_le.
> > > 
> > > if (vhost_has_feature(vq, VIRTIO_F_VERSION_1))
> > > vq->is_le = true;
> > >   else
> > >   vhost_reset_is_le(vq);
> > > 
> > >   
> > 
> > That is a very good point! I have overlooked that while the 
> > CONFIG_VHOST_CROSS_ENDIAN_LEGACY variant
> > 
> > static void vhost_init_is_le(struct vhost_virtqueue *vq)
> > {
> > /* Note for legacy virtio: user_be is initialized at reset time
> >  * according to the host endianness. If userspace does not set an
> >  * explicit endianness, the default behavior is native endian, as
> >  * expected by legacy virtio.
> >  */
> > vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || 
> > !vq->user_be;
> > }
> > 
> > is fine the other variant 
> > 
> > static void vhost_init_is_le(struct vhost_virtqueue *vq)
> > {
> > if (vhost_has_feature(vq, VIRTIO_F_VERSION_1))
> > vq->is_le = true;
> > }
> > is a very strange initializer (makes assumptions about the state
> > to be initialized).
> > 
> > I agree, setting native endianness there sounds very reasonable.
> > 
> > I have a question regarding readability. IMHO the relationship
> > of reset_is_le and init_is_le is a bit confusing, and I'm afraid
> > it could become even more confusing with using reset in one of
> > the init_is_le's.
> > 
> > How about we do the following?
> > 
> > static void vhost_init_is_le(struct vhost_virtqueue *vq)
> > {
> > if (vhost_has_feature(vq, VIRTIO_F_VERSION_1))
> > vq->is_le = true;
> > +   else
> > +   vq->is_le = virtio_legacy_is_little_endian();
> > 
> > }
> > 
> > static void vhost_reset_is_le(struct vhost_virtqueue *vq)
> > {
> > -   vq->is_le = virtio_legacy_is_little_endian();
> > +   vhost_init_is_le(vq);
> > }
> > 
> > That way we would have correct endianness both after reset
> > and after init, I think :).
> > 
> 
> Yes, I think this is what we need.
> 
> Cheers.

OK, pls test this patch.

> --
> Greg
> 
> > Thank you very much!
> > 
> > Halil
> > 
> >

[PATCH net-next] net: ethtool: silence kmalloc warning

2017-01-28 Thread Alexei Starovoitov

under memory pressure 'ethtool -S' command may warn:
[ 2374.385195] ethtool: page allocation failure: order:4, mode:0x242c0c0
[ 2374.405573] CPU: 12 PID: 40211 Comm: ethtool Not tainted
[ 2374.423071] Call Trace:
[ 2374.423076]  [] dump_stack+0x4d/0x64
[ 2374.423080]  [] warn_alloc_failed+0xeb/0x150
[ 2374.423082]  [] ? __alloc_pages_direct_compact+0x43/0xf0
[ 2374.423084]  [] __alloc_pages_nodemask+0x4dc/0xbf0
[ 2374.423091]  [] ? cmd_exec+0x722/0xcd0 [mlx5_core]
[ 2374.423095]  [] alloc_pages_current+0x8c/0x110
[ 2374.423097]  [] alloc_kmem_pages+0x19/0x90
[ 2374.423099]  [] kmalloc_order_trace+0x2e/0xe0
[ 2374.423101]  [] __kmalloc+0x204/0x220
[ 2374.423105]  [] dev_ethtool+0xe4e/0x1f80
[ 2374.423106]  [] ? dev_get_by_name_rcu+0x5e/0x80
[ 2374.423108]  [] dev_ioctl+0x156/0x560
[ 2374.423111]  [] ? mem_cgroup_commit_charge+0x78/0x3c0
[ 2374.423117]  [] sock_do_ioctl+0x42/0x50
[ 2374.423119]  [] sock_ioctl+0x1b3/0x250
[ 2374.423121]  [] do_vfs_ioctl+0x92/0x580
[ 2374.423123]  [] ? do_audit_syscall_entry+0x4b/0x70
[ 2374.423124]  [] ? syscall_trace_enter_phase1+0xfc/0x120
[ 2374.423126]  [] SyS_ioctl+0x79/0x90
[ 2374.423127]  [] do_syscall_64+0x50/0xa0
[ 2374.423129]  [] entry_SYSCALL64_slow_path+0x25/0x25

~1160 mlx5 counters ~= order 4 allocation which is unlikely to succeed
under memory pressure.  Since 'get stats' command is not critical
avoid reclaim and warning.
Also convert to safer kmalloc_array.

Signed-off-by: Alexei Starovoitov 
---
Long term this place is a good candidate to use kvmalloc() once it's merged.
---
 net/core/ethtool.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 236a21e3c878..be681a06bf3f 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1820,7 +1820,8 @@ static int ethtool_get_strings(struct net_device *dev, 
void __user *useraddr)
 
gstrings.len = ret;
 
-   data = kcalloc(gstrings.len, ETH_GSTRING_LEN, GFP_USER);
+   data = kcalloc(gstrings.len, ETH_GSTRING_LEN,
+  GFP_USER | __GFP_NORETRY | __GFP_NOWARN);
if (!data)
return -ENOMEM;
 
@@ -1918,7 +1919,8 @@ static int ethtool_get_stats(struct net_device *dev, void 
__user *useraddr)
return -EFAULT;
 
stats.n_stats = n_stats;
-   data = kmalloc(n_stats * sizeof(u64), GFP_USER);
+   data = kmalloc_array(n_stats, sizeof(u64),
+GFP_USER | __GFP_NORETRY | __GFP_NOWARN);
if (!data)
return -ENOMEM;
 
@@ -1957,7 +1959,8 @@ static int ethtool_get_phy_stats(struct net_device *dev, 
void __user *useraddr)
return -EFAULT;
 
stats.n_stats = n_stats;
-   data = kmalloc_array(n_stats, sizeof(u64), GFP_USER);
+   data = kmalloc_array(n_stats, sizeof(u64),
+GFP_USER | __GFP_NORETRY | __GFP_NOWARN);
if (!data)
return -ENOMEM;
 
-- 
2.8.0

Re: [PATCH net-next v3 0/4] net: ipv6: Improve user experience with multipath routes

2017-01-28 Thread Roopa Prabhu

On 1/27/17, 3:20 PM, David Ahern wrote:
> This series closes a couple of gaps between IPv4 and IPv6 with respect
> to multipath routes:
>
> 1. IPv4 allows all nexthops of multipath routes to be deleted using just
>the prefix and length; IPv6 only deletes the first nexthop for the
>route if only the prefix and length are given.
>
> 2. IPv4 returns multipath routes encoded in the RTA_MULTIPATH attribute.
>IPv6 returns a series of routes with the same prefix and length - one
>for each nexthop. This happens for both dumps and notifications.
>
> IPv6 does accept RTA_MULTIPATH encoded routes, but installs them as a
> series of routes.
>
> Patch 2 addresses the first item by allowing IPv6 multipath routes to be
> deleted using just the prefix and length. Patch 3 addresses the second
> allowing IPv6 multipath routes to be returned encoded in the RTA_MULTIPATH.
>
> Patch 1 adds the NLM_F_APPEND flag to notifications when the flag is
> present in the request. The lack of this flag was noted testing route
> appends and comparing to IPv4.
>
> Patch 4 prints IPv6 addresses in compressed format when showing route
> replace errors. This was noticed testing REPLACE failures.
>
> The end result for multipath routes:
> 1. Route Add
>- one notification with RTA_MULTIPATH attribute
>
> 2. Route Replace
>- notification for first route and all siblings that have
>  succeeded. This is needed regardless of success of remaining
>  nexthops to maintain add/delete consistency should a failure
>  happens on the second or following nexthop (ie., need to tell
>  userspace that original route has been replaced and then the
>  failure logic deletes all routes inserted thus far).
>  
> 3. Route Delete
>- for multipath route only given nexthops are deleted. This path
>  is hit when DELETE contains RTA_MULTIPATH. All other route deletes,
>  all nexthops are deleted for given prefix and length (and any
>  other specs if given)
>
>- one notification sent per nexthop deleted. This is unavoidable
>  since IPv6 allows a single nexthop to be deleted within a multipath
>  route
>
> 4. Route Appends
>- IPv6 allows nexthops to be appended to an existing route. In this
>  case one notification is sent per nexthop added

thanks for listing all of these...I think you mentioned this case to me..
but I don't remember now why this notification is
sent per nexthop added. This is an update to an existing multipath route.
so seems like the notification should be a RTM_NEWROUTE with the full 
RTA_MULTIPATH route
(similar to route add)

Same holds for replace, I know the code might be tricky here...but the route 
replace
is also an update to an existing multipath route and hence should be a 
RTM_NEWROUTE
with the full multipath route (RTA_MULTIPATH) that changed (from userspace 
semantics POV)

I don't have a better solution, but with the above still being different, 
wondering
if it's worth the risk of changing the API for just a few notifications.

>
> Addresses some of the inconsistencies also noted by Roopa at netdev0.1:
> https://www.netdev01.org/docs/prabhu-linux_ipv4_ipv6_inconsistencies_talk_slides.pdf
>
> v3
> - removed the need for a user API to opt-in to change. Requiring an
>   API just shifts the difference from same API with different
>   behavior to different API to achieve equivalent behavior
>
> - route notifications changed to use RTA_MULTIPATH for add and replace
>
> - updated commit messages and cover letter
>
> v2
> - fixed locking in patch 1 as noted by DaveM
> - changed user API for patch 2 to require an rtmsg with RTM_F_ALL_NEXTHOPS
>   set in rtm_flags
> - revamped explanation of patch 2 and cover letter
>
> David Ahern (4):
>   net: ipv6: add NLM_F_APPEND in notifications when applicable
>   net: ipv6: Allow shorthand delete of all nexthops in multipath route
>   net: ipv6: Add support to dump multipath routes via RTA_MULTIPATH
> attribute
>   net: ipv6: Use compressed IPv6 addresses showing route replace error
>
>  include/net/ip6_fib.h |   4 +-
>  include/net/netlink.h |   1 +
>  net/ipv6/ip6_fib.c|  19 +-
>  net/ipv6/route.c  | 163 
> --
>  4 files changed, 165 insertions(+), 22 deletions(-)
>

BUG at net/sctp/socket.c:7425

2017-01-28 Thread Alexander Popov

Hello,

I'm running the syzkaller fuzzer for v4.10-rc4 
(0aa0313f9d576affd7747cc3f179feb097d28990)
and have such a crash in sctp code:

[   38.423932] [ cut here ]
[   38.424298] kernel BUG at net/sctp/socket.c:7425!
[   38.424583] invalid opcode:  [#1] SMP KASAN
[   38.424839] Dumping ftrace buffer:
[   38.425031](ftrace buffer empty)
[   38.425232] Modules linked in: sctp libcrc32c snd_hda_codec_generic 
snd_hda_intel
snd_hda_codec snd_hda_core snd_intel8x0 snd_ens1370 snd_ac97_codec gameport 
snd_rawmidi
snd_hwdep snd_seq_device ac97_bus snd_pcm hid_generic joydev usbmouse snd_timer 
psmouse
usbhid e1000 snd hid parport_pc i2c_piix4 soundcore serio_raw parport 
input_leds pcspkr
floppy evbug mac_hid
[   38.427058] CPU: 0 PID: 1930 Comm: syz-executor12 Not tainted 4.10.0-rc4+ #2
[   38.427457] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
Ubuntu-1.8.2-1ubuntu1 04/01/2014
[   38.427999] task: 88006945ca00 task.stack: 880053e4
[   38.428364] RIP: 0010:sctp_sendmsg+0x29b3/0x3030 [sctp]
[   38.428719] RSP: 0018:880053e478f8 EFLAGS: 00010297
[   38.429062] RAX: 88006945ca00 RBX: 880048d148c0 RCX: 
[   38.429636] RDX:  RSI:  RDI: 88006d022c88
[   38.430051] RBP: 880053e47b70 R08: 0560 R09: 88007ffda680
[   38.430473] R10: 000a R11: 1d400032be05 R12: dc00
[   38.430915] R13: 880048d148c0 R14:  R15: 880059ad9160
[   38.431390] FS:  7f984a645700() GS:88006d00() 
knlGS:
[   38.431979] CS:  0010 DS:  ES:  CR0: 80050033
[   38.432405] CR2: 20005fe0 CR3: 6400a000 CR4: 06f0
[   38.432827] DR0:  DR1:  DR2: 
[   38.433253] DR3:  DR6: fffe0ff0 DR7: 0400
[   38.433765] Call Trace:
[   38.433938]  ? sctp_id2assoc+0x330/0x330 [sctp]
[   38.434245]  ? wake_atomic_t_function+0x2b0/0x2b0
[   38.434545]  inet_sendmsg+0x128/0x3a0
[   38.434758]  ? inet_recvmsg+0x420/0x420
[   38.434983]  sock_sendmsg+0xcf/0x110
[   38.435192]  sock_write_iter+0x222/0x3c0
[   38.435421]  ? sock_sendmsg+0x110/0x110
[   38.435644]  ? iov_iter_init+0xaf/0x1d0
[   38.435867]  __vfs_write+0x3cb/0x640
[   38.436075]  ? do_iter_readv_writev+0x4c0/0x4c0
[   38.436338]  ? apparmor_file_permission+0x27/0x30
[   38.436618]  ? rw_verify_area+0xea/0x2b0
[   38.436853]  vfs_write+0x175/0x4e0
[   38.437053]  SyS_write+0xd8/0x1b0
[   38.437283]  ? SyS_read+0x1b0/0x1b0
[   38.437522]  entry_SYSCALL_64_fastpath+0x1e/0xad
[   38.437820] RIP: 0033:0x44f869
[   38.438013] RSP: 002b:7f984a644b58 EFLAGS: 0212 ORIG_RAX: 
0001
[   38.438464] RAX: ffda RBX: 7f984a645700 RCX: 0044f869
[   38.438886] RDX: 0018 RSI: 20ac4fe8 RDI: 0004
[   38.439305] RBP: 7ffe1d7be490 R08:  R09: 
[   38.439712] R10:  R11: 0212 R12: 
[   38.440145] R13: 7ffe1d7be40f R14: 7f984a6459c0 R15: 
[   38.440563] Code: c7 c7 10 1a 5c a0 e8 4d fb 76 e1 c6 44 24 68 01 e9 a2 f2 
ff ff e8 be
34 e1 e0 8b 9c 24 98 00 00 00 e9 06 fd ff ff e8 ad 34 e1 e0 <0f> 0b e8 a6 34 e1 
e0 4c 8b
4c 24 78 4c 8b 44 24 68 4c 89 f9 48
[   38.441881] RIP: sctp_sendmsg+0x29b3/0x3030 [sctp] RSP: 880053e478f8
[   38.442341] ---[ end trace c704b04c884389c0 ]---
[   38.442634] Kernel panic - not syncing: Fatal exception
[   38.443084] Dumping ftrace buffer:
[   38.443335](ftrace buffer empty)
[   38.443590] Kernel Offset: disabled


Unfortunately, I didn't manage to get a C program reproducing the crash (looks 
like race).
However, I hit it reliably on my setup, so I can help with fixing the issue.

The crash happens here:
/* Let another process have a go.  Since we are going
 * to sleep anyway.
 */
release_sock(sk);
current_timeo = schedule_timeout(current_timeo);
>   BUG_ON(sk != asoc->base.sk);
lock_sock(sk);

I've added some debugging output and see, that the original value of 
asoc->base.sk is
changed to the address of another struct sock, which appeared in 
sctp_endpoint_init()
shortly before the crash.

Hope for some assistance.
Best regards,
Alexander

Re: [PATCH V2 1/3] net: bgmac: allocate struct bgmac just once & don't copy it

2017-01-28 Thread kbuild test robot

Hi Rafał,

[auto build test ERROR on net-next/master]
[also build test ERROR on v4.10-rc5 next-20170125]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Rafa-Mi-ecki/net-next-use-one-struct-bgmac-add-PHY-support/20170129-062241
config: i386-allmodconfig (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
# save the attached .config to linux build tree
make ARCH=i386 

All error/warnings (new ones prefixed by >>):

   drivers/net/ethernet/broadcom/bgmac.c: In function 'bgmac_alloc':
>> drivers/net/ethernet/broadcom/bgmac.c:1455:12: error: implicit declaration 
>> of function 'devm_alloc_etherdev' [-Werror=implicit-function-declaration]
 net_dev = devm_alloc_etherdev(dev, sizeof(*bgmac));
   ^~~
>> drivers/net/ethernet/broadcom/bgmac.c:1455:10: warning: assignment makes 
>> pointer from integer without a cast [-Wint-conversion]
 net_dev = devm_alloc_etherdev(dev, sizeof(*bgmac));
 ^
   cc1: some warnings being treated as errors

vim +/devm_alloc_etherdev +1455 drivers/net/ethernet/broadcom/bgmac.c

  1449  struct bgmac *bgmac_alloc(struct device *dev)
  1450  {
  1451  struct net_device *net_dev;
  1452  struct bgmac *bgmac;
  1453  
  1454  /* Allocation and references */
> 1455  net_dev = devm_alloc_etherdev(dev, sizeof(*bgmac));
  1456  if (!net_dev)
  1457  return NULL;
  1458  

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip

[PATCH V2 2/3] net: bgmac: drop struct bcma_mdio we don't need anymore

2017-01-28 Thread Rafał Miłecki

From: Rafał Miłecki 

Adding struct bcma_mdio was a workaround for bcma code not having access
to the struct bgmac used in the core code. Now that we don't duplicate this
struct, we can just use it internally in bcma code.

This simplifies code & allows access to all bgmac driver details from
all places in bcma code.

Signed-off-by: Rafał Miłecki 
---
 drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c | 98 ++---
 drivers/net/ethernet/broadcom/bgmac-bcma.c  |  2 +-
 drivers/net/ethernet/broadcom/bgmac.h   |  2 +-
 3 files changed, 42 insertions(+), 60 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c 
b/drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c
index 7c19c8e2bf91..9d9984999dce 100644
--- a/drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c
+++ b/drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c
@@ -12,11 +12,6 @@
 #include 
 #include "bgmac.h"
 
-struct bcma_mdio {
-   struct bcma_device *core;
-   u8 phyaddr;
-};
-
 static bool bcma_mdio_wait_value(struct bcma_device *core, u16 reg, u32 mask,
 u32 value, int timeout)
 {
@@ -37,7 +32,7 @@ static bool bcma_mdio_wait_value(struct bcma_device *core, 
u16 reg, u32 mask,
  * PHY ops
  **/
 
-static u16 bcma_mdio_phy_read(struct bcma_mdio *bcma_mdio, u8 phyaddr, u8 reg)
+static u16 bcma_mdio_phy_read(struct bgmac *bgmac, u8 phyaddr, u8 reg)
 {
struct bcma_device *core;
u16 phy_access_addr;
@@ -56,12 +51,12 @@ static u16 bcma_mdio_phy_read(struct bcma_mdio *bcma_mdio, 
u8 phyaddr, u8 reg)
BUILD_BUG_ON(BGMAC_PC_MCT_SHIFT != BCMA_GMAC_CMN_PC_MCT_SHIFT);
BUILD_BUG_ON(BGMAC_PC_MTE != BCMA_GMAC_CMN_PC_MTE);
 
-   if (bcma_mdio->core->id.id == BCMA_CORE_4706_MAC_GBIT) {
-   core = bcma_mdio->core->bus->drv_gmac_cmn.core;
+   if (bgmac->bcma.core->id.id == BCMA_CORE_4706_MAC_GBIT) {
+   core = bgmac->bcma.core->bus->drv_gmac_cmn.core;
phy_access_addr = BCMA_GMAC_CMN_PHY_ACCESS;
phy_ctl_addr = BCMA_GMAC_CMN_PHY_CTL;
} else {
-   core = bcma_mdio->core;
+   core = bgmac->bcma.core;
phy_access_addr = BGMAC_PHY_ACCESS;
phy_ctl_addr = BGMAC_PHY_CNTL;
}
@@ -87,7 +82,7 @@ static u16 bcma_mdio_phy_read(struct bcma_mdio *bcma_mdio, u8 
phyaddr, u8 reg)
 }
 
 /* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipphywr */
-static int bcma_mdio_phy_write(struct bcma_mdio *bcma_mdio, u8 phyaddr, u8 reg,
+static int bcma_mdio_phy_write(struct bgmac *bgmac, u8 phyaddr, u8 reg,
   u16 value)
 {
struct bcma_device *core;
@@ -95,12 +90,12 @@ static int bcma_mdio_phy_write(struct bcma_mdio *bcma_mdio, 
u8 phyaddr, u8 reg,
u16 phy_ctl_addr;
u32 tmp;
 
-   if (bcma_mdio->core->id.id == BCMA_CORE_4706_MAC_GBIT) {
-   core = bcma_mdio->core->bus->drv_gmac_cmn.core;
+   if (bgmac->bcma.core->id.id == BCMA_CORE_4706_MAC_GBIT) {
+   core = bgmac->bcma.core->bus->drv_gmac_cmn.core;
phy_access_addr = BCMA_GMAC_CMN_PHY_ACCESS;
phy_ctl_addr = BCMA_GMAC_CMN_PHY_CTL;
} else {
-   core = bcma_mdio->core;
+   core = bgmac->bcma.core;
phy_access_addr = BGMAC_PHY_ACCESS;
phy_ctl_addr = BGMAC_PHY_CNTL;
}
@@ -110,8 +105,8 @@ static int bcma_mdio_phy_write(struct bcma_mdio *bcma_mdio, 
u8 phyaddr, u8 reg,
tmp |= phyaddr;
bcma_write32(core, phy_ctl_addr, tmp);
 
-   bcma_write32(bcma_mdio->core, BGMAC_INT_STATUS, BGMAC_IS_MDIO);
-   if (bcma_read32(bcma_mdio->core, BGMAC_INT_STATUS) & BGMAC_IS_MDIO)
+   bcma_write32(bgmac->bcma.core, BGMAC_INT_STATUS, BGMAC_IS_MDIO);
+   if (bcma_read32(bgmac->bcma.core, BGMAC_INT_STATUS) & BGMAC_IS_MDIO)
dev_warn(>dev, "Error setting MDIO int\n");
 
tmp = BGMAC_PA_START;
@@ -132,39 +127,39 @@ static int bcma_mdio_phy_write(struct bcma_mdio 
*bcma_mdio, u8 phyaddr, u8 reg,
 }
 
 /* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipphyinit */
-static void bcma_mdio_phy_init(struct bcma_mdio *bcma_mdio)
+static void bcma_mdio_phy_init(struct bgmac *bgmac)
 {
-   struct bcma_chipinfo *ci = &bcma_mdio->core->bus->chipinfo;
+   struct bcma_chipinfo *ci = &bgmac->bcma.core->bus->chipinfo;
u8 i;
 
if (ci->id == BCMA_CHIP_ID_BCM5356) {
for (i = 0; i < 5; i++) {
-   bcma_mdio_phy_write(bcma_mdio, i, 0x1f, 0x008b);
-   bcma_mdio_phy_write(bcma_mdio, i, 0x15, 0x0100);
-   bcma_mdio_phy_write(bcma_mdio, i, 0x1f, 0x000f);
-   bcma_mdio_phy_write(bcma_mdio, i, 0x12, 0x2aaa);
-   bcma_mdio_phy_write(bcma_mdio, i, 0x1f, 0x000b);
+   bcma_mdio_phy_write(bgmac, i, 0x1f,

[PATCH V2 1/3] net: bgmac: allocate struct bgmac just once & don't copy it

2017-01-28 Thread Rafał Miłecki

From: Rafał Miłecki 

So far we were allocating struct bgmac in 3 places: platform code,
bcma code and shared bgmac_enet_probe function. The reason for this was
bgmac_enet_probe:
1) Requiring early-filled struct bgmac
2) Calling alloc_etherdev on its own in order to use netdev_priv later

This solution had a few drawbacks:
1) Was duplicating allocating code
2) Required copying early-filled struct
3) Resulted in platform/bcma code having access only to unused struct

Solve this situation by simply extracting some probe code into the new
bgmac_alloc function.

Signed-off-by: Rafał Miłecki 
---
V2: Add bgmac_alloc function instead of hacking alloc_etherdev and netdev_priv

Important: this patch depends on:
[PATCH net-next] net: add devm version of alloc_etherdev_mqs function
and is the first user of devm_alloc_etherdev.
---
 drivers/net/ethernet/broadcom/bgmac-bcma.c |  4 +---
 drivers/net/ethernet/broadcom/bgmac-platform.c |  2 +-
 drivers/net/ethernet/broadcom/bgmac.c  | 24 
 drivers/net/ethernet/broadcom/bgmac.h  |  3 ++-
 4 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma.c 
b/drivers/net/ethernet/broadcom/bgmac-bcma.c
index 4a4ffc0c4c65..9281abda4026 100644
--- a/drivers/net/ethernet/broadcom/bgmac-bcma.c
+++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c
@@ -117,12 +117,11 @@ static int bgmac_probe(struct bcma_device *core)
u8 *mac;
int err;
 
-   bgmac = kzalloc(sizeof(*bgmac), GFP_KERNEL);
+   bgmac = bgmac_alloc(&core->dev);
if (!bgmac)
return -ENOMEM;
 
bgmac->bcma.core = core;
-   bgmac->dev = &core->dev;
bgmac->dma_dev = core->dma_dev;
bgmac->irq = core->irq;
 
@@ -307,7 +306,6 @@ static int bgmac_probe(struct bcma_device *core)
 err1:
bcma_mdio_mii_unregister(bgmac->mii_bus);
 err:
-   kfree(bgmac);
bcma_set_drvdata(core, NULL);
 
return err;
diff --git a/drivers/net/ethernet/broadcom/bgmac-platform.c 
b/drivers/net/ethernet/broadcom/bgmac-platform.c
index 6f736c19872f..805e6ed6c390 100644
--- a/drivers/net/ethernet/broadcom/bgmac-platform.c
+++ b/drivers/net/ethernet/broadcom/bgmac-platform.c
@@ -151,7 +151,7 @@ static int bgmac_probe(struct platform_device *pdev)
struct resource *regs;
const u8 *mac_addr;
 
-   bgmac = devm_kzalloc(&pdev->dev, sizeof(*bgmac), GFP_KERNEL);
+   bgmac = bgmac_alloc(&pdev->dev);
if (!bgmac)
return -ENOMEM;
 
diff --git a/drivers/net/ethernet/broadcom/bgmac.c 
b/drivers/net/ethernet/broadcom/bgmac.c
index 0e066dc6b8cc..632d4d7b5a5b 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -1446,22 +1446,31 @@ int bgmac_phy_connect_direct(struct bgmac *bgmac)
 }
 EXPORT_SYMBOL_GPL(bgmac_phy_connect_direct);
 
-int bgmac_enet_probe(struct bgmac *info)
+struct bgmac *bgmac_alloc(struct device *dev)
 {
struct net_device *net_dev;
struct bgmac *bgmac;
-   int err;
 
/* Allocation and references */
-   net_dev = alloc_etherdev(sizeof(*bgmac));
+   net_dev = devm_alloc_etherdev(dev, sizeof(*bgmac));
if (!net_dev)
-   return -ENOMEM;
+   return NULL;
 
net_dev->netdev_ops = _netdev_ops;
net_dev->ethtool_ops = _ethtool_ops;
+
bgmac = netdev_priv(net_dev);
-   memcpy(bgmac, info, sizeof(*bgmac));
+   bgmac->dev = dev;
bgmac->net_dev = net_dev;
+
+   return bgmac;
+}
+
+int bgmac_enet_probe(struct bgmac *bgmac)
+{
+   struct net_device *net_dev = bgmac->net_dev;
+   int err;
+
net_dev->irq = bgmac->irq;
SET_NETDEV_DEV(net_dev, bgmac->dev);
 
@@ -1488,7 +1497,7 @@ int bgmac_enet_probe(struct bgmac *info)
err = bgmac_dma_alloc(bgmac);
if (err) {
dev_err(bgmac->dev, "Unable to alloc memory for DMA\n");
-   goto err_netdev_free;
+   goto err_out;
}
 
bgmac->int_mask = BGMAC_IS_ERRMASK | BGMAC_IS_RX | BGMAC_IS_TX_MASK;
@@ -1521,8 +1530,7 @@ int bgmac_enet_probe(struct bgmac *info)
phy_disconnect(net_dev->phydev);
 err_dma_free:
bgmac_dma_free(bgmac);
-err_netdev_free:
-   free_netdev(net_dev);
+err_out:
 
return err;
 }
diff --git a/drivers/net/ethernet/broadcom/bgmac.h 
b/drivers/net/ethernet/broadcom/bgmac.h
index 71f493f2451f..dfebaded3b52 100644
--- a/drivers/net/ethernet/broadcom/bgmac.h
+++ b/drivers/net/ethernet/broadcom/bgmac.h
@@ -517,7 +517,8 @@ struct bgmac {
int (*phy_connect)(struct bgmac *bgmac);
 };
 
-int bgmac_enet_probe(struct bgmac *info);
+struct bgmac *bgmac_alloc(struct device *dev);
+int bgmac_enet_probe(struct bgmac *bgmac);
 void bgmac_enet_remove(struct bgmac *bgmac);
 void bgmac_adjust_link(struct net_device *net_dev);
 int bgmac_phy_connect_direct(struct bgmac *bgmac);
-- 
2.11.0

[PATCH V2 0/3] net-next: use one struct bgmac & add PHY support

2017-01-28 Thread Rafał Miłecki

From: Rafał Miłecki 

This patchset adds support for initializing PHY using PHY subsystem.
It's required e.g. for wireless access point devices that use bgmac
supported Ethernet device connected to some external PHY.

Implementing this required accessing phydev in bcma specific code which
wasn't possible with core code allocating struct bgmac on its own. This
is why I needed to modify alloc_etherdev usage first.

Rafał Miłecki (3):
  net: bgmac: allocate struct bgmac just once & don't copy it
  net: bgmac: drop struct bcma_mdio we don't need anymore
  net: bgmac: use PHY subsystem for initializing PHY

 drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c | 108 +++-
 drivers/net/ethernet/broadcom/bgmac-bcma.c  |   6 +-
 drivers/net/ethernet/broadcom/bgmac-platform.c  |   2 +-
 drivers/net/ethernet/broadcom/bgmac.c   |  24 --
 drivers/net/ethernet/broadcom/bgmac.h   |   5 +-
 5 files changed, 72 insertions(+), 73 deletions(-)

-- 
2.11.0

[PATCH V2 3/3] net: bgmac: use PHY subsystem for initializing PHY

2017-01-28 Thread Rafał Miłecki

From: Rafał Miłecki 

This adds support for using bgmac with PHYs supported by standalone PHY
drivers. Having any PHY initialization in bgmac is hacky and shouldn't
be extended but rather removed if anyone has hardware to test it.

Signed-off-by: Rafał Miłecki 
---
 drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c 
b/drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c
index 9d9984999dce..6ce80cbcb48e 100644
--- a/drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c
+++ b/drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c
@@ -132,6 +132,10 @@ static void bcma_mdio_phy_init(struct bgmac *bgmac)
struct bcma_chipinfo *ci = >bcma.core->bus->chipinfo;
u8 i;
 
+   /* For some legacy hardware we do chipset-based PHY initialization here
+* without even detecting PHY ID. It's hacky and should be cleaned as
+* soon as someone can test it.
+*/
if (ci->id == BCMA_CHIP_ID_BCM5356) {
for (i = 0; i < 5; i++) {
bcma_mdio_phy_write(bgmac, i, 0x1f, 0x008b);
@@ -140,6 +144,7 @@ static void bcma_mdio_phy_init(struct bgmac *bgmac)
bcma_mdio_phy_write(bgmac, i, 0x12, 0x2aaa);
bcma_mdio_phy_write(bgmac, i, 0x1f, 0x000b);
}
+   return;
}
if ((ci->id == BCMA_CHIP_ID_BCM5357 && ci->pkg != 10) ||
(ci->id == BCMA_CHIP_ID_BCM4749 && ci->pkg != 10) ||
@@ -161,7 +166,12 @@ static void bcma_mdio_phy_init(struct bgmac *bgmac)
bcma_mdio_phy_write(bgmac, i, 0x17, 0x9273);
bcma_mdio_phy_write(bgmac, i, 0x1f, 0x000b);
}
+   return;
}
+
+   /* For all other hw do initialization using PHY subsystem. */
+   if (bgmac->net_dev && bgmac->net_dev->phydev)
+   phy_init_hw(bgmac->net_dev->phydev);
 }
 
 /* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipphyreset */
-- 
2.11.0

[PATCH net] net: ethtool: add support for 2500BaseT and 5000BaseT link modes

2017-01-28 Thread Pavel Belous

This patch introduces support for 2500BaseT and 5000BaseT link modes.
These modes are included in the new IEEE 802.3bz standard.

Signed-off-by: Pavel Belous 
---
 include/uapi/linux/ethtool.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index f0db778..3dc91a4 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1384,6 +1384,8 @@ enum ethtool_link_mode_bit_indices {
ETHTOOL_LINK_MODE_1baseLR_Full_BIT  = 44,
ETHTOOL_LINK_MODE_1baseLRM_Full_BIT = 45,
ETHTOOL_LINK_MODE_1baseER_Full_BIT  = 46,
+   ETHTOOL_LINK_MODE_2500baseT_Full_BIT= 47,
+   ETHTOOL_LINK_MODE_5000baseT_Full_BIT= 48,
 
 
/* Last allowed bit for __ETHTOOL_LINK_MODE_LEGACY_MASK is bit
@@ -1393,7 +1395,7 @@ enum ethtool_link_mode_bit_indices {
 */
 
__ETHTOOL_LINK_MODE_LAST
- = ETHTOOL_LINK_MODE_1baseER_Full_BIT,
+ = ETHTOOL_LINK_MODE_5000baseT_Full_BIT,
 };
 
 #define __ETHTOOL_LINK_MODE_LEGACY_MASK(base_name) \
-- 
2.7.4

Re: [PATCH net-next 1/4] mlx5: Make building eswitch configurable

2017-01-28 Thread Alexei Starovoitov


On 1/28/17 3:20 AM, Saeed Mahameed wrote:

On Sat, Jan 28, 2017 at 1:23 AM, Alexei Starovoitov  wrote:

On 1/27/17 1:15 PM, Saeed Mahameed wrote:


It is only mandatory for configurations that needs eswitch, where the
driver has no way to know about them, for a good old bare metal box,
eswitch is not needed.

we can do some work to strip the l2 table logic - needed for PFs to
work on multi-host - out of eswitch but again that would further
complicate the driver code since eswitch will still need to update l2
tables for VFs.



Saeed,
for multi-host setups every host in that multi-host doesn't
actually see the eswitch, no? Otherwise broken driver on one machine
can affect the other hosts in the same bundle? Please double check,


each host (PF) has its own eswitch, and each eswitch lives in its own
"steering-space"
  and it can't affect others.


since this is absolutely critical HW requirement.



The only shared HW resource between hosts (PFs) is the simple l2 table,
and the only thing a host can ask from the l2 table (FW) is: "forward
UC MAC to me", and it is the responsibility of the driver eswitch
to do so.

the l2 table is created and managed by FW, SW eswitch can only request
from FW, and the FW is trusted.


ok. clear. thanks for explaining.
Could you describe the sequence of function calls within mlx5
that does the assignment of uc mac for PF ?
since I'm missing where eswitch is involved.
I can see:
mlx5e_nic_enable | mlx5e_set_mac
  queue_work(priv->wq, &priv->set_rx_mode_work);
mlx5e_set_rx_mode_work
  mlx5e_apply_netdev_addr
mlx5e_add_l2_flow_rule

Re: [PATCH net-next v2 1/4] net: dsa: Add plumbing for port mirroring

2017-01-28 Thread Florian Fainelli

Le 01/28/17 à 01:14, Jiri Pirko a écrit :
> Sat, Jan 28, 2017 at 02:25:25AM CET, f.faine...@gmail.com wrote:
>> Add necessary plumbing at the slave network device level to have switch
>> drivers implement ndo_setup_tc() and most particularly the cls_matchall
>> classifier. We add support for two switch operations:
>>
>> port_add_mirror and port_del_mirror() which configure, on a per-port
>> basis the mirror parameters requested from the cls_matchall classifier.
>>
>> Code is largely borrowed from the Mellanox Spectrum switch driver.
>>
>> Signed-off-by: Florian Fainelli 
>> ---
> 
> [...]
> 
> 
>> +/*
>> + * Mirroring TC entry
>> + */
>> +struct dsa_mall_mirror_tc_entry {
>> +u8 to_local_port;
>> +bool ingress;
>> +};
>> +
>> +/*
>> + * TC matchall entry
>> + */
> 
> Why are you using multiline comment format for single line comments?

There are precedents in that file, but I will remove it.

> 
> 
>> +struct dsa_mall_tc_entry {
>> +struct list_head list;
>> +unsigned long cookie;
>> +enum dsa_port_mall_action_type type;
>> +union {
>> +struct dsa_mall_mirror_tc_entry mirror;
>> +};
>> +};
>> +
>> +
>> struct dsa_port {
>>  struct net_device   *netdev;
>>  struct device_node  *dn;
>> @@ -370,6 +397,15 @@ struct dsa_switch_ops {
>>  int (*port_mdb_dump)(struct dsa_switch *ds, int port,
>>   struct switchdev_obj_port_mdb *mdb,
>>   int (*cb)(struct switchdev_obj *obj));
>> +
>> +/*
>> + * TC integration
>> + */
>> +int (*port_mirror_add)(struct dsa_switch *ds, int port,
>> +   struct dsa_mall_mirror_tc_entry *mirror,
>> +   bool ingress);
>> +void(*port_mirror_del)(struct dsa_switch *ds, int port,
>> +   struct dsa_mall_mirror_tc_entry *mirror);
>> };
> 
> [...]
> 
> 
>> +static int dsa_slave_add_cls_matchall(struct net_device *dev,
>> +  __be16 protocol,
>> +  struct tc_cls_matchall_offload *cls,
>> +  bool ingress)
>> +{
>> +struct dsa_slave_priv *p = netdev_priv(dev);
>> +struct dsa_mall_tc_entry *mall_tc_entry;
>> +struct dsa_switch *ds = p->parent;
>> +struct net *net = dev_net(dev);
>> +struct dsa_slave_priv *to_p;
>> +struct net_device *to_dev;
>> +const struct tc_action *a;
>> +int err = -EOPNOTSUPP;
>> +LIST_HEAD(actions);
>> +int ifindex;
>> +
>> +if (!ds->ops->port_mirror_add)
>> +return err;
>> +
>> +if (!tc_single_action(cls->exts)) {
>> +netdev_err(dev, "only singular actions are supported\n");
> 
> Why do you notify the user in this case, but in case he tries to add
> a non-supported action you don't notify him?

Will remove that message.

> 
> 
>> +return err;
>> +}
>> +
>> +mall_tc_entry = kzalloc(sizeof(*mall_tc_entry), GFP_KERNEL);
>> +if (!mall_tc_entry)
>> +return -ENOMEM;
>> +mall_tc_entry->cookie = cls->cookie;
> 
> Hmm, I believe that this allocation and initialization should go into
> the "is_mirred if". You can do the checks in advance. That would also
> make the error path simpler.

Yes good point, seems like you may want to do the same in mlxsw since
that part of the code was loosely based on that too.

Thanks Jiri!
-- 
Florian

Re: [PATCH net-next 0/4] mlx5: Create build configuration options

2017-01-28 Thread Tom Herbert

On Sat, Jan 28, 2017 at 3:38 AM, Saeed Mahameed
 wrote:
> On Fri, Jan 27, 2017 at 8:13 PM, Tom Herbert  wrote:
>> On Fri, Jan 27, 2017 at 9:58 AM, Saeed Mahameed
>>  wrote:
>>> On Fri, Jan 27, 2017 at 1:32 AM, Tom Herbert  wrote:
 This patchset creates configuration options for sriov, vxlan, eswitch,
 and tc features in the mlx5 driver. The purpose of this is to allow not
 building these features. These features are optional advanced features
 that are not required for a core Ethernet driver. A user can disable
 these features which reduces the amount of code in the driver. Disabling
 these features (and DCB) reduces the size of mlx5_core.o by about 16%.
 This also can reduce the complexity of backports and rebases since
 user would no longer need to worry about dependencies with the rest of
 the kernel that features which might not be of any interest to a user
 may bring in.

 Tested: Build and ran the driver with all features enabled (the default)
 and with none enabled (including DCB). Did not see any issues. I did
 not explicitly test operation of any of the features in the list.

>>>
>>> Basically I am not against this kind of change, infact i am with it,
>>> although I would have done some restructuring in the driver before i
>>> did such change ;), filling the code with ifdefs is not a neat thing.
>>>
>> If you wish, please take this as an RFC and feel free to structure the
>> code the right way. I think the intent is clear enough and looks like
>> davem isn't going to allow the directory restructuring so something
>> like this seems to be the best course of action now.
>>
>
> Right.
>
>>> I agree this will simplify backporting and provide some kind of
>>> feature separation inside the driver.
>>> But this will also increase the testing matrix we need to cover and
>>> increase the likelihood of kbuild breaks by an order of magnitude.
>>>
>> The testing matrix already exploded with the proliferation of
>> supported features. If anything this reduces the test matrix problem.
>> For instance, if we make a change to the core driver and functionality
>> properly isolated there is a much better chance that this won't affect
>> peripheral functionality and vice versa. It is just not feasible for
>> us to test every combination of NIC features for every change being
>> made.
>>
>
> Yes for isolated features, but for base functionality, we need to test
> it with all new device specific kconfig combinations on every patch!

Sorry, but that is the price you need to pay for a feature rich device.

On the subject of testing, I don't really see any indication in these
patches on how patches are being tested. Also, there are patches that
fix things without any mention of how to repro the problems. It is
critical that we know IPv6 is tested as much or more than IPv4 (just
last week we hit yet another IPv6-only issue in another upstream
driver that should have been caught with a simple load test-- this
really is not acceptable any more!). Please add a description of how
patches were tested to commit logs.

Tom

> since a misplaced code inside or outside the correct ifdef
> can easily go unnoticed and break functionality.
>
>>> One more thing, do we really need a device specific flag per feature
>>> per vendor per device?  can't we just use the same kconfig flag for
>>> all drivers and if there is a more generic system wide flag that
>>> covers the same feature
>>> can't we just use it, for instance instead of
>>> CONFIG__SRIOV why not use already existing CONFIG_PCI_IOV
>>> for all drivers ?
>>>
>> That sounds good to me. We already have CONFIG_RFS_ACCEL and others
>> that do that.
>>
>> Tom
>>
>>> Saeed.
>>>


 Tom Herbert (4):
   mlx5: Make building eswitch configurable
   mlx5: Make building SR-IOV configurable
   mlx5: Make building tc hardware offload configurable
   mlx5: Make building vxlan hardware offload configurable

  drivers/net/ethernet/mellanox/mlx5/core/Kconfig   |  35 ++
  drivers/net/ethernet/mellanox/mlx5/core/Makefile  |  16 ++-
  drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 129 
 --
  drivers/net/ethernet/mellanox/mlx5/core/en_tc.c   |  39 +--
  drivers/net/ethernet/mellanox/mlx5/core/eq.c  |   4 +-
  drivers/net/ethernet/mellanox/mlx5/core/lag.c |   2 +
  drivers/net/ethernet/mellanox/mlx5/core/main.c|  32 --
  drivers/net/ethernet/mellanox/mlx5/core/sriov.c   |   6 +-
  8 files changed, 205 insertions(+), 58 deletions(-)

 --
 2.9.3

Re: cls_matchall and port mirroring questions

2017-01-28 Thread Florian Fainelli

Le 01/28/17 à 07:55, Jiri Pirko a écrit :
> Sat, Jan 28, 2017 at 03:19:04PM CET, ido...@mellanox.com wrote:
>> On Thu, Jan 26, 2017 at 07:00:50PM -0800, Florian Fainelli wrote:
>>> Hi,
>>>
>>> As I am adding support for cls_matchall in the b53/bcm_sf2 drivers, I
>>> was looking into several, yet unrelated things:
>>>
>>> - mlxsw does not seem to specify whether the port used for capture
>>> remains usable, or blocks non-mirror traffic ingressing/egressing it, do
>>> we want a control knob for that? If not, what is a sensible default,
>>> block all non capture traffic?
>>
>> Doesn't make sense to me to add such a default. It's up to the user.
>>
>>> - do we have an updated man page for tc-matchall.8 that features how to
>>> use the statistical sampler too? b53 switches have a divider that allows
>>> us to select how many frames we want to receive (10 bit value).
>>
>> https://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git/commit/?id=5c5670fae43027778e84b9d9ff3b9d91a10a8131
>>
>> Yotam (Cced) already commented that he intends to send iproute patches.
>>
>>> - b53 supports capture against a particular MAC SA or DA (or both), do
>>> we want to be able to control that somehow?
>>
>> Can't you just use flower (for example) instead of matchall in that
>> case?
> 
> Definitely. No extensions to matchall, as otherwise it could not be
> called like that any longer :)

OK, that makes sense. I thought of matchall as an action that would solely
deal with all port mirroring features, but using flower on top of an
ingress or egress qdisc + src/dst MAC would definitely, I guess,
make sense too.

> 
> 
>>
>>> What about Marvell switches, what can they do?
>>
>> No idea :)
>>
>>> -  a fair amount of code dealing with the cls_matchall mirroring entry
>>> is not switch driver specific, in fact, the only things that are switch
>>> driver specific are:
>>> - list pointer where to store this entry (typically in the private
>>> network device context)
>>> - operation to check whether the device belongs to us (identical
>>> netdev_ops)
>>> - retrieval of the destination port number (to_port) which is also
>>> typically available in network device private context
>>>
>>> Do we want to move a fair amount of code into switchdev, treat
>>> cls_matchall entries as a specific switchdev object, and have drivers
>>> take over at the same level that mlxsw_sp_port_add_cls_matchall_mirror()
>>> currently starts?
>>
>> I prefer the current way in which we re-use as many as possible core
>> APIs without adding switchdev-specific code. I don't have a concrete
>> argument against your proposal, though.
> 
> This (tc-offload) is completely unrelated to switchdev. So it would make
> no sense.

That is true, there is a bit of code (allocation of tc entries, parsing
of actions list etc.) that could be made generic; and one could argue
that each tc action programming request could look like some kind of
special switchdev object. I am totally fine keeping things the way they
are though.

Thanks!
-- 
Florian

Re: cls_matchall and port mirroring questions

2017-01-28 Thread Jiri Pirko

Sat, Jan 28, 2017 at 03:19:04PM CET, ido...@mellanox.com wrote:
>On Thu, Jan 26, 2017 at 07:00:50PM -0800, Florian Fainelli wrote:
>> Hi,
>> 
>> As I am adding support for cls_matchall in the b53/bcm_sf2 drivers, I
>> was looking into several, yet unrelated things:
>> 
>> - mlxsw does not seem to specify whether the port used for capture
>> remains usable, or blocks non-mirror traffic ingressing/egressing it, do
>> we want a control knob for that? If not, what is a sensible default,
>> block all non capture traffic?
>
>Doesn't make sense to me to add such a default. It's up to the user.
>
>> - do we have an updated man page for tc-matchall.8 that features how to
>> use the statistical sampler too? b53 switches have a divider that allows
>> us to select how many frames we want to receive (10 bit value).
>
>https://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git/commit/?id=5c5670fae43027778e84b9d9ff3b9d91a10a8131
>
>Yotam (Cced) already commented that he intends to send iproute patches.
>
>> - b53 supports capture against a particular MAC SA or DA (or both), do
>> we want to be able to control that somehow?
>
>Can't you just use flower (for example) instead of matchall in that
>case?

Definitely. No extensions to matchall, as otherwise it could not be
called like that any longer :)


>
>> What about Marvell switches, what can they do?
>
>No idea :)
>
>> -  a fair amount of code dealing with the cls_matchall mirroring entry
>> is not switch driver specific, in fact, the only things that are switch
>> driver specific are:
>>  - list pointer where to store this entry (typically in the private
>> network device context)
>>  - operation to check whether the device belongs to us (identical
>> netdev_ops)
>>  - retrieval of the destination port number (to_port) which is also
>> typically available in network device private context
>> 
>> Do we want to move a fair amount of code into switchdev, treat
>> cls_matchall entries as a specific switchdev object, and have drivers
>> take over at the same level that mlxsw_sp_port_add_cls_matchall_mirror()
>> currently starts?
>
>I prefer the current way in which we re-use as many as possible core
>APIs without adding switchdev-specific code. I don't have a concrete
>argument against your proposal, though.

This (tc-offload) is completely unrelated to switchdev. So it would make
no sense.

Re: [PATCH net-next 4/4] net: dsa: bcm_sf2: Add support for ethtool::rxnfc

2017-01-28 Thread kbuild test robot

Hi Florian,

[auto build test ERROR on net-next/master]

url:
https://github.com/0day-ci/linux/commits/Florian-Fainelli/net-dsa-bcm_sf2-CFP-support/20170128-052440
config: arm-multi_v7_defconfig (attached as .config)
compiler: arm-linux-gnueabi-gcc (Debian 6.1.1-9) 6.1.1 20160705
reproduce:
wget 
https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross
 -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=arm 

All errors (new ones prefixed by >>):

>> ERROR: "bcm_sf2_set_rxnfc" [drivers/net/dsa/bcm_sf2.ko] undefined!
>> ERROR: "bcm_sf2_get_rxnfc" [drivers/net/dsa/bcm_sf2.ko] undefined!
>> ERROR: "bcm_sf2_cfp_rst" [drivers/net/dsa/bcm_sf2.ko] undefined!

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip

Re: cls_matchall and port mirroring questions

2017-01-28 Thread Ido Schimmel

On Thu, Jan 26, 2017 at 07:00:50PM -0800, Florian Fainelli wrote:
> Hi,
> 
> As I am adding support for cls_matchall in the b53/bcm_sf2 drivers, I
> was looking into several, yet unrelated things:
> 
> - mlxsw does not seem to specify whether the port used for capture
> remains usable, or blocks non-mirror traffic ingressing/egressing it, do
> we want a control knob for that? If not, what is a sensible default,
> block all non capture traffic?

Doesn't make sense to me to add such a default. It's up to the user.

> - do we have an updated man page for tc-matchall.8 that features how to
> use the statistical sampler too? b53 switches have a divider that allows
> us to select how many frames we want to receive (10 bit value).

https://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git/commit/?id=5c5670fae43027778e84b9d9ff3b9d91a10a8131

Yotam (Cced) already commented that he intends to send iproute patches.

> - b53 supports capture against a particular MAC SA or DA (or both), do
> we want to be able to control that somehow?

Can't you just use flower (for example) instead of matchall in that
case?

> What about Marvell switches, what can they do?

No idea :)

> -  a fair amount of code dealing with the cls_matchall mirroring entry
> is not switch driver specific, in fact, the only things that are switch
> driver specific are:
>   - list pointer where to store this entry (typically in the private
> network device context)
>   - operation to check whether the device belongs to us (identical
> netdev_ops)
>   - retrieval of the destination port number (to_port) which is also
> typically available in network device private context
> 
> Do we want to move a fair amount of code into switchdev, treat
> cls_matchall entries as a specific switchdev object, and have drivers
> take over at the same level that mlxsw_sp_port_add_cls_matchall_mirror()
> currently starts?

I prefer the current way in which we re-use as many as possible core
APIs without adding switchdev-specific code. I don't have a concrete
argument against your proposal, though.

Re: [PATCH v2] bpf: Restrict cgroup bpf hooks to the init netns

2017-01-28 Thread Tejun Heo

Hello, Eric.

On Thu, Jan 26, 2017 at 01:45:07PM +1300, Eric W. Biederman wrote:
> > Eric, does this sound okay to you?  You're the authority on exposing
> > things like namespace ids to users.
> 
> *Boggle*  Things that run across all network namespaces break any kind
>  of sense I have about thinking about them.
> 
> Running across more than one network namespace by default seems very
> broken to me.

Can you explain why that is?  Other namespaces don't behave this way.
For example, a PID namespace doesn't hide the processes at the system
level.  It just gives additional nested names to the namespaced
processes and having objects visible at the system level is very
useful for monitoring and management.  Are there inherent reasons why
network namespace should be very different from other namespaces in
this regard?

Thanks.

-- 
tejun

[PATCHv2 RFC net-next 2/7] net: add dst_pending_confirm flag to skbuff

2017-01-28 Thread Julian Anastasov

Add new skbuff flag to allow protocols to confirm neighbour.
When same struct dst_entry can be used for many different
neighbours we can not use it for pending confirmations.

Add sock_confirm_neigh() helper to confirm the neighbour and
use it for IPv4, IPv6 and VRF before dst_neigh_output.

Signed-off-by: Julian Anastasov 
---
 drivers/net/vrf.c  |  5 -
 include/linux/skbuff.h | 12 
 include/net/sock.h | 14 ++
 net/ipv4/ip_output.c   |  5 -
 net/ipv6/ip6_output.c  |  1 +
 5 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 264fc15..630eafd 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -378,6 +378,7 @@ static int vrf_finish_output6(struct net *net, struct sock 
*sk,
if (unlikely(!neigh))
neigh = __neigh_create(_tbl, nexthop, dst->dev, false);
if (!IS_ERR(neigh)) {
+   sock_confirm_neigh(skb, neigh);
ret = dst_neigh_output(dst, neigh, skb);
rcu_read_unlock_bh();
return ret;
@@ -574,8 +575,10 @@ static int vrf_finish_output(struct net *net, struct sock 
*sk, struct sk_buff *s
neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
if (unlikely(!neigh))
neigh = __neigh_create(_tbl, , dev, false);
-   if (!IS_ERR(neigh))
+   if (!IS_ERR(neigh)) {
+   sock_confirm_neigh(skb, neigh);
ret = dst_neigh_output(dst, neigh, skb);
+   }
 
rcu_read_unlock_bh();
 err:
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6f63b7e..3ac3c3b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -613,6 +613,7 @@ static inline bool skb_mstamp_after(const struct skb_mstamp 
*t1,
  * @wifi_acked_valid: wifi_acked was set
  * @wifi_acked: whether frame was acked on wifi or not
  * @no_fcs:  Request NIC to treat last 4 bytes as Ethernet FCS
+ * @dst_pending_confirm: need to confirm neighbour
   *@napi_id: id of the NAPI struct this skb came from
  * @secmark: security marking
  * @mark: Generic packet mark
@@ -743,6 +744,7 @@ struct sk_buff {
__u8csum_level:2;
__u8csum_bad:1;
 
+   __u8dst_pending_confirm:1;
 #ifdef CONFIG_IPV6_NDISC_NODETYPE
__u8ndisc_nodetype:2;
 #endif
@@ -3694,6 +3696,16 @@ static inline bool skb_rx_queue_recorded(const struct 
sk_buff *skb)
return skb->queue_mapping != 0;
 }
 
+static inline void skb_set_dst_pending_confirm(struct sk_buff *skb, u32 val)
+{
+   skb->dst_pending_confirm = val;
+}
+
+static inline bool skb_get_dst_pending_confirm(const struct sk_buff *skb)
+{
+   return skb->dst_pending_confirm != 0;
+}
+
 static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
 {
 #ifdef CONFIG_XFRM
diff --git a/include/net/sock.h b/include/net/sock.h
index e113786..1bc821e 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1821,6 +1821,20 @@ static inline void sk_dst_confirm(struct sock *sk)
sk->sk_dst_pending_confirm = 1;
 }
 
+static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n)
+{
+   if (skb_get_dst_pending_confirm(skb)) {
+   struct sock *sk = skb->sk;
+   unsigned long now = jiffies;
+
+   /* avoid dirtying neighbour */
+   if (n->confirmed != now)
+   n->confirmed = now;
+   if (sk && sk->sk_dst_pending_confirm)
+   sk->sk_dst_pending_confirm = 0;
+   }
+}
+
 bool sk_mc_loop(struct sock *sk);
 
 static inline bool sk_can_gso(const struct sock *sk)
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index fac275c4..27f1db7 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -222,7 +222,10 @@ static int ip_finish_output2(struct net *net, struct sock 
*sk, struct sk_buff *s
if (unlikely(!neigh))
neigh = __neigh_create(_tbl, , dev, false);
if (!IS_ERR(neigh)) {
-   int res = dst_neigh_output(dst, neigh, skb);
+   int res;
+
+   sock_confirm_neigh(skb, neigh);
+   res = dst_neigh_output(dst, neigh, skb);
 
rcu_read_unlock_bh();
return res;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 38122d0..7d90cab 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -119,6 +119,7 @@ static int ip6_finish_output2(struct net *net, struct sock 
*sk, struct sk_buff *
if (unlikely(!neigh))
neigh = __neigh_create(_tbl, nexthop, dst->dev, false);
if (!IS_ERR(neigh)) {
+   sock_confirm_neigh(skb, neigh);
ret = dst_neigh_output(dst, neigh, skb);
rcu_read_unlock_bh();
return ret;
-- 
1.9.3

[PATCHv2 RFC net-next 3/7] sctp: add dst_pending_confirm flag

2017-01-28 Thread Julian Anastasov

Add new transport flag to allow sockets to confirm neighbour.
When same struct dst_entry can be used for many different
neighbours we can not use it for pending confirmations.
The flag is propagated from transport to every packet.
It is reset when cached dst is reset.

Reported-by: YueHaibing 
Fixes: 5110effee8fd ("net: Do delayed neigh confirmation.")
Fixes: f2bb4bedf35d ("ipv4: Cache output routes in fib_info nexthops.")
Signed-off-by: Julian Anastasov 
---
 include/net/sctp/sctp.h|  6 ++
 include/net/sctp/structs.h |  4 
 net/sctp/associola.c   |  3 +--
 net/sctp/output.c  | 10 +-
 net/sctp/outqueue.c|  2 +-
 net/sctp/sm_make_chunk.c   |  6 ++
 net/sctp/sm_sideeffect.c   |  2 +-
 net/sctp/socket.c  |  4 ++--
 net/sctp/transport.c   | 17 -
 9 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 3cfd365b..480b65a 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -593,10 +593,8 @@ static inline void sctp_v4_map_v6(union sctp_addr *addr)
  */
 static inline struct dst_entry *sctp_transport_dst_check(struct sctp_transport 
*t)
 {
-   if (t->dst && !dst_check(t->dst, t->dst_cookie)) {
-   dst_release(t->dst);
-   t->dst = NULL;
-   }
+   if (t->dst && !dst_check(t->dst, t->dst_cookie))
+   sctp_transport_dst_release(t);
 
return t->dst;
 }
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 231fa9ac..6a68504 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -804,6 +804,8 @@ struct sctp_transport {
 
__u32 burst_limited;/* Holds old cwnd when max.burst is applied */
 
+   __u32 dst_pending_confirm;  /* need to confirm neighbour */
+
/* Destination */
struct dst_entry *dst;
/* Source address. */
@@ -950,6 +952,8 @@ void sctp_transport_route(struct sctp_transport *, union 
sctp_addr *,
 void sctp_transport_reset(struct sctp_transport *);
 void sctp_transport_update_pmtu(struct sock *, struct sctp_transport *, u32);
 void sctp_transport_immediate_rtx(struct sctp_transport *);
+void sctp_transport_dst_release(struct sctp_transport *t);
+void sctp_transport_dst_confirm(struct sctp_transport *t);
 
 
 /* This is the structure we use to queue packets as they come into
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index e50dc6d..2a6835b 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -832,8 +832,7 @@ void sctp_assoc_control_transport(struct sctp_association 
*asoc,
if (transport->state != SCTP_UNCONFIRMED)
transport->state = SCTP_INACTIVE;
else {
-   dst_release(transport->dst);
-   transport->dst = NULL;
+   sctp_transport_dst_release(transport);
ulp_notify = false;
}
 
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 07ab506..814eac0 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -546,6 +546,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t 
gfp)
struct sctp_association *asoc = tp->asoc;
struct sctp_chunk *chunk, *tmp;
int pkt_count, gso = 0;
+   int confirm;
struct dst_entry *dst;
struct sk_buff *head;
struct sctphdr *sh;
@@ -624,7 +625,14 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t 
gfp)
asoc->peer.last_sent_to = tp;
}
head->ignore_df = packet->ipfragok;
-   tp->af_specific->sctp_xmit(head, tp);
+   confirm = tp->dst_pending_confirm;
+   if (confirm)
+   skb_set_dst_pending_confirm(head, 1);
+   /* neighbour should be confirmed on successful transmission or
+* positive error
+*/
+   if (tp->af_specific->sctp_xmit(head, tp) >= 0 && confirm)
+   tp->dst_pending_confirm = 0;
 
 out:
list_for_each_entry_safe(chunk, tmp, >chunk_list, list) {
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 65abe22..db352e5 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -1654,7 +1654,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
 
if (forward_progress) {
if (transport->dst)
-   dst_confirm(transport->dst);
+   sctp_transport_dst_confirm(transport);
}
}
 
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index ad3445b..c7d3249 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -,8 +,7 @@ static void sctp_asconf_param_success(struct 
sctp_association *asoc,
local_bh_enable();
list_for_each_entry(transport, >peer.transport_addr_list,
transports) {
-

[PATCHv2 RFC net-next 5/7] net: add confirm_neigh method to dst_ops

2017-01-28 Thread Julian Anastasov

Add confirm_neigh method to dst_ops and use it from IPv4 and IPv6
to lookup and confirm the neighbour. Its usage via the new helper
dst_confirm_neigh() should be restricted to MSG_PROBE users for
performance reasons.

Signed-off-by: Julian Anastasov 
---
 include/net/arp.h  | 16 
 include/net/dst.h  |  7 +++
 include/net/dst_ops.h  |  2 ++
 include/net/ndisc.h| 17 +
 net/ipv4/route.c   | 19 +++
 net/ipv6/route.c   | 16 
 net/xfrm/xfrm_policy.c | 16 
 7 files changed, 93 insertions(+)

diff --git a/include/net/arp.h b/include/net/arp.h
index 5e0f891..65619a2 100644
--- a/include/net/arp.h
+++ b/include/net/arp.h
@@ -35,6 +35,22 @@ static inline struct neighbour *__ipv4_neigh_lookup(struct 
net_device *dev, u32
return n;
 }
 
+static inline void __ipv4_confirm_neigh(struct net_device *dev, u32 key)
+{
+   struct neighbour *n;
+
+   rcu_read_lock_bh();
+   n = __ipv4_neigh_lookup_noref(dev, key);
+   if (n) {
+   unsigned long now = jiffies;
+
+   /* avoid dirtying neighbour */
+   if (n->confirmed != now)
+   n->confirmed = now;
+   }
+   rcu_read_unlock_bh();
+}
+
 void arp_init(void);
 int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg);
 void arp_send(int type, int ptype, __be32 dest_ip,
diff --git a/include/net/dst.h b/include/net/dst.h
index 6835d22..3a3b34b 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -477,6 +477,13 @@ static inline struct neighbour *dst_neigh_lookup_skb(const 
struct dst_entry *dst
return IS_ERR(n) ? NULL : n;
 }
 
+static inline void dst_confirm_neigh(const struct dst_entry *dst,
+const void *daddr)
+{
+   if (dst->ops->confirm_neigh)
+   dst->ops->confirm_neigh(dst, daddr);
+}
+
 static inline void dst_link_failure(struct sk_buff *skb)
 {
struct dst_entry *dst = skb_dst(skb);
diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h
index 8a2b66d..13f6d59 100644
--- a/include/net/dst_ops.h
+++ b/include/net/dst_ops.h
@@ -33,6 +33,8 @@ struct dst_ops {
struct neighbour *  (*neigh_lookup)(const struct dst_entry *dst,
struct sk_buff *skb,
const void *daddr);
+   void(*confirm_neigh)(const struct dst_entry *dst,
+const void *daddr);
 
struct kmem_cache   *kmem_cachep;
 
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index d562a2f..8a02146 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -391,6 +391,23 @@ static inline struct neighbour *__ipv6_neigh_lookup(struct 
net_device *dev, cons
return n;
 }
 
+static inline void __ipv6_confirm_neigh(struct net_device *dev,
+   const void *pkey)
+{
+   struct neighbour *n;
+
+   rcu_read_lock_bh();
+   n = __ipv6_neigh_lookup_noref(dev, pkey);
+   if (n) {
+   unsigned long now = jiffies;
+
+   /* avoid dirtying neighbour */
+   if (n->confirmed != now)
+   n->confirmed = now;
+   }
+   rcu_read_unlock_bh();
+}
+
 int ndisc_init(void);
 int ndisc_late_init(void);
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 4b7c231..cb494a5 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -154,6 +154,7 @@ static u32 *ipv4_cow_metrics(struct dst_entry *dst, 
unsigned long old)
 static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
   struct sk_buff *skb,
   const void *daddr);
+static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr);
 
 static struct dst_ops ipv4_dst_ops = {
.family =   AF_INET,
@@ -168,6 +169,7 @@ static struct neighbour *ipv4_neigh_lookup(const struct 
dst_entry *dst,
.redirect = ip_do_redirect,
.local_out =__ip_local_out,
.neigh_lookup = ipv4_neigh_lookup,
+   .confirm_neigh =ipv4_confirm_neigh,
 };
 
 #define ECN_OR_COST(class) TC_PRIO_##class
@@ -461,6 +463,23 @@ static struct neighbour *ipv4_neigh_lookup(const struct 
dst_entry *dst,
return neigh_create(_tbl, pkey, dev);
 }
 
+static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr)
+{
+   struct net_device *dev = dst->dev;
+   const __be32 *pkey = daddr;
+   const struct rtable *rt;
+
+   rt = (const struct rtable *)dst;
+   if (rt->rt_gateway)
+   pkey = (const __be32 *)>rt_gateway;
+   else if (!daddr ||
+(rt->rt_flags &
+ (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL)))
+   return;
+
+   __ipv4_confirm_neigh(dev,

[PATCHv2 RFC net-next 1/7] sock: add sk_dst_pending_confirm flag

2017-01-28 Thread Julian Anastasov

Add new sock flag to allow sockets to confirm neighbour.
When same struct dst_entry can be used for many different
neighbours we can not use it for pending confirmations.
As not all call paths lock the socket, use a full word for
the flag.

Add sk_dst_confirm as replacement for dst_confirm when
called for received packets.

Signed-off-by: Julian Anastasov 
---
 include/net/sock.h | 12 
 net/core/sock.c|  2 ++
 2 files changed, 14 insertions(+)

diff --git a/include/net/sock.h b/include/net/sock.h
index 7144750..e113786 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -240,6 +240,7 @@ struct sock_common {
   *@sk_wq: sock wait queue and async head
   *@sk_rx_dst: receive input route used by early demux
   *@sk_dst_cache: destination cache
+  *@sk_dst_pending_confirm: need to confirm neighbour
   *@sk_policy: flow policy
   *@sk_receive_queue: incoming packets
   *@sk_wmem_alloc: transmit queue bytes committed
@@ -393,6 +394,8 @@ struct sock {
struct sk_buff_head sk_write_queue;
__s32   sk_peek_off;
int sk_write_pending;
+   __u32   sk_dst_pending_confirm;
+   /* Note: 32bit hole on 64bit arches */
longsk_sndtimeo;
struct timer_list   sk_timer;
__u32   sk_priority;
@@ -1764,6 +1767,7 @@ static inline void dst_negative_advice(struct sock *sk)
if (ndst != dst) {
rcu_assign_pointer(sk->sk_dst_cache, ndst);
sk_tx_queue_clear(sk);
+   sk->sk_dst_pending_confirm = 0;
}
}
 }
@@ -1774,6 +1778,7 @@ static inline void dst_negative_advice(struct sock *sk)
struct dst_entry *old_dst;
 
sk_tx_queue_clear(sk);
+   sk->sk_dst_pending_confirm = 0;
/*
 * This can be called while sk is owned by the caller only,
 * with no state that can be checked in a rcu_dereference_check() cond
@@ -1789,6 +1794,7 @@ static inline void dst_negative_advice(struct sock *sk)
struct dst_entry *old_dst;
 
sk_tx_queue_clear(sk);
+   sk->sk_dst_pending_confirm = 0;
old_dst = xchg((__force struct dst_entry **)>sk_dst_cache, dst);
dst_release(old_dst);
 }
@@ -1809,6 +1815,12 @@ static inline void dst_negative_advice(struct sock *sk)
 
 struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie);
 
+static inline void sk_dst_confirm(struct sock *sk)
+{
+   if (!sk->sk_dst_pending_confirm)
+   sk->sk_dst_pending_confirm = 1;
+}
+
 bool sk_mc_loop(struct sock *sk);
 
 static inline bool sk_can_gso(const struct sock *sk)
diff --git a/net/core/sock.c b/net/core/sock.c
index 8b35debf..b743565 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -502,6 +502,7 @@ struct dst_entry *__sk_dst_check(struct sock *sk, u32 
cookie)
 
if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
sk_tx_queue_clear(sk);
+   sk->sk_dst_pending_confirm = 0;
RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
dst_release(dst);
return NULL;
@@ -1519,6 +1520,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const 
gfp_t priority)
af_family_clock_key_strings[newsk->sk_family]);
 
newsk->sk_dst_cache = NULL;
+   newsk->sk_dst_pending_confirm = 0;
newsk->sk_wmem_queued   = 0;
newsk->sk_forward_alloc = 0;
atomic_set(>sk_drops, 0);
-- 
1.9.3

[PATCHv2 RFC net-next 7/7] net: pending_confirm is not used anymore

2017-01-28 Thread Julian Anastasov

When same struct dst_entry can be used for many different
neighbours we can not use it for pending confirmations.
As last step, we can remove the pending_confirm flag.

Reported-by: YueHaibing 
Fixes: 5110effee8fd ("net: Do delayed neigh confirmation.")
Fixes: f2bb4bedf35d ("ipv4: Cache output routes in fib_info nexthops.")
Signed-off-by: Julian Anastasov 
---
 include/net/dst.h | 14 ++
 net/core/dst.c|  1 -
 2 files changed, 2 insertions(+), 13 deletions(-)

diff --git a/include/net/dst.h b/include/net/dst.h
index 3a3b34b..84a1043 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -59,8 +59,6 @@ struct dst_entry {
 #define DST_XFRM_QUEUE 0x0100
 #define DST_METADATA   0x0200
 
-   unsigned short  pending_confirm;
-
short   error;
 
/* A non-zero value of dst->obsolete forces by-hand validation
@@ -78,6 +76,8 @@ struct dst_entry {
 #define DST_OBSOLETE_KILL  -2
unsigned short  header_len; /* more space at head required 
*/
unsigned short  trailer_len;/* space to reserve at tail */
+   unsigned short  __pad3;
+
 #ifdef CONFIG_IP_ROUTE_CLASSID
__u32   tclassid;
 #else
@@ -440,7 +440,6 @@ static inline void dst_rcu_free(struct rcu_head *head)
 
 static inline void dst_confirm(struct dst_entry *dst)
 {
-   dst->pending_confirm = 1;
 }
 
 static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n,
@@ -448,15 +447,6 @@ static inline int dst_neigh_output(struct dst_entry *dst, 
struct neighbour *n,
 {
const struct hh_cache *hh;
 
-   if (dst->pending_confirm) {
-   unsigned long now = jiffies;
-
-   dst->pending_confirm = 0;
-   /* avoid dirtying neighbour */
-   if (n->confirmed != now)
-   n->confirmed = now;
-   }
-
hh = >hh;
if ((n->nud_state & NUD_CONNECTED) && hh->hh_len)
return neigh_hh_output(hh, skb);
diff --git a/net/core/dst.c b/net/core/dst.c
index b5cbbe0..960e503 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -190,7 +190,6 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
dst->__use = 0;
dst->lastuse = jiffies;
dst->flags = flags;
-   dst->pending_confirm = 0;
dst->next = NULL;
if (!(flags & DST_NOCOUNT))
dst_entries_add(ops, 1);
-- 
1.9.3

[PATCHv2 RFC net-next 6/7] net: use dst_confirm_neigh for UDP, RAW, ICMP, L2TP

2017-01-28 Thread Julian Anastasov

When same struct dst_entry can be used for many different
neighbours we can not use it for pending confirmations.

The datagram protocols can use MSG_CONFIRM to confirm the
neighbour. When used with MSG_PROBE we do not reach the
code where neighbour is confirmed, so we have to do the
same slow lookup by using the dst_confirm_neigh() helper.
When MSG_PROBE is not used, ip_append_data/ip6_append_data
will set the skb flag dst_pending_confirm.

Reported-by: YueHaibing 
Fixes: 5110effee8fd ("net: Do delayed neigh confirmation.")
Fixes: f2bb4bedf35d ("ipv4: Cache output routes in fib_info nexthops.")
Signed-off-by: Julian Anastasov 
---
 net/ipv4/ip_output.c  |  6 ++
 net/ipv4/ping.c   |  3 ++-
 net/ipv4/raw.c|  6 +-
 net/ipv4/udp.c|  3 ++-
 net/ipv6/ip6_output.c |  6 ++
 net/ipv6/raw.c|  6 +-
 net/ipv6/route.c  | 27 ++-
 net/ipv6/udp.c|  3 ++-
 net/l2tp/l2tp_ip6.c   |  3 ++-
 9 files changed, 44 insertions(+), 19 deletions(-)

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 27f1db7..ff0fcaa 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -889,6 +889,9 @@ static inline int ip_ufo_append_data(struct sock *sk,
 
skb->csum = 0;
 
+   if (flags & MSG_CONFIRM)
+   skb_set_dst_pending_confirm(skb, 1);
+
__skb_queue_tail(queue, skb);
} else if (skb_is_gso(skb)) {
goto append;
@@ -1089,6 +1092,9 @@ static int __ip_append_data(struct sock *sk,
exthdrlen = 0;
csummode = CHECKSUM_NONE;
 
+   if ((flags & MSG_CONFIRM) && !skb_prev)
+   skb_set_dst_pending_confirm(skb, 1);
+
/*
 * Put the packet on the pending queue.
 */
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 592db6a..6ee792d 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -848,7 +848,8 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr 
*msg, size_t len)
return err;
 
 do_confirm:
-   dst_confirm(>dst);
+   if (msg->msg_flags & MSG_PROBE)
+   dst_confirm_neigh(>dst, );
if (!(msg->msg_flags & MSG_PROBE) || len)
goto back_from_confirm;
err = 0;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 4e49e5c..8119e1f 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -383,6 +383,9 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 
*fl4,
 
sock_tx_timestamp(sk, sockc->tsflags, _shinfo(skb)->tx_flags);
 
+   if (flags & MSG_CONFIRM)
+   skb_set_dst_pending_confirm(skb, 1);
+
skb->transport_header = skb->network_header;
err = -EFAULT;
if (memcpy_from_msg(iph, msg, length))
@@ -666,7 +669,8 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, 
size_t len)
return len;
 
 do_confirm:
-   dst_confirm(>dst);
+   if (msg->msg_flags & MSG_PROBE)
+   dst_confirm_neigh(>dst, );
if (!(msg->msg_flags & MSG_PROBE) || len)
goto back_from_confirm;
err = 0;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index d6dddcf..4bdb358 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1088,7 +1088,8 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, 
size_t len)
return err;
 
 do_confirm:
-   dst_confirm(>dst);
+   if (msg->msg_flags & MSG_PROBE)
+   dst_confirm_neigh(>dst, >daddr);
if (!(msg->msg_flags_PROBE) || len)
goto back_from_confirm;
err = 0;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 7d90cab..5d944c1 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1145,6 +1145,9 @@ static inline int ip6_ufo_append_data(struct sock *sk,
skb->protocol = htons(ETH_P_IPV6);
skb->csum = 0;
 
+   if (flags & MSG_CONFIRM)
+   skb_set_dst_pending_confirm(skb, 1);
+
__skb_queue_tail(queue, skb);
} else if (skb_is_gso(skb)) {
goto append;
@@ -1517,6 +1520,9 @@ static int __ip6_append_data(struct sock *sk,
exthdrlen = 0;
dst_exthdrlen = 0;
 
+   if ((flags & MSG_CONFIRM) && !skb_prev)
+   skb_set_dst_pending_confirm(skb, 1);
+
/*
 * Put the packet on the pending queue
 */
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index ea89073..f174e76 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -654,6 +654,9 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr 
*msg, int length,
 
skb->ip_summed = CHECKSUM_NONE;
 
+   if (flags & MSG_CONFIRM)
+   skb_set_dst_pending_confirm(skb, 1);
+
skb->transport_header

[PATCHv2 RFC net-next 0/7] net: dst_confirm replacement

2017-01-28 Thread Julian Anastasov

v1->v2:
- patch 1:
  put sk_dst_pending_confirm in TX cacheline
- patch 2:
  add skb_set_dst_pending_confirm and skb_get_dst_pending_confirm helpers
- patch 3:
  use skb_set_dst_pending_confirm
  remove check from sctp_transport_dst_confirm, directly assign
- patch 4:
  use skb_set_dst_pending_confirm
- patch 6:
  use skb_set_dst_pending_confirm

This patchset addresses the problem of neighbour
confirmation where received replies from one nexthop
can cause confirmation of different nexthop when using
the same dst. Thanks to YueHaibing 
for tracking the dst->pending_confirm problem.

Sockets can obtain a cached output route. Such
routes can be to a known nexthop (rt_gateway=IP) or can be
used simultaneously for different nexthop IPs by different
subnet prefixes (nh->nh_scope = RT_SCOPE_HOST, rt_gateway=0).

At first look, there are more problems:

- dst_confirm() sets the flag on dst and not on dst->path;
as a result, the indication is lost when XFRM is used

- DNAT can change the nexthop, so the really used nexthop is
not confirmed

So, the following solution is to avoid using
dst->pending_confirm.

The current dst_confirm() usage is as follows:

Protocols confirming dst on received packets:
- TCP (1 dst per socket)
- SCTP (1 dst per transport)
- CXGB*

Protocols supporting sendmsg with MSG_CONFIRM [ | MSG_PROBE ] to
confirm neighbour:
- UDP IPv4/IPv6
- ICMPv4 PING
- RAW IPv4/IPv6
- L2TP/IPv6

MSG_CONFIRM for other purposes (fix not needed):
- CAN

Sending without locking the socket:
- UDP (when no cork)
- RAW (when hdrincl=1)

Redirects from old to new GW:
- rt6_do_redirect


The patchset includes the following changes:

1. sock: add sk_dst_pending_confirm flag

- used only by TCP with patch 4 to remember the received
indication in sk->sk_dst_pending_confirm

2. net: add dst_pending_confirm flag to skbuff

- skb->dst_pending_confirm will be used by all protocols
in following patches, via skb_{set,get}_dst_pending_confirm

3. sctp: add dst_pending_confirm flag

- SCTP uses per-transport dsts and can not use
sk->sk_dst_pending_confirm like TCP

4. tcp: replace dst_confirm with sk_dst_confirm

5. net: add confirm_neigh method to dst_ops

- IPv4 and IPv6 provision for slow neigh lookups for MSG_PROBE users.
I decided to use neigh lookup only for this case because on
MSG_PROBE the skb may pass MTU checks but it does not reach
the neigh confirmation code. This patch will be used from patch 6.

- xfrm_confirm_neigh: support is incomplete here; only routes with
known nexthops (gateway) are supported because the tunnel address
is slow to obtain. Or is there a solution to this problem?

6. net: use dst_confirm_neigh for UDP, RAW, ICMP, L2TP

- dst_confirm conversion for UDP, RAW, ICMP and L2TP/IPv6

- these protocols use MSG_CONFIRM propagated by ip*_append_data
to skb->dst_pending_confirm. sk->sk_dst_pending_confirm is not
used because some sending paths do not lock the socket. For
MSG_PROBE we use the slow lookup (dst_confirm_neigh).

- there are also 2 cases that need the slow lookup:
__ip6_rt_update_pmtu and rt6_do_redirect. I hope
_hdr(skb)->saddr is the correct nexthop address to use here.

7. net: pending_confirm is not used anymore

- I failed to understand the CXGB* code, I see dst_confirm()
calls but I'm not sure dst_neigh_output() was called. For now
I just removed the dst->pending_confirm flag and left all
dst_confirm() calls there. Any better idea?

- Now maybe the old function neigh_output() should be restored
instead of dst_neigh_output()?


Julian Anastasov (7):
  sock: add sk_dst_pending_confirm flag
  net: add dst_pending_confirm flag to skbuff
  sctp: add dst_pending_confirm flag
  tcp: replace dst_confirm with sk_dst_confirm
  net: add confirm_neigh method to dst_ops
  net: use dst_confirm_neigh for UDP, RAW, ICMP, L2TP
  net: pending_confirm is not used anymore

 drivers/net/vrf.c  |  5 -
 include/linux/skbuff.h | 12 
 include/net/arp.h  | 16 
 include/net/dst.h  | 21 +
 include/net/dst_ops.h  |  2 ++
 include/net/ndisc.h| 17 +
 include/net/sctp/sctp.h|  6 ++
 include/net/sctp/structs.h |  4 
 include/net/sock.h | 26 ++
 net/core/dst.c |  1 -
 net/core/sock.c|  2 ++
 net/ipv4/ip_output.c   | 11 ++-
 net/ipv4/ping.c|  3 ++-
 net/ipv4/raw.c |  6 +-
 net/ipv4/route.c   | 19 +++
 net/ipv4/tcp_input.c   | 12 +++-
 net/ipv4/tcp_metrics.c |  7 ++-
 net/ipv4/tcp_output.c  |  2 ++
 net/ipv4/udp.c |  3 ++-
 net/ipv6/ip6_output.c  |  7 +++
 net/ipv6/raw.c |  6 +-
 net/ipv6/route.c   | 43 ++-
 net/ipv6/udp.c |  3 ++-
 net/l2tp/l2tp_ip6.c|  3 ++-
 net/sctp/associola.c   |  3 +--

[PATCHv2 RFC net-next 4/7] tcp: replace dst_confirm with sk_dst_confirm

2017-01-28 Thread Julian Anastasov

When same struct dst_entry can be used for many different
neighbours we can not use it for pending confirmations.
Use the new sk_dst_confirm() helper to propagate the
indication from received packets to sock_confirm_neigh().

Reported-by: YueHaibing 
Fixes: 5110effee8fd ("net: Do delayed neigh confirmation.")
Fixes: f2bb4bedf35d ("ipv4: Cache output routes in fib_info nexthops.")
Signed-off-by: Julian Anastasov 
---
 net/ipv4/tcp_input.c   | 12 +++-
 net/ipv4/tcp_metrics.c |  7 ++-
 net/ipv4/tcp_output.c  |  2 ++
 3 files changed, 7 insertions(+), 14 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3de6eba..b3e88bb 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3644,11 +3644,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff 
*skb, int flag)
if (tp->tlp_high_seq)
tcp_process_tlp_ack(sk, ack, flag);
 
-   if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) {
-   struct dst_entry *dst = __sk_dst_get(sk);
-   if (dst)
-   dst_confirm(dst);
-   }
+   if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
+   sk_dst_confirm(sk);
 
if (icsk->icsk_pending == ICSK_TIME_RETRANS)
tcp_schedule_loss_probe(sk);
@@ -5995,7 +5992,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff 
*skb)
break;
 
case TCP_FIN_WAIT1: {
-   struct dst_entry *dst;
int tmo;
 
/* If we enter the TCP_FIN_WAIT1 state and we are a
@@ -6022,9 +6018,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff 
*skb)
tcp_set_state(sk, TCP_FIN_WAIT2);
sk->sk_shutdown |= SEND_SHUTDOWN;
 
-   dst = __sk_dst_get(sk);
-   if (dst)
-   dst_confirm(dst);
+   sk_dst_confirm(sk);
 
if (!sock_flag(sk, SOCK_DEAD)) {
/* Wake up lingering close() */
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index b9ed0d5..0f46e5f 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -375,12 +375,10 @@ void tcp_update_metrics(struct sock *sk)
u32 val;
int m;
 
+   sk_dst_confirm(sk);
if (sysctl_tcp_nometrics_save || !dst)
return;
 
-   if (dst->flags & DST_HOST)
-   dst_confirm(dst);
-
rcu_read_lock();
if (icsk->icsk_backoff || !tp->srtt_us) {
/* This session failed to estimate rtt. Why?
@@ -493,11 +491,10 @@ void tcp_init_metrics(struct sock *sk)
struct tcp_metrics_block *tm;
u32 val, crtt = 0; /* cached RTT scaled by 8 */
 
+   sk_dst_confirm(sk);
if (!dst)
goto reset;
 
-   dst_confirm(dst);
-
rcu_read_lock();
tm = tcp_get_metrics(sk, dst, true);
if (!tm) {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 671c695..c1f8a59 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -973,6 +973,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff 
*skb, int clone_it,
skb_set_hash_from_sk(skb, sk);
atomic_add(skb->truesize, >sk_wmem_alloc);
 
+   skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm);
+
/* Build TCP header and checksum it. */
th = (struct tcphdr *)skb->data;
th->source  = inet->inet_sport;
-- 
1.9.3

[PATCH v2] cfg80211 debugfs: Cleanup some checkpatch issues

2017-01-28 Thread Dmitriy Pichugin

This fixes the checkpatch.pl warnings:
* Macros should not use a trailing semicolon.
* Spaces required around that '='.
* Symbolic permissions 'S_IRUGO' are not preferred.

Signed-off-by: Dmitriy Pichugin 
---
 net/wireless/debugfs.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/wireless/debugfs.c b/net/wireless/debugfs.c
index 5d45391..30fc6eb 100644
--- a/net/wireless/debugfs.c
+++ b/net/wireless/debugfs.c
@@ -17,7 +17,7 @@
 static ssize_t name## _read(struct file *file, char __user *userbuf,   \
size_t count, loff_t *ppos) \
 {  \
-   struct wiphy *wiphy= file->private_data;\
+   struct wiphy *wiphy = file->private_data;   \
char buf[buflen];   \
int res;\
\
@@ -29,14 +29,14 @@
.read = name## _read,   \
.open = simple_open,\
.llseek = generic_file_llseek,  \
-};
+}
 
 DEBUGFS_READONLY_FILE(rts_threshold, 20, "%d",
- wiphy->rts_threshold)
+ wiphy->rts_threshold);
 DEBUGFS_READONLY_FILE(fragmentation_threshold, 20, "%d",
  wiphy->frag_threshold);
 DEBUGFS_READONLY_FILE(short_retry_limit, 20, "%d",
- wiphy->retry_short)
+ wiphy->retry_short);
 DEBUGFS_READONLY_FILE(long_retry_limit, 20, "%d",
  wiphy->retry_long);
 
@@ -103,7 +103,7 @@ static ssize_t ht40allow_map_read(struct file *file,
 };
 
 #define DEBUGFS_ADD(name)  \
-   debugfs_create_file(#name, S_IRUGO, phyd, >wiphy, ## _ops);
+   debugfs_create_file(#name, 0444, phyd, >wiphy, ## _ops)
 
 void cfg80211_debugfs_rdev_add(struct cfg80211_registered_device *rdev)
 {
-- 
1.9.1

[PATCH net-next] net: add devm version of alloc_etherdev_mqs function

2017-01-28 Thread Rafał Miłecki

From: Rafał Miłecki 

This patch adds devm_alloc_etherdev_mqs function and devm_alloc_etherdev
macro. These can be used for simpler netdev allocation without having to
care about calling free_netdev.

Thanks to this change, drivers, their error paths and removal paths may
get a bit simpler.

Signed-off-by: Rafał Miłecki 
---
I'm working on V2 of:
[PATCH 0/3] net-next: use one struct bgmac & add PHY support
and I just realized I could get a way simpler error path in bgmac with a
devm_alloc_etherdev helper. So there is my suggestion for adding it.
---
 include/linux/etherdevice.h |  5 +
 net/ethernet/eth.c  | 28 
 2 files changed, 33 insertions(+)

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 42add77ae47d..c62b709b1ce0 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -54,6 +54,11 @@ struct net_device *alloc_etherdev_mqs(int sizeof_priv, 
unsigned int txqs,
 #define alloc_etherdev(sizeof_priv) alloc_etherdev_mq(sizeof_priv, 1)
 #define alloc_etherdev_mq(sizeof_priv, count) alloc_etherdev_mqs(sizeof_priv, 
count, count)
 
+struct net_device *devm_alloc_etherdev_mqs(struct device *dev, int sizeof_priv,
+  unsigned int txqs,
+  unsigned int rxqs);
+#define devm_alloc_etherdev(dev, sizeof_priv) devm_alloc_etherdev_mqs(dev, 
sizeof_priv, 1, 1)
+
 struct sk_buff **eth_gro_receive(struct sk_buff **head,
 struct sk_buff *skb);
 int eth_gro_complete(struct sk_buff *skb, int nhoff);
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 8c5a479681ca..efdaaab735fc 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -392,6 +392,34 @@ struct net_device *alloc_etherdev_mqs(int sizeof_priv, 
unsigned int txqs,
 }
 EXPORT_SYMBOL(alloc_etherdev_mqs);
 
+static void devm_free_netdev(struct device *dev, void *res)
+{
+   free_netdev(*(struct net_device **)res);
+}
+
+struct net_device *devm_alloc_etherdev_mqs(struct device *dev, int sizeof_priv,
+  unsigned int txqs, unsigned int rxqs)
+{
+   struct net_device **dr;
+   struct net_device *netdev;
+
+   dr = devres_alloc(devm_free_netdev, sizeof(*dr), GFP_KERNEL);
+   if (!dr)
+   return NULL;
+
+   netdev = alloc_etherdev_mqs(sizeof_priv, txqs, rxqs);
+   if (!netdev) {
+   devres_free(dr);
+   return NULL;
+   }
+
+   *dr = netdev;
+   devres_add(dev, dr);
+
+   return netdev;
+}
+EXPORT_SYMBOL(devm_alloc_etherdev_mqs);
+
 ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len)
 {
return scnprintf(buf, PAGE_SIZE, "%*phC\n", len, addr);
-- 
2.11.0

[PATCH] net: aquantia: atlantic: use new api ethtool_{get|set}_link_ksettings

2017-01-28 Thread Philippe Reynes

The ethtool api {get|set}_settings is deprecated.
We move this driver to new api {get|set}_link_ksettings.

As I don't have the hardware, I'd be very pleased if
someone may test this patch.

Signed-off-by: Philippe Reynes 
---
 .../net/ethernet/aquantia/atlantic/aq_ethtool.c|   23 +
 drivers/net/ethernet/aquantia/atlantic/aq_nic.c|   49 ---
 drivers/net/ethernet/aquantia/atlantic/aq_nic.h|6 ++-
 3 files changed, 47 insertions(+), 31 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c 
b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
index c5b025e..a761e91 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
@@ -35,24 +35,25 @@ static u32 aq_ethtool_get_link(struct net_device *ndev)
return ethtool_op_get_link(ndev);
 }
 
-static int aq_ethtool_get_settings(struct net_device *ndev,
-  struct ethtool_cmd *cmd)
+static int aq_ethtool_get_link_ksettings(struct net_device *ndev,
+struct ethtool_link_ksettings *cmd)
 {
struct aq_nic_s *aq_nic = netdev_priv(ndev);
 
-   aq_nic_get_link_settings(aq_nic, cmd);
-   ethtool_cmd_speed_set(cmd, netif_carrier_ok(ndev) ?
-   aq_nic_get_link_speed(aq_nic) : 0U);
+   aq_nic_get_link_ksettings(aq_nic, cmd);
+   cmd->base.speed = netif_carrier_ok(ndev) ?
+   aq_nic_get_link_speed(aq_nic) : 0U;
 
return 0;
 }
 
-static int aq_ethtool_set_settings(struct net_device *ndev,
-  struct ethtool_cmd *cmd)
+static int
+aq_ethtool_set_link_ksettings(struct net_device *ndev,
+ const struct ethtool_link_ksettings *cmd)
 {
struct aq_nic_s *aq_nic = netdev_priv(ndev);
 
-   return aq_nic_set_link_settings(aq_nic, cmd);
+   return aq_nic_set_link_ksettings(aq_nic, cmd);
 }
 
 /* there "5U" is number of queue[#] stats lines (InPackets+...+InErrors) */
@@ -248,8 +249,6 @@ static int aq_ethtool_get_rxnfc(struct net_device *ndev,
.get_link= aq_ethtool_get_link,
.get_regs_len= aq_ethtool_get_regs_len,
.get_regs= aq_ethtool_get_regs,
-   .get_settings= aq_ethtool_get_settings,
-   .set_settings= aq_ethtool_set_settings,
.get_drvinfo = aq_ethtool_get_drvinfo,
.get_strings = aq_ethtool_get_strings,
.get_rxfh_indir_size = aq_ethtool_get_rss_indir_size,
@@ -257,5 +256,7 @@ static int aq_ethtool_get_rxnfc(struct net_device *ndev,
.get_rxfh= aq_ethtool_get_rss,
.get_rxnfc   = aq_ethtool_get_rxnfc,
.get_sset_count  = aq_ethtool_get_sset_count,
-   .get_ethtool_stats   = aq_ethtool_stats
+   .get_ethtool_stats   = aq_ethtool_stats,
+   .get_link_ksettings  = aq_ethtool_get_link_ksettings,
+   .set_link_ksettings  = aq_ethtool_set_link_ksettings,
 };
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c 
b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index 84bb441..ea86801 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -734,50 +734,63 @@ void aq_nic_get_stats(struct aq_nic_s *self, u64 *data)
(void)err;
 }
 
-void aq_nic_get_link_settings(struct aq_nic_s *self, struct ethtool_cmd *cmd)
+void aq_nic_get_link_ksettings(struct aq_nic_s *self,
+  struct ethtool_link_ksettings *cmd)
 {
-   cmd->port = PORT_TP;
-   cmd->transceiver = XCVR_EXTERNAL;
+   u32 supported, advertising;
+
+   cmd->base.port = PORT_TP;
/* This driver supports only 10G capable adapters, so DUPLEX_FULL */
-   cmd->duplex = DUPLEX_FULL;
-   cmd->autoneg = self->aq_nic_cfg.is_autoneg;
+   cmd->base.duplex = DUPLEX_FULL;
+   cmd->base.autoneg = self->aq_nic_cfg.is_autoneg;
+
+   ethtool_convert_link_mode_to_legacy_u32(,
+   cmd->link_modes.supported);
+   ethtool_convert_link_mode_to_legacy_u32(,
+   cmd->link_modes.advertising);
 
-   cmd->supported |= (self->aq_hw_caps.link_speed_msk & AQ_NIC_RATE_10G) ?
+   supported |= (self->aq_hw_caps.link_speed_msk & AQ_NIC_RATE_10G) ?
ADVERTISED_1baseT_Full : 0U;
-   cmd->supported |= (self->aq_hw_caps.link_speed_msk & AQ_NIC_RATE_1G) ?
+   supported |= (self->aq_hw_caps.link_speed_msk & AQ_NIC_RATE_1G) ?
ADVERTISED_1000baseT_Full : 0U;
-   cmd->supported |= (self->aq_hw_caps.link_speed_msk & AQ_NIC_RATE_100M) ?
+   supported |= (self->aq_hw_caps.link_speed_msk & AQ_NIC_RATE_100M) ?
ADVERTISED_100baseT_Full : 0U;
-   cmd->supported |=

[iproute PATCH v2] man: tc-csum.8: Fix example

2017-01-28 Thread Phil Sutter

This fixes two issues with the provided example:

- Add missing 'dev' keyword to second command.
- Use a real IPv4 address instead of a bogus hex value since that will
  be rejected by get_addr_ipv4().

Fixes: dbfb17a67f9c7 ("man: tc-csum.8: Add an example")
Reported-by: Davide Caratti 
Signed-off-by: Phil Sutter 
---
Changes since v1:
- Instead of using potentially valid IP addresses, use RFC 5737 ones.
---
 man/man8/tc-csum.8 | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/man/man8/tc-csum.8 b/man/man8/tc-csum.8
index 3a64c82f09ba8..68e5610513a51 100644
--- a/man/man8/tc-csum.8
+++ b/man/man8/tc-csum.8
@@ -57,9 +57,9 @@ packets, both IP and UDP checksums have to be recalculated:
 .RS
 .EX
 # tc qdisc add dev eth0 ingress handle :
-# tc filter add eth0 prio 1 protocol ip parent : \\
-   u32 match ip src 192.168.1.100/32 flowid :1 \\
-   action pedit munge ip dst set 0x12345678 pipe \\
+# tc filter add dev eth0 prio 1 protocol ip parent : \\
+   u32 match ip src 192.0.2.100/32 flowid :1 \\
+   action pedit munge ip dst set 198.51.100.1 pipe \\
csum ip and udp
 .EE
 .RE
-- 
2.11.0

Re: [PATCH] cfg80211 debugfs: Cleanup some checkpatch issues

2017-01-28 Thread Dmitriy Pichugin

On Fri, Jan 27, 2017 at 11:48:35AM -0800, Joe Perches wrote:
> On Fri, 2017-01-27 at 22:26 +0300, Pichugin Dmitry wrote:
> > This fixes the checkpatch.pl warnings:
> > * Macros should not use a trailing semicolon.
> > * Spaces required around that '='.
> > * Symbolic permissions 'S_IRUGO' are not preferred.
> 
> OK
> 
> > * Macro argument reuse 'buflen' - possible side-effects
> 
> Not all checkpatch messages need fixing.
> This is one of them.
> 
> > diff --git a/net/wireless/debugfs.c b/net/wireless/debugfs.c
> []
> > @@ -17,11 +17,12 @@
> >  static ssize_t name## _read(struct file *file, char __user *userbuf,   
> > \
> > size_t count, loff_t *ppos) \
> >  {  \
> > -   struct wiphy *wiphy= file->private_data;\
> > -   char buf[buflen];   \
> > +   struct wiphy *wiphy = file->private_data;   \
> > +   int __buflen = __builtin_constant_p(buflen) ? buflen : -1;  \
> > +   char buf[__buflen]; \
> 
> That's rather an odd change too
> 
OK. I will update the patch.

Best Regards,
Dmitriy.

Re: [PATCH net-next 0/4] mlx5: Create build configuration options

2017-01-28 Thread Saeed Mahameed

On Fri, Jan 27, 2017 at 8:13 PM, Tom Herbert  wrote:
> On Fri, Jan 27, 2017 at 9:58 AM, Saeed Mahameed
>  wrote:
>> On Fri, Jan 27, 2017 at 1:32 AM, Tom Herbert  wrote:
>>> This patchset creates configuration options for sriov, vxlan, eswitch,
>>> and tc features in the mlx5 driver. The purpose of this is to allow not
>>> building these features. These features are optional advanced features
>>> that are not required for a core Ethernet driver. A user can disable
>>> these features which reduces the amount of code in the driver. Disabling
>>> these features (and DCB) reduces the size of mlx5_core.o by about 16%.
>>> This can also reduce the complexity of backport and rebases since
>>> user would no longer need to worry about dependencies with the rest of
>>> the kernel that features which might not be of any interest to a user
>>> may bring in.
>>>
>>> Tested: Build and ran the driver with all features enabled (the default)
>>> and with none enabled (including DCB). Did not see any issues. I did
>>> not explicitly test operation of any of the features in the list.
>>>
>>
>> Basically I am not against this kind of change, in fact I am with it,
>> although I would have done some restructuring in the driver before I
>> did such a change ;), filling the code with ifdefs is not a neat thing.
>>
> If you wish, please take this as an RFC and feel free to structure the
> code the right way. I think the intent is clear enough and looks like
> davem isn't going to allow the directory restructuring so something
> like this seems to be the best course of action now.
>

Right.

>> I agree this will simplify backporting and provide some kind of
>> feature separation inside the driver.
>> But this will also increase the testing matrix we need to cover and
>> increase the likelihood of kbuild breaks by an order of magnitude.
>>
> The testing matrix already exploded with the proliferation of
> supported features. If anything this reduces the test matrix problem.
> For instance, if we make a change to the core driver and functionality
> properly isolated there is a much better chance that this won't affect
> peripheral functionality and vice versa. It is just not feasible for
> us to test every combination of NIC features for every change being
> made.
>

Yes for isolated features, but for base functionality, we need to test
it with all new device specific kconfig combinations on every patch!
since misplaced code inside or outside the correct ifdef
can easily go unnoticed and break functionality.

>> One more thing, do we really need a device specific flag per feature
>> per vendor per device?  can't we just use the same kconfig flag for
>> all drivers and if there is a more generic system wide flag that
>> covers the same feature
>> can't we just use it, for instance instead of
>> CONFIG__SRIOV why not use already existing CONFIG_PCI_IOV
>> for all drivers ?
>>
> That sounds good to me. We already have CONFIG_RFS_ACCEL and others
> that do that.
>
> Tom
>
>> Saeed.
>>
>>>
>>>
>>> Tom Herbert (4):
>>>   mlx5: Make building eswitch configurable
>>>   mlx5: Make building SR-IOV configurable
>>>   mlx5: Make building tc hardware offload configurable
>>>   mlx5: Make building vxlan hardware offload configurable
>>>
>>>  drivers/net/ethernet/mellanox/mlx5/core/Kconfig   |  35 ++
>>>  drivers/net/ethernet/mellanox/mlx5/core/Makefile  |  16 ++-
>>>  drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 129 
>>> --
>>>  drivers/net/ethernet/mellanox/mlx5/core/en_tc.c   |  39 +--
>>>  drivers/net/ethernet/mellanox/mlx5/core/eq.c  |   4 +-
>>>  drivers/net/ethernet/mellanox/mlx5/core/lag.c |   2 +
>>>  drivers/net/ethernet/mellanox/mlx5/core/main.c|  32 --
>>>  drivers/net/ethernet/mellanox/mlx5/core/sriov.c   |   6 +-
>>>  8 files changed, 205 insertions(+), 58 deletions(-)
>>>
>>> --
>>> 2.9.3
>>>

Re: [PATCH net-next 1/4] mlx5: Make building eswitch configurable

2017-01-28 Thread Saeed Mahameed

On Sat, Jan 28, 2017 at 1:23 AM, Alexei Starovoitov  wrote:
> On 1/27/17 1:15 PM, Saeed Mahameed wrote:
>>
>> It is only mandatory for configurations that needs eswitch, where the
>> driver has no way to know about them, for a good old bare metal box,
>> eswitch is not needed.
>>
>> we can do some work to strip the l2 table logic - needed for PFs to
>> work on multi-host - out of eswitch but again that would further
>> complicate the driver code since eswitch will still need to update l2
>> tables for VFs.
>
>
> Saeed,
> for multi-host setups every host in that multi-host doesn't
> actually see the eswitch, no? Otherwise broken driver on one machine
> can affect the other hosts in the same bundle? Please double check,

each host (PF) has its own eswitch, and each eswitch lives in its own
"steering-space"
 and it can't affect others.

> since this is absolutely critical HW requirement.
>

The only shared HW resources between hosts (PFs) is the simple l2 table,
and the only thing a host can ask from the l2 table (FW) is: "forward
UC MAC to me", and it is the responsibility of the driver eswitch
to do so.

the l2 table is created and managed by FW, SW eswitch can only request
from FW, and the FW is trusted.

Re: [iproute PATCH] man: tc-csum.8: Fix example

2017-01-28 Thread Phil Sutter

On Fri, Jan 27, 2017 at 09:49:58PM +0100, Guillaume Nault wrote:
> On Fri, Jan 27, 2017 at 12:15:01PM +0100, Phil Sutter wrote:
> > +# tc filter add dev eth0 prio 1 protocol ip parent : \\
> > u32 match ip src 192.168.1.100/32 flowid :1 \\
> > -   action pedit munge ip dst set 0x12345678 pipe \\
> > +   action pedit munge ip dst set 1.2.3.4 pipe \\
> > 
> Just nitpicking here, but IMHO examples like this should better use IP
> addresses reserved for documentation (192.0.2.0/24, 198.51.100.0/24 or
> 203.0.113.0/24).

Good point! This wasn't on my radar yet and I didn't know there were
IPv4 ranges specifically for that purpose. I guess the reasoning here is
analogous to why one shouldn't use 'example.com' everywhere.

Luckily, 1.2.3.0/24 seems to be reserved by APNIC for testing purposes.
:)

I'll respin using another example address.

Thanks, Phil

Re: [net 7/8] net/mlx5e: Fix update of hash function/key via ethtool

2017-01-28 Thread Saeed Mahameed

On Fri, Jan 27, 2017 at 11:50 PM, Tom Herbert  wrote:
> On Fri, Jan 27, 2017 at 12:38 PM, Saeed Mahameed  wrote:
>> From: Gal Pressman 
>>
>> Modifying TIR hash should change selected fields bitmask in addition to
>> the function and key.
>> Formerly, we would not set this field resulting in zeroing of its value,
>> which means no packet fields are used for RX RSS hash calculation thus
>> causing all traffic to arrive in RQ[0].
>>
> This commit log is rather scant in details. Does this mean that RSS is
> somehow broken in mlx5? What is exact test that demonstrates bad
> behavior? Did you verify that this doesn't break IPv4 or IPv6?
>

before this fix out of the box RSS worked fine for both IPv4/IPv6, the
only broken flow is when the user explicitly uses ethtool -X to update
the RSS indirection table or hash function.

We did verify both IPv6 and IPv4 RSS worked fine after the user
changes RSS configuration via ethtool -X

[PATCH 2/2] batman-adv: Fix includes for IS_ERR/ERR_PTR

2017-01-28 Thread Simon Wunderlich

From: Sven Eckelmann 

IS_ERR/ERR_PTR are not defined in linux/device.h but in linux/err.h. The
files using these macros therefore have to include the correct one.

Reported-by: Linus Luessing 
Signed-off-by: Sven Eckelmann 
Signed-off-by: Simon Wunderlich 
---
 net/batman-adv/debugfs.c  | 2 +-
 net/batman-adv/tp_meter.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index 5406148b9497..e32ad47c6efd 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -19,7 +19,7 @@
 #include "main.h"
 
 #include 
-#include 
+#include 
 #include 
 #include 
 #include 
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index 07f64b60b528..c94ebdecdc3d 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -23,7 +23,7 @@
 #include 
 #include 
 #include 
-#include 
+#include 
 #include 
 #include 
 #include 
-- 
2.11.0

[PATCH 1/2] batman-adv: Fix double call of dev_queue_xmit

2017-01-28 Thread Simon Wunderlich

From: Sven Eckelmann 

The net_xmit_eval has side effects because it is not making sure that e
isn't evaluated twice.

#define net_xmit_eval(e)((e) == NET_XMIT_CN ? 0 : (e))

The code requested by David Miller [1]

return net_xmit_eval(dev_queue_xmit(skb));

will get transformed into

return ((dev_queue_xmit(skb)) == NET_XMIT_CN ? 0 : (dev_queue_xmit(skb)))

dev_queue_xmit will therefore be tried again (with an already consumed skb)
whenever the return code is not NET_XMIT_CN.

[1] 
https://lkml.kernel.org/r/20170125.225624.965229145391320056.da...@davemloft.net

Fixes: c33705188c49 ("batman-adv: Treat NET_XMIT_CN as transmit successfully")
Signed-off-by: Sven Eckelmann 
Signed-off-by: Simon Wunderlich 
---
 net/batman-adv/send.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index d9b2889064a6..1489ec27daff 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -77,6 +77,7 @@ int batadv_send_skb_packet(struct sk_buff *skb,
 {
struct batadv_priv *bat_priv;
struct ethhdr *ethhdr;
+   int ret;
 
bat_priv = netdev_priv(hard_iface->soft_iface);
 
@@ -115,7 +116,8 @@ int batadv_send_skb_packet(struct sk_buff *skb,
 * congestion and traffic shaping, it drops and returns NET_XMIT_DROP
 * (which is > 0). This will not be treated as an error.
 */
-   return net_xmit_eval(dev_queue_xmit(skb));
+   ret = dev_queue_xmit(skb);
+   return net_xmit_eval(ret);
 send_skb_err:
kfree_skb(skb);
return NET_XMIT_DROP;
-- 
2.11.0

[PATCH 0/2] pull request for net-next: batman-adv 2017-01-28

2017-01-28 Thread Simon Wunderlich

Hi David,

here is another pull request for batman-adv in net-next. One of them fixes
a regression introduced by a patch in the previous pull request two days ago.

Please pull or let me know of any problem!

Thank you,
  Simon

The following changes since commit c33705188c493b7de3b8dc2956d67de91b444727:

  batman-adv: Treat NET_XMIT_CN as transmit successfully (2017-01-26 08:41:18 
+0100)

are available in the git repository at:

  git://git.open-mesh.org/linux-merge.git tags/batadv-next-for-davem-20170128

for you to fetch changes up to 3e7514afc7d728dd47c5fe9d7a1f5216fe659cda:

  batman-adv: Fix includes for IS_ERR/ERR_PTR (2017-01-28 10:40:35 +0100)


Here are two fixes for batman-adv for net-next:

 - fix double call of dev_queue_xmit(), caused by the recent introduction
   of net_xmit_eval(), by Sven Eckelmann

 - Fix includes for IS_ERR/ERR_PTR, by Sven Eckelmann


Sven Eckelmann (2):
  batman-adv: Fix double call of dev_queue_xmit
  batman-adv: Fix includes for IS_ERR/ERR_PTR

 net/batman-adv/debugfs.c  | 2 +-
 net/batman-adv/send.c | 4 +++-
 net/batman-adv/tp_meter.c | 2 +-
 3 files changed, 5 insertions(+), 3 deletions(-)

[PATCH 4/4] dcbnl: Add some spaces for better code readability

2017-01-28 Thread SF Markus Elfring

From: Markus Elfring 
Date: Sat, 28 Jan 2017 10:15:59 +0100

Use space characters at some source code places according to
the Linux coding style convention.

Signed-off-by: Markus Elfring 
---
 net/dcb/dcbnl.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 0903081a1212..0150de92c8ba 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -315,7 +315,7 @@ static int dcbnl_getcap(struct net_device *netdev, struct 
nlmsghdr *nlh,
if (data[DCB_CAP_ATTR_ALL])
getall = 1;
 
-   for (i = DCB_CAP_ATTR_ALL+1; i <= DCB_CAP_ATTR_MAX; i++) {
+   for (i = DCB_CAP_ATTR_ALL + 1; i <= DCB_CAP_ATTR_MAX; i++) {
if (!getall && !data[i])
continue;
 
@@ -359,7 +359,7 @@ static int dcbnl_getnumtcs(struct net_device *netdev, 
struct nlmsghdr *nlh,
if (data[DCB_NUMTCS_ATTR_ALL])
getall = 1;
 
-   for (i = DCB_NUMTCS_ATTR_ALL+1; i <= DCB_NUMTCS_ATTR_MAX; i++) {
+   for (i = DCB_NUMTCS_ATTR_ALL + 1; i <= DCB_NUMTCS_ATTR_MAX; i++) {
if (!getall && !data[i])
continue;
 
@@ -397,7 +397,7 @@ static int dcbnl_setnumtcs(struct net_device *netdev, 
struct nlmsghdr *nlh,
if (ret)
return ret;
 
-   for (i = DCB_NUMTCS_ATTR_ALL+1; i <= DCB_NUMTCS_ATTR_MAX; i++) {
+   for (i = DCB_NUMTCS_ATTR_ALL + 1; i <= DCB_NUMTCS_ATTR_MAX; i++) {
if (!data[i])
continue;
 
@@ -1593,7 +1593,7 @@ static int dcbnl_getfeatcfg(struct net_device *netdev, 
struct nlmsghdr *nlh,
if (data[DCB_FEATCFG_ATTR_ALL])
getall = 1;
 
-   for (i = DCB_FEATCFG_ATTR_ALL+1; i <= DCB_FEATCFG_ATTR_MAX; i++) {
+   for (i = DCB_FEATCFG_ATTR_ALL + 1; i <= DCB_FEATCFG_ATTR_MAX; i++) {
if (!getall && !data[i])
continue;
 
@@ -1631,7 +1631,7 @@ static int dcbnl_setfeatcfg(struct net_device *netdev, 
struct nlmsghdr *nlh,
if (ret)
goto err;
 
-   for (i = DCB_FEATCFG_ATTR_ALL+1; i <= DCB_FEATCFG_ATTR_MAX; i++) {
+   for (i = DCB_FEATCFG_ATTR_ALL + 1; i <= DCB_FEATCFG_ATTR_MAX; i++) {
if (!data[i])
continue;
 
@@ -1669,7 +1669,7 @@ struct reply_func {
struct nlattr **, struct sk_buff *);
 };
 
-static const struct reply_func reply_funcs[DCB_CMD_MAX+1] = {
+static const struct reply_func reply_funcs[DCB_CMD_MAX + 1] = {
[DCB_CMD_GSTATE]= { RTM_GETDCB, dcbnl_getstate },
[DCB_CMD_SSTATE]= { RTM_SETDCB, dcbnl_setstate },
[DCB_CMD_PFC_GCFG]  = { RTM_GETDCB, dcbnl_getpfccfg },
-- 
2.11.0

[PATCH 2/4] dcbnl: Adjust four function calls together with a variable assignment

2017-01-28 Thread SF Markus Elfring

From: Markus Elfring 
Date: Sat, 28 Jan 2017 09:19:58 +0100

The script "checkpatch.pl" pointed information out like the following.

ERROR: do not use assignment in if condition

Thus fix the affected source code places.

Signed-off-by: Markus Elfring 
---
 net/dcb/dcbnl.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 76fd727e2eb4..f29e19d962ec 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1799,7 +1799,8 @@ u8 dcb_getapp(struct net_device *dev, struct dcb_app *app)
u8 prio = 0;
 
spin_lock_bh(_lock);
-   if ((itr = dcb_app_lookup(app, dev->ifindex, 0)))
+   itr = dcb_app_lookup(app, dev->ifindex, 0);
+   if (itr)
prio = itr->app.priority;
spin_unlock_bh(_lock);
 
@@ -1827,7 +1828,8 @@ int dcb_setapp(struct net_device *dev, struct dcb_app 
*new)
 
spin_lock_bh(_lock);
/* Search for existing match and replace */
-   if ((itr = dcb_app_lookup(new, dev->ifindex, 0))) {
+   itr = dcb_app_lookup(new, dev->ifindex, 0);
+   if (itr) {
if (new->priority)
itr->app.priority = new->priority;
else {
@@ -1860,7 +1862,8 @@ u8 dcb_ieee_getapp_mask(struct net_device *dev, struct 
dcb_app *app)
u8 prio = 0;
 
spin_lock_bh(_lock);
-   if ((itr = dcb_app_lookup(app, dev->ifindex, 0)))
+   itr = dcb_app_lookup(app, dev->ifindex, 0);
+   if (itr)
prio |= 1 << itr->app.priority;
spin_unlock_bh(_lock);
 
@@ -1920,7 +1923,8 @@ int dcb_ieee_delapp(struct net_device *dev, struct 
dcb_app *del)
 
spin_lock_bh(_lock);
/* Search for existing match and remove it. */
-   if ((itr = dcb_app_lookup(del, dev->ifindex, del->priority))) {
+   itr = dcb_app_lookup(del, dev->ifindex, del->priority);
+   if (itr) {
list_del(>list);
kfree(itr);
err = 0;
-- 
2.11.0

[PATCH 3/4] dcbnl: Adjust five checks for null pointers

2017-01-28 Thread SF Markus Elfring

From: Markus Elfring 
Date: Sat, 28 Jan 2017 09:56:36 +0100
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The script "checkpatch.pl" pointed information out like the following.

Comparison to NULL could be written !…

Thus fix the affected source code places.

Signed-off-by: Markus Elfring 
---
 net/dcb/dcbnl.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index f29e19d962ec..0903081a1212 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -398,7 +398,7 @@ static int dcbnl_setnumtcs(struct net_device *netdev, 
struct nlmsghdr *nlh,
return ret;
 
for (i = DCB_NUMTCS_ATTR_ALL+1; i <= DCB_NUMTCS_ATTR_MAX; i++) {
-   if (data[i] == NULL)
+   if (!data[i])
continue;
 
value = nla_get_u8(data[i]);
@@ -741,7 +741,7 @@ static int dcbnl_setpfccfg(struct net_device *netdev, 
struct nlmsghdr *nlh,
return ret;
 
for (i = DCB_PFC_UP_ATTR_0; i <= DCB_PFC_UP_ATTR_7; i++) {
-   if (data[i] == NULL)
+   if (!data[i])
continue;
value = nla_get_u8(data[i]);
netdev->dcbnl_ops->setpfccfg(netdev,
@@ -955,7 +955,7 @@ static int dcbnl_bcn_setcfg(struct net_device *netdev, 
struct nlmsghdr *nlh,
return ret;
 
for (i = DCB_BCN_ATTR_RP_0; i <= DCB_BCN_ATTR_RP_7; i++) {
-   if (data[i] == NULL)
+   if (!data[i])
continue;
value_byte = nla_get_u8(data[i]);
netdev->dcbnl_ops->setbcnrp(netdev,
@@ -963,7 +963,7 @@ static int dcbnl_bcn_setcfg(struct net_device *netdev, 
struct nlmsghdr *nlh,
}
 
for (i = DCB_BCN_ATTR_BCNA_0; i <= DCB_BCN_ATTR_RI; i++) {
-   if (data[i] == NULL)
+   if (!data[i])
continue;
value_int = nla_get_u32(data[i]);
netdev->dcbnl_ops->setbcncfg(netdev,
@@ -1632,7 +1632,7 @@ static int dcbnl_setfeatcfg(struct net_device *netdev, 
struct nlmsghdr *nlh,
goto err;
 
for (i = DCB_FEATCFG_ATTR_ALL+1; i <= DCB_FEATCFG_ATTR_MAX; i++) {
-   if (data[i] == NULL)
+   if (!data[i])
continue;
 
value = nla_get_u8(data[i]);
-- 
2.11.0

[PATCH 1/4] dcbnl: Use kmalloc_array() in dcbnl_build_peer_app()

2017-01-28 Thread SF Markus Elfring

From: Markus Elfring 
Date: Fri, 27 Jan 2017 22:30:09 +0100

* A multiplication for the size determination of a memory allocation
  indicated that an array data structure should be processed.
  Thus use the corresponding function "kmalloc_array".

  This issue was detected by using the Coccinelle software.

* Replace the specification of a data structure by a pointer dereference
  to make the corresponding size determination a bit safer according to
  the Linux coding style convention.

Signed-off-by: Markus Elfring 
---
 net/dcb/dcbnl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 3202d75329b5..76fd727e2eb4 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -990,7 +990,7 @@ static int dcbnl_build_peer_app(struct net_device *netdev, 
struct sk_buff* skb,
 */
err = ops->peer_getappinfo(netdev, , _count);
if (!err && app_count) {
-   table = kmalloc(sizeof(struct dcb_app) * app_count, GFP_KERNEL);
+   table = kmalloc_array(app_count, sizeof(*table), GFP_KERNEL);
if (!table)
return -ENOMEM;

-- 
2.11.0

[PATCH 0/4] DCB netlink: Fine-tuning for some function implementations

2017-01-28 Thread SF Markus Elfring

From: Markus Elfring 
Date: Sat, 28 Jan 2017 10:28:19 +0100

A few update suggestions were taken into account
from static source code analysis.

Markus Elfring (4):
  Use kmalloc_array() in dcbnl_build_peer_app()
  Adjust four function calls together with a variable assignment
  Adjust five checks for null pointers
  Add some spaces for better code readability

 net/dcb/dcbnl.c | 36 
 1 file changed, 20 insertions(+), 16 deletions(-)

-- 
2.11.0

Re: [PATCH net-next v2 1/4] net: dsa: Add plumbing for port mirroring

2017-01-28 Thread Jiri Pirko

Sat, Jan 28, 2017 at 02:25:25AM CET, f.faine...@gmail.com wrote:
>Add necessary plumbing at the slave network device level to have switch
>drivers implement ndo_setup_tc() and most particularly the cls_matchall
>classifier. We add support for two switch operations:
>
>port_add_mirror and port_del_mirror() which configure, on a per-port
>basis the mirror parameters requested from the cls_matchall classifier.
>
>Code is largely borrowed from the Mellanox Spectrum switch driver.
>
>Signed-off-by: Florian Fainelli 
>---

[...]


>+/*
>+ * Mirroring TC entry
>+ */
>+struct dsa_mall_mirror_tc_entry {
>+  u8 to_local_port;
>+  bool ingress;
>+};
>+
>+/*
>+ * TC matchall entry
>+ */

Why are you using multiline comment format for single line comments?


>+struct dsa_mall_tc_entry {
>+  struct list_head list;
>+  unsigned long cookie;
>+  enum dsa_port_mall_action_type type;
>+  union {
>+  struct dsa_mall_mirror_tc_entry mirror;
>+  };
>+};
>+
>+
> struct dsa_port {
>   struct net_device   *netdev;
>   struct device_node  *dn;
>@@ -370,6 +397,15 @@ struct dsa_switch_ops {
>   int (*port_mdb_dump)(struct dsa_switch *ds, int port,
>struct switchdev_obj_port_mdb *mdb,
>int (*cb)(struct switchdev_obj *obj));
>+
>+  /*
>+   * TC integration
>+   */
>+  int (*port_mirror_add)(struct dsa_switch *ds, int port,
>+ struct dsa_mall_mirror_tc_entry *mirror,
>+ bool ingress);
>+  void(*port_mirror_del)(struct dsa_switch *ds, int port,
>+ struct dsa_mall_mirror_tc_entry *mirror);
> };

[...]


>+static int dsa_slave_add_cls_matchall(struct net_device *dev,
>+__be16 protocol,
>+struct tc_cls_matchall_offload *cls,
>+bool ingress)
>+{
>+  struct dsa_slave_priv *p = netdev_priv(dev);
>+  struct dsa_mall_tc_entry *mall_tc_entry;
>+  struct dsa_switch *ds = p->parent;
>+  struct net *net = dev_net(dev);
>+  struct dsa_slave_priv *to_p;
>+  struct net_device *to_dev;
>+  const struct tc_action *a;
>+  int err = -EOPNOTSUPP;
>+  LIST_HEAD(actions);
>+  int ifindex;
>+
>+  if (!ds->ops->port_mirror_add)
>+  return err;
>+
>+  if (!tc_single_action(cls->exts)) {
>+  netdev_err(dev, "only singular actions are supported\n");

Why do you notify the user in this case, but in case he tries to add
a non-supported action you don't notify him?


>+  return err;
>+  }
>+
>+  mall_tc_entry = kzalloc(sizeof(*mall_tc_entry), GFP_KERNEL);
>+  if (!mall_tc_entry)
>+  return -ENOMEM;
>+  mall_tc_entry->cookie = cls->cookie;

Hmm, I believe that this allocation and initialization should go into
the "is_mirred if". You can do the checks in advance. That would also
make the error path simpler.


>+
>+  tcf_exts_to_list(cls->exts, );
>+  a = list_first_entry(, struct tc_action, list);
>+
>+  if (is_tcf_mirred_egress_mirror(a) && protocol == htons(ETH_P_ALL)) {
>+  struct dsa_mall_mirror_tc_entry *mirror;
>+
>+  mall_tc_entry->type = DSA_PORT_MALL_MIRROR;
>+  mirror = _tc_entry->mirror;
>+
>+  ifindex = tcf_mirred_ifindex(a);
>+  to_dev = __dev_get_by_index(net, ifindex);
>+  if (!to_dev) {
>+  err = -EINVAL;
>+  goto err_add_action;
>+  }
>+
>+  if (!dsa_slave_dev_check(to_dev)) {
>+  err = -EOPNOTSUPP;
>+  goto err_add_action;
>+  }
>+
>+  to_p = netdev_priv(to_dev);
>+
>+  mirror->to_local_port = to_p->port;
>+  mirror->ingress = ingress;
>+
>+  err = ds->ops->port_mirror_add(ds, p->port, mirror, ingress);
>+  }
>+
>+  if (err)
>+  goto err_add_action;
>+
>+  list_add_tail(_tc_entry->list, >mall_tc_list);
>+  return 0;
>+
>+err_add_action:
>+  kfree(mall_tc_entry);
>+  return err;
>+}

[PATCH net] mlx4: xdp_prog becomes inactive after ethtool '-L' or '-G'

2017-01-28 Thread Martin KaFai Lau

If the rx-queues ever get re-initialized (e.g. by changing the
number of rx-queues with ethtool -L), the existing xdp_prog becomes
inactive.

The bug is that the xdp_prog ptr has not been carried over from
the old rx-queues to the new rx-queues

Fixes: 47a38e155037 ("net/mlx4_en: add support for fast rx drop bpf program")
Signed-off-by: Martin KaFai Lau 
---
 drivers/net/ethernet/mellanox/mlx4/en_ethtool.c |  4 +-
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c  | 52 -
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h|  3 +-
 3 files changed, 47 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c 
b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index d5a9372ed84d..9aa422691954 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -1099,7 +1099,7 @@ static int mlx4_en_set_ringparam(struct net_device *dev,
memcpy(_prof, priv->prof, sizeof(struct mlx4_en_port_profile));
new_prof.tx_ring_size = tx_size;
new_prof.rx_ring_size = rx_size;
-   err = mlx4_en_try_alloc_resources(priv, tmp, _prof);
+   err = mlx4_en_try_alloc_resources(priv, tmp, _prof, true);
if (err)
goto out;
 
@@ -1774,7 +1774,7 @@ static int mlx4_en_set_channels(struct net_device *dev,
new_prof.tx_ring_num[TX_XDP] = xdp_count;
new_prof.rx_ring_num = channel->rx_count;
 
-   err = mlx4_en_try_alloc_resources(priv, tmp, _prof);
+   err = mlx4_en_try_alloc_resources(priv, tmp, _prof, true);
if (err)
goto out;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c 
b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 761f8b12399c..f4179086b3c6 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2184,23 +2184,57 @@ static void mlx4_en_update_priv(struct mlx4_en_priv *dst,
 
 int mlx4_en_try_alloc_resources(struct mlx4_en_priv *priv,
struct mlx4_en_priv *tmp,
-   struct mlx4_en_port_profile *prof)
+   struct mlx4_en_port_profile *prof,
+   bool carry_xdp_prog)
 {
-   int t;
+   struct bpf_prog *xdp_prog = NULL;
+   int err;
+   int i;
 
mlx4_en_copy_priv(tmp, priv, prof);
 
+   if (carry_xdp_prog) {
+   /* All rx_rings has the same xdp_prog.  Pick the first one */
+   xdp_prog = rcu_dereference_protected(
+   priv->rx_ring[0]->xdp_prog,
+   lockdep_is_held(&priv->mdev->state_lock));
+
+   if (xdp_prog) {
+   xdp_prog = bpf_prog_add(xdp_prog, tmp->rx_ring_num);
+   if (IS_ERR(xdp_prog)) {
+   err = PTR_ERR(xdp_prog);
+   xdp_prog = NULL;
+   goto err_free;
+   }
+   }
+   }
+
if (mlx4_en_alloc_resources(tmp)) {
en_warn(priv,
"%s: Resource allocation failed, using previous configuration\n",
__func__);
-   for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) {
-   kfree(tmp->tx_ring[t]);
-   kfree(tmp->tx_cq[t]);
-   }
-   return -ENOMEM;
+   err = -ENOMEM;
+   goto err_free;
+   }
+
+   if (xdp_prog) {
+   for (i = 0; i < tmp->rx_ring_num; i++)
+   rcu_assign_pointer(tmp->rx_ring[i]->xdp_prog,
+  xdp_prog);
}
+
return 0;
+
+err_free:
+   if (xdp_prog)
+   bpf_prog_sub(xdp_prog, tmp->rx_ring_num);
+
+   for (i = 0; i < MLX4_EN_NUM_TX_TYPES; i++) {
+   kfree(tmp->tx_ring[i]);
+   kfree(tmp->tx_cq[i]);
+   }
+
+   return err;
 }
 
 void mlx4_en_safe_replace_resources(struct mlx4_en_priv *priv,
@@ -2755,7 +2789,7 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
en_warn(priv, "Reducing the number of TX rings, to not exceed the max total rings number.\n");
}
 
-   err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof);
+   err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof, false);
if (err) {
if (prog)
bpf_prog_sub(prog, priv->rx_ring_num - 1);
@@ -3499,7 +3533,7 @@ int mlx4_en_reset_config(struct net_device *dev,
memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile));
memcpy(&new_prof.hwtstamp_config, &ts_config, sizeof(ts_config));
 
-   err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof);
+   err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof, true);
if (err)
goto out;
 
diff --git

67 matches

Mail list logo