[PATCH] (resend) ixgbe: always initialize setup_fc

2016-07-01 Thread Patrick McLean
Gmail mangled my first message, sorry about that. Second attempt.

In ixgbe_init_mac_link_ops_X550em, the code has a special case for
backplane media type, but does not fall through to the default case,
so the setup_fc never gets initialized. This causes a panic when it
later tries to set up the card, and the kernel dereferences the null
pointer.

This patch lets the function fall through, which initializes
setup_fc properly.
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
index 19b75cd..73e2de7 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
@@ -1653,7 +1653,6 @@ static void ixgbe_init_mac_link_ops_X550em(struct ixgbe_hw *hw)
 		if (hw->device_id == IXGBE_DEV_ID_X550EM_A_SGMII ||
 		hw->device_id == IXGBE_DEV_ID_X550EM_A_SGMII_L)
 			mac->ops.setup_link = ixgbe_setup_sgmii;
-		break;
 	default:
 		mac->ops.setup_fc = ixgbe_setup_fc_x550em;
 		break;


[PATCH net-next 03/11] bnxt_en: Do function reset on the 1st PF open only.

2016-07-01 Thread Michael Chan
Calling the firmware to do function reset on the PF will kill all the VFs.
To prevent that, we call function reset on the 1st PF open before any VF
can be activated.  On subsequent PF opens (with possibly some active VFs),
a bit has been set and we'll skip the function reset.  VF driver will
always do function reset on every open.  If there is an AER event, we will
always do function reset.

Signed-off-by: Michael Chan 
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 22 --
 drivers/net/ethernet/broadcom/bnxt/bnxt.h |  1 +
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c 
b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 673f4d6..b489fb6 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -5295,12 +5295,19 @@ static int bnxt_open(struct net_device *dev)
struct bnxt *bp = netdev_priv(dev);
int rc = 0;
 
-   rc = bnxt_hwrm_func_reset(bp);
-   if (rc) {
-   netdev_err(bp->dev, "hwrm chip reset failure rc: %x\n",
-  rc);
-   rc = -1;
-   return rc;
+   if (!test_bit(BNXT_STATE_FN_RST_DONE, >state)) {
+   rc = bnxt_hwrm_func_reset(bp);
+   if (rc) {
+   netdev_err(bp->dev, "hwrm chip reset failure rc: %x\n",
+  rc);
+   rc = -EBUSY;
+   return rc;
+   }
+   /* Do func_reset during the 1st PF open only to prevent killing
+* the VFs when the PF is brought down and up.
+*/
+   if (BNXT_PF(bp))
+   set_bit(BNXT_STATE_FN_RST_DONE, >state);
}
return __bnxt_open_nic(bp, true, true);
 }
@@ -6676,6 +6683,7 @@ static pci_ers_result_t bnxt_io_error_detected(struct 
pci_dev *pdev,
   pci_channel_state_t state)
 {
struct net_device *netdev = pci_get_drvdata(pdev);
+   struct bnxt *bp = netdev_priv(netdev);
 
netdev_info(netdev, "PCI I/O error detected\n");
 
@@ -6690,6 +6698,8 @@ static pci_ers_result_t bnxt_io_error_detected(struct 
pci_dev *pdev,
if (netif_running(netdev))
bnxt_close(netdev);
 
+   /* So that func_reset will be done during slot_reset */
+   clear_bit(BNXT_STATE_FN_RST_DONE, >state);
pci_disable_device(pdev);
rtnl_unlock();
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h 
b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 084b3f2..1d5a3cd 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1018,6 +1018,7 @@ struct bnxt {
unsigned long   state;
 #define BNXT_STATE_OPEN0
 #define BNXT_STATE_IN_SP_TASK  1
+#define BNXT_STATE_FN_RST_DONE 2
 
struct bnxt_irq *irq_tbl;
u8  mac_addr[ETH_ALEN];
-- 
1.8.3.1



[PATCH net-next 02/11] bnxt_en: Update firmware spec. to 1.3.0.

2016-07-01 Thread Michael Chan
And update driver version to 1.3.0.

Signed-off-by: Michael Chan 
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.h | 11 ++--
 drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h | 87 ++-
 2 files changed, 66 insertions(+), 32 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h 
b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 927ece9..084b3f2 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -11,10 +11,10 @@
 #define BNXT_H
 
 #define DRV_MODULE_NAME"bnxt_en"
-#define DRV_MODULE_VERSION "1.2.0"
+#define DRV_MODULE_VERSION "1.3.0"
 
 #define DRV_VER_MAJ1
-#define DRV_VER_MIN0
+#define DRV_VER_MIN3
 #define DRV_VER_UPD0
 
 struct tx_bd {
@@ -359,7 +359,8 @@ struct rx_tpa_end_cmp {
 RX_TPA_END_CMP_FLAGS_PLACEMENT_ANY_GRO)
 
 #define TPA_END_GRO_TS(rx_tpa_end) \
-   ((rx_tpa_end)->rx_tpa_end_cmp_tsdelta & cpu_to_le32(RX_TPA_END_GRO_TS))
+   (!!((rx_tpa_end)->rx_tpa_end_cmp_tsdelta &  \
+   cpu_to_le32(RX_TPA_END_GRO_TS)))
 
 struct rx_tpa_end_cmp_ext {
__le32 rx_tpa_end_cmp_dup_acks;
@@ -753,8 +754,8 @@ struct bnxt_vf_info {
 struct bnxt_pf_info {
 #define BNXT_FIRST_PF_FID  1
 #define BNXT_FIRST_VF_FID  128
-   u32 fw_fid;
-   u8  port_id;
+   u16 fw_fid;
+   u16 port_id;
u8  mac_addr[ETH_ALEN];
u16 max_rsscos_ctxs;
u16 max_cp_rings;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h 
b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
index 05e3c49..517567f 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
@@ -105,6 +105,7 @@ struct hwrm_async_event_cmpl {
#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED (0x4UL << 
0)
#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_NOT_ALLOWED 
(0x5UL << 0)
#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE (0x6UL << 
0)
+   #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_PHY_CFG_CHANGE (0x7UL << 0)
#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_UNLOAD   (0x10UL << 0)
#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_LOAD (0x11UL << 0)
#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD (0x20UL << 0)
@@ -484,12 +485,12 @@ struct hwrm_async_event_cmpl_hwrm_error {
#define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA1_TIMESTAMP 0x1UL
 };
 
-/* HW Resource Manager Specification 1.2.2 */
+/* HW Resource Manager Specification 1.3.0 */
 #define HWRM_VERSION_MAJOR 1
-#define HWRM_VERSION_MINOR 2
-#define HWRM_VERSION_UPDATE2
+#define HWRM_VERSION_MINOR 3
+#define HWRM_VERSION_UPDATE0
 
-#define HWRM_VERSION_STR   "1.2.2"
+#define HWRM_VERSION_STR   "1.3.0"
 /*
  * Following is the signature for HWRM message field that indicates not
  * applicable (All F's). Need to cast it the size of the field if needed.
@@ -611,6 +612,9 @@ struct cmd_nums {
#define HWRM_FWD_RESP  (0xd2UL)
#define HWRM_FWD_ASYNC_EVENT_CMPL  (0xd3UL)
#define HWRM_TEMP_MONITOR_QUERY(0xe0UL)
+   #define HWRM_WOL_FILTER_ALLOC  (0xf0UL)
+   #define HWRM_WOL_FILTER_FREE   (0xf1UL)
+   #define HWRM_WOL_FILTER_QCFG   (0xf2UL)
#define HWRM_DBG_READ_DIRECT   (0xff10UL)
#define HWRM_DBG_READ_INDIRECT (0xff11UL)
#define HWRM_DBG_WRITE_DIRECT  (0xff12UL)
@@ -1020,6 +1024,10 @@ struct hwrm_func_qcaps_output {
#define FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED   0x1UL
#define FUNC_QCAPS_RESP_FLAGS_GLOBAL_MSIX_AUTOMASKING  0x2UL
#define FUNC_QCAPS_RESP_FLAGS_PTP_SUPPORTED 0x4UL
+   #define FUNC_QCAPS_RESP_FLAGS_ROCE_V1_SUPPORTED 0x8UL
+   #define FUNC_QCAPS_RESP_FLAGS_ROCE_V2_SUPPORTED 0x10UL
+   #define FUNC_QCAPS_RESP_FLAGS_WOL_MAGICPKT_SUPPORTED   0x20UL
+   #define FUNC_QCAPS_RESP_FLAGS_WOL_BMP_SUPPORTED 0x40UL
u8 mac_address[6];
__le16 max_rsscos_ctx;
__le16 max_cmpl_rings;
@@ -1066,8 +1074,9 @@ struct hwrm_func_qcfg_output {
__le16 fid;
__le16 port_id;
__le16 vlan;
-   u8 unused_0;
-   u8 unused_1;
+   __le16 flags;
+   #define FUNC_QCFG_RESP_FLAGS_OOB_WOL_MAGICPKT_ENABLED  0x1UL
+   #define FUNC_QCFG_RESP_FLAGS_OOB_WOL_BMP_ENABLED0x2UL
u8 mac_address[6];
__le16 pci_id;
__le16 alloc_rsscos_ctx;
@@ -1086,23 +1095,23 @@ struct hwrm_func_qcfg_output {
#define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_5 (0x3UL << 0)
 

[PATCH net-next 10/11] bnxt_en: Assign netdev->dev_port with port ID.

2016-07-01 Thread Michael Chan
This is useful for multi-function devices.

Signed-off-by: Michael Chan 
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c 
b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 2740ac3..18be62b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -4026,6 +4026,7 @@ int bnxt_hwrm_func_qcaps(struct bnxt *bp)
 
pf->fw_fid = le16_to_cpu(resp->fid);
pf->port_id = le16_to_cpu(resp->port_id);
+   bp->dev->dev_port = pf->port_id;
memcpy(pf->mac_addr, resp->mac_address, ETH_ALEN);
memcpy(bp->dev->dev_addr, pf->mac_addr, ETH_ALEN);
pf->max_rsscos_ctxs = le16_to_cpu(resp->max_rsscos_ctx);
-- 
1.8.3.1



[PATCH net-next 08/11] bnxt_en: Increase maximum supported MTU to 9500.

2016-07-01 Thread Michael Chan
From: Vasundhara Volam 

Signed-off-by: Vasundhara Volam 
Signed-off-by: Michael Chan 
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c 
b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 28a5aee..9134268 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -6049,7 +6049,7 @@ static int bnxt_change_mtu(struct net_device *dev, int 
new_mtu)
 {
struct bnxt *bp = netdev_priv(dev);
 
-   if (new_mtu < 60 || new_mtu > 9000)
+   if (new_mtu < 60 || new_mtu > 9500)
return -EINVAL;
 
if (netif_running(dev))
-- 
1.8.3.1



[PATCH net-next 07/11] bnxt_en: Enable MRU enables bit when configuring VNIC MRU.

2016-07-01 Thread Michael Chan
For correctness, the MRU enables bit must be set when passing the
MRU to firmware during vnic configuration.

Signed-off-by: Michael Chan 
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c 
b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index b489fb6..28a5aee 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -3414,7 +3414,8 @@ static int bnxt_hwrm_vnic_cfg(struct bnxt *bp, u16 
vnic_id)
bnxt_hwrm_cmd_hdr_init(bp, , HWRM_VNIC_CFG, -1, -1);
/* Only RSS support for now TBD: COS & LB */
req.enables = cpu_to_le32(VNIC_CFG_REQ_ENABLES_DFLT_RING_GRP |
- VNIC_CFG_REQ_ENABLES_RSS_RULE);
+ VNIC_CFG_REQ_ENABLES_RSS_RULE |
+ VNIC_CFG_REQ_ENABLES_MRU);
req.rss_rule = cpu_to_le16(vnic->fw_rss_cos_lb_ctx);
req.cos_rule = cpu_to_le16(0x);
if (vnic->flags & BNXT_VNIC_RSS_FLAG)
-- 
1.8.3.1



[PATCH net-next 11/11] bnxt_en: Allow statistics DMA to be configurable using ethtool -C.

2016-07-01 Thread Michael Chan
The allowable range is 0.25 seconds to 1 second interval.  Default is
1 second.

Signed-off-by: Michael Chan 
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c |  4 +++-
 drivers/net/ethernet/broadcom/bnxt/bnxt.h |  5 +
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 25 +--
 3 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c 
b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 18be62b..70b148a 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -3952,7 +3952,7 @@ static int bnxt_hwrm_stat_ctx_alloc(struct bnxt *bp)
 
bnxt_hwrm_cmd_hdr_init(bp, , HWRM_STAT_CTX_ALLOC, -1, -1);
 
-   req.update_period_ms = cpu_to_le32(1000);
+   req.update_period_ms = cpu_to_le32(bp->stats_coal_ticks / 1000);
 
mutex_lock(>hwrm_cmd_lock);
for (i = 0; i < bp->cp_nr_rings; i++) {
@@ -5994,6 +5994,8 @@ static int bnxt_init_board(struct pci_dev *pdev, struct 
net_device *dev)
bp->tx_coal_ticks_irq = 2;
bp->tx_coal_bufs_irq = 2;
 
+   bp->stats_coal_ticks = BNXT_DEF_STATS_COAL_TICKS;
+
init_timer(>timer);
bp->timer.data = (unsigned long)bp;
bp->timer.function = bnxt_timer;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h 
b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 1d5a3cd..2313e37 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1067,6 +1067,11 @@ struct bnxt {
 
 #define BNXT_USEC_TO_COAL_TIMER(x) ((x) * 25 / 2)
 
+   u32 stats_coal_ticks;
+#define BNXT_DEF_STATS_COAL_TICKS   100
+#define BNXT_MIN_STATS_COAL_TICKS25
+#define BNXT_MAX_STATS_COAL_TICKS   100
+
struct work_struct  sp_task;
unsigned long   sp_event;
 #define BNXT_RX_MASK_SP_EVENT  0
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c 
b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 33b3135..0f7dd86 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -56,6 +56,8 @@ static int bnxt_get_coalesce(struct net_device *dev,
coal->tx_coalesce_usecs_irq = bp->tx_coal_ticks_irq;
coal->tx_max_coalesced_frames_irq = bp->tx_coal_bufs_irq;
 
+   coal->stats_block_coalesce_usecs = bp->stats_coal_ticks;
+
return 0;
 }
 
@@ -63,6 +65,7 @@ static int bnxt_set_coalesce(struct net_device *dev,
 struct ethtool_coalesce *coal)
 {
struct bnxt *bp = netdev_priv(dev);
+   bool update_stats = false;
int rc = 0;
 
bp->rx_coal_ticks = coal->rx_coalesce_usecs;
@@ -76,8 +79,26 @@ static int bnxt_set_coalesce(struct net_device *dev,
bp->tx_coal_ticks_irq = coal->tx_coalesce_usecs_irq;
bp->tx_coal_bufs_irq = coal->tx_max_coalesced_frames_irq;
 
-   if (netif_running(dev))
-   rc = bnxt_hwrm_set_coal(bp);
+   if (bp->stats_coal_ticks != coal->stats_block_coalesce_usecs) {
+   u32 stats_ticks = coal->stats_block_coalesce_usecs;
+
+   stats_ticks = clamp_t(u32, stats_ticks,
+ BNXT_MIN_STATS_COAL_TICKS,
+ BNXT_MAX_STATS_COAL_TICKS);
+   stats_ticks = rounddown(stats_ticks, BNXT_MIN_STATS_COAL_TICKS);
+   bp->stats_coal_ticks = stats_ticks;
+   update_stats = true;
+   }
+
+   if (netif_running(dev)) {
+   if (update_stats) {
+   rc = bnxt_close_nic(bp, true, false);
+   if (!rc)
+   rc = bnxt_open_nic(bp, true, false);
+   } else {
+   rc = bnxt_hwrm_set_coal(bp);
+   }
+   }
 
return rc;
 }
-- 
1.8.3.1



[PATCH net-next 05/11] bnxt_en: Request firmware reset after successful firmware update

2016-07-01 Thread Michael Chan
From: Rob Swindell 

Upon successful mgmt processor firmware update, request a self
reset upon next PCIe reset (e.g. system reboot).

Signed-off-by: Rob Swindell 
Signed-off-by: Michael Chan 
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c 
b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 3328aa5..12a5141 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -1059,6 +1059,8 @@ static int bnxt_firmware_reset(struct net_device *dev,
case BNX_DIR_TYPE_APE_FW:
case BNX_DIR_TYPE_APE_PATCH:
req.embedded_proc_type = FW_RESET_REQ_EMBEDDED_PROC_TYPE_MGMT;
+   /* Self-reset APE upon next PCIe reset: */
+   req.selfrst_status = FW_RESET_REQ_SELFRST_STATUS_SELFRSTPCIERST;
break;
case BNX_DIR_TYPE_KONG_FW:
case BNX_DIR_TYPE_KONG_PATCH:
-- 
1.8.3.1



[PATCH net-next 09/11] bnxt_en: Allow promiscuous mode for VF if default VLAN is enabled.

2016-07-01 Thread Michael Chan
With a default VLAN, the VF has its own VLAN domain and it can receive
all traffic within that domain.

Signed-off-by: Michael Chan 
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 15 ---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c 
b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 9134268..2740ac3 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -4316,6 +4316,16 @@ static int bnxt_alloc_rfs_vnics(struct bnxt *bp)
 #endif
 }
 
+/* Allow PF and VF with default VLAN to be in promiscuous mode */
+static bool bnxt_promisc_ok(struct bnxt *bp)
+{
+#ifdef CONFIG_BNXT_SRIOV
+   if (BNXT_VF(bp) && !bp->vf.vlan)
+   return false;
+#endif
+   return true;
+}
+
 static int bnxt_cfg_rx_mode(struct bnxt *);
 static bool bnxt_mc_list_updated(struct bnxt *, u32 *);
 
@@ -4381,7 +4391,7 @@ static int bnxt_init_chip(struct bnxt *bp, bool 
irq_re_init)
 
vnic->rx_mask = CFA_L2_SET_RX_MASK_REQ_MASK_BCAST;
 
-   if ((bp->dev->flags & IFF_PROMISC) && BNXT_PF(bp))
+   if ((bp->dev->flags & IFF_PROMISC) && bnxt_promisc_ok(bp))
vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS;
 
if (bp->dev->flags & IFF_ALLMULTI) {
@@ -5528,8 +5538,7 @@ static void bnxt_set_rx_mode(struct net_device *dev)
  CFA_L2_SET_RX_MASK_REQ_MASK_MCAST |
  CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST);
 
-   /* Only allow PF to be in promiscuous mode */
-   if ((dev->flags & IFF_PROMISC) && BNXT_PF(bp))
+   if ((dev->flags & IFF_PROMISC) && bnxt_promisc_ok(bp))
mask |= CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS;
 
uc_update = bnxt_uc_list_updated(bp);
-- 
1.8.3.1



[PATCH net-next 00/11] bnxt_en updates for net-next.

2016-07-01 Thread Michael Chan
Mostly small miscellaneous changes.

Please review for net-next.  Thanks.

Michael Chan (7):
  bnxt_en: VF/NPAR should return -EOPNOTSUPP for unsupported ethtool
ops.
  bnxt_en: Update firmware spec. to 1.3.0.
  bnxt_en: Do function reset on the 1st PF open only.
  bnxt_en: Enable MRU enables bit when configuring VNIC MRU.
  bnxt_en: Allow promiscuous mode for VF if default VLAN is enabled.
  bnxt_en: Assign netdev->dev_port with port ID.
  bnxt_en: Allow statistics DMA to be configurable using ethtool -C.

Rob Swindell (3):
  bnxt_en: Add support for updating flash more securely
  bnxt_en: Request firmware reset after successful firmware update
  bnxt_en: Add support for firmware updates for additional processors.

Vasundhara Volam (1):
  bnxt_en: Increase maximum supported MTU to 9500.

 drivers/net/ethernet/broadcom/bnxt/bnxt.c  | 47 +---
 drivers/net/ethernet/broadcom/bnxt/bnxt.h  | 17 +++--
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c  | 54 --
 drivers/net/ethernet/broadcom/bnxt/bnxt_fw_hdr.h   |  1 +
 drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h  | 87 +++---
 drivers/net/ethernet/broadcom/bnxt/bnxt_nvm_defs.h |  1 +
 6 files changed, 158 insertions(+), 49 deletions(-)

-- 
1.8.3.1



[PATCH net-next 06/11] bnxt_en: Add support for firmware updates for additional processors.

2016-07-01 Thread Michael Chan
From: Rob Swindell 

Add support to the Ethtool FLASHDEV command handler for additional
firmware types to cover all the on-chip processors.

Signed-off-by: Rob Swindell 
Signed-off-by: Michael Chan 
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 20 
 drivers/net/ethernet/broadcom/bnxt/bnxt_fw_hdr.h  |  1 +
 2 files changed, 21 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c 
b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 12a5141..33b3135 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -1094,9 +1094,27 @@ static int bnxt_flash_firmware(struct net_device *dev,
case BNX_DIR_TYPE_BOOTCODE_2:
code_type = CODE_BOOT;
break;
+   case BNX_DIR_TYPE_CHIMP_PATCH:
+   code_type = CODE_CHIMP_PATCH;
+   break;
case BNX_DIR_TYPE_APE_FW:
code_type = CODE_MCTP_PASSTHRU;
break;
+   case BNX_DIR_TYPE_APE_PATCH:
+   code_type = CODE_APE_PATCH;
+   break;
+   case BNX_DIR_TYPE_KONG_FW:
+   code_type = CODE_KONG_FW;
+   break;
+   case BNX_DIR_TYPE_KONG_PATCH:
+   code_type = CODE_KONG_PATCH;
+   break;
+   case BNX_DIR_TYPE_BONO_FW:
+   code_type = CODE_BONO_FW;
+   break;
+   case BNX_DIR_TYPE_BONO_PATCH:
+   code_type = CODE_BONO_PATCH;
+   break;
default:
netdev_err(dev, "Unsupported directory entry type: %u\n",
   dir_type);
@@ -1151,6 +1169,8 @@ static bool bnxt_dir_type_is_ape_bin_format(u16 dir_type)
case BNX_DIR_TYPE_APE_PATCH:
case BNX_DIR_TYPE_KONG_FW:
case BNX_DIR_TYPE_KONG_PATCH:
+   case BNX_DIR_TYPE_BONO_FW:
+   case BNX_DIR_TYPE_BONO_PATCH:
return true;
}
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_fw_hdr.h 
b/drivers/net/ethernet/broadcom/bnxt/bnxt_fw_hdr.h
index 461675c..82bf44a 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_fw_hdr.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_fw_hdr.h
@@ -70,6 +70,7 @@ enum SUPPORTED_CODE {
CODE_KONG_PATCH,/* 18 - KONG Patch firmware */
CODE_BONO_FW,   /* 19 - BONO firmware */
CODE_BONO_PATCH,/* 20 - BONO Patch firmware */
+   CODE_CHIMP_PATCH,   /* 21 - ChiMP Patch firmware */
 
MAX_CODE_TYPE,
 };
-- 
1.8.3.1



[PATCH net-next 04/11] bnxt_en: Add support for updating flash more securely

2016-07-01 Thread Michael Chan
From: Rob Swindell 

To support Secure Firmware Update, we must be able to allocate
a staging area in the Flash.  This patch adds support for the
"update" type to tell firmware to do that.

Signed-off-by: Rob Swindell 
Signed-off-by: Michael Chan 
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c  | 3 ++-
 drivers/net/ethernet/broadcom/bnxt/bnxt_nvm_defs.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c 
b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index c63ed2f..3328aa5 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -1186,7 +1186,8 @@ static int bnxt_flash_firmware_from_file(struct 
net_device *dev,
const struct firmware  *fw;
int rc;
 
-   if (bnxt_dir_type_is_executable(dir_type) == false)
+   if (dir_type != BNX_DIR_TYPE_UPDATE &&
+   bnxt_dir_type_is_executable(dir_type) == false)
return -EINVAL;
 
rc = request_firmware(, filename, >dev);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_nvm_defs.h 
b/drivers/net/ethernet/broadcom/bnxt/bnxt_nvm_defs.h
index 40a7b0e..73f2249 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_nvm_defs.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_nvm_defs.h
@@ -13,6 +13,7 @@
 enum bnxt_nvm_directory_type {
BNX_DIR_TYPE_UNUSED = 0,
BNX_DIR_TYPE_PKG_LOG = 1,
+   BNX_DIR_TYPE_UPDATE = 2,
BNX_DIR_TYPE_CHIMP_PATCH = 3,
BNX_DIR_TYPE_BOOTCODE = 4,
BNX_DIR_TYPE_VPD = 5,
-- 
1.8.3.1



[PATCH net-next 01/11] bnxt_en: VF/NPAR should return -EOPNOTSUPP for unsupported ethtool ops.

2016-07-01 Thread Michael Chan
Returning 0 for doing nothing is confusing to the user.

Signed-off-by: Michael Chan 
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c 
b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index d7ab2d79..c63ed2f 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -961,7 +961,7 @@ static int bnxt_set_pauseparam(struct net_device *dev,
struct bnxt_link_info *link_info = >link_info;
 
if (!BNXT_SINGLE_PF(bp))
-   return rc;
+   return -EOPNOTSUPP;
 
if (epause->autoneg) {
if (!(link_info->autoneg & BNXT_AUTONEG_SPEED))
@@ -1483,7 +1483,7 @@ static int bnxt_set_eee(struct net_device *dev, struct 
ethtool_eee *edata)
int rc = 0;
 
if (!BNXT_SINGLE_PF(bp))
-   return 0;
+   return -EOPNOTSUPP;
 
if (!(bp->flags & BNXT_FLAG_EEE_CAP))
return -EOPNOTSUPP;
-- 
1.8.3.1



Re: [patch net-next 10/42] mlxsw: spectrum_router: Add basic ipv4 router initialization

2016-07-01 Thread Ido Schimmel
Fri, Jul 01, 2016 at 05:39:01PM IDT, d...@cumulusnetworks.com wrote:
>On 7/1/16 8:04 AM, Jiri Pirko wrote:
>> diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h 
>> b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
>> index 05d5fcc..c2ac037 100644
>> --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
>> +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
>> @@ -74,6 +74,8 @@
>>
>>  #define MLXSW_SP_CELL_FACTOR 2  /* 2 * cell_size / (IPG + cell_size + 
>> 1) */
>>
>> +#define MLXSW_SP_RIF_MAX 800
>
>At most 800 RIFs can be created?

Yes.

Do you have use cases that require more than that? If so, how many
router interfaces would be required? That would be good feedback for
us.

>Why 800?

Currently, trying to configure a RIF with a higher index will result in
firmware errors. We plan to increase this number in the future (don't
expect it to be infinite...).

We are currently implementing a new mechanism in the driver that will
query these resources from the firmware, so we won't need to hardcode
them.


Re: [patch net-next 11/42] mlxsw: spectrum: Add router interface struct

2016-07-01 Thread Ido Schimmel
Fri, Jul 01, 2016 at 07:16:23PM IDT, d...@cumulusnetworks.com wrote:
>On 7/1/16 8:04 AM, Jiri Pirko wrote:
>> @@ -327,6 +333,19 @@ mlxsw_sp_port_vport_find_by_fid(const struct 
>> mlxsw_sp_port *mlxsw_sp_port,
>>  return NULL;
>>  }
>>
>> +static inline struct mlxsw_sp_rif *
>> +mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
>> + const struct net_device *dev)
>> +{
>> +int i;
>> +
>> +for (i = 0; i < MLXSW_SP_RIF_MAX; i++)
>> +if (mlxsw_sp->rifs[i] && mlxsw_sp->rifs[i]->dev == dev)
>> +return mlxsw_sp->rifs[i];
>> +
>> +return NULL;
>> +}
>> +
>
>Why not add the rif to mlxsw_sp_port which is the priv data for a mlxsw dev?

We create 'mlxsw_sp_port' for physical ports and vPorts (VLAN netdevs),
but not for master devices such as bridge and team, which can also be
assigned RIFs. The reason for this is that in most cases any bridge /
team setting eventually needs to be programmed to the device via each
slave 'mlxsw_sp_port'. You can see, for example, that in our netdev
notification block we simply propagate team/bond CHANGEUPPER to each
slave port.

Thanks for taking the time to review this!


[PATCH 1/2] net: ethernet: davinci_emac: use phydev from struct net_device

2016-07-01 Thread Philippe Reynes
The private structure contains a pointer to phydev, but the structure
net_device already contains such a pointer. So we can remove the phydev
pointer from the private structure, and update the driver to use the
one contained in struct net_device.

Signed-off-by: Philippe Reynes 
---
 drivers/net/ethernet/ti/davinci_emac.c |   61 ++--
 1 files changed, 27 insertions(+), 34 deletions(-)

diff --git a/drivers/net/ethernet/ti/davinci_emac.c 
b/drivers/net/ethernet/ti/davinci_emac.c
index f56d66e..76683c7 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c
@@ -348,7 +348,6 @@ struct emac_priv {
u32 rx_addr_type;
const char *phy_id;
struct device_node *phy_node;
-   struct phy_device *phydev;
spinlock_t lock;
/*platform specific members*/
void (*int_enable) (void);
@@ -496,9 +495,8 @@ static void emac_get_drvinfo(struct net_device *ndev,
 static int emac_get_settings(struct net_device *ndev,
 struct ethtool_cmd *ecmd)
 {
-   struct emac_priv *priv = netdev_priv(ndev);
-   if (priv->phydev)
-   return phy_ethtool_gset(priv->phydev, ecmd);
+   if (ndev->phydev)
+   return phy_ethtool_gset(ndev->phydev, ecmd);
else
return -EOPNOTSUPP;
 
@@ -514,9 +512,8 @@ static int emac_get_settings(struct net_device *ndev,
  */
 static int emac_set_settings(struct net_device *ndev, struct ethtool_cmd *ecmd)
 {
-   struct emac_priv *priv = netdev_priv(ndev);
-   if (priv->phydev)
-   return phy_ethtool_sset(priv->phydev, ecmd);
+   if (ndev->phydev)
+   return phy_ethtool_sset(ndev->phydev, ecmd);
else
return -EOPNOTSUPP;
 
@@ -651,8 +648,8 @@ static void emac_update_phystatus(struct emac_priv *priv)
mac_control = emac_read(EMAC_MACCONTROL);
cur_duplex = (mac_control & EMAC_MACCONTROL_FULLDUPLEXEN) ?
DUPLEX_FULL : DUPLEX_HALF;
-   if (priv->phydev)
-   new_duplex = priv->phydev->duplex;
+   if (ndev->phydev)
+   new_duplex = ndev->phydev->duplex;
else
new_duplex = DUPLEX_FULL;
 
@@ -1454,7 +1451,7 @@ static void emac_poll_controller(struct net_device *ndev)
 static void emac_adjust_link(struct net_device *ndev)
 {
struct emac_priv *priv = netdev_priv(ndev);
-   struct phy_device *phydev = priv->phydev;
+   struct phy_device *phydev = ndev->phydev;
unsigned long flags;
int new_state = 0;
 
@@ -1483,7 +1480,7 @@ static void emac_adjust_link(struct net_device *ndev)
}
if (new_state) {
emac_update_phystatus(priv);
-   phy_print_status(priv->phydev);
+   phy_print_status(ndev->phydev);
}
 
spin_unlock_irqrestore(>lock, flags);
@@ -1505,15 +1502,13 @@ static void emac_adjust_link(struct net_device *ndev)
  */
 static int emac_devioctl(struct net_device *ndev, struct ifreq *ifrq, int cmd)
 {
-   struct emac_priv *priv = netdev_priv(ndev);
-
if (!(netif_running(ndev)))
return -EINVAL;
 
/* TODO: Add phy read and write and private statistics get feature */
 
-   if (priv->phydev)
-   return phy_mii_ioctl(priv->phydev, ifrq, cmd);
+   if (ndev->phydev)
+   return phy_mii_ioctl(ndev->phydev, ifrq, cmd);
else
return -EOPNOTSUPP;
 }
@@ -1542,6 +1537,7 @@ static int emac_dev_open(struct net_device *ndev)
int res_num = 0, irq_num = 0;
int i = 0;
struct emac_priv *priv = netdev_priv(ndev);
+   struct phy_device *phydev = NULL;
 
ret = pm_runtime_get_sync(>pdev->dev);
if (ret < 0) {
@@ -1607,12 +1603,10 @@ static int emac_dev_open(struct net_device *ndev)
 
cpdma_ctlr_start(priv->dma);
 
-   priv->phydev = NULL;
-
if (priv->phy_node) {
-   priv->phydev = of_phy_connect(ndev, priv->phy_node,
- _adjust_link, 0, 0);
-   if (!priv->phydev) {
+   phydev = of_phy_connect(ndev, priv->phy_node,
+   _adjust_link, 0, 0);
+   if (!phydev) {
dev_err(emac_dev, "could not connect to phy %s\n",
priv->phy_node->full_name);
ret = -ENODEV;
@@ -1621,7 +1615,7 @@ static int emac_dev_open(struct net_device *ndev)
}
 
/* use the first phy on the bus if pdata did not give us a phy id */
-   if (!priv->phydev && !priv->phy_id) {
+   if (!phydev && !priv->phy_id) {
struct device *phy;
 
phy = bus_find_device(_bus_type, NULL, NULL,
@@ -1630,16 +1624,15 @@ static int emac_dev_open(struct net_device *ndev)
priv->phy_id = dev_name(phy);
}
 
-   if 

[PATCH 2/2] net: ethernet: davinci_emac: use phy_ethtool_{get|set}_link_ksettings

2016-07-01 Thread Philippe Reynes
There are two generic functions, phy_ethtool_{get|set}_link_ksettings,
so we can use them instead of defining the same code in the driver.

Signed-off-by: Philippe Reynes 
---
 drivers/net/ethernet/ti/davinci_emac.c |   39 +--
 1 files changed, 2 insertions(+), 37 deletions(-)

diff --git a/drivers/net/ethernet/ti/davinci_emac.c 
b/drivers/net/ethernet/ti/davinci_emac.c
index 76683c7..c6c5465 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c
@@ -485,41 +485,6 @@ static void emac_get_drvinfo(struct net_device *ndev,
 }
 
 /**
- * emac_get_settings - Get EMAC settings
- * @ndev: The DaVinci EMAC network adapter
- * @ecmd: ethtool command
- *
- * Executes ethool get command
- *
- */
-static int emac_get_settings(struct net_device *ndev,
-struct ethtool_cmd *ecmd)
-{
-   if (ndev->phydev)
-   return phy_ethtool_gset(ndev->phydev, ecmd);
-   else
-   return -EOPNOTSUPP;
-
-}
-
-/**
- * emac_set_settings - Set EMAC settings
- * @ndev: The DaVinci EMAC network adapter
- * @ecmd: ethtool command
- *
- * Executes ethool set command
- *
- */
-static int emac_set_settings(struct net_device *ndev, struct ethtool_cmd *ecmd)
-{
-   if (ndev->phydev)
-   return phy_ethtool_sset(ndev->phydev, ecmd);
-   else
-   return -EOPNOTSUPP;
-
-}
-
-/**
  * emac_get_coalesce - Get interrupt coalesce settings for this device
  * @ndev : The DaVinci EMAC network adapter
  * @coal : ethtool coalesce settings structure
@@ -622,12 +587,12 @@ static int emac_set_coalesce(struct net_device *ndev,
  */
 static const struct ethtool_ops ethtool_ops = {
.get_drvinfo = emac_get_drvinfo,
-   .get_settings = emac_get_settings,
-   .set_settings = emac_set_settings,
.get_link = ethtool_op_get_link,
.get_coalesce = emac_get_coalesce,
.set_coalesce =  emac_set_coalesce,
.get_ts_info = ethtool_op_get_ts_info,
+   .get_link_ksettings = phy_ethtool_get_link_ksettings,
+   .set_link_ksettings = phy_ethtool_set_link_ksettings,
 };
 
 /**
-- 
1.7.4.4



[PATCHv3 wl-drv-next 1/2] add basic register-field manipulation macros

2016-07-01 Thread Jakub Kicinski
C bitfields are problematic and best avoided.  Developers
interacting with hardware registers find themselves searching
for easy-to-use alternatives.  Common approach is to define
structures or sets of macros containing mask and shift pair.
Operations on the register are then performed as follows:

 field = (reg >> shift) & mask;

 reg &= ~(mask << shift);
 reg |= (field & mask) << shift;

Defining shift and mask separately is tedious.  Ivo van Doorn
came up with an idea of computing them at compilation time
based on a single shifted mask (later refined by Felix) which
can be used like this:

 #define REG_FIELD 0x000ff000

 field = FIELD_GET(REG_FIELD, reg);

 reg &= ~REG_FIELD;
 reg |= FIELD_PUT(REG_FIELD, field);

FIELD_{GET,PUT} macros take care of finding out what the
appropriate shift is based on compilation time ffs operation.

GENMASK can be used to define registers (which is usually
less error-prone and easier to match with datasheets).

This approach is the most convenient I've seen, so to limit code
duplication let's move the macros to a global header file.

Signed-off-by: Jakub Kicinski 
---
 include/linux/bitfield.h | 58 
 include/linux/bug.h  |  3 +++
 2 files changed, 61 insertions(+)
 create mode 100644 include/linux/bitfield.h

diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h
new file mode 100644
index ..d6a36c3c1775
--- /dev/null
+++ b/include/linux/bitfield.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2014 Felix Fietkau 
+ * Copyright (C) 2004 - 2009 Ivo van Doorn 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _LINUX_BITFIELD_H
+#define _LINUX_BITFIELD_H
+
+#include 
+#include 
+#include 
+
+#define _bf_shf(x) (__builtin_ffsll(x) - 1)
+
+#define _BF_FIELD_CHECK(_mask, _val)   \
+   ({  \
+   BUILD_BUG_ON(!(_mask)); \
+   BUILD_BUG_ON(__builtin_constant_p(_val) ?   \
+~((_mask) >> _bf_shf(_mask)) & (_val) :\
+0);\
+   __BUILD_BUG_ON_NOT_POWER_OF_2((_mask) + \
+ (1ULL << _bf_shf(_mask))); \
+   })
+
+#define FIELD_PUT(_mask, _val) \
+   ({  \
+   _BF_FIELD_CHECK(_mask, _val);   \
+   ((u32)(_val) << _bf_shf(_mask)) & (_mask);  \
+   })
+
+#define FIELD_GET(_mask, _val) \
+   ({  \
+   _BF_FIELD_CHECK(_mask, 0);  \
+   (u32)(((_val) & (_mask)) >> _bf_shf(_mask));\
+   })
+
+#define FIELD_PUT64(_mask, _val)   \
+   ({  \
+   _BF_FIELD_CHECK(_mask, _val);   \
+   ((u64)(_val) << _bf_shf(_mask)) & (_mask);  \
+   })
+
+#define FIELD_GET64(_mask, _val)   \
+   ({  \
+   _BF_FIELD_CHECK(_mask, 0);  \
+   (u64)(((_val) & (_mask)) >> _bf_shf(_mask));\
+   })
+
+#endif
diff --git a/include/linux/bug.h b/include/linux/bug.h
index e51b0709e78d..bba5bdae1681 100644
--- a/include/linux/bug.h
+++ b/include/linux/bug.h
@@ -13,6 +13,7 @@ enum bug_trap_type {
 struct pt_regs;
 
 #ifdef __CHECKER__
+#define __BUILD_BUG_ON_NOT_POWER_OF_2(n) (0)
 #define BUILD_BUG_ON_NOT_POWER_OF_2(n) (0)
 #define BUILD_BUG_ON_ZERO(e) (0)
 #define BUILD_BUG_ON_NULL(e) ((void*)0)
@@ -24,6 +25,8 @@ struct pt_regs;
 #else /* __CHECKER__ */
 
 /* Force a compilation error if a constant expression is not a power of 2 */
+#define __BUILD_BUG_ON_NOT_POWER_OF_2(n)   \
+   BUILD_BUG_ON(((n) & ((n) - 1)) != 0)
 #define BUILD_BUG_ON_NOT_POWER_OF_2(n) \
BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0))
 
-- 
1.9.1



[PATCHv3 wl-drv-next 0/2] register-field manipulation macros

2016-07-01 Thread Jakub Kicinski
Hi!

This set moves to a global header file macros which I find
very useful and worth popularising.  The basic problem is
that since C bitfields are not very dependable accessing
subfields of registers becomes slightly inconvenient.
It is nice to have the necessary mask and shift operations
wrapped in a macro.  It is also nice to have that macro
compute the shift amount based on the mask automatically.

My implementation follows what Felix Fietkau has done in
mt76.  Hannes Frederic Sowa suggested more use of standard
Linux/GCC functions.  Since the RFC I've also added a 
compile-time check to validate that the value passed to
setters fits in the mask.

I attempted the use of static inlines instead of macros
but it makes GCC < 6.0 barf at the BUILD_BUG_ON()s.
I also noticed that forcing arguments to be u32 for inlines
makes the compiler use 32bit arithmetic where it could
get away with 64bit before (on 64bit machines, obviously).
That's a potential performance concern but probably not
a very practical one today.  Apart from looking "cleaner"
static inlines would have the advantage that we could #undef
the auxiliary macros at the end of the header.

v3:
Build bot caught a build failure with -Os set.  AFAICT gcc
did not handle temporary variable I put in the macro
expression too well.  I work around that by defining
__BUILD_BUG_ON_NOT_POWER_OF_2 and using it instead of
BUILD_BUG_ON(!tmp || is_power_of_2(tmp)).

Please review and advise on improvements.

If accepted I think would be best to push this through
Kalle's tree, since the only existing user is in
drivers/net/wireless/.

v3:
 - don't use variables in statement expressions;
 - use __BUILD_BUG_ON_NOT_POWER_OF_2.
v2:
 - change Felix's email address.

Jakub Kicinski (2):
  add basic register-field manipulation macros
  mt7601u: use linux/bitfield.h

 drivers/net/wireless/mediatek/mt7601u/dma.h |  2 -
 drivers/net/wireless/mediatek/mt7601u/mt7601u.h |  5 +-
 drivers/net/wireless/mediatek/mt7601u/util.h| 77 -
 include/linux/bitfield.h| 58 +++
 include/linux/bug.h |  3 +
 5 files changed, 65 insertions(+), 80 deletions(-)
 delete mode 100644 drivers/net/wireless/mediatek/mt7601u/util.h
 create mode 100644 include/linux/bitfield.h

-- 
1.9.1



[PATCHv3 wl-drv-next 2/2] mt7601u: use linux/bitfield.h

2016-07-01 Thread Jakub Kicinski
Use the newly added linux/bitfield.h.

Signed-off-by: Jakub Kicinski 
---
 drivers/net/wireless/mediatek/mt7601u/dma.h |  2 -
 drivers/net/wireless/mediatek/mt7601u/mt7601u.h |  5 +-
 drivers/net/wireless/mediatek/mt7601u/util.h| 77 -
 3 files changed, 4 insertions(+), 80 deletions(-)
 delete mode 100644 drivers/net/wireless/mediatek/mt7601u/util.h

diff --git a/drivers/net/wireless/mediatek/mt7601u/dma.h 
b/drivers/net/wireless/mediatek/mt7601u/dma.h
index 978e8a90b87f..166ac71905d2 100644
--- a/drivers/net/wireless/mediatek/mt7601u/dma.h
+++ b/drivers/net/wireless/mediatek/mt7601u/dma.h
@@ -18,8 +18,6 @@
 #include 
 #include 
 
-#include "util.h"
-
 #define MT_DMA_HDR_LEN 4
 #define MT_RX_INFO_LEN 4
 #define MT_FCE_INFO_LEN4
diff --git a/drivers/net/wireless/mediatek/mt7601u/mt7601u.h 
b/drivers/net/wireless/mediatek/mt7601u/mt7601u.h
index 428bd2f10b7b..5ef62e02ce66 100644
--- a/drivers/net/wireless/mediatek/mt7601u/mt7601u.h
+++ b/drivers/net/wireless/mediatek/mt7601u/mt7601u.h
@@ -15,6 +15,7 @@
 #ifndef MT7601U_H
 #define MT7601U_H
 
+#include 
 #include 
 #include 
 #include 
@@ -24,7 +25,6 @@
 #include 
 
 #include "regs.h"
-#include "util.h"
 
 #define MT_CALIBRATE_INTERVAL  (4 * HZ)
 
@@ -282,6 +282,9 @@ struct mt7601u_rxwi;
 
 extern const struct ieee80211_ops mt7601u_ops;
 
+#define MT76_SET   FIELD_PUT
+#define MT76_GET   FIELD_GET
+
 void mt7601u_init_debugfs(struct mt7601u_dev *dev);
 
 u32 mt7601u_rr(struct mt7601u_dev *dev, u32 offset);
diff --git a/drivers/net/wireless/mediatek/mt7601u/util.h 
b/drivers/net/wireless/mediatek/mt7601u/util.h
deleted file mode 100644
index b89140bf1210..
--- a/drivers/net/wireless/mediatek/mt7601u/util.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (C) 2014 Felix Fietkau 
- * Copyright (C) 2004 - 2009 Ivo van Doorn 
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#ifndef __MT76_UTIL_H
-#define __MT76_UTIL_H
-
-/*
- * Power of two check, this will check
- * if the mask that has been given contains and contiguous set of bits.
- * Note that we cannot use the is_power_of_2() function since this
- * check must be done at compile-time.
- */
-#define is_power_of_two(x) ( !((x) & ((x)-1)) )
-#define low_bit_mask(x)( ((x)-1) & ~(x) )
-#define is_valid_mask(x)   is_power_of_two(1LU + (x) + low_bit_mask(x))
-
-/*
- * Macros to find first set bit in a variable.
- * These macros behave the same as the __ffs() functions but
- * the most important difference that this is done during
- * compile-time rather then run-time.
- */
-#define compile_ffs2(__x) \
-   __builtin_choose_expr(((__x) & 0x1), 0, 1)
-
-#define compile_ffs4(__x) \
-   __builtin_choose_expr(((__x) & 0x3), \
- (compile_ffs2((__x))), \
- (compile_ffs2((__x) >> 2) + 2))
-
-#define compile_ffs8(__x) \
-   __builtin_choose_expr(((__x) & 0xf), \
- (compile_ffs4((__x))), \
- (compile_ffs4((__x) >> 4) + 4))
-
-#define compile_ffs16(__x) \
-   __builtin_choose_expr(((__x) & 0xff), \
- (compile_ffs8((__x))), \
- (compile_ffs8((__x) >> 8) + 8))
-
-#define compile_ffs32(__x) \
-   __builtin_choose_expr(((__x) & 0xffff), \
- (compile_ffs16((__x))), \
- (compile_ffs16((__x) >> 16) + 16))
-
-/*
- * This macro will check the requirements for the FIELD{8,16,32} macros
- * The mask should be a constant non-zero contiguous set of bits which
- * does not exceed the given typelimit.
- */
-#define FIELD_CHECK(__mask) \
-   BUILD_BUG_ON(!(__mask) || !is_valid_mask(__mask))
-
-#define MT76_SET(_mask, _val)  \
-   ({  \
-   FIELD_CHECK(_mask); \
-   (((u32) (_val)) << compile_ffs32(_mask)) & _mask;   \
-   })
-
-#define MT76_GET(_mask, _val)  \
-   ({  \
-   FIELD_CHECK(_mask); \
-   (u32) (((_val) & _mask) >> compile_ffs32(_mask));   \
-   })
-
-#endif
-- 
1.9.1



Re: [PATCH] packet: Use symmetric hash for PACKET_FANOUT_HASH.

2016-07-01 Thread Tom Herbert
On Fri, Jul 1, 2016 at 2:07 PM, David Miller  wrote:
> From: Tom Herbert 
> Date: Fri, 1 Jul 2016 13:52:58 -0700
>
>> Why are symmetric hashes required?
>
> Because they want load balancing, such that one flow only can overrun
> one single socket not all of the ones in the fanout.
>
I'm still missing it. Why is this any different than what we need with
something like SO_REUSEPORT?

> Every single user of AF_PACKET fanout wants this behavior.


Re: [PATCH] packet: Use symmetric hash for PACKET_FANOUT_HASH.

2016-07-01 Thread David Miller
From: Tom Herbert 
Date: Fri, 1 Jul 2016 13:52:58 -0700

> Why are symmetric hashes required?

Because they want load balancing, such that one flow only can overrun
one single socket not all of the ones in the fanout.

Every single user of AF_PACKET fanout wants this behavior.


Re: pull request for net-next: batman-adv 2016-07-01

2016-07-01 Thread David Miller
From: Simon Wunderlich 
Date: Fri,  1 Jul 2016 15:08:52 +0200

> we are a bit late to submit our feature patches, but I hope we make it in 
> time.
> Antonio is taking a pause in his upstream work, so I'll be submitting our
> batman-adv patches for now. It is my first time, so please bear with me. :)
> 
> Please pull or let me know of any problem!

Pulled, thanks.


[PATCH] net: phy: dp83867: Fix initialization of PHYCR register

2016-07-01 Thread Stefan Hauser
When initializing the PHY control register, the FIFO depth bits are
written without reading the previous register value, i.e. all other
bits are overwritten with zero. This disables automatic MDI-X
configuration, which is enabled by default. Fix initialization by doing
a read/modify/write operation.

Signed-off-by: Stefan Hauser 
---
 drivers/net/phy/dp83867.c | 13 +
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
index 2afa61b..91177a4 100644
--- a/drivers/net/phy/dp83867.c
+++ b/drivers/net/phy/dp83867.c
@@ -57,6 +57,7 @@
 
 /* PHY CTRL bits */
 #define DP83867_PHYCR_FIFO_DEPTH_SHIFT 14
+#define DP83867_PHYCR_FIFO_DEPTH_MASK  (3 << 14)
 
 /* RGMIIDCTL bits */
 #define DP83867_RGMII_TX_CLK_DELAY_SHIFT   4
@@ -133,8 +134,8 @@ static int dp83867_of_init(struct phy_device *phydev)
 static int dp83867_config_init(struct phy_device *phydev)
 {
struct dp83867_private *dp83867;
-   int ret;
-   u16 val, delay;
+   int ret, val;
+   u16 delay;
 
if (!phydev->priv) {
dp83867 = devm_kzalloc(&phydev->mdio.dev, sizeof(*dp83867),
@@ -151,8 +152,12 @@ static int dp83867_config_init(struct phy_device *phydev)
}
 
if (phy_interface_is_rgmii(phydev)) {
-   ret = phy_write(phydev, MII_DP83867_PHYCTRL,
-   (dp83867->fifo_depth << 
DP83867_PHYCR_FIFO_DEPTH_SHIFT));
+   val = phy_read(phydev, MII_DP83867_PHYCTRL);
+   if (val < 0)
+   return val;
+   val &= ~DP83867_PHYCR_FIFO_DEPTH_MASK;
+   val |= (dp83867->fifo_depth << DP83867_PHYCR_FIFO_DEPTH_SHIFT);
+   ret = phy_write(phydev, MII_DP83867_PHYCTRL, val);
if (ret)
return ret;
}
-- 
1.9.1



Re: [PATCH v3 net-next 0/9] net: hns: fix the typo of hns

2016-07-01 Thread David Miller
From: Yisen Zhuang 
Date: Fri, 1 Jul 2016 17:34:04 +0800

> This series includes typo fixes which review by Andy, adding
> the hns maintainer to MAINTAINERS, as below:
> 
>  > from Daode: adds the maintainer for hns driver;
> 
>  > from Daode: fix the typo of hns reviewed by Andy Shevchenko;
> 
>  > from Kejian: one remove redundant function and two fix to get 
> configuration from DT.
> 
> changlog:
>  v2 -> v3:
>   match all files in and below drivers/net/ethernet/hisilicon/
> 
>  v1 -> v2:
>   fix the indentations reviewed by David.

Series applied, thanks.


Re: [PATCH] packet: Use symmetric hash for PACKET_FANOUT_HASH.

2016-07-01 Thread Tom Herbert
On Fri, Jul 1, 2016 at 1:08 PM, David Miller  wrote:
>
> People who use PACKET_FANOUT_HASH want a symmetric hash, meaning that
> they want packets going in both directions on a flow to hash to the
> same bucket.
>
> The core kernel SKB hash became non-symmetric when the ipv6 flow label
> and other entities were incorporated into the standard flow hash order
> to increase entropy.
>
> But there are no users of PACKET_FANOUT_HASH who want an asymmetric
> hash, they all want a symmetric one.
>
> Therefore, use the flow dissector to compute a flat symmetric hash
> over only the protocol, addresses and ports.  This hash does not get
> installed into and override the normal skb hash, so this change has
> no effect whatsoever on the rest of the stack.
>
This doesn't work for any of the UDP encapsulations, packets of an
encapsulated flow will still have asymmetric hashes.

Why are symmetric hashes required?

Tom

> Reported-by: Eric Leblond 
> Tested-by: Eric Leblond 
> Signed-off-by: David S. Miller 
> ---
>
> I'll be pushing this to -stable branches as well.
>
>  include/linux/skbuff.h|  1 +
>  net/core/flow_dissector.c | 43 +++
>  net/packet/af_packet.c|  2 +-
>  3 files changed, 45 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index ee38a41..24859d4 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -1062,6 +1062,7 @@ __skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool 
> is_l4)
>  }
>
>  void __skb_get_hash(struct sk_buff *skb);
> +u32 __skb_get_hash_symmetric(struct sk_buff *skb);
>  u32 skb_get_poff(const struct sk_buff *skb);
>  u32 __skb_get_poff(const struct sk_buff *skb, void *data,
>const struct flow_keys *keys, int hlen);
> diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
> index a669dea..61ad43f 100644
> --- a/net/core/flow_dissector.c
> +++ b/net/core/flow_dissector.c
> @@ -651,6 +651,23 @@ void make_flow_keys_digest(struct flow_keys_digest 
> *digest,
>  }
>  EXPORT_SYMBOL(make_flow_keys_digest);
>
> +static struct flow_dissector flow_keys_dissector_symmetric __read_mostly;
> +
> +u32 __skb_get_hash_symmetric(struct sk_buff *skb)
> +{
> +   struct flow_keys keys;
> +
> +   __flow_hash_secret_init();
> +
> +   memset(&keys, 0, sizeof(keys));
> +   __skb_flow_dissect(skb, &flow_keys_dissector_symmetric, &keys,
> +  NULL, 0, 0, 0,
> +  FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
> +
> +   return __flow_hash_from_keys(&keys, hashrnd);
> +}
> +EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric);
> +
>  /**
>   * __skb_get_hash: calculate a flow hash
>   * @skb: sk_buff to calculate flow hash from
> @@ -868,6 +885,29 @@ static const struct flow_dissector_key 
> flow_keys_dissector_keys[] = {
> },
>  };
>
> +static const struct flow_dissector_key flow_keys_dissector_symmetric_keys[] 
> = {
> +   {
> +   .key_id = FLOW_DISSECTOR_KEY_CONTROL,
> +   .offset = offsetof(struct flow_keys, control),
> +   },
> +   {
> +   .key_id = FLOW_DISSECTOR_KEY_BASIC,
> +   .offset = offsetof(struct flow_keys, basic),
> +   },
> +   {
> +   .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
> +   .offset = offsetof(struct flow_keys, addrs.v4addrs),
> +   },
> +   {
> +   .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS,
> +   .offset = offsetof(struct flow_keys, addrs.v6addrs),
> +   },
> +   {
> +   .key_id = FLOW_DISSECTOR_KEY_PORTS,
> +   .offset = offsetof(struct flow_keys, ports),
> +   },
> +};
> +
>  static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = {
> {
> .key_id = FLOW_DISSECTOR_KEY_CONTROL,
> @@ -889,6 +929,9 @@ static int __init init_default_flow_dissectors(void)
> skb_flow_dissector_init(&flow_keys_dissector,
> flow_keys_dissector_keys,
> ARRAY_SIZE(flow_keys_dissector_keys));
> +   skb_flow_dissector_init(&flow_keys_dissector_symmetric,
> +   flow_keys_dissector_symmetric_keys,
> +   
> ARRAY_SIZE(flow_keys_dissector_symmetric_keys));
> skb_flow_dissector_init(&flow_keys_buf_dissector,
> flow_keys_buf_dissector_keys,
> ARRAY_SIZE(flow_keys_buf_dissector_keys));
> diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
> index 9bff6ef..9f0983f 100644
> --- a/net/packet/af_packet.c
> +++ b/net/packet/af_packet.c
> @@ -1341,7 +1341,7 @@ static unsigned int fanout_demux_hash(struct 
> packet_fanout *f,
>   struct sk_buff *skb,
>   unsigned int num)
>  {
> -   return 

Re: [PATCH net-next] tipc: fix nl compat regression for link statistics

2016-07-01 Thread David Miller
From: Richard Alpe 
Date: Fri, 1 Jul 2016 11:11:21 +0200

> Fix incorrect use of nla_strlcpy() where the first NLA_HDRLEN bytes
> of the link name were left out.
> 
> Making the output of tipc-config -ls look something like:
> Link statistics:
> dcast-link
> 1:data0-1.1.2:data0
> 1:data0-1.1.3:data0
> 
> Also, for the record, the patch that introduce this regression
> claims "Sending the whole object out can cause a leak". Which isn't
> very likely as this is a compat layer, where the data we are parsing
> is generated by us and we know the string to be NULL terminated. But
> you can of course never be too secure.
> 
> Fixes: 5d2be1422e02 (tipc: fix an infoleak in tipc_nl_compat_link_dump)
> Signed-off-by: Richard Alpe 

Applied to 'net' since that is where the regression exists.

Please submit bug fixes to the correct tree.


Re: [PATCH net-next 0/9] RDS:TCP data structure changes for multipath support

2016-07-01 Thread David Miller
From: Sowmini Varadhan 
Date: Thu, 30 Jun 2016 16:11:09 -0700

> The second installment of changes to enable multipath support in
> RDS-TCP. This series implements the changes in rds-tcp so that the 
> rds_conn_path has a pointer to the rds_tcp_connection in cp_transport_data.
> Struct rds_tcp_connection keeps track of the inet_sk per path in
> t_sock. The ->sk_user_data in turn is a pointer to the rds_conn_path.
> With this set of changes, rds_tcp has the needed plumbing to handle
> multiple paths(socket) per rds_connection.

Series applied, thanks.


Re: [PATCH 1/4] net: ethernet: ti: davinci_cpdma: split descs num between all channels

2016-07-01 Thread David Miller
From: Ivan Khoronzhuk 
Date: Thu, 30 Jun 2016 22:04:35 +0300

> @@ -720,7 +763,7 @@ unlock_ret:
>  }
>  EXPORT_SYMBOL_GPL(cpdma_chan_submit);
>  
> -bool cpdma_check_free_tx_desc(struct cpdma_chan *chan)
> +inline bool cpdma_check_free_desc(struct cpdma_chan *chan)
>  {

This needs to be marked static.


Re: [PATCH net] macsec: set actual real device for xmit when !protect_frames

2016-07-01 Thread David Miller
From: Daniel Borkmann 
Date: Fri,  1 Jul 2016 00:00:54 +0200

> Avoid recursions of dev_queue_xmit() to the wrong net device when
> frames are unprotected, since at that time skb->dev still points to
> our own macsec dev and unlike macsec_encrypt_finish() dev pointer
> doesn't get updated to real underlying device.
> 
> Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver")
> Signed-off-by: Daniel Borkmann 
> Acked-by: Sabrina Dubroca 

Applied and queued up for -stable, thanks.


Re: [PATCH net-next v3 0/4] cgroup: bpf: cgroup2 membership test on skb

2016-07-01 Thread David Miller
From: Martin KaFai Lau 
Date: Thu, 30 Jun 2016 10:28:41 -0700

> This series is to implement a bpf-way to
> check the cgroup2 membership of a skb (sk_buff).
> 
> It is similar to the feature added in netfilter:
> c38c4597e4bf ("netfilter: implement xt_cgroup cgroup2 path match")
> 
> The current target is the tc-like usage.

I'll apply this series, but the colloquial way to arrange this header
posting is to keep the overview and description of the series at the
top of the text, and then have the "v3 ..., v2 ..." changelog afterwards.


Re: [Patch net] net_sched: fix mirrored packets checksum

2016-07-01 Thread David Miller
From: Cong Wang 
Date: Thu, 30 Jun 2016 10:15:22 -0700

> Similar to commit 9b368814b336 ("net: fix bridge multicast packet checksum 
> validation")
> we need to fixup the checksum for CHECKSUM_COMPLETE when
> pushing skb on RX path. Otherwise we get similar splats.
> 
> Cc: Jamal Hadi Salim 
> Cc: Tom Herbert 
> Signed-off-by: Cong Wang 

Applied and queued up for -stable, thanks.


Re: [PATCH net-next 0/2] Further robustify putting BPF progs

2016-07-01 Thread David Miller
From: Daniel Borkmann 
Date: Thu, 30 Jun 2016 17:24:42 +0200

> This series addresses a potential issue reported to us by Jann Horn
> with regards to putting progs. First patch moves progs generally under
> RCU destruction and second patch refactors getting of progs to simplify
> code a bit. For details, please see individual patches. Note, we think
> that addressing this one in net-next should be sufficient.

Series applied, thanks Daniel.


[PATCH] packet: Use symmetric hash for PACKET_FANOUT_HASH.

2016-07-01 Thread David Miller

People who use PACKET_FANOUT_HASH want a symmetric hash, meaning that
they want packets going in both directions on a flow to hash to the
same bucket.

The core kernel SKB hash became non-symmetric when the ipv6 flow label
and other entities were incorporated into the standard flow hash order
to increase entropy.

But there are no users of PACKET_FANOUT_HASH who want an asymmetric
hash, they all want a symmetric one.

Therefore, use the flow dissector to compute a flat symmetric hash
over only the protocol, addresses and ports.  This hash does not get
installed into and override the normal skb hash, so this change has
no effect whatsoever on the rest of the stack.

Reported-by: Eric Leblond 
Tested-by: Eric Leblond 
Signed-off-by: David S. Miller 
---

I'll be pushing this to -stable branches as well.

 include/linux/skbuff.h|  1 +
 net/core/flow_dissector.c | 43 +++
 net/packet/af_packet.c|  2 +-
 3 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index ee38a41..24859d4 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1062,6 +1062,7 @@ __skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool 
is_l4)
 }
 
 void __skb_get_hash(struct sk_buff *skb);
+u32 __skb_get_hash_symmetric(struct sk_buff *skb);
 u32 skb_get_poff(const struct sk_buff *skb);
 u32 __skb_get_poff(const struct sk_buff *skb, void *data,
   const struct flow_keys *keys, int hlen);
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index a669dea..61ad43f 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -651,6 +651,23 @@ void make_flow_keys_digest(struct flow_keys_digest *digest,
 }
 EXPORT_SYMBOL(make_flow_keys_digest);
 
+static struct flow_dissector flow_keys_dissector_symmetric __read_mostly;
+
+u32 __skb_get_hash_symmetric(struct sk_buff *skb)
+{
+   struct flow_keys keys;
+
+   __flow_hash_secret_init();
+
+   memset(&keys, 0, sizeof(keys));
+   __skb_flow_dissect(skb, &flow_keys_dissector_symmetric, &keys,
+  NULL, 0, 0, 0,
+  FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
+
+   return __flow_hash_from_keys(&keys, hashrnd);
+}
+EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric);
+
 /**
  * __skb_get_hash: calculate a flow hash
  * @skb: sk_buff to calculate flow hash from
@@ -868,6 +885,29 @@ static const struct flow_dissector_key 
flow_keys_dissector_keys[] = {
},
 };
 
+static const struct flow_dissector_key flow_keys_dissector_symmetric_keys[] = {
+   {
+   .key_id = FLOW_DISSECTOR_KEY_CONTROL,
+   .offset = offsetof(struct flow_keys, control),
+   },
+   {
+   .key_id = FLOW_DISSECTOR_KEY_BASIC,
+   .offset = offsetof(struct flow_keys, basic),
+   },
+   {
+   .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+   .offset = offsetof(struct flow_keys, addrs.v4addrs),
+   },
+   {
+   .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+   .offset = offsetof(struct flow_keys, addrs.v6addrs),
+   },
+   {
+   .key_id = FLOW_DISSECTOR_KEY_PORTS,
+   .offset = offsetof(struct flow_keys, ports),
+   },
+};
+
 static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = {
{
.key_id = FLOW_DISSECTOR_KEY_CONTROL,
@@ -889,6 +929,9 @@ static int __init init_default_flow_dissectors(void)
skb_flow_dissector_init(&flow_keys_dissector,
flow_keys_dissector_keys,
ARRAY_SIZE(flow_keys_dissector_keys));
+   skb_flow_dissector_init(&flow_keys_dissector_symmetric,
+   flow_keys_dissector_symmetric_keys,
+   ARRAY_SIZE(flow_keys_dissector_symmetric_keys));
skb_flow_dissector_init(&flow_keys_buf_dissector,
flow_keys_buf_dissector_keys,
ARRAY_SIZE(flow_keys_buf_dissector_keys));
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 9bff6ef..9f0983f 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1341,7 +1341,7 @@ static unsigned int fanout_demux_hash(struct 
packet_fanout *f,
  struct sk_buff *skb,
  unsigned int num)
 {
-   return reciprocal_scale(skb_get_hash(skb), num);
+   return reciprocal_scale(__skb_get_hash_symmetric(skb), num);
 }
 
 static unsigned int fanout_demux_lb(struct packet_fanout *f,
-- 
2.5.5



Re: [PATCH] usbnet: add reset_resume quirk to prevent resume failure

2016-07-01 Thread David Miller
From: Vivek Kumar Bhagat 
Date: Thu, 30 Jun 2016 10:41:59 + (GMT)

> 
> Ideally, usbnet_resume is sufficient for device resume operation.
> since usbcore function usb_resume_device() sets udev->reset_resume
> flag as a quirk solution, reset_resume() quirk we can keep in
> class driver also. We checked on dongle DA-Queen UFE20C,
> without reset function resume operation fails. Reason could be
> some power glitch during suspend time due to which device lose
> its internal state and it needs a device phy reset again during
> resume to recover.
> 
> Signed-off-by: Vivek Kumar Bhagat 
> Signed-off-by: Vikas Bansal 
> Signed-off-by: Sangmin Bae 

You've seen this necessary for one device, yet you are doing this
new reset for all usbnet devices.

That doesn't sound correct or safe at all.

I'm not applying this, sorry.


Re: [PATCH] net: Fix resetting network_header in neigh_resolve_output and neigh_connected_output

2016-07-01 Thread David Miller
From: Abdelrhman Ahmed 
Date: Mon, 27 Jun 2016 16:28:59 +0200

> @@ -1293,15 +1293,19 @@ int neigh_resolve_output(struct neighbour *neigh, 
> struct sk_buff *skb)
>   int rc = 0;
>  
>   if (!neigh_event_send(neigh, skb)) {
> - int err;
> + int err, offset;
>   struct net_device *dev = neigh->dev;
> + unsigned char *data;
>   unsigned int seq;
>  
>   if (dev->header_ops->cache && !neigh->hh.hh_len)
>   neigh_hh_init(neigh);
>  
> + data = skb->data;
> +
>   do {
> - __skb_pull(skb, skb_network_offset(skb));
> + offset = data - skb->data;
> + __skb_pull(skb, offset);

This is definitely not right, using skb->data for this.  It may work
for the cases you have tested but it is not generally correct.

You must use the skb network header.

You are just trying to avoid doing the pull more than once if we loop
right?  Then simply use a boolean to track that.


[PATCH] mwifiex: fix unconditional error return in .add_virtual_intf callback

2016-07-01 Thread Javier Martinez Canillas
The commit 7311ea850079 ("mwifiex: fix AP start problem for newly added
interface") attempted to fix an issue when a new AP interface is added.

But the patch didn't check the return value of the functions doing the
firmware calls and returned an error even if the functions didn't fail.

This prevents the network device from being registered properly, so fix it.

Fixes: commit 7311ea850079 ("mwifiex: fix AP start problem for newly added 
interface")
Signed-off-by: Javier Martinez Canillas 
---

 drivers/net/wireless/marvell/mwifiex/cfg80211.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c 
b/drivers/net/wireless/marvell/mwifiex/cfg80211.c
index 99e8cf1ad610..5de9f63e2c01 100644
--- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c
+++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c
@@ -2865,9 +2865,11 @@ struct wireless_dev *mwifiex_add_virtual_intf(struct 
wiphy *wiphy,
 
ret = mwifiex_send_cmd(priv, HostCmd_CMD_SET_BSS_MODE,
   HostCmd_ACT_GEN_SET, 0, NULL, true);
+   if (ret)
return ERR_PTR(ret);
 
ret = mwifiex_sta_init_cmd(priv, false, false);
+   if (ret)
return ERR_PTR(ret);
 
mwifiex_setup_ht_caps(&wiphy->bands[NL80211_BAND_2GHZ]->ht_cap, priv);
-- 
2.5.5



RE: [PATCH net-next] netvsc: Use the new in-place consumption APIs in the rx path

2016-07-01 Thread KY Srinivasan


> -Original Message-
> From: KY Srinivasan
> Sent: Thursday, June 9, 2016 5:16 PM
> To: 'Linus Torvalds' 
> Cc: da...@davemloft.net; netdev@vger.kernel.org; lkml  ker...@vger.kernel.org>; de...@linuxdriverproject.org; o...@aepfle.de;
> a...@canonical.com; jasow...@redhat.com;
> leann.ogasaw...@canonical.com
> Subject: RE: [PATCH net-next] netvsc: Use the new in-place consumption APIs in
> the rx path
> 
> 
> 
> > -Original Message-
> > From: Linus Torvalds [mailto:torva...@linux-foundation.org]
> > Sent: Thursday, June 9, 2016 5:12 PM
> > To: KY Srinivasan 
> > Cc: da...@davemloft.net; netdev@vger.kernel.org; lkml  > ker...@vger.kernel.org>; de...@linuxdriverproject.org; o...@aepfle.de;
> > a...@canonical.com; jasow...@redhat.com;
> > leann.ogasaw...@canonical.com
> > Subject: Re: [PATCH net-next] netvsc: Use the new in-place consumption APIs
> in
> > the rx path
> >
> > Srinivasan,
> >
> >  these are all sent through linuxonhyperv.com, and fail DMARC because
> > they have a microsoft.com address but no valid DKIM.
> >
> > Please fix your email setup.  You need to go through the real
> > microsoft smtp servers if you use a microsoft.com address. Or you need
> > to get linuxonhyperv.com fixed as a smtp server with the proper MS
> > email signing.
> >
> >Linus
> 
> Thanks Linus, we will fix our email setup.
We have now addressed this issue (kind of). We will continue to send our 
patches from
linuxonhyperv.com; however the "from" address will be 
exam...@exchange.microsoft.com.
We are working on the longer term solution that won't need this weird "from" 
address.

David, based on this comment from Linus, you have deferred this patch - 
as far as I know there were no other comments for me to address.
Should I resend the patch?

Regards,

K. Y
  

> 
> Regards,
> 
> K. Y
> >
> > On Thu, Jun 9, 2016 at 6:34 PM, K. Y. Srinivasan  wrote:
> > > Use the new APIs for eliminating a copy on the receive path. These new
> APIs
> > also
> > > help in minimizing the number of memory barriers we end up issuing (in
> the
> > > ringbuffer code) since we can better control when we want to expose the
> ring
> > > state to the host.
> > >
> > > Signed-off-by: K. Y. Srinivasan 
> > > Reviewed-by: Haiyang Zhang 
> > > Tested-by: Dexuan Cui 
> > > Tested-by: Simon Xiao 
> > > ---
> > >  drivers/net/hyperv/netvsc.c |   88 +---
> --
> > -
> > >  1 files changed, 59 insertions(+), 29 deletions(-)
> > >
> > > diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
> > > index 719cb35..8cd4c19 100644
> > > --- a/drivers/net/hyperv/netvsc.c
> > > +++ b/drivers/net/hyperv/netvsc.c
> > > @@ -1141,6 +1141,39 @@ static inline void netvsc_receive_inband(struct
> > hv_device *hdev,
> > > }
> > >  }
> > >
> > > +static void netvsc_process_raw_pkt(struct hv_device *device,
> > > +  struct vmbus_channel *channel,
> > > +  struct netvsc_device *net_device,
> > > +  struct net_device *ndev,
> > > +  u64 request_id,
> > > +  struct vmpacket_descriptor *desc)
> > > +{
> > > +   struct nvsp_message *nvmsg;
> > > +
> > > +   nvmsg = (struct nvsp_message *)((unsigned long)
> > > +   desc + (desc->offset8 << 3));
> > > +
> > > +   switch (desc->type) {
> > > +   case VM_PKT_COMP:
> > > +   netvsc_send_completion(net_device, channel, device, desc);
> > > +   break;
> > > +
> > > +   case VM_PKT_DATA_USING_XFER_PAGES:
> > > +   netvsc_receive(net_device, channel, device, desc);
> > > +   break;
> > > +
> > > +   case VM_PKT_DATA_INBAND:
> > > +   netvsc_receive_inband(device, net_device, nvmsg);
> > > +   break;
> > > +
> > > +   default:
> > > +   netdev_err(ndev, "unhandled packet type %d, tid %llx\n",
> > > +  desc->type, request_id);
> > > +   break;
> > > +   }
> > > +}
> > > +
> > > +
> > >  void netvsc_channel_cb(void *context)
> > >  {
> > > int ret;
> > > @@ -1153,7 +1186,7 @@ void netvsc_channel_cb(void *context)
> > > unsigned char *buffer;
> > > int bufferlen = NETVSC_PACKET_SIZE;
> > > struct net_device *ndev;
> > > -   struct nvsp_message *nvmsg;
> > > +   bool need_to_commit = false;
> > >
> > > if (channel->primary_channel != NULL)
> > > device = channel->primary_channel->device_obj;
> > > @@ -1167,39 +1200,36 @@ void netvsc_channel_cb(void *context)
> > > buffer = get_per_channel_state(channel);
> > >
> > > do {
> > > +   desc = get_next_pkt_raw(channel);
> > > +   if 

Re: ethtool needs a new maintainer

2016-07-01 Thread David Miller
From: Andy Gospodarek 
Date: Fri, 1 Jul 2016 14:31:34 -0400

> On Thu, Jun 30, 2016 at 03:12:52PM -0700, John Fastabend wrote:
>> On 16-06-30 11:15 AM, John W. Linville wrote:
>> > On Mon, Jun 27, 2016 at 09:51:47AM -0400, John W. Linville wrote:
>> >> On Sun, Jun 26, 2016 at 06:11:41PM +0200, Ben Hutchings wrote:
>> >>> I've become steadily less enthusiastic and less responsive as a
>> >>> maintainer over the past year or so.  I no longer work on networking
>> >>> regularly, so it takes a lot more time to get into the right state of
>> >>> mind to think about ethtool code, while I have other demands on my time
>> >>> that tend to take priority.
>> >>>
>> >>> So, I would like to find a new maintainer to take over as soon as
>> >>> possible.  Ideally the new maintainer would have previous contributions
>> >>> to ethtool and an existing account on kernel.org so that they can push
>> >>> to the git repository and the home page.  But neither of those is
>> >>> essential.  Please reply if you're interested.
>> >>
>> >> I would like to take this responsibility. My previous contributions
>> >> to ethtool are meager, but I think my skills and interests are suited
>> >> to the task.  Plus, I already have a kernel.org account... :-)
>> > 
>> > Are there any other takers?  Or is this a done deal?
>> > 
>> > John
>> > 
>> 
>> +1 for having John take it on :)
>> 
>> .JohnF
> 
> I'll add a +1 for John Linville to take it over as well!

I'm also perfectly fine with this.

We can add John to the list of delegates in patchwork at
patchwork.ozlabs.org and track ethtool patches there if we want.


Re: [patch net-next 00/42] mlxsw: Implement IPV4 unicast routing

2016-07-01 Thread David Miller
From: Jiri Pirko 
Date: Fri,  1 Jul 2016 16:04:28 +0200

> This patchset enables IPv4 unicast routing in the Mellanox Spectrum
> ASIC switch driver. The basic dependencies are already present in
> the kernel and used by rocker.

Please split this up into smaller segments (~15 patches, max) and
resubmit.

A 42 patch series presents an unreasonable burden for reviewers.


Re: strange Mac OSX RST behavior

2016-07-01 Thread Jason Baron


On 07/01/2016 02:16 PM, One Thousand Gnomes wrote:
>> yes, we do in fact see a POLLRDHUP from the FIN in this case and
>> read of zero, but we still have more data to write to the socket, and
>> b/c the RST is dropped here, the socket stays in TIME_WAIT until
>> things eventually time out...
> 
> After the FIN when you send/retransmit your next segment do you then get
> a valid RST back from the Mac end?
> 
> Alan
> 

No, we only get the single RST after the FIN from the Mac side which
is dropped. I would have expected the RST from the Mac after the
retransmits, but we don't see any further transmits from the Mac.
And the Linux socket stays in CLOSE-WAIT (I mistakenly said
TIME_WAIT above).

For reference, I put the packet exchange in my initial mail.

Thanks,

-Jason


Re: ethtool needs a new maintainer

2016-07-01 Thread Andy Gospodarek
On Thu, Jun 30, 2016 at 03:12:52PM -0700, John Fastabend wrote:
> On 16-06-30 11:15 AM, John W. Linville wrote:
> > On Mon, Jun 27, 2016 at 09:51:47AM -0400, John W. Linville wrote:
> >> On Sun, Jun 26, 2016 at 06:11:41PM +0200, Ben Hutchings wrote:
> >>> I've become steadily less enthusiastic and less responsive as a
> >>> maintainer over the past year or so.  I no longer work on networking
> >>> regularly, so it takes a lot more time to get into the right state of
> >>> mind to think about ethtool code, while I have other demands on my time
> >>> that tend to take priority.
> >>>
> >>> So, I would like to find a new maintainer to take over as soon as
> >>> possible.  Ideally the new maintainer would have previous contributions
> >>> to ethtool and an existing account on kernel.org so that they can push
> >>> to the git repository and the home page.  But neither of those is
> >>> essential.  Please reply if you're interested.
> >>
> >> I would like to take this responsibility. My previous contributions
> >> to ethtool are meager, but I think my skills and interests are suited
> >> to the task.  Plus, I already have a kernel.org account... :-)
> > 
> > Are there any other takers?  Or is this a done deal?
> > 
> > John
> > 
> 
> +1 for having John take it on :)
> 
> .JohnF

I'll add a +1 for John Linville to take it over as well!



Re: strange Mac OSX RST behavior

2016-07-01 Thread One Thousand Gnomes
> yes, we do in fact see a POLLRDHUP from the FIN in this case and
> read of zero, but we still have more data to write to the socket, and
> b/c the RST is dropped here, the socket stays in TIME_WAIT until
> things eventually time out...

After the FIN when you send/retransmit your next segment do you then get
a valid RST back from the Mac end?

Alan


[PATCH] Fix race condition in enc28j60 driver

2016-07-01 Thread Sergio Valverde
From: Sergio Valverde 

The interrupt worker code for the enc28j60 relies only on the TXIF flag to
determine whether the packet transmission was completed. However, the datasheet
specifies in section 12.1.3 that TXERIF will clear the TXRTS after a
transmit abort. Also in section 12.1.4 that TXIF will be set
when TXRTS transitions from '1' to '0'. Therefore the TXIF flag is enabled
during transmission errors.

This causes a race condition, since the worker code will invoke
enc28j60_tx_clear() -> netif_wake_queue(), potentially invoking the
ndo_start_xmit function to send a new packet. The enc28j60_send_packet function
uses a workqueue that invokes enc28j60_hw_tx(). Before this function is
called, the worker from the interrupt handler will enter the error-handling
path because of the TXERIF flag, invoking enc28j60_tx_clear() again
and releasing the packet scheduled for transmission, which causes a kernel
crash due to a NULL pointer dereference.

These crashes due to a NULL pointer were observed under stress conditions of the
device. A BUG_ON() sequence was used to validate the issue was fixed, and has
been running without problems for 2 years now.

Signed-off-by: Diego Dompe 
Acked-by: Sergio Valverde 
---
 drivers/net/ethernet/microchip/enc28j60.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/microchip/enc28j60.c 
b/drivers/net/ethernet/microchip/enc28j60.c
index 7066954..0a26b11 100644
--- a/drivers/net/ethernet/microchip/enc28j60.c
+++ b/drivers/net/ethernet/microchip/enc28j60.c
@@ -1151,7 +1151,8 @@ static void enc28j60_irq_work_handler(struct work_struct 
*work)
enc28j60_phy_read(priv, PHIR);
}
/* TX complete handler */
-   if ((intflags & EIR_TXIF) != 0) {
+   if (((intflags & EIR_TXIF) != 0) &&
+   ((intflags & EIR_TXERIF) == 0)) {
bool err = false;
loop++;
if (netif_msg_intr(priv))
@@ -1203,7 +1204,7 @@ static void enc28j60_irq_work_handler(struct work_struct 
*work)
enc28j60_tx_clear(ndev, true);
} else
enc28j60_tx_clear(ndev, true);
-   locked_reg_bfclr(priv, EIR, EIR_TXERIF);
+   locked_reg_bfclr(priv, EIR, EIR_TXERIF | EIR_TXIF);
}
/* RX Error handler */
if ((intflags & EIR_RXERIF) != 0) {
@@ -1238,6 +1239,8 @@ static void enc28j60_irq_work_handler(struct work_struct 
*work)
  */
 static void enc28j60_hw_tx(struct enc28j60_net *priv)
 {
+   BUG_ON(!priv->tx_skb);
+
if (netif_msg_tx_queued(priv))
printk(KERN_DEBUG DRV_NAME
": Tx Packet Len:%d\n", priv->tx_skb->len);
--
1.9.1


Re: [iproute PATCH 1/2] ipaddress: Simplify vf_info parsing

2016-07-01 Thread Phil Sutter
On Wed, Jun 01, 2016 at 03:36:09PM -0700, Greg Rose wrote:
> On Wed, Jun 1, 2016 at 3:07 PM, Phil Sutter  wrote:
> > On Wed, Jun 01, 2016 at 03:00:08PM -0700, Greg Rose wrote:
> >> On Wed, Jun 1, 2016 at 1:03 PM, Phil Sutter  wrote:
> >> > Not sure whether I misinterpret commit 7b8179c780a1a, but it looks
> >> > overly complicated. Instead rely upon parse_rtattr_nested() to assign
> >> > the relevant pointer if requested rtattr fields are present.
> >>
> >> I'm not sure if newer iproute2 utilities are supposed to work on older
> >> kernels but if it is you may want to check this against a 2.6.32
> >> kernel.
> >
> > Yes, it is supposed to. Actually I tried, but the old RHEL6 kernel I
> > used didn't export the VF list at all and then I lost motivation.
> >
> > I didn't check all earlier versions of 7b8179c780a1a, was there a stage
> > when it looked like what I'm changing it to?
> 
> I don't think so but your patch looks correct - I mean it looks like
> it should work.
> 
> It's been 5 years since I wrote that original patch and my memory
> isn't so great as to why I didn't just do as your patch does but I
> think it had something to do with not all drivers reporting a spoof
> check value.  However, your patch should handle that case so I see no
> reason not to accept it.  Unfortunately I don't have time or the
> resources at the moment to check it on an older kernel.

So can I count that as your Acked-by? ;)
Looks like Stephen hesitates to accept this patch due to the discussion
it provoked.

Cheers, Phil


Re: next-20160701 build: 2 failures 5 warnings (next-20160701)

2016-07-01 Thread John Crispin


On 01/07/2016 19:15, Mark Brown wrote:
> On Fri, Jul 01, 2016 at 10:00:09AM +0100, Build bot for Mark Brown wrote:
> 
> Today's -next fails to build am ARM allmodconfig due to:
> 
>>  arm-allmodconfig
>> ../drivers/net/ethernet/mediatek/mtk_eth_soc.c:1300:2: error: implicit 
>> declaration of function 'mtk_handle_irq' 
>> [-Werror=implicit-function-declaration]
>> ../drivers/net/ethernet/mediatek/mtk_eth_soc.c:1300:25: error: subscripted 
>> value is neither array nor pointer nor vector
> 
> due to the recent changes to the interrupt handling code, the function
> has been split into rx and tx functions but the NET_POLL_CONTROLLER code
> hasn't been updated.
> 

Hi Mark,

i'll look into this first thing in the morning.

John


Re: [PATCH] mwifiex: mask PCIe interrupts before removal

2016-07-01 Thread Doug Anderson
Hi,

On Thu, Jun 30, 2016 at 3:21 PM, Brian Norris  wrote:
> The PCIe driver didn't mask the host interrupts before trying to tear
> down. This causes lockups at reboot or rmmod when using MSI-X on 8997,
> since the MSI handler gets confused and locks up the system.
>
> Also tested on 8897, which does not support MSI-X (and wasn't
> experiencing this same bug). No regressions seen there.
>
> Signed-off-by: Brian Norris 
> ---
>  drivers/net/wireless/marvell/mwifiex/pcie.c | 6 ++
>  1 file changed, 6 insertions(+)

This looks very sane but isn't my area of expertise at all.  However,
if my Reviewed-by is at all useful feel free to add it.  I will say
that this fixes the issue of the crash at reboot time, so at least
I'll add:

Tested-by: Douglas Anderson 


Re: [PATCH 02/17] batman-adv: statically print gateway table header

2016-07-01 Thread Joe Perches
On Fri, 2016-07-01 at 15:08 +0200, Simon Wunderlich wrote:
> To make it easier to search through the code it is better to print static
> strings directly instead of using format strings printing constants.

It's also generally smaller object code and faster at runtime.

$ size net/batman-adv/gateway_client.o*
   text    data bss dec hex filename
   4898   1   0    4899    1323 net/batman-adv/gateway_client.o.new
   4951   1   0    4952    1358 net/batman-adv/gateway_client.o.old


Re: strange Mac OSX RST behavior

2016-07-01 Thread Jason Baron
On 07/01/2016 01:08 PM, Rick Jones wrote:
> On 07/01/2016 08:10 AM, Jason Baron wrote:
>> I'm wondering if anybody else has run into this...
>>
>> On Mac OSX 10.11.5 (latest version), we have found that when tcp
>> connections are abruptly terminated (via ^C), a FIN is sent followed
>> by an RST packet.
> 
> That just seems, well, silly.  If the client application wants to use
> abortive close (sigh..) it should do so, there shouldn't be this
> little-bit-pregnant, correct close initiation (FIN) followed by a RST.
> 
>> The RST is sent with the same sequence number as the
>> FIN, and thus dropped since the stack only accepts RST packets matching
>> rcv_nxt (RFC 5961). This could also be resolved if Mac OSX replied with
>> an RST on the closed socket, but it appears that it does not.
>>
>> The workaround here is then to reset the connection, if the RST is
>> is equal to rcv_nxt - 1, if we have already received a FIN.
>>
>> The RST attack surface is limited b/c we only accept the RST after we've
>> accepted a FIN and have not previously sent a FIN and received back the
>> corresponding ACK. In other words RST is only accepted in the tcp
>> states: TCP_CLOSE_WAIT, TCP_LAST_ACK, and TCP_CLOSING.
>>
>> I'm interested if anybody else has run into this issue. Its problematic
>> since it takes up server resources for sockets sitting in TCP_CLOSE_WAIT.
> 
> Isn't the server application expected to act on the read return of zero
> (which is supposed to be) triggered by the receipt of the FIN segment?
>

yes, we do in fact see a POLLRDHUP from the FIN in this case and
read of zero, but we still have more data to write to the socket, and
b/c the RST is dropped here, the socket stays in TIME_WAIT until
things eventually time out...

Thanks,

-Jason

> rick jones
> 
>> We are also in the process of contacting Apple to see what can be done
>> here...workaround patch is below.


Re: next-20160701 build: 2 failures 5 warnings (next-20160701)

2016-07-01 Thread Mark Brown
On Fri, Jul 01, 2016 at 10:00:09AM +0100, Build bot for Mark Brown wrote:

Today's -next fails to build an ARM allmodconfig due to:

>   arm-allmodconfig
> ../drivers/net/ethernet/mediatek/mtk_eth_soc.c:1300:2: error: implicit 
> declaration of function 'mtk_handle_irq' 
> [-Werror=implicit-function-declaration]
> ../drivers/net/ethernet/mediatek/mtk_eth_soc.c:1300:25: error: subscripted 
> value is neither array nor pointer nor vector

due to the recent changes to the interrupt handling code, the function
has been split into rx and tx functions but the NET_POLL_CONTROLLER code
hasn't been updated.


signature.asc
Description: PGP signature


[PATCH] i40e: Explicitly write platform-specific mac address after PF reset

2016-07-01 Thread Tushar Dave
i40e PF reset clears mac filters. If platform-specific mac address
is used, driver has to explicitly write default mac address to mac
filters otherwise all incoming traffic destined to default mac
address will be dropped after reset.

This issue was found on SPARC while toggling i40e ntuple via ethtool.

Signed-off-by: Tushar Dave 
Acked-by: Sowmini Varadhan 
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 87 -
 1 file changed, 49 insertions(+), 38 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c 
b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 3449129..b330723 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -2573,6 +2573,44 @@ static int i40e_vlan_rx_kill_vid(struct net_device 
*netdev,
 }
 
 /**
+ * i40e_macaddr_init - explicitly write the mac address filters.
+ *
+ * @vsi: pointer to the vsi.
+ * @macaddr: the MAC address
+ *
+ * This is needed when the macaddr has been obtained by other
+ * means than the default, e.g., from Open Firmware or IDPROM.
+ * Returns 0 on success, negative on failure
+ **/
+static int i40e_macaddr_init(struct i40e_vsi *vsi, u8 *macaddr)
+{
+   int ret;
+   struct i40e_aqc_add_macvlan_element_data element;
+
+   ret = i40e_aq_mac_address_write(&vsi->back->hw,
+   I40E_AQC_WRITE_TYPE_LAA_WOL,
+   macaddr, NULL);
+   if (ret) {
+   dev_info(&vsi->back->pdev->dev,
+"Addr change for VSI failed: %d\n", ret);
+   return -EADDRNOTAVAIL;
+   }
+
+   memset(&element, 0, sizeof(element));
+   ether_addr_copy(element.mac_addr, macaddr);
+   element.flags = cpu_to_le16(I40E_AQC_MACVLAN_ADD_PERFECT_MATCH);
+   ret = i40e_aq_add_macvlan(&vsi->back->hw, vsi->seid, &element, 1, NULL);
+   if (ret) {
+   dev_info(&vsi->back->pdev->dev,
+"add filter failed err %s aq_err %s\n",
+i40e_stat_str(&vsi->back->hw, ret),
+i40e_aq_str(&vsi->back->hw,
+vsi->back->hw.aq.asq_last_status));
+   }
+   return ret;
+}
+
+/**
  * i40e_restore_vlan - Reinstate vlans when vsi/netdev comes back up
  * @vsi: the vsi being brought back up
  **/
@@ -3058,8 +3096,19 @@ static void i40e_vsi_config_dcb_rings(struct i40e_vsi 
*vsi)
  **/
 static void i40e_set_vsi_rx_mode(struct i40e_vsi *vsi)
 {
+   struct i40e_pf *pf = vsi->back;
+   int err;
+
if (vsi->netdev)
i40e_set_rx_mode(vsi->netdev);
+
+   if (!!(pf->flags & I40E_FLAG_PF_MAC)) {
+   err = i40e_macaddr_init(vsi, pf->hw.mac.addr);
+   if (err) {
+   dev_warn(&pf->pdev->dev,
+"could not set up macaddr; err %d\n", err);
+   }
+   }
 }
 
 /**
@@ -9719,44 +9768,6 @@ err_vsi:
 }
 
 /**
- * i40e_macaddr_init - explicitly write the mac address filters.
- *
- * @vsi: pointer to the vsi.
- * @macaddr: the MAC address
- *
- * This is needed when the macaddr has been obtained by other
- * means than the default, e.g., from Open Firmware or IDPROM.
- * Returns 0 on success, negative on failure
- **/
-static int i40e_macaddr_init(struct i40e_vsi *vsi, u8 *macaddr)
-{
-   int ret;
-   struct i40e_aqc_add_macvlan_element_data element;
-
-   ret = i40e_aq_mac_address_write(&vsi->back->hw,
-   I40E_AQC_WRITE_TYPE_LAA_WOL,
-   macaddr, NULL);
-   if (ret) {
-   dev_info(&vsi->back->pdev->dev,
-"Addr change for VSI failed: %d\n", ret);
-   return -EADDRNOTAVAIL;
-   }
-
-   memset(&element, 0, sizeof(element));
-   ether_addr_copy(element.mac_addr, macaddr);
-   element.flags = cpu_to_le16(I40E_AQC_MACVLAN_ADD_PERFECT_MATCH);
-   ret = i40e_aq_add_macvlan(&vsi->back->hw, vsi->seid, &element, 1, NULL);
-   if (ret) {
-   dev_info(&vsi->back->pdev->dev,
-"add filter failed err %s aq_err %s\n",
-i40e_stat_str(&vsi->back->hw, ret),
-i40e_aq_str(&vsi->back->hw,
-vsi->back->hw.aq.asq_last_status));
-   }
-   return ret;
-}
-
-/**
  * i40e_vsi_setup - Set up a VSI by a given type
  * @pf: board private structure
  * @type: VSI type
-- 
1.9.1



Re: strange Mac OSX RST behavior

2016-07-01 Thread Rick Jones

On 07/01/2016 08:10 AM, Jason Baron wrote:

I'm wondering if anybody else has run into this...

On Mac OSX 10.11.5 (latest version), we have found that when tcp
connections are abruptly terminated (via ^C), a FIN is sent followed
by an RST packet.


That just seems, well, silly.  If the client application wants to use 
abortive close (sigh..) it should do so, there shouldn't be this 
little-bit-pregnant, correct close initiation (FIN) followed by a RST.



The RST is sent with the same sequence number as the
FIN, and thus dropped since the stack only accepts RST packets matching
rcv_nxt (RFC 5961). This could also be resolved if Mac OSX replied with
an RST on the closed socket, but it appears that it does not.

The workaround here is then to reset the connection, if the RST
is equal to rcv_nxt - 1, if we have already received a FIN.

The RST attack surface is limited b/c we only accept the RST after we've
accepted a FIN and have not previously sent a FIN and received back the
corresponding ACK. In other words RST is only accepted in the tcp
states: TCP_CLOSE_WAIT, TCP_LAST_ACK, and TCP_CLOSING.

I'm interested if anybody else has run into this issue. Its problematic
since it takes up server resources for sockets sitting in TCP_CLOSE_WAIT.


Isn't the server application expected to act on the read return of zero 
(which is supposed to be) triggered by the receipt of the FIN segment?


rick jones


We are also in the process of contacting Apple to see what can be done
here...workaround patch is below.


Re: strange Mac OSX RST behavior

2016-07-01 Thread One Thousand Gnomes
> On Mac OSX 10.11.5 (latest version), we have found that when tcp
> connections are abruptly terminated (via ^C), a FIN is sent followed
> by an RST packet. The RST is sent with the same sequence number as the
> FIN, and thus dropped since the stack only accepts RST packets matching
> rcv_nxt (RFC 5961). 

The Linux behaviour appears to be correct, and accepting a broken FIN is
most definitely a bad idea, it has a very different effect to a FIN
because there can be data going the other direction and a FIN is a one
way close

Alan


[patch net-next 37/42] mlxsw: Introduce simplistic KVD linear area manager

2016-07-01 Thread Jiri Pirko
From: Jiri Pirko 

This is a very simple manager for KVD linear area. Currently, the
allocator will either allocate a single entry from pre-defined sub-area,
or in case more than one entry is needed, it will allocate 32-entry chunk
in other pre-defined sub-area.

Signed-off-by: Jiri Pirko 
Reviewed-by: Ido Schimmel 
---
 drivers/net/ethernet/mellanox/mlxsw/Makefile   |  3 +-
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h |  6 ++
 .../net/ethernet/mellanox/mlxsw/spectrum_kvdl.c| 91 ++
 3 files changed, 99 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c

diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile 
b/drivers/net/ethernet/mellanox/mlxsw/Makefile
index ea05f8a..d20ae18 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Makefile
+++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile
@@ -7,5 +7,6 @@ obj-$(CONFIG_MLXSW_SWITCHX2)+= mlxsw_switchx2.o
 mlxsw_switchx2-objs:= switchx2.o
 obj-$(CONFIG_MLXSW_SPECTRUM)   += mlxsw_spectrum.o
 mlxsw_spectrum-objs:= spectrum.o spectrum_buffers.o \
-  spectrum_switchdev.o spectrum_router.o
+  spectrum_switchdev.o spectrum_router.o \
+  spectrum_kvdl.o
 mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB)+= spectrum_dcb.o
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index f7d34d8..e781128 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -253,6 +253,9 @@ struct mlxsw_sp {
u8 port_to_module[MLXSW_PORT_MAX_PORTS];
struct mlxsw_sp_sb sb;
struct mlxsw_sp_router router;
+   struct {
+   DECLARE_BITMAP(usage, MLXSW_SP_KVD_LINEAR_SIZE);
+   } kvdl;
 };
 
 static inline struct mlxsw_sp_upper *
@@ -539,4 +542,7 @@ int mlxsw_sp_router_neigh_construct(struct net_device *dev,
 void mlxsw_sp_router_neigh_destroy(struct net_device *dev,
   struct neighbour *n);
 
+int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count);
+void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index);
+
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
new file mode 100644
index 0000000..ac321e8
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
@@ -0,0 +1,91 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko 
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *notice, this list of conditions and the following disclaimer in the
+ *documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *contributors may be used to endorse or promote products derived from
+ *this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+
+#include "spectrum.h"
+
+#define MLXSW_SP_KVDL_SINGLE_BASE 0
+#define MLXSW_SP_KVDL_SINGLE_SIZE 16384
+#define MLXSW_SP_KVDL_CHUNKS_BASE \
+   (MLXSW_SP_KVDL_SINGLE_BASE + MLXSW_SP_KVDL_SINGLE_SIZE)
+#define MLXSW_SP_KVDL_CHUNKS_SIZE \
+   (MLXSW_SP_KVD_LINEAR_SIZE - MLXSW_SP_KVDL_CHUNKS_BASE)
+#define MLXSW_SP_CHUNK_MAX 32
+
+int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count)
+{
+   int entry_index;
+   int size;
+   int 

Re: [patch net-next 11/42] mlxsw: spectrum: Add router interface struct

2016-07-01 Thread David Ahern

On 7/1/16 8:04 AM, Jiri Pirko wrote:

@@ -327,6 +333,19 @@ mlxsw_sp_port_vport_find_by_fid(const struct mlxsw_sp_port 
*mlxsw_sp_port,
return NULL;
 }

+static inline struct mlxsw_sp_rif *
+mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
+const struct net_device *dev)
+{
+   int i;
+
+   for (i = 0; i < MLXSW_SP_RIF_MAX; i++)
+   if (mlxsw_sp->rifs[i] && mlxsw_sp->rifs[i]->dev == dev)
+   return mlxsw_sp->rifs[i];
+
+   return NULL;
+}
+


Why not add the rif to mlxsw_sp_port which is the priv data for a mlxsw dev?


Re: [patch net-next 23/42] mlxsw: spectrum_router: Implement fib4 add/del switchdev obj ops

2016-07-01 Thread David Ahern

On 7/1/16 8:04 AM, Jiri Pirko wrote:


+static int
+mlxsw_sp_router_fib4_entry_init(struct mlxsw_sp *mlxsw_sp,
+   const struct switchdev_obj_ipv4_fib *fib4,
+   struct mlxsw_sp_fib_entry *fib_entry)
+{
+   struct fib_info *fi = fib4->fi;
+
+   if (fib4->type == RTN_LOCAL || fib4->type == RTN_BROADCAST) {
+   fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
+   return 0;
+   }
+   if (fib4->type != RTN_UNICAST)
+   return -EINVAL;


This is going to cause offload to fail b/c if a user has RTN_UNREACHABLE 
or RTN_PROHIBIT default route in a table. Those routes are needed per 
VRF/table to keep lookups from dropping to another table.





[patch net-next 03/42] neigh: Send a notification when DELAY_PROBE_TIME changes

2016-07-01 Thread Jiri Pirko
From: Ido Schimmel 

When the data plane is offloaded the traffic doesn't go through the
networking stack. Therefore, after first resolving a neighbour the NUD
state machine will transition it from REACHABLE to STALE until it's
finally deleted by the garbage collector.

To prevent such situations the offloading driver should notify the NUD
state machine on any neighbours that were recently used. The driver's
polling interval should be set so that the NUD state machine can
function as if the traffic wasn't offloaded.

Currently, there are no in-tree drivers that can report confirmation for
a neighbour, but only 'used' indication. Therefore, the polling interval
should be set according to DELAY_FIRST_PROBE_TIME, as a neighbour will
transition from REACHABLE state to DELAY (instead of STALE) if "a packet
was sent within the last DELAY_FIRST_PROBE_TIME seconds" (RFC 4861).

Send a netevent whenever the DELAY_FIRST_PROBE_TIME changes - either via
netlink or sysctl - so that offloading drivers can correctly set their
polling interval.

Signed-off-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 include/net/netevent.h | 1 +
 net/core/neighbour.c   | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/include/net/netevent.h b/include/net/netevent.h
index d8bbb38..f440df1 100644
--- a/include/net/netevent.h
+++ b/include/net/netevent.h
@@ -24,6 +24,7 @@ struct netevent_redirect {
 enum netevent_notif_type {
NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */
NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */
+   NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */
 };
 
 int register_netevent_notifier(struct notifier_block *nb);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 952aabb..5cdc62a 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2047,6 +2047,7 @@ static int neightbl_set(struct sk_buff *skb, struct 
nlmsghdr *nlh)
case NDTPA_DELAY_PROBE_TIME:
NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
  nla_get_msecs(tbp[i]));
+   
call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
break;
case NDTPA_RETRANS_TIME:
NEIGH_VAR_SET(p, RETRANS_TIME,
@@ -2930,6 +2931,7 @@ static void neigh_proc_update(struct ctl_table *ctl, int 
write)
return;
 
set_bit(index, p->data_state);
+   call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
if (!dev) /* NULL dev means this is default value */
neigh_copy_dflt_parms(net, p, index);
 }
-- 
2.5.5



Re: [PATCH 6/7] dt-bindings: net: bgmac: add bindings documentation for bgmac

2016-07-01 Thread Arnd Bergmann
On Friday, July 1, 2016 11:17:25 AM CEST Jon Mason wrote:
> On Fri, Jul 1, 2016 at 5:46 AM, Arnd Bergmann  wrote:
> > On Thursday, June 30, 2016 6:59:13 PM CEST Jon Mason wrote:
> >> +
> >> +Required properties:
> >> + - compatible: "brcm,bgmac-nsp"
> >> + - reg:Address and length of the GMAC registers,
> >> +   Address and length of the GMAC IDM registers
> >> + - reg-names:  Names of the registers.  Must have both "gmac_base" and
> >> +   "idm_base"
> >> + - interrupts: Interrupt number
> >> +
> >
> >
> > "brcm,bgmac-nsp" sounds a bit too general. As I understand, this is a family
> > of SoCs that might not all have the exact same implementation of this
> > ethernet device, as we can see from the long lookup table in bgmac_probe().
> 
> The Broadcom iProc family of SoCs contains:
> Northstar
> Northstar Plus
> Cygnus
> Northstar 2
> a few SoCs that are under development
> and a number of ethernet switches (which might never be officially supported)
> 
> Each one of these SoCs could have a different revision of the gmac IP
> block, but they should be uniform within each SoC (though there might
> be a A0/B0 change necessary).  The Northstar Plus product family has a
> number of different implementations, but the SoC is unchanged.  So, I
> think this might be too specific, when we really need a general compat
> string.

Ok, thanks for the clarification, that sounds good enough.

> Broadcom has a history of sharing IP blocks amongst the different
> divisions.  So, this driver might be used on other SoC families (as it
> apparently has been done in the past, based on the code you
> reference).  I do not know of any way to know what legacy, non-iProc
> chips have used this IP block.  I can make this "brcm,iproc-bgmac",
> and add "brcm,iproc-nsp-bgmac" as an alternative compatible string in
> this file (which I believe you are suggesting), but there might be
> non-iProc SoCs that use this driver.  Is this acceptable?

If it is also used outside of iProc, then I see no need for the
extra compatible string, although it would not do any harm either.

Ideally we should name it whatever the name for this IP block is
inside of the company, with "nsp" as the designation for the variant
in Northstar Plus. A lot of Broadcom IP blocks themselves seem to have
some four-digit or five-digit number, maybe this one does too?

Arnd


GOOD INTEREST

2016-07-01 Thread zam chukwu
Dearest,

I am interested in establishing and operating a very viable business
as a means of investment abroad. I do not know too well on how this is
done in your country, so I will need you to help me in this regard.

My preference is any good profit yielding business and I would any
viable ideas you could come up with. I will also need you to help me
look for properties like homes and lands for sale as I proposing to
invest the sum of Fifty Million United States Dollars ($50,000,000
USD) for this. I do not know if you can and will be of help to me. For
a brief on my personality; My name is Elder Zireva, a  in Accra Ghana.

I am a retired Business man formally into private Shipping Business. I
62 years of age, married with a wife and 4 lovely kids. I dropped my
Shipping business because it wasn't producing profitable income and
above all, the Government is too inquisitive with a lot of political
enemies here and there.

My need for this business proposition and to acquire these properties
is very urgent as I am planning to move out of this country with my
family down to your country. I want you to also help in finding a good
home where my family and I will live in. (Mini Estate)

Please I expect your good and prompt reply so that we can proceed swiftly.

I will need your phone and fax numbers for easier communication.

Best Regards,
Mr Zireva David.


RE: [RFC PATCH 0/2] net: ethernet: Add support for gmii2rgmii converter

2016-07-01 Thread Appana Durga Kedareswara Rao
Hi Andrew,

> On Fri, Jul 01, 2016 at 11:50:10AM +0530, Kedareswara rao Appana wrote:
> > This patch series does the following
> > ---> Add support for gmii2rgmii converter.
> 
> How generic is this gmii2rgmii IP block? Could it be used with any GMII and
> RGMII device?

This converter does GMII-to-RGMII conversion.
It can be used with any MAC whose MDIO bus is shared between the external PHY
and this converter. The converter IP has been validated with MACB,
but it should work with any MAC that has a shared MDIO bus (i.e. a single MDIO
bus with multiple PHYs)...

This converter works like below

MACB <==> GMII2RGMII <==> RGMII_PHY

MDIO<> GMII2RGMII
MCAB<===> 
  <> RGMII

Using MACB MDIO bus we can access both the converter and the external PHY.
We need to program the line speed of the converter during run time based on the 
External phy negotiated speed. 

MDIO interface is used to set operating speed (line speed) 

The converter has only one register (0x10) that we need to program to set the 
operating speed.
The composition of this register is similar to the IEEE standard 802.3 MDIO 
control register 0x0.

Please let me know if you are still not clear about how this converter works.

> 
> Should it be placed in drivers/net/phy, so making it easier to reuse?

OK, will move it to the drivers/net/phy folder in the next version...

> 
> Also, Russell Kings phylink might be a more natural structure for this. It is 
> hard to
> know when those patches will land, but it might be worth looking at.

Ok sure will take a look at this series once posted...

Regards,
Kedar.

> 
>Andrew


RE: [RFC PATCH 0/2] net: ethernet: Add support for gmii2rgmii converter

2016-07-01 Thread Appana Durga Kedareswara Rao
Hi Florian,

Thanks for the review...

> Le 30/06/2016 23:20, Kedareswara rao Appana a écrit :
> > This patch series does the following
> > ---> Add support for gmii2rgmii converter.
> > ---> Add support for gmii2rgmii converter in the macb driver.
> >
> > Earlier sent one RFC patch https://patchwork.kernel.org/patch/9186615/
> > which includes converter related stuff also in macb driver.
> > This patch series fixes this issue.
> 
> This still seems very adhoc and not completely explained. Can you clarify how
> the gmmi2rgmii converter gets used?

Sorry, I should have explained it clearly in the patch.
Will fix it in v2.

This converter works like below

MACB <==> GMII2RGMII <==> RGMII_PHY

MDIO<> GMII2RGMII
MCAB<===> 
  <> RGMII

Using MACB MDIO bus we can access both the converter and the external PHY.
We need to program the line speed of the converter during run time based on the
External phy negotiated speed. 

MDIO interface is used to set operating speed (line speed) 

The converter has only one register (0x10) that we need to program to set the 
operating speed.
The composition of this register is similar to the IEEE standard 802.3 MDIO 
control register 0x0.

Please let me know if you are still not clear about how this converter works.

IP data sheet is available here @ 
http://www.xilinx.com/support/documentation/ip_documentation/gmii_to_rgmii/v3_0/pg160-gmii-to-rgmii.pdf
 

   
> 
> Is the expectation that a MACB Ethernet adapter will be connected to a RGMII
> PHY like this:
> 
> MACB <=> GMII2RGMII <=> RGMII PHY
> MACB MDIO <===> RGMII_PHY
> 
> and still have the ability to control both the PHY device's MDIO registers 
> and the
> GMII2RGMII converter and we need to make sure both have matching settings,
> or is it something like this:
> 
> MACB <=> GMII2RGMII <=> RGMII PHY
> MACB MDIO unconnected
> 
> and we lose the ability to query the PHY via MDIO?
> 
> As Nicolas pointed out, providing a binding documentation update may help
> reviewers understand what is being accomplished here.

Sure will fix in v2.

Regards,
Kedar.



RE: [RFC PATCH 1/2] net: ethernet: xilinx: Add gmii2rgmii converter support

2016-07-01 Thread Appana Durga Kedareswara Rao
Hi Florian,

Thanks for the review.

> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +
> > +static void xgmii2rgmii_fix_mac_speed(void *priv, unsigned int speed)
> > +{
> > +   struct gmii2rgmii *xphy = (struct xphy *)priv;
> 
> Why not pass struct xphy pointer directly?

Ok will fix in v2...

> 
> > +   struct phy_device *gmii2rgmii_phydev = xphy->gmii2rgmii_phy_dev;
> > +   u16 gmii2rgmii_reg = 0;
> > +
> > +   switch (speed) {
> > +   case 1000:
> > +   gmii2rgmii_reg |= XILINX_GMII2RGMII_SPEED1000;
> > +   break;
> > +   case 100:
> > +   gmii2rgmii_reg |= XILINX_GMII2RGMII_SPEED100;
> > +   break;
> > +   default:
> > +   return;
> > +   }
> > +
> > +   xphy->mdio_write(xphy->mii_bus, gmii2rgmii_phydev->mdio.addr,
> > +XILINX_GMII2RGMII_REG_NUM,
> > +gmii2rgmii_reg);
> > +}
> > +
> > +int gmii2rgmii_phyprobe(struct gmii2rgmii *xphy) {
> > +   struct device_node *phy_node;
> > +   struct phy_device *phydev;
> > +   struct device_node *np = (struct device_node *)xphy->platform_data;
> > +
> > +   phy_node = of_parse_phandle(np, "gmii2rgmii-phy-handle", 0);
> 
> Is that property documented in a binding document?

Will document. Will fix in v2...

> 
> > +   if (phy_node) {
> 
> Should not there be an else clause which does not assign
> xphy->fix_mac_speed in case this property lookup fails?

Will fix in v2...

> 
> > +   phydev = of_phy_attach(xphy->dev, phy_node, 0, 0);
> > +   if (!phydev) {
> > +   netdev_err(xphy->dev,
> > +  "%s: no gmii to rgmii converter found\n",
> > +  xphy->dev->name);
> > +   return -1;
> > +   }
> > +   xphy->gmii2rgmii_phy_dev = phydev;
> > +   }
> > +   xphy->fix_mac_speed = xgmii2rgmii_fix_mac_speed;
> > +
> > +   return 0;
> > +}
> > +EXPORT_SYMBOL(gmii2rgmii_phyprobe);
> > +
> > +MODULE_DESCRIPTION("Xilinx GMII2RGMII converter driver");
> > +MODULE_LICENSE("GPL");
> > diff --git a/include/linux/xilinx_gmii2rgmii.h
> > b/include/linux/xilinx_gmii2rgmii.h
> > new file mode 100644
> > index 000..64e1659
> > --- /dev/null
> > +++ b/include/linux/xilinx_gmii2rgmii.h
> > @@ -0,0 +1,24 @@
> > +#ifndef _GMII2RGMII_H
> > +#define _GMII2RGMII_H
> > +
> > +#include 
> > +#include 
> > +#include 
> > +
> > +#define XILINX_GMII2RGMII_FULLDPLX BMCR_FULLDPLX
> > +#define XILINX_GMII2RGMII_SPEED1000BMCR_SPEED1000
> > +#define XILINX_GMII2RGMII_SPEED100 BMCR_SPEED100
> > +#define XILINX_GMII2RGMII_REG_NUM  0x10
> 
> So the register semantics are fairly standard but not the register location, 
> have
> you considered writing a small PHY driver for this block?

I tried, but this PHY doesn't have any vendor/device IDs.
This converter won't fit the PHY framework, as we need to program the
PHY control register with the speed negotiated by the external PHY, as explained
in the other mail thread...

Regards,
Kedar. 


strange Mac OSX RST behavior

2016-07-01 Thread Jason Baron
I'm wondering if anybody else has run into this...

On Mac OSX 10.11.5 (latest version), we have found that when tcp
connections are abruptly terminated (via ^C), a FIN is sent followed
by an RST packet. The RST is sent with the same sequence number as the
FIN, and thus dropped since the stack only accepts RST packets matching
rcv_nxt (RFC 5961). This could also be resolved if Mac OSX replied with
an RST on the closed socket, but it appears that it does not.

The workaround here is then to reset the connection if the RST sequence
number is equal to rcv_nxt - 1 and we have already received a FIN.

The RST attack surface is limited b/c we only accept the RST after we've
accepted a FIN and have not previously sent a FIN and received back the
corresponding ACK. In other words RST is only accepted in the tcp
states: TCP_CLOSE_WAIT, TCP_LAST_ACK, and TCP_CLOSING.

I'm interested if anybody else has run into this issue. It's problematic
since it takes up server resources for sockets sitting in TCP_CLOSE_WAIT.
We are also in the process of contacting Apple to see what can be done
here...workaround patch is below.


Here is the sequence from wireshark, mac osx is client sending the
fin:

84581  14.752908  ->  TCP 66 49896 > http [FIN, ACK] 
Seq=673257230 Ack=924722210 Win=131072 Len=0 TSval=622455547 TSecr=346246436
84984  14.788056  ->  TCP 60 49896 > http [RST] 
Seq=673257230 Win=0 Len=0
84985  14.788061  ->  TCP 66 http > 49896 [ACK] 
Seq=924739994 Ack=673257231 Win=28960 Len=0 TSval=346246723 TSecr=622455547

followed by a bunch of retransmits from server:

85138  14.994217  ->  TCP 1054 [TCP segment of a 
reassembled PDU]
85237  15.348217  ->  TCP 1054 [TCP Retransmission] 
[TCP segment of a reassembled PDU]
85337  16.056224  ->  TCP 1054 [TCP Retransmission] 
[TCP segment of a reassembled PDU]
85436  17.472225  ->  TCP 1054 [TCP Retransmission] 
[TCP segment of a reassembled PDU]
85540  20.304222  ->  TCP 1054 [TCP Retransmission] 
[TCP segment of a reassembled PDU]
85644  25.968218  ->  TCP 1054 [TCP Retransmission] 
[TCP segment of a reassembled PDU]
85745  37.280230  ->  TCP 1054 [TCP Retransmission] 
[TCP segment of a reassembled PDU]
85845  59.904235  ->  TCP 1054 [TCP Retransmission] 
[TCP segment of a reassembled PDU]

Thanks,

-Jason

---
 net/ipv4/tcp_input.c | 25 +++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 94d4aff97523..b3c55b91140c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5155,6 +5155,25 @@ static int tcp_copy_to_iovec(struct sock *sk, struct 
sk_buff *skb, int hlen)
return err;
 }
 
+/*
+ * Mac OSX 10.11.5 can send a FIN followed by a RST where the RST
+ * has the same sequence number as the FIN. This is not compliant
+ * with RFC 5961, but ends up in a number of sockets tied up mostly
+ * in TCP_CLOSE_WAIT. The rst attack surface is limited b/c we only
+ * accept the RST after we've accepted a FIN and have not previously
+ * sent a FIN and received back the corresponding ACK.
+ */
+static bool tcp_fin_rst_check(struct sock *sk, struct sk_buff *skb)
+{
+   struct tcp_sock *tp = tcp_sk(sk);
+
+   return unlikely((TCP_SKB_CB(skb)->seq == (tp->rcv_nxt - 1)) &&
+   (TCP_SKB_CB(skb)->end_seq == (tp->rcv_nxt - 1)) &&
+   (sk->sk_state == TCP_CLOSE_WAIT ||
+sk->sk_state == TCP_LAST_ACK ||
+sk->sk_state == TCP_CLOSING));
+}
+
 /* Does PAWS and seqno based validation of an incoming segment, flags will
  * play significant role here.
  */
@@ -5193,7 +5212,8 @@ static bool tcp_validate_incoming(struct sock *sk, struct 
sk_buff *skb,
  LINUX_MIB_TCPACKSKIPPEDSEQ,
  &tp->last_oow_ack_time))
tcp_send_dupack(sk, skb);
-   }
+   } else if (tcp_fin_rst_check(sk, skb))
+   tcp_reset(sk);
goto discard;
}
 
@@ -5206,7 +5226,8 @@ static bool tcp_validate_incoming(struct sock *sk, struct 
sk_buff *skb,
 * else
 * Send a challenge ACK
 */
-   if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
+   if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt ||
+   tcp_fin_rst_check(sk, skb)) {
rst_seq_match = true;
} else if (tcp_is_sack(tp) && tp->rx_opt.num_sacks > 0) {
struct tcp_sack_block *sp = &tp->selective_acks[0];
-- 
2.6.1



Re: [PATCH 6/7] dt-bindings: net: bgmac: add bindings documentation for bgmac

2016-07-01 Thread Jon Mason
On Fri, Jul 1, 2016 at 5:46 AM, Arnd Bergmann  wrote:
> On Thursday, June 30, 2016 6:59:13 PM CEST Jon Mason wrote:
>> +
>> +Required properties:
>> + - compatible: "brcm,bgmac-nsp"
>> + - reg:Address and length of the GMAC registers,
>> +   Address and length of the GMAC IDM registers
>> + - reg-names:  Names of the registers.  Must have both "gmac_base" and
>> +   "idm_base"
>> + - interrupts: Interrupt number
>> +
>
>
> "brcm,bgmac-nsp" sounds a bit too general. As I understand, this is a family
> of SoCs that might not all have the exact same implementation of this
> ethernet device, as we can see from the long lookup table in bgmac_probe().

The Broadcom iProc family of SoCs contains:
Northstar
Northstar Plus
Cygnus
Northstar 2
a few SoCs that are under development
and a number of ethernet switches (which might never be officially supported)

Each one of these SoCs could have a different revision of the gmac IP
block, but they should be uniform within each SoC (though there might
be a A0/B0 change necessary).  The Northstar Plus product family has a
number of different implementations, but the SoC is unchanged.  So, I
think this might be too specific, when we really need a general compat
string.

Broadcom has a history of sharing IP blocks amongst the different
divisions.  So, this driver might be used on other SoC families (as it
apparently has been done in the past, based on the code you
reference).  I do not know of any way to know what legacy, non-iProc
chips have used this IP block.  I can make this "brcm,iproc-bgmac",
and add "brcm,iproc-nsp-bgmac" as an alternative compatible string in
this file (which I believe you are suggesting), but there might be
non-iProc SoCs that use this driver.  Is this acceptable?

Thanks,
Jon

> Please document the specific product numbers here that are publically
> known already. Having the driver match just on "brcm,bgmac-nsp" as a fallback
> is fine, so you can document that one as required for all users.
>
> Arnd


Re: [RFC PATCH 0/2] net: ethernet: Add support for gmii2rgmii converter

2016-07-01 Thread Andrew Lunn
On Fri, Jul 01, 2016 at 11:50:10AM +0530, Kedareswara rao Appana wrote:
> This patch series does the following
> ---> Add support for gmii2rgmii converter.

How generic is this gmii2rgmii IP block? Could it be used with any
GMII and RGMII device?

Should it be placed in drivers/net/phy, so making it easier to reuse?

Also, Russell Kings phylink might be a more natural structure for
this. It is hard to know when those patches will land, but it might be
worth looking at.

   Andrew


[patch net-next 23/42] mlxsw: spectrum_router: Implement fib4 add/del switchdev obj ops

2016-07-01 Thread Jiri Pirko
From: Jiri Pirko 

Implement ipv4 FIB entries addition and removal. Initially, we support
local and broadcast routes using "ip2me" trap action.
Also, unicast routes without nexthop are supported using "local" action.

Signed-off-by: Jiri Pirko 
Reviewed-by: Ido Schimmel 
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h |   5 +
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 245 +
 .../ethernet/mellanox/mlxsw/spectrum_switchdev.c   |   9 +
 3 files changed, 259 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 5b40dfc..877a879 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -476,5 +476,10 @@ static inline void mlxsw_sp_port_dcb_fini(struct 
mlxsw_sp_port *mlxsw_sp_port)
 
 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp);
+int mlxsw_sp_router_fib4_add(struct mlxsw_sp_port *mlxsw_sp_port,
+const struct switchdev_obj_ipv4_fib *fib4,
+struct switchdev_trans *trans);
+int mlxsw_sp_router_fib4_del(struct mlxsw_sp_port *mlxsw_sp_port,
+const struct switchdev_obj_ipv4_fib *fib4);
 
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 11dab74..7e3992a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -106,9 +106,19 @@ struct mlxsw_sp_fib_key {
unsigned char prefix_len;
 };
 
+enum mlxsw_sp_fib_entry_type {
+   MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
+   MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
+   MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
+};
+
 struct mlxsw_sp_fib_entry {
struct rhash_head ht_node;
struct mlxsw_sp_fib_key key;
+   enum mlxsw_sp_fib_entry_type type;
+   u8 added:1;
+   u16 rif; /* used for action local */
+   struct mlxsw_sp_vr *vr;
 };
 
 struct mlxsw_sp_fib {
@@ -567,3 +577,238 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
 {
__mlxsw_sp_router_fini(mlxsw_sp);
 }
+
+static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp,
+   struct mlxsw_sp_fib_entry *fib_entry,
+   enum mlxsw_reg_ralue_op op)
+{
+   char ralue_pl[MLXSW_REG_RALUE_LEN];
+   u32 *p_dip = (u32 *) fib_entry->key.addr;
+   struct mlxsw_sp_vr *vr = fib_entry->vr;
+
+   mlxsw_reg_ralue_pack4(ralue_pl, vr->proto, op, vr->id,
+ fib_entry->key.prefix_len, *p_dip);
+   mlxsw_reg_ralue_act_local_pack(ralue_pl,
+  MLXSW_REG_RALUE_TRAP_ACTION_NOP, 0,
+  fib_entry->rif);
+   return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
+}
+
+static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp,
+  struct mlxsw_sp_fib_entry *fib_entry,
+  enum mlxsw_reg_ralue_op op)
+{
+   char ralue_pl[MLXSW_REG_RALUE_LEN];
+   u32 *p_dip = (u32 *) fib_entry->key.addr;
+   struct mlxsw_sp_vr *vr = fib_entry->vr;
+
+   mlxsw_reg_ralue_pack4(ralue_pl, vr->proto, op, vr->id,
+ fib_entry->key.prefix_len, *p_dip);
+   mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
+   return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
+}
+
+static int mlxsw_sp_fib_entry_op4(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry,
+ enum mlxsw_reg_ralue_op op)
+{
+   switch (fib_entry->type) {
+   case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
+   return -EINVAL;
+   case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
+   return mlxsw_sp_fib_entry_op4_local(mlxsw_sp, fib_entry, op);
+   case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
+   return mlxsw_sp_fib_entry_op4_trap(mlxsw_sp, fib_entry, op);
+   }
+   return -EINVAL;
+}
+
+static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
+struct mlxsw_sp_fib_entry *fib_entry,
+enum mlxsw_reg_ralue_op op)
+{
+   switch (fib_entry->vr->proto) {
+   case MLXSW_SP_L3_PROTO_IPV4:
+   return mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op);
+   case MLXSW_SP_L3_PROTO_IPV6:
+   return -EINVAL;
+   }
+   return -EINVAL;
+}
+
+static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
+struct mlxsw_sp_fib_entry *fib_entry)
+{
+   enum mlxsw_reg_ralue_op op;
+
+   op = !fib_entry->added ? MLXSW_REG_RALUE_OP_WRITE_WRITE :
+

[patch net-next 42/42] mlxsw: Add the unresolved next-hops probes

2016-07-01 Thread Jiri Pirko
From: Yotam Gigi 

Now, the driver sends arp probes for all unresolved neighbours that are
currently a nexthop for some route on the system. The job is set
periodically every 5 seconds.

Signed-off-by: Yotam Gigi 
Signed-off-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h |  2 ++
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 33 +-
 2 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index ff5b859..ef4ac89 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -222,6 +222,8 @@ struct mlxsw_sp_router {
struct delayed_work dw;
unsigned long interval; /* ms */
} neighs_update;
+   struct delayed_work nexthop_probe_dw;
+#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
struct list_head nexthop_group_list;
struct list_head nexthop_neighs_list;
 };
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 2b20279..e084ea5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -845,6 +845,33 @@ static void mlxsw_sp_router_neighs_update_work(struct 
work_struct *work)
mlxsw_sp_router_neighs_update_work_schedule(mlxsw_sp);
 }
 
+static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
+{
+   struct mlxsw_sp_neigh_entry *neigh_entry;
+   struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
+router.nexthop_probe_dw.work);
+
+   /* Iterate over nexthop neighbours, find those who are unresolved and
+* send arp on them. This solves the chicken-egg problem when
+* the nexthop wouldn't get offloaded until the neighbor is resolved
+* but it wouldn't get resolved ever in case traffic is flowing in HW
+* using different nexthop.
+*
+* Take RTNL mutex here to prevent lists from changes.
+*/
+   rtnl_lock();
+   list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
+   nexthop_neighs_list_node) {
+   if (!(neigh_entry->n->nud_state & NUD_VALID) &&
+   !list_empty(&neigh_entry->nexthop_list))
+   neigh_event_send(neigh_entry->n, NULL);
+   }
+   rtnl_unlock();
+
+   mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw,
+  MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
+}
+
 static void
 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
  struct mlxsw_sp_neigh_entry *neigh_entry,
@@ -1004,10 +1031,13 @@ static int mlxsw_sp_neigh_init(struct mlxsw_sp 
*mlxsw_sp)
if (err)
goto err_register_netevent_notifier;
 
+   /* Create the delayed works for the activity_update */
INIT_DELAYED_WORK(&mlxsw_sp->router.neighs_update.dw,
  mlxsw_sp_router_neighs_update_work);
+   INIT_DELAYED_WORK(&mlxsw_sp->router.nexthop_probe_dw,
+ mlxsw_sp_router_probe_unresolved_nexthops);
mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw, 0);
-
+   mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, 0);
return 0;
 
 err_register_netevent_notifier:
@@ -1018,6 +1048,7 @@ err_register_netevent_notifier:
 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
 {
cancel_delayed_work_sync(&mlxsw_sp->router.neighs_update.dw);
+   cancel_delayed_work_sync(&mlxsw_sp->router.nexthop_probe_dw);
unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb);
rhashtable_destroy(&mlxsw_sp->router.neigh_ht);
 }
-- 
2.5.5



Re: [RFC PATCH 0/2] net: ethernet: Add support for gmii2rgmii converter

2016-07-01 Thread Florian Fainelli
Le 30/06/2016 23:20, Kedareswara rao Appana a écrit :
> This patch series does the following
> ---> Add support for gmii2rgmii converter.
> ---> Add support for gmii2rgmii converter in the macb driver.
> 
> Earlier sent one RFC patch https://patchwork.kernel.org/patch/9186615/ 
> which includes converter related stuff also in macb driver.
> This patch series fixes this issue.

This still seems very adhoc and not completely explained. Can you
clarify how the gmmi2rgmii converter gets used?

Is the expectation that a MACB Ethernet adapter will be connected to a
RGMII PHY like this:

MACB <=> GMII2RGMII <=> RGMII PHY
MACB MDIO <===> RGMII_PHY

and still have the ability to control both the PHY device's MDIO
registers and the GMII2RGMII converter and we need to make sure both
have matching settings, or is it something like this:

MACB <=> GMII2RGMII <=> RGMII PHY
MACB MDIO unconnected

and we lose the ability to query the PHY via MDIO?

As Nicolas pointed out, providing a binding documentation update may
help reviewers understand what is being accomplished here.

Thanks!

> 
> Kedareswara rao Appana (2):
>   net: ethernet: xilinx: Add gmii2rgmii converter support
>   net: macb: Add gmii2rgmii phy converter support
> 
>  drivers/net/ethernet/cadence/macb.c |   21 ++-
>  drivers/net/ethernet/cadence/macb.h |3 +
>  drivers/net/ethernet/xilinx/Kconfig |7 ++
>  drivers/net/ethernet/xilinx/Makefile|1 +
>  drivers/net/ethernet/xilinx/xilinx_gmii2rgmii.c |   76 
> +++
>  include/linux/xilinx_gmii2rgmii.h   |   24 +++
>  6 files changed, 131 insertions(+), 1 deletions(-)
>  create mode 100644 drivers/net/ethernet/xilinx/xilinx_gmii2rgmii.c
>  create mode 100644 include/linux/xilinx_gmii2rgmii.h
> 


-- 
Florian


Re: [RFC PATCH 1/2] net: ethernet: xilinx: Add gmii2rgmii converter support

2016-07-01 Thread Florian Fainelli
Le 30/06/2016 23:20, Kedareswara rao Appana a écrit :
> This patch adds support for gmii2rgmii converter.
> 
> The GMII to RGMII IP core provides the Reduced Gigabit Media
> Independent Interface (RGMII) between Ethernet physical media
> Devices and the Gigabit Ethernet controller. This core can
> switch dynamically between the three different speed modes of
> Operation.
> MDIO interface is used to set operating speed of Ethernet MAC
> 
> Signed-off-by: Kedareswara rao Appana 
> ---
>  drivers/net/ethernet/xilinx/Kconfig |7 ++
>  drivers/net/ethernet/xilinx/Makefile|1 +
>  drivers/net/ethernet/xilinx/xilinx_gmii2rgmii.c |   76 
> +++
>  include/linux/xilinx_gmii2rgmii.h   |   24 +++
>  4 files changed, 108 insertions(+), 0 deletions(-)
>  create mode 100644 drivers/net/ethernet/xilinx/xilinx_gmii2rgmii.c
>  create mode 100644 include/linux/xilinx_gmii2rgmii.h
> 
> diff --git a/drivers/net/ethernet/xilinx/Kconfig 
> b/drivers/net/ethernet/xilinx/Kconfig
> index 4f5c024..d7df70a 100644
> --- a/drivers/net/ethernet/xilinx/Kconfig
> +++ b/drivers/net/ethernet/xilinx/Kconfig
> @@ -39,4 +39,11 @@ config XILINX_LL_TEMAC
> This driver supports the Xilinx 10/100/1000 LocalLink TEMAC
> core used in Xilinx Spartan and Virtex FPGAs
>  
> +config XILINX_GMII2RGMII
> + tristate "Xilinx GMII2RGMII converter driver"
> + ---help---
> +   This driver support xilinx GMII to RGMII IP core it provides
> +   the Reduced Gigabit Media Independent Interface(RGMII) between
> +   Ethernet physical media devices and the Gigabit Ethernet controller.
> +
>  endif # NET_VENDOR_XILINX
> diff --git a/drivers/net/ethernet/xilinx/Makefile 
> b/drivers/net/ethernet/xilinx/Makefile
> index 214205e..bca0da0 100644
> --- a/drivers/net/ethernet/xilinx/Makefile
> +++ b/drivers/net/ethernet/xilinx/Makefile
> @@ -7,3 +7,4 @@ obj-$(CONFIG_XILINX_LL_TEMAC) += ll_temac.o
>  obj-$(CONFIG_XILINX_EMACLITE) += xilinx_emaclite.o
>  xilinx_emac-objs := xilinx_axienet_main.o xilinx_axienet_mdio.o
>  obj-$(CONFIG_XILINX_AXI_EMAC) += xilinx_emac.o
> +obj-$(CONFIG_XILINX_GMII2RGMII) += xilinx_gmii2rgmii.o
> diff --git a/drivers/net/ethernet/xilinx/xilinx_gmii2rgmii.c 
> b/drivers/net/ethernet/xilinx/xilinx_gmii2rgmii.c
> new file mode 100644
> index 000..ca9f1ad
> --- /dev/null
> +++ b/drivers/net/ethernet/xilinx/xilinx_gmii2rgmii.c
> @@ -0,0 +1,76 @@
> +/* Xilinx GMII2RGMII Converter driver
> + *
> + * Copyright (C) 2016 Xilinx, Inc.
> + *
> + * Author: Kedareswara rao Appana 
> + *
> + * Description:
> + * This driver is developed for Xilinx GMII2RGMII Converter
> + *
> + * This program is free software: you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation, either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + */
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +static void xgmii2rgmii_fix_mac_speed(void *priv, unsigned int speed)
> +{
> + struct gmii2rgmii *xphy = (struct xphy *)priv;

Why not pass struct xphy pointer directly?

> + struct phy_device *gmii2rgmii_phydev = xphy->gmii2rgmii_phy_dev;
> + u16 gmii2rgmii_reg = 0;
> +
> + switch (speed) {
> + case 1000:
> + gmii2rgmii_reg |= XILINX_GMII2RGMII_SPEED1000;
> + break;
> + case 100:
> + gmii2rgmii_reg |= XILINX_GMII2RGMII_SPEED100;
> + break;
> + default:
> + return;
> + }
> +
> + xphy->mdio_write(xphy->mii_bus, gmii2rgmii_phydev->mdio.addr,
> +  XILINX_GMII2RGMII_REG_NUM,
> +  gmii2rgmii_reg);
> +}
> +
> +int gmii2rgmii_phyprobe(struct gmii2rgmii *xphy)
> +{
> + struct device_node *phy_node;
> + struct phy_device *phydev;
> + struct device_node *np = (struct device_node *)xphy->platform_data;
> +
> + phy_node = of_parse_phandle(np, "gmii2rgmii-phy-handle", 0);

Is that property documented in a binding document?

> + if (phy_node) {

Should not there be an else clause which does not assign
xphy->fix_mac_speed in case this property lookup fails?

> + phydev = of_phy_attach(xphy->dev, phy_node, 0, 0);
> + if (!phydev) {
> + netdev_err(xphy->dev,
> +"%s: no gmii to rgmii converter found\n",
> +xphy->dev->name);
> + return -1;
> + }
> + xphy->gmii2rgmii_phy_dev = phydev;
> + }
> + 

Re: [RFC] WireGuard: next generation secure network tunnel

2016-07-01 Thread Richard Weinberger
Jason,

Am 01.07.2016 um 16:25 schrieb Jason A. Donenfeld:
> Hi Richard,
> 
> On Fri, Jul 1, 2016 at 1:42 PM, Richard Weinberger
>  wrote:
>> So every logical tunnel will allocate a new net device?
>> Doesn't this scale badly? I have ipsec alike setups
>> with many, many road warriors in mind.
> 
> No, this isn't the case. Each net device has multiple peers. Check out
> the example config on the website, pasted here for convenience:
> 
>> [Interface]
>> PrivateKey = yAnz5TF+lXXJte14tji3zlMNq+hd2rYUIgJBgB3fBmk=
>> ListenPort = 41414
>>
>> [Peer]
>> PublicKey = xTIBA5rboUvnH4htodjb6e697QjLERt1NAB4mZqp8Dg=
>> AllowedIPs = 10.192.122.3/32, 10.192.124.1/24
>>
>> [Peer]
>> PublicKey = TrMvSoP4jYQlY6RIzBgbssQqY3vxI2Pi+y71lOWWXX0=
>> AllowedIPs = 10.192.122.4/32, 192.168.0.0/16
>>
>> [Peer]
>> PublicKey = gN65BkIKy1eCE9pP1wdc8ROUtkHLF2PfAqYdyYBz6EA=
>> AllowedIPs = 10.10.10.230/32
> 
> If that file is example.conf, you could set up a single device like this:
> 
> $ ip link add dev wg0 type wireguard
> $ wg setconf wg0 example.conf
> 
> That single netdev is now configured to communicate with several peers.
> 
> I hope this clarifies things. Let me know if you have further questions.

Yes. Makes sense. :-)

Thanks,
//richard


Re: [patch net-next 10/42] mlxsw: spectrum_router: Add basic ipv4 router initialization

2016-07-01 Thread David Ahern

On 7/1/16 8:04 AM, Jiri Pirko wrote:

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 05d5fcc..c2ac037 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -74,6 +74,8 @@

 #define MLXSW_SP_CELL_FACTOR 2 /* 2 * cell_size / (IPG + cell_size + 1) */

+#define MLXSW_SP_RIF_MAX 800


At most 800 RIFs can be created? Why 800?




Re: [PATCH v3] netfilter/nflog: nflog-range does not truncate packets (userspace)

2016-07-01 Thread Pablo Neira Ayuso
On Fri, Jun 24, 2016 at 04:42:31PM -0400, Vishwanath Pai wrote:
> Added tests to libxt_NFLOG.t for the new option --nflog-size
> 
> --
> 
> netfilter/nflog: nflog-range does not truncate packets
> 
> The option --nflog-range has never worked, but we cannot just fix this
> because users might be using this option and their behavior would
> change. Instead add a new option --nflog-size. This option works the same
> way nflog-range should have, and both of them are mutually exclusive. When
> someone uses --nflog-range we print a warning message informing them that
> this feature has no effect.
> 
> To indicate to the kernel that the user has set --nflog-size we have to pass a
> new flag XT_NFLOG_F_COPY_LEN.
> 
> Also updated the man page to reflect the new option and added tests to
> extensions/libxt_NFLOG.t

Applied to iptables userspace, thanks.


Re: [PATCH 6/7] dt-bindings: net: bgmac: add bindings documentation for bgmac

2016-07-01 Thread Jon Mason
On Thu, Jun 30, 2016 at 10:56 PM, Rob Herring  wrote:
> On Thu, Jun 30, 2016 at 06:59:13PM -0400, Jon Mason wrote:
>> Signed-off-by: Jon Mason 
>> ---
>>  .../devicetree/bindings/net/brcm,bgmac-nsp.txt | 24 
>> ++
>>  1 file changed, 24 insertions(+)
>>  create mode 100644 Documentation/devicetree/bindings/net/brcm,bgmac-nsp.txt
>>
>> diff --git a/Documentation/devicetree/bindings/net/brcm,bgmac-nsp.txt 
>> b/Documentation/devicetree/bindings/net/brcm,bgmac-nsp.txt
>> new file mode 100644
>> index 000..022946c
>> --- /dev/null
>> +++ b/Documentation/devicetree/bindings/net/brcm,bgmac-nsp.txt
>> @@ -0,0 +1,24 @@
>> +Broadcom GMAC Ethernet Controller Device Tree Bindings
>> +-
>> +
>> +Required properties:
>> + - compatible:   "brcm,bgmac-nsp"
>
> Usually we do <chip>-<ip> order.

Thanks, I'll make the necessary changes and push out a v2.

>
>> + - reg:  Address and length of the GMAC registers,
>> + Address and length of the GMAC IDM registers
>> + - reg-names:Names of the registers.  Must have both "gmac_base" and
>> + "idm_base"
>> + - interrupts:   Interrupt number
>> +
>> +Optional properties:
>> +- mac-address:   See ethernet.txt file in the same directory
>> +
>> +Examples:
>> +
>> +gmac0: ethernet@18022000 {
>> + compatible = "brcm,bgmac-nsp";
>> + reg = <0x18022000 0x1000>,
>> +   <0x1811 0x1000>;
>> + reg-names = "gmac_base", "idm_base";
>> + interrupts = ;
>> + status = "disabled";
>> +};
>> --
>> 1.9.1
>>


[patch net-next 09/42] mlxsw: spectrum: Initialize ports at the end of init sequence

2016-07-01 Thread Jiri Pirko
From: Ido Schimmel 

During ports initialization a net device is registered for each
available port, which implies the port is usable. However, a port is
only usable after the different parts of the device (e.g. flooding,
buffers) are initialized. This is especially important now, when we must
initialize the router before the ports, as otherwise the device can't be
initialized.

Solve that by initializing the switch ports at the end of init sequence.

Also, remove an unnecessary warning about port up/down events, which
would otherwise be invoked whenever removing the driver, as ports are
removed before unregistering the listener for these events.

Signed-off-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 25 +++--
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 30fe0d2..b6f4dfb 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2099,11 +2099,8 @@ static void mlxsw_sp_pude_event_func(const struct 
mlxsw_reg_info *reg,
 
local_port = mlxsw_reg_pude_local_port_get(pude_pl);
mlxsw_sp_port = mlxsw_sp->ports[local_port];
-   if (!mlxsw_sp_port) {
-   dev_warn(mlxsw_sp->bus_info->dev, "Port %d: Link event received 
for non-existent port\n",
-local_port);
+   if (!mlxsw_sp_port)
return;
-   }
 
status = mlxsw_reg_pude_oper_status_get(pude_pl);
if (status == MLXSW_PORT_OPER_STATUS_UP) {
@@ -2405,16 +2402,10 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
return err;
}
 
-   err = mlxsw_sp_ports_create(mlxsw_sp);
-   if (err) {
-   dev_err(mlxsw_sp->bus_info->dev, "Failed to create ports\n");
-   return err;
-   }
-
err = mlxsw_sp_event_register(mlxsw_sp, MLXSW_TRAP_ID_PUDE);
if (err) {
dev_err(mlxsw_sp->bus_info->dev, "Failed to register for PUDE 
events\n");
-   goto err_event_register;
+   return err;
}
 
err = mlxsw_sp_traps_init(mlxsw_sp);
@@ -2447,8 +2438,16 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
goto err_switchdev_init;
}
 
+   err = mlxsw_sp_ports_create(mlxsw_sp);
+   if (err) {
+   dev_err(mlxsw_sp->bus_info->dev, "Failed to create ports\n");
+   goto err_ports_create;
+   }
+
return 0;
 
+err_ports_create:
+   mlxsw_sp_switchdev_fini(mlxsw_sp);
 err_switchdev_init:
 err_lag_init:
mlxsw_sp_buffers_fini(mlxsw_sp);
@@ -2457,8 +2456,6 @@ err_flood_init:
mlxsw_sp_traps_fini(mlxsw_sp);
 err_rx_listener_register:
mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE);
-err_event_register:
-   mlxsw_sp_ports_remove(mlxsw_sp);
return err;
 }
 
@@ -2466,11 +2463,11 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
 {
struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
 
+   mlxsw_sp_ports_remove(mlxsw_sp);
mlxsw_sp_switchdev_fini(mlxsw_sp);
mlxsw_sp_buffers_fini(mlxsw_sp);
mlxsw_sp_traps_fini(mlxsw_sp);
mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE);
-   mlxsw_sp_ports_remove(mlxsw_sp);
WARN_ON(!list_empty(_sp->fids));
 }
 
-- 
2.5.5



[patch net-next 10/42] mlxsw: spectrum_router: Add basic ipv4 router initialization

2016-07-01 Thread Jiri Pirko
From: Ido Schimmel 

Create a skeleton router file and do basic HW initialization of the router.

Signed-off-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/Makefile   |  2 +-
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c |  9 +++
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h |  5 ++
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 68 ++
 4 files changed, 83 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c

diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile 
b/drivers/net/ethernet/mellanox/mlxsw/Makefile
index 9b5ebf8..ea05f8a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Makefile
+++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile
@@ -7,5 +7,5 @@ obj-$(CONFIG_MLXSW_SWITCHX2)+= mlxsw_switchx2.o
 mlxsw_switchx2-objs:= switchx2.o
 obj-$(CONFIG_MLXSW_SPECTRUM)   += mlxsw_spectrum.o
 mlxsw_spectrum-objs:= spectrum.o spectrum_buffers.o \
-  spectrum_switchdev.o
+  spectrum_switchdev.o spectrum_router.o
 mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB)+= spectrum_dcb.o
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index b6f4dfb..d011902 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2438,6 +2438,12 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
goto err_switchdev_init;
}
 
+   err = mlxsw_sp_router_init(mlxsw_sp);
+   if (err) {
+   dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize 
router\n");
+   goto err_router_init;
+   }
+
err = mlxsw_sp_ports_create(mlxsw_sp);
if (err) {
dev_err(mlxsw_sp->bus_info->dev, "Failed to create ports\n");
@@ -2447,6 +2453,8 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
return 0;
 
 err_ports_create:
+   mlxsw_sp_router_fini(mlxsw_sp);
+err_router_init:
mlxsw_sp_switchdev_fini(mlxsw_sp);
 err_switchdev_init:
 err_lag_init:
@@ -2464,6 +2472,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
 
mlxsw_sp_ports_remove(mlxsw_sp);
+   mlxsw_sp_router_fini(mlxsw_sp);
mlxsw_sp_switchdev_fini(mlxsw_sp);
mlxsw_sp_buffers_fini(mlxsw_sp);
mlxsw_sp_traps_fini(mlxsw_sp);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 05d5fcc..c2ac037 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -74,6 +74,8 @@
 
 #define MLXSW_SP_CELL_FACTOR 2 /* 2 * cell_size / (IPG + cell_size + 1) */
 
+#define MLXSW_SP_RIF_MAX 800
+
 static inline u16 mlxsw_sp_pfc_delay_get(int mtu, u16 delay)
 {
delay = MLXSW_SP_BYTES_TO_CELLS(DIV_ROUND_UP(delay, BITS_PER_BYTE));
@@ -411,4 +413,7 @@ static inline void mlxsw_sp_port_dcb_fini(struct 
mlxsw_sp_port *mlxsw_sp_port)
 
 #endif
 
+int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp);
+
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
new file mode 100644
index 000..8d70496
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -0,0 +1,68 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko 
+ * Copyright (c) 2016 Ido Schimmel 
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *notice, this list of conditions and the following disclaimer in the
+ *documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *contributors may be used to endorse or promote products derived from
+ *this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 

[patch net-next 36/42] mlxsw: spectrum: Define sizes of KVD areas

2016-07-01 Thread Jiri Pirko
From: Jiri Pirko 

Override the defaults and define the area sizes ourselves.

Signed-off-by: Jiri Pirko 
Reviewed-by: Ido Schimmel 
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 4 
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 4 
 2 files changed, 8 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 9bebb7a..c812513 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2356,6 +2356,10 @@ static struct mlxsw_config_profile 
mlxsw_sp_config_profile = {
.max_ib_mc  = 0,
.used_max_pkey  = 1,
.max_pkey   = 0,
+   .used_kvd_sizes = 1,
+   .kvd_linear_size= MLXSW_SP_KVD_LINEAR_SIZE,
+   .kvd_hash_single_size   = MLXSW_SP_KVD_HASH_SINGLE_SIZE,
+   .kvd_hash_double_size   = MLXSW_SP_KVD_HASH_DOUBLE_SIZE,
.swid_config= {
{
.used_type  = 1,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 9c2a60f..f7d34d8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -76,6 +76,10 @@
 #define MLXSW_SP_BYTES_TO_CELLS(b) DIV_ROUND_UP(b, MLXSW_SP_BYTES_PER_CELL)
 #define MLXSW_SP_CELLS_TO_BYTES(c) (c * MLXSW_SP_BYTES_PER_CELL)
 
+#define MLXSW_SP_KVD_LINEAR_SIZE 65536 /* entries */
+#define MLXSW_SP_KVD_HASH_SINGLE_SIZE 163840 /* entries */
+#define MLXSW_SP_KVD_HASH_DOUBLE_SIZE 32768 /* entries */
+
 /* Maximum delay buffer needed in case of PAUSE frames, in cells.
  * Assumes 100m cable and maximum MTU.
  */
-- 
2.5.5



[patch net-next 27/42] mlxsw: spectrum: Unsplit the vFID range

2016-07-01 Thread Jiri Pirko
From: Ido Schimmel 

Previous commit deprecated the vFIDs used to get traffic to the CPU
('port_vfids'). Thus, we now use the vFIDs as god intended and the
artificial split is no longer needed.

Rename functions and variables to reflect that.

Signed-off-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 55 +-
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h |  4 +-
 2 files changed, 30 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index bd8448a..e987a8a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2235,7 +2235,7 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
mlxsw_sp->core = mlxsw_core;
mlxsw_sp->bus_info = mlxsw_bus_info;
INIT_LIST_HEAD(_sp->fids);
-   INIT_LIST_HEAD(_sp->br_vfids.list);
+   INIT_LIST_HEAD(_sp->vfids.list);
INIT_LIST_HEAD(_sp->br_mids.list);
 
err = mlxsw_sp_base_mac_get(mlxsw_sp);
@@ -2320,6 +2320,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
mlxsw_sp_buffers_fini(mlxsw_sp);
mlxsw_sp_traps_fini(mlxsw_sp);
mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE);
+   WARN_ON(!list_empty(_sp->vfids.list));
WARN_ON(!list_empty(_sp->fids));
for (i = 0; i < MLXSW_SP_RIF_MAX; i++)
WARN_ON_ONCE(mlxsw_sp->rifs[i]);
@@ -3373,12 +3374,12 @@ static int mlxsw_sp_netdevice_lag_event(struct 
net_device *lag_dev,
 }
 
 static struct mlxsw_sp_fid *
-mlxsw_sp_br_vfid_find(const struct mlxsw_sp *mlxsw_sp,
- const struct net_device *br_dev)
+mlxsw_sp_vfid_find(const struct mlxsw_sp *mlxsw_sp,
+  const struct net_device *br_dev)
 {
struct mlxsw_sp_fid *f;
 
-   list_for_each_entry(f, _sp->br_vfids.list, list) {
+   list_for_each_entry(f, _sp->vfids.list, list) {
if (f->dev == br_dev)
return f;
}
@@ -3386,9 +3387,9 @@ mlxsw_sp_br_vfid_find(const struct mlxsw_sp *mlxsw_sp,
return NULL;
 }
 
-static u16 mlxsw_sp_avail_br_vfid_get(const struct mlxsw_sp *mlxsw_sp)
+static u16 mlxsw_sp_avail_vfid_get(const struct mlxsw_sp *mlxsw_sp)
 {
-   return find_first_zero_bit(mlxsw_sp->br_vfids.mapped,
+   return find_first_zero_bit(mlxsw_sp->vfids.mapped,
   MLXSW_SP_VFID_MAX);
 }
 
@@ -3400,17 +3401,17 @@ static int mlxsw_sp_vfid_op(struct mlxsw_sp *mlxsw_sp, 
u16 fid, bool create)
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl);
 }
 
-static void mlxsw_sp_vport_br_vfid_leave(struct mlxsw_sp_port *mlxsw_sp_vport);
+static void mlxsw_sp_vport_vfid_leave(struct mlxsw_sp_port *mlxsw_sp_vport);
 
-static struct mlxsw_sp_fid *mlxsw_sp_br_vfid_create(struct mlxsw_sp *mlxsw_sp,
-   struct net_device *br_dev)
+static struct mlxsw_sp_fid *mlxsw_sp_vfid_create(struct mlxsw_sp *mlxsw_sp,
+struct net_device *br_dev)
 {
struct device *dev = mlxsw_sp->bus_info->dev;
struct mlxsw_sp_fid *f;
u16 vfid, fid;
int err;
 
-   vfid = mlxsw_sp_avail_br_vfid_get(mlxsw_sp);
+   vfid = mlxsw_sp_avail_vfid_get(mlxsw_sp);
if (vfid == MLXSW_SP_VFID_MAX) {
dev_err(dev, "No available vFIDs\n");
return ERR_PTR(-ERANGE);
@@ -3427,12 +3428,12 @@ static struct mlxsw_sp_fid 
*mlxsw_sp_br_vfid_create(struct mlxsw_sp *mlxsw_sp,
if (!f)
goto err_allocate_vfid;
 
-   f->leave = mlxsw_sp_vport_br_vfid_leave;
+   f->leave = mlxsw_sp_vport_vfid_leave;
f->fid = fid;
f->dev = br_dev;
 
-   list_add(>list, _sp->br_vfids.list);
-   set_bit(vfid, mlxsw_sp->br_vfids.mapped);
+   list_add(>list, _sp->vfids.list);
+   set_bit(vfid, mlxsw_sp->vfids.mapped);
 
return f;
 
@@ -3441,12 +3442,12 @@ err_allocate_vfid:
return ERR_PTR(-ENOMEM);
 }
 
-static void mlxsw_sp_br_vfid_destroy(struct mlxsw_sp *mlxsw_sp,
-struct mlxsw_sp_fid *f)
+static void mlxsw_sp_vfid_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fid *f)
 {
u16 vfid = mlxsw_sp_fid_to_vfid(f->fid);
 
-   clear_bit(vfid, mlxsw_sp->br_vfids.mapped);
+   clear_bit(vfid, mlxsw_sp->vfids.mapped);
list_del(>list);
 
mlxsw_sp_vfid_op(mlxsw_sp, f->fid, false);
@@ -3464,15 +3465,15 @@ static int mlxsw_sp_vport_fid_map(struct mlxsw_sp_port 
*mlxsw_sp_vport, u16 fid,
vid);
 }
 
-static int mlxsw_sp_vport_br_vfid_join(struct mlxsw_sp_port *mlxsw_sp_vport,
-  struct net_device *br_dev)
+static int 

[patch net-next 40/42] mlxsw: spectrum_router: Implement next-hop routing

2016-07-01 Thread Jiri Pirko
From: Jiri Pirko 

Implement next-hop routing offload including ECMP. To make it possible,
introduce next-hop group entity. This entity keeps track of resolved
neighbours and updates the HW adjacency table accordingly. Note that HW
next-hops are stored in this adjacency table in the form of MAC addresses.

Signed-off-by: Jiri Pirko 
Reviewed-by: Ido Schimmel 
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h |   1 +
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 493 -
 2 files changed, 492 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index e781128..0fe6051 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -222,6 +222,7 @@ struct mlxsw_sp_router {
struct delayed_work dw;
unsigned long interval; /* ms */
} neighs_update;
+   struct list_head nexthop_group_list;
 };
 
 struct mlxsw_sp {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index ed0e6c0..dc13178 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -117,6 +117,8 @@ enum mlxsw_sp_fib_entry_type {
MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
 };
 
+struct mlxsw_sp_nexthop_group;
+
 struct mlxsw_sp_fib_entry {
struct rhash_head ht_node;
struct mlxsw_sp_fib_key key;
@@ -124,6 +126,8 @@ struct mlxsw_sp_fib_entry {
u8 added:1;
u16 rif; /* used for action local */
struct mlxsw_sp_vr *vr;
+   struct list_head nexthop_group_node;
+   struct mlxsw_sp_nexthop_group *nh_group;
 };
 
 struct mlxsw_sp_fib {
@@ -563,6 +567,9 @@ struct mlxsw_sp_neigh_entry {
struct delayed_work dw;
struct mlxsw_sp_port *mlxsw_sp_port;
unsigned char ha[ETH_ALEN];
+   struct list_head nexthop_list; /* list of nexthops using
+   * this neigh entry
+   */
 };
 
 static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
@@ -606,6 +613,7 @@ mlxsw_sp_neigh_entry_create(const void *addr, size_t 
addr_len,
neigh_entry->rif = rif;
neigh_entry->n = n;
INIT_DELAYED_WORK(_entry->dw, mlxsw_sp_router_neigh_update_hw);
+   INIT_LIST_HEAD(_entry->nexthop_list);
return neigh_entry;
 }
 
@@ -808,6 +816,11 @@ static void mlxsw_sp_router_neighs_update_work(struct 
work_struct *work)
mlxsw_sp_router_neighs_update_work_schedule(mlxsw_sp);
 }
 
+static void
+mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_neigh_entry *neigh_entry,
+ bool removing);
+
 static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work)
 {
struct mlxsw_sp_neigh_entry *neigh_entry =
@@ -849,6 +862,7 @@ static void mlxsw_sp_router_neigh_update_hw(struct 
work_struct *work)
} else {
neigh_entry->offloaded = true;
}
+   mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, false);
} else if (removing) {
mlxsw_reg_rauht_pack4(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE,
  neigh_entry->rif,
@@ -861,6 +875,7 @@ static void mlxsw_sp_router_neigh_update_hw(struct 
work_struct *work)
} else {
neigh_entry->offloaded = false;
}
+   mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, true);
}
 
neigh_release(n);
@@ -978,6 +993,434 @@ static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
rhashtable_destroy(_sp->router.neigh_ht);
 }
 
+struct mlxsw_sp_nexthop {
+   struct list_head neigh_list_node; /* member of neigh entry list */
+   struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
+   * this belongs to
+   */
+   u8 should_offload:1, /* set indicates this neigh is connected and
+ * should be put to KVD linear area of this group.
+ */
+  offloaded:1, /* set in case the neigh is actually put into
+* KVD linear area of this group.
+*/
+  update:1; /* set indicates that MAC of this neigh should be
+ * updated in HW
+ */
+   struct mlxsw_sp_neigh_entry *neigh_entry;
+};
+
+struct mlxsw_sp_nexthop_group {
+   struct list_head list; /* node in mlxsw->router.nexthop_group_list */
+   struct list_head fib_list; /* list of fib entries that use this group */
+   u8 adj_index_valid:1;
+   u32 adj_index;
+   u16 ecmp_size;
+  

[patch net-next 35/42] mlxsw: Add KVD sizes configuration into profile

2016-07-01 Thread Jiri Pirko
From: Jiri Pirko 

Up until now we only used hash-based tables in the device, but we are
going to use the linear table for remote routes adjacency lists.

Add the configuration fields that control the size of the linear table.

Signed-off-by: Jiri Pirko 
Reviewed-by: Ido Schimmel 
---
 drivers/net/ethernet/mellanox/mlxsw/cmd.h  | 43 ++
 drivers/net/ethernet/mellanox/mlxsw/core.h |  6 -
 drivers/net/ethernet/mellanox/mlxsw/pci.c  | 14 ++
 3 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/cmd.h 
b/drivers/net/ethernet/mellanox/mlxsw/cmd.h
index cd63b82..f9cd6e3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/cmd.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/cmd.h
@@ -607,6 +607,24 @@ MLXSW_ITEM32(cmd_mbox, config_profile,
  */
 MLXSW_ITEM32(cmd_mbox, config_profile, set_ar_sec, 0x0C, 15, 1);
 
+/* cmd_mbox_config_set_kvd_linear_size
+ * Capability bit. Setting a bit to 1 configures the profile
+ * according to the mailbox contents.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_linear_size, 0x0C, 24, 1);
+
+/* cmd_mbox_config_set_kvd_hash_single_size
+ * Capability bit. Setting a bit to 1 configures the profile
+ * according to the mailbox contents.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_hash_single_size, 0x0C, 25, 1);
+
+/* cmd_mbox_config_set_kvd_hash_double_size
+ * Capability bit. Setting a bit to 1 configures the profile
+ * according to the mailbox contents.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_hash_double_size, 0x0C, 26, 1);
+
 /* cmd_mbox_config_profile_max_vepa_channels
  * Maximum number of VEPA channels per port (0 through 16)
  * 0 - multi-channel VEPA is disabled
@@ -733,6 +751,31 @@ MLXSW_ITEM32(cmd_mbox, config_profile, 
adaptive_routing_group_cap, 0x4C, 0, 16);
  */
 MLXSW_ITEM32(cmd_mbox, config_profile, arn, 0x50, 31, 1);
 
+/* cmd_mbox_config_kvd_linear_size
+ * KVD Linear Size
+ * Valid for Spectrum only
+ * Allowed values are 128*N where N=0 or higher
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, kvd_linear_size, 0x54, 0, 24);
+
+/* cmd_mbox_config_kvd_hash_single_size
+ * KVD Hash single-entries size
+ * Valid for Spectrum only
+ * Allowed values are 128*N where N=0 or higher
+ * Must be greater or equal to cap_min_kvd_hash_single_size
+ * Must be smaller or equal to cap_kvd_size - kvd_linear_size
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, kvd_hash_single_size, 0x58, 0, 24);
+
+/* cmd_mbox_config_kvd_hash_double_size
+ * KVD Hash double-entries size (units of single-size entries)
+ * Valid for Spectrum only
+ * Allowed values are 128*N where N=0 or higher
+ * Must be either 0 or greater or equal to cap_min_kvd_hash_double_size
+ * Must be smaller or equal to cap_kvd_size - kvd_linear_size
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, kvd_hash_double_size, 0x5C, 0, 24);
+
 /* cmd_mbox_config_profile_swid_config_mask
  * Modify Switch Partition Configuration mask. When set, the configu-
  * ration value for the Switch Partition are taken from the mailbox.
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h 
b/drivers/net/ethernet/mellanox/mlxsw/core.h
index 436bc49..2fe385c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -190,7 +190,8 @@ struct mlxsw_config_profile {
used_max_ib_mc:1,
used_max_pkey:1,
used_ar_sec:1,
-   used_adaptive_routing_group_cap:1;
+   used_adaptive_routing_group_cap:1,
+   used_kvd_sizes:1;
u8  max_vepa_channels;
u16 max_lag;
u16 max_port_per_lag;
@@ -211,6 +212,9 @@ struct mlxsw_config_profile {
u8  ar_sec;
u16 adaptive_routing_group_cap;
u8  arn;
+   u32 kvd_linear_size;
+   u32 kvd_hash_single_size;
+   u32 kvd_hash_double_size;
struct mlxsw_swid_config swid_config[MLXSW_CONFIG_PROFILE_SWID_COUNT];
 };
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c 
b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 7f4173c..ddbc9f2 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -1255,6 +1255,20 @@ static int mlxsw_pci_config_profile(struct mlxsw_pci 
*mlxsw_pci, char *mbox,
mlxsw_cmd_mbox_config_profile_adaptive_routing_group_cap_set(
mbox, profile->adaptive_routing_group_cap);
}
+   if (profile->used_kvd_sizes) {
+   mlxsw_cmd_mbox_config_profile_set_kvd_linear_size_set(
+   mbox, 1);
+   mlxsw_cmd_mbox_config_profile_kvd_linear_size_set(
+   mbox, profile->kvd_linear_size);
+   mlxsw_cmd_mbox_config_profile_set_kvd_hash_single_size_set(
+   mbox, 1);
+   mlxsw_cmd_mbox_config_profile_kvd_hash_single_size_set(
+   

[patch net-next 30/42] mlxsw: spectrum_router: Add private neigh table

2016-07-01 Thread Jiri Pirko
From: Jiri Pirko 

We need to hold some private data for every neigh entry. It would be
possible to do this using neigh_priv_len/ndo_neigh_construct/
ndo_neigh_destroy, but only for the port device itself. That would not
work for stacked devices like bridge/team/bond. So introduce a private
neigh table. Hook into the neigh_construct/destroy ndos and add/remove
table entries accordingly.

Signed-off-by: Jiri Pirko 
Reviewed-by: Ido Schimmel 
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c |   2 +
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h |   6 +
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 146 -
 3 files changed, 153 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 7b2b741b..9bebb7a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -803,6 +803,8 @@ static const struct net_device_ops mlxsw_sp_port_netdev_ops 
= {
.ndo_get_stats64= mlxsw_sp_port_get_stats64,
.ndo_vlan_rx_add_vid= mlxsw_sp_port_add_vid,
.ndo_vlan_rx_kill_vid   = mlxsw_sp_port_kill_vid,
+   .ndo_neigh_construct= mlxsw_sp_router_neigh_construct,
+   .ndo_neigh_destroy  = mlxsw_sp_router_neigh_destroy,
.ndo_fdb_add= switchdev_port_fdb_add,
.ndo_fdb_del= switchdev_port_fdb_del,
.ndo_fdb_dump   = switchdev_port_fdb_dump,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 958e821..734c5ba 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -39,6 +39,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -212,6 +213,7 @@ struct mlxsw_sp_vr {
 struct mlxsw_sp_router {
struct mlxsw_sp_lpm_tree lpm_trees[MLXSW_SP_LPM_TREE_COUNT];
struct mlxsw_sp_vr vrs[MLXSW_SP_VIRTUAL_ROUTER_MAX];
+   struct rhashtable neigh_ht;
 };
 
 struct mlxsw_sp {
@@ -524,5 +526,9 @@ int mlxsw_sp_router_fib4_add(struct mlxsw_sp_port 
*mlxsw_sp_port,
 struct switchdev_trans *trans);
 int mlxsw_sp_router_fib4_del(struct mlxsw_sp_port *mlxsw_sp_port,
 const struct switchdev_obj_ipv4_fib *fib4);
+int mlxsw_sp_router_neigh_construct(struct net_device *dev,
+   struct neighbour *n);
+void mlxsw_sp_router_neigh_destroy(struct net_device *dev,
+  struct neighbour *n);
 
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 7e3992a..90d382a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -38,6 +38,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include "spectrum.h"
 #include "core.h"
@@ -544,6 +546,147 @@ static void mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
}
 }
 
+struct mlxsw_sp_neigh_key {
+   unsigned char addr[sizeof(struct in6_addr)];
+   struct net_device *dev;
+};
+
+struct mlxsw_sp_neigh_entry {
+   struct rhash_head ht_node;
+   struct mlxsw_sp_neigh_key key;
+   u16 rif;
+   struct neighbour *n;
+};
+
+static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
+   .key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
+   .head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
+   .key_len = sizeof(struct mlxsw_sp_neigh_key),
+};
+
+static int
+mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
+   struct mlxsw_sp_neigh_entry *neigh_entry)
+{
+   return rhashtable_insert_fast(_sp->router.neigh_ht,
+ _entry->ht_node,
+ mlxsw_sp_neigh_ht_params);
+}
+
+static void
+mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
+   struct mlxsw_sp_neigh_entry *neigh_entry)
+{
+   rhashtable_remove_fast(_sp->router.neigh_ht,
+  _entry->ht_node,
+  mlxsw_sp_neigh_ht_params);
+}
+
+static struct mlxsw_sp_neigh_entry *
+mlxsw_sp_neigh_entry_create(const void *addr, size_t addr_len,
+   struct net_device *dev, u16 rif,
+   struct neighbour *n)
+{
+   struct mlxsw_sp_neigh_entry *neigh_entry;
+
+   neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_ATOMIC);
+   if (!neigh_entry)
+   return NULL;
+   memcpy(neigh_entry->key.addr, addr, addr_len);
+   neigh_entry->key.dev = dev;
+   neigh_entry->rif = rif;
+   neigh_entry->n = n;
+   return neigh_entry;
+}
+
+static void
+mlxsw_sp_neigh_entry_destroy(struct 

Re: [patch net-next 02/42] net: introduce default neigh_construct/destroy ndo calls for L2 upper devices

2016-07-01 Thread David Ahern

On 7/1/16 8:04 AM, Jiri Pirko wrote:

From: Jiri Pirko 

L2 upper devices need to propagate neigh_construct/destroy calls down to
lower devices. Do this by defining default ndo functions and using them in
team, bond, bridge and vlan.

Signed-off-by: Jiri Pirko 
Reviewed-by: Ido Schimmel 
---
 drivers/net/bonding/bond_main.c |  2 ++
 drivers/net/team/team.c |  2 ++
 include/linux/netdevice.h   |  4 
 net/8021q/vlan_dev.c|  2 ++
 net/bridge/br_device.c  |  2 ++
 net/core/dev.c  | 32 
 6 files changed, 44 insertions(+)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 90157e2..480d73a 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4137,6 +4137,8 @@ static const struct net_device_ops bond_netdev_ops = {
.ndo_add_slave  = bond_enslave,
.ndo_del_slave  = bond_release,
.ndo_fix_features   = bond_fix_features,
+   .ndo_neigh_construct= netdev_default_l2upper_neigh_construct,
+   .ndo_neigh_destroy  = netdev_default_l2upper_neigh_destroy,
.ndo_bridge_setlink = switchdev_port_bridge_setlink,
.ndo_bridge_getlink = switchdev_port_bridge_getlink,
.ndo_bridge_dellink = switchdev_port_bridge_dellink,
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index f9eebea..a380649 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -2002,6 +2002,8 @@ static const struct net_device_ops team_netdev_ops = {
.ndo_add_slave  = team_add_slave,
.ndo_del_slave  = team_del_slave,
.ndo_fix_features   = team_fix_features,
+   .ndo_neigh_construct= netdev_default_l2upper_neigh_construct,
+   .ndo_neigh_destroy  = netdev_default_l2upper_neigh_destroy,
.ndo_change_carrier = team_change_carrier,
.ndo_bridge_setlink = switchdev_port_bridge_setlink,
.ndo_bridge_getlink = switchdev_port_bridge_getlink,
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f126119..fac5132 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3826,6 +3826,10 @@ void *netdev_lower_dev_get_private(struct net_device 
*dev,
   struct net_device *lower_dev);
 void netdev_lower_state_changed(struct net_device *lower_dev,
void *lower_state_info);
+int netdev_default_l2upper_neigh_construct(struct net_device *dev,
+  struct neighbour *n);
+void netdev_default_l2upper_neigh_destroy(struct net_device *dev,
+ struct neighbour *n);

 /* RSS keys are 40 or 52 bytes long */
 #define NETDEV_RSS_KEY_LEN 52
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 86ae75b..c8f422c 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -790,6 +790,8 @@ static const struct net_device_ops vlan_netdev_ops = {
.ndo_netpoll_cleanup= vlan_dev_netpoll_cleanup,
 #endif
.ndo_fix_features   = vlan_dev_fix_features,
+   .ndo_neigh_construct= netdev_default_l2upper_neigh_construct,
+   .ndo_neigh_destroy  = netdev_default_l2upper_neigh_destroy,
.ndo_fdb_add= switchdev_port_fdb_add,
.ndo_fdb_del= switchdev_port_fdb_del,
.ndo_fdb_dump   = switchdev_port_fdb_dump,
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 0c39e0f..8eecd0e 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -349,6 +349,8 @@ static const struct net_device_ops br_netdev_ops = {
.ndo_add_slave   = br_add_slave,
.ndo_del_slave   = br_del_slave,
.ndo_fix_features= br_fix_features,
+   .ndo_neigh_construct = netdev_default_l2upper_neigh_construct,
+   .ndo_neigh_destroy   = netdev_default_l2upper_neigh_destroy,
.ndo_fdb_add = br_fdb_add,
.ndo_fdb_del = br_fdb_delete,
.ndo_fdb_dump= br_fdb_dump,
diff --git a/net/core/dev.c b/net/core/dev.c
index aba10d2..eb13647 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6041,6 +6041,38 @@ void netdev_lower_state_changed(struct net_device 
*lower_dev,
 }
 EXPORT_SYMBOL(netdev_lower_state_changed);

+int netdev_default_l2upper_neigh_construct(struct net_device *dev,
+  struct neighbour *n)
+{
+   struct net_device *lower_dev;
+   struct list_head *iter;
+   int err;
+
+   netdev_for_each_lower_dev(dev, lower_dev, iter) {
+   if (!lower_dev->netdev_ops->ndo_neigh_construct)
+   continue;
+   err = lower_dev->netdev_ops->ndo_neigh_construct(lower_dev, n);
+   if (err)
+   return err;



Re: [RFC] WireGuard: next generation secure network tunnel

2016-07-01 Thread Jason A. Donenfeld
Hi Richard,

On Fri, Jul 1, 2016 at 1:42 PM, Richard Weinberger
 wrote:
> So every logical tunnel will allocate a new net device?
> Doesn't this scale badly? I have ipsec alike setups
> with many, many road warriors in mind.

No, this isn't the case. Each net device has multiple peers. Check out
the example config on the website, pasted here for convenience:

> [Interface]
> PrivateKey = yAnz5TF+lXXJte14tji3zlMNq+hd2rYUIgJBgB3fBmk=
> ListenPort = 41414
>
> [Peer]
> PublicKey = xTIBA5rboUvnH4htodjb6e697QjLERt1NAB4mZqp8Dg=
> AllowedIPs = 10.192.122.3/32, 10.192.124.1/24
>
> [Peer]
> PublicKey = TrMvSoP4jYQlY6RIzBgbssQqY3vxI2Pi+y71lOWWXX0=
> AllowedIPs = 10.192.122.4/32, 192.168.0.0/16
>
> [Peer]
> PublicKey = gN65BkIKy1eCE9pP1wdc8ROUtkHLF2PfAqYdyYBz6EA=
> AllowedIPs = 10.10.10.230/32

If that file is example.conf, you could set up a single device like this:

$ ip link add dev wg0 type wireguard
$ wg setconf wg0 example.conf

That single netdev is now configured to communicate with several peers.

I hope this clarifies things. Let me know if you have further questions.

Regards,
Jason


[patch net-next 24/42] mlxsw: spectrum: Add couple of lower device helper functions

2016-07-01 Thread Jiri Pirko
From: Jiri Pirko 

Add functions that iterate over lower devices and find port device.
As a dependency, add the netdev_for_each_all_lower_dev and
netdev_for_each_all_lower_dev_rcu macros along with the
netdev_all_lower_get_next and netdev_all_lower_get_next_rcu helpers.

Also, add functions to return mlxsw struct according to lower device
found and mlxsw_port struct with a reference to lower device.

Signed-off-by: Jiri Pirko 
Reviewed-by: Ido Schimmel 
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 65 --
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h |  3 ++
 include/linux/netdevice.h  | 18 +++
 net/core/dev.c | 46 ++
 4 files changed, 127 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index f0799898..f54fd6a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2567,6 +2567,66 @@ static struct mlxsw_driver mlxsw_sp_driver = {
.profile= _sp_config_profile,
 };
 
+static bool mlxsw_sp_port_dev_check(const struct net_device *dev)
+{
+   return dev->netdev_ops == _sp_port_netdev_ops;
+}
+
+static struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find(struct net_device 
*dev)
+{
+   struct net_device *lower_dev;
+   struct list_head *iter;
+
+   if (mlxsw_sp_port_dev_check(dev))
+   return netdev_priv(dev);
+
+   netdev_for_each_all_lower_dev(dev, lower_dev, iter) {
+   if (mlxsw_sp_port_dev_check(lower_dev))
+   return netdev_priv(lower_dev);
+   }
+   return NULL;
+}
+
+static struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev)
+{
+   struct mlxsw_sp_port *mlxsw_sp_port;
+
+   mlxsw_sp_port = mlxsw_sp_port_dev_lower_find(dev);
+   return mlxsw_sp_port ? mlxsw_sp_port->mlxsw_sp : NULL;
+}
+
+static struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct 
net_device *dev)
+{
+   struct net_device *lower_dev;
+   struct list_head *iter;
+
+   if (mlxsw_sp_port_dev_check(dev))
+   return netdev_priv(dev);
+
+   netdev_for_each_all_lower_dev_rcu(dev, lower_dev, iter) {
+   if (mlxsw_sp_port_dev_check(lower_dev))
+   return netdev_priv(lower_dev);
+   }
+   return NULL;
+}
+
+struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev)
+{
+   struct mlxsw_sp_port *mlxsw_sp_port;
+
+   rcu_read_lock();
+   mlxsw_sp_port = mlxsw_sp_port_dev_lower_find_rcu(dev);
+   if (mlxsw_sp_port)
+   dev_hold(mlxsw_sp_port->dev);
+   rcu_read_unlock();
+   return mlxsw_sp_port;
+}
+
+void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+   dev_put(mlxsw_sp_port->dev);
+}
+
 static bool mlxsw_sp_lag_port_fid_member(struct mlxsw_sp_port *lag_port,
 u16 fid)
 {
@@ -2647,11 +2707,6 @@ int mlxsw_sp_port_fdb_flush(struct mlxsw_sp_port 
*mlxsw_sp_port, u16 fid)
return mlxsw_sp_port_fdb_flush_by_port_fid(mlxsw_sp_port, fid);
 }
 
-static bool mlxsw_sp_port_dev_check(const struct net_device *dev)
-{
-   return dev->netdev_ops == _sp_port_netdev_ops;
-}
-
 static bool mlxsw_sp_master_bridge_check(struct mlxsw_sp *mlxsw_sp,
 struct net_device *br_dev)
 {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 877a879..fefff25 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -292,6 +292,9 @@ struct mlxsw_sp_port {
struct list_head vports_list;
 };
 
+struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev);
+void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port);
+
 static inline bool
 mlxsw_sp_port_is_pause_en(const struct mlxsw_sp_port *mlxsw_sp_port)
 {
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index fac5132..02c5254 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3805,12 +3805,30 @@ void *netdev_lower_get_next_private_rcu(struct 
net_device *dev,
 
 void *netdev_lower_get_next(struct net_device *dev,
struct list_head **iter);
+
 #define netdev_for_each_lower_dev(dev, ldev, iter) \
for (iter = (dev)->adj_list.lower.next, \
 ldev = netdev_lower_get_next(dev, &(iter)); \
 ldev; \
 ldev = netdev_lower_get_next(dev, &(iter)))
 
+struct net_device *netdev_all_lower_get_next(struct net_device *dev,
+struct list_head **iter);
+struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev,
+struct 

[patch net-next 06/42] mlxsw: spectrum: Sync PVID vPort LAG status

2016-07-01 Thread Jiri Pirko
From: Ido Schimmel 

When VLAN devices are created on top of LAG, their underlying vPorts are
configured correctly with LAG membership.

However, the PVID vPort is implicit and already present when the port
netdev is put under LAG, so its LAG membership is never set. Set it
correctly when joining / leaving LAG.

This didn't matter until now, but we are going to introduce support for
router interfaces (RIFs), which need to take into account LAG membership.

Signed-off-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 31 ++
 1 file changed, 31 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 4f67a8c..f276c45 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2795,6 +2795,32 @@ static int mlxsw_sp_port_lag_index_get(struct mlxsw_sp 
*mlxsw_sp,
return -EBUSY;
 }
 
+static void
+mlxsw_sp_port_pvid_vport_lag_join(struct mlxsw_sp_port *mlxsw_sp_port,
+ u16 lag_id)
+{
+   struct mlxsw_sp_port *mlxsw_sp_vport;
+
+   mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, 1);
+   if (WARN_ON(!mlxsw_sp_vport))
+   return;
+
+   mlxsw_sp_vport->lag_id = lag_id;
+   mlxsw_sp_vport->lagged = 1;
+}
+
+static void
+mlxsw_sp_port_pvid_vport_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+   struct mlxsw_sp_port *mlxsw_sp_vport;
+
+   mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, 1);
+   if (WARN_ON(!mlxsw_sp_vport))
+   return;
+
+   mlxsw_sp_vport->lagged = 0;
+}
+
 static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port,
  struct net_device *lag_dev)
 {
@@ -2830,6 +2856,9 @@ static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port 
*mlxsw_sp_port,
mlxsw_sp_port->lag_id = lag_id;
mlxsw_sp_port->lagged = 1;
lag->ref_count++;
+
+   mlxsw_sp_port_pvid_vport_lag_join(mlxsw_sp_port, lag_id);
+
return 0;
 
 err_col_port_enable:
@@ -2867,6 +2896,8 @@ static void mlxsw_sp_port_lag_leave(struct mlxsw_sp_port 
*mlxsw_sp_port,
 mlxsw_sp_port->local_port);
mlxsw_sp_port->lagged = 0;
lag->ref_count--;
+
+   mlxsw_sp_port_pvid_vport_lag_leave(mlxsw_sp_port);
 }
 
 static int mlxsw_sp_lag_dist_port_add(struct mlxsw_sp_port *mlxsw_sp_port,
-- 
2.5.5



[patch net-next 21/42] mlxsw: spectrum_router: Add virtual router management

2016-07-01 Thread Jiri Pirko
From: Jiri Pirko 

Virtual router is a construct used inside HW. In this implementation
we map kernel tables to virtual routers one to one. Introduce management
logic to create virtual routers when needed and destroy in case they are
no longer in use. According to that, call into LPM tree management.
Each virtual router is always bound to one LPM tree.

Signed-off-by: Jiri Pirko 
Reviewed-by: Ido Schimmel 
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h |  14 ++
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 228 +
 2 files changed, 242 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 5db57a7..5b40dfc 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -65,6 +65,8 @@
 #define MLXSW_SP_LPM_TREE_MAX 22
 #define MLXSW_SP_LPM_TREE_COUNT (MLXSW_SP_LPM_TREE_MAX - MLXSW_SP_LPM_TREE_MIN)
 
+#define MLXSW_SP_VIRTUAL_ROUTER_MAX 256
+
 #define MLXSW_SP_PORT_BASE_SPEED 25000 /* Mb/s */
 
 #define MLXSW_SP_BYTES_PER_CELL 96
@@ -183,8 +185,20 @@ struct mlxsw_sp_lpm_tree {
struct mlxsw_sp_prefix_usage prefix_usage;
 };
 
+struct mlxsw_sp_fib;
+
+struct mlxsw_sp_vr {
+   u16 id; /* virtual router ID */
+   bool used;
+   enum mlxsw_sp_l3proto proto;
+   u32 tb_id; /* kernel fib table id */
+   struct mlxsw_sp_lpm_tree *lpm_tree;
+   struct mlxsw_sp_fib *fib;
+};
+
 struct mlxsw_sp_router {
struct mlxsw_sp_lpm_tree lpm_trees[MLXSW_SP_LPM_TREE_COUNT];
+   struct mlxsw_sp_vr vrs[MLXSW_SP_VIRTUAL_ROUTER_MAX];
 };
 
 struct mlxsw_sp {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 73fd85c..11dab74 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -47,12 +47,46 @@
for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
 
 static bool
+mlxsw_sp_prefix_usage_subset(struct mlxsw_sp_prefix_usage *prefix_usage1,
+struct mlxsw_sp_prefix_usage *prefix_usage2)
+{
+   unsigned char prefix;
+
+   mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage1) {
+   if (!test_bit(prefix, prefix_usage2->b))
+   return false;
+   }
+   return true;
+}
+
+static bool
 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
 struct mlxsw_sp_prefix_usage *prefix_usage2)
 {
return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
 }
 
+static bool
+mlxsw_sp_prefix_usage_none(struct mlxsw_sp_prefix_usage *prefix_usage)
+{
+   struct mlxsw_sp_prefix_usage prefix_usage_none = {{ 0 } };
+
+   return mlxsw_sp_prefix_usage_eq(prefix_usage, _usage_none);
+}
+
+static void
+mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
+ struct mlxsw_sp_prefix_usage *prefix_usage2)
+{
+   memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
+}
+
+static void
+mlxsw_sp_prefix_usage_zero(struct mlxsw_sp_prefix_usage *prefix_usage)
+{
+   memset(prefix_usage, 0, sizeof(*prefix_usage));
+}
+
 static void
 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
  unsigned char prefix_len)
@@ -307,6 +341,199 @@ static void mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
}
 }
 
+static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
+{
+   struct mlxsw_sp_vr *vr;
+   int i;
+
+   for (i = 0; i < MLXSW_SP_VIRTUAL_ROUTER_MAX; i++) {
+   vr = _sp->router.vrs[i];
+   if (!vr->used)
+   return vr;
+   }
+   return NULL;
+}
+
+static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
+struct mlxsw_sp_vr *vr)
+{
+   char raltb_pl[MLXSW_REG_RALTB_LEN];
+
+   mlxsw_reg_raltb_pack(raltb_pl, vr->id, vr->proto, vr->lpm_tree->id);
+   return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
+}
+
+static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
+  struct mlxsw_sp_vr *vr)
+{
+   char raltb_pl[MLXSW_REG_RALTB_LEN];
+
+   /* Bind to tree 0 which is default */
+   mlxsw_reg_raltb_pack(raltb_pl, vr->id, vr->proto, 0);
+   return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
+}
+
+static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
+{
+   /* For our purpose, squash main and local table into one */
+   if (tb_id == RT_TABLE_LOCAL)
+   tb_id = RT_TABLE_MAIN;
+   return tb_id;
+}
+
+static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
+   u32 tb_id,
+   enum 

[patch net-next 28/42] mlxsw: spectrum: Configure FIDs based on bridge events

2016-07-01 Thread Jiri Pirko
From: Ido Schimmel 

Before introducing support for L3 interfaces on top of the VLAN-aware
bridge we need to add some missing infrastructure.

Such an interface can either be the bridge device itself or a VLAN
device on top of it. In the first case the router interface (RIF) is
associated with FID 1, which is created whenever the first port netdev
joins the bridge. We currently assume the default PVID is 1 and that
it's already created, as it seems reasonable. This can be extended in
the future.

However, in the second case it's entirely possible we've yet to create a
matching FID. This can happen if the VLAN device was configured before
making any bridge port member in the VLAN.

Prevent such ordering problems by using the VLAN device's CHANGEUPPER
event to configure the FID. Make the VLAN device hold a reference to the
FID and prevent it from being destroyed even if none of the port netdevs
is using it.

Signed-off-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 86 --
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 27 +++
 .../ethernet/mellanox/mlxsw/spectrum_switchdev.c   | 18 +
 3 files changed, 107 insertions(+), 24 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index e987a8a..e49f80ba 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2887,6 +2887,17 @@ int mlxsw_sp_port_fdb_flush(struct mlxsw_sp_port 
*mlxsw_sp_port, u16 fid)
return mlxsw_sp_port_fdb_flush_by_port_fid(mlxsw_sp_port, fid);
 }
 
+static void mlxsw_sp_master_bridge_gone_sync(struct mlxsw_sp *mlxsw_sp)
+{
+   struct mlxsw_sp_fid *f, *tmp;
+
+   list_for_each_entry_safe(f, tmp, _sp->fids, list)
+   if (--f->ref_count == 0)
+   mlxsw_sp_fid_destroy(mlxsw_sp, f);
+   else
+   WARN_ON_ONCE(1);
+}
+
 static bool mlxsw_sp_master_bridge_check(struct mlxsw_sp *mlxsw_sp,
 struct net_device *br_dev)
 {
@@ -2903,8 +2914,15 @@ static void mlxsw_sp_master_bridge_inc(struct mlxsw_sp 
*mlxsw_sp,
 
 static void mlxsw_sp_master_bridge_dec(struct mlxsw_sp *mlxsw_sp)
 {
-   if (--mlxsw_sp->master_bridge.ref_count == 0)
+   if (--mlxsw_sp->master_bridge.ref_count == 0) {
mlxsw_sp->master_bridge.dev = NULL;
+   /* It's possible upper VLAN devices are still holding
+* references to underlying FIDs. Drop the reference
+* and release the resources if it was the last one.
+* If it wasn't, then something bad happened.
+*/
+   mlxsw_sp_master_bridge_gone_sync(mlxsw_sp);
+   }
 }
 
 static int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port,
@@ -3373,18 +3391,68 @@ static int mlxsw_sp_netdevice_lag_event(struct 
net_device *lag_dev,
return 0;
 }
 
-static struct mlxsw_sp_fid *
-mlxsw_sp_vfid_find(const struct mlxsw_sp *mlxsw_sp,
-  const struct net_device *br_dev)
+static int mlxsw_sp_master_bridge_vlan_link(struct mlxsw_sp *mlxsw_sp,
+   struct net_device *vlan_dev)
 {
+   u16 fid = vlan_dev_vlan_id(vlan_dev);
struct mlxsw_sp_fid *f;
 
-   list_for_each_entry(f, _sp->vfids.list, list) {
-   if (f->dev == br_dev)
-   return f;
+   f = mlxsw_sp_fid_find(mlxsw_sp, fid);
+   if (!f) {
+   f = mlxsw_sp_fid_create(mlxsw_sp, fid);
+   if (IS_ERR(f))
+   return PTR_ERR(f);
}
 
-   return NULL;
+   f->ref_count++;
+
+   return 0;
+}
+
+static void mlxsw_sp_master_bridge_vlan_unlink(struct mlxsw_sp *mlxsw_sp,
+  struct net_device *vlan_dev)
+{
+   u16 fid = vlan_dev_vlan_id(vlan_dev);
+   struct mlxsw_sp_fid *f;
+
+   f = mlxsw_sp_fid_find(mlxsw_sp, fid);
+   if (f && --f->ref_count == 0)
+   mlxsw_sp_fid_destroy(mlxsw_sp, f);
+}
+
+static int mlxsw_sp_netdevice_bridge_event(struct net_device *br_dev,
+  unsigned long event, void *ptr)
+{
+   struct netdev_notifier_changeupper_info *info;
+   struct net_device *upper_dev;
+   struct mlxsw_sp *mlxsw_sp;
+   int err;
+
+   mlxsw_sp = mlxsw_sp_lower_get(br_dev);
+   if (!mlxsw_sp)
+   return 0;
+   if (br_dev != mlxsw_sp->master_bridge.dev)
+   return 0;
+
+   info = ptr;
+
+   switch (event) {
+   case NETDEV_CHANGEUPPER:
+   upper_dev = info->upper_dev;
+   if (!is_vlan_dev(upper_dev))
+   break;
+   if (info->linking) {
+   err = 

[patch net-next 11/42] mlxsw: spectrum: Add router interface struct

2016-07-01 Thread Jiri Pirko
From: Ido Schimmel 

When enabling the router in the device we will represent L3 netdevs
using router interfaces (RIFs). These will be specified whenever
programming routes or neighbours on the netdev.

Introduce the basic RIF infrastructure which allows one to lookup a RIF
by its netdev. Later patches in the series will extend this, but the
basic routines are needed now in order to direct traffic to CPU.

Pointers to the RIF structs are stored in an array indexed by the RIF's
number. This will allow us to efficiently update the kernel's neighbour
table when regularly dumping the device's table.

Signed-off-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c |  3 +++
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 19 +++
 2 files changed, 22 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index d011902..d15ebf3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2470,6 +2470,7 @@ err_rx_listener_register:
 static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
 {
struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+   int i;
 
mlxsw_sp_ports_remove(mlxsw_sp);
mlxsw_sp_router_fini(mlxsw_sp);
@@ -2478,6 +2479,8 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
mlxsw_sp_traps_fini(mlxsw_sp);
mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE);
WARN_ON(!list_empty(_sp->fids));
+   for (i = 0; i < MLXSW_SP_RIF_MAX; i++)
+   WARN_ON_ONCE(mlxsw_sp->rifs[i]);
 }
 
 static struct mlxsw_config_profile mlxsw_sp_config_profile = {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index c2ac037..83d5807 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -98,6 +98,11 @@ struct mlxsw_sp_fid {
u16 vid;
 };
 
+struct mlxsw_sp_rif {
+   struct net_device *dev;
+   u16 rif;
+};
+
 struct mlxsw_sp_mid {
struct list_head list;
unsigned char addr[ETH_ALEN];
@@ -169,6 +174,7 @@ struct mlxsw_sp {
DECLARE_BITMAP(mapped, MLXSW_SP_MID_MAX);
} br_mids;
struct list_head fids;  /* VLAN-aware bridge FIDs */
+   struct mlxsw_sp_rif *rifs[MLXSW_SP_RIF_MAX];
struct mlxsw_sp_port **ports;
struct mlxsw_core *core;
const struct mlxsw_bus_info *bus_info;
@@ -327,6 +333,19 @@ mlxsw_sp_port_vport_find_by_fid(const struct mlxsw_sp_port 
*mlxsw_sp_port,
return NULL;
 }
 
+static inline struct mlxsw_sp_rif *
+mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
+const struct net_device *dev)
+{
+   int i;
+
+   for (i = 0; i < MLXSW_SP_RIF_MAX; i++)
+   if (mlxsw_sp->rifs[i] && mlxsw_sp->rifs[i]->dev == dev)
+   return mlxsw_sp->rifs[i];
+
+   return NULL;
+}
+
 enum mlxsw_sp_flood_table {
MLXSW_SP_FLOOD_TABLE_UC,
MLXSW_SP_FLOOD_TABLE_BM,
-- 
2.5.5



[patch net-next 22/42] mlxsw: reg: Add Router Algorithmic LPM Unicast Entry Register definition

2016-07-01 Thread Jiri Pirko
From: Jiri Pirko 

Serves for adding, updating and removing fib entries.

Signed-off-by: Jiri Pirko 
Reviewed-by: Ido Schimmel 
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 264 ++
 1 file changed, 264 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h 
b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 0b7b91f..9280d96 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -3622,6 +3622,268 @@ static inline void mlxsw_reg_raltb_pack(char *payload, 
u16 virtual_router,
mlxsw_reg_raltb_tree_id_set(payload, tree_id);
 }
 
+/* RALUE - Router Algorithmic LPM Unicast Entry Register
+ * -
+ * RALUE is used to configure and query LPM entries that serve
+ * the Unicast protocols.
+ */
+#define MLXSW_REG_RALUE_ID 0x8013
+#define MLXSW_REG_RALUE_LEN 0x38
+
+static const struct mlxsw_reg_info mlxsw_reg_ralue = {
+   .id = MLXSW_REG_RALUE_ID,
+   .len = MLXSW_REG_RALUE_LEN,
+};
+
+/* reg_ralue_protocol
+ * Protocol.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ralue, protocol, 0x00, 24, 4);
+
+enum mlxsw_reg_ralue_op {
+   /* Read operation. If entry doesn't exist, the operation fails. */
+   MLXSW_REG_RALUE_OP_QUERY_READ = 0,
+   /* Clear on read operation. Used to read entry and
+* clear Activity bit.
+*/
+   MLXSW_REG_RALUE_OP_QUERY_CLEAR = 1,
+   /* Write operation. Used to write a new entry to the table. All RW
+* fields are written for new entry. Activity bit is set
+* for new entries.
+*/
+   MLXSW_REG_RALUE_OP_WRITE_WRITE = 0,
+   /* Update operation. Used to update an existing route entry and
+* only update the RW fields that are detailed in the field
+* op_u_mask. If entry doesn't exist, the operation fails.
+*/
+   MLXSW_REG_RALUE_OP_WRITE_UPDATE = 1,
+   /* Clear activity. The Activity bit (the field a) is cleared
+* for the entry.
+*/
+   MLXSW_REG_RALUE_OP_WRITE_CLEAR = 2,
+   /* Delete operation. Used to delete an existing entry. If entry
+* doesn't exist, the operation fails.
+*/
+   MLXSW_REG_RALUE_OP_WRITE_DELETE = 3,
+};
+
+/* reg_ralue_op
+ * Operation.
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, ralue, op, 0x00, 20, 3);
+
+/* reg_ralue_a
+ * Activity. Set for new entries. Set if a packet lookup has hit on the
+ * specific entry, only if the entry is a route. To clear the a bit, use
+ * "clear activity" op.
+ * Enabled by activity_dis in RGCR
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, ralue, a, 0x00, 16, 1);
+
+/* reg_ralue_virtual_router
+ * Virtual Router ID
+ * Range is 0..cap_max_virtual_routers-1
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ralue, virtual_router, 0x04, 16, 16);
+
+#define MLXSW_REG_RALUE_OP_U_MASK_ENTRY_TYPE   BIT(0)
+#define MLXSW_REG_RALUE_OP_U_MASK_BMP_LEN  BIT(1)
+#define MLXSW_REG_RALUE_OP_U_MASK_ACTION   BIT(2)
+
+/* reg_ralue_op_u_mask
+ * opcode update mask.
+ * On read operation, this field is reserved.
+ * This field is valid for update opcode, otherwise - reserved.
+ * This field is a bitmask of the fields that should be updated.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, ralue, op_u_mask, 0x04, 8, 3);
+
+/* reg_ralue_prefix_len
+ * Number of bits in the prefix of the LPM route.
+ * Note that for IPv6 prefixes, if prefix_len>64 the entry consumes
+ * two entries in the physical HW table.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ralue, prefix_len, 0x08, 0, 8);
+
+/* reg_ralue_dip*
+ * The prefix of the route or of the marker that the object of the LPM
+ * is compared with. The most significant bits of the dip are the prefix.
+ * The least significant bits must be '0' if the prefix_len is smaller
+ * than 128 for IPv6 or smaller than 32 for IPv4.
+ * IPv4 address uses bits dip[31:0] and bits dip[127:32] are reserved.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ralue, dip4, 0x18, 0, 32);
+
+enum mlxsw_reg_ralue_entry_type {
+   MLXSW_REG_RALUE_ENTRY_TYPE_MARKER_ENTRY = 1,
+   MLXSW_REG_RALUE_ENTRY_TYPE_ROUTE_ENTRY = 2,
+   MLXSW_REG_RALUE_ENTRY_TYPE_MARKER_AND_ROUTE_ENTRY = 3,
+};
+
+/* reg_ralue_entry_type
+ * Entry type.
+ * Note - for Marker entries, the action_type and action fields are reserved.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ralue, entry_type, 0x1C, 30, 2);
+
+/* reg_ralue_bmp_len
+ * The best match prefix length in the case that there is no match for
+ * longer prefixes.
+ * If (entry_type != MARKER_ENTRY), bmp_len must be equal to prefix_len
+ * Note for any update operation with entry_type modification this
+ * field must be set.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ralue, bmp_len, 0x1C, 16, 8);
+
+enum mlxsw_reg_ralue_action_type {
+   MLXSW_REG_RALUE_ACTION_TYPE_REMOTE,
+   MLXSW_REG_RALUE_ACTION_TYPE_LOCAL,
+   MLXSW_REG_RALUE_ACTION_TYPE_IP2ME,
+};
+
+/* 

[patch net-next 31/42] mlxsw: reg: Add Router Algorithmic LPM Unicast Host Table register

2016-07-01 Thread Jiri Pirko
From: Yotam Gigi 

The RAUHT register is used to configure and query the Unicast Host Table
in devices that implement the Algorithmic LPM. In other words, it is
used to configure neighbour entries in the device.

Signed-off-by: Yotam Gigi 
Signed-off-by: Jiri Pirko 
Reviewed-by: Ido Schimmel 
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 141 ++
 1 file changed, 141 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h 
b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 9280d96..cc6a0b3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -4,6 +4,7 @@
  * Copyright (c) 2015-2016 Ido Schimmel 
  * Copyright (c) 2015 Elad Raz 
  * Copyright (c) 2015-2016 Jiri Pirko 
+ * Copyright (c) 2016 Yotam Gigi 
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -3884,6 +3885,144 @@ mlxsw_reg_ralue_act_ip2me_pack(char *payload)
MLXSW_REG_RALUE_ACTION_TYPE_IP2ME);
 }
 
+/* RAUHT - Router Algorithmic LPM Unicast Host Table Register
+ * --
+ * The RAUHT register is used to configure and query the Unicast Host table in
+ * devices that implement the Algorithmic LPM.
+ */
+#define MLXSW_REG_RAUHT_ID 0x8014
+#define MLXSW_REG_RAUHT_LEN 0x74
+
+static const struct mlxsw_reg_info mlxsw_reg_rauht = {
+   .id = MLXSW_REG_RAUHT_ID,
+   .len = MLXSW_REG_RAUHT_LEN,
+};
+
+enum mlxsw_reg_rauht_type {
+   MLXSW_REG_RAUHT_TYPE_IPV4,
+   MLXSW_REG_RAUHT_TYPE_IPV6,
+};
+
+/* reg_rauht_type
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rauht, type, 0x00, 24, 2);
+
+enum mlxsw_reg_rauht_op {
+   MLXSW_REG_RAUHT_OP_QUERY_READ = 0,
+   /* Read operation */
+   MLXSW_REG_RAUHT_OP_QUERY_CLEAR_ON_READ = 1,
+   /* Clear on read operation. Used to read entry and clear
+* activity bit.
+*/
+   MLXSW_REG_RAUHT_OP_WRITE_ADD = 0,
+   /* Add. Used to write a new entry to the table. All R/W fields are
+* relevant for new entry. Activity bit is set for new entries.
+*/
+   MLXSW_REG_RAUHT_OP_WRITE_UPDATE = 1,
+   /* Update action. Used to update an existing route entry and
+* only update the following fields:
+* trap_action, trap_id, mac, counter_set_type, counter_index
+*/
+   MLXSW_REG_RAUHT_OP_WRITE_CLEAR_ACTIVITY = 2,
+   /* Clear activity. A bit is cleared for the entry. */
+   MLXSW_REG_RAUHT_OP_WRITE_DELETE = 3,
+   /* Delete entry */
+   MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL = 4,
+   /* Delete all host entries on a RIF. In this command, dip
+* field is reserved.
+*/
+};
+
+/* reg_rauht_op
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, rauht, op, 0x00, 20, 3);
+
+/* reg_rauht_a
+ * Activity. Set for new entries. Set if a packet lookup has hit on
+ * the specific entry.
+ * To clear the a bit, use "clear activity" op.
+ * Enabled by activity_dis in RGCR
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, rauht, a, 0x00, 16, 1);
+
+/* reg_rauht_rif
+ * Router Interface
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rauht, rif, 0x00, 0, 16);
+
+/* reg_rauht_dip*
+ * Destination address.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rauht, dip4, 0x1C, 0x0, 32);
+
+enum mlxsw_reg_rauht_trap_action {
+   MLXSW_REG_RAUHT_TRAP_ACTION_NOP,
+   MLXSW_REG_RAUHT_TRAP_ACTION_TRAP,
+   MLXSW_REG_RAUHT_TRAP_ACTION_MIRROR_TO_CPU,
+   MLXSW_REG_RAUHT_TRAP_ACTION_MIRROR,
+   MLXSW_REG_RAUHT_TRAP_ACTION_DISCARD_ERRORS,
+};
+
+/* reg_rauht_trap_action
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rauht, trap_action, 0x60, 28, 4);
+
+enum mlxsw_reg_rauht_trap_id {
+   MLXSW_REG_RAUHT_TRAP_ID_RTR_EGRESS0,
+   MLXSW_REG_RAUHT_TRAP_ID_RTR_EGRESS1,
+};
+
+/* reg_rauht_trap_id
+ * Trap ID to be reported to CPU.
+ * Trap-ID is RTR_EGRESS0 or RTR_EGRESS1.
+ * For trap_action of NOP, MIRROR and DISCARD_ERROR,
+ * trap_id is reserved.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rauht, trap_id, 0x60, 0, 9);
+
+/* reg_rauht_counter_set_type
+ * Counter set type for flow counters
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rauht, counter_set_type, 0x68, 24, 8);
+
+/* reg_rauht_counter_index
+ * Counter index for flow counters
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rauht, counter_index, 0x68, 0, 24);
+
+/* reg_rauht_mac
+ * MAC address.
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, rauht, mac, 0x6E, 6);
+
+static inline void mlxsw_reg_rauht_pack(char *payload,
+   enum mlxsw_reg_rauht_op op, u16 rif,
+   const char *mac)
+{
+   MLXSW_REG_ZERO(rauht, payload);
+   mlxsw_reg_rauht_op_set(payload, op);
+   

[patch net-next 32/42] mlxsw: reg: Add Router Algorithmic LPM Unicast Host Table Dump register

2016-07-01 Thread Jiri Pirko
From: Yotam Gigi 

The RAUHTD register allows dumping entries from the Router Unicast Host
Table.

Signed-off-by: Yotam Gigi 
Signed-off-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 147 ++
 1 file changed, 147 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h 
b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index cc6a0b3..fcf379b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -4023,6 +4023,151 @@ static inline void mlxsw_reg_rauht_pack4(char *payload,
mlxsw_reg_rauht_dip4_set(payload, dip);
 }
 
+/* RAUHTD - Router Algorithmic LPM Unicast Host Table Dump Register
+ * 
+ * The RAUHTD register allows dumping entries from the Router Unicast Host
+ * Table. For a given session an entry is dumped no more than one time. The
+ * first RAUHTD access after reset is a new session. A session ends when the
+ * num_rec response is smaller than num_rec request or for IPv4 when the
+ * num_entries is smaller than 4. The clear activity affects the current
+ * session or the last session if a new session has not started.
+ */
+#define MLXSW_REG_RAUHTD_ID 0x8018
+#define MLXSW_REG_RAUHTD_BASE_LEN 0x20
+#define MLXSW_REG_RAUHTD_REC_LEN 0x20
+#define MLXSW_REG_RAUHTD_REC_MAX_NUM 32
+#define MLXSW_REG_RAUHTD_LEN (MLXSW_REG_RAUHTD_BASE_LEN + \
+   MLXSW_REG_RAUHTD_REC_MAX_NUM * MLXSW_REG_RAUHTD_REC_LEN)
+#define MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC 4
+
+static const struct mlxsw_reg_info mlxsw_reg_rauhtd = {
+   .id = MLXSW_REG_RAUHTD_ID,
+   .len = MLXSW_REG_RAUHTD_LEN,
+};
+
+#define MLXSW_REG_RAUHTD_FILTER_A BIT(0)
+#define MLXSW_REG_RAUHTD_FILTER_RIF BIT(3)
+
+/* reg_rauhtd_filter_fields
+ * if a bit is '0' then the relevant field is ignored and dump is done
+ * regardless of the field value
+ * Bit0 - filter by activity: entry_a
+ * Bit3 - filter by entry rif: entry_rif
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rauhtd, filter_fields, 0x00, 0, 8);
+
+enum mlxsw_reg_rauhtd_op {
+   MLXSW_REG_RAUHTD_OP_DUMP,
+   MLXSW_REG_RAUHTD_OP_DUMP_AND_CLEAR,
+};
+
+/* reg_rauhtd_op
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, rauhtd, op, 0x04, 24, 2);
+
+/* reg_rauhtd_num_rec
+ * At request: number of records requested
+ * At response: number of records dumped
+ * For IPv4, each record has 4 entries at request and up to 4 entries
+ * at response
+ * Range is 0..MLXSW_REG_RAUHTD_REC_MAX_NUM
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rauhtd, num_rec, 0x04, 0, 8);
+
+/* reg_rauhtd_entry_a
+ * Dump only if activity has value of entry_a
+ * Reserved if filter_fields bit0 is '0'
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rauhtd, entry_a, 0x08, 16, 1);
+
+enum mlxsw_reg_rauhtd_type {
+   MLXSW_REG_RAUHTD_TYPE_IPV4,
+   MLXSW_REG_RAUHTD_TYPE_IPV6,
+};
+
+/* reg_rauhtd_type
+ * Dump only if record type is:
+ * 0 - IPv4
+ * 1 - IPv6
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rauhtd, type, 0x08, 0, 4);
+
+/* reg_rauhtd_entry_rif
+ * Dump only if RIF has value of entry_rif
+ * Reserved if filter_fields bit3 is '0'
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rauhtd, entry_rif, 0x0C, 0, 16);
+
+static inline void mlxsw_reg_rauhtd_pack(char *payload,
+enum mlxsw_reg_rauhtd_type type)
+{
+   MLXSW_REG_ZERO(rauhtd, payload);
+   mlxsw_reg_rauhtd_filter_fields_set(payload, MLXSW_REG_RAUHTD_FILTER_A);
+   mlxsw_reg_rauhtd_op_set(payload, MLXSW_REG_RAUHTD_OP_DUMP_AND_CLEAR);
+   mlxsw_reg_rauhtd_num_rec_set(payload, MLXSW_REG_RAUHTD_REC_MAX_NUM);
+   mlxsw_reg_rauhtd_entry_a_set(payload, 1);
+   mlxsw_reg_rauhtd_type_set(payload, type);
+}
+
+/* reg_rauhtd_ipv4_rec_num_entries
+ * Number of valid entries in this record:
+ * 0 - 1 valid entry
+ * 1 - 2 valid entries
+ * 2 - 3 valid entries
+ * 3 - 4 valid entries
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_rec_num_entries,
+MLXSW_REG_RAUHTD_BASE_LEN, 28, 2,
+MLXSW_REG_RAUHTD_REC_LEN, 0x00, false);
+
+/* reg_rauhtd_rec_type
+ * Record type.
+ * 0 - IPv4
+ * 1 - IPv6
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, rauhtd, rec_type, MLXSW_REG_RAUHTD_BASE_LEN, 24, 2,
+MLXSW_REG_RAUHTD_REC_LEN, 0x00, false);
+
+#define MLXSW_REG_RAUHTD_IPV4_ENT_LEN 0x8
+
+/* reg_rauhtd_ipv4_ent_a
+ * Activity. Set for new entries. Set if a packet lookup has hit on the
+ * specific entry.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_a, MLXSW_REG_RAUHTD_BASE_LEN, 16, 1,
+MLXSW_REG_RAUHTD_IPV4_ENT_LEN, 0x00, false);
+
+/* reg_rauhtd_ipv4_ent_rif
+ * Router interface.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_rif, MLXSW_REG_RAUHTD_BASE_LEN, 0,
+16, MLXSW_REG_RAUHTD_IPV4_ENT_LEN, 0x00, false);
+

[patch net-next 18/42] mlxsw: reg: Add Router Algorithmic LPM Structure Tree Register definition

2016-07-01 Thread Jiri Pirko
From: Jiri Pirko 

Serves to build LPM tree structure.

Signed-off-by: Jiri Pirko 
Reviewed-by: Ido Schimmel 
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 77 +++
 1 file changed, 77 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h 
b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index a358e1b..4ea608f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -3504,6 +3504,81 @@ static inline void mlxsw_reg_ralta_pack(char *payload, 
bool alloc,
mlxsw_reg_ralta_tree_id_set(payload, tree_id);
 }
 
+/* RALST - Router Algorithmic LPM Structure Tree Register
+ * --
+ * RALST is used to set and query the structure of an LPM tree.
+ * The structure of the tree must be sorted as a sorted binary tree, while
+ * each node is a bin that is tagged as the length of the prefixes the lookup
+ * will refer to. Therefore, bin X refers to a set of entries with prefixes
+ * of X bits to match with the destination address. The bin 0 indicates
+ * the default action, when there is no match of any prefix.
+ */
+#define MLXSW_REG_RALST_ID 0x8011
+#define MLXSW_REG_RALST_LEN 0x104
+
+static const struct mlxsw_reg_info mlxsw_reg_ralst = {
+   .id = MLXSW_REG_RALST_ID,
+   .len = MLXSW_REG_RALST_LEN,
+};
+
+/* reg_ralst_root_bin
+ * The bin number of the root bin.
+ * 

[patch net-next 05/42] mlxsw: spectrum: Remove VLANs configuration via SELF flag

2016-07-01 Thread Jiri Pirko
From: Ido Schimmel 

When a port isn't bridged, it is still possible to invoke switchdev ops and
configure the device's VLAN filters.

However, this will require us to use different Router InterFaces (RIFs)
for the same netdev, instead of one per-netdev as with any other
configuration.

Taking the above into account and the fact that this functionality is
questionable with regards to the device's normal use-case, remove it and
instead return an error.

Signed-off-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 24 ++-
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h |  2 -
 .../ethernet/mellanox/mlxsw/spectrum_switchdev.c   | 46 +-
 3 files changed, 6 insertions(+), 66 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index afd06dc..4f67a8c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -890,8 +890,8 @@ err_port_vp_mode_trans:
return err;
 }
 
-int mlxsw_sp_port_kill_vid(struct net_device *dev,
-  __be16 __always_unused proto, u16 vid)
+static int mlxsw_sp_port_kill_vid(struct net_device *dev,
+ __be16 __always_unused proto, u16 vid)
 {
struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
struct mlxsw_sp_port *mlxsw_sp_vport;
@@ -1845,23 +1845,6 @@ err_port_active_vlans_alloc:
return err;
 }
 
-static void mlxsw_sp_port_vports_fini(struct mlxsw_sp_port *mlxsw_sp_port)
-{
-   struct net_device *dev = mlxsw_sp_port->dev;
-   struct mlxsw_sp_port *mlxsw_sp_vport, *tmp;
-
-   list_for_each_entry_safe(mlxsw_sp_vport, tmp,
-_sp_port->vports_list, vport.list) {
-   u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport);
-
-   /* vPorts created for VLAN devices should already be gone
-* by now, since we unregistered the port netdev.
-*/
-   WARN_ON(is_vlan_dev(mlxsw_sp_vport->dev));
-   mlxsw_sp_port_kill_vid(dev, 0, vid);
-   }
-}
-
 static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port)
 {
struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port];
@@ -1872,13 +1855,14 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp 
*mlxsw_sp, u8 local_port)
mlxsw_core_port_fini(_sp_port->core_port);
unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */
mlxsw_sp_port_dcb_fini(mlxsw_sp_port);
-   mlxsw_sp_port_vports_fini(mlxsw_sp_port);
+   mlxsw_sp_port_kill_vid(mlxsw_sp_port->dev, 0, 1);
mlxsw_sp_port_switchdev_fini(mlxsw_sp_port);
mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT);
mlxsw_sp_port_module_unmap(mlxsw_sp, mlxsw_sp_port->local_port);
free_percpu(mlxsw_sp_port->pcpu_stats);
kfree(mlxsw_sp_port->untagged_vlans);
kfree(mlxsw_sp_port->active_vlans);
+   WARN_ON_ONCE(!list_empty(_sp_port->vports_list));
free_netdev(mlxsw_sp_port->dev);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 36c9835..05d5fcc 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -377,8 +377,6 @@ int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port 
*mlxsw_sp_port, u16 vid_begin,
   u16 vid_end, bool is_member, bool untagged);
 int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto,
  u16 vid);
-int mlxsw_sp_port_kill_vid(struct net_device *dev,
-  __be16 __always_unused proto, u16 vid);
 int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 fid,
 bool set);
 void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index a0c7376..927117e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -633,25 +633,6 @@ err_port_allow_untagged_set:
return err;
 }
 
-static int mlxsw_sp_port_add_vids(struct net_device *dev, u16 vid_begin,
- u16 vid_end)
-{
-   u16 vid;
-   int err;
-
-   for (vid = vid_begin; vid <= vid_end; vid++) {
-   err = mlxsw_sp_port_add_vid(dev, 0, vid);
-   if (err)
-   goto err_port_add_vid;
-   }
-   return 0;
-
-err_port_add_vid:
-   for (vid--; vid >= vid_begin; vid--)
-   mlxsw_sp_port_kill_vid(dev, 0, vid);
-   return err;
-}
-
 static int 

[patch net-next 33/42] mlxsw: spectrum_router: Periodically update the kernel's neigh table

2016-07-01 Thread Jiri Pirko
From: Yotam Gigi 

As previously explained, the driver should periodically poll the device
for neighbours activity according to the configured DELAY_PROBE_TIME.
This will prevent active neighbours from staying in STALE state for long
periods of time.

During init configure the polling interval according to the
DELAY_PROBE_TIME used in the default table. In addition, register a
netevent notification block, so that the interval is updated whenever
DELAY_PROBE_TIME changes.

Using the computed interval schedule a delayed work, which will update
the kernel via neigh_event_send() on any active neighbour since the last
delayed work.

Signed-off-by: Yotam Gigi 
Signed-off-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h |   4 +
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 192 -
 2 files changed, 194 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 734c5ba..9c2a60f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -214,6 +214,10 @@ struct mlxsw_sp_router {
struct mlxsw_sp_lpm_tree lpm_trees[MLXSW_SP_LPM_TREE_COUNT];
struct mlxsw_sp_vr vrs[MLXSW_SP_VIRTUAL_ROUTER_MAX];
struct rhashtable neigh_ht;
+   struct {
+   struct delayed_work dw;
+   unsigned long interval; /* ms */
+   } neighs_update;
 };
 
 struct mlxsw_sp {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 90d382a..db1c2c4 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -3,6 +3,7 @@
  * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2016 Jiri Pirko 
  * Copyright (c) 2016 Ido Schimmel 
+ * Copyright (c) 2016 Yotam Gigi 
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -38,6 +39,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 #include 
 
@@ -676,14 +679,199 @@ void mlxsw_sp_router_neigh_destroy(struct net_device 
*dev,
mlxsw_sp_neigh_entry_destroy(neigh_entry);
 }
 
+static void
+mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
+{
+   unsigned long interval = NEIGH_VAR(_tbl.parms, DELAY_PROBE_TIME);
+
+   mlxsw_sp->router.neighs_update.interval = jiffies_to_msecs(interval);
+}
+
+static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
+  char *rauhtd_pl,
+  int ent_index)
+{
+   struct net_device *dev;
+   struct neighbour *n;
+   __be32 dipn;
+   u32 dip;
+   u16 rif;
+
+   mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, , );
+
+   if (!mlxsw_sp->rifs[rif]) {
+   dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in 
neighbour entry\n");
+   return;
+   }
+
+   dipn = htonl(dip);
+   dev = mlxsw_sp->rifs[rif]->dev;
+   n = neigh_lookup(_tbl, , dev);
+   if (!n) {
+   netdev_err(dev, "Failed to find matching neighbour for 
IP=%pI4h\n",
+  );
+   return;
+   }
+
+   netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", );
+   neigh_event_send(n, NULL);
+   neigh_release(n);
+}
+
+static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
+  char *rauhtd_pl,
+  int rec_index)
+{
+   u8 num_entries;
+   int i;
+
+   num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
+   rec_index);
+   /* Hardware starts counting at 0, so add 1. */
+   num_entries++;
+
+   /* Each record consists of several neighbour entries. */
+   for (i = 0; i < num_entries; i++) {
+   int ent_index;
+
+   ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
+   mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
+  ent_index);
+   }
+
+}
+
+static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
+ char *rauhtd_pl, int rec_index)
+{
+   switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
+   case MLXSW_REG_RAUHTD_TYPE_IPV4:
+   mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
+

[patch net-next 25/42] mlxsw: spectrum: Edit RIF properties based on netdev events

2016-07-01 Thread Jiri Pirko
From: Ido Schimmel 

We are just about to introduce router interfaces (RIFs), but before that
we need to be able to update the device with the correct RIF attributes
whenever they change for the netdev the RIF is backing. Two such
attributes are MTU and MAC.

The MAC is used both to set the source MAC of packets egressing from the
RIF and also to program an FDB rule that will direct packets to the
router block.

Use the existing netdevice notification block and respond to CHANGEADDR
and CHANGEMTU accordingly. Store both attributes in the RIF struct
in case we need to revert to old attributes following a failed update.

Signed-off-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 61 +-
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h |  5 ++
 .../ethernet/mellanox/mlxsw/spectrum_switchdev.c   | 26 +++--
 3 files changed, 86 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index f54fd6a..284f6ab 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2627,6 +2627,63 @@ void mlxsw_sp_port_dev_put(struct mlxsw_sp_port 
*mlxsw_sp_port)
dev_put(mlxsw_sp_port->dev);
 }
 
+static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif,
+const char *mac, int mtu)
+{
+   char ritr_pl[MLXSW_REG_RITR_LEN];
+   int err;
+
+   mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
+   err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
+   if (err)
+   return err;
+
+   mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
+   mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
+   mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
+   return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
+}
+
+static int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
+{
+   struct mlxsw_sp *mlxsw_sp;
+   struct mlxsw_sp_rif *r;
+   int err;
+
+   mlxsw_sp = mlxsw_sp_lower_get(dev);
+   if (!mlxsw_sp)
+   return 0;
+
+   r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
+   if (!r)
+   return 0;
+
+   err = mlxsw_sp_rif_fdb_op(mlxsw_sp, r->addr, r->f->fid, false);
+   if (err)
+   return err;
+
+   err = mlxsw_sp_rif_edit(mlxsw_sp, r->rif, dev->dev_addr, dev->mtu);
+   if (err)
+   goto err_rif_edit;
+
+   err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, r->f->fid, true);
+   if (err)
+   goto err_rif_fdb_op;
+
+   ether_addr_copy(r->addr, dev->dev_addr);
+   r->mtu = dev->mtu;
+
+   netdev_dbg(dev, "Updated RIF=%d\n", r->rif);
+
+   return 0;
+
+err_rif_fdb_op:
+   mlxsw_sp_rif_edit(mlxsw_sp, r->rif, r->addr, r->mtu);
+err_rif_edit:
+   mlxsw_sp_rif_fdb_op(mlxsw_sp, r->addr, r->f->fid, true);
+   return err;
+}
+
 static bool mlxsw_sp_lag_port_fid_member(struct mlxsw_sp_port *lag_port,
 u16 fid)
 {
@@ -3487,7 +3544,9 @@ static int mlxsw_sp_netdevice_event(struct notifier_block 
*unused,
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
int err = 0;
 
-   if (mlxsw_sp_port_dev_check(dev))
+   if (event == NETDEV_CHANGEADDR || event == NETDEV_CHANGEMTU)
+   err = mlxsw_sp_netdevice_router_port_event(dev);
+   else if (mlxsw_sp_port_dev_check(dev))
err = mlxsw_sp_netdevice_port_event(dev, event, ptr);
else if (netif_is_lag_master(dev))
err = mlxsw_sp_netdevice_lag_event(dev, event, ptr);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index fefff25..0d3e0e3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -107,6 +107,9 @@ struct mlxsw_sp_fid {
 
 struct mlxsw_sp_rif {
struct net_device *dev;
+   struct mlxsw_sp_fid *f;
+   unsigned char addr[ETH_ALEN];
+   int mtu;
u16 rif;
 };
 
@@ -448,6 +451,8 @@ int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port 
*mlxsw_sp_vport, u16 fid,
 void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port);
 int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid);
 int mlxsw_sp_port_fdb_flush(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid);
+int mlxsw_sp_rif_fdb_op(struct mlxsw_sp *mlxsw_sp, const char *mac, u16 fid,
+   bool adding);
 int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port,
  enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index,
  bool dwrr, u8 dwrr_weight);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 

[patch net-next 04/42] mlxsw: spectrum: Send untagged packets through a port netdev

2016-07-01 Thread Jiri Pirko
From: Ido Schimmel 

Port netdevs (e.g. swXpY) that are not bridged are represented in the
device using a vPort with VID=PVID=1 (the PVID vPort), as untagged
packets entering the switch are internally tagged with the PVID VLAN.
When these packets are routed through a different port netdev they
should egress untagged.

This wasn't a problem until now, as non-bridged traffic only originated
from the CPU, which transmits packets out of the port as-is.

When a vPort is created with VID 1 mark it as egress untagged.

Signed-off-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index a453fff..afd06dc 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -816,6 +816,7 @@ int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 
__always_unused proto,
 {
struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
struct mlxsw_sp_port *mlxsw_sp_vport;
+   bool untagged = vid == 1;
int err;
 
/* VLAN 0 is added to HW filter when device goes up, but it is
@@ -859,7 +860,7 @@ int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 
__always_unused proto,
goto err_port_vid_learning_set;
}
 
-   err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, true, false);
+   err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, true, untagged);
if (err) {
netdev_err(dev, "Failed to set VLAN membership for VID=%d\n",
   vid);
-- 
2.5.5



[patch net-next 29/42] mlxsw: spectrum: Enable L3 interfaces on top of bridge devices

2016-07-01 Thread Jiri Pirko
From: Ido Schimmel 

As with the previously introduced L3 interfaces, listen to 'inetaddr'
notifications sent for bridge devices configured on top of the port
netdevs and create / destroy router interfaces (RIFs) accordingly.
This also includes VLAN devices configured on top of the VLAN-aware
bridge.

The RIFs will be destroyed either when the last IP address is removed or
when the underlying FID is destroyed.

Signed-off-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 139 -
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h |   2 +
 .../ethernet/mellanox/mlxsw/spectrum_switchdev.c   |   3 +
 3 files changed, 143 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index e49f80ba..7b2b741b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2706,10 +2706,135 @@ static int mlxsw_sp_inetaddr_lag_event(struct 
net_device *lag_dev,
return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1);
 }
 
+static struct mlxsw_sp_fid *mlxsw_sp_bridge_fid_get(struct mlxsw_sp *mlxsw_sp,
+   struct net_device *l3_dev)
+{
+   u16 fid;
+
+   if (is_vlan_dev(l3_dev))
+   fid = vlan_dev_vlan_id(l3_dev);
+   else if (mlxsw_sp->master_bridge.dev == l3_dev)
+   fid = 1;
+   else
+   return mlxsw_sp_vfid_find(mlxsw_sp, l3_dev);
+
+   return mlxsw_sp_fid_find(mlxsw_sp, fid);
+}
+
+static enum mlxsw_reg_ritr_if_type mlxsw_sp_rif_type_get(u16 fid)
+{
+   if (mlxsw_sp_fid_is_vfid(fid))
+   return MLXSW_REG_RITR_FID_IF;
+   else
+   return MLXSW_REG_RITR_VLAN_IF;
+}
+
+static int mlxsw_sp_rif_bridge_op(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *l3_dev,
+ u16 fid, u16 rif,
+ bool create)
+{
+   enum mlxsw_reg_ritr_if_type rif_type;
+   char ritr_pl[MLXSW_REG_RITR_LEN];
+
+   rif_type = mlxsw_sp_rif_type_get(fid);
+   mlxsw_reg_ritr_pack(ritr_pl, create, rif_type, rif, l3_dev->mtu,
+   l3_dev->dev_addr);
+   mlxsw_reg_ritr_fid_set(ritr_pl, rif_type, fid);
+
+   return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
+}
+
+static int mlxsw_sp_rif_bridge_create(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *l3_dev,
+ struct mlxsw_sp_fid *f)
+{
+   struct mlxsw_sp_rif *r;
+   u16 rif;
+   int err;
+
+   rif = mlxsw_sp_avail_rif_get(mlxsw_sp);
+   if (rif == MLXSW_SP_RIF_MAX)
+   return -ERANGE;
+
+   err = mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, true);
+   if (err)
+   return err;
+
+   err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, true);
+   if (err)
+   goto err_rif_fdb_op;
+
+   r = mlxsw_sp_rif_alloc(rif, l3_dev, f);
+   if (!r) {
+   err = -ENOMEM;
+   goto err_rif_alloc;
+   }
+
+   f->r = r;
+   mlxsw_sp->rifs[rif] = r;
+
+   netdev_dbg(l3_dev, "RIF=%d created\n", rif);
+
+   return 0;
+
+err_rif_alloc:
+   mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false);
+err_rif_fdb_op:
+   mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, false);
+   return err;
+}
+
+void mlxsw_sp_rif_bridge_destroy(struct mlxsw_sp *mlxsw_sp,
+struct mlxsw_sp_rif *r)
+{
+   struct net_device *l3_dev = r->dev;
+   struct mlxsw_sp_fid *f = r->f;
+   u16 rif = r->rif;
+
+   mlxsw_sp->rifs[rif] = NULL;
+   f->r = NULL;
+
+   kfree(r);
+
+   mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false);
+
+   mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, false);
+
+   netdev_dbg(l3_dev, "RIF=%d destroyed\n", rif);
+}
+
+static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
+ struct net_device *br_dev,
+ unsigned long event)
+{
+   struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
+   struct mlxsw_sp_fid *f;
+
+   /* FID can either be an actual FID if the L3 device is the
+* VLAN-aware bridge or a VLAN device on top. Otherwise, the
+* L3 device is a VLAN-unaware bridge and we get a vFID.
+*/
+   f = mlxsw_sp_bridge_fid_get(mlxsw_sp, l3_dev);
+   if (WARN_ON(!f))
+   return -EINVAL;
+
+   switch (event) {
+   case NETDEV_UP:
+   return mlxsw_sp_rif_bridge_create(mlxsw_sp, l3_dev, f);
+   case NETDEV_DOWN:
+   mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->r);
+

[patch net-next 14/42] mlxsw: spectrum: Use action 'discard' when removing traps

2016-07-01 Thread Jiri Pirko
From: Ido Schimmel 

When removing packet traps we should use action 'discard' instead of
'forward', as some trap IDs we'll add cannot be configured with the
latter. However, the result is the same, as packets are not trapped to the
CPU.

In the future we will be able to reverse the operation properly by
detaching the trap group from the CPU.

Signed-off-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index d15ebf3..de30763 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2295,7 +2295,7 @@ err_rx_trap_set:
  mlxsw_sp);
 err_rx_listener_register:
for (i--; i >= 0; i--) {
-   mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_FORWARD,
+   mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_DISCARD,
mlxsw_sp_rx_listener[i].trap_id);
mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(hpkt), hpkt_pl);
 
@@ -2312,7 +2312,7 @@ static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp)
int i;
 
for (i = 0; i < ARRAY_SIZE(mlxsw_sp_rx_listener); i++) {
-   mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_FORWARD,
+   mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_DISCARD,
mlxsw_sp_rx_listener[i].trap_id);
mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(hpkt), hpkt_pl);
 
-- 
2.5.5



[patch net-next 12/42] mlxsw: reg: Add FDB action to forward to router

2016-07-01 Thread Jiri Pirko
From: Ido Schimmel 

Incoming packets are directed to the router when they match an FDB
entry with action forward to IP router.

Add this action, which was mistakenly named "TRAP".

Signed-off-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h 
b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 7f74eb7..ea05e83 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -386,7 +386,9 @@ enum mlxsw_reg_sfd_rec_action {
/* forward and trap, trap_id is FDB_TRAP */
MLXSW_REG_SFD_REC_ACTION_MIRROR_TO_CPU = 1,
/* trap and do not forward, trap_id is FDB_TRAP */
-   MLXSW_REG_SFD_REC_ACTION_TRAP = 3,
+   MLXSW_REG_SFD_REC_ACTION_TRAP = 2,
+   /* forward to IP router */
+   MLXSW_REG_SFD_REC_ACTION_FORWARD_IP_ROUTER = 3,
MLXSW_REG_SFD_REC_ACTION_DISCARD_ERROR = 15,
 };
 
-- 
2.5.5



[patch net-next 17/42] mlxsw: reg: Add Router Algorithmic LPM Tree Allocation Register definition

2016-07-01 Thread Jiri Pirko
From: Jiri Pirko 

Register serves for allocation and deallocation of LPM search tree.

Signed-off-by: Jiri Pirko 
Reviewed-by: Ido Schimmel 
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 54 ++-
 1 file changed, 53 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h 
b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 5ddc1d3..a358e1b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -3,7 +3,7 @@
  * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2015-2016 Ido Schimmel 
  * Copyright (c) 2015 Elad Raz 
- * Copyright (c) 2015 Jiri Pirko 
+ * Copyright (c) 2015-2016 Jiri Pirko 
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -3454,6 +3454,56 @@ static inline void mlxsw_reg_ritr_pack(char *payload, 
bool enable,
mlxsw_reg_ritr_if_mac_memcpy_to(payload, mac);
 }
 
+/* RALTA - Router Algorithmic LPM Tree Allocation Register
+ * ---
+ * RALTA is used to allocate the LPM trees of the SHSPM method.
+ */
+#define MLXSW_REG_RALTA_ID 0x8010
+#define MLXSW_REG_RALTA_LEN 0x04
+
+static const struct mlxsw_reg_info mlxsw_reg_ralta = {
+   .id = MLXSW_REG_RALTA_ID,
+   .len = MLXSW_REG_RALTA_LEN,
+};
+
+/* reg_ralta_op
+ * opcode (valid for Write, must be 0 on Read)
+ * 0 - allocate a tree
+ * 1 - deallocate a tree
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, ralta, op, 0x00, 28, 2);
+
+enum mlxsw_reg_ralxx_protocol {
+   MLXSW_REG_RALXX_PROTOCOL_IPV4,
+   MLXSW_REG_RALXX_PROTOCOL_IPV6,
+};
+
+/* reg_ralta_protocol
+ * Protocol.
+ * Deallocation opcode: Reserved.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ralta, protocol, 0x00, 24, 4);
+
+/* reg_ralta_tree_id
+ * An identifier (numbered from 1..cap_shspm_max_trees-1) representing
+ * the tree identifier (managed by software).
+ * Note that tree_id 0 is allocated for a default-route tree.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ralta, tree_id, 0x00, 0, 8);
+
+static inline void mlxsw_reg_ralta_pack(char *payload, bool alloc,
+   enum mlxsw_reg_ralxx_protocol protocol,
+   u8 tree_id)
+{
+   MLXSW_REG_ZERO(ralta, payload);
+   mlxsw_reg_ralta_op_set(payload, !alloc);
+   mlxsw_reg_ralta_protocol_set(payload, protocol);
+   mlxsw_reg_ralta_tree_id_set(payload, tree_id);
+}
+
 /* MFCR - Management Fan Control Register
  * --
  * This register controls the settings of the Fan Speed PWM mechanism.
@@ -4196,6 +4246,8 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id)
return "RGCR";
case MLXSW_REG_RITR_ID:
return "RITR";
+   case MLXSW_REG_RALTA_ID:
+   return "RALTA";
case MLXSW_REG_MFCR_ID:
return "MFCR";
case MLXSW_REG_MFSC_ID:
-- 
2.5.5



[patch net-next 38/42] mlxsw: reg: Add Router Adjacency Table register

2016-07-01 Thread Jiri Pirko
From: Yotam Gigi 

The RATR register is used to configure the Router Adjacency (next-hop)
Table.

Signed-off-by: Yotam Gigi 
Signed-off-by: Jiri Pirko 
Reviewed-by: Ido Schimmel 
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 133 ++
 1 file changed, 133 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h 
b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index fcf379b..a2d7870 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -3455,6 +3455,137 @@ static inline void mlxsw_reg_ritr_pack(char *payload, bool enable,
mlxsw_reg_ritr_if_mac_memcpy_to(payload, mac);
 }
 
+/* RATR - Router Adjacency Table Register
+ * --
+ * The RATR register is used to configure the Router Adjacency (next-hop)
+ * Table.
+ */
+#define MLXSW_REG_RATR_ID 0x8008
+#define MLXSW_REG_RATR_LEN 0x2C
+
+static const struct mlxsw_reg_info mlxsw_reg_ratr = {
+   .id = MLXSW_REG_RATR_ID,
+   .len = MLXSW_REG_RATR_LEN,
+};
+
+enum mlxsw_reg_ratr_op {
+   /* Read */
+   MLXSW_REG_RATR_OP_QUERY_READ = 0,
+   /* Read and clear activity */
+   MLXSW_REG_RATR_OP_QUERY_READ_CLEAR = 2,
+   /* Write Adjacency entry */
+   MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY = 1,
+   /* Write Adjacency entry only if the activity is cleared.
+* The write may not succeed if the activity is set. There is no
+* direct feedback on whether the write has succeeded; however,
+* the get will reveal the actual entry (SW can compare the get
+* response to the set command).
+*/
+   MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY = 3,
+};
+
+/* reg_ratr_op
+ * Note that Write operation may also be used for updating
+ * counter_set_type and counter_index. In this case all other
+ * fields must not be updated.
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, ratr, op, 0x00, 28, 4);
+
+/* reg_ratr_v
+ * Valid bit. Indicates if the adjacency entry is valid.
+ * Note: the device may need some time before reusing an invalidated
+ * entry. During this time the entry can not be reused. It is
+ * recommended to use another entry before reusing an invalidated
+ * entry (e.g. software can put it at the end of the list for
+ * reusing). Trying to access an invalidated entry not yet cleared
+ * by the device results in a failure indicating "Try Again" status.
+ * When valid is '0' then egress_router_interface, trap_action,
+ * adjacency_parameters and counters are reserved.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ratr, v, 0x00, 24, 1);
+
+/* reg_ratr_a
+ * Activity. Set for new entries. Set if a packet lookup has hit on
+ * the specific entry. To clear the a bit, use "clear activity".
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, ratr, a, 0x00, 16, 1);
+
+/* reg_ratr_adjacency_index_low
+ * Bits 15:0 of index into the adjacency table.
+ * For SwitchX and SwitchX-2, the adjacency table is linear and
+ * used for adjacency entries only.
+ * For Spectrum, the index is to the KVD linear.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ratr, adjacency_index_low, 0x04, 0, 16);
+
+/* reg_ratr_egress_router_interface
+ * Range is 0 .. cap_max_router_interfaces - 1
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ratr, egress_router_interface, 0x08, 0, 16);
+
+enum mlxsw_reg_ratr_trap_action {
+   MLXSW_REG_RATR_TRAP_ACTION_NOP,
+   MLXSW_REG_RATR_TRAP_ACTION_TRAP,
+   MLXSW_REG_RATR_TRAP_ACTION_MIRROR_TO_CPU,
+   MLXSW_REG_RATR_TRAP_ACTION_MIRROR,
+   MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS,
+};
+
+/* reg_ratr_trap_action
+ * see mlxsw_reg_ratr_trap_action
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ratr, trap_action, 0x0C, 28, 4);
+
+enum mlxsw_reg_ratr_trap_id {
+   MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS0 = 0,
+   MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS1 = 1,
+};
+
+/* reg_ratr_adjacency_index_high
+ * Bits 23:16 of the adjacency_index.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ratr, adjacency_index_high, 0x0C, 16, 8);
+
+/* reg_ratr_trap_id
+ * Trap ID to be reported to CPU.
+ * Trap-ID is RTR_EGRESS0 or RTR_EGRESS1.
+ * For trap_action of NOP, MIRROR and DISCARD_ERRORS, trap_id is reserved.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ratr, trap_id, 0x0C, 0, 8);
+
+/* reg_ratr_eth_destination_mac
+ * MAC address of the destination next-hop.
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, ratr, eth_destination_mac, 0x12, 6);
+
+static inline void
+mlxsw_reg_ratr_pack(char *payload,
+   enum mlxsw_reg_ratr_op op, bool valid,
+   u32 adjacency_index, u16 egress_rif)
+{
+   MLXSW_REG_ZERO(ratr, payload);
+   mlxsw_reg_ratr_op_set(payload, op);
+   mlxsw_reg_ratr_v_set(payload, valid);
+   mlxsw_reg_ratr_adjacency_index_low_set(payload, adjacency_index);
+   mlxsw_reg_ratr_adjacency_index_high_set(payload, adjacency_index >> 16);
+   mlxsw_reg_ratr_egress_router_interface_set(payload, egress_rif);
+}
+

  1   2   3   >