[PATCH net-next 6/8] bnxt_en: Include some hardware port statistics in ndo_get_stats64().

2016-03-05 Thread Michael Chan
Include some of the port error counters (e.g. crc) in ->ndo_get_stats64()
for the PF device.

Signed-off-by: Michael Chan 
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c 
b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index c5e812a..18a14a5 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -4934,6 +4934,22 @@ bnxt_get_stats64(struct net_device *dev, struct 
rtnl_link_stats64 *stats)
stats->tx_dropped += le64_to_cpu(hw_stats->tx_drop_pkts);
}
 
+   if (bp->flags & BNXT_FLAG_PORT_STATS) {
+   struct rx_port_stats *rx = bp->hw_rx_port_stats;
+   struct tx_port_stats *tx = bp->hw_tx_port_stats;
+
+   stats->rx_crc_errors = le64_to_cpu(rx->rx_fcs_err_frames);
+   stats->rx_frame_errors = le64_to_cpu(rx->rx_align_err_frames);
+   stats->rx_length_errors = le64_to_cpu(rx->rx_undrsz_frames) +
+ le64_to_cpu(rx->rx_ovrsz_frames) +
+ le64_to_cpu(rx->rx_runt_frames);
+   stats->rx_errors = le64_to_cpu(rx->rx_false_carrier_frames) +
+  le64_to_cpu(rx->rx_jbr_frames);
+   stats->collisions = le64_to_cpu(tx->tx_total_collisions);
+   stats->tx_fifo_errors = le64_to_cpu(tx->tx_fifo_underruns);
+   stats->tx_errors = le64_to_cpu(tx->tx_err);
+   }
+
return stats;
 }
 
-- 
1.8.3.1



[PATCH net-next 7/8] bnxt_en: Include hardware port statistics in ethtool -S.

2016-03-05 Thread Michael Chan
Include the more useful port statistics in ethtool -S for the PF device.

Signed-off-by: Michael Chan 
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 105 +-
 1 file changed, 103 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c 
b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 87dcc62..888f03f 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -8,6 +8,7 @@
  */
 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -83,13 +84,99 @@ static int bnxt_set_coalesce(struct net_device *dev,
 
 #define BNXT_NUM_STATS 21
 
+#define BNXT_RX_STATS_OFFSET(counter)  \
+   (offsetof(struct rx_port_stats, counter) / 8)
+
+#define BNXT_RX_STATS_ENTRY(counter)   \
+   { BNXT_RX_STATS_OFFSET(counter), __stringify(counter) }
+
+#define BNXT_TX_STATS_OFFSET(counter)  \
+   ((offsetof(struct tx_port_stats, counter) + \
+ sizeof(struct rx_port_stats) + 512) / 8)
+
+#define BNXT_TX_STATS_ENTRY(counter)   \
+   { BNXT_TX_STATS_OFFSET(counter), __stringify(counter) }
+
+static const struct {
+   long offset;
+   char string[ETH_GSTRING_LEN];
+} bnxt_port_stats_arr[] = {
+   BNXT_RX_STATS_ENTRY(rx_64b_frames),
+   BNXT_RX_STATS_ENTRY(rx_65b_127b_frames),
+   BNXT_RX_STATS_ENTRY(rx_128b_255b_frames),
+   BNXT_RX_STATS_ENTRY(rx_256b_511b_frames),
+   BNXT_RX_STATS_ENTRY(rx_512b_1023b_frames),
+   BNXT_RX_STATS_ENTRY(rx_1024b_1518_frames),
+   BNXT_RX_STATS_ENTRY(rx_good_vlan_frames),
+   BNXT_RX_STATS_ENTRY(rx_1519b_2047b_frames),
+   BNXT_RX_STATS_ENTRY(rx_2048b_4095b_frames),
+   BNXT_RX_STATS_ENTRY(rx_4096b_9216b_frames),
+   BNXT_RX_STATS_ENTRY(rx_9217b_16383b_frames),
+   BNXT_RX_STATS_ENTRY(rx_total_frames),
+   BNXT_RX_STATS_ENTRY(rx_ucast_frames),
+   BNXT_RX_STATS_ENTRY(rx_mcast_frames),
+   BNXT_RX_STATS_ENTRY(rx_bcast_frames),
+   BNXT_RX_STATS_ENTRY(rx_fcs_err_frames),
+   BNXT_RX_STATS_ENTRY(rx_ctrl_frames),
+   BNXT_RX_STATS_ENTRY(rx_pause_frames),
+   BNXT_RX_STATS_ENTRY(rx_pfc_frames),
+   BNXT_RX_STATS_ENTRY(rx_align_err_frames),
+   BNXT_RX_STATS_ENTRY(rx_ovrsz_frames),
+   BNXT_RX_STATS_ENTRY(rx_jbr_frames),
+   BNXT_RX_STATS_ENTRY(rx_mtu_err_frames),
+   BNXT_RX_STATS_ENTRY(rx_tagged_frames),
+   BNXT_RX_STATS_ENTRY(rx_double_tagged_frames),
+   BNXT_RX_STATS_ENTRY(rx_good_frames),
+   BNXT_RX_STATS_ENTRY(rx_undrsz_frames),
+   BNXT_RX_STATS_ENTRY(rx_eee_lpi_events),
+   BNXT_RX_STATS_ENTRY(rx_eee_lpi_duration),
+   BNXT_RX_STATS_ENTRY(rx_bytes),
+   BNXT_RX_STATS_ENTRY(rx_runt_bytes),
+   BNXT_RX_STATS_ENTRY(rx_runt_frames),
+
+   BNXT_TX_STATS_ENTRY(tx_64b_frames),
+   BNXT_TX_STATS_ENTRY(tx_65b_127b_frames),
+   BNXT_TX_STATS_ENTRY(tx_128b_255b_frames),
+   BNXT_TX_STATS_ENTRY(tx_256b_511b_frames),
+   BNXT_TX_STATS_ENTRY(tx_512b_1023b_frames),
+   BNXT_TX_STATS_ENTRY(tx_1024b_1518_frames),
+   BNXT_TX_STATS_ENTRY(tx_good_vlan_frames),
+   BNXT_TX_STATS_ENTRY(tx_1519b_2047_frames),
+   BNXT_TX_STATS_ENTRY(tx_2048b_4095b_frames),
+   BNXT_TX_STATS_ENTRY(tx_4096b_9216b_frames),
+   BNXT_TX_STATS_ENTRY(tx_9217b_16383b_frames),
+   BNXT_TX_STATS_ENTRY(tx_good_frames),
+   BNXT_TX_STATS_ENTRY(tx_total_frames),
+   BNXT_TX_STATS_ENTRY(tx_ucast_frames),
+   BNXT_TX_STATS_ENTRY(tx_mcast_frames),
+   BNXT_TX_STATS_ENTRY(tx_bcast_frames),
+   BNXT_TX_STATS_ENTRY(tx_pause_frames),
+   BNXT_TX_STATS_ENTRY(tx_pfc_frames),
+   BNXT_TX_STATS_ENTRY(tx_jabber_frames),
+   BNXT_TX_STATS_ENTRY(tx_fcs_err_frames),
+   BNXT_TX_STATS_ENTRY(tx_err),
+   BNXT_TX_STATS_ENTRY(tx_fifo_underruns),
+   BNXT_TX_STATS_ENTRY(tx_eee_lpi_events),
+   BNXT_TX_STATS_ENTRY(tx_eee_lpi_duration),
+   BNXT_TX_STATS_ENTRY(tx_total_collisions),
+   BNXT_TX_STATS_ENTRY(tx_bytes),
+};
+
+#define BNXT_NUM_PORT_STATS ARRAY_SIZE(bnxt_port_stats_arr)
+
 static int bnxt_get_sset_count(struct net_device *dev, int sset)
 {
struct bnxt *bp = netdev_priv(dev);
 
switch (sset) {
-   case ETH_SS_STATS:
-   return BNXT_NUM_STATS * bp->cp_nr_rings;
+   case ETH_SS_STATS: {
+   int num_stats = BNXT_NUM_STATS * bp->cp_nr_rings;
+
+   if (bp->flags & BNXT_FLAG_PORT_STATS)
+   num_stats += BNXT_NUM_PORT_STATS;
+
+   return num_stats;
+   }
default:
return -EOPNOTSUPP;
}
@@ -118,6 +205,14 @@ static void bnxt_get_ethtool_stats(struct net_device *dev,
buf[j] = le64_to_cpu(hw_stats[k]);
buf[j++] = cpr->rx_l4_csum_errors;
}
+   if (bp->flags & BNXT_FLAG_PORT_STATS) {
+   __le64 *port_stats = (__le64 *)bp->hw_rx_port_stats;
+

[PATCH net-next 4/8] bnxt_en: Extend autoneg to all speeds.

2016-03-05 Thread Michael Chan
Allow all autoneg speeds supported by firmware to be advertised.  If
the advertising parameter is 0, then all supported speeds will be
advertised.

Remove BNXT_ALL_COPPER_ETHTOOL_SPEED which is no longer used as all
supported speeds can be advertised.

Signed-off-by: Michael Chan 
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.h |  4 
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 14 --
 2 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h 
b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 2f24e4e..c4424b6 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -757,10 +757,6 @@ struct bnxt_ntuple_filter {
 #define BNXT_FLTR_UPDATE   1
 };
 
-#define BNXT_ALL_COPPER_ETHTOOL_SPEED  \
-   (ADVERTISED_100baseT_Full | ADVERTISED_1000baseT_Full | \
-ADVERTISED_10000baseT_Full)
-
 struct bnxt_link_info {
u8  media_type;
u8  transceiver;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c 
b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index e2fe967..87dcc62 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -696,16 +696,10 @@ static int bnxt_set_settings(struct net_device *dev, 
struct ethtool_cmd *cmd)
return rc;
 
if (cmd->autoneg == AUTONEG_ENABLE) {
-   if (link_info->media_type != PORT_PHY_QCFG_RESP_MEDIA_TYPE_TP) {
-   netdev_err(dev, "Media type doesn't support autoneg\n");
-   rc = -EINVAL;
-   goto set_setting_exit;
-   }
-   if (cmd->advertising & ~(BNXT_ALL_COPPER_ETHTOOL_SPEED |
-ADVERTISED_Autoneg |
-ADVERTISED_TP |
-ADVERTISED_Pause |
-ADVERTISED_Asym_Pause)) {
+   u32 supported_spds = bnxt_fw_to_ethtool_support_spds(link_info);
+
+   if (cmd->advertising & ~(supported_spds | ADVERTISED_Autoneg |
+ADVERTISED_TP | ADVERTISED_FIBRE)) {
netdev_err(dev, "Unsupported advertising mask (adv: 
0x%x)\n",
   cmd->advertising);
rc = -EINVAL;
-- 
1.8.3.1



[PATCH net-next 0/8] bnxt_en: Updates for net-next.

2016-03-05 Thread Michael Chan
Updates to support autoneg for all supported speeds, add PF port statistics,
and Advanced Error Reporting.

Michael Chan (8):
  bnxt_en: Refactor bnxt_fw_to_ethtool_advertised_spds().
  bnxt_en: Add reporting of link partner advertisement.
  bnxt_en: Use common function to get ethtool supported flags.
  bnxt_en: Extend autoneg to all speeds.
  bnxt_en: Add port statistics support.
  bnxt_en: Include some hardware port statistics in ndo_get_stats64().
  bnxt_en: Include hardware port statistics in ethtool -S.
  bnxt_en: Enable AER support.

 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 178 +++
 drivers/net/ethernet/broadcom/bnxt/bnxt.h |  17 +-
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 201 +-
 3 files changed, 344 insertions(+), 52 deletions(-)

-- 
1.8.3.1



[PATCH net-next 5/8] bnxt_en: Add port statistics support.

2016-03-05 Thread Michael Chan
Gather periodic port statistics if the device is PF and link is up.  This
is triggered in bnxt_timer() every one second to request firmware to DMA
the counters.

Signed-off-by: Michael Chan 
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 50 +++
 drivers/net/ethernet/broadcom/bnxt/bnxt.h | 11 ++-
 2 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c 
b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 072a12f..c5e812a 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -2361,6 +2361,14 @@ static void bnxt_free_stats(struct bnxt *bp)
u32 size, i;
struct pci_dev *pdev = bp->pdev;
 
+   if (bp->hw_rx_port_stats) {
+   dma_free_coherent(&pdev->dev, bp->hw_port_stats_size,
+ bp->hw_rx_port_stats,
+ bp->hw_rx_port_stats_map);
+   bp->hw_rx_port_stats = NULL;
+   bp->flags &= ~BNXT_FLAG_PORT_STATS;
+   }
+
if (!bp->bnapi)
return;
 
@@ -2397,6 +2405,24 @@ static int bnxt_alloc_stats(struct bnxt *bp)
 
cpr->hw_stats_ctx_id = INVALID_STATS_CTX_ID;
}
+
+   if (BNXT_PF(bp)) {
+   bp->hw_port_stats_size = sizeof(struct rx_port_stats) +
+sizeof(struct tx_port_stats) + 1024;
+
+   bp->hw_rx_port_stats =
+   dma_alloc_coherent(&pdev->dev, bp->hw_port_stats_size,
+  &bp->hw_rx_port_stats_map,
+  GFP_KERNEL);
+   if (!bp->hw_rx_port_stats)
+   return -ENOMEM;
+
+   bp->hw_tx_port_stats = (void *)(bp->hw_rx_port_stats + 1) +
+  512;
+   bp->hw_tx_port_stats_map = bp->hw_rx_port_stats_map +
+  sizeof(struct rx_port_stats) + 512;
+   bp->flags |= BNXT_FLAG_PORT_STATS;
+   }
return 0;
 }
 
@@ -3833,6 +3859,23 @@ hwrm_ver_get_exit:
return rc;
 }
 
+static int bnxt_hwrm_port_qstats(struct bnxt *bp)
+{
+   int rc;
+   struct bnxt_pf_info *pf = &bp->pf;
+   struct hwrm_port_qstats_input req = {0};
+
+   if (!(bp->flags & BNXT_FLAG_PORT_STATS))
+   return 0;
+
+   bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_QSTATS, -1, -1);
+   req.port_id = cpu_to_le16(pf->port_id);
+   req.tx_stat_host_addr = cpu_to_le64(bp->hw_tx_port_stats_map);
+   req.rx_stat_host_addr = cpu_to_le64(bp->hw_rx_port_stats_map);
+   rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+   return rc;
+}
+
 static void bnxt_hwrm_free_tunnel_ports(struct bnxt *bp)
 {
if (bp->vxlan_port_cnt) {
@@ -5231,6 +5274,10 @@ static void bnxt_timer(unsigned long data)
if (atomic_read(&bp->intr_sem) != 0)
goto bnxt_restart_timer;
 
+   if (bp->link_info.link_up && (bp->flags & BNXT_FLAG_PORT_STATS)) {
+   set_bit(BNXT_PERIODIC_STATS_SP_EVENT, &bp->sp_event);
+   schedule_work(&bp->sp_task);
+   }
 bnxt_restart_timer:
mod_timer(&bp->timer, jiffies + bp->current_interval);
 }
@@ -5282,6 +5329,9 @@ static void bnxt_sp_task(struct work_struct *work)
rtnl_unlock();
}
 
+   if (test_and_clear_bit(BNXT_PERIODIC_STATS_SP_EVENT, &bp->sp_event))
+   bnxt_hwrm_port_qstats(bp);
+
smp_mb__before_atomic();
clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state);
 }
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h 
b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index c4424b6..ec04c47 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -873,6 +873,7 @@ struct bnxt {
#define BNXT_FLAG_MSIX_CAP  0x80
#define BNXT_FLAG_RFS   0x100
#define BNXT_FLAG_SHARED_RINGS  0x200
+   #define BNXT_FLAG_PORT_STATS    0x400
 
#define BNXT_FLAG_ALL_CONFIG_FEATS (BNXT_FLAG_TPA | \
BNXT_FLAG_RFS | \
@@ -925,7 +926,7 @@ struct bnxt {
struct bnxt_queue_info  q_info[BNXT_MAX_QUEUE];
 
unsigned intcurrent_interval;
-#define BNXT_TIMER_INTERVAL    (HZ / 2)
+#define BNXT_TIMER_INTERVAL    HZ
 
struct timer_list   timer;
 
@@ -945,6 +946,13 @@ struct bnxt {
void*hwrm_dbg_resp_addr;
dma_addr_t  hwrm_dbg_resp_dma_addr;
 #define HWRM_DBG_REG_BUF_SIZE  128
+
+   struct rx_port_stats*hw_rx_port_stats;
+   struct tx_port_stats*hw_tx_port_stats;
+   dma_addr_t  hw_rx_port_stats_map;
+   dma_addr_t  hw_tx_port_stats_map;
+   int hw_port_stats_size;
+
int hwrm_cmd_timeout;
struct mutex  

[PATCH net-next 8/8] bnxt_en: Enable AER support.

2016-03-05 Thread Michael Chan
From: Satish Baddipadige 

Add pci_error_handlers callbacks to support PCIe Advanced Error
Recovery.

Signed-off-by: Satish Baddipadige 
Signed-off-by: Michael Chan 
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 109 ++
 1 file changed, 109 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c 
b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 18a14a5..0cd55de 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -5411,6 +5411,8 @@ static int bnxt_init_board(struct pci_dev *pdev, struct 
net_device *dev)
goto init_err_release;
}
 
+   pci_enable_pcie_error_reporting(pdev);
+
INIT_WORK(&bp->sp_task, bnxt_sp_task);
 
spin_lock_init(&bp->ntp_fltr_lock);
@@ -5790,6 +5792,7 @@ static void bnxt_remove_one(struct pci_dev *pdev)
if (BNXT_PF(bp))
bnxt_sriov_disable(bp);
 
+   pci_disable_pcie_error_reporting(pdev);
unregister_netdev(dev);
cancel_work_sync(&bp->sp_task);
bp->sp_event = 0;
@@ -6029,11 +6032,117 @@ init_err_free:
return rc;
 }
 
+/**
+ * bnxt_io_error_detected - called when PCI error is detected
+ * @pdev: Pointer to PCI device
+ * @state: The current pci connection state
+ *
+ * This function is called after a PCI bus error affecting
+ * this device has been detected.
+ */
+static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev,
+  pci_channel_state_t state)
+{
+   struct net_device *netdev = pci_get_drvdata(pdev);
+
+   netdev_info(netdev, "PCI I/O error detected\n");
+
+   rtnl_lock();
+   netif_device_detach(netdev);
+
+   if (state == pci_channel_io_perm_failure) {
+   rtnl_unlock();
+   return PCI_ERS_RESULT_DISCONNECT;
+   }
+
+   if (netif_running(netdev))
+   bnxt_close(netdev);
+
+   pci_disable_device(pdev);
+   rtnl_unlock();
+
+   /* Request a slot reset. */
+   return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * bnxt_io_slot_reset - called after the pci bus has been reset.
+ * @pdev: Pointer to PCI device
+ *
+ * Restart the card from scratch, as if from a cold-boot.
+ * At this point, the card has experienced a hard reset,
+ * followed by fixups by BIOS, and has its config space
+ * set up identically to what it was at cold boot.
+ */
+static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev)
+{
+   struct net_device *netdev = pci_get_drvdata(pdev);
+   struct bnxt *bp = netdev_priv(netdev);
+   int err = 0;
+   pci_ers_result_t result = PCI_ERS_RESULT_DISCONNECT;
+
+   netdev_info(bp->dev, "PCI Slot Reset\n");
+
+   rtnl_lock();
+
+   if (pci_enable_device(pdev)) {
+   dev_err(&pdev->dev,
+   "Cannot re-enable PCI device after reset.\n");
+   } else {
+   pci_set_master(pdev);
+
+   if (netif_running(netdev))
+   err = bnxt_open(netdev);
+
+   if (!err)
+   result = PCI_ERS_RESULT_RECOVERED;
+   }
+
+   if (result != PCI_ERS_RESULT_RECOVERED && netif_running(netdev))
+   dev_close(netdev);
+
+   rtnl_unlock();
+
+   err = pci_cleanup_aer_uncorrect_error_status(pdev);
+   if (err) {
+   dev_err(&pdev->dev,
+   "pci_cleanup_aer_uncorrect_error_status failed 0x%0x\n",
+err); /* non-fatal, continue */
+   }
+
+   return PCI_ERS_RESULT_RECOVERED;
+}
+
+/**
+ * bnxt_io_resume - called when traffic can start flowing again.
+ * @pdev: Pointer to PCI device
+ *
+ * This callback is called when the error recovery driver tells
+ * us that its OK to resume normal operation.
+ */
+static void bnxt_io_resume(struct pci_dev *pdev)
+{
+   struct net_device *netdev = pci_get_drvdata(pdev);
+
+   rtnl_lock();
+
+   netif_device_attach(netdev);
+
+   rtnl_unlock();
+}
+
+static const struct pci_error_handlers bnxt_err_handler = {
+   .error_detected = bnxt_io_error_detected,
+   .slot_reset = bnxt_io_slot_reset,
+   .resume = bnxt_io_resume
+};
+
 static struct pci_driver bnxt_pci_driver = {
.name   = DRV_MODULE_NAME,
.id_table   = bnxt_pci_tbl,
.probe  = bnxt_init_one,
.remove = bnxt_remove_one,
+   .err_handler= &bnxt_err_handler,
 #if defined(CONFIG_BNXT_SRIOV)
.sriov_configure = bnxt_sriov_configure,
 #endif
-- 
1.8.3.1



[PATCH net-next 2/8] bnxt_en: Add reporting of link partner advertisement.

2016-03-05 Thread Michael Chan
And report actual pause settings to ETHTOOL_GPAUSEPARAM to let ethtool
resolve the actual pause settings.

Signed-off-by: Michael Chan 
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c |  3 +++
 drivers/net/ethernet/broadcom/bnxt/bnxt.h |  2 ++
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 20 ++--
 3 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c 
b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index aa6a318..072a12f 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -4467,6 +4467,7 @@ static int bnxt_update_link(struct bnxt *bp, bool 
chng_link_state)
link_info->pause = resp->pause;
link_info->auto_mode = resp->auto_mode;
link_info->auto_pause_setting = resp->auto_pause;
+   link_info->lp_pause = resp->link_partner_adv_pause;
link_info->force_pause_setting = resp->force_pause;
link_info->duplex_setting = resp->duplex;
if (link_info->phy_link_status == BNXT_LINK_LINK)
@@ -4477,6 +4478,8 @@ static int bnxt_update_link(struct bnxt *bp, bool 
chng_link_state)
link_info->auto_link_speed = le16_to_cpu(resp->auto_link_speed);
link_info->support_speeds = le16_to_cpu(resp->support_speeds);
link_info->auto_link_speeds = le16_to_cpu(resp->auto_link_speed_mask);
+   link_info->lp_auto_link_speeds =
+   le16_to_cpu(resp->link_partner_adv_speeds);
link_info->preemphasis = le32_to_cpu(resp->preemphasis);
link_info->phy_ver[0] = resp->phy_maj;
link_info->phy_ver[1] = resp->phy_min;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h 
b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 9aa38f5..2f24e4e 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -780,6 +780,7 @@ struct bnxt_link_info {
 #define BNXT_LINK_PAUSE_RX PORT_PHY_QCFG_RESP_PAUSE_RX
 #define BNXT_LINK_PAUSE_BOTH   (PORT_PHY_QCFG_RESP_PAUSE_RX | \
 PORT_PHY_QCFG_RESP_PAUSE_TX)
+   u8  lp_pause;
u8  auto_pause_setting;
u8  force_pause_setting;
u8  duplex_setting;
@@ -814,6 +815,7 @@ struct bnxt_link_info {
 #define BNXT_LINK_SPEED_MSK_25GB PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_25GB
 #define BNXT_LINK_SPEED_MSK_40GB PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_40GB
 #define BNXT_LINK_SPEED_MSK_50GB PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_50GB
+   u16 lp_auto_link_speeds;
u16 auto_link_speed;
u16 force_link_speed;
u32 preemphasis;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c 
b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index a6ee26a..591c290 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -553,6 +553,17 @@ static u32 bnxt_fw_to_ethtool_advertised_spds(struct 
bnxt_link_info *link_info)
return _bnxt_fw_to_ethtool_adv_spds(fw_speeds, fw_pause);
 }
 
+static u32 bnxt_fw_to_ethtool_lp_adv(struct bnxt_link_info *link_info)
+{
+   u16 fw_speeds = link_info->lp_auto_link_speeds;
+   u8 fw_pause = 0;
+
+   if (link_info->autoneg & BNXT_AUTONEG_FLOW_CTRL)
+   fw_pause = link_info->lp_pause;
+
+   return _bnxt_fw_to_ethtool_adv_spds(fw_speeds, fw_pause);
+}
+
 u32 bnxt_fw_to_ethtool_speed(u16 fw_link_speed)
 {
switch (fw_link_speed) {
@@ -594,6 +605,9 @@ static int bnxt_get_settings(struct net_device *dev, struct 
ethtool_cmd *cmd)
bnxt_fw_to_ethtool_advertised_spds(link_info);
cmd->advertising |= ADVERTISED_Autoneg;
cmd->autoneg = AUTONEG_ENABLE;
+   if (link_info->phy_link_status == BNXT_LINK_LINK)
+   cmd->lp_advertising =
+   bnxt_fw_to_ethtool_lp_adv(link_info);
} else {
cmd->autoneg = AUTONEG_DISABLE;
cmd->advertising = 0;
@@ -757,8 +771,10 @@ static void bnxt_get_pauseparam(struct net_device *dev,
if (BNXT_VF(bp))
return;
epause->autoneg = !!(link_info->autoneg & BNXT_AUTONEG_FLOW_CTRL);
-   epause->rx_pause = ((link_info->pause & BNXT_LINK_PAUSE_RX) != 0);
-   epause->tx_pause = ((link_info->pause & BNXT_LINK_PAUSE_TX) != 0);
+   epause->rx_pause =
+   ((link_info->auto_pause_setting & BNXT_LINK_PAUSE_RX) != 0);
+   epause->tx_pause =
+   ((link_info->auto_pause_setting & BNXT_LINK_PAUSE_TX) != 0);
 }
 
 static int bnxt_set_pauseparam(struct net_device *dev,
-- 
1.8.3.1



[PATCH net-next 1/8] bnxt_en: Refactor bnxt_fw_to_ethtool_advertised_spds().

2016-03-05 Thread Michael Chan
Include the conversion of pause bits and add one extra call layer so
that the same refactored function can be reused to get the link partner
advertisement bits.

Signed-off-by: Michael Chan 
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 33 ++-
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c 
b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 84ea26d..a6ee26a 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -515,9 +515,8 @@ static u32 bnxt_fw_to_ethtool_support_spds(struct 
bnxt_link_info *link_info)
return speed_mask;
 }
 
-static u32 bnxt_fw_to_ethtool_advertised_spds(struct bnxt_link_info *link_info)
+static u32 _bnxt_fw_to_ethtool_adv_spds(u16 fw_speeds, u8 fw_pause)
 {
-   u16 fw_speeds = link_info->auto_link_speeds;
u32 speed_mask = 0;
 
/* TODO: support 25GB, 40GB, 50GB with different cable type */
@@ -532,9 +531,28 @@ static u32 bnxt_fw_to_ethtool_advertised_spds(struct 
bnxt_link_info *link_info)
speed_mask |= ADVERTISED_10000baseT_Full;
if (fw_speeds & BNXT_LINK_SPEED_MSK_40GB)
speed_mask |= ADVERTISED_40000baseCR4_Full;
+
+   if ((fw_pause & BNXT_LINK_PAUSE_BOTH) == BNXT_LINK_PAUSE_BOTH)
+   speed_mask |= ADVERTISED_Pause;
+   else if (fw_pause & BNXT_LINK_PAUSE_TX)
+   speed_mask |= ADVERTISED_Asym_Pause;
+   else if (fw_pause & BNXT_LINK_PAUSE_RX)
+   speed_mask |= ADVERTISED_Pause | ADVERTISED_Asym_Pause;
+
return speed_mask;
 }
 
+static u32 bnxt_fw_to_ethtool_advertised_spds(struct bnxt_link_info *link_info)
+{
+   u16 fw_speeds = link_info->auto_link_speeds;
+   u8 fw_pause = 0;
+
+   if (link_info->autoneg & BNXT_AUTONEG_FLOW_CTRL)
+   fw_pause = link_info->auto_pause_setting;
+
+   return _bnxt_fw_to_ethtool_adv_spds(fw_speeds, fw_pause);
+}
+
 u32 bnxt_fw_to_ethtool_speed(u16 fw_link_speed)
 {
switch (fw_link_speed) {
@@ -580,17 +598,6 @@ static int bnxt_get_settings(struct net_device *dev, 
struct ethtool_cmd *cmd)
cmd->autoneg = AUTONEG_DISABLE;
cmd->advertising = 0;
}
-   if (link_info->autoneg & BNXT_AUTONEG_FLOW_CTRL) {
-   if ((link_info->auto_pause_setting & BNXT_LINK_PAUSE_BOTH) ==
-   BNXT_LINK_PAUSE_BOTH) {
-   cmd->advertising |= ADVERTISED_Pause;
-   } else {
-   cmd->advertising |= ADVERTISED_Asym_Pause;
-   if (link_info->auto_pause_setting &
-   BNXT_LINK_PAUSE_RX)
-   cmd->advertising |= ADVERTISED_Pause;
-   }
-   }
 
cmd->port = PORT_NONE;
if (link_info->media_type == PORT_PHY_QCFG_RESP_MEDIA_TYPE_TP) {
-- 
1.8.3.1



[PATCH net-next 3/8] bnxt_en: Use common function to get ethtool supported flags.

2016-03-05 Thread Michael Chan
The supported bits and advertising bits in ethtool have the same
definitions.  The same is true for the firmware bits.  So use the
common function to handle the conversion for both supported and
advertising bits.

Signed-off-by: Michael Chan 
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 29 +++
 1 file changed, 9 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c 
b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 591c290..e2fe967 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -496,25 +496,6 @@ static void bnxt_get_drvinfo(struct net_device *dev,
kfree(pkglog);
 }
 
-static u32 bnxt_fw_to_ethtool_support_spds(struct bnxt_link_info *link_info)
-{
-   u16 fw_speeds = link_info->support_speeds;
-   u32 speed_mask = 0;
-
-   if (fw_speeds & BNXT_LINK_SPEED_MSK_100MB)
-   speed_mask |= SUPPORTED_100baseT_Full;
-   if (fw_speeds & BNXT_LINK_SPEED_MSK_1GB)
-   speed_mask |= SUPPORTED_1000baseT_Full;
-   if (fw_speeds & BNXT_LINK_SPEED_MSK_2_5GB)
-   speed_mask |= SUPPORTED_2500baseX_Full;
-   if (fw_speeds & BNXT_LINK_SPEED_MSK_10GB)
-   speed_mask |= SUPPORTED_10000baseT_Full;
-   if (fw_speeds & BNXT_LINK_SPEED_MSK_40GB)
-   speed_mask |= SUPPORTED_40000baseCR4_Full;
-
-   return speed_mask;
-}
-
 static u32 _bnxt_fw_to_ethtool_adv_spds(u16 fw_speeds, u8 fw_pause)
 {
u32 speed_mask = 0;
@@ -564,6 +545,15 @@ static u32 bnxt_fw_to_ethtool_lp_adv(struct bnxt_link_info 
*link_info)
return _bnxt_fw_to_ethtool_adv_spds(fw_speeds, fw_pause);
 }
 
+static u32 bnxt_fw_to_ethtool_support_spds(struct bnxt_link_info *link_info)
+{
+   u16 fw_speeds = link_info->support_speeds;
+   u32 supported;
+
+   supported = _bnxt_fw_to_ethtool_adv_spds(fw_speeds, 0);
+   return (supported | SUPPORTED_Pause | SUPPORTED_Asym_Pause);
+}
+
 u32 bnxt_fw_to_ethtool_speed(u16 fw_link_speed)
 {
switch (fw_link_speed) {
@@ -595,7 +585,6 @@ static int bnxt_get_settings(struct net_device *dev, struct 
ethtool_cmd *cmd)
u16 ethtool_speed;
 
cmd->supported = bnxt_fw_to_ethtool_support_spds(link_info);
-   cmd->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
 
if (link_info->auto_link_speeds)
cmd->supported |= SUPPORTED_Autoneg;
-- 
1.8.3.1



Re: [PATCH net] bridge: a netlink notification should be sent whenever those attributes change

2016-03-05 Thread Xin Long
On Sun, Mar 6, 2016 at 3:43 AM, Nikolay Aleksandrov
 wrote:
> On 03/05/2016 03:44 PM, Xin Long wrote:
>
> See the comment above dev_ifsioc:
> /*
>  *  Perform the SIOCxIFxxx calls, inside rtnl_lock()
>  */
> static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
> {
> ...
> it is usually called like:
>rtnl_lock();
>    ret = dev_ifsioc(net, &ifr, cmd);
>rtnl_unlock();
> And also you cannot be calling netdevice notifiers without RTNL. So in any
> case you do need it here as well, in fact you'll surely hit the ASSERT_RTNL();
> in call_netdevice_notifiers_info if you do so, thus I'm not sure how this
> patch was actually tested.
>
yes, I can see it now.
I will repost this patch with rtnl_lock in br_sysfs_*.
Thanks Nik.

>


Re: [PATCH] ipv6: Fix the pmtu path for connected UDP socket

2016-03-05 Thread David Miller
From: Wei Wang 
Date: Wed,  2 Mar 2016 11:19:21 -0800

> @@ -566,7 +567,16 @@ void __udp6_lib_err(struct sk_buff *skb, struct 
> inet6_skb_parm *opt,
>   if (type == ICMPV6_PKT_TOOBIG) {
>   if (!ip6_sk_accept_pmtu(sk))
>   goto out;
> - ip6_sk_update_pmtu(skb, sk, info);
> + bh_lock_sock(sk);
> + if (sk->sk_state == TCP_ESTABLISHED &&
> + !sock_owned_by_user(sk) &&
> + ipv6_addr_equal(saddr, &sk->sk_v6_rcv_saddr) &&
> + ipv6_addr_equal(daddr, &sk->sk_v6_daddr) &&
> + uh->dest == sk->sk_dport)
> + inet6_csk_update_pmtu(sk, ntohl(info));

If I apply this patch it will hide a bug.

Why isn't ip6_sk_update_pmtu() matching the same route as the
one attached to the socket?

I'd prefer you figure out what part of the lookup key used is
wrong, and fix that instead.

Thanks.


Re: [PATCH net] sctp: use gfp insteaad of GFP_NOWAIT in idr_alloc_cyclic when sctp_assoc_set_id

2016-03-05 Thread Xin Long
On Sun, Mar 6, 2016 at 12:42 AM, Eric Dumazet  wrote:
> On sam., 2016-03-05 at 23:59 +0800, Xin Long wrote:
>
> Are you sure idr_alloc(... GFP_KERNEL) makes sense inside spin_lock_bh()
> section ?
>
> idr_alloc() has :
>
> might_sleep_if(gfpflags_allow_blocking(gfp_mask));
>
> A debug kernel (CONFIG_DEBUG_ATOMIC_SLEEP=y) should probably complain at
> this point ?
>
OK, I got you, does it make sense to you if I just change GFP_NOWAIT
to GFP_ATOMIC
when sctp_assoc_set_id call idr_alloc_cyclic()?  which also can avoid
this call trace in my
test.

>
>


Re: PHY hardware reset

2016-03-05 Thread Andrew Lunn
On Sun, Mar 06, 2016 at 01:09:40AM +0300, Sergei Shtylyov wrote:
> Hello.
> 
>I have a need to de-assert the active-low PHY hardware reset
> signal (mapped to a GPIO) before the MDIO bus scan since it's left
> asserted by the bootloader (U-Boot).

Hi Sergei

There is a thread about power sequencing for USB devices going on at
the moment. You have a very similar issue. The solution should be
generic enough to apply to MDIO busses as well as USB busses.

Search for:

[PATCH 1/3] usb: core: add power sequence for USB devices

   Andrew


RE: [PATCH RESEND] net:fec:Fix error checking in the function fec_enet_init

2016-03-05 Thread Fugang Duan
From: Nicholas Krause  Sent: Saturday, March 05, 2016 4:00 
AM
> To: da...@davemloft.net
> Cc: b38...@freescale.com; and...@lunn.ch; fabio.este...@freescale.com;
> l.st...@pengutronix.de; rmk+ker...@arm.linux.org.uk; trem...@gmail.com;
> johan...@sipsolutions.net; u.kleine-koe...@pengutronix.de;
> haoke...@gmail.com; netdev@vger.kernel.org; linux-ker...@vger.kernel.org
> Subject: [PATCH RESEND] net:fec:Fix error checking in the function
> fec_enet_init
> 
> This fixes error checking in the function fec_enet_init to properly check if 
> the
> internal call to the function fec_enet_alloc_queue fails and if so immediately
> return the error code to the caller for it to handle its own intended error 
> paths.
> 
> Signed-off-by: Nicholas Krause 
> ---
>  drivers/net/ethernet/freescale/fec_main.c | 5 -
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/ethernet/freescale/fec_main.c
> b/drivers/net/ethernet/freescale/fec_main.c
> index b349e6f..18c625f 100644
> --- a/drivers/net/ethernet/freescale/fec_main.c
> +++ b/drivers/net/ethernet/freescale/fec_main.c
> @@ -3123,6 +3123,7 @@ static int fec_enet_init(struct net_device *ndev)
>   dma_addr_t bd_dma;
>   int bd_size;
>   unsigned int i;
> + int ret;
> 
>  #if defined(CONFIG_ARM)
>   fep->rx_align = 0xf;
> @@ -3132,7 +3133,9 @@ static int fec_enet_init(struct net_device *ndev)
>   fep->tx_align = 0x3;
>  #endif
> 
> - fec_enet_alloc_queue(ndev);
> + ret = fec_enet_alloc_queue(ndev);
> + if (ret)
> + return ret;
> 
>   if (fep->bufdesc_ex)
>   fep->bufdesc_size = sizeof(struct bufdesc_ex);
> --
> 2.1.4
Thanks.

Acked-by: Fugang Duan 


RE: [PATCH net-next V2 07/16] net: fec: set cbd_sc without relying on previous value

2016-03-05 Thread Fugang Duan
 From: Troy Kisky  Sent: Saturday, March 05, 
2016 12:08 AM
> To: Fugang Duan ; netdev@vger.kernel.org;
> da...@davemloft.net; b38...@freescale.com
> Cc: fabio.este...@freescale.com; l.st...@pengutronix.de; and...@lunn.ch;
> trem...@gmail.com; li...@arm.linux.org.uk; linux-arm-
> ker...@lists.infradead.org; l...@boundarydevices.com; shawn...@kernel.org;
> johan...@sipsolutions.net; stillcompil...@gmail.com;
> sergei.shtyl...@cogentembedded.com; a...@arndb.de
> Subject: Re: [PATCH net-next V2 07/16] net: fec: set cbd_sc without relying on
> previous value
> 
> On 3/4/2016 2:29 AM, Fugang Duan wrote:
> > From: Troy Kisky  Sent: Thursday,
> > February 25, 2016 8:37 AM
> >> To: netdev@vger.kernel.org; da...@davemloft.net;
> b38...@freescale.com
> >> Cc: fabio.este...@freescale.com; l.st...@pengutronix.de;
> >> and...@lunn.ch; trem...@gmail.com; li...@arm.linux.org.uk; linux-arm-
> >> ker...@lists.infradead.org; l...@boundarydevices.com;
> >> shawn...@kernel.org; johan...@sipsolutions.net;
> >> stillcompil...@gmail.com; sergei.shtyl...@cogentembedded.com;
> >> a...@arndb.de; Troy Kisky 
> >> Subject: [PATCH net-next V2 07/16] net: fec: set cbd_sc without
> >> relying on previous value
> >>
> >> Relying on the wrap bit to stay valid once initialized when the
> >> controller also writes to this byte seems undesirable since we can
> >> easily know what the value should be.
> >>
> >> Signed-off-by: Troy Kisky 
> >> ---
> >>  drivers/net/ethernet/freescale/fec_main.c | 38
> >> +--
> >>  1 file changed, 11 insertions(+), 27 deletions(-)
> >>
> >> diff --git a/drivers/net/ethernet/freescale/fec_main.c
> >> b/drivers/net/ethernet/freescale/fec_main.c
> >> index 791f385..6ceb5f9 100644
> >> --- a/drivers/net/ethernet/freescale/fec_main.c
> >> +++ b/drivers/net/ethernet/freescale/fec_main.c
> >> @@ -340,9 +340,8 @@ fec_enet_txq_submit_frag_skb(struct
> >> fec_enet_priv_tx_q *txq,
> >>bdp = fec_enet_get_nextdesc(bdp, &txq->bd);
> >>ebdp = (struct bufdesc_ex *)bdp;
> >>
> >> -  status = fec16_to_cpu(bdp->cbd_sc);
> >> -  status &= ~BD_ENET_TX_STATS;
> >> -  status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
> >> +  status = BD_ENET_TX_TC | BD_ENET_TX_READY |
> >> +  ((bdp == txq->bd.last) ? BD_SC_WRAP : 0);
> >>frag_len = skb_shinfo(skb)->frags[frag].size;
> >>
> >>/* Handle the last BD specially */ @@ -436,8 +435,6 @@ static
> int
> >> fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq,
> >>/* Fill in a Tx ring entry */
> >>bdp = txq->bd.cur;
> >>last_bdp = bdp;
> >> -  status = fec16_to_cpu(bdp->cbd_sc);
> >> -  status &= ~BD_ENET_TX_STATS;
> >>
> >>/* Set buffer length and buffer pointer */
> >>bufaddr = skb->data;
> >> @@ -462,6 +459,8 @@ static int fec_enet_txq_submit_skb(struct
> >> fec_enet_priv_tx_q *txq,
> >>return NETDEV_TX_OK;
> >>}
> >>
> >> +  status = BD_ENET_TX_TC | BD_ENET_TX_READY |
> >> +  ((bdp == txq->bd.last) ? BD_SC_WRAP : 0);
> >>if (nr_frags) {
> >>last_bdp = fec_enet_txq_submit_frag_skb(txq, skb, ndev);
> >>if (IS_ERR(last_bdp)) {
> >> @@ -512,7 +511,6 @@ static int fec_enet_txq_submit_skb(struct
> >> fec_enet_priv_tx_q *txq,
> >>/* Send it on its way.  Tell FEC it's ready, interrupt when done,
> >> * it's the last BD of the frame, and to put the CRC on the end.
> >> */
> >> -  status |= (BD_ENET_TX_READY | BD_ENET_TX_TC);
> >
> > This is completely wrong.  We have to prepare all BDs for the frag skb, and only then
> enable the "READY" and "TC" bits for the first BD; otherwise uDMA copies incorrect
> data to the FIFO.
> >
> 
> 
> 
> 
> I don't follow. Please read patch again.
> 
Understood, sorry, that was my mistake.  The patch is fine for me.


Re: skb_under_panic in ip_tunnel_xmit

2016-03-05 Thread Yuriy M. Kaminskiy
Eric Dumazet  writes:

> On mer., 2016-03-02 at 09:11 -0800, Francesco Ruggeri wrote:
>> I can consistently get this  panic on 4.4.1 as well as 3.18.
>> 
>> [ 2076.264975] gre: GRE over IPv4 demultiplexor driver
>> [ 2076.269326] ip_gre: GRE over IPv4 tunneling driver
>> [ 2076.274464] conntrack: generic helper won't handle protocol 47. Please 
>> consider loading the specific helper module.
>> [ 2088.868553] skbuff: skb_under_panic: text:814c03f2
>> len:108 put:20 head:8800a5f6c400 data:8800a5f6c3f8 tail:0x64
>> end:0xc0 dev:t3
>> [ 2088.869755] [ cut here ]
>> [ 2088.870179] kernel BUG at net/core/skbuff.c:102!
>> [ 2088.870599] invalid opcode:  [#1] SMP 
>> [ 2088.871024] Modules linked in: ip_gre ip_tunnel gre dummy
>> iptable_filter rfcomm bnep macvlan snd_seq_midi snd_seq_midi_event
>> btusb btrtl btbcm btintel sg bluetooth joydev snd_ens1371
>> snd_ac97_codec ac97_bus coretemp hwmon snd_seq snd_pcm snd_timer
>> snd_rawmidi snd_seq_device snd ppdev soundcore ip6table_filter
>> ip6_tables bonding gameport serio_raw pcspkr battery kvm parport_pc
>> nfsd auth_rpcgss oid_registry parport irqbypass ac irda crc_ccitt
>> fuse nfs_acl lockd xt_multiport acpi_cpufreq tpm_tis tpm iptable_nat
>> shpchp grace nf_conntrack_ipv4 nf_defrag_ipv4 i2c_piix4 nf_nat_ipv4
>> ip_tables nf_nat nf_conntrack x_tables tun 8021q uinput vmwgfx
>> crc32c_intel drm_kms_helper syscopyarea sysfillrect sysimgblt
>> fb_sys_fops ttm ehci_pci drm aesni_intel aes_x86_64 glue_helper lrw
>> gf128mul ablk_helper
>   cryptd i2c_core
>> [ 2088.875474] sr_mod mptspi mptscsih ehci_hcd mptbase
>> scsi_transport_spi e1000 uhci_hcd cdrom sunrpc dm_mirror
>> dm_region_hash dm_log dm_mod autofs4
>> [ 2088.876737] CPU: 1 PID: 6420 Comm: ping Not tainted 
>> 4.4.1-2980094.AroraKernelnextfruggeri.2.fc18.x86_64 #1
>> [ 2088.878039] Hardware name: VMware, Inc. VMware Virtual
>> Platform/440BX Desktop Reference Platform, BIOS 6.00 07/02/2012
>> [ 2088.879521] task: 880035ea ti: 8800b0ff8000 task.ti: 
>> 8800b0ff8000
>> [ 2088.880285] RIP: 0010:[]  [] 
>> skb_panic+0x66/0x68
>> [ 2088.881214] RSP: 0018:8800b0ffb728  EFLAGS: 00010292
>> [ 2088.881945] RAX: 0083 RBX: 8800b51b6c00 RCX: 
>> 
>> [ 2088.882692] RDX: 88013ae2f101 RSI: 88013ae2cae8 RDI: 
>> 88013ae2cae8
>> [ 2088.883424] RBP: 8800b0ffb748 R08:  R09: 
>> 
>> [ 2088.884110] R10:  R11: 88013621f3e0 R12: 
>> 0054
>> [ 2088.884846] R13: 81a9fe40 R14:  R15: 
>> 03030303
>> [ 2088.885535] FS:  7f24b4a24740() GS:88013ae2() 
>> knlGS:
>> [ 2088.886270] CS:  0010 DS:  ES:  CR0: 80050033
>> [ 2088.887014] CR2: 7f9d6fb26000 CR3: aae66000 CR4: 
>> 001406e0
>> [ 2088.887817] Stack:
>> [ 2088.888582]  8800a5f6c3f8 0064 00c0 
>> 880138fac000
>> [ 2088.889378]  8800b0ffb758 8143b640 8800b0ffb7c8 
>> 814c03f2
>> [ 2088.890188]  8840 0040 8800 
>> 002f
>> [ 2088.891011] Call Trace:
>> [ 2088.892073]  [] skb_push+0x3b/0x3c
>> [ 2088.893156]  [] iptunnel_xmit+0xad/0x145
>> [ 2088.894033]  [] ip_tunnel_xmit+0x673/0x790 [ip_tunnel]
>> [ 2088.894884]  [] __gre_xmit+0x76/0x7f [ip_gre]
>> [ 2088.895700]  [] ipgre_xmit+0xd3/0xef [ip_gre]
>> [ 2088.896573]  [] dev_hard_start_xmit+0x226/0x2fc
>> [ 2088.897387]  [] __dev_queue_xmit+0x333/0x425
>> [ 2088.898183]  [] dev_queue_xmit+0x10/0x12
>> [ 2088.898996]  [] neigh_direct_output+0x11/0x13
>> [ 2088.899793]  [] ip_finish_output2+0x233/0x293
>> [ 2088.900582]  [] ? dst_mtu+0xb/0xd
>> [ 2088.901283]  [] ip_finish_output+0x146/0x159
>> [ 2088.901990]  [] ? ip_generic_getfrag+0x5c/0x98
>> [ 2088.902658]  [] ip_output+0x5f/0x91
>> [ 2088.903306]  [] ? ip_finish_output2+0x293/0x293
>> [ 2088.903964]  [] ? __ip_local_out+0x73/0x7c
>> [ 2088.904625]  [] ? dst_mtu+0xd/0xd
>> [ 2088.905222]  [] dst_output+0xf/0x11
>> [ 2088.905820]  [] ip_local_out+0x31/0x3a
>> [ 2088.906378]  [] ip_send_skb+0x1a/0x3f
>> [ 2088.906938]  [] ip_push_pending_frames+0x33/0x3b
>> [ 2088.907465]  [] raw_sendmsg+0x794/0x8ea
>> [ 2088.907981]  [] ? __skb_recv_datagram+0x201/0x487
>> [ 2088.908548]  [] ? skb_recv_datagram+0x32/0x34
>> [ 2088.909050]  [] ? raw_recvmsg+0x68/0x159
>> [ 2088.909705]  [] ? rw_copy_check_uvector+0x6e/0x109
>> [ 2088.910247]  [] inet_sendmsg+0x3c/0x65
>> [ 2088.910775]  [] sock_sendmsg_nosec+0x12/0x1d
>> [ 2088.911279]  [] sock_sendmsg+0x29/0x2e
>> [ 2088.911764]  [] ___sys_sendmsg+0x1a6/0x23f
>> [ 2088.912349]  [] ? ldsem_up_read+0x1b/0x30
>> [ 2088.912820]  [] ? tty_ldisc_deref+0x16/0x18
>> [ 2088.913299]  [] ? tty_write+0x207/0x222
>> [ 2088.913743]  [] ? n_tty_receive_buf+0x13/0x13
>> [ 2088.914244]  [] ? __fget_light+0x2c/0x4d
>> [ 2088.914671]  [] __sys_sendmsg+0x42/0x60
>> [ 

Re: [PATCH v3 net-next 0/9] API set for HW Buffer management

2016-03-05 Thread Gregory CLEMENT
Hi,
 
 On sam., mars 05 2016, Gregory CLEMENT  
wrote:

> This is a third version of an API set for HW Buffer management that I

Please ignore this version.

Being able to select the HWBM support through the kernel configuration
was not as trivial as I initially thought. Fortunately, it was quickly
spotted by the 0-day kbuild bot, and I have just sent a new
version. Sorry for the noise.

Thanks,

Gregory

-- 
Gregory Clement, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com


[PATCH v4 net-next 5/9] ARM: dts: armada-xp-openblocks-ax3-4: Add BM support

2016-03-05 Thread Gregory CLEMENT
Allow Openblock AX3 using hardware buffer management with mvneta.

Signed-off-by: Gregory CLEMENT 
---
 arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts | 19 ++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts 
b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts
index a5db17782e08..3aa29a91c7b8 100644
--- a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts
+++ b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts
@@ -67,7 +67,8 @@
  MBUS_ID(0x01, 0x1d) 0 0 0xfff0 0x10
  MBUS_ID(0x01, 0x2f) 0 0 0xf000 0x800
  MBUS_ID(0x09, 0x09) 0 0 0xf810 0x1
- MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>;
+ MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000
+ MBUS_ID(0x0c, 0x04) 0 0 0xd1200000 0x100000>;
 
devbus-bootcs {
status = "okay";
@@ -176,21 +177,29 @@
status = "okay";
phy = <>;
phy-mode = "sgmii";
+   buffer-manager = <&bm>;
+   bm,pool-long = <0>;
};
ethernet@74000 {
status = "okay";
phy = <>;
phy-mode = "sgmii";
+   buffer-manager = <&bm>;
+   bm,pool-long = <1>;
};
ethernet@30000 {
status = "okay";
phy = <>;
phy-mode = "sgmii";
+   buffer-manager = <&bm>;
+   bm,pool-long = <2>;
};
ethernet@34000 {
status = "okay";
phy = <>;
phy-mode = "sgmii";
+   buffer-manager = <&bm>;
+   bm,pool-long = <3>;
};
i2c@11000 {
status = "okay";
@@ -219,6 +228,14 @@
usb@51000 {
status = "okay";
};
+
+   bm@c0000 {
+   status = "okay";
+   };
+   };
+
+   bm-bppi {
+   status = "okay";
};
};
 };
-- 
2.5.0



[PATCH v4 net-next 1/9] ARM: dts: armada-38x: add buffer manager nodes

2016-03-05 Thread Gregory CLEMENT
From: Marcin Wojtas 

Armada 38x network controller supports hardware buffer management (BM).
Since it is now enabled in mvneta driver, appropriate nodes can be added
to armada-38x.dtsi - for the actual common BM unit (bm@c8000) and its
internal SRAM (bm-bppi), which is used for indirect access to buffer
pointer ring residing in DRAM.

Pools - ports mapping, bm-bppi entry in 'soc' node's ranges and optional
parameters are supposed to be set in board files.

Signed-off-by: Marcin Wojtas 
Signed-off-by: Gregory CLEMENT 
---
 arch/arm/boot/dts/armada-38x.dtsi | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/arch/arm/boot/dts/armada-38x.dtsi 
b/arch/arm/boot/dts/armada-38x.dtsi
index e8b7f6726772..1b7d690d8e10 100644
--- a/arch/arm/boot/dts/armada-38x.dtsi
+++ b/arch/arm/boot/dts/armada-38x.dtsi
@@ -540,6 +540,14 @@
status = "disabled";
};
 
+   bm: bm@c8000 {
+   compatible = "marvell,armada-380-neta-bm";
+   reg = <0xc8000 0xac>;
+   clocks = <&gateclk 13>;
+   internal-mem = <&bm_bppi>;
+   status = "disabled";
+   };
+
sata@e0000 {
compatible = "marvell,armada-380-ahci";
reg = <0xe0000 0x2000>;
@@ -618,6 +626,16 @@
#size-cells = <1>;
ranges = <0 MBUS_ID(0x09, 0x15) 0 0x800>;
};
+
+   bm_bppi: bm-bppi {
+   compatible = "mmio-sram";
+   reg = <MBUS_ID(0x0c, 0x04) 0 0x100000>;
+   ranges = <0 MBUS_ID(0x0c, 0x04) 0 0x100000>;
+   #address-cells = <1>;
+   #size-cells = <1>;
+   clocks = <&gateclk 13>;
+   status = "disabled";
+   };
};
 
clocks {
-- 
2.5.0



[PATCH v4 net-next 3/9] ARM: dts: armada-xp: add buffer manager nodes

2016-03-05 Thread Gregory CLEMENT
From: Marcin Wojtas 

Armada XP network controller supports hardware buffer management (BM).
Since it is now enabled in mvneta driver, appropriate nodes can be added
to armada-xp.dtsi - for the actual common BM unit (bm@c0000) and its
internal SRAM (bm-bppi), which is used for indirect access to buffer
pointer ring residing in DRAM.

Pools - ports mapping, bm-bppi entry in 'soc' node's ranges and optional
parameters are supposed to be set in board files.

Signed-off-by: Marcin Wojtas 
Signed-off-by: Gregory CLEMENT 
---
 arch/arm/boot/dts/armada-xp.dtsi | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/arch/arm/boot/dts/armada-xp.dtsi b/arch/arm/boot/dts/armada-xp.dtsi
index be23196829bb..bd459360d7a6 100644
--- a/arch/arm/boot/dts/armada-xp.dtsi
+++ b/arch/arm/boot/dts/armada-xp.dtsi
@@ -253,6 +253,14 @@
marvell,crypto-sram-size = <0x800>;
};
 
+   bm: bm@c0000 {
+   compatible = "marvell,armada-380-neta-bm";
+   reg = <0xc0000 0xac>;
+   clocks = <&gateclk 13>;
+   internal-mem = <&bm_bppi>;
+   status = "disabled";
+   };
+
xor@f0900 {
compatible = "marvell,orion-xor";
reg = <0xF0900 0x100
@@ -291,6 +299,16 @@
#size-cells = <1>;
ranges = <0 MBUS_ID(0x09, 0x05) 0 0x800>;
};
+
+   bm_bppi: bm-bppi {
+   compatible = "mmio-sram";
+   reg = <MBUS_ID(0x0c, 0x04) 0 0x100000>;
+   ranges = <0 MBUS_ID(0x0c, 0x04) 0 0x100000>;
+   #address-cells = <1>;
+   #size-cells = <1>;
+   clocks = <&gateclk 13>;
+   status = "disabled";
+   };
};
 
clocks {
-- 
2.5.0



[PATCH v4 net-next 6/9] bus: mvebu-mbus: provide api for obtaining IO and DRAM window information

2016-03-05 Thread Gregory CLEMENT
From: Marcin Wojtas 

This commit enables finding appropriate mbus window and obtaining its
target id and attribute for given physical address in two separate
routines, both for IO and DRAM windows. This functionality
is needed for Armada XP/38x Network Controller's Buffer Manager and
PnC configuration.

[gregory.clem...@free-electrons.com: Fix size test for
mvebu_mbus_get_dram_win_info]

Signed-off-by: Marcin Wojtas 
[DRAM window information reference in LKv3.10]
Signed-off-by: Evan Wang 
Signed-off-by: Gregory CLEMENT 
---
 drivers/bus/mvebu-mbus.c | 52 
 include/linux/mbus.h |  3 +++
 2 files changed, 55 insertions(+)

diff --git a/drivers/bus/mvebu-mbus.c b/drivers/bus/mvebu-mbus.c
index c43c3d2baf73..c2e52864bb03 100644
--- a/drivers/bus/mvebu-mbus.c
+++ b/drivers/bus/mvebu-mbus.c
@@ -948,6 +948,58 @@ void mvebu_mbus_get_pcie_io_aperture(struct resource *res)
*res = mbus_state.pcie_io_aperture;
 }
 
+int mvebu_mbus_get_dram_win_info(phys_addr_t phyaddr, u8 *target, u8 *attr)
+{
+   const struct mbus_dram_target_info *dram;
+   int i;
+
+   /* Get dram info */
+   dram = mv_mbus_dram_info();
+   if (!dram) {
+   pr_err("missing DRAM information\n");
+   return -ENODEV;
+   }
+
+   /* Try to find matching DRAM window for phyaddr */
+   for (i = 0; i < dram->num_cs; i++) {
+   const struct mbus_dram_window *cs = dram->cs + i;
+
+   if (cs->base <= phyaddr &&
+   phyaddr <= (cs->base + cs->size - 1)) {
+   *target = dram->mbus_dram_target_id;
+   *attr = cs->mbus_attr;
+   return 0;
+   }
+   }
+
+   pr_err("invalid dram address 0x%x\n", phyaddr);
+   return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(mvebu_mbus_get_dram_win_info);
+
+int mvebu_mbus_get_io_win_info(phys_addr_t phyaddr, u32 *size, u8 *target,
+  u8 *attr)
+{
+   int win;
+
+   for (win = 0; win < mbus_state.soc->num_wins; win++) {
+   u64 wbase;
+   int enabled;
+
+   mvebu_mbus_read_window(&mbus_state, win, &enabled, &wbase,
+  size, target, attr, NULL);
+
+   if (!enabled)
+   continue;
+
+   if (wbase <= phyaddr && phyaddr <= wbase + *size)
+   return win;
+   }
+
+   return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(mvebu_mbus_get_io_win_info);
+
 static __init int mvebu_mbus_debugfs_init(void)
 {
struct mvebu_mbus_state *s = &mbus_state;
diff --git a/include/linux/mbus.h b/include/linux/mbus.h
index 1f7bc630d225..ea34a867caa0 100644
--- a/include/linux/mbus.h
+++ b/include/linux/mbus.h
@@ -69,6 +69,9 @@ static inline const struct mbus_dram_target_info 
*mv_mbus_dram_info_nooverlap(vo
 int mvebu_mbus_save_cpu_target(u32 *store_addr);
 void mvebu_mbus_get_pcie_mem_aperture(struct resource *res);
 void mvebu_mbus_get_pcie_io_aperture(struct resource *res);
+int mvebu_mbus_get_dram_win_info(phys_addr_t phyaddr, u8 *target, u8 *attr);
+int mvebu_mbus_get_io_win_info(phys_addr_t phyaddr, u32 *size, u8 *target,
+  u8 *attr);
 int mvebu_mbus_add_window_remap_by_id(unsigned int target,
  unsigned int attribute,
  phys_addr_t base, size_t size,
-- 
2.5.0



[PATCH v4 net-next 4/9] ARM: dts: armada-xp: enable buffer manager support on Armada XP boards

2016-03-05 Thread Gregory CLEMENT
From: Marcin Wojtas 

Since mvneta driver supports using hardware buffer management (BM), in
order to use it, board files have to be adjusted accordingly. This commit
enables BM on AXP-DB and AXP-GP in same manner - because number of ports
on those boards is the same as number of possible pools, each port is
supposed to use single pool for all kind of packets.

Moreover appropriate entry is added to 'soc' node ranges, as well as "okay"
status for 'bm' and 'bm-bppi' (internal SRAM) nodes.

Signed-off-by: Marcin Wojtas 
Signed-off-by: Gregory CLEMENT 
---
 arch/arm/boot/dts/armada-xp-db.dts | 19 ++-
 arch/arm/boot/dts/armada-xp-gp.dts | 19 ++-
 2 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/arch/arm/boot/dts/armada-xp-db.dts 
b/arch/arm/boot/dts/armada-xp-db.dts
index f774101416a5..30657302305d 100644
--- a/arch/arm/boot/dts/armada-xp-db.dts
+++ b/arch/arm/boot/dts/armada-xp-db.dts
@@ -77,7 +77,8 @@
  MBUS_ID(0x01, 0x1d) 0 0 0xfff0 0x10
  MBUS_ID(0x01, 0x2f) 0 0 0xf000 0x100
  MBUS_ID(0x09, 0x09) 0 0 0xf810 0x1
- MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>;
+ MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000
+ MBUS_ID(0x0c, 0x04) 0 0 0xf1200000 0x100000>;
 
devbus-bootcs {
status = "okay";
@@ -181,21 +182,33 @@
status = "okay";
phy = <>;
phy-mode = "rgmii-id";
+   buffer-manager = <&bm>;
+   bm,pool-long = <0>;
};
ethernet@74000 {
status = "okay";
phy = <>;
phy-mode = "rgmii-id";
+   buffer-manager = <&bm>;
+   bm,pool-long = <1>;
};
ethernet@30000 {
status = "okay";
phy = <>;
phy-mode = "sgmii";
+   buffer-manager = <&bm>;
+   bm,pool-long = <2>;
};
ethernet@34000 {
status = "okay";
phy = <>;
phy-mode = "sgmii";
+   buffer-manager = <&bm>;
+   bm,pool-long = <3>;
+   };
+
+   bm@c0000 {
+   status = "okay";
};
 
mvsdio@d4000 {
@@ -230,5 +243,9 @@
};
};
};
+
+   bm-bppi {
+   status = "okay";
+   };
};
 };
diff --git a/arch/arm/boot/dts/armada-xp-gp.dts 
b/arch/arm/boot/dts/armada-xp-gp.dts
index 4878d7353069..a1ded01d0c07 100644
--- a/arch/arm/boot/dts/armada-xp-gp.dts
+++ b/arch/arm/boot/dts/armada-xp-gp.dts
@@ -96,7 +96,8 @@
  MBUS_ID(0x01, 0x1d) 0 0 0xfff0 0x10
  MBUS_ID(0x01, 0x2f) 0 0 0xf000 0x100
  MBUS_ID(0x09, 0x09) 0 0 0xf810 0x1
- MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>;
+ MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000
+ MBUS_ID(0x0c, 0x04) 0 0 0xf1200000 0x100000>;
 
devbus-bootcs {
status = "okay";
@@ -196,21 +197,29 @@
status = "okay";
phy = <>;
phy-mode = "qsgmii";
+   buffer-manager = <&bm>;
+   bm,pool-long = <0>;
};
ethernet@74000 {
status = "okay";
phy = <>;
phy-mode = "qsgmii";
+   buffer-manager = <&bm>;
+   bm,pool-long = <1>;
};
ethernet@30000 {
status = "okay";
phy = <>;
phy-mode = "qsgmii";
+   buffer-manager = <&bm>;
+   bm,pool-long = <2>;
};
ethernet@34000 {
status = "okay";
phy = <>;
phy-mode = "qsgmii";
+  

[PATCH v4 net-next 2/9] ARM: dts: armada-38x: enable buffer manager support on Armada 38x boards

2016-03-05 Thread Gregory CLEMENT
From: Marcin Wojtas 

Since mvneta driver supports using hardware buffer management (BM), in
order to use it, board files have to be adjusted accordingly. This commit
enables BM on:
* A385-DB-AP - each port has its own pool for long and common pool for
short packets,
* A388-ClearFog - same as above,
* A388-DB - to each port unique 'short' and 'long' pools are mapped,
* A388-GP - same as above.

Moreover appropriate entry is added to 'soc' node ranges, as well as "okay"
status for 'bm' and 'bm-bppi' (internal SRAM) nodes.

[gregory.clem...@free-electrons.com: add support for the ClearFog board]

Signed-off-by: Marcin Wojtas 
Signed-off-by: Gregory CLEMENT 
---
 arch/arm/boot/dts/armada-385-db-ap.dts  | 20 +++-
 arch/arm/boot/dts/armada-388-clearfog.dts   |  6 ++
 arch/arm/boot/dts/armada-388-db.dts | 17 -
 arch/arm/boot/dts/armada-388-gp.dts | 17 -
 arch/arm/boot/dts/armada-38x-solidrun-microsom.dtsi | 15 ++-
 5 files changed, 71 insertions(+), 4 deletions(-)

diff --git a/arch/arm/boot/dts/armada-385-db-ap.dts 
b/arch/arm/boot/dts/armada-385-db-ap.dts
index acd5b1519edb..5f9451be21ff 100644
--- a/arch/arm/boot/dts/armada-385-db-ap.dts
+++ b/arch/arm/boot/dts/armada-385-db-ap.dts
@@ -61,7 +61,8 @@
ranges = ;
+ MBUS_ID(0x09, 0x15) 0 0xf1110000 0x10000
+ MBUS_ID(0x0c, 0x04) 0 0xf1200000 0x100000>;
 
internal-regs {
spi1: spi@10680 {
@@ -138,12 +139,18 @@
status = "okay";
phy = <>;
phy-mode = "sgmii";
+   buffer-manager = <&bm>;
+   bm,pool-long = <1>;
+   bm,pool-short = <3>;
};
 
ethernet@34000 {
status = "okay";
phy = <>;
phy-mode = "sgmii";
+   buffer-manager = <&bm>;
+   bm,pool-long = <2>;
+   bm,pool-short = <3>;
};
 
ethernet@70000 {
@@ -157,6 +164,13 @@
status = "okay";
phy = <>;
phy-mode = "rgmii-id";
+   buffer-manager = <&bm>;
+   bm,pool-long = <0>;
+   bm,pool-short = <3>;
+   };
+
+   bm@c8000 {
+   status = "okay";
};
 
nfc: flash@d {
@@ -178,6 +192,10 @@
};
};
 
+   bm-bppi {
+   status = "okay";
+   };
+
pcie-controller {
status = "okay";
 
diff --git a/arch/arm/boot/dts/armada-388-clearfog.dts 
b/arch/arm/boot/dts/armada-388-clearfog.dts
index c6e180eb3b11..c60206efb583 100644
--- a/arch/arm/boot/dts/armada-388-clearfog.dts
+++ b/arch/arm/boot/dts/armada-388-clearfog.dts
@@ -78,6 +78,9 @@
internal-regs {
ethernet@30000 {
phy-mode = "sgmii";
+   buffer-manager = <&bm>;
+   bm,pool-long = <2>;
+   bm,pool-short = <1>;
status = "okay";
 
fixed-link {
@@ -88,6 +91,9 @@
 
ethernet@34000 {
phy-mode = "sgmii";
+   buffer-manager = <&bm>;
+   bm,pool-long = <3>;
+   bm,pool-short = <1>;
status = "okay";
 
fixed-link {
diff --git a/arch/arm/boot/dts/armada-388-db.dts 
b/arch/arm/boot/dts/armada-388-db.dts
index ff47af57f091..ea93ed727030 100644
--- a/arch/arm/boot/dts/armada-388-db.dts
+++ b/arch/arm/boot/dts/armada-388-db.dts
@@ -66,7 +66,8 @@
ranges = ;
+ MBUS_ID(0x09, 0x15) 0 0xf1110000 0x10000
+ 

[PATCH v4 net-next 8/9] net: add a hardware buffer management helper API

2016-03-05 Thread Gregory CLEMENT
This basic implementation allows sharing code between drivers using
hardware buffer management. As the code is hardware agnostic, there are
few helpers; most of the optimization brought by a HW BM has to be
done at driver level.

Signed-off-by: Gregory CLEMENT 
---
 include/net/hwbm.h | 26 
 net/Kconfig|  3 ++
 net/core/Makefile  |  1 +
 net/core/hwbm.c| 87 ++
 4 files changed, 117 insertions(+)
 create mode 100644 include/net/hwbm.h
 create mode 100644 net/core/hwbm.c

diff --git a/include/net/hwbm.h b/include/net/hwbm.h
new file mode 100644
index 000000000000..ecda062c18eb
--- /dev/null
+++ b/include/net/hwbm.h
@@ -0,0 +1,26 @@
+#ifndef _HWBM_H
+#define _HWBM_H
+
+struct hwbm_pool {
+   /* Size of the buffers managed */
+   int size;
+   /* Number of buffers currently used by this pool */
+   int buf_num;
+   /* constructor called during alocation */
+   int (*construct)(struct hwbm_pool *bm_pool, void *buf);
+   /* protect acces to the buffer counter*/
+   spinlock_t lock;
+   /* private data */
+   void *priv;
+};
+#ifdef CONFIG_HWBM
+void hwbm_buf_free(struct hwbm_pool *bm_pool, void *buf);
+int hwbm_pool_refill(struct hwbm_pool *bm_pool, gfp_t gfp);
+int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num, gfp_t gfp);
+#else
+void hwbm_buf_free(struct hwbm_pool *bm_pool, void *buf) {}
+int hwbm_pool_refill(struct hwbm_pool *bm_pool, gfp_t gfp) { return 0; }
+int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num, gfp_t gfp)
+{ return 0; }
+#endif /* CONFIG_HWBM */
+#endif /* _HWBM_H */
diff --git a/net/Kconfig b/net/Kconfig
index 174354618f8a..f50c8af4308b 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -253,6 +253,9 @@ config XPS
depends on SMP
default y
 
+config HWBM
+   bool
+
 config SOCK_CGROUP_DATA
bool
default n
diff --git a/net/core/Makefile b/net/core/Makefile
index 0b835de04de3..57e6dd81c6be 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -24,3 +24,4 @@ obj-$(CONFIG_NET_PTP_CLASSIFY) += ptp_classifier.o
 obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o
 obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o
 obj-$(CONFIG_LWTUNNEL) += lwtunnel.o
+obj-$(CONFIG_HWBM) += hwbm.o
diff --git a/net/core/hwbm.c b/net/core/hwbm.c
new file mode 100644
index ..a98d2a74ca02
--- /dev/null
+++ b/net/core/hwbm.c
@@ -0,0 +1,87 @@
+/* Support for hardware buffer manager.
+ *
+ * Copyright (C) 2016 Marvell
+ *
+ * Gregory CLEMENT 
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/printk.h>
+#include <linux/skbuff.h>
+#include <net/hwbm.h>
+
+void hwbm_buf_free(struct hwbm_pool *bm_pool, void *buf)
+{
+   if (likely(bm_pool->size <= PAGE_SIZE))
+   skb_free_frag(buf);
+   else
+   kfree(buf);
+}
+EXPORT_SYMBOL_GPL(hwbm_buf_free);
+
+/* Refill processing for HW buffer management */
+int hwbm_pool_refill(struct hwbm_pool *bm_pool, gfp_t gfp)
+{
+   int frag_size = bm_pool->size;
+   void *buf;
+
+   if (likely(frag_size <= PAGE_SIZE))
+   buf = netdev_alloc_frag(frag_size);
+   else
+   buf = kmalloc(frag_size, gfp);
+
+   if (!buf)
+   return -ENOMEM;
+
+   if (bm_pool->construct)
+   if (bm_pool->construct(bm_pool, buf)) {
+   hwbm_buf_free(bm_pool, buf);
+   return -ENOMEM;
+   }
+
+   return 0;
+}
+EXPORT_SYMBOL_GPL(hwbm_pool_refill);
+
+int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num, gfp_t gfp)
+{
+   int err, i;
+   unsigned long flags;
+
+   spin_lock_irqsave(&bm_pool->lock, flags);
+   if (bm_pool->buf_num == bm_pool->size) {
+   pr_warn("pool already filled\n");
+   return bm_pool->buf_num;
+   }
+
+   if (buf_num + bm_pool->buf_num > bm_pool->size) {
+   pr_warn("cannot allocate %d buffers for pool\n",
+   buf_num);
+   return 0;
+   }
+
+   if ((buf_num + bm_pool->buf_num) < bm_pool->buf_num) {
+   pr_warn("Adding %d buffers to the %d current buffers will 
overflow\n",
+   buf_num,  bm_pool->buf_num);
+   return 0;
+   }
+
+   for (i = 0; i < buf_num; i++) {
+   err = hwbm_pool_refill(bm_pool, gfp);
+   if (err < 0)
+   break;
+   }
+
+   /* Update BM driver with number of buffers added to pool */
+   bm_pool->buf_num += i;
+
+   pr_debug("hwpm pool: %d of %d buffers added\n", i, buf_num);
+   spin_unlock_irqrestore(&bm_pool->lock, flags);
+
+   

[PATCH v4 net-next 7/9] net: mvneta: bm: add support for hardware buffer management

2016-03-05 Thread Gregory CLEMENT
From: Marcin Wojtas 

Buffer manager (BM) is a dedicated hardware unit that can be used by all
ethernet ports of Armada XP and 38x SoC's. It allows to offload CPU on RX
path by sparing DRAM access on refilling buffer pool, hardware-based
filling of descriptor ring data and better memory utilization due to HW
arbitration for using 'short' pools for small packets.

Tests performed with A388 SoC working as a network bridge between two
packet generators showed increase of maximum processed 64B packets by
~20k (~555k packets with BM enabled vs ~535k packets without BM). Also
when pushing 1500B-packets with a line rate achieved, CPU load decreased
from around 25% without BM to 20% with BM.

BM comprises up to 4 buffer pointers' (BP) rings kept in DRAM, which
are called external BP pools - BPPE. Allocating and releasing buffer
pointers (BP) to/from BPPE is performed indirectly by write/read access
to a dedicated internal SRAM, where internal BP pools (BPPI) are placed.
BM hardware controls status of BPPE automatically, as well as assigning
proper buffers to RX descriptors. For more details please refer to
Functional Specification of Armada XP or 38x SoC.

In order to enable support for a separate hardware block, common for all
ports, a new driver has to be implemented ('mvneta_bm'). It provides
initialization sequence of address space, clocks, registers, SRAM,
empty pools' structures and also obtaining optional configuration
from DT (please refer to device tree binding documentation). mvneta_bm
exposes also a necessary API to mvneta driver, as well as a dedicated
structure with BM information (bm_priv), whose presence is used as a
flag notifying of BM usage by port. It has to be ensured that mvneta_bm
probe is executed prior to the ones in ports' driver. In case BM is not
used or its probe fails, mvneta falls back to use software buffer
management.

A sequence executed in mvneta_probe function is modified in order to have
an access to needed resources before possible port's BM initialization is
done. According to port-pools mapping provided by DT appropriate registers
are configured and the buffer pools are filled. RX path is modified
accordingly. Because the hardware allows a wide variety of configuration
options, following assumptions are made:
* using BM mechanisms can be selectively disabled/enabled basing
  on DT configuration among the ports
* 'long' pool's single buffer size is tied to port's MTU
* using 'long' pool by port is obligatory and it cannot be shared
* using 'short' pool for smaller packets is optional
* one 'short' pool can be shared among all ports

This commit enables hardware buffer management operation cooperating with
existing mvneta driver. New device tree binding documentation is added and
the one of mvneta is updated accordingly.

[gregory.clem...@free-electrons.com: removed the suspend/resume part]

Signed-off-by: Marcin Wojtas 
Signed-off-by: Gregory CLEMENT 
---
 .../bindings/net/marvell-armada-370-neta.txt   |  19 +-
 .../devicetree/bindings/net/marvell-neta-bm.txt|  49 ++
 drivers/net/ethernet/marvell/Kconfig   |  13 +
 drivers/net/ethernet/marvell/Makefile  |   1 +
 drivers/net/ethernet/marvell/mvneta.c  | 507 +--
 drivers/net/ethernet/marvell/mvneta_bm.c   | 544 +
 drivers/net/ethernet/marvell/mvneta_bm.h   | 189 +++
 7 files changed, 1283 insertions(+), 39 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/net/marvell-neta-bm.txt
 create mode 100644 drivers/net/ethernet/marvell/mvneta_bm.c
 create mode 100644 drivers/net/ethernet/marvell/mvneta_bm.h

diff --git a/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt 
b/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt
index d0cb8693963b..73be8970815e 100644
--- a/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt
+++ b/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt
@@ -18,15 +18,30 @@ Optional properties:
   "core" for core clock and "bus" for the optional bus clock.
 
 
+Optional properties (valid only for Armada XP/38x):
+
+- buffer-manager: a phandle to a buffer manager node. Please refer to
+  Documentation/devicetree/bindings/net/marvell-neta-bm.txt
+- bm,pool-long: ID of a pool, that will accept all packets of a size
+  higher than 'short' pool's threshold (if set) and up to MTU value.
+  Obligatory, when the port is supposed to use hardware
+  buffer management.
+- bm,pool-short: ID of a pool, that will be used for accepting
+  packets of a size lower than given threshold. If not set, the port
+  will use a single 'long' pool for all packets, as defined above.
+
 Example:
 
-ethernet@d007 {
+ethernet@7 {
compatible = "marvell,armada-370-neta";
-   reg = <0xd007 0x2500>;
+   reg = <0x7 0x2500>;
interrupts = <8>;
clocks = <_clk 

[PATCH v4 net-next 0/9] API set for HW Buffer management

2016-03-05 Thread Gregory CLEMENT
This fourth version of the API set for HW Buffer management (that was
initially submitted here:
http://thread.gmane.org/gmane.linux.kernel/2125152), is actually the
third version done right. Now it is really possible to disable the
HWBM through the kernel configuration.

Since the second version I took into account David's remarks:

- I made a HWBM and a SWBM version of the mvneta_rx() function in
  order to reduce the conditional code. I kept a condition inside
  mvneta_poll() because specializing this function would have meant
  duplicating 95% of the code.
- I put back the register_netdev() call at the end of the
  mvneta_probe() function. In order to have a unique ID for each port I
  just used a global variable in the driver.

I also added a fix from Marcin in the "net: mvneta: bm: add support
for hardware buffer management" patch:
- "when dropping packets, only buffer pointers passed from BM to
  descriptors have to be returned to the pool. In submitted version
  after closing the port and mvneta_rxq_deinit(), it was very likely
  that a lot of fake buffers are added to the pool, because all
  descriptors took part in iteration."

Finally, I also removed the select MVNETA_BM from the Kconfig, which
leaves users the choice of whether or not to use it.

For the record in the previous version I made the following changes:
v1 -> v2

- The hardware buffer management helpers are no more built by default
  and now depend on a hidden config symbol which has to be selected
  by the driver if needed
- The hwbm_pool_refill() and hwbm_pool_add() now receive a gfp_t as
  argument allowing the caller to specify the flag it needs.
- buf_num is now tested to ensure there is no wrapping
- A spinlock has been added to protect the hwbm_pool_add() function in
  SMP or irq context.
- used pr_warn instead of pr_debug in case of errors.
- fixed the mvneta implementation by returning the buffer to the pool
  at various place instead of ignoring it.
- Squashed "bus: mvebu-mbus: Fix size test for
   mvebu_mbus_get_dram_win_info" into bus: mvebu-mbus: provide api for
   obtaining IO and DRAM window information.
- Added my Signed-off-by on all the patches as submitter of the series.
- Renamed the dts patches with the pattern "ARM: dts: platform:"
- Removed the patch "ARM: mvebu: enable SRAM support in
  mvebu_v7_defconfig" of this series and already applied it
- Modified the order of the patches.

In order to ease the test the branch mvneta-BM-framework-v4 is
available at g...@github.com:MISL-EBU-System-SW/mainline-public.git.

Thanks,

Gregory

Gregory CLEMENT (3):
  ARM: dts: armada-xp-openblocks-ax3-4: Add BM support
  net: add a hardware buffer management helper API
  net: mvneta: Use the new hwbm framework

Marcin Wojtas (6):
  ARM: dts: armada-38x: add buffer manager nodes
  ARM: dts: armada-38x: enable buffer manager support on Armada 38x
boards
  ARM: dts: armada-xp: add buffer manager nodes
  ARM: dts: armada-xp: enable buffer manager support on Armada XP boards
  bus: mvebu-mbus: provide api for obtaining IO and DRAM window
information
  net: mvneta: bm: add support for hardware buffer management

 .../bindings/net/marvell-armada-370-neta.txt   |  19 +-
 .../devicetree/bindings/net/marvell-neta-bm.txt|  49 ++
 arch/arm/boot/dts/armada-385-db-ap.dts |  20 +-
 arch/arm/boot/dts/armada-388-clearfog.dts  |   6 +
 arch/arm/boot/dts/armada-388-db.dts|  17 +-
 arch/arm/boot/dts/armada-388-gp.dts|  17 +-
 .../arm/boot/dts/armada-38x-solidrun-microsom.dtsi |  15 +-
 arch/arm/boot/dts/armada-38x.dtsi  |  18 +
 arch/arm/boot/dts/armada-xp-db.dts |  19 +-
 arch/arm/boot/dts/armada-xp-gp.dts |  19 +-
 arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts   |  19 +-
 arch/arm/boot/dts/armada-xp.dtsi   |  18 +
 drivers/bus/mvebu-mbus.c   |  52 +++
 drivers/net/ethernet/marvell/Kconfig   |  14 +
 drivers/net/ethernet/marvell/Makefile  |   1 +
 drivers/net/ethernet/marvell/mvneta.c  | 509 +++--
 drivers/net/ethernet/marvell/mvneta_bm.c   | 486 
 drivers/net/ethernet/marvell/mvneta_bm.h   | 184 
 include/linux/mbus.h   |   3 +
 include/net/hwbm.h |  26 ++
 net/Kconfig|   3 +
 net/core/Makefile  |   1 +
 net/core/hwbm.c|  87 
 23 files changed, 1556 insertions(+), 46 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/net/marvell-neta-bm.txt
 create mode 100644 drivers/net/ethernet/marvell/mvneta_bm.c
 create mode 100644 drivers/net/ethernet/marvell/mvneta_bm.h
 create mode 100644 include/net/hwbm.h
 create mode 100644 net/core/hwbm.c

-- 
2.5.0



PHY hardware reset

2016-03-05 Thread Sergei Shtylyov

Hello.

   I have a need to de-assert the active-low PHY hardware reset signal 
(mapped to a GPIO) before the MDIO bus scan since it's left asserted by the 
bootloader (U-Boot). I have a device tree probed MAC driver (ravb) and I'm 
somewhat at a loss about where and how to do this. The existing example 
(Freescale FEC) has DT props controlling the PHY reset GPIO in the MAC device 
node but it doesn't seem correct at all since this signal has nothing to do 
with the MAC, only with PHY! I therefore would like this "phy-reset-gpios" 
property to be defined under the PHY node but this way I'll have to add the 
handling of this prop to the phylib (it would be too late if I did that in a 
PHY driver method since that). I'm also seeing the mii_bus::reset() method and 
it seems a good place but I'm not sure if my PHY's reset signal can be treated 
as the reset signal for the whole bus; if it would, the DT prop should be 
placed under the MAC node anyway...

   Florian (and everybody), what's your thoughts on this matter?

MBR, Sergei


Re: [iproute PATCH] libnetlink: Double the dump buffer size

2016-03-05 Thread Johannes Berg
On Fri, 2016-03-04 at 15:35 -0800, Stephen Hemminger wrote:
> 
> > There have been reports about 'ip addr' printing "Message
> > truncated" on
[...]
> I thought this was addressed in kernel by making the VF info
> optional.
> The netlink protocol is showing some strain, this is one of them.

I don't know how the dump is split here, but we had a similar issue
with nl80211 - originally each physical device info had to fit into a
single message (one message during dump for each device), but we fixed
that by having userspace to set a flag when it's able to understand a
multi-message single physical device info.

Before:
 msg1: phy1: A, B, C
 msg2: phy2: A, B, C

After:
 msg1: phy1: A
 msg2: phy1: B
 msg3: phy1: C
 msg4: phy1: D
 msg5: phy2: A
 [...]

For userspace not setting the flag, it only get partial info today for
compatibility (A, B, C, not D), but in our particular case this was
perfectly reasonable since it would be unaware of the new capabilities
anyway.

I don't know precisely enough what the issue at hand is to comment
whether such an approach will be feasible here, but it seems it could
be.

johannes


Re: Possible double-free in the usbnet driver

2016-03-05 Thread Bjørn Mork


On March 5, 2016 4:51:30 PM CET, Oliver Neukum  wrote:
>On Fri, 2016-03-04 at 14:43 -0800, Linus Torvalds wrote:
>
>> So you have usbnet_defer_kevent() getting triggered, which in turn
>> ends up using "usbnet->kevent"
>> 
>> But somebody like Oliver is really the right person to check this.
>For
>> example, it's entirely possible that we should just instead do
>> 
>> cancel_work_sync(&dev->kevent);
>> 
>> before the "free_netdev(net)" in the "out1" label.
>
>Hi Bjorn,
>
>I think Linus has analyzed this correctly, but the fix really needs
>to cancel the work, because we can also fail later after bind() has
>already run. However, still cdc-ncm and the other drivers should clean
>up after themselves if bind() fails, as usbnet really cannot know what
>the subdrivers have done.
>
>So in conclusion, I think Linus' fix should also go into cdc-ncm.

Definitely.  The patch is so obviously correct that we can only wonder how it 
was possible to miss it in the first place :)

Will take a look to see if we could do a better job cleaning up in other places.

(I do also wonder a bit about the failure to bind - is that expected or is 
there some bug in the cdc_ncm descriptor parsing?)


Bjørn



Re: [Patch] rose_route_frame() NULL pointer dereference kernel panic

2016-03-05 Thread Francois Romieu
f6bvp  :
> Le 05/03/2016 17:22, David Miller a écrit :
[...]
> > If that's what he intended he would have implemented the entirety of
> > rose_xmit() as "kfree_skb(skb)".  But that's obviously not the case.
> > 
> > The author meant the packet to be sent in some way, perhaps using a
> > default path or something like that.
> 
> Via a NULL pointer ?
> I don't see how it could work.

Ask G4KLX what he meant when he wrote rose_rebuild_header (since that's
where Eric B. took rose_xmit from) back in the 2.1.9 era ?

See 
https://git.kernel.org/cgit/linux/kernel/git/history/history.git/commit/?id=d75df542864496c92ff705d7d072a58b0119a4ff

-- 
Ueimor


Re: [PATCH net] bridge: a netlink notification should be sent whenever those attributes change

2016-03-05 Thread Nikolay Aleksandrov
On 03/05/2016 03:44 PM, Xin Long wrote:
> On Thu, Mar 3, 2016 at 8:29 PM, Nikolay Aleksandrov
>  wrote:
>>
>> This is incorrect because you don't have rtnl here, bridge device sysfs
>> options take care of rtnl only on per-option basis and they obtain and
>> release it themselves, so you won't have rtnl held when you call
>> netdev_state_change. While I agree that this is needed, a larger change
>> would be necessary for br_sysfs_br.c.
> Sorry, I can't follow you, cause I didn't see any held in dev_ioctl, like:
> ipip6_tunnel_ioctl
> ipip6_tunnel_update
> netdev_state_change
> 
> why sysfs have to hold rtnl ?
> 

See the comment above dev_ifsioc:
/*
 *  Perform the SIOCxIFxxx calls, inside rtnl_lock()
 */
static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
{
...
it is usually called like:
   rtnl_lock();
   ret = dev_ifsioc(net, &ifr, cmd);
   rtnl_unlock();
And also you cannot be calling netdevice notifiers without RTNL. So in any
case you do need it here as well, in fact you'll surely hit the ASSERT_RTNL();
in call_netdevice_notifiers_info if you do so, thus I'm not sure how this
patch was actually tested.

>> Off-topic: I've been looking into factoring out the bond option API and 
>> reusing
>> it here as it already has all of this handled, but I won't have time to 
>> finish
>> it before the next merge window, so if you fix the issue here I'm okay with
>> this as interim solution.
>>
>> Cheers,
>>  Nik
>>



Re: [RFC] net: ipv4 -- Introduce ifa limit per net

2016-03-05 Thread Cyrill Gorcunov
On Sat, Mar 05, 2016 at 11:33:12AM -0500, David Miller wrote:
...
> 
> Probably the same optimization can be applied there, see patch below.
> And if that doesn't do it, there is a really easy way to batch the
> delete by scanning the FIB tree in one go and deleting every entry
> that points to "in_dev".  But I suspect we really won't need that.

It made it to work faster but still for 1 addresses it takes
~3-4 minutes to become alive again.

David, give me some time, I'll prepare tests and report the
results on patches and unpatched versions. And thanks a huge
for both patches!

Cyrill


Re: [PATCH v3 net-next 9/9] net: mvneta: Use the new hwbm framework

2016-03-05 Thread kbuild test robot
Hi Gregory,

[auto build test ERROR on v4.5-rc6]
[cannot apply to net-next/master robh/for-next next-20160304]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improving the system]

url:
https://github.com/0day-ci/linux/commits/Gregory-CLEMENT/API-set-for-HW-Buffer-management/20160306-012250
config: arm-multi_v7_defconfig (attached as .config)
reproduce:
wget 
https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross
 -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=arm 

All errors (new ones prefixed by >>):

   drivers/built-in.o: In function `mvneta_remove':
   :(.text+0x2b49d4): undefined reference to `mvneta_bm_pool_destroy'
   :(.text+0x2b49ec): undefined reference to `mvneta_bm_pool_destroy'
   drivers/built-in.o: In function `mvneta_bm_update_mtu':
   :(.text+0x2b52e8): undefined reference to `mvneta_bm_bufs_free'
   :(.text+0x2b532c): undefined reference to `mvneta_bm_pool_destroy'
   :(.text+0x2b5344): undefined reference to `mvneta_bm_pool_destroy'
>> :(.text+0x2b53ac): undefined reference to `hwbm_pool_add'
   drivers/built-in.o: In function `mvneta_probe':
   :(.text+0x2b5a74): undefined reference to `mvneta_bm_pool_use'
   :(.text+0x2b5b24): undefined reference to `mvneta_bm_pool_use'
   :(.text+0x2b5fa4): undefined reference to `mvneta_bm_pool_destroy'
   :(.text+0x2b5fbc): undefined reference to `mvneta_bm_pool_destroy'
   :(.text+0x2b6698): undefined reference to `mvneta_bm_pool_destroy'
   drivers/built-in.o: In function `mvneta_poll':
>> :(.text+0x2b7e98): undefined reference to `hwbm_pool_refill'

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: Binary data


Re: [PATCH v3 net-next 7/9] net: mvneta: bm: add support for hardware buffer management

2016-03-05 Thread kbuild test robot
Hi Marcin,

[auto build test ERROR on v4.5-rc6]
[also build test ERROR on next-20160304]
[cannot apply to net-next/master robh/for-next]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improving the system]

url:
https://github.com/0day-ci/linux/commits/Gregory-CLEMENT/API-set-for-HW-Buffer-management/20160306-012250
config: arm-multi_v7_defconfig (attached as .config)
reproduce:
wget 
https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross
 -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=arm 

All errors (new ones prefixed by >>):

   drivers/built-in.o: In function `mvneta_rx_refill':
   :(.text+0x2b4168): undefined reference to `mvneta_frag_alloc'
   :(.text+0x2b4258): undefined reference to `mvneta_frag_free'
   drivers/built-in.o: In function `mvneta_remove':
>> :(.text+0x2b4aec): undefined reference to `mvneta_bm_pool_destroy'
   :(.text+0x2b4b04): undefined reference to `mvneta_bm_pool_destroy'
   drivers/built-in.o: In function `mvneta_cleanup_rxqs':
   :(.text+0x2b52ec): undefined reference to `mvneta_frag_free'
   drivers/built-in.o: In function `mvneta_poll':
   :(.text+0x2b579c): undefined reference to `mvneta_bm_pool_refill'
   drivers/built-in.o: In function `mvneta_bm_update_mtu':
>> :(.text+0x2b65d0): undefined reference to `mvneta_bm_bufs_free'
   :(.text+0x2b6614): undefined reference to `mvneta_bm_pool_destroy'
   :(.text+0x2b662c): undefined reference to `mvneta_bm_pool_destroy'
   :(.text+0x2b6694): undefined reference to `mvneta_bm_bufs_add'
   drivers/built-in.o: In function `mvneta_probe':
>> :(.text+0x2b6eac): undefined reference to `mvneta_bm_pool_use'
   :(.text+0x2b6f5c): undefined reference to `mvneta_bm_pool_use'
   :(.text+0x2b73cc): undefined reference to `mvneta_bm_pool_destroy'
   :(.text+0x2b73e4): undefined reference to `mvneta_bm_pool_destroy'
   :(.text+0x2b7ab4): undefined reference to `mvneta_bm_pool_destroy'

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: Binary data


Re: [Patch] rose_route_frame() NULL pointer dereference kernel panic

2016-03-05 Thread f6bvp


Le 05/03/2016 17:22, David Miller a écrit :
> From: f6bvp 
> Date: Sat, 5 Mar 2016 16:32:42 +0100
> 
>> I understand I did not explain clearly or completely things.
>>
>> I agree that each time patched rose_xmit() is calling
>> rose_route_frame() it will
>> get a 0 return.
>> And I think this is what was intended by the author of rose_xmit().
> 
> If that's what he intended he would have implemented the entirety of
> rose_xmit() as "kfree_skb(skb)".  But that's obviously not the case.
> 
> The author meant the packet to be sent in some way, perhaps using a
> default path or something like that.

Via a NULL pointer ?
I don't see how it could work.

> 
> So please stop telling me over and over again that this function
> is meant to simply drop all packets, it's not true.
> 
I am just making hypothesis and trying to infer some deductions from the
behaviour of program when there is no more kernel panic.

If there is a situation leading to a kernel panic, I thought code should
be changed ?
What is the problem replacing a NULL argument by an array of 0 ?




[PATCH v3 net-next 8/9] net: add a hardware buffer management helper API

2016-03-05 Thread Gregory CLEMENT
This basic implementation allows sharing code between drivers using
hardware buffer management. As the code is hardware agnostic, there are
few helpers; most of the optimization brought by a HW BM has to be
done at driver level.

Signed-off-by: Gregory CLEMENT 
---
 include/net/hwbm.h | 21 +
 net/Kconfig|  3 ++
 net/core/Makefile  |  1 +
 net/core/hwbm.c| 87 ++
 4 files changed, 112 insertions(+)
 create mode 100644 include/net/hwbm.h
 create mode 100644 net/core/hwbm.c

diff --git a/include/net/hwbm.h b/include/net/hwbm.h
new file mode 100644
index ..9ae9449a7eda
--- /dev/null
+++ b/include/net/hwbm.h
@@ -0,0 +1,21 @@
+#ifndef _HWBM_H
+#define _HWBM_H
+
+struct hwbm_pool {
+   /* Size of the buffers managed */
+   int size;
+   /* Number of buffers currently used by this pool */
+   int buf_num;
+   /* constructor called during alocation */
+   int (*construct)(struct hwbm_pool *bm_pool, void *buf);
+   /* protect acces to the buffer counter*/
+   spinlock_t lock;
+   /* private data */
+   void *priv;
+};
+
+void hwbm_buf_free(struct hwbm_pool *bm_pool, void *buf);
+int hwbm_pool_refill(struct hwbm_pool *bm_pool, gfp_t gfp);
+int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num, gfp_t gfp);
+
+#endif /* _HWBM_H */
diff --git a/net/Kconfig b/net/Kconfig
index 174354618f8a..f50c8af4308b 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -253,6 +253,9 @@ config XPS
depends on SMP
default y
 
+config HWBM
+   bool
+
 config SOCK_CGROUP_DATA
bool
default n
diff --git a/net/core/Makefile b/net/core/Makefile
index 0b835de04de3..57e6dd81c6be 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -24,3 +24,4 @@ obj-$(CONFIG_NET_PTP_CLASSIFY) += ptp_classifier.o
 obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o
 obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o
 obj-$(CONFIG_LWTUNNEL) += lwtunnel.o
+obj-$(CONFIG_HWBM) += hwbm.o
diff --git a/net/core/hwbm.c b/net/core/hwbm.c
new file mode 100644
index ..a98d2a74ca02
--- /dev/null
+++ b/net/core/hwbm.c
@@ -0,0 +1,87 @@
+/* Support for hardware buffer manager.
+ *
+ * Copyright (C) 2016 Marvell
+ *
+ * Gregory CLEMENT 
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ */
+#include 
+#include 
+#include 
+#include 
+
+void hwbm_buf_free(struct hwbm_pool *bm_pool, void *buf)
+{
+   if (likely(bm_pool->size <= PAGE_SIZE))
+   skb_free_frag(buf);
+   else
+   kfree(buf);
+}
+EXPORT_SYMBOL_GPL(hwbm_buf_free);
+
+/* Refill processing for HW buffer management */
+int hwbm_pool_refill(struct hwbm_pool *bm_pool, gfp_t gfp)
+{
+   int frag_size = bm_pool->size;
+   void *buf;
+
+   if (likely(frag_size <= PAGE_SIZE))
+   buf = netdev_alloc_frag(frag_size);
+   else
+   buf = kmalloc(frag_size, gfp);
+
+   if (!buf)
+   return -ENOMEM;
+
+   if (bm_pool->construct)
+   if (bm_pool->construct(bm_pool, buf)) {
+   hwbm_buf_free(bm_pool, buf);
+   return -ENOMEM;
+   }
+
+   return 0;
+}
+EXPORT_SYMBOL_GPL(hwbm_pool_refill);
+
+int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num, gfp_t gfp)
+{
+   int err, i;
+   unsigned long flags;
+
+   spin_lock_irqsave(&bm_pool->lock, flags);
+   if (bm_pool->buf_num == bm_pool->size) {
+   pr_warn("pool already filled\n");
+   return bm_pool->buf_num;
+   }
+
+   if (buf_num + bm_pool->buf_num > bm_pool->size) {
+   pr_warn("cannot allocate %d buffers for pool\n",
+   buf_num);
+   return 0;
+   }
+
+   if ((buf_num + bm_pool->buf_num) < bm_pool->buf_num) {
+   pr_warn("Adding %d buffers to the %d current buffers will 
overflow\n",
+   buf_num,  bm_pool->buf_num);
+   return 0;
+   }
+
+   for (i = 0; i < buf_num; i++) {
+   err = hwbm_pool_refill(bm_pool, gfp);
+   if (err < 0)
+   break;
+   }
+
+   /* Update BM driver with number of buffers added to pool */
+   bm_pool->buf_num += i;
+
+   pr_debug("hwpm pool: %d of %d buffers added\n", i, buf_num);
+   spin_unlock_irqrestore(&bm_pool->lock, flags);
+
+   return i;
+}
+EXPORT_SYMBOL_GPL(hwbm_pool_add);
-- 
2.5.0



[PATCH v3 net-next 4/9] ARM: dts: armada-xp: enable buffer manager support on Armada XP boards

2016-03-05 Thread Gregory CLEMENT
From: Marcin Wojtas 

Since mvneta driver supports using hardware buffer management (BM), in
order to use it, board files have to be adjusted accordingly. This commit
enables BM on AXP-DB and AXP-GP in same manner - because number of ports
on those boards is the same as number of possible pools, each port is
supposed to use single pool for all kind of packets.

Moreover appropriate entry is added to 'soc' node ranges, as well as "okay"
status for 'bm' and 'bm-bppi' (internal SRAM) nodes.

Signed-off-by: Marcin Wojtas 
Signed-off-by: Gregory CLEMENT 
---
 arch/arm/boot/dts/armada-xp-db.dts | 19 ++-
 arch/arm/boot/dts/armada-xp-gp.dts | 19 ++-
 2 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/arch/arm/boot/dts/armada-xp-db.dts 
b/arch/arm/boot/dts/armada-xp-db.dts
index f774101416a5..30657302305d 100644
--- a/arch/arm/boot/dts/armada-xp-db.dts
+++ b/arch/arm/boot/dts/armada-xp-db.dts
@@ -77,7 +77,8 @@
  MBUS_ID(0x01, 0x1d) 0 0 0xfff0 0x10
  MBUS_ID(0x01, 0x2f) 0 0 0xf000 0x100
  MBUS_ID(0x09, 0x09) 0 0 0xf810 0x1
- MBUS_ID(0x09, 0x05) 0 0 0xf811 0x1>;
+ MBUS_ID(0x09, 0x05) 0 0 0xf811 0x1
+ MBUS_ID(0x0c, 0x04) 0 0 0xf120 0x10>;
 
devbus-bootcs {
status = "okay";
@@ -181,21 +182,33 @@
status = "okay";
phy = <>;
phy-mode = "rgmii-id";
+   buffer-manager = <>;
+   bm,pool-long = <0>;
};
ethernet@74000 {
status = "okay";
phy = <>;
phy-mode = "rgmii-id";
+   buffer-manager = <>;
+   bm,pool-long = <1>;
};
ethernet@3 {
status = "okay";
phy = <>;
phy-mode = "sgmii";
+   buffer-manager = <>;
+   bm,pool-long = <2>;
};
ethernet@34000 {
status = "okay";
phy = <>;
phy-mode = "sgmii";
+   buffer-manager = <>;
+   bm,pool-long = <3>;
+   };
+
+   bm@c {
+   status = "okay";
};
 
mvsdio@d4000 {
@@ -230,5 +243,9 @@
};
};
};
+
+   bm-bppi {
+   status = "okay";
+   };
};
 };
diff --git a/arch/arm/boot/dts/armada-xp-gp.dts 
b/arch/arm/boot/dts/armada-xp-gp.dts
index 4878d7353069..a1ded01d0c07 100644
--- a/arch/arm/boot/dts/armada-xp-gp.dts
+++ b/arch/arm/boot/dts/armada-xp-gp.dts
@@ -96,7 +96,8 @@
  MBUS_ID(0x01, 0x1d) 0 0 0xfff0 0x10
  MBUS_ID(0x01, 0x2f) 0 0 0xf000 0x100
  MBUS_ID(0x09, 0x09) 0 0 0xf810 0x1
- MBUS_ID(0x09, 0x05) 0 0 0xf811 0x1>;
+ MBUS_ID(0x09, 0x05) 0 0 0xf811 0x1
+ MBUS_ID(0x0c, 0x04) 0 0 0xf120 0x10>;
 
devbus-bootcs {
status = "okay";
@@ -196,21 +197,29 @@
status = "okay";
phy = <>;
phy-mode = "qsgmii";
+   buffer-manager = <>;
+   bm,pool-long = <0>;
};
ethernet@74000 {
status = "okay";
phy = <>;
phy-mode = "qsgmii";
+   buffer-manager = <>;
+   bm,pool-long = <1>;
};
ethernet@3 {
status = "okay";
phy = <>;
phy-mode = "qsgmii";
+   buffer-manager = <>;
+   bm,pool-long = <2>;
};
ethernet@34000 {
status = "okay";
phy = <>;
phy-mode = "qsgmii";
+  

[PATCH v3 net-next 2/9] ARM: dts: armada-38x: enable buffer manager support on Armada 38x boards

2016-03-05 Thread Gregory CLEMENT
From: Marcin Wojtas 

Since mvneta driver supports using hardware buffer management (BM), in
order to use it, board files have to be adjusted accordingly. This commit
enables BM on:
* A385-DB-AP - each port has its own pool for long and common pool for
short packets,
* A388-ClearFog - same as above,
* A388-DB - to each port unique 'short' and 'long' pools are mapped,
* A388-GP - same as above.

Moreover appropriate entry is added to 'soc' node ranges, as well as "okay"
status for 'bm' and 'bm-bppi' (internal SRAM) nodes.

[gregory.clem...@free-electrons.com: add support for the ClearFog board]

Signed-off-by: Marcin Wojtas 
Signed-off-by: Gregory CLEMENT 
---
 arch/arm/boot/dts/armada-385-db-ap.dts  | 20 +++-
 arch/arm/boot/dts/armada-388-clearfog.dts   |  6 ++
 arch/arm/boot/dts/armada-388-db.dts | 17 -
 arch/arm/boot/dts/armada-388-gp.dts | 17 -
 arch/arm/boot/dts/armada-38x-solidrun-microsom.dtsi | 15 ++-
 5 files changed, 71 insertions(+), 4 deletions(-)

diff --git a/arch/arm/boot/dts/armada-385-db-ap.dts 
b/arch/arm/boot/dts/armada-385-db-ap.dts
index acd5b1519edb..5f9451be21ff 100644
--- a/arch/arm/boot/dts/armada-385-db-ap.dts
+++ b/arch/arm/boot/dts/armada-385-db-ap.dts
@@ -61,7 +61,8 @@
ranges = ;
+ MBUS_ID(0x09, 0x15) 0 0xf111 0x1
+ MBUS_ID(0x0c, 0x04) 0 0xf120 0x10>;
 
internal-regs {
spi1: spi@10680 {
@@ -138,12 +139,18 @@
status = "okay";
phy = <>;
phy-mode = "sgmii";
+   buffer-manager = <>;
+   bm,pool-long = <1>;
+   bm,pool-short = <3>;
};
 
ethernet@34000 {
status = "okay";
phy = <>;
phy-mode = "sgmii";
+   buffer-manager = <>;
+   bm,pool-long = <2>;
+   bm,pool-short = <3>;
};
 
ethernet@7 {
@@ -157,6 +164,13 @@
status = "okay";
phy = <>;
phy-mode = "rgmii-id";
+   buffer-manager = <>;
+   bm,pool-long = <0>;
+   bm,pool-short = <3>;
+   };
+
+   bm@c8000 {
+   status = "okay";
};
 
nfc: flash@d {
@@ -178,6 +192,10 @@
};
};
 
+   bm-bppi {
+   status = "okay";
+   };
+
pcie-controller {
status = "okay";
 
diff --git a/arch/arm/boot/dts/armada-388-clearfog.dts 
b/arch/arm/boot/dts/armada-388-clearfog.dts
index c6e180eb3b11..c60206efb583 100644
--- a/arch/arm/boot/dts/armada-388-clearfog.dts
+++ b/arch/arm/boot/dts/armada-388-clearfog.dts
@@ -78,6 +78,9 @@
internal-regs {
ethernet@3 {
phy-mode = "sgmii";
+   buffer-manager = <>;
+   bm,pool-long = <2>;
+   bm,pool-short = <1>;
status = "okay";
 
fixed-link {
@@ -88,6 +91,9 @@
 
ethernet@34000 {
phy-mode = "sgmii";
+   buffer-manager = <>;
+   bm,pool-long = <3>;
+   bm,pool-short = <1>;
status = "okay";
 
fixed-link {
diff --git a/arch/arm/boot/dts/armada-388-db.dts 
b/arch/arm/boot/dts/armada-388-db.dts
index ff47af57f091..ea93ed727030 100644
--- a/arch/arm/boot/dts/armada-388-db.dts
+++ b/arch/arm/boot/dts/armada-388-db.dts
@@ -66,7 +66,8 @@
ranges = ;
+ MBUS_ID(0x09, 0x15) 0 0xf111 0x1
+ 

[PATCH v3 net-next 9/9] net: mvneta: Use the new hwbm framework

2016-03-05 Thread Gregory CLEMENT
Now that the hardware buffer management framework has been introduced,
let's use it.

Signed-off-by: Gregory CLEMENT 
---
 drivers/net/ethernet/marvell/Kconfig |   1 +
 drivers/net/ethernet/marvell/mvneta.c|  38 +++--
 drivers/net/ethernet/marvell/mvneta_bm.c | 140 +++
 drivers/net/ethernet/marvell/mvneta_bm.h |  11 +--
 4 files changed, 67 insertions(+), 123 deletions(-)

diff --git a/drivers/net/ethernet/marvell/Kconfig 
b/drivers/net/ethernet/marvell/Kconfig
index ac6605c62f46..62d80fddbe34 100644
--- a/drivers/net/ethernet/marvell/Kconfig
+++ b/drivers/net/ethernet/marvell/Kconfig
@@ -43,6 +43,7 @@ config MVMDIO
 config MVNETA_BM
tristate "Marvell Armada 38x/XP network interface BM support"
depends on MVNETA
+   select HWBM
---help---
  This driver supports auxiliary block of the network
  interface units in the Marvell ARMADA XP and ARMADA 38x SoC
diff --git a/drivers/net/ethernet/marvell/mvneta.c 
b/drivers/net/ethernet/marvell/mvneta.c
index 26e0270e2d89..7f4039d1d970 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -30,6 +30,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "mvneta_bm.h"
 #include 
 #include 
@@ -1021,11 +1022,12 @@ static int mvneta_bm_port_init(struct platform_device 
*pdev,
 static void mvneta_bm_update_mtu(struct mvneta_port *pp, int mtu)
 {
struct mvneta_bm_pool *bm_pool = pp->pool_long;
+   struct hwbm_pool *hwbm_pool = _pool->hwbm_pool;
int num;
 
/* Release all buffers from long pool */
mvneta_bm_bufs_free(pp->bm_priv, bm_pool, 1 << pp->id);
-   if (bm_pool->buf_num) {
+   if (hwbm_pool->buf_num) {
WARN(1, "cannot free all buffers in pool %d\n",
 bm_pool->id);
goto bm_mtu_err;
@@ -1033,14 +1035,14 @@ static void mvneta_bm_update_mtu(struct mvneta_port 
*pp, int mtu)
 
bm_pool->pkt_size = MVNETA_RX_PKT_SIZE(mtu);
bm_pool->buf_size = MVNETA_RX_BUF_SIZE(bm_pool->pkt_size);
-   bm_pool->frag_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
+   hwbm_pool->size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
  SKB_DATA_ALIGN(MVNETA_RX_BUF_SIZE(bm_pool->pkt_size));
 
/* Fill entire long pool */
-   num = mvneta_bm_bufs_add(pp->bm_priv, bm_pool, bm_pool->size);
-   if (num != bm_pool->size) {
+   num = hwbm_pool_add(hwbm_pool, hwbm_pool->size, GFP_ATOMIC);
+   if (num != hwbm_pool->size) {
WARN(1, "pool %d: %d of %d allocated\n",
-bm_pool->id, num, bm_pool->size);
+bm_pool->id, num, hwbm_pool->size);
goto bm_mtu_err;
}
mvneta_bm_pool_bufsize_set(pp, bm_pool->buf_size, bm_pool->id);
@@ -1720,6 +1722,14 @@ static void mvneta_txq_done(struct mvneta_port *pp,
}
 }
 
+void *mvneta_frag_alloc(unsigned int frag_size)
+{
+   if (likely(frag_size <= PAGE_SIZE))
+   return netdev_alloc_frag(frag_size);
+   else
+   return kmalloc(frag_size, GFP_ATOMIC);
+}
+
 /* Refill processing for SW buffer management */
 static int mvneta_rx_refill(struct mvneta_port *pp,
struct mvneta_rx_desc *rx_desc)
@@ -1775,6 +1785,14 @@ static u32 mvneta_skb_tx_csum(struct mvneta_port *pp, 
struct sk_buff *skb)
return MVNETA_TX_L4_CSUM_NOT;
 }
 
+void mvneta_frag_free(unsigned int frag_size, void *data)
+{
+   if (likely(frag_size <= PAGE_SIZE))
+   skb_free_frag(data);
+   else
+   kfree(data);
+}
+
 /* Drop packets received by the RXQ and free buffers */
 static void mvneta_rxq_drop_pkts(struct mvneta_port *pp,
 struct mvneta_rx_queue *rxq)
@@ -2010,14 +2028,14 @@ err_drop_frame:
}
 
/* Refill processing */
-   err = mvneta_bm_pool_refill(pp->bm_priv, bm_pool);
+   err = hwbm_pool_refill(_pool->hwbm_pool, GFP_ATOMIC);
if (err) {
netdev_err(dev, "Linux processing - Can't refill\n");
rxq->missed++;
goto err_drop_frame_ret_pool;
}
 
-   frag_size = bm_pool->frag_size;
+   frag_size = bm_pool->hwbm_pool.size;
 
skb = build_skb(data, frag_size > PAGE_SIZE ? 0 : frag_size);
 
@@ -4084,6 +4102,12 @@ static int mvneta_probe(struct platform_device *pdev)
}
}
 
+   err = register_netdev(dev);
+   if (err < 0) {
+   dev_err(>dev, "failed to register\n");
+   goto err_free_stats;
+   }
+
err = mvneta_init(>dev, pp);
if (err < 0)
goto err_netdev;
diff --git a/drivers/net/ethernet/marvell/mvneta_bm.c 
b/drivers/net/ethernet/marvell/mvneta_bm.c
index 

[PATCH v3 net-next 3/9] ARM: dts: armada-xp: add buffer manager nodes

2016-03-05 Thread Gregory CLEMENT
From: Marcin Wojtas 

Armada XP network controller supports hardware buffer management (BM).
Since it is now enabled in mvneta driver, appropriate nodes can be added
to armada-xp.dtsi - for the actual common BM unit (bm@c) and its
internal SRAM (bm-bppi), which is used for indirect access to buffer
pointer ring residing in DRAM.

Pools - ports mapping, bm-bppi entry in 'soc' node's ranges and optional
parameters are supposed to be set in board files.

Signed-off-by: Marcin Wojtas 
Signed-off-by: Gregory CLEMENT 
---
 arch/arm/boot/dts/armada-xp.dtsi | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/arch/arm/boot/dts/armada-xp.dtsi b/arch/arm/boot/dts/armada-xp.dtsi
index be23196829bb..bd459360d7a6 100644
--- a/arch/arm/boot/dts/armada-xp.dtsi
+++ b/arch/arm/boot/dts/armada-xp.dtsi
@@ -253,6 +253,14 @@
marvell,crypto-sram-size = <0x800>;
};
 
+   bm: bm@c {
+   compatible = "marvell,armada-380-neta-bm";
+   reg = <0xc 0xac>;
+   clocks = < 13>;
+   internal-mem = <_bppi>;
+   status = "disabled";
+   };
+
xor@f0900 {
compatible = "marvell,orion-xor";
reg = <0xF0900 0x100
@@ -291,6 +299,16 @@
#size-cells = <1>;
ranges = <0 MBUS_ID(0x09, 0x05) 0 0x800>;
};
+
+   bm_bppi: bm-bppi {
+   compatible = "mmio-sram";
+   reg = ;
+   ranges = <0 MBUS_ID(0x0c, 0x04) 0 0x10>;
+   #address-cells = <1>;
+   #size-cells = <1>;
+   clocks = < 13>;
+   status = "disabled";
+   };
};
 
clocks {
-- 
2.5.0



[PATCH v3 net-next 7/9] net: mvneta: bm: add support for hardware buffer management

2016-03-05 Thread Gregory CLEMENT
From: Marcin Wojtas 

Buffer manager (BM) is a dedicated hardware unit that can be used by all
ethernet ports of Armada XP and 38x SoC's. It allows to offload CPU on RX
path by sparing DRAM access on refilling buffer pool, hardware-based
filling of descriptor ring data and better memory utilization due to HW
arbitration for using 'short' pools for small packets.

Tests performed with A388 SoC working as a network bridge between two
packet generators showed increase of maximum processed 64B packets by
~20k (~555k packets with BM enabled vs ~535k packets without BM). Also
when pushing 1500B-packets with a line rate achieved, CPU load decreased
from around 25% without BM to 20% with BM.

BM comprise up to 4 buffer pointers' (BP) rings kept in DRAM, which
are called external BP pools - BPPE. Allocating and releasing buffer
pointers (BP) to/from BPPE is performed indirectly by write/read access
to a dedicated internal SRAM, where internal BP pools (BPPI) are placed.
BM hardware controls status of BPPE automatically, as well as assigning
proper buffers to RX descriptors. For more details please refer to
Functional Specification of Armada XP or 38x SoC.

In order to enable support for a separate hardware block, common for all
ports, a new driver has to be implemented ('mvneta_bm'). It provides
initialization sequence of address space, clocks, registers, SRAM,
empty pools' structures and also obtaining optional configuration
from DT (please refer to device tree binding documentation). mvneta_bm
exposes also a necessary API to mvneta driver, as well as a dedicated
structure with BM information (bm_priv), whose presence is used as a
flag notifying of BM usage by port. It has to be ensured that mvneta_bm
probe is executed prior to the ones in ports' driver. In case BM is not
used or its probe fails, mvneta falls back to use software buffer
management.

A sequence executed in mvneta_probe function is modified in order to have
an access to needed resources before possible port's BM initialization is
done. According to port-pools mapping provided by DT appropriate registers
are configured and the buffer pools are filled. RX path is modified
accordingly. Because the hardware allows a wide variety of configuration
options, following assumptions are made:
* using BM mechanisms can be selectively disabled/enabled basing
  on DT configuration among the ports
* 'long' pool's single buffer size is tied to port's MTU
* using 'long' pool by port is obligatory and it cannot be shared
* using 'short' pool for smaller packets is optional
* one 'short' pool can be shared among all ports

This commit enables hardware buffer management operation cooperating with
existing mvneta driver. New device tree binding documentation is added and
the one of mvneta is updated accordingly.

[gregory.clem...@free-electrons.com: removed the suspend/resume part]

Signed-off-by: Marcin Wojtas 
Signed-off-by: Gregory CLEMENT 
---
 .../bindings/net/marvell-armada-370-neta.txt   |  19 +-
 .../devicetree/bindings/net/marvell-neta-bm.txt|  49 ++
 drivers/net/ethernet/marvell/Kconfig   |  13 +
 drivers/net/ethernet/marvell/Makefile  |   1 +
 drivers/net/ethernet/marvell/mvneta.c  | 509 +--
 drivers/net/ethernet/marvell/mvneta_bm.c   | 562 +
 drivers/net/ethernet/marvell/mvneta_bm.h   | 167 ++
 7 files changed, 1271 insertions(+), 49 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/net/marvell-neta-bm.txt
 create mode 100644 drivers/net/ethernet/marvell/mvneta_bm.c
 create mode 100644 drivers/net/ethernet/marvell/mvneta_bm.h

diff --git a/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt 
b/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt
index d0cb8693963b..73be8970815e 100644
--- a/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt
+++ b/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt
@@ -18,15 +18,30 @@ Optional properties:
   "core" for core clock and "bus" for the optional bus clock.
 
 
+Optional properties (valid only for Armada XP/38x):
+
+- buffer-manager: a phandle to a buffer manager node. Please refer to
+  Documentation/devicetree/bindings/net/marvell-neta-bm.txt
+- bm,pool-long: ID of a pool, that will accept all packets of a size
+  higher than 'short' pool's threshold (if set) and up to MTU value.
+  Obligatory, when the port is supposed to use hardware
+  buffer management.
+- bm,pool-short: ID of a pool, that will be used for accepting
+  packets of a size lower than given threshold. If not set, the port
+  will use a single 'long' pool for all packets, as defined above.
+
 Example:
 
-ethernet@d007 {
+ethernet@7 {
compatible = "marvell,armada-370-neta";
-   reg = <0xd007 0x2500>;
+   reg = <0x7 0x2500>;
interrupts = <8>;
clocks = <_clk 4>;

[PATCH v3 net-next 6/9] bus: mvebu-mbus: provide api for obtaining IO and DRAM window information

2016-03-05 Thread Gregory CLEMENT
From: Marcin Wojtas 

This commit enables finding appropriate mbus window and obtaining its
target id and attribute for given physical address in two separate
routines, both for IO and DRAM windows. This functionality
is needed for Armada XP/38x Network Controller's Buffer Manager and
PnC configuration.

[gregory.clem...@free-electrons.com: Fix size test for
mvebu_mbus_get_dram_win_info]

Signed-off-by: Marcin Wojtas 
[DRAM window information reference in LKv3.10]
Signed-off-by: Evan Wang 
Signed-off-by: Gregory CLEMENT 
---
 drivers/bus/mvebu-mbus.c | 52 
 include/linux/mbus.h |  3 +++
 2 files changed, 55 insertions(+)

diff --git a/drivers/bus/mvebu-mbus.c b/drivers/bus/mvebu-mbus.c
index c43c3d2baf73..c2e52864bb03 100644
--- a/drivers/bus/mvebu-mbus.c
+++ b/drivers/bus/mvebu-mbus.c
@@ -948,6 +948,58 @@ void mvebu_mbus_get_pcie_io_aperture(struct resource *res)
*res = mbus_state.pcie_io_aperture;
 }
 
+int mvebu_mbus_get_dram_win_info(phys_addr_t phyaddr, u8 *target, u8 *attr)
+{
+   const struct mbus_dram_target_info *dram;
+   int i;
+
+   /* Get dram info */
+   dram = mv_mbus_dram_info();
+   if (!dram) {
+   pr_err("missing DRAM information\n");
+   return -ENODEV;
+   }
+
+   /* Try to find matching DRAM window for phyaddr */
+   for (i = 0; i < dram->num_cs; i++) {
+   const struct mbus_dram_window *cs = dram->cs + i;
+
+   if (cs->base <= phyaddr &&
+   phyaddr <= (cs->base + cs->size - 1)) {
+   *target = dram->mbus_dram_target_id;
+   *attr = cs->mbus_attr;
+   return 0;
+   }
+   }
+
+   pr_err("invalid dram address 0x%x\n", phyaddr);
+   return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(mvebu_mbus_get_dram_win_info);
+
+int mvebu_mbus_get_io_win_info(phys_addr_t phyaddr, u32 *size, u8 *target,
+  u8 *attr)
+{
+   int win;
+
+   for (win = 0; win < mbus_state.soc->num_wins; win++) {
+   u64 wbase;
+   int enabled;
+
+   mvebu_mbus_read_window(_state, win, , ,
+  size, target, attr, NULL);
+
+   if (!enabled)
+   continue;
+
+   if (wbase <= phyaddr && phyaddr <= wbase + *size)
+   return win;
+   }
+
+   return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(mvebu_mbus_get_io_win_info);
+
 static __init int mvebu_mbus_debugfs_init(void)
 {
struct mvebu_mbus_state *s = _state;
diff --git a/include/linux/mbus.h b/include/linux/mbus.h
index 1f7bc630d225..ea34a867caa0 100644
--- a/include/linux/mbus.h
+++ b/include/linux/mbus.h
@@ -69,6 +69,9 @@ static inline const struct mbus_dram_target_info 
*mv_mbus_dram_info_nooverlap(vo
 int mvebu_mbus_save_cpu_target(u32 *store_addr);
 void mvebu_mbus_get_pcie_mem_aperture(struct resource *res);
 void mvebu_mbus_get_pcie_io_aperture(struct resource *res);
+int mvebu_mbus_get_dram_win_info(phys_addr_t phyaddr, u8 *target, u8 *attr);
+int mvebu_mbus_get_io_win_info(phys_addr_t phyaddr, u32 *size, u8 *target,
+  u8 *attr);
 int mvebu_mbus_add_window_remap_by_id(unsigned int target,
  unsigned int attribute,
  phys_addr_t base, size_t size,
-- 
2.5.0



[PATCH v3 net-next 1/9] ARM: dts: armada-38x: add buffer manager nodes

2016-03-05 Thread Gregory CLEMENT
From: Marcin Wojtas 

Armada 38x network controller supports hardware buffer management (BM).
Since it is now enabled in mvneta driver, appropriate nodes can be added
to armada-38x.dtsi - for the actual common BM unit (bm@c8000) and its
internal SRAM (bm-bppi), which is used for indirect access to buffer
pointer ring residing in DRAM.

Pools - ports mapping, bm-bppi entry in 'soc' node's ranges and optional
parameters are supposed to be set in board files.

Signed-off-by: Marcin Wojtas 
Signed-off-by: Gregory CLEMENT 
---
 arch/arm/boot/dts/armada-38x.dtsi | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/arch/arm/boot/dts/armada-38x.dtsi 
b/arch/arm/boot/dts/armada-38x.dtsi
index e8b7f6726772..1b7d690d8e10 100644
--- a/arch/arm/boot/dts/armada-38x.dtsi
+++ b/arch/arm/boot/dts/armada-38x.dtsi
@@ -540,6 +540,14 @@
status = "disabled";
};
 
+   bm: bm@c8000 {
+   compatible = "marvell,armada-380-neta-bm";
+   reg = <0xc8000 0xac>;
+   clocks = < 13>;
+   internal-mem = <_bppi>;
+   status = "disabled";
+   };
+
sata@e {
compatible = "marvell,armada-380-ahci";
reg = <0xe 0x2000>;
@@ -618,6 +626,16 @@
#size-cells = <1>;
ranges = <0 MBUS_ID(0x09, 0x15) 0 0x800>;
};
+
+   bm_bppi: bm-bppi {
+   compatible = "mmio-sram";
+   reg = ;
+   ranges = <0 MBUS_ID(0x0c, 0x04) 0 0x10>;
+   #address-cells = <1>;
+   #size-cells = <1>;
+   clocks = < 13>;
+   status = "disabled";
+   };
};
 
clocks {
-- 
2.5.0



[PATCH v3 net-next 0/9] API set for HW Buffer management

2016-03-05 Thread Gregory CLEMENT
This is a third version of an API set for HW Buffer management that I
initially submitted here:
http://thread.gmane.org/gmane.linux.kernel/2125152

Since the last version I took into account David's remarks:

- I made a HWBM and a SWBM version of the mvneta_rx() function in
  order to reduce the conditional code. I kept a condition inside
  the mvneta_poll because specializing this function would have meant
  duplicating 95% of the code.
- I put back the register_netdev() call at the end of the
  mvneta_probe() function. In order to have a unique ID for each port I
  just used a global variable in the driver.

I also added a fix from Marcin in the "net: mvneta: bm: add support
for hardware buffer management" patch:
- "when dropping packets, only buffer pointers passed from BM to
  descriptors have to be returned to the pool. In submitted version
  after closing the port and mvneta_rxq_deinit(), it was very likely
  that a lot of fake buffers are added to the pool, because all
  descriptors took part in iteration."

Finally, I also removed the select MVNETA_BM from the Kconfig; this
lets users choose whether or not to use it.

For the record in the previous version I made the following changes:
v1 -> v2

- The hardware buffer management helpers are no more built by default
  and now depend on a hidden config symbol which has to be selected
  by the driver if needed
- The hwbm_pool_refill() and hwbm_pool_add() now receive a gfp_t as
  argument allowing the caller to specify the flag it needs.
- buf_num is now tested to ensure there is no wrapping
- A spinlock has been added to protect the hwbm_pool_add() function in
  SMP or irq context.
- used pr_warn instead of pr_debug in case of errors.
- fixed the mvneta implementation by returning the buffer to the pool
  at various place instead of ignoring it.
- Squashed "bus: mvebu-mbus: Fix size test for
   mvebu_mbus_get_dram_win_info" into bus: mvebu-mbus: provide api for
   obtaining IO and DRAM window information.
- Added my signed-off-by on all the patches as submitter of the series.
- Renamed the dts patches with the pattern "ARM: dts: platform:"
- Removed the patch "ARM: mvebu: enable SRAM support in
  mvebu_v7_defconfig" of this series and already applied it
- Modified the order of the patches.

In order to ease the test the branch mvneta-BM-framework-v3 is
available at g...@github.com:MISL-EBU-System-SW/mainline-public.git.

Thanks,

Gregory

Gregory CLEMENT (3):
  ARM: dts: armada-xp-openblocks-ax3-4: Add BM support
  net: add a hardware buffer management helper API
  net: mvneta: Use the new hwbm framework

Marcin Wojtas (6):
  ARM: dts: armada-38x: add buffer manager nodes
  ARM: dts: armada-38x: enable buffer manager support on Armada 38x
boards
  ARM: dts: armada-xp: add buffer manager nodes
  ARM: dts: armada-xp: enable buffer manager support on Armada XP boards
  bus: mvebu-mbus: provide api for obtaining IO and DRAM window
information
  net: mvneta: bm: add support for hardware buffer management

 .../bindings/net/marvell-armada-370-neta.txt   |  19 +-
 .../devicetree/bindings/net/marvell-neta-bm.txt|  49 ++
 arch/arm/boot/dts/armada-385-db-ap.dts |  20 +-
 arch/arm/boot/dts/armada-388-clearfog.dts  |   6 +
 arch/arm/boot/dts/armada-388-db.dts|  17 +-
 arch/arm/boot/dts/armada-388-gp.dts|  17 +-
 .../arm/boot/dts/armada-38x-solidrun-microsom.dtsi |  15 +-
 arch/arm/boot/dts/armada-38x.dtsi  |  18 +
 arch/arm/boot/dts/armada-xp-db.dts |  19 +-
 arch/arm/boot/dts/armada-xp-gp.dts |  19 +-
 arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts   |  19 +-
 arch/arm/boot/dts/armada-xp.dtsi   |  18 +
 drivers/bus/mvebu-mbus.c   |  52 ++
 drivers/net/ethernet/marvell/Kconfig   |  14 +
 drivers/net/ethernet/marvell/Makefile  |   1 +
 drivers/net/ethernet/marvell/mvneta.c  | 525 +++--
 drivers/net/ethernet/marvell/mvneta_bm.c   | 486 +++
 drivers/net/ethernet/marvell/mvneta_bm.h   | 162 +++
 include/linux/mbus.h   |   3 +
 include/net/hwbm.h |  21 +
 net/Kconfig|   3 +
 net/core/Makefile  |   1 +
 net/core/hwbm.c|  87 
 23 files changed, 1539 insertions(+), 52 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/net/marvell-neta-bm.txt
 create mode 100644 drivers/net/ethernet/marvell/mvneta_bm.c
 create mode 100644 drivers/net/ethernet/marvell/mvneta_bm.h
 create mode 100644 include/net/hwbm.h
 create mode 100644 net/core/hwbm.c

-- 
2.5.0



[PATCH v3 net-next 5/9] ARM: dts: armada-xp-openblocks-ax3-4: Add BM support

2016-03-05 Thread Gregory CLEMENT
Allow Openblock AX3 using hardware buffer management with mvneta.

Signed-off-by: Gregory CLEMENT 
---
 arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts | 19 ++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts 
b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts
index a5db17782e08..3aa29a91c7b8 100644
--- a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts
+++ b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts
@@ -67,7 +67,8 @@
  MBUS_ID(0x01, 0x1d) 0 0 0xfff0 0x10
  MBUS_ID(0x01, 0x2f) 0 0 0xf000 0x800
  MBUS_ID(0x09, 0x09) 0 0 0xf810 0x1
- MBUS_ID(0x09, 0x05) 0 0 0xf811 0x1>;
+ MBUS_ID(0x09, 0x05) 0 0 0xf811 0x1
+ MBUS_ID(0x0c, 0x04) 0 0 0xd120 0x10>;
 
devbus-bootcs {
status = "okay";
@@ -176,21 +177,29 @@
status = "okay";
phy = <>;
phy-mode = "sgmii";
+   buffer-manager = <>;
+   bm,pool-long = <0>;
};
ethernet@74000 {
status = "okay";
phy = <>;
phy-mode = "sgmii";
+   buffer-manager = <>;
+   bm,pool-long = <1>;
};
ethernet@3 {
status = "okay";
phy = <>;
phy-mode = "sgmii";
+   buffer-manager = <>;
+   bm,pool-long = <2>;
};
ethernet@34000 {
status = "okay";
phy = <>;
phy-mode = "sgmii";
+   buffer-manager = <>;
+   bm,pool-long = <3>;
};
i2c@11000 {
status = "okay";
@@ -219,6 +228,14 @@
usb@51000 {
status = "okay";
};
+
+   bm@c {
+   status = "okay";
+   };
+   };
+
+   bm-bppi {
+   status = "okay";
};
};
 };
-- 
2.5.0



Re: [RFC] net: ipv4 -- Introduce ifa limit per net

2016-03-05 Thread Cyrill Gorcunov
On Sat, Mar 05, 2016 at 11:33:12AM -0500, David Miller wrote:
> > and until everything get cleaned up I couldn't connect
> > to the node via ssh. I continue playing with patch maybe
> > I find some other optimization paths. Thanks!
> 
> What is the order of magnitude of the delay, as a function of
> number of IP aliases installed, compred to before the patch?

You know I didn't measure precise numbers. The script (which
I of course forgot to attach in first report) creates 65025
addresses and on exit it takes ~10 minutes (it also depends
on load on the host because I've been testing inside VM).

I'll create some kind of graph for that if you interested,
should I?

> The remaining cost you are seeing comes of course from the router
> deletion, whose path is:
> 
>   blocking_notifier_call_chain(_chain, NETDEV_DOWN, ifa1);
>   fib_inetaddr_event()
>   fib_del_ifaddr(ifa, NULL);
> 
> Which does another full list scan trying to handle primaries and
> secondaries.
> 
> Probably the same optimization can be applied there, see patch below.
> And if that doesn't do it, there is a really easy way to batch the
> delete by scanning the FIB tree in one go and deleting every entry
> that points to "in_dev".  But I suspect we really won't need that.

I'll test it David, in a couple of hours I hope. And report the
result.

Cyrill


Re: [PATCH net] sctp: use gfp insteaad of GFP_NOWAIT in idr_alloc_cyclic when sctp_assoc_set_id

2016-03-05 Thread Eric Dumazet
On sam., 2016-03-05 at 23:59 +0800, Xin Long wrote:
> The following call trace appears because of idr_alloc_cyclic(..., GFP_NOWAIT),
> that is a stress test, and the reason should be a heavy use. it will cause
> sctp_process_init return 0, and make connection init fail.
> 
> All the allocations of idr_alloc_cyclic should respect gfp flag.
> So we can fix it by using gfp insteaad of GFP_NOWAIT in idr_alloc_cyclic.
> 
> [ 2569.797532] Call Trace:
> [ 2569.809721]  [] dump_stack+0x19/0x1b
> [ 2569.837424]  [] warn_alloc_failed+0x110/0x180
> [ 2569.867013]  [] __alloc_pages_nodemask+0x9a8/0xb90
> [ 2569.900039]  [] alloc_pages_current+0xa9/0x170
> [ 2569.930100]  [] new_slab+0x2ec/0x300
> [ 2569.957505]  [] __slab_alloc+0x315/0x48f
> [ 2569.985168]  [] ? idr_layer_alloc+0x89/0x90
> [ 2570.014344]  [] ? memzero_explicit+0xe/0x10
> [ 2570.044116]  [] kmem_cache_alloc+0x193/0x1d0
> [ 2570.073283]  [] idr_layer_alloc+0x89/0x90
> [ 2570.102684]  [] idr_get_empty_slot+0x24c/0x3c0
> [ 2570.132634]  [] idr_alloc+0x5c/0x100
> [ 2570.159835]  [] ? sctp_hash_transport+0x110/0x290 [sctp]
> [ 2570.194974]  [] idr_alloc_cyclic+0x2b/0x60
> [ 2570.223695]  [] sctp_assoc_set_id+0x46/0xd0 [sctp]
> [ 2570.256245]  [] sctp_process_init+0x921/0x940 [sctp]
> [ 2570.288574]  [] ? sctp_csum_combine+0x10/0x10 [sctp]
> [ 2570.321758]  [] 
> sctp_cmd_interpreter.isra.25+0xe18/0x1330 [sctp]
> [ 2570.459745]  [] sctp_do_sm+0xaf/0x1b0 [sctp]
> [ 2570.489725]  [] sctp_assoc_bh_rcv+0xd5/0x140 [sctp]
> [ 2570.522628]  [] sctp_inq_push+0x4c/0x70 [sctp]
> [ 2570.553812]  [] sctp_backlog_rcv+0x40/0x130 [sctp]
> [ 2570.585534]  [] release_sock+0xa1/0x170
> [ 2570.613632]  [] __sctp_connect+0x41a/0x550 [sctp]
> [ 2570.644998]  [] ? wake_up_atomic_t+0x30/0x30
> [ 2570.675601]  [] ? _raw_spin_lock_bh+0x12/0x50
> [ 2570.705185]  [] sctp_connect+0x4d/0x70 [sctp]
> [ 2570.736234]  [] inet_dgram_connect+0x2e/0x80
> [ 2570.765514]  [] SYSC_connect+0xe7/0x120
> [ 2570.793691]  [] ? sock_alloc_file+0xa0/0x140
> [ 2570.822558]  [] ? __fd_install+0x47/0x60
> [ 2570.851732]  [] SyS_connect+0xe/0x10
> [ 2570.877433]  [] system_call_fastpath+0x16/0x1b
> [ 2570.908682] SLUB: Unable to allocate memory on node -1 (gfp=0x8000)
> [ 2570.939959]   cache: idr_layer_cache, object size: 2112, buffer size: 
> 2112, default order: 3, min order: 0
> [ 2570.989683]   node 0: slabs: 839, objs: 11829, free: 0
> 
> Signed-off-by: Xin Long 
> ---
>  net/sctp/associola.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/net/sctp/associola.c b/net/sctp/associola.c
> index 2bf8ec9..481187a 100644
> --- a/net/sctp/associola.c
> +++ b/net/sctp/associola.c
> @@ -1606,7 +1606,7 @@ int sctp_assoc_set_id(struct sctp_association *asoc, 
> gfp_t gfp)
>   idr_preload(gfp);
>   spin_lock_bh(_assocs_id_lock);
>   /* 0 is not a valid assoc_id, must be >= 1 */
> - ret = idr_alloc_cyclic(_assocs_id, asoc, 1, 0, GFP_NOWAIT);
> + ret = idr_alloc_cyclic(_assocs_id, asoc, 1, 0, gfp);
>   spin_unlock_bh(_assocs_id_lock);
>   if (preload)
>   idr_preload_end();

Are you sure idr_alloc(... GFP_KERNEL) makes sense inside spin_lock_bh()
section ?

idr_alloc() has :

might_sleep_if(gfpflags_allow_blocking(gfp_mask));

A debug kernel (CONFIG_DEBUG_ATOMIC_SLEEP=y) should probably complain at
this point ?





Re: [RFC] net: ipv4 -- Introduce ifa limit per net

2016-03-05 Thread David Miller
From: Cyrill Gorcunov 
Date: Sat, 5 Mar 2016 18:57:14 +0300

> On Fri, Mar 04, 2016 at 11:11:09PM -0500, David Miller wrote:
>> From: Eric Dumazet 
>> Date: Fri, 04 Mar 2016 16:08:30 -0800
>> 
>> > __inet_del_ifa() should probably take into account in_dev->dead (no
>> > promotion, no list scan...)
>> 
>> Indeed, that is the real problem:
> 
> Well, tried it out. Indeed it partially released the contention
> but with patch applied I stuck with
 ...
> and until everything get cleaned up I couldn't connect
> to the node via ssh. I continue playing with patch maybe
> I find some other optimization paths. Thanks!

What is the order of magnitude of the delay, as a function of
number of IP aliases installed, compared to before the patch?

The remaining cost you are seeing comes of course from the router
deletion, whose path is:

blocking_notifier_call_chain(_chain, NETDEV_DOWN, ifa1);
fib_inetaddr_event()
fib_del_ifaddr(ifa, NULL);

Which does another full list scan trying to handle primaries and
secondaries.

Probably the same optimization can be applied there, see patch below.
And if that doesn't do it, there is a really easy way to batch the
delete by scanning the FIB tree in one go and deleting every entry
that points to "in_dev".  But I suspect we really won't need that.

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 4734475..21add55 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -922,6 +922,9 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr 
*iprim)
subnet = 1;
}
 
+   if (in_dev->dead)
+   goto no_promotions;
+
/* Deletion is more complicated than add.
 * We should take care of not to delete too much :-)
 *
@@ -997,6 +1000,7 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct 
in_ifaddr *iprim)
}
}
 
+no_promotions:
if (!(ok & BRD_OK))
fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, 
prim);
if (subnet && ifa->ifa_prefixlen < 31) {



Re: [Patch] rose_route_frame() NULL pointer dereference kernel panic

2016-03-05 Thread David Miller
From: f6bvp 
Date: Sat, 5 Mar 2016 16:32:42 +0100

> I understand I did not explain clearly or completely things.
> 
> I agree that each time patched rose_xmit() is calling
> rose_route_frame() it will
> get a 0 return.
> And I think this is what was intended by the author of rose_xmit().

If that's what he intended he would have implemented the entirety of
rose_xmit() as "kfree_skb(skb)".  But that's obviously not the case.

The author meant the packet to be sent in some way, perhaps using a
default path or something like that.

So please stop telling me over and over again that this function
is meant to simply drop all packets, it's not true.


Hellion dear

2016-03-05 Thread bijohnson
Nice meet you i"m ms delson by name you got me interested on fb how are you 
doing my dear? 


[PATCH net] sctp: use gfp insteaad of GFP_NOWAIT in idr_alloc_cyclic when sctp_assoc_set_id

2016-03-05 Thread Xin Long
The following call trace appears because of idr_alloc_cyclic(..., GFP_NOWAIT),
that is a stress test, and the reason should be a heavy use. it will cause
sctp_process_init return 0, and make connection init fail.

All the allocations of idr_alloc_cyclic should respect gfp flag.
So we can fix it by using gfp instead of GFP_NOWAIT in idr_alloc_cyclic.

[ 2569.797532] Call Trace:
[ 2569.809721]  [] dump_stack+0x19/0x1b
[ 2569.837424]  [] warn_alloc_failed+0x110/0x180
[ 2569.867013]  [] __alloc_pages_nodemask+0x9a8/0xb90
[ 2569.900039]  [] alloc_pages_current+0xa9/0x170
[ 2569.930100]  [] new_slab+0x2ec/0x300
[ 2569.957505]  [] __slab_alloc+0x315/0x48f
[ 2569.985168]  [] ? idr_layer_alloc+0x89/0x90
[ 2570.014344]  [] ? memzero_explicit+0xe/0x10
[ 2570.044116]  [] kmem_cache_alloc+0x193/0x1d0
[ 2570.073283]  [] idr_layer_alloc+0x89/0x90
[ 2570.102684]  [] idr_get_empty_slot+0x24c/0x3c0
[ 2570.132634]  [] idr_alloc+0x5c/0x100
[ 2570.159835]  [] ? sctp_hash_transport+0x110/0x290 [sctp]
[ 2570.194974]  [] idr_alloc_cyclic+0x2b/0x60
[ 2570.223695]  [] sctp_assoc_set_id+0x46/0xd0 [sctp]
[ 2570.256245]  [] sctp_process_init+0x921/0x940 [sctp]
[ 2570.288574]  [] ? sctp_csum_combine+0x10/0x10 [sctp]
[ 2570.321758]  [] sctp_cmd_interpreter.isra.25+0xe18/0x1330 
[sctp]
[ 2570.459745]  [] sctp_do_sm+0xaf/0x1b0 [sctp]
[ 2570.489725]  [] sctp_assoc_bh_rcv+0xd5/0x140 [sctp]
[ 2570.522628]  [] sctp_inq_push+0x4c/0x70 [sctp]
[ 2570.553812]  [] sctp_backlog_rcv+0x40/0x130 [sctp]
[ 2570.585534]  [] release_sock+0xa1/0x170
[ 2570.613632]  [] __sctp_connect+0x41a/0x550 [sctp]
[ 2570.644998]  [] ? wake_up_atomic_t+0x30/0x30
[ 2570.675601]  [] ? _raw_spin_lock_bh+0x12/0x50
[ 2570.705185]  [] sctp_connect+0x4d/0x70 [sctp]
[ 2570.736234]  [] inet_dgram_connect+0x2e/0x80
[ 2570.765514]  [] SYSC_connect+0xe7/0x120
[ 2570.793691]  [] ? sock_alloc_file+0xa0/0x140
[ 2570.822558]  [] ? __fd_install+0x47/0x60
[ 2570.851732]  [] SyS_connect+0xe/0x10
[ 2570.877433]  [] system_call_fastpath+0x16/0x1b
[ 2570.908682] SLUB: Unable to allocate memory on node -1 (gfp=0x8000)
[ 2570.939959]   cache: idr_layer_cache, object size: 2112, buffer size: 2112, 
default order: 3, min order: 0
[ 2570.989683]   node 0: slabs: 839, objs: 11829, free: 0

Signed-off-by: Xin Long 
---
 net/sctp/associola.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 2bf8ec9..481187a 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1606,7 +1606,7 @@ int sctp_assoc_set_id(struct sctp_association *asoc, 
gfp_t gfp)
idr_preload(gfp);
spin_lock_bh(&sctp_assocs_id_lock);
/* 0 is not a valid assoc_id, must be >= 1 */
-   ret = idr_alloc_cyclic(&sctp_assocs_id, asoc, 1, 0, GFP_NOWAIT);
+   ret = idr_alloc_cyclic(&sctp_assocs_id, asoc, 1, 0, gfp);
spin_unlock_bh(&sctp_assocs_id_lock);
if (preload)
idr_preload_end();
-- 
2.1.0



Re: [RFC] net: ipv4 -- Introduce ifa limit per net

2016-03-05 Thread Cyrill Gorcunov
On Fri, Mar 04, 2016 at 11:11:09PM -0500, David Miller wrote:
> From: Eric Dumazet 
> Date: Fri, 04 Mar 2016 16:08:30 -0800
> 
> > __inet_del_ifa() should probably take into account in_dev->dead (no
> > promotion, no list scan...)
> 
> Indeed, that is the real problem:

Well, I tried it out. It did partially relieve the contention,
but with the patch applied I am now stuck with

Samples: 20K of event 'cpu-clock', Event count (approx.): 4647374938
Overhead  Shared ObjectSymbol
  19.27%  [kernel] [k] __local_bh_enable_ip
  15.97%  [kernel] [k] lock_acquire
  15.12%  [kernel] [k] fib_del_ifaddr
  11.66%  [kernel] [k] lock_release
   7.57%  [kernel] [k] lock_is_held
   5.35%  [kernel] [k] lock_acquired
   3.26%  [kernel] [k] _raw_spin_unlock_irqrestore
   3.04%  [kernel] [k] __local_bh_disable_ip
   2.10%  [kernel] [k] _raw_spin_unlock_irq
   1.54%  [kernel] [k] native_save_fl
   1.37%  [kernel] [k] ___might_sleep
   0.90%  [kernel] [k] do_raw_spin_trylock
   0.83%  [kernel] [k] nf_ct_iterate_cleanup
   0.77%  [kernel] [k] debug_lockdep_rcu_enabled
   0.62%  [kernel] [k] tick_nohz_idle_enter
   0.61%  [kernel] [k] _raw_spin_lock
   0.58%  [kernel] [k] __slab_alloc.isra.43.constprop.47
   0.42%  [kernel] [k] get_parent_ip
   0.40%  [kernel] [k] preempt_count_sub
   0.36%  [kernel] [k] native_save_fl
   0.34%  [kernel] [k] _raw_spin_unlock
   0.31%  [kernel] [k] do_raw_spin_unlock

and until everything got cleaned up I couldn't connect
to the node via ssh. I'll continue playing with the patch; maybe
I'll find some other optimization paths. Thanks!


Re: Possible double-free in the usbnet driver

2016-03-05 Thread Oliver Neukum
On Fri, 2016-03-04 at 14:43 -0800, Linus Torvalds wrote:

> So you have usbnet_defer_kevent() getting triggered, which in turn
> ends up using "usbnet->kevent"
> 
> But somebody like Oliver is really the right person to check this. For
> example, it's entirely possible that we should just instead do
> 
> cancel_work_sync(&dev->kevent);
> 
> before the "free_netdev(net)" in the "out1" label.

Hi Bjorn,

I think Linus has analyzed this correctly, but the fix really needs
to cancel the work, because we can also fail later after bind() has
already run. However, cdc-ncm and the other drivers should still clean
up after themselves if bind() fails, as usbnet really cannot know what
the subdrivers have done.

So in conclusion, I think Linus' fix should also go into cdc-ncm.

Regards
Oliver




Re: [Patch] rose_route_frame() NULL pointer dereference kernel panic

2016-03-05 Thread f6bvp

David,

I understand that I did not explain things clearly or completely.

I agree that each time patched rose_xmit() is calling rose_route_frame() 
it will

get a 0 return.
And I think this is what was intended by the author of rose_xmit().
He wrote a null argument in order to obtain this result but this situation
was never reached until I configured a secondary network with the
following attributes (lack of route gateway) and thus the bug had not
been detected before.

/sbin/ifconfig enp4s0:1 44.168.19.22 netmask 255.255.255.240

With the original rose_route_frame(), when ax25cmp() was called with a NULL
argument it always hit a NULL pointer dereference and a kernel panic
occurred.

I conducted a few trials with printks in the rose functions and a patched
rose_xmit(); otherwise a kernel panic would have occurred.
For my setting of rose network, I configured a device axudp to send
encapsulated AX.25 frames into UDP frames via ax25ipd daemon.
First, I set ax25ipd configuration with a rose neighbour with a local
network address.

# Route HAMNET
# F6BVP-10/11
route f6bvp-10 44.168.19.19 udp 10093
route f6bvp-11 44.168.19.19 udp 10093 b
#

kernel route looks like:

Destination Passerelle  Genmask Indic Metric Ref Use Iface
0.0.0.0 192.168.0.254   0.0.0.0 UG10 00 enp4s0
44.168.19.160.0.0.0 255.255.255.240 U 0 00 enp4s0
169.254.0.0 0.0.0.0 255.255.0.0 U 10 00 enp4s0
192.168.0.0 0.0.0.0 255.255.255.0   U 10 00 enp4s0

Those are printks when starting AX.25, and  ROSE fpac node application:

[  410.759423] NET: Registered protocol family 3
[  410.784477] mkiss: AX.25 Multikiss, Hans Albas PE1AYX
[  410.785506] mkiss: ax0: crc mode is auto.
[  410.786135] IPv6: ADDRCONF(NETDEV_CHANGE): ax0: link becomes ready
[  411.011461] ROSE: rose_setup()
[  411.012685] ROSE: rose_setup()
[  411.014506] ROSE: rose_setup()
[  411.016902] ROSE: rose_setup()
[  411.021736] ROSE: rose_setup()
[  411.023884] ROSE: rose_setup()
[  411.026132] ROSE: rose_setup()
[  411.028349] ROSE: rose_setup()
[  411.030648] ROSE: rose_setup()
[  411.032975] ROSE: rose_setup()
[  411.033688] NET: Registered protocol family 11
[  411.037511] ROSE: rose_set_mac_address()
[  411.037987] ROSE: rose_open()
[  412.041361] ROSE: rose_connect()
[  414.053240] ROSE: rose_connect()
[  414.053599] mkiss: ax0: Trying crc-smack
[  414.058881] mkiss: ax0: Trying crc-flexnet
[  414.086670] ROSE: rose_route_frame()
[  414.152265] ROSE: rose_route_frame()
[  471.414622] ROSE: rose_connect()
[  471.449136] ROSE: rose_route_frame()
[  471.694472] ROSE: rose_route_frame()
[  471.695823] ROSE: rose_recvmsg()

The fpacnode client application shows that the node is connected to the
local neighbour and the application works normally.

The next configuration trial was with ax25ipd.conf configured for a
remote subnet rose neighbour:

# Route HAMNET
# F6BVP-10/11
route f6cnb-9 44.168.12.18 udp 10092 b
route f6cnb-11 44.168.12.20 udp 10092 b
#

Kernel route table is the same as before, i.e. still without any gateway
for 44.0.0.0 route.

This time printks show a different scenario when starting rose:

[ 1863.750045] mkiss: AX.25 Multikiss, Hans Albas PE1AYX
[ 1863.751165] mkiss: ax0: crc mode is auto.
[ 1863.755760] IPv6: ADDRCONF(NETDEV_CHANGE): ax0: link becomes ready
[ 1863.792418] ROSE: rose_set_mac_address()
[ 1863.795116] ROSE: rose_open()
[ 1864.797375] ROSE: rose_connect()
[ 1866.809240] ROSE: rose_connect()
[ 1866.809662] mkiss: ax0: Trying crc-smack
[ 1866.811740] ROSE: rose_connect()
[ 1866.811983] ROSE: rose_header()
[ 1866.811990] ROSE: rose_xmit()
[ 1866.811990] ROSE: rose_route_frame()
[ 1866.811992] rose_route : unknown neighbour or device '*'
[ 1866.813647] mkiss: ax0: Trying crc-flexnet
[ 1866.815623] ROSE: rose_header()
[ 1866.817228] ROSE: rose_xmit()
[ 1866.818808] ROSE: rose_route_frame()
[ 1866.820411] rose_route : unknown neighbour or device '*'
[ 1876.832984] ROSE: rose_header()
[ 1876.834572] ROSE: rose_xmit()
[ 1876.836093] ROSE: rose_route_frame()
[ 1876.837614] rose_route : unknown neighbour or device '*'
[ 1876.839574] ROSE: rose_header()
[ 1876.841099] ROSE: rose_xmit()
[ 1876.842586] ROSE: rose_route_frame()
[ 1876.844083] rose_route : unknown neighbour or device '*'

With the patch ax25cmp() comparison fails and message
unknown neighbor or device '*'
is correctly displayed for rose_neigh == NULL

Of course, because there is no route or gateway toward this subnetwork
in kernel route table, neighbour node is not connected and fpacnode 
application

is informed about it. Thus system operator knows there is something
wrong in rose network configuration.

Next trial was performed after adding a gateway toward remote rose neighbour
sub net.

/sbin/route add -net 44.0.0.0/8 gw 44.168.19.17

This time starting rose shows no more rose_xmit():

[ 9871.374021] mkiss: AX.25 Multikiss, Hans Albas PE1AYX
[ 9871.375418] mkiss: ax0: crc mode is auto.
[ 9871.376747] IPv6: 

Re: [PATCH net] bridge: a netlink notification should be sent whenever those attributes change

2016-03-05 Thread Xin Long
On Thu, Mar 3, 2016 at 8:29 PM, Nikolay Aleksandrov
 wrote:
>
> This is incorrect because you don't have rtnl here, bridge device sysfs
> options take care of rtnl only on per-option basis and they obtain and
> release it themselves, so you won't have rtnl held when you call
> netdev_state_change. While I agree that this is needed, a larger change
> would be necessary for br_sysfs_br.c.
Sorry, I can't follow you, because I didn't see rtnl being held in dev_ioctl either, e.g.:
ipip6_tunnel_ioctl
ipip6_tunnel_update
netdev_state_change

Why does sysfs have to hold rtnl?

> Off-topic: I've been looking into factoring out the bond option API and 
> reusing
> it here as it already has all of this handled, but I won't have time to finish
> it before the next merge window, so if you fix the issue here I'm okay with
> this as interim solution.
>
> Cheers,
>  Nik
>


Re: [PATCH 2/2] isdn: i4l: move active-isdn drivers to staging

2016-03-05 Thread Tilman Schmidt
Am 04.03.2016 um 17:18 schrieb Paul Bolle:
> [Added Tilman and Christoph.]
> 
> On vr, 2016-03-04 at 16:24 +0100, Arnd Bergmann wrote:
>> I actually did more patches that I ended up not submitting:
>>
>> * move hisax to staging
>> * remove i4l support from gigaset
> 
> For the record: I have no reason to object a patch that does that. (I'm
> not aware anyone complained when gigaset switched its default from i4l
> to capi. By now all relevant distributions should use our capi driver.)

No objection from me either. When the Gigaset driver is built for CAPI
it can still be used from i4l applications via capidrv with no loss of
functionality. That was a primary goal of the CAPI port.

>> * move i4l core to staging

That's a different story. Removing i4l support will actually remove a
userspace visible feature.

> On a local tree I have two (draft) patches that do some related
> preliminary work:
> - isdnhdlc: move into separate directory
> - mISDN: NETJet: stop selecting ISDN_I4L
> 
> These trivial patches untangle mISDN and i4l.

That would be a good thing regardless of any decision on the future of
the i4l userspace interface.

> For my part I'm surprised that anyone is still using it. But apparently
> the hardware that required commit 19cebbcb04c8 and 3460baa62068  (which
> I'm unfamiliar with) is still operational. And since there never has
> been, as far as I know, a global i4l to capi migration nor a global i4l
> (and capi) to mISDN migration it might be that some people are stuck on
> i4l drivers for their hardware. Perhaps that explains Christoph's
> commits.

The trouble is that mISDN never cared about migration or backward
compatibility. So while users of i4l applications have no problem with
i4l drivers being ported to CAPI and dropping native i4l support, they
do have a problem with drivers making that move to mISDN.

That is the situation of the hisax driver today. mISDN started as a
project to migrate hisax to CAPI but regrettably dropped that goal in
favor of a newly invented API leaving old i4l based applications behind.
As a consequence, owners of HiSAX type adapters are in fact stuck with
the old hisax driver if they want to continue using i4l userspace tools.

In my opinion, i4l, capidrv and hisax need to stay in the supported part
for the time being as they are still actively used. Native i4l support
can and should be dropped for hardware with CAPI drivers (ie. gigaset)
but not for hardware with only mISDN drivers (ie. hisax). And finally,
ISDN_CAPI_CAPIDRV should be enabled automatically if both ISDN_I4L and
ISDN_CAPI are enabled, ie. something like:

--- a/drivers/isdn/capi/Kconfig
+++ b/drivers/isdn/capi/Kconfig
@@ -27,8 +27,8 @@ config ISDN_CAPI_MIDDLEWARE
  your ISP, say Y here.

 config ISDN_CAPI_CAPIDRV
-   tristate "CAPI2.0 capidrv interface support"
-   depends on ISDN_I4L
+   tristate
+   default ISDN_I4L
help
  This option provides the glue code to hook up CAPI driven cards to
  the legacy isdn4linux link layer.  If you have a card which is


Jm2c,
Tilman

-- 
Tilman Schmidt  E-Mail: til...@imap.cc
Bonn, Germany
Nous, on a des fleurs et des bougies pour nous protéger.



signature.asc
Description: OpenPGP digital signature


Re: [PATCH nf 2/3] netfilter: ipvs: allow rescheduling after RST

2016-03-05 Thread Julian Anastasov

Hello,

On Thu, 18 Feb 2016, Sergei Shtylyov wrote:

> On 2/18/2016 3:41 AM, Simon Horman wrote:
> 
> > From: Julian Anastasov 
> >
> > "RFC 5961, 4.2. Mitigation" describes a mechanism to request
> > client to confirm with RST the restart of TCP connection
> > before resending its SYN. As result, IPVS can see SYNs for
> > existing connection in CLOSE state. Add check to allow
> > rescheduling in this state.
> >
> > Signed-off-by: Julian Anastasov 
> > Signed-off-by: Simon Horman 
> > ---
> >   net/netfilter/ipvs/ip_vs_core.c | 1 +
> >   1 file changed, 1 insertion(+)
> >
> > diff --git a/net/netfilter/ipvs/ip_vs_core.c
> > b/net/netfilter/ipvs/ip_vs_core.c
> > index 4da560005b0e..0c1d3fef9a7c 100644
> > --- a/net/netfilter/ipvs/ip_vs_core.c
> > +++ b/net/netfilter/ipvs/ip_vs_core.c
> > @@ -1089,6 +1089,7 @@ static inline bool is_new_conn_expected(const struct
> > ip_vs_conn *cp,
> >switch (cp->protocol) {
> >case IPPROTO_TCP:
> > return (cp->state == IP_VS_TCP_S_TIME_WAIT) ||
> > +   cp->state == IP_VS_TCP_S_CLOSE ||
> 
>I would have been consistent and enclosed this expression into parens as
> well. BTW, the indentation is not correct anyway.

Thanks! I'll send new version...

> 
> >  ((conn_reuse_mode & 2) &&
> >   (cp->state == IP_VS_TCP_S_FIN_WAIT) &&
> >   (cp->flags & IP_VS_CONN_F_NOOUTPUT));
> 
> MBR, Sergei

Regards

--
Julian Anastasov 


Re: [PATCH net 1/3] net: validate variable length ll headers

2016-03-05 Thread walter harms


Am 04.03.2016 21:44, schrieb Willem de Bruijn:
> From: Willem de Bruijn 
> 
> Netdevice parameter hard_header_len is variously interpreted both as
> an upper and lower bound on link layer header length. The field is
> used as upper bound when reserving room at allocation, as lower bound
> when validating user input in PF_PACKET.
> 
> Clarify the definition to be maximum header length. For validation
> of untrusted headers, add an optional validate member to header_ops.
> 
> Allow bypassing of validation by passing CAP_SYS_RAWIO, for instance
> for deliberate testing of corrupt input. In this case, pad trailing
> bytes, as some device drivers expect completely initialized headers.
> 
> See also http://comments.gmane.org/gmane.linux.network/401064
> 
> Signed-off-by: Willem de Bruijn 
> ---
>  include/linux/netdevice.h | 22 --
>  1 file changed, 20 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 5440b7b..6d1d8f4 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -267,6 +267,7 @@ struct header_ops {
>   void(*cache_update)(struct hh_cache *hh,
>   const struct net_device *dev,
>   const unsigned char *haddr);
> + bool(*validate)(const char *ll_header, unsigned int len);
>  };
>  
>  /* These flag bits are private to the generic network queueing
> @@ -1420,8 +1421,7 @@ enum netdev_priv_flags {
>   *   @dma:   DMA channel
>   *   @mtu:   Interface MTU value
>   *   @type:  Interface hardware type
> - *   @hard_header_len: Hardware header length, which means that this is the
> - * minimum size of a packet.
> + *   @hard_header_len: Maximum hardware header length.
>   *
>   *   @needed_headroom: Extra headroom the hardware may need, but not in all
>   * cases can this be guaranteed
> @@ -2627,6 +2627,24 @@ static inline int dev_parse_header(const struct 
> sk_buff *skb,
>   return dev->header_ops->parse(skb, haddr);
>  }
>  
> +/* ll_header must have at least hard_header_len allocated */
> +static inline bool dev_validate_header(const struct net_device *dev,
> +char *ll_header, int len)
> +{
> + if (likely(len >= dev->hard_header_len))
> + return true;
> +
> + if (capable(CAP_SYS_RAWIO)) {
> + memset(ll_header + len, 0, dev->hard_header_len - len);
> + return true;
> + }
> +
> + if (dev->header_ops && dev->header_ops->validate)
> + return dev->header_ops->validate(ll_header, len);
> +
> + return false;
> +}
> +


you could use

real_len=dev->hard_header_len-len;

if (real_len < 0)
...
if (capable(CAP_SYS_RAWIO))
memset(ll_header + len, 0,real_len);
..

IMHO that makes the code more clear.

re,
 wh



>  typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, 
> int len);
>  int register_gifconf(unsigned int family, gifconf_func_t *gifconf);
>  static inline int unregister_gifconf(unsigned int family)