[PATCH net-next v2 01/13] net: dsa: Change DSA slave FDB API to be switchdev independent

2017-07-19 Thread Arkadi Sharshevsky
In order to support FDB add/del on a notifier chain, the slave
API needs to be changed to be switchdev independent.

Signed-off-by: Arkadi Sharshevsky 
Reviewed-by: Vivien Didelot 
Reviewed-by: Florian Fainelli 
---
 drivers/net/dsa/b53/b53_common.c   | 12 +---
 drivers/net/dsa/b53/b53_priv.h |  8 +++-
 drivers/net/dsa/microchip/ksz_common.c | 34 --
 drivers/net/dsa/mt7530.c   | 14 ++
 drivers/net/dsa/mv88e6xxx/chip.c   | 12 +---
 drivers/net/dsa/qca8k.c| 15 ++-
 include/net/dsa.h  |  8 +++-
 net/dsa/switch.c   |  8 +---
 8 files changed, 49 insertions(+), 62 deletions(-)

diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index e68d368..d0156dc 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1214,8 +1214,7 @@ static int b53_arl_op(struct b53_device *dev, int op, int 
port,
 }
 
 int b53_fdb_prepare(struct dsa_switch *ds, int port,
-   const struct switchdev_obj_port_fdb *fdb,
-   struct switchdev_trans *trans)
+   const unsigned char *addr, u16 vid)
 {
struct b53_device *priv = ds->priv;
 
@@ -1230,22 +1229,21 @@ int b53_fdb_prepare(struct dsa_switch *ds, int port,
 EXPORT_SYMBOL(b53_fdb_prepare);
 
 void b53_fdb_add(struct dsa_switch *ds, int port,
-const struct switchdev_obj_port_fdb *fdb,
-struct switchdev_trans *trans)
+const unsigned char *addr, u16 vid)
 {
struct b53_device *priv = ds->priv;
 
-   if (b53_arl_op(priv, 0, port, fdb->addr, fdb->vid, true))
+   if (b53_arl_op(priv, 0, port, addr, vid, true))
pr_err("%s: failed to add MAC address\n", __func__);
 }
 EXPORT_SYMBOL(b53_fdb_add);
 
 int b53_fdb_del(struct dsa_switch *ds, int port,
-   const struct switchdev_obj_port_fdb *fdb)
+   const unsigned char *addr, u16 vid)
 {
struct b53_device *priv = ds->priv;
 
-   return b53_arl_op(priv, 0, port, fdb->addr, fdb->vid, false);
+   return b53_arl_op(priv, 0, port, addr, vid, false);
 }
 EXPORT_SYMBOL(b53_fdb_del);
 
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index 155a9c4..d417bca 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -397,13 +397,11 @@ int b53_vlan_dump(struct dsa_switch *ds, int port,
  struct switchdev_obj_port_vlan *vlan,
  switchdev_obj_dump_cb_t *cb);
 int b53_fdb_prepare(struct dsa_switch *ds, int port,
-   const struct switchdev_obj_port_fdb *fdb,
-   struct switchdev_trans *trans);
+   const unsigned char *addr, u16 vid);
 void b53_fdb_add(struct dsa_switch *ds, int port,
-const struct switchdev_obj_port_fdb *fdb,
-struct switchdev_trans *trans);
+const unsigned char *addr, u16 vid);
 int b53_fdb_del(struct dsa_switch *ds, int port,
-   const struct switchdev_obj_port_fdb *fdb);
+   const unsigned char *addr, u16 vid);
 int b53_fdb_dump(struct dsa_switch *ds, int port,
 struct switchdev_obj_port_fdb *fdb,
 switchdev_obj_dump_cb_t *cb);
diff --git a/drivers/net/dsa/microchip/ksz_common.c 
b/drivers/net/dsa/microchip/ksz_common.c
index b313ecd..db82808 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -679,8 +679,7 @@ static int ksz_port_vlan_dump(struct dsa_switch *ds, int 
port,
 }
 
 static int ksz_port_fdb_prepare(struct dsa_switch *ds, int port,
-   const struct switchdev_obj_port_fdb *fdb,
-   struct switchdev_trans *trans)
+   const unsigned char *addr, u16 vid)
 {
/* nothing needed */
 
@@ -707,8 +706,7 @@ struct alu_struct {
 };
 
 static void ksz_port_fdb_add(struct dsa_switch *ds, int port,
-const struct switchdev_obj_port_fdb *fdb,
-struct switchdev_trans *trans)
+const unsigned char *addr, u16 vid)
 {
struct ksz_device *dev = ds->priv;
u32 alu_table[4];
@@ -717,12 +715,12 @@ static void ksz_port_fdb_add(struct dsa_switch *ds, int 
port,
mutex_lock(>alu_mutex);
 
/* find any entry with mac & vid */
-   data = fdb->vid << ALU_FID_INDEX_S;
-   data |= ((fdb->addr[0] << 8) | fdb->addr[1]);
+   data = vid << ALU_FID_INDEX_S;
+   data |= ((addr[0] << 8) | addr[1]);
ksz_write32(dev, REG_SW_ALU_INDEX_0, data);
 
-   data = ((fdb->addr[2] << 24) | (fdb->addr[3] << 16));
-   data |= ((fdb->addr[4] << 8) | fdb->addr[5]);
+   data = ((addr[2] << 24) | 

[PATCH net-next v2 05/13] net: dsa: Remove support for FDB add/del via SELF

2017-07-19 Thread Arkadi Sharshevsky
FDB add/del can be added via switchdev notification chain. Thus the support
for configuration via switchdev objects can be removed.

Signed-off-by: Arkadi Sharshevsky 
Reviewed-by: Vivien Didelot 
---
 net/dsa/slave.c | 12 
 1 file changed, 12 deletions(-)

diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index f595133..6bd2d42 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -250,12 +250,6 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
 */
 
switch (obj->id) {
-   case SWITCHDEV_OBJ_ID_PORT_FDB:
-   if (switchdev_trans_ph_prepare(trans))
-   return 0;
-   err = dsa_port_fdb_add(dp, SWITCHDEV_OBJ_PORT_FDB(obj)->addr,
-  SWITCHDEV_OBJ_PORT_FDB(obj)->vid);
-   break;
case SWITCHDEV_OBJ_ID_PORT_MDB:
err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj), trans);
break;
@@ -279,10 +273,6 @@ static int dsa_slave_port_obj_del(struct net_device *dev,
int err;
 
switch (obj->id) {
-   case SWITCHDEV_OBJ_ID_PORT_FDB:
-   err = dsa_port_fdb_del(dp, SWITCHDEV_OBJ_PORT_FDB(obj)->addr,
-  SWITCHDEV_OBJ_PORT_FDB(obj)->vid);
-   break;
case SWITCHDEV_OBJ_ID_PORT_MDB:
err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
break;
@@ -925,8 +915,6 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
.ndo_change_rx_flags= dsa_slave_change_rx_flags,
.ndo_set_rx_mode= dsa_slave_set_rx_mode,
.ndo_set_mac_address= dsa_slave_set_mac_address,
-   .ndo_fdb_add= switchdev_port_fdb_add,
-   .ndo_fdb_del= switchdev_port_fdb_del,
.ndo_fdb_dump   = switchdev_port_fdb_dump,
.ndo_do_ioctl   = dsa_slave_ioctl,
.ndo_get_iflink = dsa_slave_get_iflink,
-- 
2.4.11



[PATCH net-next v2 06/13] net: dsa: Add support for querying supported bridge flags

2017-07-19 Thread Arkadi Sharshevsky
The DSA drivers do not support bridge flags offload. Yet, this attribute
should be added in order for the bridge to fail when one tries to set a
flag on the port, as explained in commit dc0ecabd6231 ("net: switchdev:
Add support for querying supported bridge flags by hardware").

Signed-off-by: Arkadi Sharshevsky 
Reviewed-by: Vivien Didelot 
---
 net/dsa/slave.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 6bd2d42..3ad1f4d 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -324,6 +324,9 @@ static int dsa_slave_port_attr_get(struct net_device *dev,
attr->u.ppid.id_len = sizeof(ds->index);
memcpy(>u.ppid.id, >index, attr->u.ppid.id_len);
break;
+   case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT:
+   attr->u.brport_flags_support = 0;
+   break;
default:
return -EOPNOTSUPP;
}
-- 
2.4.11



[PATCH net-next v2 13/13] net: switchdev: Remove bridge bypass support from switchdev

2017-07-19 Thread Arkadi Sharshevsky
Currently the bridge port flags, vlans, FDBs and MDBs can be offloaded
through the bridge code, making switchdev's SELF bridge bypass
implementation redundant. This implies several changes:
- No need for dump infra in switchdev, DSA's special case is handled
  privately.
- Remove obj_dump from switchdev_ops.
- FDBs are removed from obj_add/del routines, due to the fact that they
  are offloaded through the bridge notification chain.
- The switchdev_port_bridge_xx() and switchdev_port_fdb_xx() functions
  can be removed.

Signed-off-by: Arkadi Sharshevsky 
Reviewed-by: Vivien Didelot 
---
v1->v2
- Fix typo in commit message.
---
 include/net/switchdev.h   |  75 
 net/switchdev/switchdev.c | 435 --
 2 files changed, 510 deletions(-)

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index d2637a6..d767b79 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -74,7 +74,6 @@ struct switchdev_attr {
 enum switchdev_obj_id {
SWITCHDEV_OBJ_ID_UNDEFINED,
SWITCHDEV_OBJ_ID_PORT_VLAN,
-   SWITCHDEV_OBJ_ID_PORT_FDB,
SWITCHDEV_OBJ_ID_PORT_MDB,
 };
 
@@ -97,17 +96,6 @@ struct switchdev_obj_port_vlan {
 #define SWITCHDEV_OBJ_PORT_VLAN(obj) \
container_of(obj, struct switchdev_obj_port_vlan, obj)
 
-/* SWITCHDEV_OBJ_ID_PORT_FDB */
-struct switchdev_obj_port_fdb {
-   struct switchdev_obj obj;
-   unsigned char addr[ETH_ALEN];
-   u16 vid;
-   u16 ndm_state;
-};
-
-#define SWITCHDEV_OBJ_PORT_FDB(obj) \
-   container_of(obj, struct switchdev_obj_port_fdb, obj)
-
 /* SWITCHDEV_OBJ_ID_PORT_MDB */
 struct switchdev_obj_port_mdb {
struct switchdev_obj obj;
@@ -135,8 +123,6 @@ typedef int switchdev_obj_dump_cb_t(struct switchdev_obj 
*obj);
  * @switchdev_port_obj_add: Add an object to port (see switchdev_obj_*).
  *
  * @switchdev_port_obj_del: Delete an object from port (see switchdev_obj_*).
- *
- * @switchdev_port_obj_dump: Dump port objects (see switchdev_obj_*).
  */
 struct switchdev_ops {
int (*switchdev_port_attr_get)(struct net_device *dev,
@@ -149,9 +135,6 @@ struct switchdev_ops {
  struct switchdev_trans *trans);
int (*switchdev_port_obj_del)(struct net_device *dev,
  const struct switchdev_obj *obj);
-   int (*switchdev_port_obj_dump)(struct net_device *dev,
-  struct switchdev_obj *obj,
-  switchdev_obj_dump_cb_t *cb);
 };
 
 enum switchdev_notifier_type {
@@ -189,25 +172,10 @@ int switchdev_port_obj_add(struct net_device *dev,
   const struct switchdev_obj *obj);
 int switchdev_port_obj_del(struct net_device *dev,
   const struct switchdev_obj *obj);
-int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj,
-   switchdev_obj_dump_cb_t *cb);
 int register_switchdev_notifier(struct notifier_block *nb);
 int unregister_switchdev_notifier(struct notifier_block *nb);
 int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
 struct switchdev_notifier_info *info);
-int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
- struct net_device *dev, u32 filter_mask,
- int nlflags);
-int switchdev_port_bridge_setlink(struct net_device *dev,
- struct nlmsghdr *nlh, u16 flags);
-int switchdev_port_bridge_dellink(struct net_device *dev,
- struct nlmsghdr *nlh, u16 flags);
-int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
-  struct net_device *dev, const unsigned char *addr,
-  u16 vid, u16 nlm_flags);
-int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
-  struct net_device *dev, const unsigned char *addr,
-  u16 vid);
 void switchdev_port_fwd_mark_set(struct net_device *dev,
 struct net_device *group_dev,
 bool joining);
@@ -246,13 +214,6 @@ static inline int switchdev_port_obj_del(struct net_device 
*dev,
return -EOPNOTSUPP;
 }
 
-static inline int switchdev_port_obj_dump(struct net_device *dev,
- const struct switchdev_obj *obj,
- switchdev_obj_dump_cb_t *cb)
-{
-   return -EOPNOTSUPP;
-}
-
 static inline int register_switchdev_notifier(struct notifier_block *nb)
 {
return 0;
@@ -270,42 +231,6 @@ static inline int call_switchdev_notifiers(unsigned long 
val,
return NOTIFY_DONE;
 }
 
-static inline int switchdev_port_bridge_getlink(struct sk_buff *skb, 

[PATCH net-next v2 11/13] net: dsa: Move FDB dump implementation inside DSA

2017-07-19 Thread Arkadi Sharshevsky
Of all switchdev devices, only DSA requires a special FDB dump. This is due
to the lack of ability to sync the hardware-learned FDBs with the bridge.
Because of this, it is removed from switchdev and moved inside DSA.

Signed-off-by: Arkadi Sharshevsky 
---
v1->v2
- Cosmetics. ndm_state->is_static.
---
 drivers/net/dsa/b53/b53_common.c   |  16 ++
 drivers/net/dsa/b53/b53_priv.h |   3 +-
 drivers/net/dsa/microchip/ksz_common.c |  20 ++-
 drivers/net/dsa/mt7530.c   |  10 +---
 drivers/net/dsa/mv88e6xxx/chip.c   |  38 -
 drivers/net/dsa/qca8k.c|  15 ++---
 include/net/dsa.h  |   5 +-
 include/net/switchdev.h|  12 
 net/dsa/dsa_priv.h |   2 -
 net/dsa/port.c |  11 
 net/dsa/slave.c| 100 +
 net/switchdev/switchdev.c  |  84 ---
 12 files changed, 112 insertions(+), 204 deletions(-)

diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 6020e88..28e06b6 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1227,8 +1227,7 @@ static void b53_arl_search_rd(struct b53_device *dev, u8 
idx,
 }
 
 static int b53_fdb_copy(int port, const struct b53_arl_entry *ent,
-   struct switchdev_obj_port_fdb *fdb,
-   switchdev_obj_dump_cb_t *cb)
+   dsa_fdb_dump_cb_t *cb, void *data)
 {
if (!ent->is_valid)
return 0;
@@ -1236,16 +1235,11 @@ static int b53_fdb_copy(int port, const struct 
b53_arl_entry *ent,
if (port != ent->port)
return 0;
 
-   ether_addr_copy(fdb->addr, ent->mac);
-   fdb->vid = ent->vid;
-   fdb->ndm_state = ent->is_static ? NUD_NOARP : NUD_REACHABLE;
-
-   return cb(>obj);
+   return cb(ent->mac, ent->vid, ent->is_static, data);
 }
 
 int b53_fdb_dump(struct dsa_switch *ds, int port,
-struct switchdev_obj_port_fdb *fdb,
-switchdev_obj_dump_cb_t *cb)
+dsa_fdb_dump_cb_t *cb, void *data)
 {
struct b53_device *priv = ds->priv;
struct b53_arl_entry results[2];
@@ -1263,13 +1257,13 @@ int b53_fdb_dump(struct dsa_switch *ds, int port,
return ret;
 
b53_arl_search_rd(priv, 0, [0]);
-   ret = b53_fdb_copy(port, [0], fdb, cb);
+   ret = b53_fdb_copy(port, [0], cb, data);
if (ret)
return ret;
 
if (priv->num_arl_entries > 2) {
b53_arl_search_rd(priv, 1, [1]);
-   ret = b53_fdb_copy(port, [1], fdb, cb);
+   ret = b53_fdb_copy(port, [1], cb, data);
if (ret)
return ret;
 
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index af5d6c1..01bd8cb 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -398,8 +398,7 @@ int b53_fdb_add(struct dsa_switch *ds, int port,
 int b53_fdb_del(struct dsa_switch *ds, int port,
const unsigned char *addr, u16 vid);
 int b53_fdb_dump(struct dsa_switch *ds, int port,
-struct switchdev_obj_port_fdb *fdb,
-switchdev_obj_dump_cb_t *cb);
+dsa_fdb_dump_cb_t *cb, void *data);
 int b53_mirror_add(struct dsa_switch *ds, int port,
   struct dsa_mall_mirror_tc_entry *mirror, bool ingress);
 void b53_mirror_del(struct dsa_switch *ds, int port,
diff --git a/drivers/net/dsa/microchip/ksz_common.c 
b/drivers/net/dsa/microchip/ksz_common.c
index 4de9d90..56cd6d3 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -805,12 +805,11 @@ static void convert_alu(struct alu_struct *alu, u32 
*alu_table)
 }
 
 static int ksz_port_fdb_dump(struct dsa_switch *ds, int port,
-struct switchdev_obj_port_fdb *fdb,
-switchdev_obj_dump_cb_t *cb)
+dsa_fdb_dump_cb_t *cb, void *data)
 {
struct ksz_device *dev = ds->priv;
int ret = 0;
-   u32 data;
+   u32 ksz_data;
u32 alu_table[4];
struct alu_struct alu;
int timeout;
@@ -823,8 +822,8 @@ static int ksz_port_fdb_dump(struct dsa_switch *ds, int 
port,
do {
timeout = 1000;
do {
-   ksz_read32(dev, REG_SW_ALU_CTRL__4, );
-   if ((data & ALU_VALID) || !(data & ALU_START))
+   ksz_read32(dev, REG_SW_ALU_CTRL__4, _data);
+   if ((ksz_data & ALU_VALID) || !(ksz_data & ALU_START))
break;
usleep_range(1, 10);
} while (timeout-- > 0);
@@ 

[PATCH net-next v2 12/13] net: bridge: Remove FDB deletion through switchdev object

2017-07-19 Thread Arkadi Sharshevsky
At this point no driver supports FDB add/del through switchdev object
but rather via notification chain, thus, it is removed.

Signed-off-by: Arkadi Sharshevsky 
Reviewed-by: Vivien Didelot 
---
 net/bridge/br_fdb.c | 18 --
 1 file changed, 18 deletions(-)

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index a5e4a73..a79b648 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -169,29 +169,11 @@ static void fdb_del_hw_addr(struct net_bridge *br, const 
unsigned char *addr)
}
 }
 
-static void fdb_del_external_learn(struct net_bridge_fdb_entry *f)
-{
-   struct switchdev_obj_port_fdb fdb = {
-   .obj = {
-   .orig_dev = f->dst->dev,
-   .id = SWITCHDEV_OBJ_ID_PORT_FDB,
-   .flags = SWITCHDEV_F_DEFER,
-   },
-   .vid = f->vlan_id,
-   };
-
-   ether_addr_copy(fdb.addr, f->addr.addr);
-   switchdev_port_obj_del(f->dst->dev, );
-}
-
 static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f)
 {
if (f->is_static)
fdb_del_hw_addr(br, f->addr.addr);
 
-   if (f->added_by_external_learn)
-   fdb_del_external_learn(f);
-
hlist_del_init_rcu(>hlist);
fdb_notify(br, f, RTM_DELNEIGH);
call_rcu(>rcu, fdb_rcu_free);
-- 
2.4.11



[PATCH net-next v2 09/13] net: dsa: Remove support for MDB dump from DSA's drivers

2017-07-19 Thread Arkadi Sharshevsky
This is done as a preparation before removing support for MDB dump from
DSA core. The MDBs are synced with the bridge and thus there is no
need for special dump operation support.

Signed-off-by: Arkadi Sharshevsky 
---
 drivers/net/dsa/microchip/ksz_common.c |  9 -
 drivers/net/dsa/mv88e6xxx/chip.c   | 24 
 2 files changed, 33 deletions(-)

diff --git a/drivers/net/dsa/microchip/ksz_common.c 
b/drivers/net/dsa/microchip/ksz_common.c
index a53ce59..4de9d90 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -1020,14 +1020,6 @@ static int ksz_port_mdb_del(struct dsa_switch *ds, int 
port,
return ret;
 }
 
-static int ksz_port_mdb_dump(struct dsa_switch *ds, int port,
-struct switchdev_obj_port_mdb *mdb,
-switchdev_obj_dump_cb_t *cb)
-{
-   /* this is not called by switch layer */
-   return 0;
-}
-
 static int ksz_port_mirror_add(struct dsa_switch *ds, int port,
   struct dsa_mall_mirror_tc_entry *mirror,
   bool ingress)
@@ -1090,7 +1082,6 @@ static const struct dsa_switch_ops ksz_switch_ops = {
.port_mdb_prepare   = ksz_port_mdb_prepare,
.port_mdb_add   = ksz_port_mdb_add,
.port_mdb_del   = ksz_port_mdb_del,
-   .port_mdb_dump  = ksz_port_mdb_dump,
.port_mirror_add= ksz_port_mirror_add,
.port_mirror_del= ksz_port_mirror_del,
 };
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 9cc6269..97b77b9 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -1443,15 +1443,6 @@ static int mv88e6xxx_port_db_dump_fid(struct 
mv88e6xxx_chip *chip,
fdb->ndm_state = NUD_NOARP;
else
fdb->ndm_state = NUD_REACHABLE;
-   } else if (obj->id == SWITCHDEV_OBJ_ID_PORT_MDB) {
-   struct switchdev_obj_port_mdb *mdb;
-
-   if (!is_multicast_ether_addr(addr.mac))
-   continue;
-
-   mdb = SWITCHDEV_OBJ_PORT_MDB(obj);
-   mdb->vid = vid;
-   ether_addr_copy(mdb->addr, addr.mac);
} else {
return -EOPNOTSUPP;
}
@@ -3762,20 +3753,6 @@ static int mv88e6xxx_port_mdb_del(struct dsa_switch *ds, 
int port,
return err;
 }
 
-static int mv88e6xxx_port_mdb_dump(struct dsa_switch *ds, int port,
-  struct switchdev_obj_port_mdb *mdb,
-  switchdev_obj_dump_cb_t *cb)
-{
-   struct mv88e6xxx_chip *chip = ds->priv;
-   int err;
-
-   mutex_lock(>reg_lock);
-   err = mv88e6xxx_port_db_dump(chip, port, >obj, cb);
-   mutex_unlock(>reg_lock);
-
-   return err;
-}
-
 static const struct dsa_switch_ops mv88e6xxx_switch_ops = {
.probe  = mv88e6xxx_drv_probe,
.get_tag_protocol   = mv88e6xxx_get_tag_protocol,
@@ -3809,7 +3786,6 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = 
{
.port_mdb_prepare   = mv88e6xxx_port_mdb_prepare,
.port_mdb_add   = mv88e6xxx_port_mdb_add,
.port_mdb_del   = mv88e6xxx_port_mdb_del,
-   .port_mdb_dump  = mv88e6xxx_port_mdb_dump,
.crosschip_bridge_join  = mv88e6xxx_crosschip_bridge_join,
.crosschip_bridge_leave = mv88e6xxx_crosschip_bridge_leave,
 };
-- 
2.4.11



[PATCH net-next v2 10/13] net: dsa: Remove redundant MDB dump support

2017-07-19 Thread Arkadi Sharshevsky
Currently the MDB HW database is synced with the bridge's one; thus,
there is no need to support special dump functionality.

Signed-off-by: Arkadi Sharshevsky 
---
 include/net/dsa.h  |  4 
 net/dsa/dsa_priv.h |  2 --
 net/dsa/port.c | 11 ---
 net/dsa/slave.c|  3 ---
 4 files changed, 20 deletions(-)

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 4b82647..0f4912b 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -406,10 +406,6 @@ struct dsa_switch_ops {
struct switchdev_trans *trans);
int (*port_mdb_del)(struct dsa_switch *ds, int port,
const struct switchdev_obj_port_mdb *mdb);
-   int (*port_mdb_dump)(struct dsa_switch *ds, int port,
-struct switchdev_obj_port_mdb *mdb,
- switchdev_obj_dump_cb_t *cb);
-
/*
 * RXNFC
 */
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index cddcea2..897ac24 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -132,8 +132,6 @@ int dsa_port_mdb_add(struct dsa_port *dp,
 struct switchdev_trans *trans);
 int dsa_port_mdb_del(struct dsa_port *dp,
 const struct switchdev_obj_port_mdb *mdb);
-int dsa_port_mdb_dump(struct dsa_port *dp, struct switchdev_obj_port_mdb *mdb,
- switchdev_obj_dump_cb_t *cb);
 int dsa_port_vlan_add(struct dsa_port *dp,
  const struct switchdev_obj_port_vlan *vlan,
  struct switchdev_trans *trans);
diff --git a/net/dsa/port.c b/net/dsa/port.c
index ce19216..7378782 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -210,17 +210,6 @@ int dsa_port_mdb_del(struct dsa_port *dp,
return dsa_port_notify(dp, DSA_NOTIFIER_MDB_DEL, );
 }
 
-int dsa_port_mdb_dump(struct dsa_port *dp, struct switchdev_obj_port_mdb *mdb,
- switchdev_obj_dump_cb_t *cb)
-{
-   struct dsa_switch *ds = dp->ds;
-
-   if (ds->ops->port_mdb_dump)
-   return ds->ops->port_mdb_dump(ds, dp->index, mdb, cb);
-
-   return -EOPNOTSUPP;
-}
-
 int dsa_port_vlan_add(struct dsa_port *dp,
  const struct switchdev_obj_port_vlan *vlan,
  struct switchdev_trans *trans)
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index f939d79..14f4d69 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -299,9 +299,6 @@ static int dsa_slave_port_obj_dump(struct net_device *dev,
case SWITCHDEV_OBJ_ID_PORT_FDB:
err = dsa_port_fdb_dump(dp, SWITCHDEV_OBJ_PORT_FDB(obj), cb);
break;
-   case SWITCHDEV_OBJ_ID_PORT_MDB:
-   err = dsa_port_mdb_dump(dp, SWITCHDEV_OBJ_PORT_MDB(obj), cb);
-   break;
default:
err = -EOPNOTSUPP;
break;
-- 
2.4.11



[PATCH net-next v2 03/13] net: dsa: Remove switchdev dependency from DSA switch notifier chain

2017-07-19 Thread Arkadi Sharshevsky
Currently, the switchdev objects are embedded inside the DSA notifier
info. This patch removes this dependency. This is done as a preparation
stage before adding support for learning FDB through the switchdev
notification chain.

Signed-off-by: Arkadi Sharshevsky 
Reviewed-by: Florian Fainelli 
Reviewed-by: Vivien Didelot 
---
 net/dsa/dsa_priv.h | 11 ++-
 net/dsa/port.c | 15 +--
 net/dsa/slave.c|  6 --
 net/dsa/switch.c   | 11 ---
 4 files changed, 23 insertions(+), 20 deletions(-)

diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 428402f..2b2f124 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -43,9 +43,10 @@ struct dsa_notifier_bridge_info {
 
 /* DSA_NOTIFIER_FDB_* */
 struct dsa_notifier_fdb_info {
-   const struct switchdev_obj_port_fdb *fdb;
int sw_index;
int port;
+   const unsigned char *addr;
+   u16 vid;
 };
 
 /* DSA_NOTIFIER_MDB_* */
@@ -119,10 +120,10 @@ int dsa_port_vlan_filtering(struct dsa_port *dp, bool 
vlan_filtering,
struct switchdev_trans *trans);
 int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock,
 struct switchdev_trans *trans);
-int dsa_port_fdb_add(struct dsa_port *dp,
-const struct switchdev_obj_port_fdb *fdb);
-int dsa_port_fdb_del(struct dsa_port *dp,
-const struct switchdev_obj_port_fdb *fdb);
+int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr,
+u16 vid);
+int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
+u16 vid);
 int dsa_port_fdb_dump(struct dsa_port *dp, struct switchdev_obj_port_fdb *fdb,
  switchdev_obj_dump_cb_t *cb);
 int dsa_port_mdb_add(struct dsa_port *dp,
diff --git a/net/dsa/port.c b/net/dsa/port.c
index bd271b9..86e0585 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -146,25 +146,28 @@ int dsa_port_ageing_time(struct dsa_port *dp, clock_t 
ageing_clock,
return dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, );
 }
 
-int dsa_port_fdb_add(struct dsa_port *dp,
-const struct switchdev_obj_port_fdb *fdb)
+int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr,
+u16 vid)
 {
struct dsa_notifier_fdb_info info = {
.sw_index = dp->ds->index,
.port = dp->index,
-   .fdb = fdb,
+   .addr = addr,
+   .vid = vid,
};
 
return dsa_port_notify(dp, DSA_NOTIFIER_FDB_ADD, );
 }
 
-int dsa_port_fdb_del(struct dsa_port *dp,
-const struct switchdev_obj_port_fdb *fdb)
+int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
+u16 vid)
 {
struct dsa_notifier_fdb_info info = {
.sw_index = dp->ds->index,
.port = dp->index,
-   .fdb = fdb,
+   .addr = addr,
+   .vid = vid,
+
};
 
return dsa_port_notify(dp, DSA_NOTIFIER_FDB_DEL, );
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index b4e68b2..19395cc 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -253,7 +253,8 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
case SWITCHDEV_OBJ_ID_PORT_FDB:
if (switchdev_trans_ph_prepare(trans))
return 0;
-   err = dsa_port_fdb_add(dp, SWITCHDEV_OBJ_PORT_FDB(obj));
+   err = dsa_port_fdb_add(dp, SWITCHDEV_OBJ_PORT_FDB(obj)->addr,
+  SWITCHDEV_OBJ_PORT_FDB(obj)->vid);
break;
case SWITCHDEV_OBJ_ID_PORT_MDB:
err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj), trans);
@@ -279,7 +280,8 @@ static int dsa_slave_port_obj_del(struct net_device *dev,
 
switch (obj->id) {
case SWITCHDEV_OBJ_ID_PORT_FDB:
-   err = dsa_port_fdb_del(dp, SWITCHDEV_OBJ_PORT_FDB(obj));
+   err = dsa_port_fdb_del(dp, SWITCHDEV_OBJ_PORT_FDB(obj)->addr,
+  SWITCHDEV_OBJ_PORT_FDB(obj)->vid);
break;
case SWITCHDEV_OBJ_ID_PORT_MDB:
err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index eb20e0f..e6c06aa 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -83,8 +83,6 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
 static int dsa_switch_fdb_add(struct dsa_switch *ds,
  struct dsa_notifier_fdb_info *info)
 {
-   const struct switchdev_obj_port_fdb *fdb = info->fdb;
-
/* Do not care yet about other switch chips of the fabric */
if (ds->index != info->sw_index)
return 0;
@@ -92,14 +90,13 @@ static int dsa_switch_fdb_add(struct dsa_switch *ds,
if 

Re: [patch net-next 10/17] ipv6: fib: Add offload indication to routes

2017-07-19 Thread David Ahern
On 7/19/17 1:02 AM, Jiri Pirko wrote:
> Allow user space applications to see which routes are offloaded and
> which aren't by setting the RTNH_F_OFFLOAD flag when dumping them.
> 
> To be consistent with IPv4, a multipath route is marked as offloaded if
> one of its nexthops is offloaded. Individual nexthops aren't marked with
> the 'offload' flag.

It is more user friendly to report the offload per nexthop especially
given the implications. There are already flags per nexthop and those
flags are pushed to userspace so not an API change at all.


Re: [patch net-next 10/17] ipv6: fib: Add offload indication to routes

2017-07-19 Thread David Ahern
On 7/19/17 9:49 AM, Ido Schimmel wrote:
> On Wed, Jul 19, 2017 at 09:27:30AM -0600, David Ahern wrote:
>> On 7/19/17 1:02 AM, Jiri Pirko wrote:
>>> Allow user space applications to see which routes are offloaded and
>>> which aren't by setting the RTNH_F_OFFLOAD flag when dumping them.
>>>
>>> To be consistent with IPv4, a multipath route is marked as offloaded if
>>> one of its nexthops is offloaded. Individual nexthops aren't marked with
>>> the 'offload' flag.
>>
>> It is more user friendly to report the offload per nexthop especially
>> given the implications. There are already flags per nexthop and those
>> flags are pushed to userspace so not an API change at all.
> 
> I thought about it, but then just decided to be consistent with IPv4.

And the comment stems from just that. I was looking at IPv4 ECMP routes
a few days ago and the existence / lack of offload flag was not intuitive.

> 
> I can send a follow-up patchset that aligns both families to the
> behavior you requested. Need to teach iproute2 to look for
> RTNH_F_OFFLOAD in rtnh_flags as well.
> 



Re: [patch net-next 07/17] ipv6: fib: Add in-kernel notifications for route add / delete

2017-07-19 Thread Ido Schimmel
On Wed, Jul 19, 2017 at 09:38:11AM -0600, David Ahern wrote:
> On 7/19/17 1:02 AM, Jiri Pirko wrote:
> > @@ -879,6 +891,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, 
> > struct rt6_info *rt,
> > *ins = rt;
> > rt->rt6i_node = fn;
> > atomic_inc(>rt6i_ref);
> > +   call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_ADD,
> > + rt);
> > if (!info->skip_notify)
> > inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
> > info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
> > @@ -906,6 +920,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, 
> > struct rt6_info *rt,
> > rt->rt6i_node = fn;
> > rt->dst.rt6_next = iter->dst.rt6_next;
> > atomic_inc(>rt6i_ref);
> > +   call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE,
> > + rt);
> > if (!info->skip_notify)
> > inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
> > if (!(fn->fn_flags & RTN_RTINFO)) {
> > @@ -1459,6 +1475,7 @@ static void fib6_del_route(struct fib6_node *fn, 
> > struct rt6_info **rtp,
> >  
> > fib6_purge_rt(rt, fn, net);
> >  
> > +   call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt);
> > if (!info->skip_notify)
> > inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
> > rt6_release(rt);
> > 
> 
> 
> Why aren't all of the notifier calls under the skip_notify? That flag is
> used to make handling of ipv6 multipath routes on par with ipv4. See
> commit 3b1137fe74829

From the cover letter:

"Unlike user space notifications for IPv6 multipath routes, the FIB
notification chain notifies these on a per-nexthop basis. This allows us
to keep the common code lean and is also unnecessary, as notifications
are serialized by each table's lock whereas applications maintaining
netlink caches may suffer from concurrent dumps and deletions /
additions of routes."


Re: [patch net-next 10/17] ipv6: fib: Add offload indication to routes

2017-07-19 Thread Ido Schimmel
On Wed, Jul 19, 2017 at 09:27:30AM -0600, David Ahern wrote:
> On 7/19/17 1:02 AM, Jiri Pirko wrote:
> > Allow user space applications to see which routes are offloaded and
> > which aren't by setting the RTNH_F_OFFLOAD flag when dumping them.
> > 
> > To be consistent with IPv4, a multipath route is marked as offloaded if
> > one of its nexthops is offloaded. Individual nexthops aren't marked with
> > the 'offload' flag.
> 
> It is more user friendly to report the offload per nexthop especially
> given the implications. There are already flags per nexthop and those
> flags are pushed to userspace so not an API change at all.

I thought about it, but then just decided to be consistent with IPv4.

I can send a follow-up patchset that aligns both families to the
behavior you requested. Need to teach iproute2 to look for
RTNH_F_OFFLOAD in rtnh_flags as well.


Re: [patch net-next 11/17] ipv6: fib: Allow non-FIB users to take reference on route

2017-07-19 Thread David Ahern
On 7/19/17 1:02 AM, Jiri Pirko wrote:
> From: Ido Schimmel 
> 
> Listeners of the FIB notification chain are expected to be able to take
> and release a reference on notified IPv6 routes. This is needed in the
> case of drivers capable of offloading these routes to a capable device.
> 
> Since notifications are sent in an atomic context, these drivers need to
> take a reference on the route, prepare a work item to offload the route
> and release the reference at the end of the work.
> 
> Currently, rt6i_ref is used to indicate in how many FIB nodes a route
> appears. Different code paths rely on rt6i_ref being 0 to indicate the
> route is no longer used by the FIB.
> 
> For example, whenever a route is deleted or replaced, fib6_purge_rt() is
> run to make sure the route is no longer present in intermediate nodes. A
> BUG_ON() at the end of the function is executed in case the reference
> count isn't 1, as it's only supposed to appear in the non-intermediate
> node from which it's going to be deleted.
> 
> Instead of changing the semantics of rt6i_ref, a new reference count is
> added, so that external users could also take a reference on routes
> without modifying rt6i_ref.
> 
> To make sure external users don't release routes used by the FIB, the
> reference count is set to 1 upon creation of a route and decremented by
> the FIB upon rt6_release().
> 
> The reference count is atomic, as it's not protected by any locks and
> placed in the 40 bytes hole after the existing rt6i_ref.

I'd rather not add another reference counter. Debugging reference leaks
is a huge PITA now; adding another counter just makes it worse.

Why can't the BUG_ON in fib6_purge_rt be removed since there are other
reference holders now?


[PATCH 047/102] net: stmmac: explicitly request exclusive reset control

2017-07-19 Thread Philipp Zabel
Commit a53e35db70d1 ("reset: Ensure drivers are explicit when requesting
reset lines") started to transition the reset control request API calls
to explicitly state whether the driver needs exclusive or shared reset
control behavior. Convert all drivers requesting exclusive resets to the
explicit API call so the temporary transition helpers can be removed.

No functional changes.

Cc: Giuseppe Cavallaro 
Cc: Alexandre Torgue 
Cc: Maxime Ripard 
Cc: Chen-Yu Tsai 
Cc: netdev@vger.kernel.org
Signed-off-by: Philipp Zabel 
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 3 ++-
 drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c 
b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index fffd6d5fc907b..2771369c105d6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -942,7 +942,8 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
return -EINVAL;
}
 
-   gmac->rst_ephy = of_reset_control_get(plat_dat->phy_node, NULL);
+   gmac->rst_ephy = 
of_reset_control_get_exclusive(plat_dat->phy_node,
+   NULL);
if (IS_ERR(gmac->rst_ephy)) {
ret = PTR_ERR(gmac->rst_ephy);
if (ret == -EPROBE_DEFER)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c 
b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index a366b3747eeb5..5f94bbf745546 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -526,8 +526,8 @@ stmmac_probe_config_dt(struct platform_device *pdev, const 
char **mac)
dev_dbg(&pdev->dev, "PTP rate %d\n", plat->clk_ptp_rate);
}
 
-   plat->stmmac_rst = devm_reset_control_get(&pdev->dev,
- STMMAC_RESOURCE_NAME);
+   plat->stmmac_rst = devm_reset_control_get_exclusive(&pdev->dev,
+   
STMMAC_RESOURCE_NAME);
if (IS_ERR(plat->stmmac_rst)) {
if (PTR_ERR(plat->stmmac_rst) == -EPROBE_DEFER)
goto error_hw_init;
-- 
2.11.0



[PATCH 044/102] net: dsa: mt7530: explicitly request exclusive reset control

2017-07-19 Thread Philipp Zabel
Commit a53e35db70d1 ("reset: Ensure drivers are explicit when requesting
reset lines") started to transition the reset control request API calls
to explicitly state whether the driver needs exclusive or shared reset
control behavior. Convert all drivers requesting exclusive resets to the
explicit API call so the temporary transition helpers can be removed.

No functional changes.

Cc: Andrew Lunn 
Cc: Vivien Didelot 
Cc: Florian Fainelli 
Cc: netdev@vger.kernel.org
Signed-off-by: Philipp Zabel 
---
 drivers/net/dsa/mt7530.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 1e46418a3b74c..657d06b3c6c47 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -1044,7 +1044,8 @@ mt7530_probe(struct mdio_device *mdiodev)
if (priv->mcm) {
dev_info(&mdiodev->dev, "MT7530 adapts as multi-chip module\n");
 
-   priv->rstc = devm_reset_control_get(&mdiodev->dev, "mcm");
+   priv->rstc = devm_reset_control_get_exclusive(&mdiodev->dev,
+ "mcm");
if (IS_ERR(priv->rstc)) {
dev_err(&mdiodev->dev, "Couldn't get our reset line\n");
return PTR_ERR(priv->rstc);
-- 
2.11.0



[PATCH 048/102] net: stmmac: dwc-qos: explicitly request exclusive reset control

2017-07-19 Thread Philipp Zabel
Commit a53e35db70d1 ("reset: Ensure drivers are explicit when requesting
reset lines") started to transition the reset control request API calls
to explicitly state whether the driver needs exclusive or shared reset
control behavior. Convert all drivers requesting exclusive resets to the
explicit API call so the temporary transition helpers can be removed.

No functional changes.

Cc: Giuseppe Cavallaro 
Cc: Alexandre Torgue 
Cc: netdev@vger.kernel.org
Signed-off-by: Philipp Zabel 
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c 
b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
index dd6a2f9791cc1..cf4e0f09c0361 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
@@ -339,7 +339,7 @@ static void *tegra_eqos_probe(struct platform_device *pdev,
usleep_range(2000, 4000);
gpiod_set_value(eqos->reset, 0);
 
-   eqos->rst = devm_reset_control_get(&pdev->dev, "eqos");
+   eqos->rst = devm_reset_control_get_exclusive(&pdev->dev, "eqos");
if (IS_ERR(eqos->rst)) {
err = PTR_ERR(eqos->rst);
goto reset_phy;
-- 
2.11.0



[PATCH 045/102] net: ethernet: hisi_femac: explicitly request exclusive reset control

2017-07-19 Thread Philipp Zabel
Commit a53e35db70d1 ("reset: Ensure drivers are explicit when requesting
reset lines") started to transition the reset control request API calls
to explicitly state whether the driver needs exclusive or shared reset
control behavior. Convert all drivers requesting exclusive resets to the
explicit API call so the temporary transition helpers can be removed.

No functional changes.

Cc: Yisen Zhuang 
Cc: Salil Mehta 
Cc: netdev@vger.kernel.org
Signed-off-by: Philipp Zabel 
---
 drivers/net/ethernet/hisilicon/hisi_femac.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hisi_femac.c 
b/drivers/net/ethernet/hisilicon/hisi_femac.c
index 2c2808830e957..10aa7590afd54 100644
--- a/drivers/net/ethernet/hisilicon/hisi_femac.c
+++ b/drivers/net/ethernet/hisilicon/hisi_femac.c
@@ -838,14 +838,14 @@ static int hisi_femac_drv_probe(struct platform_device 
*pdev)
goto out_free_netdev;
}
 
-   priv->mac_rst = devm_reset_control_get(dev, "mac");
+   priv->mac_rst = devm_reset_control_get_exclusive(dev, "mac");
if (IS_ERR(priv->mac_rst)) {
ret = PTR_ERR(priv->mac_rst);
goto out_disable_clk;
}
hisi_femac_core_reset(priv);
 
-   priv->phy_rst = devm_reset_control_get(dev, "phy");
+   priv->phy_rst = devm_reset_control_get_exclusive(dev, "phy");
if (IS_ERR(priv->phy_rst)) {
priv->phy_rst = NULL;
} else {
-- 
2.11.0



[PATCH 046/102] net: ethernet: hix5hd2_gmac: explicitly request exclusive reset control

2017-07-19 Thread Philipp Zabel
Commit a53e35db70d1 ("reset: Ensure drivers are explicit when requesting
reset lines") started to transition the reset control request API calls
to explicitly state whether the driver needs exclusive or shared reset
control behavior. Convert all drivers requesting exclusive resets to the
explicit API call so the temporary transition helpers can be removed.

No functional changes.

Cc: Yisen Zhuang 
Cc: Salil Mehta 
Cc: netdev@vger.kernel.org
Signed-off-by: Philipp Zabel 
---
 drivers/net/ethernet/hisilicon/hix5hd2_gmac.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c 
b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
index 25a6c8722ecac..02b7e2f490099 100644
--- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
+++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
@@ -1161,16 +1161,16 @@ static int hix5hd2_dev_probe(struct platform_device 
*pdev)
goto out_disable_mac_core_clk;
}
 
-   priv->mac_core_rst = devm_reset_control_get(dev, "mac_core");
+   priv->mac_core_rst = devm_reset_control_get_exclusive(dev, "mac_core");
if (IS_ERR(priv->mac_core_rst))
priv->mac_core_rst = NULL;
hix5hd2_mac_core_reset(priv);
 
-   priv->mac_ifc_rst = devm_reset_control_get(dev, "mac_ifc");
+   priv->mac_ifc_rst = devm_reset_control_get_exclusive(dev, "mac_ifc");
if (IS_ERR(priv->mac_ifc_rst))
priv->mac_ifc_rst = NULL;
 
-   priv->phy_rst = devm_reset_control_get(dev, "phy");
+   priv->phy_rst = devm_reset_control_get_exclusive(dev, "phy");
if (IS_ERR(priv->phy_rst)) {
priv->phy_rst = NULL;
} else {
-- 
2.11.0



[PATCH 0/2] Atheros 803x PHY RGMII clock delays

2017-07-19 Thread Marc Gonzalez
These patches address the quirks of the Atheros PHY, with respect
to RGMII RX and TX clock delays.

Once the PHY driver is fixed, the second patch fixes the ethernet
controller used on tango boards.

Marc Gonzalez (2):
  net: phy: at803x: Fix RGMII RX and TX clock delays setup
  net: ethernet: nb8800: Fix RGMII TX clock delay setup

 drivers/net/ethernet/aurora/nb8800.c |  8 +---
 drivers/net/phy/at803x.c | 32 
 2 files changed, 29 insertions(+), 11 deletions(-)

-- 
2.11.0



[PATCH 049/102] ath10k: explicitly request exclusive reset control

2017-07-19 Thread Philipp Zabel
Commit a53e35db70d1 ("reset: Ensure drivers are explicit when requesting
reset lines") started to transition the reset control request API calls
to explicitly state whether the driver needs exclusive or shared reset
control behavior. Convert all drivers requesting exclusive resets to the
explicit API call so the temporary transition helpers can be removed.

No functional changes.

Cc: Kalle Valo 
Cc: ath...@lists.infradead.org
Cc: linux-wirel...@vger.kernel.org
Cc: netdev@vger.kernel.org
Signed-off-by: Philipp Zabel 
---
 drivers/net/wireless/ath/ath10k/ahb.c | 15 ++-
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/ahb.c 
b/drivers/net/wireless/ath/ath10k/ahb.c
index da770af830369..2ad3ed7b89417 100644
--- a/drivers/net/wireless/ath/ath10k/ahb.c
+++ b/drivers/net/wireless/ath/ath10k/ahb.c
@@ -197,35 +197,40 @@ static int ath10k_ahb_rst_ctrl_init(struct ath10k *ar)
 
dev = &ar_ahb->pdev->dev;
 
-   ar_ahb->core_cold_rst = devm_reset_control_get(dev, "wifi_core_cold");
+   ar_ahb->core_cold_rst = devm_reset_control_get_exclusive(dev,
+
"wifi_core_cold");
if (IS_ERR(ar_ahb->core_cold_rst)) {
ath10k_err(ar, "failed to get core cold rst ctrl: %ld\n",
   PTR_ERR(ar_ahb->core_cold_rst));
return PTR_ERR(ar_ahb->core_cold_rst);
}
 
-   ar_ahb->radio_cold_rst = devm_reset_control_get(dev, "wifi_radio_cold");
+   ar_ahb->radio_cold_rst = devm_reset_control_get_exclusive(dev,
+ 
"wifi_radio_cold");
if (IS_ERR(ar_ahb->radio_cold_rst)) {
ath10k_err(ar, "failed to get radio cold rst ctrl: %ld\n",
   PTR_ERR(ar_ahb->radio_cold_rst));
return PTR_ERR(ar_ahb->radio_cold_rst);
}
 
-   ar_ahb->radio_warm_rst = devm_reset_control_get(dev, "wifi_radio_warm");
+   ar_ahb->radio_warm_rst = devm_reset_control_get_exclusive(dev,
+ 
"wifi_radio_warm");
if (IS_ERR(ar_ahb->radio_warm_rst)) {
ath10k_err(ar, "failed to get radio warm rst ctrl: %ld\n",
   PTR_ERR(ar_ahb->radio_warm_rst));
return PTR_ERR(ar_ahb->radio_warm_rst);
}
 
-   ar_ahb->radio_srif_rst = devm_reset_control_get(dev, "wifi_radio_srif");
+   ar_ahb->radio_srif_rst = devm_reset_control_get_exclusive(dev,
+ 
"wifi_radio_srif");
if (IS_ERR(ar_ahb->radio_srif_rst)) {
ath10k_err(ar, "failed to get radio srif rst ctrl: %ld\n",
   PTR_ERR(ar_ahb->radio_srif_rst));
return PTR_ERR(ar_ahb->radio_srif_rst);
}
 
-   ar_ahb->cpu_init_rst = devm_reset_control_get(dev, "wifi_cpu_init");
+   ar_ahb->cpu_init_rst = devm_reset_control_get_exclusive(dev,
+   
"wifi_cpu_init");
if (IS_ERR(ar_ahb->cpu_init_rst)) {
ath10k_err(ar, "failed to get cpu init rst ctrl: %ld\n",
   PTR_ERR(ar_ahb->cpu_init_rst));
-- 
2.11.0



[PATCH 2/2] net: ethernet: nb8800: Fix RGMII TX clock delay setup

2017-07-19 Thread Marc Gonzalez
According to commit e5f3a4a56ce2a707b2fb8ce37e4414dcac89c672
("Documentation: devicetree: clarify usage of the RGMII phy-modes")
there are 4 RGMII phy-modes to handle:

"rgmii" (RX and TX delays are added by the MAC when required)
"rgmii-id" (RGMII with internal RX and TX delays provided by the PHY,
the MAC should not add the RX or TX delays in this case)
"rgmii-rxid" (RGMII with internal RX delay provided by the PHY,
the MAC should not add an RX delay in this case)
"rgmii-txid" (RGMII with internal TX delay provided by the PHY,
the MAC should not add a TX delay in this case)

Let the MAC handle TX clock delay for rgmii and rgmii-rxid.

Signed-off-by: Marc Gonzalez 
---
 drivers/net/ethernet/aurora/nb8800.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/aurora/nb8800.c 
b/drivers/net/ethernet/aurora/nb8800.c
index 041cfb7952f8..f3ed320eb4ad 100644
--- a/drivers/net/ethernet/aurora/nb8800.c
+++ b/drivers/net/ethernet/aurora/nb8800.c
@@ -609,7 +609,7 @@ static void nb8800_mac_config(struct net_device *dev)
mac_mode |= HALF_DUPLEX;
 
if (gigabit) {
-   if (priv->phy_mode == PHY_INTERFACE_MODE_RGMII)
+   if (phy_interface_is_rgmii(dev->phydev))
mac_mode |= RGMII_MODE;
 
mac_mode |= GMAC_MODE;
@@ -1268,11 +1268,13 @@ static int nb8800_tangox_init(struct net_device *dev)
break;
 
case PHY_INTERFACE_MODE_RGMII:
-   pad_mode = PAD_MODE_RGMII;
+   case PHY_INTERFACE_MODE_RGMII_RXID:
+   pad_mode = PAD_MODE_RGMII | PAD_MODE_GTX_CLK_DELAY;
break;
 
+   case PHY_INTERFACE_MODE_RGMII_ID:
case PHY_INTERFACE_MODE_RGMII_TXID:
-   pad_mode = PAD_MODE_RGMII | PAD_MODE_GTX_CLK_DELAY;
+   pad_mode = PAD_MODE_RGMII;
break;
 
default:
-- 
2.11.0



[PATCH 1/2] net: phy: at803x: Fix RGMII RX and TX clock delays setup

2017-07-19 Thread Marc Gonzalez
The current code supports enabling RGMII RX and TX clock delays.
The unstated assumption is that these settings are disabled by
default at reset, which is not the case.

RX clock delay is enabled at reset. And TX clock delay "survives"
across SW resets. Thus, if the bootloader enables TX clock delay,
it will remain enabled at reset in Linux.

Provide disable functions to configure the RGMII clock delays
exactly as specified in the fwspec.

Signed-off-by: Marc Gonzalez 
---
 drivers/net/phy/at803x.c | 32 
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index c1e52b9dc58d..9c96e2cb 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -117,12 +117,24 @@ static inline int at803x_enable_rx_delay(struct 
phy_device *phydev)
AT803X_DEBUG_RX_CLK_DLY_EN);
 }
 
+static inline int at803x_disable_rx_delay(struct phy_device *phydev)
+{
+   return at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_0,
+   AT803X_DEBUG_RX_CLK_DLY_EN, 0);
+}
+
 static inline int at803x_enable_tx_delay(struct phy_device *phydev)
 {
return at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_5, 0,
AT803X_DEBUG_TX_CLK_DLY_EN);
 }
 
+static inline int at803x_disable_tx_delay(struct phy_device *phydev)
+{
+   return at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_5,
+   AT803X_DEBUG_TX_CLK_DLY_EN, 0);
+}
+
 /* save relevant PHY registers to private copy */
 static void at803x_context_save(struct phy_device *phydev,
struct at803x_context *context)
@@ -284,18 +296,22 @@ static int at803x_config_init(struct phy_device *phydev)
return ret;
 
if (phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID ||
-   phydev->interface == PHY_INTERFACE_MODE_RGMII_ID) {
+   phydev->interface == PHY_INTERFACE_MODE_RGMII_ID)
ret = at803x_enable_rx_delay(phydev);
-   if (ret < 0)
-   return ret;
-   }
+   else
+   ret = at803x_disable_rx_delay(phydev);
+
+   if (ret < 0)
+   return ret;
 
if (phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID ||
-   phydev->interface == PHY_INTERFACE_MODE_RGMII_ID) {
+   phydev->interface == PHY_INTERFACE_MODE_RGMII_ID)
ret = at803x_enable_tx_delay(phydev);
-   if (ret < 0)
-   return ret;
-   }
+   else
+   ret = at803x_disable_tx_delay(phydev);
+
+   if (ret < 0)
+   return ret;
 
return 0;
 }
-- 
2.11.0



Re: [patch net-next 07/17] ipv6: fib: Add in-kernel notifications for route add / delete

2017-07-19 Thread David Ahern
On 7/19/17 1:02 AM, Jiri Pirko wrote:
> @@ -879,6 +891,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct 
> rt6_info *rt,
>   *ins = rt;
>   rt->rt6i_node = fn;
>   atomic_inc(&rt->rt6i_ref);
> + call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_ADD,
> +   rt);
>   if (!info->skip_notify)
>   inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
>   info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
> @@ -906,6 +920,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct 
> rt6_info *rt,
>   rt->rt6i_node = fn;
>   rt->dst.rt6_next = iter->dst.rt6_next;
>   atomic_inc(&rt->rt6i_ref);
> + call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE,
> +   rt);
>   if (!info->skip_notify)
>   inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
>   if (!(fn->fn_flags & RTN_RTINFO)) {
> @@ -1459,6 +1475,7 @@ static void fib6_del_route(struct fib6_node *fn, struct 
> rt6_info **rtp,
>  
>   fib6_purge_rt(rt, fn, net);
>  
> + call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt);
>   if (!info->skip_notify)
>   inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
>   rt6_release(rt);
> 


Why aren't all of the notifier calls under the skip_notify? That flag is
used to make handling of ipv6 multipath routes on par with ipv4. See
commit 3b1137fe74829


[PATCH] ath10k_htt_rx_amsdu_allowed(): use ath10k_dbg()

2017-07-19 Thread Gabriel C


Each time we get disconnected from AP we get flooded with messages like:

...
ath10k_pci :03:00.0: no channel configured; ignoring frame(s)!

ath10k_warn: 155 callbacks suppressed
...

Use ath10k_dbg() here too.

Signed-off-by: Gabriel Craciunescu 
---
 drivers/net/wireless/ath/ath10k/htt_rx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c 
b/drivers/net/wireless/ath/ath10k/htt_rx.c
index 398dda978d6e..75d9b59b7e63 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -1514,7 +1514,7 @@ static bool ath10k_htt_rx_amsdu_allowed(struct ath10k *ar,
 */

if (!rx_status->freq) {
-   ath10k_warn(ar, "no channel configured; ignoring frame(s)!\n");
+   ath10k_dbg(ar, ATH10K_DBG_HTT, "no channel configured; ignoring 
frame(s)!\n");
return false;
}

--
2.13.3


Re: [PATCH] net: Convert to using %pOF instead of full_name

2017-07-19 Thread Rob Herring
On Tue, Jul 18, 2017 at 4:51 PM, David Miller  wrote:
> From: Rob Herring 
> Date: Tue, 18 Jul 2017 16:43:19 -0500
>
>> Now that we have a custom printf format specifier, convert users of
>> full_name to use %pOF instead. This is preparation to remove storing
>> of the full path string for each node.
>>
>> Signed-off-by: Rob Herring 
>
> Acked-by: David S. Miller 

The dependency went into 4.13. You can take this thru netdev.

Rob


Re: ath10k_htt_rx_amsdu_allowed() noise

2017-07-19 Thread Gabriel C

On 18.07.2017 09:14, Ryan Hsu wrote:

On 07/16/2017 03:56 PM, Gabriel C wrote:



Can ath10k_warn() be ath10k_dbg() there ? Maybe this ?



Looks good to me.



 From d4138d936635ca7b69ed7f7b0cda4914f0f07917 Mon Sep 17 00:00:00 2001
From: Gabriel Craciunescu 
Date: Mon, 17 Jul 2017 00:45:29 +0200
Subject: [PATCH] ath10k_htt_rx_amsdu_allowed(): change ath10k_warn() to 
ath10k_dbg()

Signed-off-by: Gabriel Craciunescu 
---
  drivers/net/wireless/ath/ath10k/htt_rx.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c 
b/drivers/net/wireless/ath/ath10k/htt_rx.c
index 398dda978d6e..ad0306cd6ee1 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -1514,7 +1514,7 @@ static bool ath10k_htt_rx_amsdu_allowed(struct ath10k *ar,
  */

 if (!rx_status->freq) {
-   ath10k_warn(ar, "no channel configured; ignoring frame(s)!\n");
+   ath10k_dbg(ar, ATH10K_DBG_HTT, "no channel configured, ignoring 
frame(s)!\n");
 return false;
 }



Can you send this as patch for reviewing, please?



Sure .. I'll try to send the patch to the list later on today..

Regards,

Gabriel C


Re: [patch net-next 01/17] net: core: Make the FIB notification chain generic

2017-07-19 Thread Ido Schimmel
On Wed, Jul 19, 2017 at 08:11:56AM -0600, David Ahern wrote:
> On 7/19/17 1:02 AM, Jiri Pirko wrote:
> > +struct fib_notifier_ops *
> > +fib_notifier_ops_register(const struct fib_notifier_ops *tmpl, struct net 
> > *net)
> > +{
> > +   struct fib_notifier_ops *ops;
> > +   int err;
> > +
> > +   ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL);
> 
> why allocate memory to copy the ops?

It contains a list pointer that I use to list all the registered
families in each net namespace. Same pattern used in FIB rules.


Re: commit 16ecba59 breaks 82574L under heavy load.

2017-07-19 Thread Lennart Sorensen
On Tue, Jul 18, 2017 at 04:14:35PM -0700, Benjamin Poirier wrote:
> Thanks for the detailed analysis.
> 
> Referring to the original discussion around this patch series, it seemed like
> the IMS bit for a condition had to be set for the Other interrupt to be raised
> for that condition.
> 
> https://lkml.org/lkml/2015/11/4/683
> 
> In this case however, E1000_ICR_RXT0 is not set in IMS so Other shouldn't be
> raised for Receiver Overrun. Apparently something is going on...
> 
> I can reproduce the spurious Other interrupts with a simple mdelay()
> With the debugging patch at the end of the mail I see stuff like this
> while blasting with udp frames:
>   -0 [086] d.h1 15338.742675: e1000_msix_other: got Other 
> interrupt, count 15127
><...>-54504 [086] d.h. 15338.742724: e1000_msix_other: got Other 
> interrupt, count 1
><...>-54504 [086] d.h. 15338.742774: e1000_msix_other: got Other 
> interrupt, count 1
><...>-54504 [086] d.h. 15338.742824: e1000_msix_other: got Other 
> interrupt, count 1
>   -0 [086] d.h1 15340.745123: e1000_msix_other: got Other 
> interrupt, count 27584
><...>-54504 [086] d.h. 15340.745172: e1000_msix_other: got Other 
> interrupt, count 1
><...>-54504 [086] d.h. 15340.745222: e1000_msix_other: got Other 
> interrupt, count 1
><...>-54504 [086] d.h. 15340.745272: e1000_msix_other: got Other 
> interrupt, count 1
> 
> > hence sets the flag that (unfortunately) means both link is down and link
> > state should be checked.  Since this now happens 3000 times per second,
> > the chances of it happening while the watchdog_task is checking the link
> > state becomes pretty high, and if it does happen to coincide, then the
> > watchdog_task will reset the adapter, which causes a real loss of link.
> 
> Through which path does watchdog_task reset the adapter? I didn't
> reproduce that.

The other interrupt happens and sets get_link_status to true.  At some
point the watchdog_task runs on some core and calls e1000e_has_link,
which then calls check_for_link to find out the current link status.
While e1000e_check_for_copper_link is checking the link state and
after updating get_link_status to false to indicate link is up, another
interrupt occurs and another core handles it and changes get_link_status
to true again.  So by the time e1000e_has_link goes to determine the
return value, get_link_status has changed back again so now it returns
link down, and as a result the watchdog_task calls reset, because we
have packets in the transmit queue (we were busy forwarding over 10
packets per second when it happened).

Running on an Atom D525 which isn't very fast and uses hyperthreading
might have something to do with how the scheduling manages to trigger
this race condition.  On a faster CPU you very likely would be done
checking the link state quickly enough that the interrupt handler rarely
gets a chance to interfere.  Also we have the irq affinity set so the
RX/TX of one port is handled by one CPU, the RX/TX of the other port
by a different CPU and the Other interrupts and other tasks (like the
watchdog) are handled by the last two CPUs.

Either making the current link state its own bool and keeping its meaning
away from get_link_status, or making the interrupt handler only change
get_link_status when LSC is actually present makes the problem go away.
Having two meanings to get_link_status (both link state needs checking
and what the link state is) causes issues.  After all it is using a bool
to store 3 values: Link is up, link needs checking but is up and link
needs checking but is down.  Of course the last two states are rather
quantum, in that you don't know which it is until you check.

> diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
> b/drivers/net/ethernet/intel/e1000e/netdev.c
> index b3679728caac..689ad76d0d12 100644
> --- a/drivers/net/ethernet/intel/e1000e/netdev.c
> +++ b/drivers/net/ethernet/intel/e1000e/netdev.c
> @@ -46,6 +46,8 @@
>  
>  #include "e1000.h"
>  
> +DEFINE_RATELIMIT_STATE(e1000e_ratelimit_state, 2 * HZ, 4);
> +
>  #define DRV_EXTRAVERSION "-k"
>  
>  #define DRV_VERSION "3.2.6" DRV_EXTRAVERSION
> @@ -937,6 +939,8 @@ static bool e1000_clean_rx_irq(struct e1000_ring 
> *rx_ring, int *work_done,
>   bool cleaned = false;
>   unsigned int total_rx_bytes = 0, total_rx_packets = 0;
>  
> + mdelay(10);
> +
>   i = rx_ring->next_to_clean;
>   rx_desc = E1000_RX_DESC_EXT(*rx_ring, i);
>   staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
> @@ -1067,6 +1071,13 @@ static bool e1000_clean_rx_irq(struct e1000_ring 
> *rx_ring, int *work_done,
>  
>   adapter->total_rx_bytes += total_rx_bytes;
>   adapter->total_rx_packets += total_rx_packets;
> +
> + if (__ratelimit(&e1000e_ratelimit_state)) {
> + static unsigned int max;
> + max = max(max, total_rx_packets);
> + trace_printk("received %u max %u\n", 

Re: [patch net-next 01/17] net: core: Make the FIB notification chain generic

2017-07-19 Thread David Ahern
On 7/19/17 1:02 AM, Jiri Pirko wrote:
> +struct fib_notifier_ops *
> +fib_notifier_ops_register(const struct fib_notifier_ops *tmpl, struct net 
> *net)
> +{
> + struct fib_notifier_ops *ops;
> + int err;
> +
> + ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL);

why allocate memory to copy the ops?


Re: [PATCH] net: dsa: mv88e6xxx: Enable CMODE config support for 6390X

2017-07-19 Thread Andrew Lunn
On Wed, Jul 19, 2017 at 08:17:02AM +0200, Martin Hundebøll wrote:
> Commit f39908d3b1c45 ('net: dsa: mv88e6xxx: Set the CMODE for mv88e6390
> ports 9 & 10') added support for setting the CMODE for the 6390X family,
> but only enabled it for 6290 and 6390 - and left out 6390X.
> 
> Fix support for setting the CMODE on 6390X also by assigning
> mv88e6390x_port_set_cmode() to the .port_set_cmode function pointer in
> mv88e6390x_ops too.
> 
> Signed-off-by: Martin Hundebøll 

Duh!

Fixes: f39908d3b1c4 ("net: dsa: mv88e6xxx: Set the CMODE for mv88e6390 ports 9 
& 10")
Reviewed-by: Andrew Lunn 

Andrew


Re: [PATCH V2 net-next 21/21] net-next/hinic: Add select_queue and netpoll

2017-07-19 Thread Aviad Krawczyk
Hi Sergei,

I meant that drivers/net/ethernet/huawei/* is all the dir under huawei dir.
(I saw this style of '*' is used in the maintainers file)
I will change it to drivers/net/ethernet/huawei/hinic/ in the next fix.

Thanks for review,
Aviad

On 7/19/2017 2:34 PM, Sergei Shtylyov wrote:
> On 7/19/2017 12:19 PM, Aviad Krawczyk wrote:
> 
>> Add more netdev operations.
>>
>> Signed-off-by: Aviad Krawczyk 
>> Signed-off-by: Zhao Chen 
>> ---
>>   MAINTAINERS|  7 ++
>>   drivers/net/ethernet/huawei/hinic/hinic_main.c | 35 
>> ++
>>   2 files changed, 42 insertions(+)
>>
>> diff --git a/MAINTAINERS b/MAINTAINERS
>> index 1347726..5c85602 100644
>> --- a/MAINTAINERS
>> +++ b/MAINTAINERS
>> @@ -6269,6 +6269,13 @@ L:linux-in...@vger.kernel.org
>>   S:Maintained
>>   F:drivers/input/touchscreen/htcpen.c
>>   +HUAWEI ETHERNET DRIVER
>> +M:Aviad Krawczyk 
>> +L:netdev@vger.kernel.org
>> +S:Supported
>> +F:Documentation/networking/hinic.txt
>> +F:drivers/net/ethernet/huawei/*
> 
>This means that subdirs there are not maintained, did you really mean it?
> 
> [...]
> 
> MNR, Sergei
> 
> .
> 



Re: [PATCH V2 net-next 20/21] net-next/hinic: Add ethtool and stats

2017-07-19 Thread Aviad Krawczyk
Hi Joe,

I tried to be consistent with the earlier review comments, which requested
that we use dev_err, except for some special cases that use netif.

We will replace the dev_err(>dev,.. to netdev_err in the
next fix.

Thanks for review,
Aviad

On 7/19/2017 1:27 PM, Joe Perches wrote:
> On Wed, 2017-07-19 at 17:19 +0800, Aviad Krawczyk wrote:
>> Add ethtool operations and statistics operations.
> 
> series trivia:
> 
>> diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c 
>> b/drivers/net/ethernet/huawei/hinic/hinic_main.c
> []
>> @@ -67,6 +67,178 @@
> []
>> +static int hinic_get_link_ksettings(struct net_device *netdev,
>> +struct ethtool_link_ksettings
>> +*link_ksettings)
>> +{
> []
>> +err = hinic_port_get_cap(nic_dev, _cap);
>> +if (err) {
>> +dev_err(>dev, "Failed to get port capabilities\n");
>> +return err;
>> +}
> 
> it looks like a lot of these dev_ calls
> should be converted to netdev_
> 
> from:
>   dev_err(>dev, ...);
> to:
>   netdev_err(netdev, ...);
> 
> etc...
> 
> 
> .
> 



Re: [PATCH net-next 09/11] net: dsa: Move FDB dump implementation inside DSA

2017-07-19 Thread Arkadi Sharshevsky


On 07/18/2017 09:06 PM, Vivien Didelot wrote:
> Hi Arkadi,
> 
> Arkadi Sharshevsky  writes:
> 
>> +typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid,
>> +  u16 ndm_state, void *data);
> 
> Can I ask you to change u16 ndm_state for bool is_static at the same
> time? Ethernet switches do not need to report more than that.
>

Will fix, thanks.

>> +static int
>> +dsa_slave_port_fdb_do_dump(const unsigned char *addr, u16 vid,
>> +   u16 ndm_state, void *data)
>> +{
>> +struct dsa_slave_dump_ctx *dump = data;
>> +u32 portid = NETLINK_CB(dump->cb->skb).portid;
>> +u32 seq = dump->cb->nlh->nlmsg_seq;
>> +struct nlmsghdr *nlh;
>> +struct ndmsg *ndm;
>> +
>> +if (dump->idx < dump->cb->args[2])
>> +goto skip;
>> +
>> +nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
>> +sizeof(*ndm), NLM_F_MULTI);
>> +if (!nlh)
>> +return -EMSGSIZE;
>> +
>> +ndm = nlmsg_data(nlh);
>> +ndm->ndm_family  = AF_BRIDGE;
>> +ndm->ndm_pad1= 0;
>> +ndm->ndm_pad2= 0;
>> +ndm->ndm_flags   = NTF_SELF;
>> +ndm->ndm_type= 0;
>> +ndm->ndm_ifindex = dump->dev->ifindex;
>> +ndm->ndm_state   = ndm_state;
> 
> So we can simply scope this here:
> 
> ndm->ndm_state = is_static ? NUD_NOARP : NUD_REACHABLE;
> 
>> +
>> +if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, addr))
>> +goto nla_put_failure;
>> +
>> +if (vid && nla_put_u16(dump->skb, NDA_VLAN, vid))
>> +goto nla_put_failure;
>> +
>> +nlmsg_end(dump->skb, nlh);
>> +
>> +skip:
>> +dump->idx++;
>> +return 0;
>> +
>> +nla_put_failure:
>> +nlmsg_cancel(dump->skb, nlh);
>> +return -EMSGSIZE;
>> +}
> 
> Other than that, LGTM.
> 
> 
> Thanks,
> 
> Vivien
> 


Re: [PATCH net-next 07/11] net: dsa: Remove support for bypass bridge port attributes/vlan set

2017-07-19 Thread Arkadi Sharshevsky


On 07/18/2017 08:40 PM, Vivien Didelot wrote:
> Hi Arkadi,
> 
> Arkadi Sharshevsky  writes:
> 
>> The bridge port attributes/vlan for DSA devices should be set only
>> from bridge code. Furthermore, The vlans are synced totally with the
>> bridge so there is no need for special dump support.
>>
>> Signed-off-by: Arkadi Sharshevsky 
>> ---
>>  drivers/net/dsa/b53/b53_common.c   | 44 --
>>  drivers/net/dsa/b53/b53_priv.h |  3 --
>>  drivers/net/dsa/bcm_sf2.c  |  1 -
>>  drivers/net/dsa/dsa_loop.c | 38 ---
>>  drivers/net/dsa/microchip/ksz_common.c | 41 -
>>  drivers/net/dsa/mv88e6xxx/chip.c   | 56 
>> --
>>  include/net/dsa.h  |  4 ---
>>  net/dsa/dsa_priv.h |  4 ---
>>  net/dsa/port.c | 12 
>>  net/dsa/slave.c|  6 
> 
> Regarding this massive deletion, can you please split it in two patches,
> one deleting first the DSA core usage of .port_vlan_dump, i.e. in:
> 
> net/dsa/dsa_priv.h
> net/dsa/port.c
> net/dsa/slave.c
> 
> Then a second patch which deletes the .port_vlan_dump implementations?
> 
> This may sound useless but it will actually make it easy for us to
> restore the VLAN dump support in drivers once we introduce an
> alternative way to query the hardware.
> 
> 
> Thanks,
> 
> Vivien
> 

Yeah, no problem. But doesn't it make more sense to remove it first from
the drivers and then from core? If it will be removed from core first it
will leave unused code behind in the driver. Furthermore, it is
symmetric with the code adding. You first add the core implementation
and then only the drivers.




Re: [PATCH V2 net-next 21/21] net-next/hinic: Add select_queue and netpoll

2017-07-19 Thread Sergei Shtylyov

On 7/19/2017 12:19 PM, Aviad Krawczyk wrote:


Add more netdev operations.

Signed-off-by: Aviad Krawczyk 
Signed-off-by: Zhao Chen 
---
  MAINTAINERS|  7 ++
  drivers/net/ethernet/huawei/hinic/hinic_main.c | 35 ++
  2 files changed, 42 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 1347726..5c85602 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6269,6 +6269,13 @@ L:   linux-in...@vger.kernel.org
  S:Maintained
  F:drivers/input/touchscreen/htcpen.c
  
+HUAWEI ETHERNET DRIVER

+M: Aviad Krawczyk 
+L: netdev@vger.kernel.org
+S: Supported
+F: Documentation/networking/hinic.txt
+F: drivers/net/ethernet/huawei/*


   This means that subdirs there are not maintained, did you really mean it?

[...]

MNR, Sergei


Re: [net-next v3 1/5] ixgbe: Ensure MAC filter was added before setting MACVLAN

2017-07-19 Thread Joe Perches
On Tue, 2017-07-18 at 18:23 -0700, Jeff Kirsher wrote:
> This patch adds a check to ensure that adding the MAC filter was
> successful before setting the MACVLAN.  If it was unsuccessful, propagate
> the error.
[]
> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c 
> b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
[]
> @@ -681,6 +681,7 @@ static int ixgbe_set_vf_macvlan(struct ixgbe_adapter 
> *adapter,
>  {
>   struct list_head *pos;
>   struct vf_macvlans *entry;
> + s32 retval = 0;

This function returns int, why use s32 here?
 
>   if (index <= 1) {
>   list_for_each(pos, &adapter->vf_mvs.l) {
> @@ -721,14 +722,15 @@ static int ixgbe_set_vf_macvlan(struct ixgbe_adapter 
> *adapter,
>   if (!entry || !entry->free)
>   return -ENOSPC;
>  
> - entry->free = false;
> - entry->is_macvlan = true;
> - entry->vf = vf;
> - memcpy(entry->vf_macvlan, mac_addr, ETH_ALEN);
> -
> - ixgbe_add_mac_filter(adapter, mac_addr, vf);
> + retval = ixgbe_add_mac_filter(adapter, mac_addr, vf);
> + if (retval >= 0) {
> + entry->free = false;
> + entry->is_macvlan = true;
> + entry->vf = vf;
> + memcpy(entry->vf_macvlan, mac_addr, ETH_ALEN);
> + }
>  
> - return 0;
> + return retval;

This is also backwards logic from typical style
and unnecessarily indents code.

retval = ixgbe_add_mac_filter(adapter, mac_addr, vf);
if (retval < 0)
return retval;

entry->free = false;
entry->is_macvlan = true;
entry->vf = vf;
memcpy(entry->vf_macvlan, mac_addr, ETH_ALEN);

return 0;
}

This patch also sets the return value to a
possible positive value.

Is that really desired?

The only code that seems to use a possible
positive value also limits its return to 0

static int ixgbe_uc_sync(struct net_device *netdev, const unsigned char *addr)
{
struct ixgbe_adapter *adapter = netdev_priv(netdev);
int ret;

ret = ixgbe_add_mac_filter(adapter, addr, VMDQ_P(0));

return min_t(int, ret, 0);
}




Re: [PATCH V2 net-next 20/21] net-next/hinic: Add ethtool and stats

2017-07-19 Thread Joe Perches
On Wed, 2017-07-19 at 17:19 +0800, Aviad Krawczyk wrote:
> Add ethtool operations and statistics operations.

series trivia:

> diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c 
> b/drivers/net/ethernet/huawei/hinic/hinic_main.c
[]
> @@ -67,6 +67,178 @@
[]
> +static int hinic_get_link_ksettings(struct net_device *netdev,
> + struct ethtool_link_ksettings
> + *link_ksettings)
> +{
[]
> + err = hinic_port_get_cap(nic_dev, &port_cap);
> + if (err) {
> + dev_err(&netdev->dev, "Failed to get port capabilities\n");
> + return err;
> + }

it looks like a lot of these dev_ calls
should be converted to netdev_

from:
dev_err(&netdev->dev, ...);
to:
netdev_err(netdev, ...);

etc...



Re: [PATCH net-next 04/11] net: dsa: Add support for learning FDB through notification

2017-07-19 Thread Arkadi Sharshevsky


On 07/18/2017 08:16 PM, Vivien Didelot wrote:
> Hi Arkadi,
> 
> Arkadi Sharshevsky  writes:
> 
>> --- a/include/net/dsa.h
>> +++ b/include/net/dsa.h
>> @@ -451,6 +451,7 @@ void unregister_switch_driver(struct dsa_switch_driver 
>> *type);
>>  struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev);
>>  
>>  struct net_device *dsa_dev_to_net_device(struct device *dev);
>> +bool dsa_schedule_work(struct work_struct *work);
> 
> You forgot to move this declaration to net/dsa/dsa_priv.h, since this is
> private to DSA core and does not need to be exposed to drivers ;-)
>

No problem, will move it to dsa_priv.h, thanks.

>> +err = unregister_netdevice_notifier(&dsa_slave_switchdev_notifier);
>> +if (err)
>> +pr_err("DSA: failed to unregister switchdev notifier (%d)\n", 
>> err);
> 
> I think you meant unregister_switchdev_notifier() here.
> 
> Thanks,
> 
> Vivien
> 


[PATCH V2 net-next 01/21] net-next/hinic: Initialize hw interface

2017-07-19 Thread Aviad Krawczyk
Initialize hw interface as part of the nic initialization for accessing hw.

Signed-off-by: Aviad Krawczyk 
Signed-off-by: Zhao Chen 
---
 Documentation/networking/hinic.txt | 125 
 drivers/net/ethernet/Kconfig   |   1 +
 drivers/net/ethernet/Makefile  |   1 +
 drivers/net/ethernet/huawei/Kconfig|  19 ++
 drivers/net/ethernet/huawei/Makefile   |   5 +
 drivers/net/ethernet/huawei/hinic/Kconfig  |  13 ++
 drivers/net/ethernet/huawei/hinic/Makefile |   3 +
 drivers/net/ethernet/huawei/hinic/hinic_dev.h  |  33 
 drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h   |  36 
 drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c   | 208 
 drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h   |  42 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_if.c| 208 
 drivers/net/ethernet/huawei/hinic/hinic_hw_if.h| 160 
 drivers/net/ethernet/huawei/hinic/hinic_main.c | 209 +
 .../net/ethernet/huawei/hinic/hinic_pci_id_tbl.h   |  27 +++
 15 files changed, 1090 insertions(+)
 create mode 100644 Documentation/networking/hinic.txt
 create mode 100644 drivers/net/ethernet/huawei/Kconfig
 create mode 100644 drivers/net/ethernet/huawei/Makefile
 create mode 100644 drivers/net/ethernet/huawei/hinic/Kconfig
 create mode 100644 drivers/net/ethernet/huawei/hinic/Makefile
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_dev.h
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_hw_if.c
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_hw_if.h
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_main.c
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_pci_id_tbl.h

diff --git a/Documentation/networking/hinic.txt 
b/Documentation/networking/hinic.txt
new file mode 100644
index 000..989366a
--- /dev/null
+++ b/Documentation/networking/hinic.txt
@@ -0,0 +1,125 @@
+Linux Kernel Driver for Huawei Intelligent NIC(HiNIC) family
+
+
+Overview:
+=========
+HiNIC is a network interface card for the Data Center Area.
+
+The driver supports a range of link-speed devices (10GbE, 25GbE, 40GbE, etc.).
+The driver supports also a negotiated and extendable feature set.
+
+Some HiNIC devices support SR-IOV. This driver is used for Physical Function
+(PF).
+
+HiNIC devices support MSI-X interrupt vector for each Tx/Rx queue and
+adaptive interrupt moderation.
+
+HiNIC devices support also various offload features such as checksum offload,
+TCP Transmit Segmentation Offload(TSO), Receive-Side Scaling(RSS) and
+LRO(Large Receive Offload).
+
+
+Supported PCI vendor ID/device IDs:
+===================================
+
+19e5:1822 - HiNIC PF
+
+
+Driver Architecture and Source Code:
+====================================
+
+hinic_dev - Implement a Logical Network device that is independent from
+specific HW details about HW data structure formats.
+
+hinic_hwdev - Implement the HW details of the device and include the components
+for accessing the PCI NIC.
+
+hinic_hwdev contains the following components:
+==============================================
+
+HW Interface:
+=============
+
+The interface for accessing the pci device (DMA memory and PCI BARs).
+(hinic_hw_if.c, hinic_hw_if.h)
+
+Configuration Status Registers Area that describes the HW Registers on the
+configuration and status BAR0. (hinic_hw_csr.h)
+
+MGMT components:
+================
+
+Asynchronous Event Queues(AEQs) - The event queues for receiving messages from
+the MGMT modules on the cards. (hinic_hw_eqs.c, hinic_hw_eqs.h)
+
+Application Programmable Interface commands(API CMD) - Interface for sending
+MGMT commands to the card. (hinic_hw_api_cmd.c, hinic_hw_api_cmd.h)
+
+Management (MGMT) - the PF to MGMT channel that uses API CMD for sending MGMT
+commands to the card and receives notifications from the MGMT modules on the
+card by AEQs. Also set the addresses of the IO CMDQs in HW.
+(hinic_hw_mgmt.c, hinic_hw_mgmt.h)
+
+IO components:
+==============
+
+Completion Event Queues(CEQs) - The completion Event Queues that describe IO
+tasks that are finished. (hinic_hw_eqs.c, hinic_hw_eqs.h)
+
+Work Queues(WQ) - Contain the memory and operations for use by CMD queues and
+the Queue Pairs. The WQ is a Memory Block in a Page. The Block contains
+pointers to Memory Areas that are the Memory for the Work Queue Elements(WQEs).
+(hinic_hw_wq.c, hinic_hw_wq.h)
+
+Command Queues(CMDQ) - The queues for sending commands for IO management and is
+used to set the QPs addresses in HW. The commands completion events are
+accumulated on the CEQ 

[PATCH V2 net-next 07/21] net-next/hinic: Add aeqs

2017-07-19 Thread Aviad Krawczyk
Handle aeq elements that are accumulated on the aeq by calling the
registered handler for the specific event.

Signed-off-by: Aviad Krawczyk 
Signed-off-by: Zhao Chen 
---
 drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h |  49 +++
 drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c | 454 ++-
 drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.h |  81 
 drivers/net/ethernet/huawei/hinic/hinic_hw_if.c  |  92 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_if.h  |  58 ++-
 5 files changed, 726 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h 
b/drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h
index ebbf054..52eb89c 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h
@@ -65,4 +65,53 @@
 #define HINIC_CSR_API_CMD_STATUS_ADDR(idx)  \
(HINIC_CSR_API_CMD_BASE + 0x30 + (idx) * HINIC_CSR_API_CMD_STRIDE)
 
+/* MSI-X registers */
+#define HINIC_CSR_MSIX_CTRL_BASE0x2000
+#define HINIC_CSR_MSIX_CNT_BASE 0x2004
+
+#define HINIC_CSR_MSIX_STRIDE   0x8
+
+#define HINIC_CSR_MSIX_CTRL_ADDR(idx)   \
+   (HINIC_CSR_MSIX_CTRL_BASE + (idx) * HINIC_CSR_MSIX_STRIDE)
+
+#define HINIC_CSR_MSIX_CNT_ADDR(idx)\
+   (HINIC_CSR_MSIX_CNT_BASE + (idx) * HINIC_CSR_MSIX_STRIDE)
+
+/* EQ registers */
+#define HINIC_AEQ_MTT_OFF_BASE_ADDR 0x200
+
+#define HINIC_EQ_MTT_OFF_STRIDE 0x40
+
+#define HINIC_CSR_AEQ_MTT_OFF(id)   \
+   (HINIC_AEQ_MTT_OFF_BASE_ADDR + (id) * HINIC_EQ_MTT_OFF_STRIDE)
+
+#define HINIC_CSR_EQ_PAGE_OFF_STRIDE8
+
+#define HINIC_CSR_AEQ_HI_PHYS_ADDR_REG(q_id, pg_num)\
+   (HINIC_CSR_AEQ_MTT_OFF(q_id) + \
+(pg_num) * HINIC_CSR_EQ_PAGE_OFF_STRIDE)
+
+#define HINIC_CSR_AEQ_LO_PHYS_ADDR_REG(q_id, pg_num)\
+   (HINIC_CSR_AEQ_MTT_OFF(q_id) + \
+(pg_num) * HINIC_CSR_EQ_PAGE_OFF_STRIDE + 4)
+
+#define HINIC_AEQ_CTRL_0_ADDR_BASE  0xE00
+#define HINIC_AEQ_CTRL_1_ADDR_BASE  0xE04
+#define HINIC_AEQ_CONS_IDX_ADDR_BASE0xE08
+#define HINIC_AEQ_PROD_IDX_ADDR_BASE0xE0C
+
+#define HINIC_EQ_OFF_STRIDE 0x80
+
+#define HINIC_CSR_AEQ_CTRL_0_ADDR(idx)  \
+   (HINIC_AEQ_CTRL_0_ADDR_BASE + (idx) * HINIC_EQ_OFF_STRIDE)
+
+#define HINIC_CSR_AEQ_CTRL_1_ADDR(idx)  \
+   (HINIC_AEQ_CTRL_1_ADDR_BASE + (idx) * HINIC_EQ_OFF_STRIDE)
+
+#define HINIC_CSR_AEQ_CONS_IDX_ADDR(idx)\
+   (HINIC_AEQ_CONS_IDX_ADDR_BASE + (idx) * HINIC_EQ_OFF_STRIDE)
+
+#define HINIC_CSR_AEQ_PROD_IDX_ADDR(idx)\
+   (HINIC_AEQ_PROD_IDX_ADDR_BASE + (idx) * HINIC_EQ_OFF_STRIDE)
+
 #endif
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c 
b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c
index 09d881b..afeab36 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c
@@ -13,17 +13,74 @@
  *
  */
 
+#include 
 #include 
 #include 
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
 
+#include "hinic_hw_csr.h"
 #include "hinic_hw_if.h"
 #include "hinic_hw_eqs.h"
 
 #define HINIC_EQS_WQ_NAME   "hinic_eqs"
 
+#define GET_EQ_NUM_PAGES(eq, pg_size)   \
+   (ALIGN((eq)->q_len * (eq)->elem_size, pg_size) / (pg_size))
+
+#define GET_EQ_NUM_ELEMS_IN_PG(eq, pg_size) ((pg_size) / (eq)->elem_size)
+
+#define EQ_CONS_IDX_REG_ADDR(eq)HINIC_CSR_AEQ_CONS_IDX_ADDR((eq)->q_id)
+#define EQ_PROD_IDX_REG_ADDR(eq)HINIC_CSR_AEQ_PROD_IDX_ADDR((eq)->q_id)
+
+#define EQ_HI_PHYS_ADDR_REG(eq, pg_num) \
+   HINIC_CSR_AEQ_HI_PHYS_ADDR_REG((eq)->q_id, pg_num)
+
+#define EQ_LO_PHYS_ADDR_REG(eq, pg_num) \
+   HINIC_CSR_AEQ_LO_PHYS_ADDR_REG((eq)->q_id, pg_num)
+
+#define GET_EQ_ELEMENT(eq, idx) \
+   ((eq)->virt_addr[(idx) / (eq)->num_elem_in_pg] + \
+(((idx) & ((eq)->num_elem_in_pg - 1)) * (eq)->elem_size))
+
+#define GET_AEQ_ELEM(eq, idx)   ((struct hinic_aeq_elem *) \
+   GET_EQ_ELEMENT(eq, idx))
+
+#define GET_CURR_AEQ_ELEM(eq)   GET_AEQ_ELEM(eq, (eq)->cons_idx)
+
+#define PAGE_IN_4K(page_size)   ((page_size) >> 12)
+#define EQ_SET_HW_PAGE_SIZE_VAL(eq) (ilog2(PAGE_IN_4K((eq)->page_size)))
+
+#define ELEMENT_SIZE_IN_32B(eq) (((eq)->elem_size) >> 5)
+#define EQ_SET_HW_ELEM_SIZE_VAL(eq) (ilog2(ELEMENT_SIZE_IN_32B(eq)))
+
+#define EQ_MAX_PAGES8
+
+#define aeq_to_aeqs(eq) \
+   container_of((eq) - (eq)->q_id, struct hinic_aeqs, aeq[0])
+
+#define 

[PATCH V2 net-next 06/21] net-next/hinic: Add api cmd commands

2017-07-19 Thread Aviad Krawczyk
Add the api cmd commands for sending management messages to the nic.

Signed-off-by: Aviad Krawczyk 
Signed-off-by: Zhao Chen 
---
 .../net/ethernet/huawei/hinic/hinic_hw_api_cmd.c   | 331 -
 .../net/ethernet/huawei/hinic/hinic_hw_api_cmd.h   |  65 
 drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h   |   7 +
 3 files changed, 401 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c 
b/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c
index eacb33b..5627191 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c
@@ -24,7 +24,9 @@
 #include 
 #include 
 #include 
+#include 
 #include 
+#include 
 
 #include "hinic_hw_csr.h"
 #include "hinic_hw_if.h"
@@ -44,14 +46,314 @@
 
 #define API_CMD_BUF_SIZE2048
 
+/* Sizes of the members in hinic_api_cmd_cell */
+#define API_CMD_CELL_DESC_SIZE  8
+#define API_CMD_CELL_DATA_ADDR_SIZE 8
+
+#define API_CMD_CELL_ALIGNMENT  8
+
 #define API_CMD_TIMEOUT 1000
 
+#define MASKED_IDX(chain, idx)  ((idx) & ((chain)->num_cells - 1))
+
+#define SIZE_8BYTES(size)   (ALIGN((size), 8) >> 3)
+#define SIZE_4BYTES(size)   (ALIGN((size), 4) >> 2)
+
+#define RD_DMA_ATTR_DEFAULT 0
+#define WR_DMA_ATTR_DEFAULT 0
+
+enum api_cmd_data_format {
+   SGE_DATA = 1,   /* cell data is passed by hw address */
+};
+
+enum api_cmd_type {
+   API_CMD_WRITE = 0,
+};
+
+enum api_cmd_bypass {
+   NO_BYPASS   = 0,
+   BYPASS  = 1,
+};
+
 enum api_cmd_xor_chk_level {
XOR_CHK_DIS = 0,
 
XOR_CHK_ALL = 3,
 };
 
+static u8 xor_chksum_set(void *data)
+{
+   int idx;
+   u8 *val, checksum = 0;
+
+   val = data;
+
+   for (idx = 0; idx < 7; idx++)
+   checksum ^= val[idx];
+
+   return checksum;
+}
+
+static void set_prod_idx(struct hinic_api_cmd_chain *chain)
+{
+   struct hinic_hwif *hwif = chain->hwif;
+   enum hinic_api_cmd_chain_type chain_type = chain->chain_type;
+   u32 hw_prod_idx_addr = HINIC_CSR_API_CMD_CHAIN_PI_ADDR(chain_type);
+   u32 prod_idx;
+
+   prod_idx = hinic_hwif_read_reg(hwif, hw_prod_idx_addr);
+
+   prod_idx = HINIC_API_CMD_PI_CLEAR(prod_idx, IDX);
+
+   prod_idx |= HINIC_API_CMD_PI_SET(chain->prod_idx, IDX);
+
+   hinic_hwif_write_reg(hwif, hw_prod_idx_addr, prod_idx);
+}
+
+static u32 get_hw_cons_idx(struct hinic_api_cmd_chain *chain)
+{
+   u32 addr, val;
+
+   addr = HINIC_CSR_API_CMD_STATUS_ADDR(chain->chain_type);
+   val  = hinic_hwif_read_reg(chain->hwif, addr);
+
+   return HINIC_API_CMD_STATUS_GET(val, CONS_IDX);
+}
+
+/**
+ * chain_busy - check if the chain is still processing last requests
+ * @chain: chain to check
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int chain_busy(struct hinic_api_cmd_chain *chain)
+{
+   struct hinic_hwif *hwif = chain->hwif;
+   struct pci_dev *pdev = hwif->pdev;
+   u32 prod_idx;
+
+   switch (chain->chain_type) {
+   case HINIC_API_CMD_WRITE_TO_MGMT_CPU:
+   chain->cons_idx = get_hw_cons_idx(chain);
+   prod_idx = chain->prod_idx;
+
+   /* check for a space for a new command */
+   if (chain->cons_idx == MASKED_IDX(chain, prod_idx + 1)) {
+   dev_err(&pdev->dev, "API CMD chain %d is busy\n",
+   chain->chain_type);
+   return -EBUSY;
+   }
+   break;
+
+   default:
+   dev_err(&pdev->dev, "Unknown API CMD Chain type\n");
+   break;
+   }
+
+   return 0;
+}
+
+/**
+ * get_cell_data_size - get the data size of a specific cell type
+ * @type: chain type
+ *
+ * Return the data(Desc + Address) size in the cell
+ **/
+static u8 get_cell_data_size(enum hinic_api_cmd_chain_type type)
+{
+   u8 cell_data_size = 0;
+
+   switch (type) {
+   case HINIC_API_CMD_WRITE_TO_MGMT_CPU:
+   cell_data_size = ALIGN(API_CMD_CELL_DESC_SIZE +
+  API_CMD_CELL_DATA_ADDR_SIZE,
+  API_CMD_CELL_ALIGNMENT);
+   break;
+   default:
+   break;
+   }
+
+   return cell_data_size;
+}
+
+/**
+ * prepare_cell_ctrl - prepare the ctrl of the cell for the command
+ * @cell_ctrl: the control of the cell to set the control value into it
+ * @data_size: the size of the data in the cell
+ **/
+static void prepare_cell_ctrl(u64 *cell_ctrl, u16 data_size)
+{
+   u64 ctrl;
+   u8 chksum;
+
+   ctrl =  HINIC_API_CMD_CELL_CTRL_SET(SIZE_8BYTES(data_size), DATA_SZ) |
+   HINIC_API_CMD_CELL_CTRL_SET(RD_DMA_ATTR_DEFAULT, RD_DMA_ATTR) |
+   HINIC_API_CMD_CELL_CTRL_SET(WR_DMA_ATTR_DEFAULT, WR_DMA_ATTR);
+
+  

[PATCH V2 net-next 09/21] net-next/hinic: Add Rx mode and link event handler

2017-07-19 Thread Aviad Krawczyk
Add port management message for setting Rx mode in the card, used for
rx_mode netdev operation. The link event handler is used for getting
a notification about the link state.

Signed-off-by: Aviad Krawczyk 
Signed-off-by: Zhao Chen 
---
 drivers/net/ethernet/huawei/hinic/hinic_dev.h |  17 ++
 drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h  |   2 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c  | 120 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h  |  63 -
 drivers/net/ethernet/huawei/hinic/hinic_hw_if.c   |  17 ++
 drivers/net/ethernet/huawei/hinic/hinic_hw_if.h   |  17 ++
 drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c |  69 +-
 drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.h |  28 +++
 drivers/net/ethernet/huawei/hinic/hinic_main.c| 285 +-
 drivers/net/ethernet/huawei/hinic/hinic_port.c| 101 
 drivers/net/ethernet/huawei/hinic/hinic_port.h|  66 +
 11 files changed, 767 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_dev.h 
b/drivers/net/ethernet/huawei/hinic/hinic_dev.h
index dd540b4..91a3356 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_dev.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_dev.h
@@ -19,19 +19,36 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include "hinic_hw_dev.h"
 
 #define HINIC_DRV_NAME  "HiNIC"
 
+enum hinic_flags {
+   HINIC_LINK_UP = BIT(0),
+   HINIC_INTF_UP = BIT(1),
+};
+
+struct hinic_rx_mode_work {
+   struct work_struct  work;
+   u32 rx_mode;
+};
+
 struct hinic_dev {
struct net_device   *netdev;
struct hinic_hwdev  *hwdev;
 
u32 msg_enable;
 
+   unsigned intflags;
+
struct semaphoremgmt_lock;
unsigned long   *vlan_bitmap;
+
+   struct hinic_rx_mode_work   rx_mode_work;
+   struct workqueue_struct *workq;
 };
 
 #endif
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h 
b/drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h
index 52eb89c..1f57301 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h
@@ -20,6 +20,8 @@
 #define HINIC_CSR_FUNC_ATTR0_ADDR   0x0
 #define HINIC_CSR_FUNC_ATTR1_ADDR   0x4
 
+#define HINIC_CSR_FUNC_ATTR5_ADDR   0x14
+
 #define HINIC_DMA_ATTR_BASE 0xC80
 #define HINIC_ELECTION_BASE 0x4200
 
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c 
b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
index 52d9bb7..288b4b0 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
@@ -232,6 +232,114 @@ int hinic_port_msg_cmd(struct hinic_hwdev *hwdev, enum 
hinic_port_cmd cmd,
 }
 
 /**
+ * hinic_hwdev_cb_register - register callback handler for MGMT events
+ * @hwdev: the NIC HW device
+ * @cmd: the mgmt event
+ * @handle: private data for the handler
+ * @handler: event handler
+ **/
+void hinic_hwdev_cb_register(struct hinic_hwdev *hwdev,
+enum hinic_mgmt_msg_cmd cmd, void *handle,
+void (*handler)(void *handle, void *buf_in,
+u16 in_size, void *buf_out,
+u16 *out_size))
+{
+   struct hinic_hwif *hwif = hwdev->hwif;
+   struct pci_dev *pdev = hwif->pdev;
+   struct hinic_pfhwdev *pfhwdev;
+   struct hinic_nic_cb *nic_cb;
+   u8 cmd_cb;
+
+   if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif)) {
+   dev_err(&pdev->dev, "unsupported PCI Function type\n");
+   return;
+   }
+
+   pfhwdev = container_of(hwdev, struct hinic_pfhwdev, hwdev);
+
+   cmd_cb = cmd - HINIC_MGMT_MSG_CMD_BASE;
+   nic_cb = &pfhwdev->nic_cb[cmd_cb];
+
+   nic_cb->handler = handler;
+   nic_cb->handle = handle;
+   nic_cb->cb_state = HINIC_CB_ENABLED;
+}
+
+/**
+ * hinic_hwdev_cb_unregister - unregister callback handler for MGMT events
+ * @hwdev: the NIC HW device
+ * @cmd: the mgmt event
+ **/
+void hinic_hwdev_cb_unregister(struct hinic_hwdev *hwdev,
+  enum hinic_mgmt_msg_cmd cmd)
+{
+   struct hinic_hwif *hwif = hwdev->hwif;
+   struct pci_dev *pdev = hwif->pdev;
+   struct hinic_pfhwdev *pfhwdev;
+   struct hinic_nic_cb *nic_cb;
+   u8 cmd_cb;
+
+   if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif)) {
+   dev_err(&pdev->dev, "unsupported PCI Function type\n");
+   return;
+   }
+
+   pfhwdev = container_of(hwdev, struct hinic_pfhwdev, hwdev);
+
+   cmd_cb = cmd - HINIC_MGMT_MSG_CMD_BASE;
+   nic_cb = &pfhwdev->nic_cb[cmd_cb];
+
+   nic_cb->cb_state &= 

[PATCH V2 net-next 16/21] net-next/hinic: Add cmdq commands

2017-07-19 Thread Aviad Krawczyk
Add cmdq commands for setting queue pair contexts in the nic.

Signed-off-by: Aviad Krawczyk 
Signed-off-by: Zhao Chen 
---
 drivers/net/ethernet/huawei/hinic/hinic_common.c  |  25 ++
 drivers/net/ethernet/huawei/hinic/hinic_common.h  |  15 ++
 drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c | 284 +-
 drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.h | 153 
 drivers/net/ethernet/huawei/hinic/hinic_hw_io.h   |   9 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c   | 193 +++
 drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h   |   8 +
 7 files changed, 685 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_common.c 
b/drivers/net/ethernet/huawei/hinic/hinic_common.c
index 3b439e9..07d264c 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_common.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_common.c
@@ -13,6 +13,7 @@
  *
  */
 
+#include 
 #include 
 #include 
 
@@ -53,3 +54,27 @@ void hinic_be32_to_cpu(void *data, int len)
mem++;
}
 }
+
+/**
+ * hinic_set_sge - set dma area in scatter gather entry
+ * @sge: scatter gather entry
+ * @addr: dma address
+ * @len: length of relevant data in the dma address
+ **/
+void hinic_set_sge(struct hinic_sge *sge, dma_addr_t addr, int len)
+{
+   sge->hi_addr = upper_32_bits(addr);
+   sge->lo_addr = lower_32_bits(addr);
+   sge->len  = len;
+}
+
+/**
+ * hinic_sge_to_dma - get dma address from scatter gather entry
+ * @sge: scatter gather entry
+ *
+ * Return dma address of sg entry
+ **/
+dma_addr_t hinic_sge_to_dma(struct hinic_sge *sge)
+{
+   return (dma_addr_t)((((u64)sge->hi_addr) << 32) | sge->lo_addr);
+}
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_common.h 
b/drivers/net/ethernet/huawei/hinic/hinic_common.h
index 21921ec..2c06b76 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_common.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_common.h
@@ -16,8 +16,23 @@
 #ifndef HINIC_COMMON_H
 #define HINIC_COMMON_H
 
+#include 
+
+#define UPPER_8_BITS(data)  (((data) >> 8) & 0xFF)
+#define LOWER_8_BITS(data)  ((data) & 0xFF)
+
+struct hinic_sge {
+   u32 hi_addr;
+   u32 lo_addr;
+   u32 len;
+};
+
 void hinic_cpu_to_be32(void *data, int len);
 
 void hinic_be32_to_cpu(void *data, int len);
 
+void hinic_set_sge(struct hinic_sge *sge, dma_addr_t addr, int len);
+
+dma_addr_t hinic_sge_to_dma(struct hinic_sge *sge);
+
 #endif
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c 
b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
index 7099341..1bc51d7 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
@@ -24,8 +24,12 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
+#include 
 
+#include "hinic_common.h"
 #include "hinic_hw_if.h"
 #include "hinic_hw_eqs.h"
 #include "hinic_hw_mgmt.h"
@@ -34,9 +38,18 @@
 #include "hinic_hw_io.h"
 #include "hinic_hw_dev.h"
 
+#define CMDQ_DB_PI_OFF(pi)  (((u16)LOWER_8_BITS(pi)) << 3)
+
+#define CMDQ_DB_ADDR(db_base, pi)   ((db_base) + CMDQ_DB_PI_OFF(pi))
+
+#define CMDQ_WQE_HEADER(wqe)((struct hinic_cmdq_header *)(wqe))
+
+#define FIRST_DATA_TO_WRITE_LASTsizeof(u64)
+
 #define CMDQ_DB_OFF SZ_2K
 
 #define CMDQ_WQEBB_SIZE 64
+#define CMDQ_WQE_SIZE   64
 #define CMDQ_DEPTH  SZ_4K
 
 #define CMDQ_WQ_PAGE_SIZE   SZ_4K
@@ -44,6 +57,10 @@
 #define WQE_LCMD_SIZE   64
 #define WQE_SCMD_SIZE   64
 
+#define COMPLETE_LEN3
+
+#define CMDQ_TIMEOUT1000
+
 #define CMDQ_PFN(addr, page_size)   ((addr) >> (ilog2(page_size)))
 
 #define cmdq_to_cmdqs(cmdq) container_of((cmdq) - (cmdq)->cmdq_type, \
@@ -58,6 +75,40 @@ enum cmdq_wqe_type {
WQE_SCMD_TYPE,
 };
 
+enum completion_format {
+   COMPLETE_DIRECT,
+   COMPLETE_SGE,
+};
+
+enum data_format {
+   DATA_SGE,
+   DATA_DIRECT,
+};
+
+enum bufdesc_len {
+   BUFDESC_LCMD_LEN = 2,   /* 16 bytes - 2(8 byte unit) */
+   BUFDESC_SCMD_LEN = 3,   /* 24 bytes - 3(8 byte unit) */
+};
+
+enum ctrl_sect_len {
+   CTRL_SECT_LEN= 1, /* 4 bytes (ctrl) - 1(8 byte unit) */
+   CTRL_DIRECT_SECT_LEN = 2, /* 12 bytes (ctrl + rsvd) - 2(8 byte unit) */
+};
+
+enum cmdq_scmd_type {
+   CMDQ_SET_ARM_CMD = 2,
+};
+
+enum cmdq_cmd_type {
+   CMDQ_CMD_SYNC_DIRECT_RESP,
+   CMDQ_CMD_SYNC_SGE_RESP,
+};
+
+enum completion_request {
+   NO_CEQ,
+   CEQ_SET,
+};
+
 /**
  * hinic_alloc_cmdq_buf - alloc buffer for sending command
  * @cmdqs: the cmdqs
@@ -92,6 +143,226 @@ void hinic_free_cmdq_buf(struct hinic_cmdqs *cmdqs,
pci_pool_free(cmdqs->cmdq_buf_pool, cmdq_buf->buf, cmdq_buf->dma_addr);
 }
 
+static void 

[PATCH V2 net-next 15/21] net-next/hinic: Add ceqs

2017-07-19 Thread Aviad Krawczyk
Initialize the completion event queues and handle ceq events by calling
the registered handlers. Used for cmdq command completion.

Signed-off-by: Aviad Krawczyk 
Signed-off-by: Zhao Chen 
---
 drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c |  17 ++
 drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h  |  29 +++
 drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c  |   7 +-
 drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c  | 290 +-
 drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.h  |  75 ++
 drivers/net/ethernet/huawei/hinic/hinic_hw_io.c   |  15 +-
 drivers/net/ethernet/huawei/hinic/hinic_hw_io.h   |   3 +
 7 files changed, 428 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c 
b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
index f3a6e24..7099341 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
@@ -27,6 +27,7 @@
 #include 
 
 #include "hinic_hw_if.h"
+#include "hinic_hw_eqs.h"
 #include "hinic_hw_mgmt.h"
 #include "hinic_hw_wq.h"
 #include "hinic_hw_cmdq.h"
@@ -110,6 +111,16 @@ int hinic_cmdq_direct_resp(struct hinic_cmdqs *cmdqs,
 }
 
 /**
+ * cmdq_ceq_handler - cmdq completion event handler
+ * @handle: private data for the handler(cmdqs)
+ * @ceqe_data: ceq element data
+ **/
+static void cmdq_ceq_handler(void *handle, u32 ceqe_data)
+{
+   /* should be implemented */
+}
+
+/**
  * cmdq_init_queue_ctxt - init the queue ctxt of a cmdq
  * @cmdq: the cmdq
  * @cmdq_pages: the memory of the queue
@@ -322,6 +333,9 @@ int hinic_init_cmdqs(struct hinic_cmdqs *cmdqs, struct 
hinic_hwif *hwif,
goto cmdq_ctxt_err;
}
 
+   hinic_ceq_register_cb(&func_to_io->ceqs, HINIC_CEQ_CMDQ, cmdqs,
+ cmdq_ceq_handler);
+
return 0;
 
 cmdq_ctxt_err:
@@ -342,10 +356,13 @@ int hinic_init_cmdqs(struct hinic_cmdqs *cmdqs, struct 
hinic_hwif *hwif,
  **/
 void hinic_free_cmdqs(struct hinic_cmdqs *cmdqs)
 {
+   struct hinic_func_to_io *func_to_io = cmdqs_to_func_to_io(cmdqs);
struct hinic_hwif *hwif = cmdqs->hwif;
struct pci_dev *pdev = hwif->pdev;
enum hinic_cmdq_type cmdq_type;
 
+   hinic_ceq_unregister_cb(&func_to_io->ceqs, HINIC_CEQ_CMDQ);
+
cmdq_type = HINIC_CMDQ_SYNC;
for (; cmdq_type < HINIC_MAX_CMDQ_TYPES; cmdq_type++)
free_cmdq(&cmdqs->cmdq[cmdq_type]);
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h 
b/drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h
index 1f57301..10b8c7b 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h
@@ -81,27 +81,44 @@
 
 /* EQ registers */
 #define HINIC_AEQ_MTT_OFF_BASE_ADDR 0x200
+#define HINIC_CEQ_MTT_OFF_BASE_ADDR 0x400
 
 #define HINIC_EQ_MTT_OFF_STRIDE 0x40
 
 #define HINIC_CSR_AEQ_MTT_OFF(id)   \
(HINIC_AEQ_MTT_OFF_BASE_ADDR + (id) * HINIC_EQ_MTT_OFF_STRIDE)
 
+#define HINIC_CSR_CEQ_MTT_OFF(id)   \
+   (HINIC_CEQ_MTT_OFF_BASE_ADDR + (id) * HINIC_EQ_MTT_OFF_STRIDE)
+
 #define HINIC_CSR_EQ_PAGE_OFF_STRIDE8
 
 #define HINIC_CSR_AEQ_HI_PHYS_ADDR_REG(q_id, pg_num)\
(HINIC_CSR_AEQ_MTT_OFF(q_id) + \
 (pg_num) * HINIC_CSR_EQ_PAGE_OFF_STRIDE)
 
+#define HINIC_CSR_CEQ_HI_PHYS_ADDR_REG(q_id, pg_num)\
+   (HINIC_CSR_CEQ_MTT_OFF(q_id) +  \
+(pg_num) * HINIC_CSR_EQ_PAGE_OFF_STRIDE)
+
 #define HINIC_CSR_AEQ_LO_PHYS_ADDR_REG(q_id, pg_num)\
(HINIC_CSR_AEQ_MTT_OFF(q_id) + \
 (pg_num) * HINIC_CSR_EQ_PAGE_OFF_STRIDE + 4)
 
+#define HINIC_CSR_CEQ_LO_PHYS_ADDR_REG(q_id, pg_num)\
+   (HINIC_CSR_CEQ_MTT_OFF(q_id) +  \
+(pg_num) * HINIC_CSR_EQ_PAGE_OFF_STRIDE + 4)
+
 #define HINIC_AEQ_CTRL_0_ADDR_BASE  0xE00
 #define HINIC_AEQ_CTRL_1_ADDR_BASE  0xE04
 #define HINIC_AEQ_CONS_IDX_ADDR_BASE0xE08
 #define HINIC_AEQ_PROD_IDX_ADDR_BASE0xE0C
 
+#define HINIC_CEQ_CTRL_0_ADDR_BASE  0x1000
+#define HINIC_CEQ_CTRL_1_ADDR_BASE  0x1004
+#define HINIC_CEQ_CONS_IDX_ADDR_BASE0x1008
+#define HINIC_CEQ_PROD_IDX_ADDR_BASE0x100C
+
 #define HINIC_EQ_OFF_STRIDE 0x80
 
 #define HINIC_CSR_AEQ_CTRL_0_ADDR(idx)  \
@@ -116,4 +133,16 @@
 #define HINIC_CSR_AEQ_PROD_IDX_ADDR(idx)\
(HINIC_AEQ_PROD_IDX_ADDR_BASE + (idx) * HINIC_EQ_OFF_STRIDE)
 
+#define HINIC_CSR_CEQ_CTRL_0_ADDR(idx)  \
+   (HINIC_CEQ_CTRL_0_ADDR_BASE + (idx) * HINIC_EQ_OFF_STRIDE)
+
+#define HINIC_CSR_CEQ_CTRL_1_ADDR(idx)  \
+   (HINIC_CEQ_CTRL_1_ADDR_BASE + (idx) * HINIC_EQ_OFF_STRIDE)
+
+#define 

[PATCH V2 net-next 10/21] net-next/hinic: Add logical Txq and Rxq

2017-07-19 Thread Aviad Krawczyk
Create the logical queues of the nic.

Signed-off-by: Aviad Krawczyk 
Signed-off-by: Zhao Chen 
---
 drivers/net/ethernet/huawei/hinic/Makefile   |   5 +-
 drivers/net/ethernet/huawei/hinic/hinic_dev.h|   5 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c | 133 ++
 drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h |  20 +++
 drivers/net/ethernet/huawei/hinic/hinic_hw_io.c  | 144 +++
 drivers/net/ethernet/huawei/hinic/hinic_hw_io.h  |  46 ++
 drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h  |  32 +
 drivers/net/ethernet/huawei/hinic/hinic_main.c   | 172 ++-
 drivers/net/ethernet/huawei/hinic/hinic_rx.c |  72 ++
 drivers/net/ethernet/huawei/hinic/hinic_rx.h |  46 ++
 drivers/net/ethernet/huawei/hinic/hinic_tx.c |  75 ++
 drivers/net/ethernet/huawei/hinic/hinic_tx.h |  49 +++
 12 files changed, 795 insertions(+), 4 deletions(-)
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_hw_io.c
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_hw_io.h
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_rx.c
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_rx.h
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_tx.c
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_tx.h

diff --git a/drivers/net/ethernet/huawei/hinic/Makefile 
b/drivers/net/ethernet/huawei/hinic/Makefile
index 08951a6..ce0787c 100644
--- a/drivers/net/ethernet/huawei/hinic/Makefile
+++ b/drivers/net/ethernet/huawei/hinic/Makefile
@@ -1,4 +1,5 @@
 obj-$(CONFIG_HINIC) += hinic.o
 
-hinic-y := hinic_main.o hinic_port.o hinic_hw_dev.o hinic_hw_mgmt.o \
-  hinic_hw_api_cmd.o hinic_hw_eqs.o hinic_hw_if.o
\ No newline at end of file
+hinic-y := hinic_main.o hinic_tx.o hinic_rx.o hinic_port.o hinic_hw_dev.o \
+  hinic_hw_io.o hinic_hw_mgmt.o hinic_hw_api_cmd.o hinic_hw_eqs.o \
+  hinic_hw_if.o
\ No newline at end of file
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_dev.h 
b/drivers/net/ethernet/huawei/hinic/hinic_dev.h
index 91a3356..72bd5d0 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_dev.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_dev.h
@@ -23,6 +23,8 @@
 #include 
 
 #include "hinic_hw_dev.h"
+#include "hinic_tx.h"
+#include "hinic_rx.h"
 
 #define HINIC_DRV_NAME  "HiNIC"
 
@@ -49,6 +51,9 @@ struct hinic_dev {
 
struct hinic_rx_mode_work   rx_mode_work;
struct workqueue_struct *workq;
+
+   struct hinic_txq*txqs;
+   struct hinic_rxq*rxqs;
 };
 
 #endif
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c 
b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
index 288b4b0..1568d0a 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
@@ -24,6 +24,8 @@
 #include "hinic_hw_if.h"
 #include "hinic_hw_eqs.h"
 #include "hinic_hw_mgmt.h"
+#include "hinic_hw_qp.h"
+#include "hinic_hw_io.h"
 #include "hinic_hw_dev.h"
 
 #define MAX_IRQS(max_qps, num_aeqs, num_ceqs)   \
@@ -232,6 +234,101 @@ int hinic_port_msg_cmd(struct hinic_hwdev *hwdev, enum 
hinic_port_cmd cmd,
 }
 
 /**
+ * get_base_qpn - get the first qp number
+ * @hwdev: the NIC HW device
+ * @base_qpn: returned qp number
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int get_base_qpn(struct hinic_hwdev *hwdev, u16 *base_qpn)
+{
+   struct hinic_hwif *hwif = hwdev->hwif;
+   struct pci_dev *pdev = hwif->pdev;
+   struct hinic_cmd_base_qpn cmd_base_qpn;
+   u16 out_size;
+   int err;
+
+   cmd_base_qpn.func_idx = HINIC_HWIF_GLOB_IDX(hwif);
+
+   err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_GET_GLOBAL_QPN,
+                            &cmd_base_qpn, sizeof(cmd_base_qpn),
+                            &cmd_base_qpn, &out_size);
+   if (err || (out_size != sizeof(cmd_base_qpn)) || cmd_base_qpn.status) {
+   dev_err(&pdev->dev, "Failed to get base qpn, status = %d\n",
+   cmd_base_qpn.status);
+   return -EFAULT;
+   }
+
+   *base_qpn = cmd_base_qpn.qpn;
+   return 0;
+}
+
+/**
+ * hinic_hwdev_ifup - Preparing the HW for passing IO
+ * @hwdev: the NIC HW device
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_hwdev_ifup(struct hinic_hwdev *hwdev)
+{
+   struct hinic_func_to_io *func_to_io = &hwdev->func_to_io;
+   struct hinic_cap *nic_cap = &hwdev->nic_cap;
+   struct hinic_hwif *hwif = hwdev->hwif;
+   struct pci_dev *pdev = hwif->pdev;
+   int num_qps = nic_cap->num_qps;
+   int max_qps = nic_cap->max_qps;
+   struct msix_entry *sq_msix_entries;
+   struct msix_entry *rq_msix_entries;
+   u16 base_qpn;
+   int err, num_aeqs, num_ceqs;
+
+   err = get_base_qpn(hwdev, &base_qpn);
+   if (err) {
+   

[PATCH V2 net-next 19/21] net-next/hinic: Add Tx operation

2017-07-19 Thread Aviad Krawczyk
Add transmit operation for sending data by qp operations.

Signed-off-by: Aviad Krawczyk 
Signed-off-by: Zhao Chen 
---
 drivers/net/ethernet/huawei/hinic/hinic_dev.h |   1 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c  |  47 +++
 drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h  |  22 ++
 drivers/net/ethernet/huawei/hinic/hinic_hw_io.h   |   1 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.h |   2 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c   | 244 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h   | 197 ++
 drivers/net/ethernet/huawei/hinic/hinic_main.c|  12 +-
 drivers/net/ethernet/huawei/hinic/hinic_tx.c  | 419 ++
 drivers/net/ethernet/huawei/hinic/hinic_tx.h  |  11 +
 10 files changed, 954 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_dev.h 
b/drivers/net/ethernet/huawei/hinic/hinic_dev.h
index 5fcde35..d798bab 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_dev.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_dev.h
@@ -43,6 +43,7 @@ struct hinic_dev {
struct hinic_hwdev  *hwdev;
 
u32 msg_enable;
+   unsigned inttx_weight;
unsigned intrx_weight;
 
unsigned intflags;
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c 
b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
index e7481ed..862bfe2 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
@@ -39,6 +39,8 @@
 #define MAX_IRQS(max_qps, num_aeqs, num_ceqs)   \
 (2 * (max_qps) + (num_aeqs) + (num_ceqs))
 
+#define ADDR_IN_4BYTES(addr)((addr) >> 2)
+
 enum intr_type {
INTR_MSIX_TYPE,
 };
@@ -1009,3 +1011,48 @@ int hinic_hwdev_msix_set(struct hinic_hwdev *hwdev, u16 
msix_index,
   lli_timer_cfg, lli_credit_limit,
   resend_timer);
 }
+
+/**
+ * hinic_hwdev_hw_ci_addr_set - set cons idx addr and attributes in HW for sq
+ * @hwdev: the NIC HW device
+ * @sq: send queue
+ * @pending_limit: the maximum pending update ci events (unit 8)
+ * @coalesc_timer: coalesc period for update ci (unit 8 us)
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_hwdev_hw_ci_addr_set(struct hinic_hwdev *hwdev, struct hinic_sq *sq,
+  u8 pending_limit, u8 coalesc_timer)
+{
+   struct hinic_hwif *hwif = hwdev->hwif;
+   struct pci_dev *pdev = hwif->pdev;
+   struct hinic_qp *qp = container_of(sq, struct hinic_qp, sq);
+   struct hinic_pfhwdev *pfhwdev;
+   struct hinic_cmd_hw_ci hw_ci;
+
+   if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif)) {
+   dev_err(&pdev->dev, "Unsupported PCI Function type\n");
+   return -EINVAL;
+   }
+
+   hw_ci.dma_attr_off  = 0;
+   hw_ci.pending_limit = pending_limit;
+   hw_ci.coalesc_timer  = coalesc_timer;
+
+   hw_ci.msix_en = 1;
+   hw_ci.msix_entry_idx = sq->msix_entry;
+
+   hw_ci.func_idx = HINIC_HWIF_GLOB_IDX(hwif);
+
+   hw_ci.sq_id = qp->q_id;
+
+   hw_ci.ci_addr = ADDR_IN_4BYTES(sq->hw_ci_dma_addr);
+
+   pfhwdev = container_of(hwdev, struct hinic_pfhwdev, hwdev);
+
+   return hinic_msg_to_mgmt(&pfhwdev->pf_to_mgmt,
+                            HINIC_MOD_COMM,
+                            HINIC_COMM_CMD_SQ_HI_CI_SET,
+                            &hw_ci, sizeof(hw_ci), NULL,
+                            NULL, HINIC_MGMT_MSG_SYNC);
+}
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h 
b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
index 147d5b7..056a458 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
@@ -153,6 +153,25 @@ struct hinic_cmd_base_qpn {
u16 qpn;
 };
 
+struct hinic_cmd_hw_ci {
+   u8  status;
+   u8  version;
+   u8  rsvd0[6];
+
+   u16 func_idx;
+
+   u8  dma_attr_off;
+   u8  pending_limit;
+   u8  coalesc_timer;
+
+   u8  msix_en;
+   u16 msix_entry_idx;
+
+   u32 sq_id;
+   u32 rsvd1;
+   u64 ci_addr;
+};
+
 struct hinic_hwdev {
struct hinic_hwif   *hwif;
struct msix_entry   *msix_entries;
@@ -214,4 +233,7 @@ int hinic_hwdev_msix_set(struct hinic_hwdev *hwdev, u16 
msix_index,
 u8 lli_timer_cfg, u8 lli_credit_limit,
 u8 resend_timer);
 
+int hinic_hwdev_hw_ci_addr_set(struct hinic_hwdev *hwdev, struct hinic_sq *sq,
+  u8 pending_limit, u8 coalesc_timer);
+
 #endif
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_io.h 
b/drivers/net/ethernet/huawei/hinic/hinic_hw_io.h
index 6f40e6a..adb6417 100644
--- 

[PATCH V2 net-next 21/21] net-next/hinic: Add select_queue and netpoll

2017-07-19 Thread Aviad Krawczyk
Add more netdev operations.

Signed-off-by: Aviad Krawczyk 
Signed-off-by: Zhao Chen 
---
 MAINTAINERS|  7 ++
 drivers/net/ethernet/huawei/hinic/hinic_main.c | 35 ++
 2 files changed, 42 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 1347726..5c85602 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6269,6 +6269,13 @@ L:   linux-in...@vger.kernel.org
 S: Maintained
 F: drivers/input/touchscreen/htcpen.c
 
+HUAWEI ETHERNET DRIVER
+M: Aviad Krawczyk 
+L: netdev@vger.kernel.org
+S: Supported
+F: Documentation/networking/hinic.txt
+F: drivers/net/ethernet/huawei/*
+
 HUGETLB FILESYSTEM
 M: Nadia Yvette Chambers 
 S: Maintained
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c 
b/drivers/net/ethernet/huawei/hinic/hinic_main.c
index d0f8b9e..9e464cf 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_main.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c
@@ -786,6 +786,37 @@ static void hinic_get_stats64(struct net_device *netdev,
stats->tx_errors = nic_tx_stats->tx_dropped;
 }
 
+static u16 hinic_select_queue(struct net_device *netdev, struct sk_buff *skb,
+ void *accel_priv,
+ select_queue_fallback_t fallback)
+{
+   u16 qid;
+
+   if (skb_rx_queue_recorded(skb))
+   qid = skb_get_rx_queue(skb);
+   else
+   qid = fallback(netdev, skb);
+
+   return qid;
+}
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+static void hinic_netpoll(struct net_device *netdev)
+{
+   struct hinic_dev *nic_dev = netdev_priv(netdev);
+   struct hinic_hwdev *hwdev = nic_dev->hwdev;
+   int i, num_qps = hinic_hwdev_num_qps(hwdev);
+
+   for (i = 0; i < num_qps; i++) {
+   struct hinic_txq *txq = &nic_dev->txqs[i];
+   struct hinic_rxq *rxq = &nic_dev->rxqs[i];
+
+   napi_schedule(&txq->napi);
+   napi_schedule(&rxq->napi);
+   }
+}
+#endif
+
 static const struct net_device_ops hinic_netdev_ops = {
.ndo_open = hinic_open,
.ndo_stop = hinic_close,
@@ -798,6 +829,10 @@ static void hinic_get_stats64(struct net_device *netdev,
.ndo_start_xmit = hinic_xmit_frame,
.ndo_tx_timeout = hinic_tx_timeout,
.ndo_get_stats64 = hinic_get_stats64,
+   .ndo_select_queue = hinic_select_queue,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+   .ndo_poll_controller = hinic_netpoll,
+#endif
 };
 
 static void netdev_features_init(struct net_device *netdev)
-- 
1.9.1



[PATCH V2 net-next 20/21] net-next/hinic: Add ethtool and stats

2017-07-19 Thread Aviad Krawczyk
Add ethtool operations and statistics operations.

Signed-off-by: Aviad Krawczyk 
Signed-off-by: Zhao Chen 
---
 drivers/net/ethernet/huawei/hinic/hinic_dev.h  |   3 +
 drivers/net/ethernet/huawei/hinic/hinic_main.c | 218 -
 drivers/net/ethernet/huawei/hinic/hinic_port.c |  29 
 drivers/net/ethernet/huawei/hinic/hinic_port.h |  45 +
 drivers/net/ethernet/huawei/hinic/hinic_rx.c   |  19 +++
 drivers/net/ethernet/huawei/hinic/hinic_rx.h   |   2 +
 drivers/net/ethernet/huawei/hinic/hinic_tx.c   |  22 +++
 drivers/net/ethernet/huawei/hinic/hinic_tx.h   |   2 +
 8 files changed, 339 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_dev.h 
b/drivers/net/ethernet/huawei/hinic/hinic_dev.h
index d798bab..4dc1b09 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_dev.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_dev.h
@@ -56,6 +56,9 @@ struct hinic_dev {
 
struct hinic_txq*txqs;
struct hinic_rxq*rxqs;
+
+   struct hinic_txq_stats  tx_stats;
+   struct hinic_rxq_stats  rx_stats;
 };
 
 #endif
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c 
b/drivers/net/ethernet/huawei/hinic/hinic_main.c
index ef40c7d..d0f8b9e 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_main.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c
@@ -67,6 +67,178 @@
 
 static int change_mac_addr(struct net_device *netdev, const u8 *addr);
 
+static int hinic_get_link_ksettings(struct net_device *netdev,
+   struct ethtool_link_ksettings
+   *link_ksettings)
+{
+   struct hinic_dev *nic_dev = netdev_priv(netdev);
+   struct hinic_port_cap port_cap;
+   enum hinic_autoneg_cap autoneg_cap;
+   enum hinic_autoneg_state autoneg_state;
+   enum hinic_port_link_state link_state;
+   int err;
+
+   ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising);
+   ethtool_link_ksettings_add_link_mode(link_ksettings, supported,
+Autoneg);
+
+   link_ksettings->base.speed = SPEED_UNKNOWN;
+   link_ksettings->base.autoneg = AUTONEG_DISABLE;
+   link_ksettings->base.duplex = DUPLEX_UNKNOWN;
+
+   err = hinic_port_get_cap(nic_dev, &port_cap);
+   if (err) {
+   dev_err(>dev, "Failed to get port capabilities\n");
+   return err;
+   }
+
+   err = hinic_port_link_state(nic_dev, &link_state);
+   if (err) {
+   dev_err(>dev, "Failed to get port link state\n");
+   return err;
+   }
+
+   if (link_state != HINIC_LINK_STATE_UP) {
+   dev_info(>dev, "No link\n");
+   return err;
+   }
+
+   switch (port_cap.speed) {
+   case HINIC_SPEED_10MB_LINK:
+   link_ksettings->base.speed = SPEED_10;
+   break;
+
+   case HINIC_SPEED_100MB_LINK:
+   link_ksettings->base.speed = SPEED_100;
+   break;
+
+   case HINIC_SPEED_1000MB_LINK:
+   link_ksettings->base.speed = SPEED_1000;
+   break;
+
+   case HINIC_SPEED_10GB_LINK:
+   link_ksettings->base.speed = SPEED_10000;
+   break;
+
+   case HINIC_SPEED_25GB_LINK:
+   link_ksettings->base.speed = SPEED_25000;
+   break;
+
+   case HINIC_SPEED_40GB_LINK:
+   link_ksettings->base.speed = SPEED_40000;
+   break;
+
+   case HINIC_SPEED_100GB_LINK:
+   link_ksettings->base.speed = SPEED_100000;
+   break;
+
+   default:
+   link_ksettings->base.speed = SPEED_UNKNOWN;
+   break;
+   }
+
+   autoneg_cap = port_cap.autoneg_cap;
+   autoneg_state = port_cap.autoneg_state;
+
+   if (!!(autoneg_cap & HINIC_AUTONEG_SUPPORTED))
+   ethtool_link_ksettings_add_link_mode(link_ksettings,
+advertising, Autoneg);
+
+   link_ksettings->base.autoneg = (autoneg_state == HINIC_AUTONEG_ACTIVE) ?
+  AUTONEG_ENABLE : AUTONEG_DISABLE;
+   link_ksettings->base.duplex = (port_cap.duplex == HINIC_DUPLEX_FULL) ?
+ DUPLEX_FULL : DUPLEX_HALF;
+
+   return 0;
+}
+
+static void hinic_get_drvinfo(struct net_device *netdev,
+ struct ethtool_drvinfo *info)
+{
+   struct hinic_dev *nic_dev = netdev_priv(netdev);
+   struct hinic_hwdev *hwdev = nic_dev->hwdev;
+   struct hinic_hwif *hwif = hwdev->hwif;
+   struct pci_dev *pdev = hwif->pdev;
+
+   strlcpy(info->driver, HINIC_DRV_NAME, sizeof(info->driver));
+   strlcpy(info->bus_info, pci_name(pdev), sizeof(info->bus_info));
+}
+
+static void hinic_get_ringparam(struct net_device *netdev,
+   struct 

[PATCH V2 net-next 18/21] net-next/hinic: Add Rx handler

2017-07-19 Thread Aviad Krawczyk
Set the io resources in the nic and handle rx events by qp operations.

Signed-off-by: Aviad Krawczyk 
Signed-off-by: Zhao Chen 
---
 drivers/net/ethernet/huawei/hinic/hinic_dev.h |   1 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h  |   1 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c  | 361 ++
 drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h  |  77 
 drivers/net/ethernet/huawei/hinic/hinic_hw_if.c   |  36 ++
 drivers/net/ethernet/huawei/hinic/hinic_hw_if.h   |  35 ++
 drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.h |  15 +-
 drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c   | 195 ++
 drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h   |  81 
 drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c   |  12 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h   |   2 +
 drivers/net/ethernet/huawei/hinic/hinic_main.c|  24 ++
 drivers/net/ethernet/huawei/hinic/hinic_port.c|  32 ++
 drivers/net/ethernet/huawei/hinic/hinic_port.h|  19 +
 drivers/net/ethernet/huawei/hinic/hinic_rx.c  | 427 ++
 drivers/net/ethernet/huawei/hinic/hinic_rx.h  |   7 +
 16 files changed, 1323 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_dev.h 
b/drivers/net/ethernet/huawei/hinic/hinic_dev.h
index 72bd5d0..5fcde35 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_dev.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_dev.h
@@ -43,6 +43,7 @@ struct hinic_dev {
struct hinic_hwdev  *hwdev;
 
u32 msg_enable;
+   unsigned intrx_weight;
 
unsigned intflags;
 
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h 
b/drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h
index 10b8c7b..f39b184 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h
@@ -20,6 +20,7 @@
 #define HINIC_CSR_FUNC_ATTR0_ADDR   0x0
 #define HINIC_CSR_FUNC_ATTR1_ADDR   0x4
 
+#define HINIC_CSR_FUNC_ATTR4_ADDR   0x10
 #define HINIC_CSR_FUNC_ATTR5_ADDR   0x14
 
 #define HINIC_DMA_ATTR_BASE 0xC80
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c 
b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
index 8c21b3a..e7481ed 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
@@ -20,6 +20,9 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 
 #include "hinic_hw_if.h"
 #include "hinic_hw_eqs.h"
@@ -29,6 +32,10 @@
 #include "hinic_hw_io.h"
 #include "hinic_hw_dev.h"
 
+#define IO_STATUS_TIMEOUT   100
+#define OUTBOUND_STATE_TIMEOUT  100
+#define DB_STATE_TIMEOUT100
+
 #define MAX_IRQS(max_qps, num_aeqs, num_ceqs)   \
 (2 * (max_qps) + (num_aeqs) + (num_ceqs))
 
@@ -36,6 +43,15 @@ enum intr_type {
INTR_MSIX_TYPE,
 };
 
+enum io_status {
+   IO_STOPPED = 0,
+   IO_RUNNING = 1,
+};
+
+enum hw_ioctxt_set_cmdq_depth {
+   HW_IOCTXT_SET_CMDQ_DEPTH_DEFAULT,
+};
+
 /* HW struct */
 struct hinic_dev_cap {
u8  status;
@@ -50,6 +66,31 @@ struct hinic_dev_cap {
u8  rsvd3[208];
 };
 
+struct rx_buf_sz {
+   int idx;
+   size_t  sz;
+};
+
+static struct rx_buf_sz rx_buf_sz_table[] = {
+   {0, 32},
+   {1, 64},
+   {2, 96},
+   {3, 128},
+   {4, 192},
+   {5, 256},
+   {6, 384},
+   {7, 512},
+   {8, 768},
+   {9, 1024},
+   {10, 1536},
+   {11, 2048},
+   {12, 3072},
+   {13, 4096},
+   {14, 8192},
+   {15, 16384},
+   {-1, -1},
+};
+
 /**
  * get_capability - convert device capabilities to NIC capabilities
  * @hwdev: the HW device to set and convert device capabilities for
@@ -238,6 +279,252 @@ int hinic_port_msg_cmd(struct hinic_hwdev *hwdev, enum 
hinic_port_cmd cmd,
 }
 
 /**
+ * init_fw_ctxt- Init Firmware tables before network mgmt and io operations
+ * @hwdev: the NIC HW device
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int init_fw_ctxt(struct hinic_hwdev *hwdev)
+{
+   struct hinic_hwif *hwif = hwdev->hwif;
+   struct pci_dev *pdev = hwif->pdev;
+   struct hinic_pfhwdev *pfhwdev;
+   struct hinic_cmd_fw_ctxt fw_ctxt;
+   u16 out_size;
+   int err;
+
+   if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif)) {
+   dev_err(&pdev->dev, "Unsupported PCI Function type\n");
+   return -EINVAL;
+   }
+
+   fw_ctxt.func_idx = HINIC_HWIF_GLOB_IDX(hwif);
+   fw_ctxt.rx_buf_sz = HINIC_RX_BUF_SZ;
+
+   pfhwdev = container_of(hwdev, struct hinic_pfhwdev, hwdev);
+
+   err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_FWCTXT_INIT,
+                            &fw_ctxt, sizeof(fw_ctxt),
+

[PATCH V2 net-next 17/21] net-next/hinic: Add cmdq completion handler

2017-07-19 Thread Aviad Krawczyk
Add cmdq completion handler for getting a notification about the
completion of cmdq commands.

Signed-off-by: Aviad Krawczyk 
Signed-off-by: Zhao Chen 
---
 drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c | 286 +-
 drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.h |  12 +
 2 files changed, 297 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c 
b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
index 1bc51d7..5761e74 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
@@ -38,12 +38,31 @@
 #include "hinic_hw_io.h"
 #include "hinic_hw_dev.h"
 
+#define CMDQ_CEQE_TYPE_SHIFT0
+
+#define CMDQ_CEQE_TYPE_MASK 0x7
+
+#define CMDQ_CEQE_GET(val, member)  \
+   (((val) >> CMDQ_CEQE_##member##_SHIFT) \
+& CMDQ_CEQE_##member##_MASK)
+
+#define CMDQ_WQE_ERRCODE_VAL_SHIFT  20
+
+#define CMDQ_WQE_ERRCODE_VAL_MASK   0xF
+
+#define CMDQ_WQE_ERRCODE_GET(val, member)   \
+   (((val) >> CMDQ_WQE_ERRCODE_##member##_SHIFT) \
+& CMDQ_WQE_ERRCODE_##member##_MASK)
+
 #define CMDQ_DB_PI_OFF(pi)  (((u16)LOWER_8_BITS(pi)) << 3)
 
 #define CMDQ_DB_ADDR(db_base, pi)   ((db_base) + CMDQ_DB_PI_OFF(pi))
 
 #define CMDQ_WQE_HEADER(wqe)((struct hinic_cmdq_header *)(wqe))
 
+#define CMDQ_WQE_COMPLETED(ctrl_info)   \
+   HINIC_CMDQ_CTRL_GET(ctrl_info, HW_BUSY_BIT)
+
 #define FIRST_DATA_TO_WRITE_LASTsizeof(u64)
 
 #define CMDQ_DB_OFF SZ_2K
@@ -109,6 +128,9 @@ enum completion_request {
CEQ_SET,
 };
 
+static void clear_wqe_complete_bit(struct hinic_cmdq *cmdq,
+  struct hinic_cmdq_wqe *wqe);
+
 /**
  * hinic_alloc_cmdq_buf - alloc buffer for sending command
  * @cmdqs: the cmdqs
@@ -143,6 +165,22 @@ void hinic_free_cmdq_buf(struct hinic_cmdqs *cmdqs,
pci_pool_free(cmdqs->cmdq_buf_pool, cmdq_buf->buf, cmdq_buf->dma_addr);
 }
 
+static int cmdq_wqe_size_from_bdlen(enum bufdesc_len len)
+{
+   int wqe_size = 0;
+
+   switch (len) {
+   case BUFDESC_LCMD_LEN:
+   wqe_size = WQE_LCMD_SIZE;
+   break;
+   case BUFDESC_SCMD_LEN:
+   wqe_size = WQE_SCMD_SIZE;
+   break;
+   }
+
+   return wqe_size;
+}
+
 static void cmdq_set_sge_completion(struct hinic_cmdq_completion *completion,
struct hinic_cmdq_buf *buf_out)
 {
@@ -209,6 +247,15 @@ static void cmdq_set_lcmd_bufdesc(struct 
hinic_cmdq_wqe_lcmd *wqe_lcmd,
hinic_set_sge(&wqe_lcmd->buf_desc.sge, buf_in->dma_addr, buf_in->size);
 }
 
+static void cmdq_set_direct_wqe_data(struct hinic_cmdq_direct_wqe *wqe,
+void *buf_in, u32 in_size)
+{
+   struct hinic_cmdq_wqe_scmd *wqe_scmd = &wqe->wqe_scmd;
+
+   wqe_scmd->buf_desc.buf_len = in_size;
+   memcpy(wqe_scmd->buf_desc.data, buf_in, in_size);
+}
+
 static void cmdq_set_lcmd_wqe(struct hinic_cmdq_wqe *wqe,
  enum cmdq_cmd_type cmd_type,
  struct hinic_cmdq_buf *buf_in,
@@ -237,6 +284,34 @@ static void cmdq_set_lcmd_wqe(struct hinic_cmdq_wqe *wqe,
cmdq_set_lcmd_bufdesc(wqe_lcmd, buf_in);
 }
 
+static void cmdq_set_direct_wqe(struct hinic_cmdq_wqe *wqe,
+   enum cmdq_cmd_type cmd_type,
+   void *buf_in, u16 in_size,
+   struct hinic_cmdq_buf *buf_out, int wrapped,
+   enum hinic_cmd_ack_type ack_type,
+   enum hinic_mod_type mod, u8 cmd, u16 prod_idx)
+{
+   struct hinic_cmdq_direct_wqe *direct_wqe = &wqe->direct_wqe;
+   struct hinic_cmdq_wqe_scmd *wqe_scmd = &direct_wqe->wqe_scmd;
+   enum completion_format complete_format;
+
+   switch (cmd_type) {
+   case CMDQ_CMD_SYNC_SGE_RESP:
+   complete_format = COMPLETE_SGE;
+   cmdq_set_sge_completion(&wqe_scmd->completion, buf_out);
+   break;
+   case CMDQ_CMD_SYNC_DIRECT_RESP:
+   complete_format = COMPLETE_DIRECT;
+   wqe_scmd->completion.direct_resp = 0;
+   break;
+   }
+
+   cmdq_prepare_wqe_ctrl(wqe, wrapped, ack_type, mod, cmd, prod_idx,
+ complete_format, DATA_DIRECT, BUFDESC_SCMD_LEN);
+
+   cmdq_set_direct_wqe_data(direct_wqe, buf_in, in_size);
+}
+
 static void cmdq_wqe_fill(void *dst, void *src)
 {
memcpy(dst + FIRST_DATA_TO_WRITE_LAST, src + FIRST_DATA_TO_WRITE_LAST,
@@ -355,6 +430,50 @@ static int cmdq_sync_cmd_direct_resp(struct hinic_cmdq 
*cmdq,
return 0;
 }
 
+static int cmdq_set_arm_bit(struct hinic_cmdq *cmdq, void *buf_in,
+   

[PATCH V2 net-next 14/21] net-next/hinic: Initialize cmdq

2017-07-19 Thread Aviad Krawczyk
Create the work queues for cmdq and update the nic about the work queue
contexts. cmdq commands are used for updating the nic about the qp
contexts.

Signed-off-by: Aviad Krawczyk 
Signed-off-by: Zhao Chen 
---
 drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c | 284 +-
 drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.h |  53 
 drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.h  |   2 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.h |   5 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c   | 157 
 drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h   |   8 +
 6 files changed, 503 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c 
b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
index 2fd3924..f3a6e24 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
@@ -13,11 +13,49 @@
  *
  */
 
+#include 
 #include 
 #include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
 
 #include "hinic_hw_if.h"
+#include "hinic_hw_mgmt.h"
+#include "hinic_hw_wq.h"
 #include "hinic_hw_cmdq.h"
+#include "hinic_hw_io.h"
+#include "hinic_hw_dev.h"
+
+#define CMDQ_DB_OFF SZ_2K
+
+#define CMDQ_WQEBB_SIZE 64
+#define CMDQ_DEPTH  SZ_4K
+
+#define CMDQ_WQ_PAGE_SIZE   SZ_4K
+
+#define WQE_LCMD_SIZE   64
+#define WQE_SCMD_SIZE   64
+
+#define CMDQ_PFN(addr, page_size)   ((addr) >> (ilog2(page_size)))
+
+#define cmdq_to_cmdqs(cmdq) container_of((cmdq) - (cmdq)->cmdq_type, \
+struct hinic_cmdqs, cmdq[0])
+
+#define cmdqs_to_func_to_io(cmdqs)  container_of(cmdqs, \
+struct hinic_func_to_io, \
+cmdqs)
+
+enum cmdq_wqe_type {
+   WQE_LCMD_TYPE,
+   WQE_SCMD_TYPE,
+};
 
 /**
  * hinic_alloc_cmdq_buf - alloc buffer for sending command
@@ -29,8 +67,17 @@
 int hinic_alloc_cmdq_buf(struct hinic_cmdqs *cmdqs,
 struct hinic_cmdq_buf *cmdq_buf)
 {
-   /* should be implemented */
-   return -ENOMEM;
+   struct hinic_hwif *hwif = cmdqs->hwif;
+   struct pci_dev *pdev = hwif->pdev;
+
+   cmdq_buf->buf = pci_pool_alloc(cmdqs->cmdq_buf_pool, GFP_KERNEL,
+  &cmdq_buf->dma_addr);
+   if (!cmdq_buf->buf) {
+   dev_err(&pdev->dev, "Failed to allocate cmd from the pool\n");
+   return -ENOMEM;
+   }
+
+   return 0;
 }
 
 /**
@@ -41,7 +88,7 @@ int hinic_alloc_cmdq_buf(struct hinic_cmdqs *cmdqs,
 void hinic_free_cmdq_buf(struct hinic_cmdqs *cmdqs,
 struct hinic_cmdq_buf *cmdq_buf)
 {
-   /* should be implemented */
+   pci_pool_free(cmdqs->cmdq_buf_pool, cmdq_buf->buf, cmdq_buf->dma_addr);
 }
 
 /**
@@ -63,6 +110,172 @@ int hinic_cmdq_direct_resp(struct hinic_cmdqs *cmdqs,
 }
 
 /**
+ * cmdq_init_queue_ctxt - init the queue ctxt of a cmdq
+ * @cmdq: the cmdq
+ * @cmdq_pages: the memory of the queue
+ * @cmdq_ctxt: returned cmdq ctxt
+ **/
+static void cmdq_init_queue_ctxt(struct hinic_cmdq *cmdq,
+struct hinic_cmdq_pages *cmdq_pages,
+struct hinic_cmdq_ctxt *cmdq_ctxt)
+{
+   struct hinic_cmdqs *cmdqs = cmdq_to_cmdqs(cmdq);
+   struct hinic_hwif *hwif = cmdqs->hwif;
+   struct hinic_wq *wq = cmdq->wq;
+   struct hinic_cmdq_ctxt_info *ctxt_info = &cmdq_ctxt->ctxt_info;
+   u16 start_ci = atomic_read(&wq->cons_idx);
+   u64 wq_first_page_paddr, cmdq_first_block_paddr, pfn;
+
+   /* The data in the HW is in Big Endian Format */
+   wq_first_page_paddr = be64_to_cpu(*wq->block_vaddr);
+
+   pfn = CMDQ_PFN(wq_first_page_paddr, wq->wq_page_size);
+
+   ctxt_info->curr_wqe_page_pfn =
+   HINIC_CMDQ_CTXT_PAGE_INFO_SET(pfn, CURR_WQE_PAGE_PFN)   |
+   HINIC_CMDQ_CTXT_PAGE_INFO_SET(HINIC_CEQ_ID_CMDQ, EQ_ID) |
+   HINIC_CMDQ_CTXT_PAGE_INFO_SET(1, CEQ_ARM)   |
+   HINIC_CMDQ_CTXT_PAGE_INFO_SET(1, CEQ_EN)|
+   HINIC_CMDQ_CTXT_PAGE_INFO_SET(cmdq->wrapped, WRAPPED);
+
+   /* block PFN - Read Modify Write */
+   cmdq_first_block_paddr = cmdq_pages->page_paddr;
+
+   pfn = CMDQ_PFN(cmdq_first_block_paddr, wq->wq_page_size);
+
+   ctxt_info->wq_block_pfn =
+   HINIC_CMDQ_CTXT_BLOCK_INFO_SET(pfn, WQ_BLOCK_PFN) |
+   HINIC_CMDQ_CTXT_BLOCK_INFO_SET(start_ci, CI);
+
+   cmdq_ctxt->func_idx = HINIC_HWIF_GLOB_IDX(hwif);
+   cmdq_ctxt->cmdq_type  = cmdq->cmdq_type;
+}
+
+/**
+ * init_cmdq - initialize cmdq
+ * @cmdq: the cmdq
+ * @wq: the wq attached to the cmdq
+ * @q_type: the cmdq type of the cmdq
+ * @db_area: doorbell area for 

[PATCH V2 net-next 12/21] net-next/hinic: Add qp resources

2017-07-19 Thread Aviad Krawczyk
Create the resources for queue pair operations: doorbell area,
consumer index address and producer index address.

Signed-off-by: Aviad Krawczyk 
Signed-off-by: Zhao Chen 
---
 drivers/net/ethernet/huawei/hinic/Makefile  |   4 +-
 drivers/net/ethernet/huawei/hinic/hinic_hw_if.h |   1 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_io.c | 167 ++-
 drivers/net/ethernet/huawei/hinic/hinic_hw_io.h |  27 +++
 drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c | 264 
 drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h |  60 +-
 6 files changed, 518 insertions(+), 5 deletions(-)
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c

diff --git a/drivers/net/ethernet/huawei/hinic/Makefile 
b/drivers/net/ethernet/huawei/hinic/Makefile
index 519382b..24728f0 100644
--- a/drivers/net/ethernet/huawei/hinic/Makefile
+++ b/drivers/net/ethernet/huawei/hinic/Makefile
@@ -1,5 +1,5 @@
 obj-$(CONFIG_HINIC) += hinic.o
 
 hinic-y := hinic_main.o hinic_tx.o hinic_rx.o hinic_port.o hinic_hw_dev.o \
-  hinic_hw_io.o hinic_hw_wq.o hinic_hw_mgmt.o hinic_hw_api_cmd.o \
-  hinic_hw_eqs.o hinic_hw_if.o
\ No newline at end of file
+  hinic_hw_io.o hinic_hw_qp.o hinic_hw_wq.o hinic_hw_mgmt.o \
+  hinic_hw_api_cmd.o hinic_hw_eqs.o hinic_hw_if.o
\ No newline at end of file
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h 
b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h
index 88df3c0..89f71e5 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h
@@ -137,6 +137,7 @@
 #define HINIC_IS_PPF(hwif)  (HINIC_FUNC_TYPE(hwif) == HINIC_PPF)
 
 #define HINIC_PCI_CFG_REGS_BAR  0
+#define HINIC_PCI_DB_BAR4
 
 #define HINIC_PCIE_ST_DISABLE   0
 #define HINIC_PCIE_AT_DISABLE   0
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c 
b/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c
index 419fcb0..fa789da 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c
@@ -13,17 +13,93 @@
  *
  */
 
+#include 
 #include 
 #include 
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 
 #include "hinic_hw_if.h"
 #include "hinic_hw_wq.h"
 #include "hinic_hw_qp.h"
 #include "hinic_hw_io.h"
 
+#define CI_Q_ADDR_SIZE  sizeof(u32)
+
+#define CI_ADDR(base_addr, q_id)((base_addr) + \
+(q_id) * CI_Q_ADDR_SIZE)
+
+#define CI_TABLE_SIZE(num_qps)  ((num_qps) * CI_Q_ADDR_SIZE)
+
+#define DB_IDX(db, db_base) \
+   (((unsigned long)(db) - (unsigned long)(db_base)) / HINIC_DB_PAGE_SIZE)
+
+static void init_db_area_idx(struct hinic_free_db_area *free_db_area)
+{
+   int i;
+
+   for (i = 0; i < HINIC_DB_MAX_AREAS; i++)
+   free_db_area->db_idx[i] = i;
+
+   free_db_area->alloc_pos = 0;
+   free_db_area->return_pos = HINIC_DB_MAX_AREAS;
+
+   free_db_area->num_free = HINIC_DB_MAX_AREAS;
+
+   sema_init(&free_db_area->idx_lock, 1);
+}
+
+static int get_db_area(struct hinic_func_to_io *func_to_io,
+  void __iomem **db_base)
+{
+   struct hinic_free_db_area *free_db_area = &func_to_io->free_db_area;
+   int pos, idx;
+
+   down(&free_db_area->idx_lock);
+
+   free_db_area->num_free--;
+
+   if (free_db_area->num_free < 0) {
+   free_db_area->num_free++;
+   up(&free_db_area->idx_lock);
+   return -ENOMEM;
+   }
+
+   pos = free_db_area->alloc_pos++;
+   pos &= HINIC_DB_MAX_AREAS - 1;
+
+   idx = free_db_area->db_idx[pos];
+
+   free_db_area->db_idx[pos] = -1;
+
+   up(&free_db_area->idx_lock);
+
+   *db_base = func_to_io->db_base + idx * HINIC_DB_PAGE_SIZE;
+   return 0;
+}
+
+static void return_db_area(struct hinic_func_to_io *func_to_io,
+  void __iomem *db_base)
+{
+   struct hinic_free_db_area *free_db_area = &func_to_io->free_db_area;
+   int pos, idx = DB_IDX(db_base, func_to_io->db_base);
+
+   down(&free_db_area->idx_lock);
+
+   pos = free_db_area->return_pos++;
+   pos &= HINIC_DB_MAX_AREAS - 1;
+
+   free_db_area->db_idx[pos] = idx;
+
+   free_db_area->num_free++;
+
+   up(&free_db_area->idx_lock);
+}
+
 /**
  * init_qp - Initialize a Queue Pair
  * @func_to_io: func to io channel that holds the IO components
@@ -41,6 +117,9 @@ static int init_qp(struct hinic_func_to_io *func_to_io,
 {
struct hinic_hwif *hwif = func_to_io->hwif;
struct pci_dev *pdev = hwif->pdev;
+   void *ci_addr_base = func_to_io->ci_addr_base;
+   dma_addr_t ci_dma_base = func_to_io->ci_dma_base;
+   void __iomem *db_base;
int err;
 
qp->q_id = q_id;
@@ -61,8 +140,40 @@ static int init_qp(struct hinic_func_to_io *func_to_io,
goto rq_alloc_err;
}
 
+   err = 

[PATCH V2 net-next 13/21] net-next/hinic: Set qp context

2017-07-19 Thread Aviad Krawczyk
Update the nic about the resources of the queue pairs.

Signed-off-by: Aviad Krawczyk 
Signed-off-by: Zhao Chen 
---
 drivers/net/ethernet/huawei/hinic/Makefile |   5 +-
 drivers/net/ethernet/huawei/hinic/hinic_common.c   |  55 ++
 drivers/net/ethernet/huawei/hinic/hinic_common.h   |  23 +++
 drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c  |  87 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.h  |  84 
 drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c   |   4 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_io.c| 153 +++
 drivers/net/ethernet/huawei/hinic/hinic_hw_io.h|   5 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c| 161 
 drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h|  11 ++
 .../net/ethernet/huawei/hinic/hinic_hw_qp_ctxt.h   | 214 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h|   9 +
 12 files changed, 809 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_common.c
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_common.h
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.h
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_hw_qp_ctxt.h

diff --git a/drivers/net/ethernet/huawei/hinic/Makefile 
b/drivers/net/ethernet/huawei/hinic/Makefile
index 24728f0..82c1f68 100644
--- a/drivers/net/ethernet/huawei/hinic/Makefile
+++ b/drivers/net/ethernet/huawei/hinic/Makefile
@@ -1,5 +1,6 @@
 obj-$(CONFIG_HINIC) += hinic.o
 
 hinic-y := hinic_main.o hinic_tx.o hinic_rx.o hinic_port.o hinic_hw_dev.o \
-  hinic_hw_io.o hinic_hw_qp.o hinic_hw_wq.o hinic_hw_mgmt.o \
-  hinic_hw_api_cmd.o hinic_hw_eqs.o hinic_hw_if.o
\ No newline at end of file
+  hinic_hw_io.o hinic_hw_qp.o hinic_hw_cmdq.o hinic_hw_wq.o \
+  hinic_hw_mgmt.o hinic_hw_api_cmd.o hinic_hw_eqs.o hinic_hw_if.o \
+  hinic_common.o
\ No newline at end of file
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_common.c 
b/drivers/net/ethernet/huawei/hinic/hinic_common.c
new file mode 100644
index 0000000..3b439e9
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_common.c
@@ -0,0 +1,55 @@
+/*
+ * Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ *
+ */
+
+#include 
+#include 
+
+#include "hinic_common.h"
+
+/**
+ * hinic_cpu_to_be32 - convert data to big endian 32 bit format
+ * @data: the data to convert
+ * @len: length of data to convert
+ **/
+void hinic_cpu_to_be32(void *data, int len)
+{
+   int i, chunk_sz = sizeof(u32);
+   u32 *mem = data;
+
+   len = len / chunk_sz;
+
+   for (i = 0; i < len; i++) {
+   *mem = cpu_to_be32(*mem);
+   mem++;
+   }
+}
+
+/**
+ * hinic_be32_to_cpu - convert data from big endian 32 bit format
+ * @data: the data to convert
+ * @len: length of data to convert
+ **/
+void hinic_be32_to_cpu(void *data, int len)
+{
+   int i, chunk_sz = sizeof(u32);
+   u32 *mem = data;
+
+   len = len / chunk_sz;
+
+   for (i = 0; i < len; i++) {
+   *mem = be32_to_cpu(*mem);
+   mem++;
+   }
+}
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_common.h 
b/drivers/net/ethernet/huawei/hinic/hinic_common.h
new file mode 100644
index 0000000..21921ec
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_common.h
@@ -0,0 +1,23 @@
+/*
+ * Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ *
+ */
+
+#ifndef HINIC_COMMON_H
+#define HINIC_COMMON_H
+
+void hinic_cpu_to_be32(void *data, int len);
+
+void hinic_be32_to_cpu(void *data, int len);
+
+#endif
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c 
b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
new file mode 100644
index 0000000..2fd3924
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
@@ -0,0 +1,87 @@
+/*
+ * Huawei HiNIC PCI 

[PATCH V2 net-next 11/21] net-next/hinic: Add wq

2017-07-19 Thread Aviad Krawczyk
Create the work queues that are used by the queue pairs.

Signed-off-by: Aviad Krawczyk 
Signed-off-by: Zhao Chen 
---
 drivers/net/ethernet/huawei/hinic/Makefile  |   4 +-
 drivers/net/ethernet/huawei/hinic/hinic_hw_io.c |  68 ++-
 drivers/net/ethernet/huawei/hinic/hinic_hw_io.h |   6 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h |  17 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c | 523 
 drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h |  86 
 6 files changed, 699 insertions(+), 5 deletions(-)
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h

diff --git a/drivers/net/ethernet/huawei/hinic/Makefile 
b/drivers/net/ethernet/huawei/hinic/Makefile
index ce0787c..519382b 100644
--- a/drivers/net/ethernet/huawei/hinic/Makefile
+++ b/drivers/net/ethernet/huawei/hinic/Makefile
@@ -1,5 +1,5 @@
 obj-$(CONFIG_HINIC) += hinic.o
 
 hinic-y := hinic_main.o hinic_tx.o hinic_rx.o hinic_port.o hinic_hw_dev.o \
-  hinic_hw_io.o hinic_hw_mgmt.o hinic_hw_api_cmd.o hinic_hw_eqs.o \
-  hinic_hw_if.o
\ No newline at end of file
+  hinic_hw_io.o hinic_hw_wq.o hinic_hw_mgmt.o hinic_hw_api_cmd.o \
+  hinic_hw_eqs.o hinic_hw_if.o
\ No newline at end of file
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c 
b/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c
index 7e8c4f3..419fcb0 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c
@@ -20,6 +20,7 @@
 #include 
 
 #include "hinic_hw_if.h"
+#include "hinic_hw_wq.h"
 #include "hinic_hw_qp.h"
 #include "hinic_hw_io.h"
 
@@ -38,8 +39,33 @@ static int init_qp(struct hinic_func_to_io *func_to_io,
   struct msix_entry *sq_msix_entry,
   struct msix_entry *rq_msix_entry)
 {
-   /* should be implemented */
+   struct hinic_hwif *hwif = func_to_io->hwif;
+   struct pci_dev *pdev = hwif->pdev;
+   int err;
+
+   qp->q_id = q_id;
+
+   err = hinic_wq_allocate(&func_to_io->wqs, &func_to_io->sq_wq[q_id],
+   HINIC_SQ_WQEBB_SIZE, HINIC_SQ_PAGE_SIZE,
+   HINIC_SQ_DEPTH, HINIC_SQ_WQE_MAX_SIZE);
+   if (err) {
+   dev_err(&pdev->dev, "Failed to allocate WQ for SQ\n");
+   return err;
+   }
+
+   err = hinic_wq_allocate(&func_to_io->wqs, &func_to_io->rq_wq[q_id],
+   HINIC_RQ_WQEBB_SIZE, HINIC_RQ_PAGE_SIZE,
+   HINIC_RQ_DEPTH, HINIC_RQ_WQE_SIZE);
+   if (err) {
+   dev_err(&pdev->dev, "Failed to allocate WQ for RQ\n");
+   goto rq_alloc_err;
+   }
+
return 0;
+
+rq_alloc_err:
+   hinic_wq_free(&func_to_io->wqs, &func_to_io->sq_wq[q_id]);
+   return err;
 }
 
 /**
@@ -50,7 +76,10 @@ static int init_qp(struct hinic_func_to_io *func_to_io,
 static void destroy_qp(struct hinic_func_to_io *func_to_io,
   struct hinic_qp *qp)
 {
-   /* should be implemented */
+   int q_id = qp->q_id;
+
+   hinic_wq_free(&func_to_io->wqs, &func_to_io->rq_wq[q_id]);
+   hinic_wq_free(&func_to_io->wqs, &func_to_io->sq_wq[q_id]);
 }
 
 /**
@@ -70,7 +99,7 @@ int hinic_io_create_qps(struct hinic_func_to_io *func_to_io,
 {
struct hinic_hwif *hwif = func_to_io->hwif;
struct pci_dev *pdev = hwif->pdev;
-   size_t qps_size;
+   size_t qps_size, wq_size;
int i, j, err;
 
qps_size = num_qps * sizeof(*func_to_io->qps);
@@ -78,6 +107,20 @@ int hinic_io_create_qps(struct hinic_func_to_io *func_to_io,
if (!func_to_io->qps)
return -ENOMEM;
 
+   wq_size = num_qps * sizeof(*func_to_io->sq_wq);
+   func_to_io->sq_wq = devm_kzalloc(&pdev->dev, wq_size, GFP_KERNEL);
+   if (!func_to_io->sq_wq) {
+   err = -ENOMEM;
+   goto sq_wq_err;
+   }
+
+   wq_size = num_qps * sizeof(*func_to_io->rq_wq);
+   func_to_io->rq_wq = devm_kzalloc(&pdev->dev, wq_size, GFP_KERNEL);
+   if (!func_to_io->rq_wq) {
+   err = -ENOMEM;
+   goto rq_wq_err;
+   }
+
for (i = 0; i < num_qps; i++) {
err = init_qp(func_to_io, &func_to_io->qps[i], i,
  &sq_msix_entries[i], &rq_msix_entries[i]);
@@ -93,6 +136,12 @@ int hinic_io_create_qps(struct hinic_func_to_io *func_to_io,
for (j = 0; j < i; j++)
destroy_qp(func_to_io, &func_to_io->qps[j]);
 
+   devm_kfree(&pdev->dev, func_to_io->rq_wq);
+
+rq_wq_err:
+   devm_kfree(&pdev->dev, func_to_io->sq_wq);
+
+sq_wq_err:
devm_kfree(&pdev->dev, func_to_io->qps);
return err;
 }
@@ -111,6 +160,9 @@ void hinic_io_destroy_qps(struct hinic_func_to_io 
*func_to_io, int num_qps)
for (i = 0; i < num_qps; i++)
destroy_qp(func_to_io, &func_to_io->qps[i]);
 
+   devm_kfree(&pdev->dev, func_to_io->rq_wq);
+   devm_kfree(>dev, 

[PATCH V2 net-next 08/21] net-next/hinic: Add port management commands

2017-07-19 Thread Aviad Krawczyk
Add the port management commands that are sent as management messages.
The port management commands are used for netdev operations.

Signed-off-by: Aviad Krawczyk 
Signed-off-by: Zhao Chen 
---
 drivers/net/ethernet/huawei/hinic/Makefile   |   4 +-
 drivers/net/ethernet/huawei/hinic/hinic_dev.h|   4 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c |  30 +++
 drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h |  29 +++
 drivers/net/ethernet/huawei/hinic/hinic_main.c   | 201 ++-
 drivers/net/ethernet/huawei/hinic/hinic_port.c   | 241 +++
 drivers/net/ethernet/huawei/hinic/hinic_port.h   |  68 +++
 7 files changed, 574 insertions(+), 3 deletions(-)
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_port.c
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_port.h

diff --git a/drivers/net/ethernet/huawei/hinic/Makefile 
b/drivers/net/ethernet/huawei/hinic/Makefile
index 88223d0..08951a6 100644
--- a/drivers/net/ethernet/huawei/hinic/Makefile
+++ b/drivers/net/ethernet/huawei/hinic/Makefile
@@ -1,4 +1,4 @@
 obj-$(CONFIG_HINIC) += hinic.o
 
-hinic-y := hinic_main.o hinic_hw_dev.o hinic_hw_mgmt.o hinic_hw_api_cmd.o \
-  hinic_hw_eqs.o hinic_hw_if.o
\ No newline at end of file
+hinic-y := hinic_main.o hinic_port.o hinic_hw_dev.o hinic_hw_mgmt.o \
+  hinic_hw_api_cmd.o hinic_hw_eqs.o hinic_hw_if.o
\ No newline at end of file
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_dev.h 
b/drivers/net/ethernet/huawei/hinic/hinic_dev.h
index f23e8af..dd540b4 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_dev.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_dev.h
@@ -18,6 +18,7 @@
 
 #include 
 #include 
+#include 
 
 #include "hinic_hw_dev.h"
 
@@ -28,6 +29,9 @@ struct hinic_dev {
struct hinic_hwdev  *hwdev;
 
u32 msg_enable;
+
+   struct semaphore        mgmt_lock;
+   unsigned long   *vlan_bitmap;
 };
 
 #endif
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c 
b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
index c61ff10..52d9bb7 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
@@ -202,6 +202,36 @@ static void disable_msix(struct hinic_hwdev *hwdev)
 }
 
 /**
+ * hinic_port_msg_cmd - send port msg to mgmt
+ * @hwdev: the NIC HW device
+ * @cmd: the port command
+ * @buf_in: input buffer
+ * @in_size: input size
+ * @buf_out: output buffer
+ * @out_size: returned output size
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_port_msg_cmd(struct hinic_hwdev *hwdev, enum hinic_port_cmd cmd,
+  void *buf_in, u16 in_size, void *buf_out, u16 *out_size)
+{
+   struct hinic_pfhwdev *pfhwdev;
+   struct hinic_hwif *hwif = hwdev->hwif;
+   struct pci_dev *pdev = hwif->pdev;
+
+   if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif)) {
+   dev_err(&pdev->dev, "unsupported PCI Function type\n");
+   return -EINVAL;
+   }
+
+   pfhwdev = container_of(hwdev, struct hinic_pfhwdev, hwdev);
+
+   return hinic_msg_to_mgmt(&pfhwdev->pf_to_mgmt, HINIC_MOD_L2NIC, cmd,
+buf_in, in_size, buf_out, out_size,
+HINIC_MGMT_MSG_SYNC);
+}
+
+/**
  * init_pfhwdev - Initialize the extended components of PF
  * @pfhwdev: the HW device for PF
  *
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h 
b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
index 5b4b686..b3325e7 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
@@ -30,6 +30,31 @@ struct hinic_cap {
u16 num_qps;
 };
 
+enum hinic_port_cmd {
+   HINIC_PORT_CMD_CHANGE_MTU = 2,
+
+   HINIC_PORT_CMD_ADD_VLAN = 3,
+   HINIC_PORT_CMD_DEL_VLAN = 4,
+
+   HINIC_PORT_CMD_SET_MAC = 9,
+   HINIC_PORT_CMD_GET_MAC = 10,
+   HINIC_PORT_CMD_DEL_MAC = 11,
+
+   HINIC_PORT_CMD_SET_RX_MODE = 12,
+
+   HINIC_PORT_CMD_GET_LINK_STATE = 24,
+
+   HINIC_PORT_CMD_SET_PORT_STATE = 41,
+
+   HINIC_PORT_CMD_FWCTXT_INIT = 69,
+
+   HINIC_PORT_CMD_SET_FUNC_STATE = 93,
+
+   HINIC_PORT_CMD_GET_GLOBAL_QPN = 102,
+
+   HINIC_PORT_CMD_GET_CAP = 170,
+};
+
 struct hinic_hwdev {
struct hinic_hwif   *hwif;
struct msix_entry   *msix_entries;
@@ -45,6 +70,10 @@ struct hinic_pfhwdev {
struct hinic_pf_to_mgmt pf_to_mgmt;
 };
 
+int hinic_port_msg_cmd(struct hinic_hwdev *hwdev, enum hinic_port_cmd cmd,
+  void *buf_in, u16 in_size, void *buf_out,
+  u16 *out_size);
+
 int hinic_init_hwdev(struct hinic_hwdev **hwdev, struct pci_dev *pdev);
 
 void hinic_free_hwdev(struct hinic_hwdev *hwdev);
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c 

[PATCH V2 net-next 00/21] Huawei HiNIC Ethernet Driver

2017-07-19 Thread Aviad Krawczyk
This patch set adds the HiNIC Ethernet driver for the hinic family of
PCIe Network Interface Cards.

Huawei's PCIe HiNIC card is a new Ethernet card, and hence a new driver
was needed.

The current driver is meant to be used for the Physical Function; support
for the Virtual Function and more features will follow once the basic PF
driver has been accepted.

Changes v1 -> v2:
1. remove driver version - Andrew Lunn comment
https://lkml.org/lkml/2017/7/12/372
2. replace kzalloc with devm_kzalloc for shorter cleanup - Andrew Lunn comment
https://lkml.org/lkml/2017/7/12/372
3. replace pr_ functions with dev_ functions - Andrew Lunn comment
https://lkml.org/lkml/2017/7/12/375
4. separate the last patch by moving the ops to a new patch - Andrew Lunn comment
https://lkml.org/lkml/2017/7/12/377

Aviad Krawczyk (21):
  net-next/hinic: Initialize hw interface
  net-next/hinic: Initialize hw device components
  net-next/hinic: Initialize api cmd resources
  net-next/hinic: Initialize api cmd hw
  net-next/hinic: Add management messages
  net-next/hinic: Add api cmd commands
  net-next/hinic: Add aeqs
  net-next/hinic: Add port management commands
  net-next/hinic: Add Rx mode and link event handler
  net-next/hinic: Add logical Txq and Rxq
  net-next/hinic: Add wq
  net-next/hinic: Add qp resources
  net-next/hinic: Set qp context
  net-next/hinic: Initialize cmdq
  net-next/hinic: Add ceqs
  net-next/hinic: Add cmdq commands
  net-next/hinic: Add cmdq completion handler
  net-next/hinic: Add Rx handler
  net-next/hinic: Add Tx operation
  net-next/hinic: Add ethtool and stats
  net-next/hinic: Add select_queue and netpoll

 Documentation/networking/hinic.txt |  125 +++
 MAINTAINERS|7 +
 drivers/net/ethernet/Kconfig   |1 +
 drivers/net/ethernet/Makefile  |1 +
 drivers/net/ethernet/huawei/Kconfig|   19 +
 drivers/net/ethernet/huawei/Makefile   |5 +
 drivers/net/ethernet/huawei/hinic/Kconfig  |   13 +
 drivers/net/ethernet/huawei/hinic/Makefile |6 +
 drivers/net/ethernet/huawei/hinic/hinic_common.c   |   80 ++
 drivers/net/ethernet/huawei/hinic/hinic_common.h   |   38 +
 drivers/net/ethernet/huawei/hinic/hinic_dev.h  |   64 ++
 .../net/ethernet/huawei/hinic/hinic_hw_api_cmd.c   |  978 +
 .../net/ethernet/huawei/hinic/hinic_hw_api_cmd.h   |  208 
 drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c  |  940 
 drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.h  |  302 ++
 drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h   |  149 +++
 drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c   | 1058 ++
 drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h   |  239 
 drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c   |  877 +++
 drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.h   |  265 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_if.c|  353 ++
 drivers/net/ethernet/huawei/hinic/hinic_hw_if.h|  272 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_io.c|  537 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_io.h|   97 ++
 drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c  |  599 ++
 drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.h  |  153 +++
 drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c|  864 +++
 drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h|  394 +++
 .../net/ethernet/huawei/hinic/hinic_hw_qp_ctxt.h   |  214 
 drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c|  885 +++
 drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h|  113 ++
 drivers/net/ethernet/huawei/hinic/hinic_main.c | 1142 
 .../net/ethernet/huawei/hinic/hinic_pci_id_tbl.h   |   27 +
 drivers/net/ethernet/huawei/hinic/hinic_port.c |  403 +++
 drivers/net/ethernet/huawei/hinic/hinic_port.h |  198 
 drivers/net/ethernet/huawei/hinic/hinic_rx.c   |  518 +
 drivers/net/ethernet/huawei/hinic/hinic_rx.h   |   55 +
 drivers/net/ethernet/huawei/hinic/hinic_tx.c   |  516 +
 drivers/net/ethernet/huawei/hinic/hinic_tx.h   |   62 ++
 39 files changed, 12777 insertions(+)
 create mode 100644 Documentation/networking/hinic.txt
 create mode 100644 drivers/net/ethernet/huawei/Kconfig
 create mode 100644 drivers/net/ethernet/huawei/Makefile
 create mode 100644 drivers/net/ethernet/huawei/hinic/Kconfig
 create mode 100644 drivers/net/ethernet/huawei/hinic/Makefile
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_common.c
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_common.h
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_dev.h
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.h
 create mode 100644 

[PATCH V2 net-next 05/21] net-next/hinic: Add management messages

2017-07-19 Thread Aviad Krawczyk
Add the management messages for sending to api cmd and the asynchronous
event handler for the completion of the messages.

Signed-off-by: Aviad Krawczyk 
Signed-off-by: Zhao Chen 
---
 .../net/ethernet/huawei/hinic/hinic_hw_api_cmd.c   |  35 ++
 .../net/ethernet/huawei/hinic/hinic_hw_api_cmd.h   |   3 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_if.h|   5 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c  | 441 -
 drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.h  |  59 +++
 5 files changed, 540 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c 
b/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c
index c07b02c..eacb33b 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c
@@ -53,6 +53,41 @@ enum api_cmd_xor_chk_level {
 };
 
 /**
+ * api_cmd - API CMD command
+ * @chain: chain for the command
+ * @dest: destination node on the card that will receive the command
+ * @cmd: command data
+ * @cmd_size: the command size
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int api_cmd(struct hinic_api_cmd_chain *chain,
+  enum hinic_node_id dest, void *cmd, u16 cmd_size)
+{
+   /* should be implemented */
+   return -EINVAL;
+}
+
+/**
+ * hinic_api_cmd_write - Write API CMD command
+ * @chain: chain for write command
+ * @dest: destination node on the card that will receive the command
+ * @cmd: command data
+ * @size: the command size
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_api_cmd_write(struct hinic_api_cmd_chain *chain,
+   enum hinic_node_id dest, void *cmd, u16 size)
+{
+   /* Verify the chain type */
+   if (chain->chain_type == HINIC_API_CMD_WRITE_TO_MGMT_CPU)
+   return api_cmd(chain, dest, cmd, size);
+
+   return -EINVAL;
+}
+
+/**
  * api_cmd_hw_restart - restart the chain in the HW
  * @chain: the API CMD specific chain to restart
  *
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.h 
b/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.h
index 21c8c12..c2c8b5f 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.h
@@ -132,6 +132,9 @@ struct hinic_api_cmd_chain {
struct hinic_api_cmd_cell   *curr_node;
 };
 
+int hinic_api_cmd_write(struct hinic_api_cmd_chain *chain,
+   enum hinic_node_id dest, void *cmd, u16 size);
+
 int hinic_api_cmd_init(struct hinic_hwif *hwif,
   struct hinic_api_cmd_chain **chain);
 
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h 
b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h
index 68e4bb9..4cfe325 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h
@@ -93,6 +93,7 @@
 #define HINIC_HWIF_NUM_IRQS(hwif)   ((hwif)->attr.num_irqs)
 #define HINIC_HWIF_GLOB_IDX(hwif)   ((hwif)->attr.func_global_idx)
 #define HINIC_HWIF_PCI_INTF(hwif)   ((hwif)->attr.pci_intf_idx)
+#define HINIC_HWIF_PF_IDX(hwif) ((hwif)->attr.pf_idx)
 
 #define HINIC_FUNC_TYPE(hwif)   ((hwif)->attr.func_type)
 #define HINIC_IS_PF(hwif)   (HINIC_FUNC_TYPE(hwif) == HINIC_PF)
@@ -127,6 +128,10 @@ enum hinic_mod_type {
HINIC_MOD_MAX   = 15
 };
 
+enum hinic_node_id {
+   HINIC_NODE_ID_MGMT = 21,
+};
+
 struct hinic_func_attr {
u16 func_global_idx;
u8  pf_idx;
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c 
b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
index 545ef3a..006aa3d 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
@@ -16,6 +16,12 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
 
 #include "hinic_hw_if.h"
 #include "hinic_hw_eqs.h"
@@ -23,9 +29,269 @@
 #include "hinic_hw_mgmt.h"
 #include "hinic_hw_dev.h"
 
+#define SYNC_MSG_ID_MASK                0x1FF
+
+#define SYNC_MSG_ID(pf_to_mgmt)         ((pf_to_mgmt)->sync_msg_id)
+
+#define SYNC_MSG_ID_INC(pf_to_mgmt)     (SYNC_MSG_ID(pf_to_mgmt) = \
+   ((SYNC_MSG_ID(pf_to_mgmt) + 1) & \
+SYNC_MSG_ID_MASK))
+
+#define MSG_SZ_IS_VALID(in_size)        ((in_size) <= MAX_MSG_LEN)
+
+#define MGMT_MSG_LEN_MIN                20
+#define MGMT_MSG_LEN_STEP               16
+#define MGMT_MSG_RSVD_FOR_DEV           8
+
+#define SEGMENT_LEN                     48
+
+#define MAX_PF_MGMT_BUF_SIZE            2048
+
+/* Data should be SEG LEN size aligned */
+#define MAX_MSG_LEN                     2016
+
+#define MSG_NOT_RESP                    0xFFFF
+
+#define MGMT_MSG_TIMEOUT                1000
+
 #define mgmt_to_pfhwdev(pf_mgmt)\
 

[PATCH V2 net-next 04/21] net-next/hinic: Initialize api cmd hw

2017-07-19 Thread Aviad Krawczyk
Update the hardware about api cmd resources and initialize it.

Signed-off-by: Aviad Krawczyk 
Signed-off-by: Zhao Chen 
---
 .../net/ethernet/huawei/hinic/hinic_hw_api_cmd.c   | 173 -
 .../net/ethernet/huawei/hinic/hinic_hw_api_cmd.h   |  38 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h   |  25 +++
 3 files changed, 235 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c 
b/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c
index 352397c..c07b02c 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c
@@ -13,6 +13,7 @@
  *
  */
 
+#include 
 #include 
 #include 
 #include 
@@ -20,8 +21,12 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 #include 
 
+#include "hinic_hw_csr.h"
 #include "hinic_hw_if.h"
 #include "hinic_hw_api_cmd.h"
 
@@ -34,8 +39,157 @@
(((cell_size) >= API_CMD_CELL_SIZE_MIN) ? \
 (1 << (fls(cell_size - 1))) : API_CMD_CELL_SIZE_MIN)
 
+#define API_CMD_CELL_SIZE_VAL(size) \
+   ilog2((size) >> API_CMD_CELL_SIZE_SHIFT)
+
 #define API_CMD_BUF_SIZE                2048
 
+#define API_CMD_TIMEOUT 1000
+
+enum api_cmd_xor_chk_level {
+   XOR_CHK_DIS = 0,
+
+   XOR_CHK_ALL = 3,
+};
+
+/**
+ * api_cmd_hw_restart - restart the chain in the HW
+ * @chain: the API CMD specific chain to restart
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int api_cmd_hw_restart(struct hinic_api_cmd_chain *chain)
+{
+   struct hinic_hwif *hwif = chain->hwif;
+   unsigned long end;
+   u32 reg_addr, val;
+   int err = -ETIMEDOUT;
+
+   /* Read Modify Write */
+   reg_addr = HINIC_CSR_API_CMD_CHAIN_REQ_ADDR(chain->chain_type);
+   val = hinic_hwif_read_reg(hwif, reg_addr);
+
+   val = HINIC_API_CMD_CHAIN_REQ_CLEAR(val, RESTART);
+   val |= HINIC_API_CMD_CHAIN_REQ_SET(1, RESTART);
+
+   hinic_hwif_write_reg(hwif, reg_addr, val);
+
+   end = jiffies + msecs_to_jiffies(API_CMD_TIMEOUT);
+   do {
+   val = hinic_hwif_read_reg(hwif, reg_addr);
+
+   if (!HINIC_API_CMD_CHAIN_REQ_GET(val, RESTART)) {
+   err = 0;
+   break;
+   }
+
+   msleep(20);
+   } while (time_before(jiffies, end));
+
+   return err;
+}
+
+/**
+ * api_cmd_ctrl_init - set the control register of a chain
+ * @chain: the API CMD specific chain to set control register for
+ **/
+static void api_cmd_ctrl_init(struct hinic_api_cmd_chain *chain)
+{
+   struct hinic_hwif *hwif = chain->hwif;
+   u32 reg_addr, ctrl;
+   u16 cell_size;
+
+   /* Read Modify Write */
+   reg_addr = HINIC_CSR_API_CMD_CHAIN_CTRL_ADDR(chain->chain_type);
+
+   cell_size = API_CMD_CELL_SIZE_VAL(chain->cell_size);
+
+   ctrl = hinic_hwif_read_reg(hwif, reg_addr);
+
+   ctrl =  HINIC_API_CMD_CHAIN_CTRL_CLEAR(ctrl, RESTART_WB_STAT) &
+   HINIC_API_CMD_CHAIN_CTRL_CLEAR(ctrl, XOR_ERR)   &
+   HINIC_API_CMD_CHAIN_CTRL_CLEAR(ctrl, AEQE_EN)   &
+   HINIC_API_CMD_CHAIN_CTRL_CLEAR(ctrl, XOR_CHK_EN) &
+   HINIC_API_CMD_CHAIN_CTRL_CLEAR(ctrl, CELL_SIZE);
+
+   ctrl |= HINIC_API_CMD_CHAIN_CTRL_SET(1, XOR_ERR) |
+   HINIC_API_CMD_CHAIN_CTRL_SET(XOR_CHK_ALL, XOR_CHK_EN) |
+   HINIC_API_CMD_CHAIN_CTRL_SET(cell_size, CELL_SIZE);
+
+   hinic_hwif_write_reg(hwif, reg_addr, ctrl);
+}
+
+/**
+ * api_cmd_set_status_addr - set the status address of a chain in the HW
+ * @chain: the API CMD specific chain to set in HW status address for
+ **/
+static void api_cmd_set_status_addr(struct hinic_api_cmd_chain *chain)
+{
+   struct hinic_hwif *hwif = chain->hwif;
+   u32 addr, val;
+
+   addr = HINIC_CSR_API_CMD_STATUS_HI_ADDR(chain->chain_type);
+   val = upper_32_bits(chain->wb_status_paddr);
+   hinic_hwif_write_reg(hwif, addr, val);
+
+   addr = HINIC_CSR_API_CMD_STATUS_LO_ADDR(chain->chain_type);
+   val = lower_32_bits(chain->wb_status_paddr);
+   hinic_hwif_write_reg(hwif, addr, val);
+}
+
+/**
+ * api_cmd_set_num_cells - set the number of cells of a chain in the HW
+ * @chain: the API CMD specific chain to set in HW the number of cells for
+ **/
+static void api_cmd_set_num_cells(struct hinic_api_cmd_chain *chain)
+{
+   struct hinic_hwif *hwif = chain->hwif;
+   u32 addr, val;
+
+   addr = HINIC_CSR_API_CMD_CHAIN_NUM_CELLS_ADDR(chain->chain_type);
+   val  = chain->num_cells;
+   hinic_hwif_write_reg(hwif, addr, val);
+}
+
+/**
+ * api_cmd_head_init - set the head of a chain in the HW
+ * @chain: the API CMD specific chain to set in HW the head for
+ **/
+static void api_cmd_head_init(struct hinic_api_cmd_chain *chain)
+{
+   struct hinic_hwif *hwif = chain->hwif;
+   u32 addr, val;
+
+   

[PATCH V2 net-next 03/21] net-next/hinic: Initialize api cmd resources

2017-07-19 Thread Aviad Krawczyk
Initialize api cmd resources as part of management initialization.

Signed-off-by: Aviad Krawczyk 
Signed-off-by: Zhao Chen 
---
 drivers/net/ethernet/huawei/hinic/Makefile |   4 +-
 .../net/ethernet/huawei/hinic/hinic_hw_api_cmd.c   | 445 +
 .../net/ethernet/huawei/hinic/hinic_hw_api_cmd.h   | 102 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c  |  10 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.h  |   3 +
 5 files changed, 562 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.h

diff --git a/drivers/net/ethernet/huawei/hinic/Makefile 
b/drivers/net/ethernet/huawei/hinic/Makefile
index d080dfb..88223d0 100644
--- a/drivers/net/ethernet/huawei/hinic/Makefile
+++ b/drivers/net/ethernet/huawei/hinic/Makefile
@@ -1,4 +1,4 @@
 obj-$(CONFIG_HINIC) += hinic.o
 
-hinic-y := hinic_main.o hinic_hw_dev.o hinic_hw_mgmt.o hinic_hw_eqs.o \
-  hinic_hw_if.o
\ No newline at end of file
+hinic-y := hinic_main.o hinic_hw_dev.o hinic_hw_mgmt.o hinic_hw_api_cmd.o \
+  hinic_hw_eqs.o hinic_hw_if.o
\ No newline at end of file
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c 
b/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c
new file mode 100644
index 0000000..352397c
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c
@@ -0,0 +1,445 @@
+/*
+ * Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ *
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "hinic_hw_if.h"
+#include "hinic_hw_api_cmd.h"
+
+#define API_CHAIN_NUM_CELLS 32
+
+#define API_CMD_CELL_SIZE_SHIFT 6
+#define API_CMD_CELL_SIZE_MIN   (BIT(API_CMD_CELL_SIZE_SHIFT))
+
+#define API_CMD_CELL_SIZE(cell_size)\
+   (((cell_size) >= API_CMD_CELL_SIZE_MIN) ? \
+(1 << (fls(cell_size - 1))) : API_CMD_CELL_SIZE_MIN)
+
+#define API_CMD_BUF_SIZE                2048
+
+/**
+ * api_cmd_chain_hw_init - initialize the chain in the HW
+ * @chain: the API CMD specific chain to initialize in HW
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int api_cmd_chain_hw_init(struct hinic_api_cmd_chain *chain)
+{
+   /* should be implemented */
+   return 0;
+}
+
+/**
+ * free_cmd_buf - free the dma buffer of API CMD command
+ * @chain: the API CMD specific chain of the cmd
+ * @cell_idx: the cell index of the cmd
+ **/
+static void free_cmd_buf(struct hinic_api_cmd_chain *chain, int cell_idx)
+{
+   struct hinic_api_cmd_cell_ctxt *cell_ctxt;
+   struct hinic_hwif *hwif = chain->hwif;
+   struct pci_dev *pdev = hwif->pdev;
+
+   cell_ctxt = &chain->cell_ctxt[cell_idx];
+
+   dma_free_coherent(&pdev->dev, API_CMD_BUF_SIZE,
+ cell_ctxt->api_cmd_vaddr,
+ cell_ctxt->api_cmd_paddr);
+}
+
+/**
+ * alloc_cmd_buf - allocate a dma buffer for API CMD command
+ * @chain: the API CMD specific chain for the cmd
+ * @cell: the cell in the HW for the cmd
+ * @cell_idx: the index of the cell
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int alloc_cmd_buf(struct hinic_api_cmd_chain *chain,
+struct hinic_api_cmd_cell *cell, int cell_idx)
+{
+   struct hinic_hwif *hwif = chain->hwif;
+   struct pci_dev *pdev = hwif->pdev;
+   struct hinic_api_cmd_cell_ctxt *cell_ctxt;
+   dma_addr_t cmd_paddr;
+   void *cmd_vaddr;
+   int err = 0;
+
+   cmd_vaddr = dma_zalloc_coherent(&pdev->dev, API_CMD_BUF_SIZE,
+   &cmd_paddr, GFP_KERNEL);
+   if (!cmd_vaddr) {
+   dev_err(&pdev->dev, "Failed to allocate API CMD DMA memory\n");
+   return -ENOMEM;
+   }
+
+   cell_ctxt = &chain->cell_ctxt[cell_idx];
+
+   cell_ctxt->api_cmd_vaddr = cmd_vaddr;
+   cell_ctxt->api_cmd_paddr = cmd_paddr;
+
+   /* set the cmd DMA address in the cell */
+   switch (chain->chain_type) {
+   case HINIC_API_CMD_WRITE_TO_MGMT_CPU:
+   /* The data in the HW should be in Big Endian Format */
+   cell->write.hw_cmd_paddr = cpu_to_be64(cmd_paddr);
+   break;
+
+   default:
+   dev_err(&pdev->dev, "Unsupported API CMD chain type\n");
+   free_cmd_buf(chain, cell_idx);
+ 

[PATCH V2 net-next 02/21] net-next/hinic: Initialize hw device components

2017-07-19 Thread Aviad Krawczyk
Initialize hw device by calling the initialization functions of aeqs
and management channel.

Signed-off-by: Aviad Krawczyk 
Signed-off-by: Zhao Chen 
---
 drivers/net/ethernet/huawei/hinic/Makefile|   3 +-
 drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c  | 176 --
 drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h  |  14 +-
 drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c  | 149 ++
 drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.h  | 107 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_if.h   |   8 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c |  93 
 drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.h |  45 ++
 8 files changed, 580 insertions(+), 15 deletions(-)
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.h
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.h

diff --git a/drivers/net/ethernet/huawei/hinic/Makefile 
b/drivers/net/ethernet/huawei/hinic/Makefile
index 353cee0..d080dfb 100644
--- a/drivers/net/ethernet/huawei/hinic/Makefile
+++ b/drivers/net/ethernet/huawei/hinic/Makefile
@@ -1,3 +1,4 @@
 obj-$(CONFIG_HINIC) += hinic.o
 
-hinic-y := hinic_main.o hinic_hw_dev.o hinic_hw_if.o
+hinic-y := hinic_main.o hinic_hw_dev.o hinic_hw_mgmt.o hinic_hw_eqs.o \
+  hinic_hw_if.o
\ No newline at end of file
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c 
b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
index fbc9de4..c61ff10 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
@@ -22,11 +22,135 @@
 #include 
 
 #include "hinic_hw_if.h"
+#include "hinic_hw_eqs.h"
+#include "hinic_hw_mgmt.h"
 #include "hinic_hw_dev.h"
 
 #define MAX_IRQS(max_qps, num_aeqs, num_ceqs)   \
 (2 * (max_qps) + (num_aeqs) + (num_ceqs))
 
+enum intr_type {
+   INTR_MSIX_TYPE,
+};
+
+/* HW struct */
+struct hinic_dev_cap {
+   u8  status;
+   u8  version;
+   u8  rsvd0[6];
+
+   u8  rsvd1[5];
+   u8  intr_type;
+   u8  rsvd2[66];
+   u16 max_sqs;
+   u16 max_rqs;
+   u8  rsvd3[208];
+};
+
+/**
+ * get_capability - convert device capabilities to NIC capabilities
+ * @hwdev: the HW device to set and convert device capabilities for
+ * @dev_cap: device capabilities from FW
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int get_capability(struct hinic_hwdev *hwdev,
+ struct hinic_dev_cap *dev_cap)
+{
+   struct hinic_hwif *hwif = hwdev->hwif;
+   struct hinic_cap *nic_cap = &hwdev->nic_cap;
+   int num_aeqs, num_ceqs, num_irqs, num_qps;
+
+   if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif))
+   return -EINVAL;
+
+   if (dev_cap->intr_type != INTR_MSIX_TYPE)
+   return -EFAULT;
+
+   num_aeqs = HINIC_HWIF_NUM_AEQS(hwif);
+   num_ceqs = HINIC_HWIF_NUM_CEQS(hwif);
+   num_irqs = HINIC_HWIF_NUM_IRQS(hwif);
+
+   /* Each QP has its own (SQ + RQ) interrupts */
+   num_qps = (num_irqs - (num_aeqs + num_ceqs)) / 2;
+
+   /* num_qps must be power of 2 */
+   num_qps = BIT(fls(num_qps) - 1);
+
+   nic_cap->max_qps = dev_cap->max_sqs + 1;
+   if (nic_cap->max_qps != (dev_cap->max_rqs + 1))
+   return -EFAULT;
+
+   if (num_qps < nic_cap->max_qps)
+   nic_cap->num_qps = num_qps;
+   else
+   nic_cap->num_qps = nic_cap->max_qps;
+
+   return 0;
+}
+
+/**
+ * get_cap_from_fw - get device capabilities from FW
+ * @pfhwdev: the PF HW device to get capabilities for
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int get_cap_from_fw(struct hinic_pfhwdev *pfhwdev)
+{
+   struct hinic_hwdev *hwdev = &pfhwdev->hwdev;
+   struct hinic_hwif *hwif = hwdev->hwif;
+   struct pci_dev *pdev = hwif->pdev;
+   struct hinic_dev_cap dev_cap;
+   u16 in_len, out_len;
+   int err;
+
+   in_len = 0;
+   out_len = sizeof(dev_cap);
+
+   err = hinic_msg_to_mgmt(&pfhwdev->pf_to_mgmt, HINIC_MOD_CFGM,
+   HINIC_CFG_NIC_CAP, &dev_cap, in_len, &dev_cap,
+   &out_len, HINIC_MGMT_MSG_SYNC);
+   if (err) {
+   dev_err(&pdev->dev, "Failed to get capability from FW\n");
+   return err;
+   }
+
+   return get_capability(hwdev, &dev_cap);
+}
+
+/**
+ * get_dev_cap - get device capabilities
+ * @hwdev: the NIC HW device to get capabilities for
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int get_dev_cap(struct hinic_hwdev *hwdev)
+{
+   struct hinic_pfhwdev *pfhwdev;
+   struct hinic_hwif *hwif = hwdev->hwif;
+   struct pci_dev *pdev = hwif->pdev;
+   int err;
+
+   switch (HINIC_FUNC_TYPE(hwif)) {
+   case HINIC_PPF:
+  

[PATCH net-next V2 0/5] Refine virtio-net XDP

2017-07-19 Thread Jason Wang
Hi:

This series brings two optimizations for virtio-net XDP:

- avoid reset during XDP set
- turn off offloads on demand

Changes from V1:
- Various tweaks on commit logs and comments
- Use virtnet_napi_enable() when enabling NAPI on XDP set
- Copy the small buffer packet only if xdp_headroom is smaller than
  required

Please review.

Thanks

Jason Wang (5):
  virtio_ring: allow to store zero as the ctx
  virtio-net: pack headroom into ctx for mergeable buffers
  virtio-net: switch to use new ctx API for small buffer
  virtio-net: do not reset during XDP set
  virtio-net: switch off offloads on demand if possible on XDP set

 drivers/net/virtio_net.c | 332 ++-
 drivers/virtio/virtio_ring.c |   2 +-
 2 files changed, 200 insertions(+), 134 deletions(-)

-- 
2.7.4



[PATCH net-next V2 1/5] virtio_ring: allow to store zero as the ctx

2017-07-19 Thread Jason Wang
Allow zero to be stored as a ctx. With this we can store e.g. a zero
value, which is meaningful when the headroom is stored
through ctx.

Signed-off-by: Jason Wang 
---
 drivers/virtio/virtio_ring.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 5e1b548..9aaa177 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -391,7 +391,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
vq->desc_state[head].data = data;
if (indirect)
vq->desc_state[head].indir_desc = desc;
-   if (ctx)
+   else
vq->desc_state[head].indir_desc = ctx;
 
/* Put entry in available array (but don't update avail->idx until they
-- 
2.7.4



[PATCH net-next V2 2/5] virtio-net: pack headroom into ctx for mergeable buffers

2017-07-19 Thread Jason Wang
Pack headroom into ctx - this way when we get a buffer we can figure out
the actual headroom that was allocated for the buffer. Will be helpful
to optimize switching between XDP and non-XDP modes which have different
headroom requirements.

Signed-off-by: Jason Wang 
---
 drivers/net/virtio_net.c | 29 -
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 1f8c15c..8fae9a8 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -270,6 +270,23 @@ static void skb_xmit_done(struct virtqueue *vq)
netif_wake_subqueue(vi->dev, vq2txq(vq));
 }
 
+#define MRG_CTX_HEADER_SHIFT 22
+static void *mergeable_len_to_ctx(unsigned int truesize,
+ unsigned int headroom)
+{
+   return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | 
truesize);
+}
+
+static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx)
+{
+   return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT;
+}
+
+static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
+{
+   return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1);
+}
+
 /* Called from bottom half context */
 static struct sk_buff *page_to_skb(struct virtnet_info *vi,
   struct receive_queue *rq,
@@ -639,13 +656,14 @@ static struct sk_buff *receive_mergeable(struct 
net_device *dev,
}
rcu_read_unlock();
 
-   if (unlikely(len > (unsigned long)ctx)) {
+   truesize = mergeable_ctx_to_truesize(ctx);
+   if (unlikely(len > truesize)) {
pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
 dev->name, len, (unsigned long)ctx);
dev->stats.rx_length_errors++;
goto err_skb;
}
-   truesize = (unsigned long)ctx;
+
head_skb = page_to_skb(vi, rq, page, offset, len, truesize);
curr_skb = head_skb;
 
@@ -665,13 +683,14 @@ static struct sk_buff *receive_mergeable(struct 
net_device *dev,
}
 
page = virt_to_head_page(buf);
-   if (unlikely(len > (unsigned long)ctx)) {
+
+   truesize = mergeable_ctx_to_truesize(ctx);
+   if (unlikely(len > truesize)) {
pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
 dev->name, len, (unsigned long)ctx);
dev->stats.rx_length_errors++;
goto err_skb;
}
-   truesize = (unsigned long)ctx;
 
num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
@@ -889,7 +908,7 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
 
buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
buf += headroom; /* advance address leaving hole at front of pkt */
-   ctx = (void *)(unsigned long)len;
+   ctx = mergeable_len_to_ctx(len, headroom);
get_page(alloc_frag->page);
alloc_frag->offset += len + headroom;
hole = alloc_frag->size - alloc_frag->offset;
-- 
2.7.4



[PATCH net-next V2 3/5] virtio-net: switch to use new ctx API for small buffer

2017-07-19 Thread Jason Wang
Use ctx API to store headroom for small buffers.
Following patches will retrieve this info and use it for XDP.

Signed-off-by: Jason Wang 
---
 drivers/net/virtio_net.c | 17 -
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 8fae9a8..640f1de 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -410,7 +410,8 @@ static unsigned int virtnet_get_headroom(struct 
virtnet_info *vi)
 static struct sk_buff *receive_small(struct net_device *dev,
 struct virtnet_info *vi,
 struct receive_queue *rq,
-void *buf, unsigned int len)
+void *buf, void *ctx,
+unsigned int len)
 {
struct sk_buff *skb;
struct bpf_prog *xdp_prog;
@@ -773,7 +774,7 @@ static int receive_buf(struct virtnet_info *vi, struct 
receive_queue *rq,
else if (vi->big_packets)
skb = receive_big(dev, vi, rq, buf, len);
else
-   skb = receive_small(dev, vi, rq, buf, len);
+   skb = receive_small(dev, vi, rq, buf, ctx, len);
 
if (unlikely(!skb))
return 0;
@@ -806,12 +807,18 @@ static int receive_buf(struct virtnet_info *vi, struct 
receive_queue *rq,
return 0;
 }
 
+/* Unlike mergeable buffers, all buffers are allocated to the
+ * same size, except for the headroom. For this reason we do
+ * not need to use  mergeable_len_to_ctx here - it is enough
+ * to store the headroom as the context ignoring the truesize.
+ */
 static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,
 gfp_t gfp)
 {
struct page_frag *alloc_frag = >alloc_frag;
char *buf;
unsigned int xdp_headroom = virtnet_get_headroom(vi);
+   void *ctx = (void *)(unsigned long)xdp_headroom;
int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom;
int err;
 
@@ -825,7 +832,7 @@ static int add_recvbuf_small(struct virtnet_info *vi, 
struct receive_queue *rq,
alloc_frag->offset += len;
sg_init_one(rq->sg, buf + VIRTNET_RX_PAD + xdp_headroom,
vi->hdr_len + GOOD_PACKET_LEN);
-   err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, buf, gfp);
+   err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
if (err < 0)
put_page(virt_to_head_page(buf));
 
@@ -1034,7 +1041,7 @@ static int virtnet_receive(struct receive_queue *rq, int 
budget)
void *buf;
struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
 
-   if (vi->mergeable_rx_bufs) {
+   if (!vi->big_packets || vi->mergeable_rx_bufs) {
void *ctx;
 
while (received < budget &&
@@ -2198,7 +2205,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
names = kmalloc(total_vqs * sizeof(*names), GFP_KERNEL);
if (!names)
goto err_names;
-   if (vi->mergeable_rx_bufs) {
+   if (!vi->big_packets || vi->mergeable_rx_bufs) {
ctx = kzalloc(total_vqs * sizeof(*ctx), GFP_KERNEL);
if (!ctx)
goto err_ctx;
-- 
2.7.4



[PATCH net-next V2 4/5] virtio-net: do not reset during XDP set

2017-07-19 Thread Jason Wang
We currently reset the device during XDP set; the main reason is
that we allocate more headroom with XDP (for header adjustment).

This works but causes network downtime for users.

Previous patches encoded the headroom in the buffer context,
this makes it possible to detect the case where a buffer
with headroom insufficient for XDP is added to the queue and
XDP is enabled afterwards.

Upon detection, we handle this case by copying the packet
(slow, but it's a temporary condition).

Signed-off-by: Jason Wang 
---
 drivers/net/virtio_net.c | 232 ++-
 1 file changed, 106 insertions(+), 126 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 640f1de..b3fc01d 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -407,6 +407,69 @@ static unsigned int virtnet_get_headroom(struct 
virtnet_info *vi)
return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0;
 }
 
+/* We copy the packet for XDP in the following cases:
+ *
+ * 1) Packet is scattered across multiple rx buffers.
+ * 2) Headroom space is insufficient.
+ *
+ * This is inefficient but it's a temporary condition that
+ * we hit right after XDP is enabled and until queue is refilled
+ * with large buffers with sufficient headroom - so it should affect
+ * at most queue size packets.
+ * Afterwards, the conditions to enable
+ * XDP should preclude the underlying device from sending packets
+ * across multiple buffers (num_buf > 1), and we make sure buffers
+ * have enough headroom.
+ */
+static struct page *xdp_linearize_page(struct receive_queue *rq,
+  u16 *num_buf,
+  struct page *p,
+  int offset,
+  int page_off,
+  unsigned int *len)
+{
+   struct page *page = alloc_page(GFP_ATOMIC);
+
+   if (!page)
+   return NULL;
+
+   memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
+   page_off += *len;
+
+   while (--*num_buf) {
+   unsigned int buflen;
+   void *buf;
+   int off;
+
+   buf = virtqueue_get_buf(rq->vq, &buflen);
+   if (unlikely(!buf))
+   goto err_buf;
+
+   p = virt_to_head_page(buf);
+   off = buf - page_address(p);
+
+   /* guard against a misconfigured or uncooperative backend that
+* is sending packet larger than the MTU.
+*/
+   if ((page_off + buflen) > PAGE_SIZE) {
+   put_page(p);
+   goto err_buf;
+   }
+
+   memcpy(page_address(page) + page_off,
+  page_address(p) + off, buflen);
+   page_off += buflen;
+   put_page(p);
+   }
+
+   /* Headroom does not contribute to packet length */
+   *len = page_off - VIRTIO_XDP_HEADROOM;
+   return page;
+err_buf:
+   __free_pages(page, 0);
+   return NULL;
+}
+
 static struct sk_buff *receive_small(struct net_device *dev,
 struct virtnet_info *vi,
 struct receive_queue *rq,
@@ -415,12 +478,14 @@ static struct sk_buff *receive_small(struct net_device 
*dev,
 {
struct sk_buff *skb;
struct bpf_prog *xdp_prog;
-   unsigned int xdp_headroom = virtnet_get_headroom(vi);
+   unsigned int xdp_headroom = (unsigned long)ctx;
unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom;
unsigned int headroom = vi->hdr_len + header_offset;
unsigned int buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
  SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+   struct page *page = virt_to_head_page(buf);
unsigned int delta = 0;
+   struct page *xdp_page;
len -= vi->hdr_len;
 
rcu_read_lock();
@@ -434,6 +499,27 @@ static struct sk_buff *receive_small(struct net_device 
*dev,
if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
goto err_xdp;
 
+   if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
+   int offset = buf - page_address(page) + header_offset;
+   unsigned int tlen = len + vi->hdr_len;
+   u16 num_buf = 1;
+
+   xdp_headroom = virtnet_get_headroom(vi);
+   header_offset = VIRTNET_RX_PAD + xdp_headroom;
+   headroom = vi->hdr_len + header_offset;
+   buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
+SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+   xdp_page = xdp_linearize_page(rq, _buf, page,
+ 

[PATCH net-next V2 5/5] virtio-net: switch off offloads on demand if possible on XDP set

2017-07-19 Thread Jason Wang
The current XDP implementation requires the guest offload features to be
disabled on the device. This is inconvenient and means the guest can't
benefit from offloads even when XDP is not used. This patch addresses this
limitation by using the guest offloads control command
(VIRTIO_NET_CTRL_GUEST_OFFLOADS): guest offloads are disabled when an XDP
program is set and restored when it is removed.

Signed-off-by: Jason Wang 
---
 drivers/net/virtio_net.c | 70 
 1 file changed, 65 insertions(+), 5 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index b3fc01d..5fbd15e 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -57,6 +57,11 @@ DECLARE_EWMA(pkt_len, 0, 64)
 
 #define VIRTNET_DRIVER_VERSION "1.0.0"
 
+const unsigned long guest_offloads[] = { VIRTIO_NET_F_GUEST_TSO4,
+VIRTIO_NET_F_GUEST_TSO6,
+VIRTIO_NET_F_GUEST_ECN,
+VIRTIO_NET_F_GUEST_UFO };
+
 struct virtnet_stats {
struct u64_stats_sync tx_syncp;
struct u64_stats_sync rx_syncp;
@@ -164,10 +169,13 @@ struct virtnet_info {
u8 ctrl_promisc;
u8 ctrl_allmulti;
u16 ctrl_vid;
+   u64 ctrl_offloads;
 
/* Ethtool settings */
u8 duplex;
u32 speed;
+
+   unsigned long guest_offloads;
 };
 
 struct padded_vnet_hdr {
@@ -1896,6 +1904,47 @@ static int virtnet_restore_up(struct virtio_device *vdev)
return err;
 }
 
+static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads)
+{
+   struct scatterlist sg;
+   vi->ctrl_offloads = cpu_to_virtio64(vi->vdev, offloads);
+
+   sg_init_one(&sg, &vi->ctrl_offloads, sizeof(vi->ctrl_offloads));
+
+   if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS,
+ VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) {
+   dev_warn(&vi->dev->dev, "Fail to set guest offload. \n");
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
+static int virtnet_clear_guest_offloads(struct virtnet_info *vi)
+{
+   u64 offloads = 0;
+
+   if (!vi->guest_offloads)
+   return 0;
+
+   if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))
+   offloads = 1ULL << VIRTIO_NET_F_GUEST_CSUM;
+
+   return virtnet_set_guest_offloads(vi, offloads);
+}
+
+static int virtnet_restore_guest_offloads(struct virtnet_info *vi)
+{
+   u64 offloads = vi->guest_offloads;
+
+   if (!vi->guest_offloads)
+   return 0;
+   if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))
+   offloads |= 1ULL << VIRTIO_NET_F_GUEST_CSUM;
+
+   return virtnet_set_guest_offloads(vi, offloads);
+}
+
 static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
   struct netlink_ext_ack *extack)
 {
@@ -1905,10 +1954,11 @@ static int virtnet_xdp_set(struct net_device *dev, 
struct bpf_prog *prog,
u16 xdp_qp = 0, curr_qp;
int i, err;
 
-   if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
-   virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
-   virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
-   virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO)) {
+   if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)
+   && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
+   virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
+   virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
+   virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO))) {
NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is 
implementing LRO, disable LRO first");
return -EOPNOTSUPP;
}
@@ -1955,6 +2005,12 @@ static int virtnet_xdp_set(struct net_device *dev, 
struct bpf_prog *prog,
for (i = 0; i < vi->max_queue_pairs; i++) {
old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
+   if (i == 0) {
+   if (!old_prog)
+   virtnet_clear_guest_offloads(vi);
+   if (!prog)
+   virtnet_restore_guest_offloads(vi);
+   }
if (old_prog)
bpf_prog_put(old_prog);
virtnet_napi_enable(vi->rq[i].vq, >rq[i].napi);
@@ -2588,6 +2644,10 @@ static int virtnet_probe(struct virtio_device *vdev)
netif_carrier_on(dev);
}
 
+   for (i = 0; i < ARRAY_SIZE(guest_offloads); i++)
+   if (virtio_has_feature(vi->vdev, guest_offloads[i]))
+   set_bit(guest_offloads[i], &vi->guest_offloads);
+
pr_debug("virtnet: registered device %s with %d RX and TX vq's\n",
 dev->name, max_queue_pairs);
 

Re: Use sock_diag instead of procfs for new address families?

2017-07-19 Thread Stefan Hajnoczi
On Tue, Jul 18, 2017 at 09:58:38AM -0700, Stephen Hemminger wrote:
> On Tue, 18 Jul 2017 17:18:06 +0100
> Stefan Hajnoczi  wrote:
> 
> > I am implementing userspace access to socket information for AF_VSOCK.
> > A few hours into writing and testing a /proc/net/vsock seq_file I
> > noticed that ss(8) prefers NETLINK_SOCK_DIAG over procfs.
> > 
> > Before potentially wasting time implementing a legacy interface that
> > won't be accepted, I thought it might be good to ask :).
> > 
> > Which approach is preferred?
> > 1. New address families must implement only sock_diag.
> > 2. Both sock_diag and procfs must be implemented.
> > 3. Implement whichever interface you prefer.
> > 
> > Thanks,
> > Stefan
> 
> You are correct, I am unlikely to take any new code using /proc
> in ss.

Thanks Stephen and David, will switch to sock_diag.

Stefan


signature.asc
Description: PGP signature


[PATCH net-next] cxgb4: Update register ranges of T4/T5/T6 adapters

2017-07-19 Thread Ganesh Goudar
From: Arjun Vynipadath 

Signed-off-by: Arjun Vynipadath 
Signed-off-by: Casey Leedom 
Signed-off-by: Ganesh Goudar 
---
 drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 51 --
 1 file changed, 14 insertions(+), 37 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c 
b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 82bf7aa..570c095 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -913,7 +913,8 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t 
buf_size)
0xd010, 0xd03c,
0xdfc0, 0xdfe0,
0xe000, 0xea7c,
-   0xf000, 0x11190,
+   0xf000, 0x0,
+   0x8, 0x11190,
0x19040, 0x1906c,
0x19078, 0x19080,
0x1908c, 0x190e4,
@@ -1439,8 +1440,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t 
buf_size)
0x1ff00, 0x1ff84,
0x1ffc0, 0x1ffc8,
0x3, 0x30030,
-   0x30038, 0x30038,
-   0x30040, 0x30040,
0x30100, 0x30144,
0x30190, 0x301a0,
0x301a8, 0x301b8,
@@ -1551,8 +1550,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t 
buf_size)
0x33c3c, 0x33c50,
0x33cf0, 0x33cfc,
0x34000, 0x34030,
-   0x34038, 0x34038,
-   0x34040, 0x34040,
0x34100, 0x34144,
0x34190, 0x341a0,
0x341a8, 0x341b8,
@@ -1663,8 +1660,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t 
buf_size)
0x37c3c, 0x37c50,
0x37cf0, 0x37cfc,
0x38000, 0x38030,
-   0x38038, 0x38038,
-   0x38040, 0x38040,
0x38100, 0x38144,
0x38190, 0x381a0,
0x381a8, 0x381b8,
@@ -1775,8 +1770,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t 
buf_size)
0x3bc3c, 0x3bc50,
0x3bcf0, 0x3bcfc,
0x3c000, 0x3c030,
-   0x3c038, 0x3c038,
-   0x3c040, 0x3c040,
0x3c100, 0x3c144,
0x3c190, 0x3c1a0,
0x3c1a8, 0x3c1b8,
@@ -2040,12 +2033,8 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t 
buf_size)
0x1190, 0x1194,
0x11a0, 0x11a4,
0x11b0, 0x11b4,
-   0x11fc, 0x1258,
-   0x1280, 0x12d4,
-   0x12d9, 0x12d9,
-   0x12de, 0x12de,
-   0x12e3, 0x12e3,
-   0x12e8, 0x133c,
+   0x11fc, 0x1274,
+   0x1280, 0x133c,
0x1800, 0x18fc,
0x3000, 0x302c,
0x3060, 0x30b0,
@@ -2076,6 +2065,9 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t 
buf_size)
0x5ea0, 0x5eb0,
0x5ec0, 0x5ec0,
0x5ec8, 0x5ed0,
+   0x5ee0, 0x5ee0,
+   0x5ef0, 0x5ef0,
+   0x5f00, 0x5f00,
0x6000, 0x6020,
0x6028, 0x6040,
0x6058, 0x609c,
@@ -2133,6 +2125,8 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t 
buf_size)
0xd300, 0xd31c,
0xdfc0, 0xdfe0,
0xe000, 0xf008,
+   0xf010, 0xf018,
+   0xf020, 0xf028,
0x11000, 0x11014,
0x11048, 0x1106c,
0x11074, 0x11088,
@@ -2256,13 +2250,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t 
buf_size)
0x1ff00, 0x1ff84,
0x1ffc0, 0x1ffc8,
0x3, 0x30030,
-   0x30038, 0x30038,
-   0x30040, 0x30040,
-   0x30048, 0x30048,
-   0x30050, 0x30050,
-   0x3005c, 0x30060,
-   0x30068, 0x30068,
-   0x30070, 0x30070,
0x30100, 0x30168,
0x30190, 0x301a0,
0x301a8, 0x301b8,
@@ -2325,13 +2312,12 @@ void t4_get_regs(struct adapter *adap, void *buf, 
size_t buf_size)
0x326a8, 0x326a8,
0x326ec, 0x326ec,
0x32a00, 0x32abc,
-   0x32b00, 0x32b38,
+   0x32b00, 0x32b18,
+   0x32b20, 0x32b38,
0x32b40, 0x32b58,
0x32b60, 0x32b78,
0x32c00, 0x32c00,
0x32c08, 0x32c3c,
-   0x32e00, 0x32e2c,
-   0x32f00, 0x32f2c,
0x33000, 0x3302c,
0x33034, 0x33050,
0x33058, 0x33058,
@@ -2396,13 +2382,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t 
buf_size)
0x33c38, 0x33c50,
0x33cf0, 0x33cfc,
0x34000, 0x34030,
-

[patch net-next 11/17] ipv6: fib: Allow non-FIB users to take reference on route

2017-07-19 Thread Jiri Pirko
From: Ido Schimmel 

Listeners of the FIB notification chain are expected to be able to take
and release a reference on notified IPv6 routes. This is needed in the
case of drivers capable of offloading these routes to a capable device.

Since notifications are sent in an atomic context, these drivers need to
take a reference on the route, prepare a work item to offload the route
and release the reference at the end of the work.

Currently, rt6i_ref is used to indicate in how many FIB nodes a route
appears. Different code paths rely on rt6i_ref being 0 to indicate the
route is no longer used by the FIB.

For example, whenever a route is deleted or replaced, fib6_purge_rt() is
run to make sure the route is no longer present in intermediate nodes. A
BUG_ON() at the end of the function is executed in case the reference
count isn't 1, as it's only supposed to appear in the non-intermediate
node from which it's going to be deleted.

Instead of changing the semantics of rt6i_ref, a new reference count is
added, so that external users could also take a reference on routes
without modifying rt6i_ref.

To make sure external users don't release routes used by the FIB, the
reference count is set to 1 upon creation of a route and decremented by
the FIB upon rt6_release().

The reference count is atomic, as it's not protected by any locks, and is
placed in the 40-byte hole after the existing rt6i_ref.

rt6_free_pcpu() is exported so that modules could invoke rt6_put().
Similar to commit b423cb10807b ("ipv4: fib: Export free_fib_info()").

Signed-off-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 include/net/ip6_fib.h | 17 +
 net/ipv6/ip6_fib.c| 10 --
 net/ipv6/route.c  |  4 
 3 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 0b30521..e8ecd08 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -119,6 +119,7 @@ struct rt6_info {
unsigned intrt6i_nsiblings;
 
atomic_trt6i_ref;
+   refcount_t  rt6i_extref;
 
/* These are in a separate cache line. */
struct rt6key   rt6i_dst cacheline_aligned_in_smp;
@@ -187,6 +188,22 @@ static inline void ip6_rt_put(struct rt6_info *rt)
dst_release(>dst);
 }
 
+void rt6_free_pcpu(struct rt6_info *non_pcpu_rt);
+
+static inline void rt6_get(struct rt6_info *rt)
+{
+   refcount_inc(&rt->rt6i_extref);
+}
+
+static inline void rt6_put(struct rt6_info *rt)
+{
+   if (refcount_dec_and_test(&rt->rt6i_extref)) {
+   rt6_free_pcpu(rt);
+   dst_dev_put(&rt->dst);
+   dst_release(&rt->dst);
+   }
+}
+
 enum fib6_walk_state {
 #ifdef CONFIG_IPV6_SUBTREES
FWS_S,
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 719c1048..99ca785 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -154,7 +154,7 @@ static void node_free(struct fib6_node *fn)
kmem_cache_free(fib6_node_kmem, fn);
 }
 
-static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
+void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
 {
int cpu;
 
@@ -177,14 +177,12 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
free_percpu(non_pcpu_rt->rt6i_pcpu);
non_pcpu_rt->rt6i_pcpu = NULL;
 }
+EXPORT_SYMBOL_GPL(rt6_free_pcpu);
 
 static void rt6_release(struct rt6_info *rt)
 {
-   if (atomic_dec_and_test(>rt6i_ref)) {
-   rt6_free_pcpu(rt);
-   dst_dev_put(>dst);
-   dst_release(>dst);
-   }
+   if (atomic_dec_and_test(>rt6i_ref))
+   rt6_put(rt);
 }
 
 static void fib6_link_table(struct net *net, struct fib6_table *tb)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 924e02d..cabe0c6 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -345,6 +345,10 @@ static void rt6_info_init(struct rt6_info *rt)
memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
INIT_LIST_HEAD(>rt6i_siblings);
INIT_LIST_HEAD(>rt6i_uncached);
+   /* Make sure route can't be released as long as it's used by
+* the FIB.
+*/
+   refcount_set(>rt6i_extref, 1);
 }
 
 /* allocate dst with ip6_dst_ops */
-- 
2.9.3



[patch net-next 05/17] ipv6: fib_rules: Check if rule is a default rule

2017-07-19 Thread Jiri Pirko
From: Ido Schimmel 

As explained in commit 3c71006d15fd ("ipv4: fib_rules: Check if rule is
a default rule"), drivers supporting IPv6 FIB offload need to be able to
sanitize the rules they don't support and potentially flush their
tables.

Add an IPv6 helper to check if a FIB rule is a default rule.

Signed-off-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 include/net/ip6_fib.h |  5 +
 net/ipv6/fib6_rules.c | 20 
 2 files changed, 25 insertions(+)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 1a88008..6000b0d 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -295,6 +295,7 @@ int ipv6_route_open(struct inode *inode, struct file *file);
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 int fib6_rules_init(void);
 void fib6_rules_cleanup(void);
+bool fib6_rule_default(const struct fib_rule *rule);
 #else
 static inline int   fib6_rules_init(void)
 {
@@ -304,5 +305,9 @@ static inline void  fib6_rules_cleanup(void)
 {
return ;
 }
+static inline bool fib6_rule_default(const struct fib_rule *rule)
+{
+   return true;
+}
 #endif
 #endif
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index ec849d8..ef1fcee 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -29,6 +29,26 @@ struct fib6_rule {
u8  tclass;
 };
 
+static bool fib6_rule_matchall(const struct fib_rule *rule)
+{
+   struct fib6_rule *r = container_of(rule, struct fib6_rule, common);
+
+   if (r->dst.plen || r->src.plen || r->tclass)
+   return false;
+   return fib_rule_matchall(rule);
+}
+
+bool fib6_rule_default(const struct fib_rule *rule)
+{
+   if (!fib6_rule_matchall(rule) || rule->action != FR_ACT_TO_TBL ||
+   rule->l3mdev)
+   return false;
+   if (rule->table != RT6_TABLE_LOCAL && rule->table != RT6_TABLE_MAIN)
+   return false;
+   return true;
+}
+EXPORT_SYMBOL_GPL(fib6_rule_default);
+
 struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
   int flags, pol_lookup_t lookup)
 {
-- 
2.9.3



[patch net-next 02/17] mlxsw: spectrum_router: Ignore address families other than IPv4

2017-07-19 Thread Jiri Pirko
From: Ido Schimmel 

We're about to add IPv6 notifications in the FIB notification chain, but
the driver currently doesn't support these, so ignore them.

Signed-off-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 6069681..7965a53 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -43,6 +43,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -3034,7 +3035,7 @@ static int mlxsw_sp_router_fib_event(struct 
notifier_block *nb,
struct fib_notifier_info *info = ptr;
struct mlxsw_sp_router *router;
 
-   if (!net_eq(info->net, &init_net))
+   if (!net_eq(info->net, &init_net) || info->family != AF_INET)
return NOTIFY_DONE;
 
fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
-- 
2.9.3



[patch net-next 13/17] mlxsw: spectrum_router: Sanitize IPv6 FIB rules

2017-07-19 Thread Jiri Pirko
From: Ido Schimmel 

We only allow FIB offload in the presence of default rules or an l3mdev
rule. In a similar fashion to IPv4 FIB rules, sanitize IPv6 rules.

Signed-off-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 25 ++
 1 file changed, 25 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index d3b20bc..cf06b7d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -48,6 +48,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -3029,6 +3030,23 @@ static void mlxsw_sp_router_fib4_event_work(struct 
work_struct *work)
 
 static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
 {
+   struct mlxsw_sp_fib_event_work *fib_work =
+   container_of(work, struct mlxsw_sp_fib_event_work, work);
+   struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
+   struct fib_rule *rule;
+
+   rtnl_lock();
+   switch (fib_work->event) {
+   case FIB_EVENT_RULE_ADD: /* fall through */
+   case FIB_EVENT_RULE_DEL:
+   rule = fib_work->fr_info.rule;
+   if (!fib6_rule_default(rule) && !rule->l3mdev)
+   mlxsw_sp_router_fib_abort(mlxsw_sp);
+   fib_rule_put(rule);
+   break;
+   }
+   rtnl_unlock();
+   kfree(fib_work);
 }
 
 static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work 
*fib_work,
@@ -3061,6 +3079,13 @@ static void mlxsw_sp_router_fib4_event(struct 
mlxsw_sp_fib_event_work *fib_work,
 static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work 
*fib_work,
   struct fib_notifier_info *info)
 {
+   switch (fib_work->event) {
+   case FIB_EVENT_RULE_ADD: /* fall through */
+   case FIB_EVENT_RULE_DEL:
+   memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info));
+   fib_rule_get(fib_work->fr_info.rule);
+   break;
+   }
 }
 
 /* Called with rcu_read_lock() */
-- 
2.9.3



[patch net-next 16/17] mlxsw: spectrum_router: Abort on source-specific routes

2017-07-19 Thread Jiri Pirko
From: Ido Schimmel 

Without resorting to ACLs, the device performs route lookup solely based
on the destination IP address.

In case source-specific routing is needed, an error is returned and the
abort mechanism is activated, thus allowing the kernel to take over
forwarding decisions.

Instead of aborting, we can trap specific destination prefixes where
source-specific routes are present, but this will result in a lot more
code that is unlikely to ever be used.

Signed-off-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index c56c700..33cb6b6 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -3429,6 +3429,9 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp 
*mlxsw_sp,
if (mlxsw_sp->router->aborted)
return 0;
 
+   if (rt->rt6i_src.plen)
+   return -EINVAL;
+
if (mlxsw_sp_fib6_rt_should_ignore(rt))
return 0;
 
-- 
2.9.3



[patch net-next 04/17] net: fib_rules: Implement notification logic in core

2017-07-19 Thread Jiri Pirko
From: Ido Schimmel 

Unlike the routing tables, the FIB rules share a common core, so instead
of replicating the same logic for each address family we can simply dump
the rules and send notifications from the core itself.

To protect the integrity of the dump, a rules-specific sequence counter
is added for each address family and incremented whenever a rule is
added or deleted (under RTNL).

Signed-off-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 include/net/fib_rules.h |  9 +++
 include/net/ip_fib.h| 24 +--
 net/core/fib_rules.c| 63 +
 net/ipv4/fib_notifier.c |  9 +--
 net/ipv4/fib_rules.c| 45 ---
 5 files changed, 101 insertions(+), 49 deletions(-)

diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index c487bfa..3d7f1ce 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -8,6 +8,7 @@
 #include 
 #include 
 #include 
+#include 
 
 struct fib_kuid_range {
kuid_t start;
@@ -57,6 +58,7 @@ struct fib_rules_ops {
int addr_size;
int unresolved_rules;
int nr_goto_rules;
+   unsigned int fib_rules_seq;
 
int (*action)(struct fib_rule *,
  struct flowi *, int,
@@ -89,6 +91,11 @@ struct fib_rules_ops {
struct rcu_head rcu;
 };
 
+struct fib_rule_notifier_info {
+   struct fib_notifier_info info; /* must be first */
+   struct fib_rule *rule;
+};
+
 #define FRA_GENERIC_POLICY \
[FRA_IIFNAME]   = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \
[FRA_OIFNAME]   = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \
@@ -143,6 +150,8 @@ int fib_rules_lookup(struct fib_rules_ops *, struct flowi 
*, int flags,
 int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table,
 u32 flags);
 bool fib_rule_matchall(const struct fib_rule *rule);
+int fib_rules_dump(struct net *net, struct notifier_block *nb, int family);
+unsigned int fib_rules_seq_read(struct net *net, int family);
 
 int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
   struct netlink_ext_ack *extack);
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 800a006..593d8e2 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -212,11 +212,6 @@ struct fib_entry_notifier_info {
u32 tb_id;
 };
 
-struct fib_rule_notifier_info {
-   struct fib_notifier_info info; /* must be first */
-   struct fib_rule *rule;
-};
-
 struct fib_nh_notifier_info {
struct fib_notifier_info info; /* must be first */
struct fib_nh *fib_nh;
@@ -232,13 +227,6 @@ int __net_init fib4_notifier_init(struct net *net);
 void __net_exit fib4_notifier_exit(struct net *net);
 
 void fib_notify(struct net *net, struct notifier_block *nb);
-#ifdef CONFIG_IP_MULTIPLE_TABLES
-void fib_rules_notify(struct net *net, struct notifier_block *nb);
-#else
-static inline void fib_rules_notify(struct net *net, struct notifier_block *nb)
-{
-}
-#endif
 
 struct fib_table {
struct hlist_node   tb_hlist;
@@ -311,6 +299,16 @@ static inline bool fib4_rule_default(const struct fib_rule 
*rule)
return true;
 }
 
+static inline int fib4_rules_dump(struct net *net, struct notifier_block *nb)
+{
+   return 0;
+}
+
+static inline unsigned int fib4_rules_seq_read(struct net *net)
+{
+   return 0;
+}
+
 #else /* CONFIG_IP_MULTIPLE_TABLES */
 int __net_init fib4_rules_init(struct net *net);
 void __net_exit fib4_rules_exit(struct net *net);
@@ -356,6 +354,8 @@ static inline int fib_lookup(struct net *net, struct flowi4 
*flp,
 }
 
 bool fib4_rule_default(const struct fib_rule *rule);
+int fib4_rules_dump(struct net *net, struct notifier_block *nb);
+unsigned int fib4_rules_seq_read(struct net *net);
 
 #endif /* CONFIG_IP_MULTIPLE_TABLES */
 
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index a0093e1..6678813 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -299,6 +299,67 @@ int fib_rules_lookup(struct fib_rules_ops *ops, struct 
flowi *fl,
 }
 EXPORT_SYMBOL_GPL(fib_rules_lookup);
 
+static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
+ struct fib_rule *rule, int family)
+{
+   struct fib_rule_notifier_info info = {
+   .info.family = family,
+   .rule = rule,
+   };
+
+   return call_fib_notifier(nb, net, event_type, &info.info);
+}
+
+static int call_fib_rule_notifiers(struct net *net,
+  enum fib_event_type event_type,
+  struct fib_rule *rule,
+  struct fib_rules_ops *ops)
+{
+   struct 

[patch net-next 14/17] mlxsw: spectrum_router: Add support for IPv6 routes addition / deletion

2017-07-19 Thread Jiri Pirko
From: Ido Schimmel 

Allow directly connected and remote unicast IPv6 routes to be programmed
to the device's tables.

As with IPv4, identical routes - sharing the same destination prefix -
are ordered in a FIB node according to their table ID and then the
metric. While the kernel doesn't share the same trie for the local and
main table, this does happen in the device, so ordering according to
table ID is needed.

Since individual nexthops can be added and deleted in IPv6, each FIB
entry stores a linked list of the rt6_info structs it represents. Upon
the addition or deletion of a nexthop, a new nexthop group is allocated
according to the new configuration and the old one is destroyed.
Identical groups aren't currently consolidated, but will be in a
follow-up patchset.

Signed-off-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 685 -
 1 file changed, 682 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index cf06b7d..33e5b16 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -44,6 +44,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -407,6 +408,17 @@ struct mlxsw_sp_fib4_entry {
u8 type;
 };
 
+struct mlxsw_sp_fib6_entry {
+   struct mlxsw_sp_fib_entry common;
+   struct list_head rt6_list;
+   unsigned int nrt6;
+};
+
+struct mlxsw_sp_rt6 {
+   struct list_head list;
+   struct rt6_info *rt;
+};
+
 enum mlxsw_sp_l3proto {
MLXSW_SP_L3_PROTO_IPV4,
MLXSW_SP_L3_PROTO_IPV6,
@@ -2094,6 +2106,40 @@ mlxsw_sp_fib_entry_should_offload(const struct 
mlxsw_sp_fib_entry *fib_entry)
}
 }
 
+static void
+mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
+{
+   struct mlxsw_sp_fib6_entry *fib6_entry;
+   struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+
+   fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
+ common);
+   list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
+   struct rt6_info *rt = mlxsw_sp_rt6->rt;
+
+   write_lock_bh(&rt->rt6i_table->tb6_lock);
+   rt->rt6i_flags |= RTF_OFFLOAD;
+   write_unlock_bh(&rt->rt6i_table->tb6_lock);
+   }
+}
+
+static void
+mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
+{
+   struct mlxsw_sp_fib6_entry *fib6_entry;
+   struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+
+   fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
+ common);
+   list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
+   struct rt6_info *rt = mlxsw_sp_rt6->rt;
+
+   write_lock_bh(&rt->rt6i_table->tb6_lock);
+   rt->rt6i_flags &= ~RTF_OFFLOAD;
+   write_unlock_bh(&rt->rt6i_table->tb6_lock);
+   }
+}
+
 static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry 
*fib_entry)
 {
fib_entry->offloaded = true;
@@ -2103,7 +2149,8 @@ static void mlxsw_sp_fib_entry_offload_set(struct 
mlxsw_sp_fib_entry *fib_entry)
fib_info_offload_inc(fib_entry->nh_group->key.fi);
break;
case MLXSW_SP_L3_PROTO_IPV6:
-   WARN_ON_ONCE(1);
+   mlxsw_sp_fib6_entry_offload_set(fib_entry);
+   break;
}
 }
 
@@ -2115,7 +2162,8 @@ mlxsw_sp_fib_entry_offload_unset(struct 
mlxsw_sp_fib_entry *fib_entry)
fib_info_offload_dec(fib_entry->nh_group->key.fi);
break;
case MLXSW_SP_L3_PROTO_IPV6:
-   WARN_ON_ONCE(1);
+   mlxsw_sp_fib6_entry_offload_unset(fib_entry);
+   break;
}
 
fib_entry->offloaded = false;
@@ -2829,6 +2877,602 @@ static void mlxsw_sp_router_fib4_del(struct mlxsw_sp 
*mlxsw_sp,
mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
 }
 
+static bool mlxsw_sp_fib6_rt_should_ignore(const struct rt6_info *rt)
+{
+   /* Packets with link-local destination IP arriving to the router
+* are trapped to the CPU, so no need to program specific routes
+* for them.
+*/
+   if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LINKLOCAL)
+   return true;
+
+   /* Multicast routes aren't supported, so ignore them. Neighbour
+* Discovery packets are specifically trapped.
+*/
+   if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_MULTICAST)
+   return true;
+
+   /* Cloned routes are irrelevant in the forwarding path. */
+   if (rt->rt6i_flags & RTF_CACHE)
+   return true;
+
+   return false;
+}
+
+static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt)
+{
+   struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+
+   

[patch net-next 09/17] ipv6: fib: Dump tables during registration to FIB chain

2017-07-19 Thread Jiri Pirko
From: Ido Schimmel 

Dump all the FIB tables in each net namespace upon registration to the
FIB notification chain so that the callee will have a complete view of
the tables.

The integrity of the dump is ensured by a per-table sequence counter
that is incremented (under write lock) whenever a route is added or
deleted from the table.

All the sequence counters are read (under each table's read lock) and
summed, prior and after the dump. In case the counters differ, then the
dump is either restarted or the registration fails.

While it's possible for a table to be modified after its counter has
been read, this isn't really a problem. In case it happened before it
was read the second time, then the comparison at the end will fail. If
it happened afterwards, then we're guaranteed to be notified about the
change, as the notification block is registered prior to the second
read.

Signed-off-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 include/net/ip6_fib.h|  4 +++
 net/ipv6/fib6_notifier.c | 10 --
 net/ipv6/ip6_fib.c   | 92 
 3 files changed, 104 insertions(+), 2 deletions(-)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index dbe5537..0b30521 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -235,6 +235,7 @@ struct fib6_table {
struct fib6_node tb6_root;
struct inet_peer_base   tb6_peers;
unsigned int flags;
+   unsigned int fib_seq;
 #define RT6_TABLE_HAS_DFLT_ROUTER  BIT(0)
 };
 
@@ -308,6 +309,9 @@ int call_fib6_notifiers(struct net *net, enum 
fib_event_type event_type,
 int __net_init fib6_notifier_init(struct net *net);
 void __net_exit fib6_notifier_exit(struct net *net);
 
+unsigned int fib6_tables_seq_read(struct net *net);
+int fib6_tables_dump(struct net *net, struct notifier_block *nb);
+
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 int fib6_rules_init(void);
 void fib6_rules_cleanup(void);
diff --git a/net/ipv6/fib6_notifier.c b/net/ipv6/fib6_notifier.c
index 298efc6..66a103e 100644
--- a/net/ipv6/fib6_notifier.c
+++ b/net/ipv6/fib6_notifier.c
@@ -23,12 +23,18 @@ int call_fib6_notifiers(struct net *net, enum 
fib_event_type event_type,
 
 static unsigned int fib6_seq_read(struct net *net)
 {
-   return fib6_rules_seq_read(net);
+   return fib6_tables_seq_read(net) + fib6_rules_seq_read(net);
 }
 
 static int fib6_dump(struct net *net, struct notifier_block *nb)
 {
-   return fib6_rules_dump(net, nb);
+   int err;
+
+   err = fib6_rules_dump(net, nb);
+   if (err)
+   return err;
+
+   return fib6_tables_dump(net, nb);
 }
 
 static const struct fib_notifier_ops fib6_notifier_ops_template = {
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 595a57c..719c1048 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -303,6 +303,37 @@ static void __net_init fib6_tables_init(struct net *net)
 
 #endif
 
+unsigned int fib6_tables_seq_read(struct net *net)
+{
+   unsigned int h, fib_seq = 0;
+
+   rcu_read_lock();
+   for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
+   struct hlist_head *head = &net->ipv6.fib_table_hash[h];
+   struct fib6_table *tb;
+
+   hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
+   read_lock_bh(&tb->tb6_lock);
+   fib_seq += tb->fib_seq;
+   read_unlock_bh(&tb->tb6_lock);
+   }
+   }
+   rcu_read_unlock();
+
+   return fib_seq;
+}
+
+static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net,
+   enum fib_event_type event_type,
+   struct rt6_info *rt)
+{
+   struct fib6_entry_notifier_info info = {
+   .rt = rt,
+   };
+
+   return call_fib6_notifier(nb, net, event_type, &info.info);
+}
+
 static int call_fib6_entry_notifiers(struct net *net,
 enum fib_event_type event_type,
 struct rt6_info *rt)
@@ -311,9 +342,70 @@ static int call_fib6_entry_notifiers(struct net *net,
.rt = rt,
};
 
+   rt->rt6i_table->fib_seq++;
return call_fib6_notifiers(net, event_type, &info.info);
 }
 
+struct fib6_dump_arg {
+   struct net *net;
+   struct notifier_block *nb;
+};
+
+static void fib6_rt_dump(struct rt6_info *rt, struct fib6_dump_arg *arg)
+{
+   if (rt == arg->net->ipv6.ip6_null_entry)
+   return;
+   call_fib6_entry_notifier(arg->nb, arg->net, FIB_EVENT_ENTRY_ADD, rt);
+}
+
+static int fib6_node_dump(struct fib6_walker *w)
+{
+   struct rt6_info *rt;
+
+   for (rt = w->leaf; rt; rt = rt->dst.rt6_next)
+   fib6_rt_dump(rt, w->args);
+   w->leaf = NULL;
+   return 0;
+}
+
+static void fib6_table_dump(struct net *net, struct fib6_table *tb,
+   

[patch net-next 07/17] ipv6: fib: Add in-kernel notifications for route add / delete

2017-07-19 Thread Jiri Pirko
From: Ido Schimmel 

As with IPv4, allow listeners of the FIB notification chain to receive
notifications whenever a route is added, replaced or deleted. This is
done by placing calls to the FIB notification chain in the two lowest
level functions that end up performing these operations - namely,
fib6_add_rt2node() and fib6_del_route().

Unlike IPv4, APPEND notifications aren't sent as the kernel doesn't
distinguish between "append" (NLM_F_CREATE|NLM_F_APPEND) and "prepend"
(NLM_F_CREATE). If NLM_F_EXCL isn't set, duplicate routes are always
added after the existing duplicate routes.

Signed-off-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 include/net/ip6_fib.h |  5 +
 net/ipv6/ip6_fib.c| 17 +
 2 files changed, 22 insertions(+)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index be8ddf3..e2b292b 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -258,6 +258,11 @@ typedef struct rt6_info *(*pol_lookup_t)(struct net *,
 struct fib6_table *,
 struct flowi6 *, int);
 
+struct fib6_entry_notifier_info {
+   struct fib_notifier_info info; /* must be first */
+   struct rt6_info *rt;
+};
+
 /*
  * exported functions
  */
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index f93976e..595a57c 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -33,6 +33,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -302,6 +303,17 @@ static void __net_init fib6_tables_init(struct net *net)
 
 #endif
 
+static int call_fib6_entry_notifiers(struct net *net,
+enum fib_event_type event_type,
+struct rt6_info *rt)
+{
+   struct fib6_entry_notifier_info info = {
+   .rt = rt,
+   };
+
+   return call_fib6_notifiers(net, event_type, &info.info);
+}
+
 static int fib6_dump_node(struct fib6_walker *w)
 {
int res;
@@ -879,6 +891,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct 
rt6_info *rt,
*ins = rt;
rt->rt6i_node = fn;
atomic_inc(&rt->rt6i_ref);
+   call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_ADD,
+ rt);
if (!info->skip_notify)
inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
@@ -906,6 +920,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct 
rt6_info *rt,
rt->rt6i_node = fn;
rt->dst.rt6_next = iter->dst.rt6_next;
atomic_inc(&rt->rt6i_ref);
+   call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE,
+ rt);
if (!info->skip_notify)
inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
if (!(fn->fn_flags & RTN_RTINFO)) {
@@ -1459,6 +1475,7 @@ static void fib6_del_route(struct fib6_node *fn, struct 
rt6_info **rtp,
 
fib6_purge_rt(rt, fn, net);
 
+   call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt);
if (!info->skip_notify)
inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
rt6_release(rt);
-- 
2.9.3



[patch net-next 10/17] ipv6: fib: Add offload indication to routes

2017-07-19 Thread Jiri Pirko
From: Ido Schimmel 

Allow user space applications to see which routes are offloaded and
which aren't by setting the RTNH_F_OFFLOAD flag when dumping them.

To be consistent with IPv4, a multipath route is marked as offloaded if
one of its nexthops is offloaded. Individual nexthops aren't marked with
the 'offload' flag.

Signed-off-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 include/uapi/linux/ipv6_route.h |  1 +
 net/ipv6/route.c| 19 ---
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/include/uapi/linux/ipv6_route.h b/include/uapi/linux/ipv6_route.h
index d496c02..33e2a57 100644
--- a/include/uapi/linux/ipv6_route.h
+++ b/include/uapi/linux/ipv6_route.h
@@ -35,6 +35,7 @@
 #define RTF_PREF(pref) ((pref) << 27)
 #define RTF_PREF_MASK  0x18000000
 
+#define RTF_OFFLOAD    0x20000000  /* offloaded route  */
 #define RTF_PCPU   0x40000000  /* read-only: can not be set by user */
 #define RTF_LOCAL  0x80000000
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 4d30c96..924e02d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1820,6 +1820,11 @@ static struct rt6_info *ip6_route_info_create(struct 
fib6_config *cfg,
goto out;
}
 
+   if (cfg->fc_flags & RTF_OFFLOAD) {
+   NL_SET_ERR_MSG(extack, "Userspace can not set RTF_OFFLOAD");
+   goto out;
+   }
+
if (cfg->fc_dst_len > 128) {
NL_SET_ERR_MSG(extack, "Invalid prefix length");
goto out;
@@ -3327,6 +3332,9 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct 
rt6_info *rt,
goto nla_put_failure;
}
 
+   if (rt->rt6i_flags & RTF_OFFLOAD)
+   *flags |= RTNH_F_OFFLOAD;
+
/* not needed for multipath encoding b/c it has a rtnexthop struct */
if (!skip_oif && rt->dst.dev &&
nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
@@ -3343,7 +3351,8 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct 
rt6_info *rt,
 }
 
 /* add multipath next hop */
-static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
+static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt,
+  unsigned int *rtm_flags)
 {
struct rtnexthop *rtnh;
unsigned int flags = 0;
@@ -3359,6 +3368,10 @@ static int rt6_add_nexthop(struct sk_buff *skb, struct 
rt6_info *rt)
goto nla_put_failure;
 
rtnh->rtnh_flags = flags;
+   if (rtnh->rtnh_flags & RTNH_F_OFFLOAD) {
+   rtnh->rtnh_flags &= ~RTNH_F_OFFLOAD;
+   *rtm_flags |= RTNH_F_OFFLOAD;
+   }
 
/* length of rtnetlink header + attributes */
rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
@@ -3499,12 +3512,12 @@ static int rt6_fill_node(struct net *net,
if (!mp)
goto nla_put_failure;
 
-   if (rt6_add_nexthop(skb, rt) < 0)
+   if (rt6_add_nexthop(skb, rt, &rtm->rtm_flags) < 0)
goto nla_put_failure;
 
list_for_each_entry_safe(sibling, next_sibling,
 &rt->rt6i_siblings, rt6i_siblings) {
-   if (rt6_add_nexthop(skb, sibling) < 0)
+   if (rt6_add_nexthop(skb, sibling, &rtm->rtm_flags) < 0)
goto nla_put_failure;
}
 
-- 
2.9.3



[patch net-next 15/17] mlxsw: spectrum_router: Add support for route replace

2017-07-19 Thread Jiri Pirko
From: Ido Schimmel 

In case we got a replace event, then the replaced route must exist. If
the route isn't capable of multipath, then replace first matching
non-multipath capable route.

If the route is capable of multipath and matching multipath capable
route is found, then replace it. Otherwise, replace first matching
non-multipath capable route.

The new route is inserted before the replaced one. In case the replaced
route is currently offloaded, then it's overwritten in the device's table
by the new route and later deleted, thus not impacting routed traffic.

Signed-off-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 63 +-
 1 file changed, 49 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 33e5b16..c56c700 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -2938,11 +2938,11 @@ mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry 
*fib6_entry)
 
 static struct mlxsw_sp_fib6_entry *
 mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
-const struct rt6_info *nrt)
+const struct rt6_info *nrt, bool replace)
 {
struct mlxsw_sp_fib6_entry *fib6_entry;
 
-   if (!mlxsw_sp_fib6_rt_can_mp(nrt))
+   if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
return NULL;
 
list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
@@ -3272,9 +3272,9 @@ static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp 
*mlxsw_sp,
 
 static struct mlxsw_sp_fib6_entry *
 mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
- const struct rt6_info *nrt)
+ const struct rt6_info *nrt, bool replace)
 {
-   struct mlxsw_sp_fib6_entry *fib6_entry;
+   struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;
 
list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
@@ -3283,21 +3283,32 @@ mlxsw_sp_fib6_node_entry_find(const struct 
mlxsw_sp_fib_node *fib_node,
continue;
if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
break;
+   if (replace && rt->rt6i_metric == nrt->rt6i_metric) {
+   if (mlxsw_sp_fib6_rt_can_mp(rt) ==
+   mlxsw_sp_fib6_rt_can_mp(nrt))
+   return fib6_entry;
+   if (mlxsw_sp_fib6_rt_can_mp(nrt))
+   fallback = fallback ?: fib6_entry;
+   }
if (rt->rt6i_metric > nrt->rt6i_metric)
-   return fib6_entry;
+   return fallback ?: fib6_entry;
}
 
-   return NULL;
+   return fallback;
 }
 
 static int
-mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry)
+mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
+  bool replace)
 {
struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
struct rt6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
struct mlxsw_sp_fib6_entry *fib6_entry;
 
-   fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt);
+   fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);
+
+   if (replace && WARN_ON(!fib6_entry))
+   return -EINVAL;
 
if (fib6_entry) {
list_add_tail(&new6_entry->common.list,
@@ -3331,11 +3342,12 @@ mlxsw_sp_fib6_node_list_remove(struct 
mlxsw_sp_fib6_entry *fib6_entry)
 }
 
 static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
-struct mlxsw_sp_fib6_entry *fib6_entry)
+struct mlxsw_sp_fib6_entry *fib6_entry,
+bool replace)
 {
int err;
 
-   err = mlxsw_sp_fib6_node_list_insert(fib6_entry);
+   err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
if (err)
return err;
 
@@ -3390,8 +3402,25 @@ mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
return NULL;
 }
 
+static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
+   struct mlxsw_sp_fib6_entry *fib6_entry,
+   bool replace)
+{
+   struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
+   struct mlxsw_sp_fib6_entry *replaced;
+
+   if (!replace)
+   return;
+
+   replaced = list_next_entry(fib6_entry, common.list);
+
+   mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, 

[patch net-next 17/17] mlxsw: spectrum_router: Don't ignore IPv6 notifications

2017-07-19 Thread Jiri Pirko
From: Ido Schimmel 

We now have all the necessary IPv6 infrastructure in place, so stop
ignoring these notifications.

Signed-off-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 33cb6b6..dc9a032 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -3813,7 +3813,7 @@ static int mlxsw_sp_router_fib_event(struct 
notifier_block *nb,
struct fib_notifier_info *info = ptr;
struct mlxsw_sp_router *router;
 
-   if (!net_eq(info->net, &init_net) || info->family != AF_INET)
+   if (!net_eq(info->net, &init_net))
return NOTIFY_DONE;
 
fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
-- 
2.9.3



[patch net-next 06/17] ipv6: fib: Add FIB notifiers callbacks

2017-07-19 Thread Jiri Pirko
From: Ido Schimmel 

We're about to add IPv6 FIB offload support, so implement the necessary
callbacks in IPv6 code, which will later allow us to add routes and
rules notifications.

Signed-off-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 include/net/ip6_fib.h| 11 ++
 include/net/netns/ipv6.h |  1 +
 net/ipv6/Makefile|  2 +-
 net/ipv6/fib6_notifier.c | 55 
 net/ipv6/ip6_fib.c   |  7 ++
 5 files changed, 75 insertions(+), 1 deletion(-)
 create mode 100644 net/ipv6/fib6_notifier.c

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 6000b0d..be8ddf3 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -16,10 +16,12 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
 #include 
+#include 
 
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 #define FIB6_TABLE_HASHSZ 256
@@ -292,6 +294,15 @@ int fib6_init(void);
 
 int ipv6_route_open(struct inode *inode, struct file *file);
 
+int call_fib6_notifier(struct notifier_block *nb, struct net *net,
+  enum fib_event_type event_type,
+  struct fib_notifier_info *info);
+int call_fib6_notifiers(struct net *net, enum fib_event_type event_type,
+   struct fib_notifier_info *info);
+
+int __net_init fib6_notifier_init(struct net *net);
+void __net_exit fib6_notifier_exit(struct net *net);
+
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 int fib6_rules_init(void);
 void fib6_rules_cleanup(void);
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index de7745e..abdf3b4 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -86,6 +86,7 @@ struct netns_ipv6 {
atomic_t dev_addr_genid;
atomic_t fib6_sernum;
struct seg6_pernet_data *seg6_data;
+   struct fib_notifier_ops *notifier_ops;
 };
 
 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 217e9ff..f8b24c2 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -9,7 +9,7 @@ ipv6-objs :=af_inet6.o anycast.o ip6_output.o ip6_input.o 
addrconf.o \
route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \
exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \
-   udp_offload.o seg6.o
+   udp_offload.o seg6.o fib6_notifier.o
 
 ipv6-offload :=ip6_offload.o tcpv6_offload.o exthdrs_offload.o
 
diff --git a/net/ipv6/fib6_notifier.c b/net/ipv6/fib6_notifier.c
new file mode 100644
index 0000000..c2bb1ab
--- /dev/null
+++ b/net/ipv6/fib6_notifier.c
@@ -0,0 +1,55 @@
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+int call_fib6_notifier(struct notifier_block *nb, struct net *net,
+  enum fib_event_type event_type,
+  struct fib_notifier_info *info)
+{
+   info->family = AF_INET6;
+   return call_fib_notifier(nb, net, event_type, info);
+}
+
+int call_fib6_notifiers(struct net *net, enum fib_event_type event_type,
+   struct fib_notifier_info *info)
+{
+   info->family = AF_INET6;
+   return call_fib_notifiers(net, event_type, info);
+}
+
+static unsigned int fib6_seq_read(struct net *net)
+{
+   return 0;
+}
+
+static int fib6_dump(struct net *net, struct notifier_block *nb)
+{
+   return 0;
+}
+
+static const struct fib_notifier_ops fib6_notifier_ops_template = {
+   .family = AF_INET6,
+   .fib_seq_read   = fib6_seq_read,
+   .fib_dump   = fib6_dump,
+};
+
+int __net_init fib6_notifier_init(struct net *net)
+{
+   struct fib_notifier_ops *ops;
+
+   ops = fib_notifier_ops_register(&fib6_notifier_ops_template, net);
+   if (IS_ERR(ops))
+   return PTR_ERR(ops);
+   net->ipv6.notifier_ops = ops;
+
+   return 0;
+}
+
+void __net_exit fib6_notifier_exit(struct net *net)
+{
+   fib_notifier_ops_unregister(net->ipv6.notifier_ops);
+}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index ebb299c..f93976e 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1839,6 +1839,11 @@ static void fib6_gc_timer_cb(unsigned long arg)
 static int __net_init fib6_net_init(struct net *net)
 {
size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ;
+   int err;
+
+   err = fib6_notifier_init(net);
+   if (err)
+   return err;
 
spin_lock_init(&net->ipv6.fib6_gc_lock);
rwlock_init(&net->ipv6.fib6_walker_lock);
@@ -1891,6 +1896,7 @@ static int __net_init fib6_net_init(struct net *net)
 out_rt6_stats:
kfree(net->ipv6.rt6_stats);
 out_timer:
+   fib6_notifier_exit(net);
return -ENOMEM;
 }
 
@@ -1907,6 +1913,7 @@ static void fib6_net_exit(struct net *net)
kfree(net->ipv6.fib6_main_tbl);

[patch net-next 12/17] mlxsw: spectrum_router: Demultiplex FIB event based on family

2017-07-19 Thread Jiri Pirko
From: Ido Schimmel 

The FIB notification block currently only handles IPv4 events, but we
want to start handling IPv6 events soon, so lay the groundwork now.

Do that by preparing the work item and processing it according to the
notified address family.

Signed-off-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 65 +++---
 1 file changed, 44 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 7965a53..d3b20bc 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -2982,7 +2982,7 @@ struct mlxsw_sp_fib_event_work {
unsigned long event;
 };
 
-static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
+static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
 {
struct mlxsw_sp_fib_event_work *fib_work =
container_of(work, struct mlxsw_sp_fib_event_work, work);
@@ -3027,6 +3027,42 @@ static void mlxsw_sp_router_fib_event_work(struct 
work_struct *work)
kfree(fib_work);
 }
 
+static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
+{
+}
+
+static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work 
*fib_work,
+  struct fib_notifier_info *info)
+{
+   switch (fib_work->event) {
+   case FIB_EVENT_ENTRY_REPLACE: /* fall through */
+   case FIB_EVENT_ENTRY_APPEND: /* fall through */
+   case FIB_EVENT_ENTRY_ADD: /* fall through */
+   case FIB_EVENT_ENTRY_DEL:
+   memcpy(&fib_work->fen_info, info, sizeof(fib_work->fen_info));
+   /* Take referece on fib_info to prevent it from being
+* freed while work is queued. Release it afterwards.
+*/
+   fib_info_hold(fib_work->fen_info.fi);
+   break;
+   case FIB_EVENT_RULE_ADD: /* fall through */
+   case FIB_EVENT_RULE_DEL:
+   memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info));
+   fib_rule_get(fib_work->fr_info.rule);
+   break;
+   case FIB_EVENT_NH_ADD: /* fall through */
+   case FIB_EVENT_NH_DEL:
+   memcpy(&fib_work->fnh_info, info, sizeof(fib_work->fnh_info));
+   fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
+   break;
+   }
+}
+
+static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work 
*fib_work,
+  struct fib_notifier_info *info)
+{
+}
+
 /* Called with rcu_read_lock() */
 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
 unsigned long event, void *ptr)
@@ -3042,31 +3078,18 @@ static int mlxsw_sp_router_fib_event(struct 
notifier_block *nb,
if (WARN_ON(!fib_work))
return NOTIFY_BAD;
 
-   INIT_WORK(&fib_work->work, mlxsw_sp_router_fib_event_work);
router = container_of(nb, struct mlxsw_sp_router, fib_nb);
fib_work->mlxsw_sp = router->mlxsw_sp;
fib_work->event = event;
 
-   switch (event) {
-   case FIB_EVENT_ENTRY_REPLACE: /* fall through */
-   case FIB_EVENT_ENTRY_APPEND: /* fall through */
-   case FIB_EVENT_ENTRY_ADD: /* fall through */
-   case FIB_EVENT_ENTRY_DEL:
-   memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info));
-   /* Take referece on fib_info to prevent it from being
-* freed while work is queued. Release it afterwards.
-*/
-   fib_info_hold(fib_work->fen_info.fi);
+   switch (info->family) {
+   case AF_INET:
+   INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
+   mlxsw_sp_router_fib4_event(fib_work, info);
break;
-   case FIB_EVENT_RULE_ADD: /* fall through */
-   case FIB_EVENT_RULE_DEL:
-   memcpy(&fib_work->fr_info, ptr, sizeof(fib_work->fr_info));
-   fib_rule_get(fib_work->fr_info.rule);
-   break;
-   case FIB_EVENT_NH_ADD: /* fall through */
-   case FIB_EVENT_NH_DEL:
-   memcpy(&fib_work->fnh_info, ptr, sizeof(fib_work->fnh_info));
-   fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
+   case AF_INET6:
+   INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
+   mlxsw_sp_router_fib6_event(fib_work, info);
break;
}
 
-- 
2.9.3



[patch net-next 08/17] ipv6: fib_rules: Dump rules during registration to FIB chain

2017-07-19 Thread Jiri Pirko
From: Ido Schimmel 

Allow users of the FIB notification chain to receive a complete view of
the IPv6 FIB rules upon registration to the chain.

The integrity of the dump is ensured by a per-family sequence counter
that is incremented (under RTNL) whenever a rule is added or deleted.

All the sequence counters are read (under RTNL) and summed, prior to and
after the dump. In case the counters differ, then the dump is either
restarted or the registration fails.

Signed-off-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 include/net/ip6_fib.h| 10 ++
 net/ipv6/fib6_notifier.c |  4 ++--
 net/ipv6/fib6_rules.c| 11 +++
 3 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index e2b292b..dbe5537 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -312,6 +312,8 @@ void __net_exit fib6_notifier_exit(struct net *net);
 int fib6_rules_init(void);
 void fib6_rules_cleanup(void);
 bool fib6_rule_default(const struct fib_rule *rule);
+int fib6_rules_dump(struct net *net, struct notifier_block *nb);
+unsigned int fib6_rules_seq_read(struct net *net);
 #else
 static inline int   fib6_rules_init(void)
 {
@@ -325,5 +327,13 @@ static inline bool fib6_rule_default(const struct fib_rule 
*rule)
 {
return true;
 }
+static inline int fib6_rules_dump(struct net *net, struct notifier_block *nb)
+{
+   return 0;
+}
+static inline unsigned int fib6_rules_seq_read(struct net *net)
+{
+   return 0;
+}
 #endif
 #endif
diff --git a/net/ipv6/fib6_notifier.c b/net/ipv6/fib6_notifier.c
index c2bb1ab..298efc6 100644
--- a/net/ipv6/fib6_notifier.c
+++ b/net/ipv6/fib6_notifier.c
@@ -23,12 +23,12 @@ int call_fib6_notifiers(struct net *net, enum 
fib_event_type event_type,
 
 static unsigned int fib6_seq_read(struct net *net)
 {
-   return 0;
+   return fib6_rules_seq_read(net);
 }
 
 static int fib6_dump(struct net *net, struct notifier_block *nb)
 {
-   return 0;
+   return fib6_rules_dump(net, nb);
 }
 
 static const struct fib_notifier_ops fib6_notifier_ops_template = {
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index ef1fcee..2f29e4e 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -14,6 +14,7 @@
  */
 
 #include 
+#include 
 #include 
 
 #include 
@@ -49,6 +50,16 @@ bool fib6_rule_default(const struct fib_rule *rule)
 }
 EXPORT_SYMBOL_GPL(fib6_rule_default);
 
+int fib6_rules_dump(struct net *net, struct notifier_block *nb)
+{
+   return fib_rules_dump(net, nb, AF_INET6);
+}
+
+unsigned int fib6_rules_seq_read(struct net *net)
+{
+   return fib_rules_seq_read(net, AF_INET6);
+}
+
 struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
   int flags, pol_lookup_t lookup)
 {
-- 
2.9.3



[patch net-next 03/17] rocker: Ignore address families other than IPv4

2017-07-19 Thread Jiri Pirko
From: Ido Schimmel 

As in the previous patch, ignore IPv6 notifications since the driver
doesn't support them.

Signed-off-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/rocker/rocker_main.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/rocker/rocker_main.c 
b/drivers/net/ethernet/rocker/rocker_main.c
index ef38c1a..fc8f8bd 100644
--- a/drivers/net/ethernet/rocker/rocker_main.c
+++ b/drivers/net/ethernet/rocker/rocker_main.c
@@ -2192,6 +2192,10 @@ static int rocker_router_fib_event(struct notifier_block 
*nb,
 {
struct rocker *rocker = container_of(nb, struct rocker, fib_nb);
struct rocker_fib_event_work *fib_work;
+   struct fib_notifier_info *info = ptr;
+
+   if (info->family != AF_INET)
+   return NOTIFY_DONE;
 
fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
if (WARN_ON(!fib_work))
-- 
2.9.3



[patch net-next 00/17] mlxsw: Support for IPv6 UC router

2017-07-19 Thread Jiri Pirko
From: Jiri Pirko 

This set adds support for IPv6 unicast routes offload. The first four
patches make the FIB notification chain generic so that it could be used
by address families other than IPv4. This is done by having each address
family register its callbacks with the common code, so that its FIB tables
and rules could be dumped upon registration to the chain, while ensuring
the integrity of the dump. The exact mechanics are explained in detail in
the first patch.

The next seven patches build upon this work and add the necessary
callbacks in IPv6 code. This allows listeners of the chain to receive
notifications about IPv6 routes addition, deletion and replacement as
well as FIB rules notifications.

Unlike user space notifications for IPv6 multipath routes, the FIB
notification chain notifies these on a per-nexthop basis. This allows
us to keep the common code lean. Aggregating the nexthops into a single
notification is also unnecessary here, as notifications are serialized
by each table's lock, whereas applications maintaining netlink caches
may suffer from concurrent dumps and deletions / additions of routes.

The last six patches enable the mlxsw driver to offload IPv6 unicast
routes to the Spectrum ASIC. Without resorting to ACLs, lookup is done
solely based on the destination IP, so the abort mechanism is invoked
upon the addition of source-specific routes.

Follow-up patch sets will increase the scale of gatewayed routes by
consolidating identical nexthop groups to one adjacency entry in the
device's adjacency table (as in IPv4), as well as add support for
NH_{ADD,DEL} events which enable support for the
'ignore_routes_with_linkdown' sysctl.

Ido Schimmel (17):
  net: core: Make the FIB notification chain generic
  mlxsw: spectrum_router: Ignore address families other than IPv4
  rocker: Ignore address families other than IPv4
  net: fib_rules: Implement notification logic in core
  ipv6: fib_rules: Check if rule is a default rule
  ipv6: fib: Add FIB notifiers callbacks
  ipv6: fib: Add in-kernel notifications for route add / delete
  ipv6: fib_rules: Dump rules during registration to FIB chain
  ipv6: fib: Dump tables during registration to FIB chain
  ipv6: fib: Add offload indication to routes
  ipv6: fib: Allow non-FIB users to take reference on route
  mlxsw: spectrum_router: Demultiplex FIB event based on family
  mlxsw: spectrum_router: Sanitize IPv6 FIB rules
  mlxsw: spectrum_router: Add support for IPv6 routes addition /
deletion
  mlxsw: spectrum_router: Add support for route replace
  mlxsw: spectrum_router: Abort on source-specific routes
  mlxsw: spectrum_router: Don't ignore IPv6 notifications

 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 815 -
 drivers/net/ethernet/rocker/rocker_main.c  |   5 +
 include/net/fib_notifier.h |  44 ++
 include/net/fib_rules.h|   9 +
 include/net/ip6_fib.h  |  52 ++
 include/net/ip_fib.h   |  54 +-
 include/net/net_namespace.h|   1 +
 include/net/netns/ipv4.h   |   1 +
 include/net/netns/ipv6.h   |   1 +
 include/uapi/linux/ipv6_route.h|   1 +
 net/core/Makefile  |   3 +-
 net/core/fib_notifier.c| 164 +
 net/core/fib_rules.c   |  63 ++
 net/ipv4/fib_frontend.c|  17 +-
 net/ipv4/fib_notifier.c|  99 ++-
 net/ipv4/fib_rules.c   |  44 +-
 net/ipv4/fib_semantics.c   |   9 +-
 net/ipv4/fib_trie.c|   5 +-
 net/ipv6/Makefile  |   2 +-
 net/ipv6/fib6_notifier.c   |  61 ++
 net/ipv6/fib6_rules.c  |  31 +
 net/ipv6/ip6_fib.c | 126 +++-
 net/ipv6/route.c   |  23 +-
 23 files changed, 1459 insertions(+), 171 deletions(-)
 create mode 100644 include/net/fib_notifier.h
 create mode 100644 net/core/fib_notifier.c
 create mode 100644 net/ipv6/fib6_notifier.c

-- 
2.9.3



[patch net-next 01/17] net: core: Make the FIB notification chain generic

2017-07-19 Thread Jiri Pirko
From: Ido Schimmel 

The FIB notification chain is currently solely used by IPv4 code.
However, we're going to introduce IPv6 FIB offload support, which
requires these notifications as well.

As explained in commit c3852ef7f2f8 ("ipv4: fib: Replay events when
registering FIB notifier"), upon registration to the chain, the callee
receives a full dump of the FIB tables and rules by traversing all the
net namespaces. The integrity of the dump is ensured by a per-namespace
sequence counter that is incremented whenever a change to the tables or
rules occurs.

In order to allow more address families to use the chain, each family is
expected to register its fib_notifier_ops in its pernet init. These
operations allow the common code to read the family's sequence counter
as well as dump its tables and rules in the given net namespace.

Additionally, a 'family' parameter is added to sent notifications, so
that listeners could distinguish between the different families.

Implement the common code that allows listeners to register to the chain
and for address families to register their fib_notifier_ops. Subsequent
patches will implement these operations in IPv6.

In the future, ipmr and ip6mr will be extended to provide these
notifications as well.

Signed-off-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  |   1 +
 drivers/net/ethernet/rocker/rocker_main.c  |   1 +
 include/net/fib_notifier.h |  44 ++
 include/net/ip_fib.h   |  30 +---
 include/net/net_namespace.h|   1 +
 include/net/netns/ipv4.h   |   1 +
 net/core/Makefile  |   3 +-
 net/core/fib_notifier.c| 164 +
 net/ipv4/fib_frontend.c|  17 ++-
 net/ipv4/fib_notifier.c|  94 +---
 net/ipv4/fib_rules.c   |   5 +-
 net/ipv4/fib_semantics.c   |   9 +-
 net/ipv4/fib_trie.c|   5 +-
 13 files changed, 282 insertions(+), 93 deletions(-)
 create mode 100644 include/net/fib_notifier.h
 create mode 100644 net/core/fib_notifier.c

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index e6d629f..6069681 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -52,6 +52,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "spectrum.h"
 #include "core.h"
diff --git a/drivers/net/ethernet/rocker/rocker_main.c 
b/drivers/net/ethernet/rocker/rocker_main.c
index b1e5c07..ef38c1a 100644
--- a/drivers/net/ethernet/rocker/rocker_main.c
+++ b/drivers/net/ethernet/rocker/rocker_main.c
@@ -34,6 +34,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
diff --git a/include/net/fib_notifier.h b/include/net/fib_notifier.h
new file mode 100644
index 000..2414752
--- /dev/null
+++ b/include/net/fib_notifier.h
@@ -0,0 +1,44 @@
+#ifndef __NET_FIB_NOTIFIER_H
+#define __NET_FIB_NOTIFIER_H
+
+#include 
+#include 
+#include 
+
+struct fib_notifier_info {
+   struct net *net;
+   int family;
+};
+
+enum fib_event_type {
+   FIB_EVENT_ENTRY_REPLACE,
+   FIB_EVENT_ENTRY_APPEND,
+   FIB_EVENT_ENTRY_ADD,
+   FIB_EVENT_ENTRY_DEL,
+   FIB_EVENT_RULE_ADD,
+   FIB_EVENT_RULE_DEL,
+   FIB_EVENT_NH_ADD,
+   FIB_EVENT_NH_DEL,
+};
+
+struct fib_notifier_ops {
+   int family;
+   struct list_head list;
+   unsigned int (*fib_seq_read)(struct net *net);
+   int (*fib_dump)(struct net *net, struct notifier_block *nb);
+   struct rcu_head rcu;
+};
+
+int call_fib_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
+ struct fib_notifier_info *info);
+int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
+  struct fib_notifier_info *info);
+int register_fib_notifier(struct notifier_block *nb,
+ void (*cb)(struct notifier_block *nb));
+int unregister_fib_notifier(struct notifier_block *nb);
+struct fib_notifier_ops *
+fib_notifier_ops_register(const struct fib_notifier_ops *tmpl, struct net 
*net);
+void fib_notifier_ops_unregister(struct fib_notifier_ops *ops);
+
+#endif
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 41d580c..800a006 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -201,10 +202,6 @@ static inline void fib_info_offload_dec(struct fib_info 
*fi)
 #define FIB_RES_PREFSRC(net, res)  ((res).fi->fib_prefsrc ? : \
 

Re: [PATCH v2] iwlwifi: mvm: Fix a memory leak in an error handling path in 'iwl_mvm_sar_get_wgds_table()'

2017-07-19 Thread Luca Coelho
On Fri, 2017-07-14 at 12:06 +0200, Christophe JAILLET wrote:
> We should free 'wgds.pointer' here as done a few lines above in another
> error handling path.
> It was allocated within 'acpi_evaluate_object()'.
> 
> Signed-off-by: Christophe JAILLET 
> ---
> v2: rebase after 7fe90e0e3d60 ("iwlwifi: mvm: refactor geo init")

Thanks, Christophe!

I've pushed this to our internal tree and it will eventually reach the
mainline via our normal process.


> Moreover, a comment in '/drivers/acpi/acpica/utalloc.c' states that:
> /* [...] Note: The caller should use acpi_os_free to free this
>  * buffer created via ACPI_ALLOCATE_BUFFER.
>  */
> 
> So, at the end of this function:
>   out_free:
>   kfree(wgds.pointer);
> should maybe be:
>   out_free:
>   acpi_os_free(wgds.pointer);
> 
> If correct, several places should be fixed accordingly.

Thanks for pointing out! I'm about to do some refactoring in this code
and I'll make sure I check the proper way to free the acpi buffer when
doing so.

--
Cheers,
Luca.


[PATCH] net: dsa: mv88e6xxx: Enable CMODE config support for 6390X

2017-07-19 Thread Martin Hundebøll
Commit f39908d3b1c45 ('net: dsa: mv88e6xxx: Set the CMODE for mv88e6390
ports 9 & 10') added support for setting the CMODE for the 6390X family,
but only enabled it for 6290 and 6390 - and left out 6390X.

Fix support for setting the CMODE on 6390X also by assigning
mv88e6390x_port_set_cmode() to the .port_set_cmode function pointer in
mv88e6390x_ops too.

Signed-off-by: Martin Hundebøll 
---
 drivers/net/dsa/mv88e6xxx/chip.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 947ea352a57a..7fa19d4a8e13 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -3233,6 +3233,7 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = {
.port_set_jumbo_size = mv88e6165_port_set_jumbo_size,
.port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting,
.port_pause_limit = mv88e6390_port_pause_limit,
+   .port_set_cmode = mv88e6390x_port_set_cmode,
.port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
.port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
.stats_snapshot = mv88e6390_g1_stats_snapshot,
-- 
2.13.3



<    1   2