[PATCH net-next v4 2/2] switchdev: fix: pass correct obj size when deferring obj add

2015-10-28 Thread sfeldma
From: Scott Feldman 

Fixes: 4d429c5dd ("switchdev: introduce possibility to defer obj_add/del")
Signed-off-by: Scott Feldman 
Acked-by: Jiri Pirko 
---
v3->v4: rebase sync
v2->v3: add Jiri's Acked-by
v1->v2: use correct "Fixes" tag, use common func to calc obj size for add/del

 net/switchdev/switchdev.c |   19 +--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 8d3e6c3..2433e75 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -337,6 +337,21 @@ int switchdev_port_attr_set(struct net_device *dev,
 }
 EXPORT_SYMBOL_GPL(switchdev_port_attr_set);
 
+static size_t switchdev_obj_size(const struct switchdev_obj *obj)
+{
+   switch (obj->id) {
+   case SWITCHDEV_OBJ_ID_PORT_VLAN:
+   return sizeof(struct switchdev_obj_port_vlan);
+   case SWITCHDEV_OBJ_ID_IPV4_FIB:
+   return sizeof(struct switchdev_obj_ipv4_fib);
+   case SWITCHDEV_OBJ_ID_PORT_FDB:
+   return sizeof(struct switchdev_obj_port_fdb);
+   default:
+   BUG();
+   }
+   return 0;
+}
+
 static int __switchdev_port_obj_add(struct net_device *dev,
const struct switchdev_obj *obj,
struct switchdev_trans *trans)
@@ -422,7 +437,7 @@ static void switchdev_port_obj_add_deferred(struct 
net_device *dev,
 static int switchdev_port_obj_add_defer(struct net_device *dev,
const struct switchdev_obj *obj)
 {
-   return switchdev_deferred_enqueue(dev, obj, sizeof(*obj),
+   return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj),
  switchdev_port_obj_add_deferred);
 }
 
@@ -490,7 +505,7 @@ static void switchdev_port_obj_del_deferred(struct 
net_device *dev,
 static int switchdev_port_obj_del_defer(struct net_device *dev,
const struct switchdev_obj *obj)
 {
-   return switchdev_deferred_enqueue(dev, obj, sizeof(*obj),
+   return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj),
  switchdev_port_obj_del_deferred);
 }
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v4 1/2] switchdev: fix: erasing too much of vlan obj when handling multiple vlan specs

2015-10-28 Thread sfeldma
From: Scott Feldman 

When adding vlans with multiple IFLA_BRIDGE_VLAN_INFO attrs set in AFSPEC,
we would wipe the vlan obj struct after the first IFLA_BRIDGE_VLAN_INFO.
Fix this by only clearing what's necessary on each IFLA_BRIDGE_VLAN_INFO
iteration.

Fixes: 9e8f4a54 ("switchdev: push object ID back to object structure")
Signed-off-by: Scott Feldman 
Acked-by: Jiri Pirko 
---
v3->v4: rebase sync
v2->v3: no change
v1->v2: add Jiri's Acked-by

 net/switchdev/switchdev.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 1eb76956..8d3e6c3 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -866,7 +866,7 @@ static int switchdev_port_br_afspec(struct net_device *dev,
err = f(dev, &vlan.obj);
if (err)
return err;
-   memset(&vlan, 0, sizeof(vlan));
+   vlan.vid_begin = 0;
} else {
if (vlan.vid_begin)
return -EINVAL;
@@ -875,7 +875,7 @@ static int switchdev_port_br_afspec(struct net_device *dev,
err = f(dev, &vlan.obj);
if (err)
return err;
-   memset(&vlan, 0, sizeof(vlan));
+   vlan.vid_begin = 0;
}
}
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v3 2/3] switchdev: fix: pass correct obj size when deferring obj add

2015-10-22 Thread sfeldma
From: Scott Feldman 

Fixes: 4d429c5dd ("switchdev: introduce possibility to defer obj_add/del")
Signed-off-by: Scott Feldman 
Acked-by: Jiri Pirko 
---
v2->v3: add Jiri's Acked-by
v1->v2: use correct "Fixes" tag, use common func to calc obj size for add/del

 net/switchdev/switchdev.c |   19 +--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 56d8479..bff8e2b 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -336,6 +336,21 @@ int switchdev_port_attr_set(struct net_device *dev,
 }
 EXPORT_SYMBOL_GPL(switchdev_port_attr_set);
 
+static size_t switchdev_obj_size(const struct switchdev_obj *obj)
+{
+   switch (obj->id) {
+   case SWITCHDEV_OBJ_ID_PORT_VLAN:
+   return sizeof(struct switchdev_obj_port_vlan);
+   case SWITCHDEV_OBJ_ID_IPV4_FIB:
+   return sizeof(struct switchdev_obj_ipv4_fib);
+   case SWITCHDEV_OBJ_ID_PORT_FDB:
+   return sizeof(struct switchdev_obj_port_fdb);
+   default:
+   BUG();
+   }
+   return 0;
+}
+
 static int __switchdev_port_obj_add(struct net_device *dev,
const struct switchdev_obj *obj,
struct switchdev_trans *trans)
@@ -421,7 +436,7 @@ static void switchdev_port_obj_add_deferred(struct 
net_device *dev,
 static int switchdev_port_obj_add_defer(struct net_device *dev,
const struct switchdev_obj *obj)
 {
-   return switchdev_deferred_enqueue(dev, obj, sizeof(*obj),
+   return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj),
  switchdev_port_obj_add_deferred);
 }
 
@@ -489,7 +504,7 @@ static void switchdev_port_obj_del_deferred(struct 
net_device *dev,
 static int switchdev_port_obj_del_defer(struct net_device *dev,
const struct switchdev_obj *obj)
 {
-   return switchdev_deferred_enqueue(dev, obj, sizeof(*obj),
+   return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj),
  switchdev_port_obj_del_deferred);
 }
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v3 1/3] switchdev: fix: erasing too much of vlan obj when handling multiple vlan specs

2015-10-22 Thread sfeldma
From: Scott Feldman 

When adding vlans with multiple IFLA_BRIDGE_VLAN_INFO attrs set in AFSPEC,
we would wipe the vlan obj struct after the first IFLA_BRIDGE_VLAN_INFO.
Fix this by only clearing what's necessary on each IFLA_BRIDGE_VLAN_INFO
iteration.

Fixes: 9e8f4a54 ("switchdev: push object ID back to object structure")
Signed-off-by: Scott Feldman 
Acked-by: Jiri Pirko 
---
v2->v3: no change
v1->v2: add Jiri's Acked-by

 net/switchdev/switchdev.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 73e3895..56d8479 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -863,7 +863,7 @@ static int switchdev_port_br_afspec(struct net_device *dev,
err = f(dev, &vlan.obj);
if (err)
return err;
-   memset(&vlan, 0, sizeof(vlan));
+   vlan.vid_begin = 0;
} else {
if (vlan.vid_begin)
return -EINVAL;
@@ -872,7 +872,7 @@ static int switchdev_port_br_afspec(struct net_device *dev,
err = f(dev, &vlan.obj);
if (err)
return err;
-   memset(&vlan, 0, sizeof(vlan));
+   vlan.vid_begin = 0;
}
}
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v3 3/3] switchdev: split switchdev_attr into individual structs

2015-10-22 Thread sfeldma
From: Scott Feldman 

This was already done for switchdev_objs.   Changing switchdev_attrs to new
style makes switchdev API consistent for both attrs and objs.

No functional changes here.

Signed-off-by: Scott Feldman 
Acked-by: Jiri Pirko 
---
v2->v3: remove something that slipped in from the future.  I got a piece of
another patchset mixed in with this one.  Oops.

v1->v2: add Jiri's Acked-by

 .../ethernet/mellanox/mlxsw/spectrum_switchdev.c   |   24 --
 drivers/net/ethernet/mellanox/mlxsw/switchx2.c |7 +-
 drivers/net/ethernet/rocker/rocker.c   |   23 --
 include/net/switchdev.h|   42 +--
 net/bridge/br_stp.c|   24 +++---
 net/core/net-sysfs.c   |   14 ++--
 net/core/rtnetlink.c   |   14 ++--
 net/dsa/slave.c|   10 ++-
 net/switchdev/switchdev.c  |   77 +---
 9 files changed, 163 insertions(+), 72 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index c39b7a1..efa1aa8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -56,15 +56,19 @@ static int mlxsw_sp_port_attr_get(struct net_device *dev,
 {
struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+   struct switchdev_attr_port_parent_id *parent_id;
+   struct switchdev_attr_port_bridge_flags *brport_flags;
 
switch (attr->id) {
case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
-   attr->u.ppid.id_len = sizeof(mlxsw_sp->base_mac);
-   memcpy(&attr->u.ppid.id, &mlxsw_sp->base_mac,
-  attr->u.ppid.id_len);
+   parent_id = SWITCHDEV_ATTR_PORT_PARENT_ID(attr);
+   parent_id->ppid.id_len = sizeof(mlxsw_sp->base_mac);
+   memcpy(&parent_id->ppid.id, &mlxsw_sp->base_mac,
+  parent_id->ppid.id_len);
break;
case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
-   attr->u.brport_flags =
+   brport_flags = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS(attr);
+   brport_flags->brport_flags =
(mlxsw_sp_port->learning ? BR_LEARNING : 0) |
(mlxsw_sp_port->learning_sync ? BR_LEARNING_SYNC : 0);
break;
@@ -166,20 +170,26 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev,
  struct switchdev_trans *trans)
 {
struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+   struct switchdev_attr_port_stp_state *stp_state;
+   struct switchdev_attr_port_bridge_flags *brport_flags;
+   struct switchdev_attr_bridge_ageing_time *ageing_time;
int err = 0;
 
switch (attr->id) {
case SWITCHDEV_ATTR_ID_PORT_STP_STATE:
+   stp_state = SWITCHDEV_ATTR_PORT_STP_STATE(attr);
err = mlxsw_sp_port_attr_stp_state_set(mlxsw_sp_port, trans,
-  attr->u.stp_state);
+  stp_state->state);
break;
case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
+   brport_flags = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS(attr);
err = mlxsw_sp_port_attr_br_flags_set(mlxsw_sp_port, trans,
- attr->u.brport_flags);
+ 
brport_flags->brport_flags);
break;
case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME:
+   ageing_time = SWITCHDEV_ATTR_BRIDGE_AGEING_TIME(attr);
err = mlxsw_sp_port_attr_br_ageing_set(mlxsw_sp_port, trans,
-  attr->u.ageing_time);
+  
ageing_time->ageing_time);
break;
default:
err = -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c 
b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
index 2fd2279..edabc82 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
@@ -864,11 +864,14 @@ static int mlxsw_sx_port_attr_get(struct net_device *dev,
 {
struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
+   struct switchdev_attr_port_parent_id *parent_id;
 
switch (attr->id) {
case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
-   attr->u.ppid.id_len = sizeof(mlxsw_sx->hw_id);
-   memcpy(&attr->u.ppid.id, &mlxsw_sx->hw_id, attr->u.ppid.id_len);
+   parent_id = SWITCHDEV_ATTR_PORT_PARENT_ID(attr);
+  

[PATCH net-next v2 2/3] switchdev: fix: pass correct obj size when deferring obj add/del

2015-10-21 Thread sfeldma
From: Scott Feldman 

Fixes: 4d429c5dd ("switchdev: introduce possibility to defer obj_add/del")
Signed-off-by: Scott Feldman 
---
v1->v2: use correct "Fixes" tag, use common func to calc obj size for add/del

 net/switchdev/switchdev.c |   19 +--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 56d8479..bff8e2b 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -336,6 +336,21 @@ int switchdev_port_attr_set(struct net_device *dev,
 }
 EXPORT_SYMBOL_GPL(switchdev_port_attr_set);
 
+static size_t switchdev_obj_size(const struct switchdev_obj *obj)
+{
+   switch (obj->id) {
+   case SWITCHDEV_OBJ_ID_PORT_VLAN:
+   return sizeof(struct switchdev_obj_port_vlan);
+   case SWITCHDEV_OBJ_ID_IPV4_FIB:
+   return sizeof(struct switchdev_obj_ipv4_fib);
+   case SWITCHDEV_OBJ_ID_PORT_FDB:
+   return sizeof(struct switchdev_obj_port_fdb);
+   default:
+   BUG();
+   }
+   return 0;
+}
+
 static int __switchdev_port_obj_add(struct net_device *dev,
const struct switchdev_obj *obj,
struct switchdev_trans *trans)
@@ -421,7 +436,7 @@ static void switchdev_port_obj_add_deferred(struct 
net_device *dev,
 static int switchdev_port_obj_add_defer(struct net_device *dev,
const struct switchdev_obj *obj)
 {
-   return switchdev_deferred_enqueue(dev, obj, sizeof(*obj),
+   return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj),
  switchdev_port_obj_add_deferred);
 }
 
@@ -489,7 +504,7 @@ static void switchdev_port_obj_del_deferred(struct 
net_device *dev,
 static int switchdev_port_obj_del_defer(struct net_device *dev,
const struct switchdev_obj *obj)
 {
-   return switchdev_deferred_enqueue(dev, obj, sizeof(*obj),
+   return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj),
  switchdev_port_obj_del_deferred);
 }
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v2 1/3] switchdev: fix: erasing too much of vlan obj when handling multiple vlan specs

2015-10-21 Thread sfeldma
From: Scott Feldman 

When adding vlans with multiple IFLA_BRIDGE_VLAN_INFO attrs set in AFSPEC,
we would wipe the vlan obj struct after the first IFLA_BRIDGE_VLAN_INFO.
Fix this by only clearing what's necessary on each IFLA_BRIDGE_VLAN_INFO
iteration.

Fixes: 9e8f4a54 ("switchdev: push object ID back to object structure")
Signed-off-by: Scott Feldman 
Acked-by: Jiri Pirko 
---
v1->v2: add Jiri's Acked-by

 net/switchdev/switchdev.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 73e3895..56d8479 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -863,7 +863,7 @@ static int switchdev_port_br_afspec(struct net_device *dev,
err = f(dev, &vlan.obj);
if (err)
return err;
-   memset(&vlan, 0, sizeof(vlan));
+   vlan.vid_begin = 0;
} else {
if (vlan.vid_begin)
return -EINVAL;
@@ -872,7 +872,7 @@ static int switchdev_port_br_afspec(struct net_device *dev,
err = f(dev, &vlan.obj);
if (err)
return err;
-   memset(&vlan, 0, sizeof(vlan));
+   vlan.vid_begin = 0;
}
}
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v2 3/3] switchdev: split switchdev_attr into individual structs

2015-10-21 Thread sfeldma
From: Scott Feldman 

This was already done for switchdev_objs.   Changing switchdev_attrs to new
style makes switchdev API consistent for both attrs and objs.

No functional changes here.

Signed-off-by: Scott Feldman 
Acked-by: Jiri Pirko 
---
v1->v2: add Jiri's Acked-by

 .../ethernet/mellanox/mlxsw/spectrum_switchdev.c   |   24 --
 drivers/net/ethernet/mellanox/mlxsw/switchx2.c |7 +-
 drivers/net/ethernet/rocker/rocker.c   |   23 --
 include/net/switchdev.h|   51 +++--
 net/bridge/br_stp.c|   24 +++---
 net/core/net-sysfs.c   |   14 ++--
 net/core/rtnetlink.c   |   14 ++--
 net/dsa/slave.c|   10 ++-
 net/switchdev/switchdev.c  |   77 +---
 9 files changed, 172 insertions(+), 72 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index c39b7a1..efa1aa8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -56,15 +56,19 @@ static int mlxsw_sp_port_attr_get(struct net_device *dev,
 {
struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+   struct switchdev_attr_port_parent_id *parent_id;
+   struct switchdev_attr_port_bridge_flags *brport_flags;
 
switch (attr->id) {
case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
-   attr->u.ppid.id_len = sizeof(mlxsw_sp->base_mac);
-   memcpy(&attr->u.ppid.id, &mlxsw_sp->base_mac,
-  attr->u.ppid.id_len);
+   parent_id = SWITCHDEV_ATTR_PORT_PARENT_ID(attr);
+   parent_id->ppid.id_len = sizeof(mlxsw_sp->base_mac);
+   memcpy(&parent_id->ppid.id, &mlxsw_sp->base_mac,
+  parent_id->ppid.id_len);
break;
case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
-   attr->u.brport_flags =
+   brport_flags = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS(attr);
+   brport_flags->brport_flags =
(mlxsw_sp_port->learning ? BR_LEARNING : 0) |
(mlxsw_sp_port->learning_sync ? BR_LEARNING_SYNC : 0);
break;
@@ -166,20 +170,26 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev,
  struct switchdev_trans *trans)
 {
struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+   struct switchdev_attr_port_stp_state *stp_state;
+   struct switchdev_attr_port_bridge_flags *brport_flags;
+   struct switchdev_attr_bridge_ageing_time *ageing_time;
int err = 0;
 
switch (attr->id) {
case SWITCHDEV_ATTR_ID_PORT_STP_STATE:
+   stp_state = SWITCHDEV_ATTR_PORT_STP_STATE(attr);
err = mlxsw_sp_port_attr_stp_state_set(mlxsw_sp_port, trans,
-  attr->u.stp_state);
+  stp_state->state);
break;
case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
+   brport_flags = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS(attr);
err = mlxsw_sp_port_attr_br_flags_set(mlxsw_sp_port, trans,
- attr->u.brport_flags);
+ 
brport_flags->brport_flags);
break;
case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME:
+   ageing_time = SWITCHDEV_ATTR_BRIDGE_AGEING_TIME(attr);
err = mlxsw_sp_port_attr_br_ageing_set(mlxsw_sp_port, trans,
-  attr->u.ageing_time);
+  
ageing_time->ageing_time);
break;
default:
err = -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c 
b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
index 2fd2279..edabc82 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
@@ -864,11 +864,14 @@ static int mlxsw_sx_port_attr_get(struct net_device *dev,
 {
struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
+   struct switchdev_attr_port_parent_id *parent_id;
 
switch (attr->id) {
case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
-   attr->u.ppid.id_len = sizeof(mlxsw_sx->hw_id);
-   memcpy(&attr->u.ppid.id, &mlxsw_sx->hw_id, attr->u.ppid.id_len);
+   parent_id = SWITCHDEV_ATTR_PORT_PARENT_ID(attr);
+   parent_id->ppid.id_len = sizeof(mlxsw_sx->hw_id);
+   memcpy(&parent_id->ppid.id, &mlxsw_sx->hw_id,
+ 

[PATCH net-next 2/3] switchdev: fix: pass correct obj size when deferring obj add

2015-10-20 Thread sfeldma
From: Scott Feldman 

Fixes: 0bc05d585d ("switchdev: allow caller to explicitly request attr_set as 
deferred")
Signed-off-by: Scott Feldman 
---
 net/switchdev/switchdev.c |   19 ++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 56d8479..be8ced1 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -489,7 +489,24 @@ static void switchdev_port_obj_del_deferred(struct 
net_device *dev,
 static int switchdev_port_obj_del_defer(struct net_device *dev,
const struct switchdev_obj *obj)
 {
-   return switchdev_deferred_enqueue(dev, obj, sizeof(*obj),
+   size_t size = 0;
+
+   switch (obj->id) {
+   case SWITCHDEV_OBJ_ID_PORT_VLAN:
+   size = sizeof(struct switchdev_obj_port_vlan);
+   break;
+   case SWITCHDEV_OBJ_ID_IPV4_FIB:
+   size = sizeof(struct switchdev_obj_ipv4_fib);
+   break;
+   case SWITCHDEV_OBJ_ID_PORT_FDB:
+   size = sizeof(struct switchdev_obj_port_fdb);
+   break;
+   default:
+   WARN_ON(!size);
+   return -EINVAL;
+   }
+
+   return switchdev_deferred_enqueue(dev, obj, size,
  switchdev_port_obj_del_deferred);
 }
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next 3/3] switchdev: split switchdev_attr into individual structs

2015-10-20 Thread sfeldma
From: Scott Feldman 

This was already done for switchdev_objs.   Changing switchdev_attrs to new
style makes switchdev API consistent for both attrs and objs.

No functional changes here.

Signed-off-by: Scott Feldman 
---
 .../ethernet/mellanox/mlxsw/spectrum_switchdev.c   |   24 --
 drivers/net/ethernet/mellanox/mlxsw/switchx2.c |7 +-
 drivers/net/ethernet/rocker/rocker.c   |   23 --
 include/net/switchdev.h|   51 +++--
 net/bridge/br_stp.c|   24 +++---
 net/core/net-sysfs.c   |   14 ++--
 net/core/rtnetlink.c   |   14 ++--
 net/dsa/slave.c|   10 ++-
 net/switchdev/switchdev.c  |   77 +---
 9 files changed, 172 insertions(+), 72 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index c39b7a1..efa1aa8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -56,15 +56,19 @@ static int mlxsw_sp_port_attr_get(struct net_device *dev,
 {
struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+   struct switchdev_attr_port_parent_id *parent_id;
+   struct switchdev_attr_port_bridge_flags *brport_flags;
 
switch (attr->id) {
case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
-   attr->u.ppid.id_len = sizeof(mlxsw_sp->base_mac);
-   memcpy(&attr->u.ppid.id, &mlxsw_sp->base_mac,
-  attr->u.ppid.id_len);
+   parent_id = SWITCHDEV_ATTR_PORT_PARENT_ID(attr);
+   parent_id->ppid.id_len = sizeof(mlxsw_sp->base_mac);
+   memcpy(&parent_id->ppid.id, &mlxsw_sp->base_mac,
+  parent_id->ppid.id_len);
break;
case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
-   attr->u.brport_flags =
+   brport_flags = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS(attr);
+   brport_flags->brport_flags =
(mlxsw_sp_port->learning ? BR_LEARNING : 0) |
(mlxsw_sp_port->learning_sync ? BR_LEARNING_SYNC : 0);
break;
@@ -166,20 +170,26 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev,
  struct switchdev_trans *trans)
 {
struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+   struct switchdev_attr_port_stp_state *stp_state;
+   struct switchdev_attr_port_bridge_flags *brport_flags;
+   struct switchdev_attr_bridge_ageing_time *ageing_time;
int err = 0;
 
switch (attr->id) {
case SWITCHDEV_ATTR_ID_PORT_STP_STATE:
+   stp_state = SWITCHDEV_ATTR_PORT_STP_STATE(attr);
err = mlxsw_sp_port_attr_stp_state_set(mlxsw_sp_port, trans,
-  attr->u.stp_state);
+  stp_state->state);
break;
case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
+   brport_flags = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS(attr);
err = mlxsw_sp_port_attr_br_flags_set(mlxsw_sp_port, trans,
- attr->u.brport_flags);
+ 
brport_flags->brport_flags);
break;
case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME:
+   ageing_time = SWITCHDEV_ATTR_BRIDGE_AGEING_TIME(attr);
err = mlxsw_sp_port_attr_br_ageing_set(mlxsw_sp_port, trans,
-  attr->u.ageing_time);
+  
ageing_time->ageing_time);
break;
default:
err = -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c 
b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
index 2fd2279..edabc82 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
@@ -864,11 +864,14 @@ static int mlxsw_sx_port_attr_get(struct net_device *dev,
 {
struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
+   struct switchdev_attr_port_parent_id *parent_id;
 
switch (attr->id) {
case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
-   attr->u.ppid.id_len = sizeof(mlxsw_sx->hw_id);
-   memcpy(&attr->u.ppid.id, &mlxsw_sx->hw_id, attr->u.ppid.id_len);
+   parent_id = SWITCHDEV_ATTR_PORT_PARENT_ID(attr);
+   parent_id->ppid.id_len = sizeof(mlxsw_sx->hw_id);
+   memcpy(&parent_id->ppid.id, &mlxsw_sx->hw_id,
+  parent_id->ppid.id_len);
 

[PATCH net-next 1/3] switchdev: fix: erasing too much of vlan obj when handling multiple vlan specs

2015-10-20 Thread sfeldma
From: Scott Feldman 

When adding vlans with multiple IFLA_BRIDGE_VLAN_INFO attrs set in AFSPEC,
we would wipe the vlan obj struct after the first IFLA_BRIDGE_VLAN_INFO.
Fix this by only clearing what's necessary on each IFLA_BRIDGE_VLAN_INFO
iteration.

Fixes: 9e8f4a54 ("switchdev: push object ID back to object structure")
Signed-off-by: Scott Feldman 
---
 net/switchdev/switchdev.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 73e3895..56d8479 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -863,7 +863,7 @@ static int switchdev_port_br_afspec(struct net_device *dev,
err = f(dev, &vlan.obj);
if (err)
return err;
-   memset(&vlan, 0, sizeof(vlan));
+   vlan.vid_begin = 0;
} else {
if (vlan.vid_begin)
return -EINVAL;
@@ -872,7 +872,7 @@ static int switchdev_port_br_afspec(struct net_device *dev,
err = f(dev, &vlan.obj);
if (err)
return err;
-   memset(&vlan, 0, sizeof(vlan));
+   vlan.vid_begin = 0;
}
}
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v3 0/4] switchdev: push bridge ageing_time attribute down

2015-10-08 Thread sfeldma
From: Scott Feldman 

Push bridge-level attributes down to switchdev drivers.  This patchset
adds the infrastructure and then pushes, as an example, ageing_time attribute
down from bridge to switchdev (rocker) driver.  Add some range-checking
for ageing_time.

# ip link set dev br0 type bridge ageing_time 1000

# ip link set dev br0 type bridge ageing_time 999
RTNETLINK answers: Numerical result out of range

Up until now, switchdev attrs were port-level attrs, so the netdev used in
switchdev_attr_set() would be a switch port or bond of switch ports.  With
bridge-level attrs, the netdev passed to switchdev_attr_set() is the bridge
netdev.  The same recursive algo is used to visit the leaves of the stacked
drivers to set the attr, it's just in this case we start one layer higher in
the stack.  One note is not all ports in the bridge may support setting a
bridge-level attribute, so rather than failing the entire set, we'll skip over
those ports returning -EOPNOTSUPP.

v2->v3: Per Jiri review: push only ageing_time attr down at this time, and
don't pass raw bridge IFLA_BR_* values; rather use new switchdev attr ID for
ageing_time.

v1->v2: rebase w/ net-next


Scott Feldman (4):
  switchdev: add bridge ageing_time attribute
  switchdev: skip over ports returning -EOPNOTSUPP when recursing ports
  bridge: push bridge setting ageing_time down to switchdev
  rocker: handle setting bridge ageing_time

 drivers/net/ethernet/rocker/rocker.c |   16 
 include/net/switchdev.h  |3 +++
 net/bridge/br_ioctl.c|3 +--
 net/bridge/br_netlink.c  |6 +++---
 net/bridge/br_private.h  |1 +
 net/bridge/br_stp.c  |   23 +++
 net/bridge/br_sysfs_br.c |3 +--
 net/switchdev/switchdev.c|9 -
 8 files changed, 56 insertions(+), 8 deletions(-)

-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v3 3/4] bridge: push bridge setting ageing_time down to switchdev

2015-10-08 Thread sfeldma
From: Scott Feldman 

Use SWITCHDEV_F_SKIP_EOPNOTSUPP to skip over ports in bridge that don't
support setting ageing_time (or setting bridge attrs in general).

If push fails, don't update ageing_time in bridge and return err to user.

If push succeeds, update ageing_time in bridge and run gc_timer now to
recalibrate when to run gc_timer next, based on new ageing_time.

Signed-off-by: Scott Feldman 
Signed-off-by: Jiri Pirko 
---
 net/bridge/br_ioctl.c|3 +--
 net/bridge/br_netlink.c  |6 +++---
 net/bridge/br_private.h  |1 +
 net/bridge/br_stp.c  |   23 +++
 net/bridge/br_sysfs_br.c |3 +--
 5 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index 8d423bc..263b4de 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -200,8 +200,7 @@ static int old_dev_ioctl(struct net_device *dev, struct 
ifreq *rq, int cmd)
if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN))
return -EPERM;
 
-   br->ageing_time = clock_t_to_jiffies(args[1]);
-   return 0;
+   return br_set_ageing_time(br, args[1]);
 
case BRCTL_GET_PORT_INFO:
{
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index d78b442..544ab96 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -870,9 +870,9 @@ static int br_changelink(struct net_device *brdev, struct 
nlattr *tb[],
}
 
if (data[IFLA_BR_AGEING_TIME]) {
-   u32 ageing_time = nla_get_u32(data[IFLA_BR_AGEING_TIME]);
-
-   br->ageing_time = clock_t_to_jiffies(ageing_time);
+   err = br_set_ageing_time(br, 
nla_get_u32(data[IFLA_BR_AGEING_TIME]));
+   if (err)
+   return err;
}
 
if (data[IFLA_BR_STP_STATE]) {
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 09d3ecb..ba0c67b 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -882,6 +882,7 @@ void __br_set_forward_delay(struct net_bridge *br, unsigned 
long t);
 int br_set_forward_delay(struct net_bridge *br, unsigned long x);
 int br_set_hello_time(struct net_bridge *br, unsigned long x);
 int br_set_max_age(struct net_bridge *br, unsigned long x);
+int br_set_ageing_time(struct net_bridge *br, u32 ageing_time);
 
 
 /* br_stp_if.c */
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 3a982c0..db6d243de 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -566,6 +566,29 @@ int br_set_max_age(struct net_bridge *br, unsigned long 
val)
 
 }
 
+int br_set_ageing_time(struct net_bridge *br, u32 ageing_time)
+{
+   struct switchdev_attr attr = {
+   .id = SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME,
+   .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP,
+   .u.ageing_time = ageing_time,
+   };
+   unsigned long t = clock_t_to_jiffies(ageing_time);
+   int err;
+
+   if (t < BR_MIN_AGEING_TIME || t > BR_MAX_AGEING_TIME)
+   return -ERANGE;
+
+   err = switchdev_port_attr_set(br->dev, &attr);
+   if (err)
+   return err;
+
+   br->ageing_time = t;
+   mod_timer(&br->gc_timer, jiffies);
+
+   return 0;
+}
+
 void __br_set_forward_delay(struct net_bridge *br, unsigned long t)
 {
br->bridge_forward_delay = t;
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 4c97fc5..04ef192 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -102,8 +102,7 @@ static ssize_t ageing_time_show(struct device *d,
 
 static int set_ageing_time(struct net_bridge *br, unsigned long val)
 {
-   br->ageing_time = clock_t_to_jiffies(val);
-   return 0;
+   return br_set_ageing_time(br, val);
 }
 
 static ssize_t ageing_time_store(struct device *d,
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v3 4/4] rocker: handle setting bridge ageing_time

2015-10-08 Thread sfeldma
From: Scott Feldman 

The FDB cleanup timer will get rescheduled to re-evaluate FDB entries
based on new ageing_time.

Signed-off-by: Scott Feldman 
---
 drivers/net/ethernet/rocker/rocker.c |   16 
 1 file changed, 16 insertions(+)

diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index cf91ffc..eafa907 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -4361,6 +4361,18 @@ static int rocker_port_brport_flags_set(struct 
rocker_port *rocker_port,
return err;
 }
 
+static int rocker_port_bridge_ageing_time(struct rocker_port *rocker_port,
+ struct switchdev_trans *trans,
+ u32 ageing_time)
+{
+   if (!switchdev_trans_ph_prepare(trans)) {
+   rocker_port->ageing_time = clock_t_to_jiffies(ageing_time);
+   mod_timer(&rocker_port->rocker->fdb_cleanup_timer, jiffies);
+   }
+
+   return 0;
+}
+
 static int rocker_port_attr_set(struct net_device *dev,
struct switchdev_attr *attr,
struct switchdev_trans *trans)
@@ -4378,6 +4390,10 @@ static int rocker_port_attr_set(struct net_device *dev,
err = rocker_port_brport_flags_set(rocker_port, trans,
   attr->u.brport_flags);
break;
+   case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME:
+   err = rocker_port_bridge_ageing_time(rocker_port, trans,
+attr->u.ageing_time);
+   break;
default:
err = -EOPNOTSUPP;
break;
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v3 2/4] switchdev: skip over ports returning -EOPNOTSUPP when recursing ports

2015-10-08 Thread sfeldma
From: Scott Feldman 

This allows us to recurse over all the ports, skipping over unsupporting
ports.  Without the change, the recursion would stop at first unsupported
port.

Signed-off-by: Scott Feldman 
---
 include/net/switchdev.h   |1 +
 net/switchdev/switchdev.c |9 -
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 61f129b..1ce7083 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -16,6 +16,7 @@
 #include 
 
 #define SWITCHDEV_F_NO_RECURSE BIT(0)
+#define SWITCHDEV_F_SKIP_EOPNOTSUPPBIT(1)
 
 struct switchdev_trans_item {
struct list_head list;
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 6e4a4f9..7a9ab90 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -147,7 +147,7 @@ static int __switchdev_port_attr_set(struct net_device *dev,
return ops->switchdev_port_attr_set(dev, attr, trans);
 
if (attr->flags & SWITCHDEV_F_NO_RECURSE)
-   return err;
+   goto done;
 
/* Switch device port(s) may be stacked under
 * bond/team/vlan dev, so recurse down to set attr on
@@ -156,10 +156,17 @@ static int __switchdev_port_attr_set(struct net_device 
*dev,
 
netdev_for_each_lower_dev(dev, lower_dev, iter) {
err = __switchdev_port_attr_set(lower_dev, attr, trans);
+   if (err == -EOPNOTSUPP &&
+   attr->flags & SWITCHDEV_F_SKIP_EOPNOTSUPP)
+   continue;
if (err)
break;
}
 
+done:
+   if (err == -EOPNOTSUPP && attr->flags & SWITCHDEV_F_SKIP_EOPNOTSUPP)
+   err = 0;
+
return err;
 }
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v3 1/4] switchdev: add bridge ageing_time attribute

2015-10-08 Thread sfeldma
From: Scott Feldman 

Setting the stage to push bridge-level attributes down to port driver so
hardware can be programmed accordingly.  Bridge-level attribute example is
ageing_time.  This is a per-bridge attribute, not a per-bridge-port attr.

Signed-off-by: Scott Feldman 
---
 include/net/switchdev.h |2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 89266a3..61f129b 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -43,6 +43,7 @@ enum switchdev_attr_id {
SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
SWITCHDEV_ATTR_ID_PORT_STP_STATE,
SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
+   SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME,
 };
 
 struct switchdev_attr {
@@ -52,6 +53,7 @@ struct switchdev_attr {
struct netdev_phys_item_id ppid;/* PORT_PARENT_ID */
u8 stp_state;   /* PORT_STP_STATE */
unsigned long brport_flags; /* PORT_BRIDGE_FLAGS */
+   u32 ageing_time;/* BRIDGE_AGEING_TIME */
} u;
 };
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v2 4/4] rocker: handle setting bridge ageing_time

2015-10-07 Thread sfeldma
From: Scott Feldman 

The FDB cleanup timer will get rescheduled to re-evaluate FDB entries
based on new ageing_time.

Signed-off-by: Scott Feldman 
---
 drivers/net/ethernet/rocker/rocker.c |   22 ++
 1 file changed, 22 insertions(+)

diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index cf91ffc..3c7f9ae 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -4361,6 +4361,24 @@ static int rocker_port_brport_flags_set(struct 
rocker_port *rocker_port,
return err;
 }
 
+static int rocker_port_bridge_set(struct rocker_port *rocker_port,
+ struct switchdev_trans *trans,
+ struct switchdev_attr_bridge *bridge)
+{
+   switch (bridge->attr) {
+   case IFLA_BR_AGEING_TIME:
+   if (switchdev_trans_ph_prepare(trans))
+   return 0;
+   rocker_port->ageing_time = clock_t_to_jiffies(bridge->val);
+   mod_timer(&rocker_port->rocker->fdb_cleanup_timer, jiffies);
+   break;
+   default:
+   return -EOPNOTSUPP;
+   }
+
+   return 0;
+}
+
 static int rocker_port_attr_set(struct net_device *dev,
struct switchdev_attr *attr,
struct switchdev_trans *trans)
@@ -4378,6 +4396,10 @@ static int rocker_port_attr_set(struct net_device *dev,
err = rocker_port_brport_flags_set(rocker_port, trans,
   attr->u.brport_flags);
break;
+   case SWITCHDEV_ATTR_ID_BRIDGE:
+   err = rocker_port_bridge_set(rocker_port, trans,
+&attr->u.bridge);
+   break;
default:
err = -EOPNOTSUPP;
break;
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v2 3/4] bridge: push bridge setting ageing_time down to switchdev

2015-10-07 Thread sfeldma
From: Scott Feldman 

Use SWITCHDEV_F_SKIP_EOPNOTSUPP to skip over ports in bridge that don't
support setting ageing_time (or setting bridge attrs in general).

If push fails, don't update ageing_time in bridge and return err to user.

If push succeeds, update ageing_time in bridge and run gc_timer now to
recalibrate when to run gc_timer next, based on new ageing_time.

Signed-off-by: Scott Feldman 
Signed-off-by: Jiri Pirko 
---
 net/bridge/br_ioctl.c|3 +--
 net/bridge/br_netlink.c  |6 +++---
 net/bridge/br_private.h  |1 +
 net/bridge/br_stp.c  |   24 
 net/bridge/br_sysfs_br.c |3 +--
 5 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index 8d423bc..263b4de 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -200,8 +200,7 @@ static int old_dev_ioctl(struct net_device *dev, struct 
ifreq *rq, int cmd)
if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN))
return -EPERM;
 
-   br->ageing_time = clock_t_to_jiffies(args[1]);
-   return 0;
+   return br_set_ageing_time(br, args[1]);
 
case BRCTL_GET_PORT_INFO:
{
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index d78b442..544ab96 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -870,9 +870,9 @@ static int br_changelink(struct net_device *brdev, struct 
nlattr *tb[],
}
 
if (data[IFLA_BR_AGEING_TIME]) {
-   u32 ageing_time = nla_get_u32(data[IFLA_BR_AGEING_TIME]);
-
-   br->ageing_time = clock_t_to_jiffies(ageing_time);
+   err = br_set_ageing_time(br, 
nla_get_u32(data[IFLA_BR_AGEING_TIME]));
+   if (err)
+   return err;
}
 
if (data[IFLA_BR_STP_STATE]) {
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 09d3ecb..ba0c67b 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -882,6 +882,7 @@ void __br_set_forward_delay(struct net_bridge *br, unsigned 
long t);
 int br_set_forward_delay(struct net_bridge *br, unsigned long x);
 int br_set_hello_time(struct net_bridge *br, unsigned long x);
 int br_set_max_age(struct net_bridge *br, unsigned long x);
+int br_set_ageing_time(struct net_bridge *br, u32 ageing_time);
 
 
 /* br_stp_if.c */
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 3a982c0..ae3286b 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -566,6 +566,30 @@ int br_set_max_age(struct net_bridge *br, unsigned long 
val)
 
 }
 
+int br_set_ageing_time(struct net_bridge *br, u32 ageing_time)
+{
+   struct switchdev_attr attr = {
+   .id = SWITCHDEV_ATTR_ID_BRIDGE,
+   .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP,
+   .u.bridge.attr = IFLA_BR_AGEING_TIME,
+   .u.bridge.val = ageing_time,
+   };
+   unsigned long t = clock_t_to_jiffies(ageing_time);
+   int err;
+
+   if (t < BR_MIN_AGEING_TIME || t > BR_MAX_AGEING_TIME)
+   return -ERANGE;
+
+   err = switchdev_port_attr_set(br->dev, &attr);
+   if (err)
+   return err;
+
+   br->ageing_time = t;
+   mod_timer(&br->gc_timer, jiffies);
+
+   return 0;
+}
+
 void __br_set_forward_delay(struct net_bridge *br, unsigned long t)
 {
br->bridge_forward_delay = t;
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 4c97fc5..04ef192 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -102,8 +102,7 @@ static ssize_t ageing_time_show(struct device *d,
 
 static int set_ageing_time(struct net_bridge *br, unsigned long val)
 {
-   br->ageing_time = clock_t_to_jiffies(val);
-   return 0;
+   return br_set_ageing_time(br, val);
 }
 
 static ssize_t ageing_time_store(struct device *d,
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v2 1/4] switchdev: add bridge attributes

2015-10-07 Thread sfeldma
From: Scott Feldman 

Setting the stage to push bridge-level attributes down to port driver so
hardware can be programmed accordingly.  Bridge-level attribute example is
ageing_time.  This is a per-bridge attribute, not a per-bridge-port attr.

Signed-off-by: Scott Feldman 
---
 include/net/switchdev.h  |5 +
 include/uapi/linux/if_link.h |2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 89266a3..8d92cd0 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -43,6 +43,7 @@ enum switchdev_attr_id {
SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
SWITCHDEV_ATTR_ID_PORT_STP_STATE,
SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
+   SWITCHDEV_ATTR_ID_BRIDGE,
 };
 
 struct switchdev_attr {
@@ -52,6 +53,10 @@ struct switchdev_attr {
struct netdev_phys_item_id ppid;/* PORT_PARENT_ID */
u8 stp_state;   /* PORT_STP_STATE */
unsigned long brport_flags; /* PORT_BRIDGE_FLAGS */
+   struct switchdev_attr_bridge {  /* BRIDGE */
+   enum ifla_br attr;
+   u32 val;
+   } bridge;
} u;
 };
 
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index e3b6217..30177b3 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -222,7 +222,7 @@ enum in6_addr_gen_mode {
 
 /* Bridge section */
 
-enum {
+enum ifla_br {
IFLA_BR_UNSPEC,
IFLA_BR_FORWARD_DELAY,
IFLA_BR_HELLO_TIME,
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v2 0/4] switchdev: push bridge attributes down

2015-10-07 Thread sfeldma
From: Scott Feldman 

Push bridge-level attributes down to switchdev drivers.  This patchset
adds the infrastructure and then pushes, as an example, ageing_time attribute
down from bridge to switchdev (rocker) driver.  Add some range-checking
for ageing_time.

# ip link set dev br0 type bridge ageing_time 1000

# ip link set dev br0 type bridge ageing_time 999
RTNETLINK answers: Numerical result out of range

Up until now, switchdev attrs were port-level attrs, so the netdev used in
switchdev_attr_set() would be a switch port or bond of switch ports.  With
bridge-level attrs, the netdev passed to switchdev_attr_set() is the bridge
netdev.  The same recursive algo is used to visit the leaves of the stacked
drivers to set the attr, it's just in this case we start one layer higher in
the stack.  One note is not all ports in the bridge may support setting a
bridge-level attribute, so rather than failing the entire set, we'll skip over
those ports returning -EOPNOTSUPP.

v1->v2: rebase w/ net-next

Scott Feldman (4):
  switchdev: add bridge attributes
  switchdev: skip over ports returning -EOPNOTSUPP when recursing ports
  bridge: push bridge setting ageing_time down to switchdev
  rocker: handle setting bridge ageing_time

 drivers/net/ethernet/rocker/rocker.c |   22 ++
 include/net/switchdev.h  |6 ++
 include/uapi/linux/if_link.h |2 +-
 net/bridge/br_ioctl.c|3 +--
 net/bridge/br_netlink.c  |6 +++---
 net/bridge/br_private.h  |1 +
 net/bridge/br_stp.c  |   24 
 net/bridge/br_sysfs_br.c |3 +--
 net/switchdev/switchdev.c|9 -
 9 files changed, 67 insertions(+), 9 deletions(-)

-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v2 2/4] switchdev: skip over ports returning -EOPNOTSUPP when recursing ports

2015-10-07 Thread sfeldma
From: Scott Feldman 

This allows us to recurse over all the ports, skipping over unsupporting
ports.  Without the change, the recursion would stop at first unsupported
port.

Signed-off-by: Scott Feldman 
---
 include/net/switchdev.h   |1 +
 net/switchdev/switchdev.c |9 -
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 8d92cd0..f3de6f4 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -16,6 +16,7 @@
 #include 
 
 #define SWITCHDEV_F_NO_RECURSE BIT(0)
+#define SWITCHDEV_F_SKIP_EOPNOTSUPPBIT(1)
 
 struct switchdev_trans_item {
struct list_head list;
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 6e4a4f9..7a9ab90 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -147,7 +147,7 @@ static int __switchdev_port_attr_set(struct net_device *dev,
return ops->switchdev_port_attr_set(dev, attr, trans);
 
if (attr->flags & SWITCHDEV_F_NO_RECURSE)
-   return err;
+   goto done;
 
/* Switch device port(s) may be stacked under
 * bond/team/vlan dev, so recurse down to set attr on
@@ -156,10 +156,17 @@ static int __switchdev_port_attr_set(struct net_device 
*dev,
 
netdev_for_each_lower_dev(dev, lower_dev, iter) {
err = __switchdev_port_attr_set(lower_dev, attr, trans);
+   if (err == -EOPNOTSUPP &&
+   attr->flags & SWITCHDEV_F_SKIP_EOPNOTSUPP)
+   continue;
if (err)
break;
}
 
+done:
+   if (err == -EOPNOTSUPP && attr->flags & SWITCHDEV_F_SKIP_EOPNOTSUPP)
+   err = 0;
+
return err;
 }
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next 4/4] rocker: handle setting bridge ageing_time

2015-09-24 Thread sfeldma
From: Scott Feldman 

The FDB cleanup timer will get rescheduled to re-evaluate FDB entries
based on new ageing_time.

Signed-off-by: Scott Feldman 
---
 drivers/net/ethernet/rocker/rocker.c |   22 ++
 1 file changed, 22 insertions(+)

diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index 32c5429..99ec715 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -4382,6 +4382,24 @@ static int rocker_port_brport_flags_set(struct 
rocker_port *rocker_port,
return err;
 }
 
+static int rocker_port_bridge_set(struct rocker_port *rocker_port,
+ enum switchdev_trans trans,
+ struct switchdev_attr_bridge *bridge)
+{
+   switch (bridge->attr) {
+   case IFLA_BR_AGEING_TIME:
+   if (trans == SWITCHDEV_TRANS_PREPARE)
+   return 0;
+   rocker_port->ageing_time = clock_t_to_jiffies(bridge->val);
+   mod_timer(&rocker_port->rocker->fdb_cleanup_timer, jiffies);
+   break;
+   default:
+   return -EOPNOTSUPP;
+   }
+
+   return 0;
+}
+
 static int rocker_port_attr_set(struct net_device *dev,
struct switchdev_attr *attr)
 {
@@ -4409,6 +4427,10 @@ static int rocker_port_attr_set(struct net_device *dev,
err = rocker_port_brport_flags_set(rocker_port, attr->trans,
   attr->u.brport_flags);
break;
+   case SWITCHDEV_ATTR_BRIDGE:
+   err = rocker_port_bridge_set(rocker_port, attr->trans,
+&attr->u.bridge);
+   break;
default:
err = -EOPNOTSUPP;
break;
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next 2/4] switchdev: skip over ports returning -EOPNOTSUPP when recursing ports

2015-09-24 Thread sfeldma
From: Scott Feldman 

This allows us to recurse over all the ports, skipping over unsupporting
ports.  Without the change, the recursion would stop at first unsupported
port.

Signed-off-by: Scott Feldman 
---
 include/net/switchdev.h   |1 +
 net/switchdev/switchdev.c |9 -
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 54b2faa..22a6dbe 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -15,6 +15,7 @@
 #include 
 
 #define SWITCHDEV_F_NO_RECURSE BIT(0)
+#define SWITCHDEV_F_SKIP_EOPNOTSUPPBIT(1)
 
 enum switchdev_trans {
SWITCHDEV_TRANS_NONE,
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index fda38f8..5c30da0 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -73,7 +73,7 @@ static int __switchdev_port_attr_set(struct net_device *dev,
return ops->switchdev_port_attr_set(dev, attr);
 
if (attr->flags & SWITCHDEV_F_NO_RECURSE)
-   return err;
+   goto done;
 
/* Switch device port(s) may be stacked under
 * bond/team/vlan dev, so recurse down to set attr on
@@ -82,10 +82,17 @@ static int __switchdev_port_attr_set(struct net_device *dev,
 
netdev_for_each_lower_dev(dev, lower_dev, iter) {
err = __switchdev_port_attr_set(lower_dev, attr);
+   if (err == -EOPNOTSUPP &&
+   attr->flags & SWITCHDEV_F_SKIP_EOPNOTSUPP)
+   continue;
if (err)
break;
}
 
+done:
+   if (err == -EOPNOTSUPP && attr->flags & SWITCHDEV_F_SKIP_EOPNOTSUPP)
+   err = 0;
+
return err;
 }
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next 1/4] switchdev: add bridge attributes

2015-09-24 Thread sfeldma
From: Scott Feldman 

Setting the stage to push bridge-level attributes down to port driver so
hardware can be programmed accordingly.  Bridge-level attribute example is
ageing_time.  This is a per-bridge attribute, not a per-bridge-port attr.

Signed-off-by: Scott Feldman 
---
 include/net/switchdev.h  |5 +
 include/uapi/linux/if_link.h |2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 319baab..54b2faa 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -28,6 +28,7 @@ enum switchdev_attr_id {
SWITCHDEV_ATTR_PORT_PARENT_ID,
SWITCHDEV_ATTR_PORT_STP_STATE,
SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS,
+   SWITCHDEV_ATTR_BRIDGE,
 };
 
 struct switchdev_attr {
@@ -38,6 +39,10 @@ struct switchdev_attr {
struct netdev_phys_item_id ppid;/* PORT_PARENT_ID */
u8 stp_state;   /* PORT_STP_STATE */
unsigned long brport_flags; /* PORT_BRIDGE_FLAGS */
+   struct switchdev_attr_bridge {  /* BRIDGE */
+   enum ifla_br attr;
+   u32 val;
+   } bridge;
} u;
 };
 
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 3a5f263..8d0ef1c 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -222,7 +222,7 @@ enum in6_addr_gen_mode {
 
 /* Bridge section */
 
-enum {
+enum ifla_br {
IFLA_BR_UNSPEC,
IFLA_BR_FORWARD_DELAY,
IFLA_BR_HELLO_TIME,
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next 3/4] bridge: push bridge setting ageing_time down to switchdev

2015-09-24 Thread sfeldma
From: Scott Feldman 

Use SWITCHDEV_F_SKIP_EOPNOTSUPP to skip over ports in bridge that don't
support setting ageing_time (or setting bridge attrs in general).

If push fails, don't update ageing_time in bridge and return err to user.

If push succeeds, update ageing_time in bridge and run gc_timer now to
recalibrate when to run gc_timer next, based on new ageing_time.

Signed-off-by: Scott Feldman 
Signed-off-by: Jiri Pirko 
---
 net/bridge/br_ioctl.c|3 +--
 net/bridge/br_netlink.c  |6 +++---
 net/bridge/br_private.h  |1 +
 net/bridge/br_stp.c  |   24 
 net/bridge/br_sysfs_br.c |3 +--
 5 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index 8d423bc..263b4de 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -200,8 +200,7 @@ static int old_dev_ioctl(struct net_device *dev, struct 
ifreq *rq, int cmd)
if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN))
return -EPERM;
 
-   br->ageing_time = clock_t_to_jiffies(args[1]);
-   return 0;
+   return br_set_ageing_time(br, args[1]);
 
case BRCTL_GET_PORT_INFO:
{
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index ea748c9..6dbdd4d 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -774,9 +774,9 @@ static int br_changelink(struct net_device *brdev, struct 
nlattr *tb[],
}
 
if (data[IFLA_BR_AGEING_TIME]) {
-   u32 ageing_time = nla_get_u32(data[IFLA_BR_AGEING_TIME]);
-
-   br->ageing_time = clock_t_to_jiffies(ageing_time);
+   err = br_set_ageing_time(br, 
nla_get_u32(data[IFLA_BR_AGEING_TIME]));
+   if (err)
+   return err;
}
 
if (data[IFLA_BR_STP_STATE]) {
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 74e99c7..dc4b390 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -808,6 +808,7 @@ void __br_set_forward_delay(struct net_bridge *br, unsigned 
long t);
 int br_set_forward_delay(struct net_bridge *br, unsigned long x);
 int br_set_hello_time(struct net_bridge *br, unsigned long x);
 int br_set_max_age(struct net_bridge *br, unsigned long x);
+int br_set_ageing_time(struct net_bridge *br, u32 ageing_time);
 
 
 /* br_stp_if.c */
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index ed74ffa..6241bab 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -566,6 +566,30 @@ int br_set_max_age(struct net_bridge *br, unsigned long 
val)
 
 }
 
+int br_set_ageing_time(struct net_bridge *br, u32 ageing_time)
+{
+   struct switchdev_attr attr = {
+   .id = SWITCHDEV_ATTR_BRIDGE,
+   .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP,
+   .u.bridge.attr = IFLA_BR_AGEING_TIME,
+   .u.bridge.val = ageing_time,
+   };
+   unsigned long t = clock_t_to_jiffies(ageing_time);
+   int err;
+
+   if (t < BR_MIN_AGEING_TIME || t > BR_MAX_AGEING_TIME)
+   return -ERANGE;
+
+   err = switchdev_port_attr_set(br->dev, &attr);
+   if (err)
+   return err;
+
+   br->ageing_time = t;
+   mod_timer(&br->gc_timer, jiffies);
+
+   return 0;
+}
+
 void __br_set_forward_delay(struct net_bridge *br, unsigned long t)
 {
br->bridge_forward_delay = t;
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 4c97fc5..04ef192 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -102,8 +102,7 @@ static ssize_t ageing_time_show(struct device *d,
 
 static int set_ageing_time(struct net_bridge *br, unsigned long val)
 {
-   br->ageing_time = clock_t_to_jiffies(val);
-   return 0;
+   return br_set_ageing_time(br, val);
 }
 
 static ssize_t ageing_time_store(struct device *d,
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next 0/4] switchdev: push bridge attributes down

2015-09-24 Thread sfeldma
From: Scott Feldman 

Push bridge-level attributes down to switchdev drivers.  This patchset
adds the infrastructure and then pushes, as an example, ageing_time attribute
down from bridge to switchdev (rocker) driver.  Add some range-checking
for ageing_time.

# ip link set dev br0 type bridge ageing_time 1000

# ip link set dev br0 type bridge ageing_time 999
RTNETLINK answers: Numerical result out of range

Up until now, switchdev attrs were port-level attrs, so the netdev used in
switchdev_attr_set() would be a switch port or bond of switch ports.  With
bridge-level attrs, the netdev passed to switchdev_attr_set() is the bridge
netdev.  The same recursive algo is used to visit the leaves of the stacked
drivers to set the attr, it's just in this case we start one layer higher in
the stack.  One note is not all ports in the bridge may support setting a
bridge-level attribute, so rather than failing the entire set, we'll skip over
those ports returning -EOPNOTSUPP.

Scott Feldman (4):
  switchdev: add bridge attributes
  switchdev: skip over ports returning -EOPNOTSUPP when recursing ports
  bridge: push bridge setting ageing_time down to switchdev
  rocker: handle setting bridge ageing_time

 drivers/net/ethernet/rocker/rocker.c |   22 ++
 include/net/switchdev.h  |6 ++
 include/uapi/linux/if_link.h |2 +-
 net/bridge/br_ioctl.c|3 +--
 net/bridge/br_netlink.c  |6 +++---
 net/bridge/br_private.h  |1 +
 net/bridge/br_stp.c  |   24 
 net/bridge/br_sysfs_br.c |3 +--
 net/switchdev/switchdev.c|9 -
 9 files changed, 67 insertions(+), 9 deletions(-)

-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v3 5/7] rocker: add FDB cleanup timer

2015-09-23 Thread sfeldma
From: Scott Feldman 

Add a timer to each rocker switch to do FDB entry cleanup by ageing out
expired entries.  The timer scheduling algo is copied from the bridge
driver, for the most part, to keep the firing of the timer to a minimum.

Signed-off-by: Scott Feldman 
Acked-by: Jiri Pirko 
---
v3: Per davem review: add del_timer_sync on rocker port remove.

 drivers/net/ethernet/rocker/rocker.c |   42 ++
 1 file changed, 42 insertions(+)

diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index be8bb04..32c5429 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -248,6 +248,7 @@ struct rocker {
u64 flow_tbl_next_cookie;
DECLARE_HASHTABLE(group_tbl, 16);
spinlock_t group_tbl_lock;  /* for group tbl accesses */
+   struct timer_list fdb_cleanup_timer;
DECLARE_HASHTABLE(fdb_tbl, 16);
spinlock_t fdb_tbl_lock;/* for fdb tbl accesses */
unsigned long internal_vlan_bitmap[ROCKER_INTERNAL_VLAN_BITMAP_LEN];
@@ -3706,6 +3707,41 @@ err_out:
return err;
 }
 
+static void rocker_fdb_cleanup(unsigned long data)
+{
+   struct rocker *rocker = (struct rocker *)data;
+   struct rocker_port *rocker_port;
+   struct rocker_fdb_tbl_entry *entry;
+   struct hlist_node *tmp;
+   unsigned long next_timer = jiffies + BR_MIN_AGEING_TIME;
+   unsigned long expires;
+   unsigned long lock_flags;
+   int flags = ROCKER_OP_FLAG_NOWAIT | ROCKER_OP_FLAG_REMOVE |
+   ROCKER_OP_FLAG_LEARNED;
+   int bkt;
+
+   spin_lock_irqsave(&rocker->fdb_tbl_lock, lock_flags);
+
+   hash_for_each_safe(rocker->fdb_tbl, bkt, tmp, entry, entry) {
+   if (!entry->learned)
+   continue;
+   rocker_port = entry->key.rocker_port;
+   expires = entry->touched + rocker_port->ageing_time;
+   if (time_before_eq(expires, jiffies)) {
+   rocker_port_fdb_learn(rocker_port, SWITCHDEV_TRANS_NONE,
+ flags, entry->key.addr,
+ entry->key.vlan_id);
+   hash_del(&entry->entry);
+   } else if (time_before(expires, next_timer)) {
+   next_timer = expires;
+   }
+   }
+
+   spin_unlock_irqrestore(&rocker->fdb_tbl_lock, lock_flags);
+
+   mod_timer(&rocker->fdb_cleanup_timer, round_jiffies_up(next_timer));
+}
+
 static int rocker_port_router_mac(struct rocker_port *rocker_port,
  enum switchdev_trans trans, int flags,
  __be16 vlan_id)
@@ -5191,6 +5227,10 @@ static int rocker_probe(struct pci_dev *pdev, const 
struct pci_device_id *id)
goto err_init_tbls;
}
 
+   setup_timer(&rocker->fdb_cleanup_timer, rocker_fdb_cleanup,
+   (unsigned long) rocker);
+   mod_timer(&rocker->fdb_cleanup_timer, jiffies);
+
err = rocker_probe_ports(rocker);
if (err) {
dev_err(&pdev->dev, "failed to probe ports\n");
@@ -5203,6 +5243,7 @@ static int rocker_probe(struct pci_dev *pdev, const 
struct pci_device_id *id)
return 0;
 
 err_probe_ports:
+   del_timer_sync(&rocker->fdb_cleanup_timer);
rocker_free_tbls(rocker);
 err_init_tbls:
free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_EVENT), rocker);
@@ -5230,6 +5271,7 @@ static void rocker_remove(struct pci_dev *pdev)
 {
struct rocker *rocker = pci_get_drvdata(pdev);
 
+   del_timer_sync(&rocker->fdb_cleanup_timer);
rocker_free_tbls(rocker);
rocker_write32(rocker, CONTROL, ROCKER_CONTROL_RESET);
rocker_remove_ports(rocker);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v3 7/7] switchdev: update documentation on FDB ageing_time

2015-09-23 Thread sfeldma
From: Scott Feldman 

Signed-off-by: Scott Feldman 
Reviewed-by: Vivien Didelot 
Acked-by: Jiri Pirko 
---
 Documentation/networking/switchdev.txt |   24 
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/Documentation/networking/switchdev.txt 
b/Documentation/networking/switchdev.txt
index 476df04..67e43ee 100644
--- a/Documentation/networking/switchdev.txt
+++ b/Documentation/networking/switchdev.txt
@@ -239,20 +239,20 @@ The driver should initialize the attributes to the 
hardware defaults.
 FDB Ageing
 ^^
 
-There are two FDB ageing models supported: 1) ageing by the device, and 2)
-ageing by the kernel.  Ageing by the device is preferred if many FDB entries
-are supported.  The driver calls call_switchdev_notifiers(SWITCHDEV_FDB_DEL,
-...) to age out the FDB entry.  In this model, ageing by the kernel should be
-turned off.  XXX: how to turn off ageing in kernel on a per-port basis or
-otherwise prevent the kernel from ageing out the FDB entry?
-
-In the kernel ageing model, the standard bridge ageing mechanism is used to age
-out stale FDB entries.  To keep an FDB entry "alive", the driver should refresh
-the FDB entry by calling call_switchdev_notifiers(SWITCHDEV_FDB_ADD, ...).  The
+The bridge will skip ageing FDB entries marked with NTF_EXT_LEARNED and it is
+the responsibility of the port driver/device to age out these entries.  If the
+port device supports ageing, when the FDB entry expires, it will notify the
+driver which in turn will notify the bridge with SWITCHDEV_FDB_DEL.  If the
+device does not support ageing, the driver can simulate ageing using a
+garbage collection timer to monitor FDB entries.  Expired entries will be
+notified to the bridge using SWITCHDEV_FDB_DEL.  See rocker driver for
+example of driver running ageing timer.
+
+To keep an NTF_EXT_LEARNED entry "alive", the driver should refresh the FDB
+entry by calling call_switchdev_notifiers(SWITCHDEV_FDB_ADD, ...).  The
 notification will reset the FDB entry's last-used time to now.  The driver
 should rate limit refresh notifications, for example, no more than once a
-second.  If the FDB entry expires, fdb_delete is called to remove entry from
-the device.
+second.  (The last-used time is visible using the bridge -s fdb option).
 
 STP State Change on Port
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v3 6/7] bridge: don't age externally added FDB entries

2015-09-23 Thread sfeldma
From: Siva Mannem 

Signed-off-by: Siva Mannem 
Signed-off-by: Scott Feldman 
Acked-by: Vivien Didelot 
Acked-by: Jiri Pirko 
Acked-by: Premkumar Jonnala 
---
 net/bridge/br_fdb.c |2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 9e9875d..6663cc0 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -299,6 +299,8 @@ void br_fdb_cleanup(unsigned long _data)
unsigned long this_timer;
if (f->is_static)
continue;
+   if (f->added_by_external_learn)
+   continue;
this_timer = f->updated + delay;
if (time_before_eq(this_timer, jiffies))
fdb_delete(br, f);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v3 3/7] bridge: define some min/max/default ageing time constants

2015-09-23 Thread sfeldma
From: Scott Feldman 

Signed-off-by: Scott Feldman 
---
v2: Per Jiri review comment: add BR_DEFAULT_AGEING_TIME to defines

 include/linux/if_bridge.h |6 ++
 net/bridge/br_device.c|2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index dad8b00..a338a68 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -46,6 +46,12 @@ struct br_ip_list {
 #define BR_LEARNING_SYNC   BIT(9)
 #define BR_PROXYARP_WIFI   BIT(10)
 
+/* values as per ieee8021QBridgeFdbAgingTime */
+#define BR_MIN_AGEING_TIME (10 * HZ)
+#define BR_MAX_AGEING_TIME (1000000 * HZ)
+
+#define BR_DEFAULT_AGEING_TIME (300 * HZ)
+
 extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void 
__user *));
 
 typedef int br_should_route_hook_t(struct sk_buff *skb);
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 6ed2feb..2f81624 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -391,7 +391,7 @@ void br_dev_setup(struct net_device *dev)
br->bridge_max_age = br->max_age = 20 * HZ;
br->bridge_hello_time = br->hello_time = 2 * HZ;
br->bridge_forward_delay = br->forward_delay = 15 * HZ;
-   br->ageing_time = 300 * HZ;
+   br->ageing_time = BR_DEFAULT_AGEING_TIME;
 
br_netfilter_rtable_init(br);
br_stp_timer_init(br);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v3 1/7] rocker: track when FDB entry is touched.

2015-09-23 Thread sfeldma
From: Scott Feldman 

The entry is touched once when created, and touched again for each update.
The touched time is used to calculate FDB entry age.

Signed-off-by: Scott Feldman 
Acked-by: Jiri Pirko 
---
 drivers/net/ethernet/rocker/rocker.c |   18 --
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index 34ac41a..e517e9c 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -152,6 +152,7 @@ struct rocker_fdb_tbl_entry {
struct hlist_node entry;
u32 key_crc32; /* key */
bool learned;
+   unsigned long touched;
struct rocker_fdb_tbl_key {
u32 pport;
u8 addr[ETH_ALEN];
@@ -3629,6 +3630,7 @@ static int rocker_port_fdb(struct rocker_port 
*rocker_port,
return -ENOMEM;
 
fdb->learned = (flags & ROCKER_OP_FLAG_LEARNED);
+   fdb->touched = jiffies;
fdb->key.pport = rocker_port->pport;
ether_addr_copy(fdb->key.addr, addr);
fdb->key.vlan_id = vlan_id;
@@ -3638,13 +3640,17 @@ static int rocker_port_fdb(struct rocker_port 
*rocker_port,
 
found = rocker_fdb_tbl_find(rocker, fdb);
 
-   if (removing && found) {
-   rocker_port_kfree(trans, fdb);
-   if (trans != SWITCHDEV_TRANS_PREPARE)
-   hash_del(&found->entry);
-   } else if (!removing && !found) {
+   if (found) {
+   found->touched = jiffies;
+   if (removing) {
+   rocker_port_kfree(trans, fdb);
+   if (trans != SWITCHDEV_TRANS_PREPARE)
+   hash_del(&found->entry);
+   }
+   } else if (!removing) {
if (trans != SWITCHDEV_TRANS_PREPARE)
-   hash_add(rocker->fdb_tbl, &fdb->entry, fdb->key_crc32);
+   hash_add(rocker->fdb_tbl, &fdb->entry,
+fdb->key_crc32);
}
 
spin_unlock_irqrestore(&rocker->fdb_tbl_lock, lock_flags);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v3 0/7] bridge: don't age out externally added FDB entries

2015-09-23 Thread sfeldma
From: Scott Feldman 

v3: Per davem review: add del_timer_sync on rocker port remove.

v2: Per Jiri review comment: add BR_DEFAULT_AGEING_TIME to defines

Siva originally proposed skipping externally added FDB entries in the bridge's
FDB garbage collection func, and moving the ageing of externally added entries
to the port driver/device.  This broke rocker, since rocker didn't have a
hardware (or software) mechanism for ageing out its learned FDB entries.

This patchset reintroduces Siva's bridge driver patch to skip externally added
entries and adds support in rocker so rocker can age out its own entries.
Rocker does this using a software timer similar to the bridge's FDB garbage
collection timer.  Other switchdev devices/drivers can use this software timer
method or program the device to notify the driver of aged-out entries.

Updated switchdev.txt documentation to reflect current state-of-the-art.  This
removes one more XXX todo comment in switchdev.txt.


Scott Feldman (6):
  rocker: track when FDB entry is touched.
  rocker: store rocker_port in fdb key rather than pport
  bridge: define some min/max/default ageing time constants
  rocker: adding port ageing_time for ageing out FDB entries
  rocker: add FDB cleanup timer
  switchdev: update documentation on FDB ageing_time

Siva Mannem (1):
  bridge: don't age externally added FDB entries

 Documentation/networking/switchdev.txt |   24 +--
 drivers/net/ethernet/rocker/rocker.c   |   70 +++-
 include/linux/if_bridge.h  |6 +++
 net/bridge/br_device.c |2 +-
 net/bridge/br_fdb.c|2 +
 5 files changed, 81 insertions(+), 23 deletions(-)

-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v3 2/7] rocker: store rocker_port in fdb key rather than pport

2015-09-23 Thread sfeldma
From: Scott Feldman 

We'll need more info from rocker_port than just pport when we age out fdb
entries, so store rocker_port rather than pport in each fdb entry.

Signed-off-by: Scott Feldman 
Acked-by: Jiri Pirko 
---
 drivers/net/ethernet/rocker/rocker.c |8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index e517e9c..f55ed2c 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -154,7 +154,7 @@ struct rocker_fdb_tbl_entry {
bool learned;
unsigned long touched;
struct rocker_fdb_tbl_key {
-   u32 pport;
+   struct rocker_port *rocker_port;
u8 addr[ETH_ALEN];
__be16 vlan_id;
} key;
@@ -3631,7 +3631,7 @@ static int rocker_port_fdb(struct rocker_port 
*rocker_port,
 
fdb->learned = (flags & ROCKER_OP_FLAG_LEARNED);
fdb->touched = jiffies;
-   fdb->key.pport = rocker_port->pport;
+   fdb->key.rocker_port = rocker_port;
ether_addr_copy(fdb->key.addr, addr);
fdb->key.vlan_id = vlan_id;
fdb->key_crc32 = crc32(~0, &fdb->key, sizeof(fdb->key));
@@ -3686,7 +3686,7 @@ static int rocker_port_fdb_flush(struct rocker_port 
*rocker_port,
spin_lock_irqsave(&rocker->fdb_tbl_lock, lock_flags);
 
hash_for_each_safe(rocker->fdb_tbl, bkt, tmp, found, entry) {
-   if (found->key.pport != rocker_port->pport)
+   if (found->key.rocker_port != rocker_port)
continue;
if (!found->learned)
continue;
@@ -4553,7 +4553,7 @@ static int rocker_port_fdb_dump(const struct rocker_port 
*rocker_port,
 
spin_lock_irqsave(&rocker->fdb_tbl_lock, lock_flags);
hash_for_each_safe(rocker->fdb_tbl, bkt, tmp, found, entry) {
-   if (found->key.pport != rocker_port->pport)
+   if (found->key.rocker_port != rocker_port)
continue;
fdb->addr = found->key.addr;
fdb->ndm_state = NUD_REACHABLE;
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v3 4/7] rocker: adding port ageing_time for ageing out FDB entries

2015-09-23 Thread sfeldma
From: Scott Feldman 

Follow-up patchset will allow user to change ageing_time, but for now
just hard-code it to a fixed value (the same value used as the default
for the bridge driver).

Signed-off-by: Scott Feldman 
---
 drivers/net/ethernet/rocker/rocker.c |2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index f55ed2c..be8bb04 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -221,6 +221,7 @@ struct rocker_port {
__be16 internal_vlan_id;
int stp_state;
u32 brport_flags;
+   unsigned long ageing_time;
bool ctrls[ROCKER_CTRL_MAX];
unsigned long vlan_bitmap[ROCKER_VLAN_BITMAP_LEN];
struct napi_struct napi_tx;
@@ -4975,6 +4976,7 @@ static int rocker_probe_port(struct rocker *rocker, 
unsigned int port_number)
rocker_port->port_number = port_number;
rocker_port->pport = port_number + 1;
rocker_port->brport_flags = BR_LEARNING | BR_LEARNING_SYNC;
+   rocker_port->ageing_time = BR_DEFAULT_AGEING_TIME;
INIT_LIST_HEAD(&rocker_port->trans_mem);
 
rocker_port_dev_addr_init(rocker_port);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v2 6/7] bridge: don't age externally added FDB entries

2015-09-20 Thread sfeldma
From: Siva Mannem 

Signed-off-by: Siva Mannem 
Signed-off-by: Scott Feldman 
Acked-by: Vivien Didelot 
Acked-by: Jiri Pirko 
---
 net/bridge/br_fdb.c |2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 9e9875d..6663cc0 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -299,6 +299,8 @@ void br_fdb_cleanup(unsigned long _data)
unsigned long this_timer;
if (f->is_static)
continue;
+   if (f->added_by_external_learn)
+   continue;
this_timer = f->updated + delay;
if (time_before_eq(this_timer, jiffies))
fdb_delete(br, f);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v2 4/7] rocker: adding port ageing_time for ageing out FDB entries

2015-09-20 Thread sfeldma
From: Scott Feldman 

Follow-up patchset will allow user to change ageing_time, but for now
just hard-code it to a fixed value (the same value used as the default
for the bridge driver).

Signed-off-by: Scott Feldman 
---
 drivers/net/ethernet/rocker/rocker.c |2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index f55ed2c..be8bb04 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -221,6 +221,7 @@ struct rocker_port {
__be16 internal_vlan_id;
int stp_state;
u32 brport_flags;
+   unsigned long ageing_time;
bool ctrls[ROCKER_CTRL_MAX];
unsigned long vlan_bitmap[ROCKER_VLAN_BITMAP_LEN];
struct napi_struct napi_tx;
@@ -4975,6 +4976,7 @@ static int rocker_probe_port(struct rocker *rocker, 
unsigned int port_number)
rocker_port->port_number = port_number;
rocker_port->pport = port_number + 1;
rocker_port->brport_flags = BR_LEARNING | BR_LEARNING_SYNC;
+   rocker_port->ageing_time = BR_DEFAULT_AGEING_TIME;
INIT_LIST_HEAD(&rocker_port->trans_mem);
 
rocker_port_dev_addr_init(rocker_port);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v2 3/7] bridge: define some min/max/default ageing time constants

2015-09-20 Thread sfeldma
From: Scott Feldman 

Signed-off-by: Scott Feldman 
---
v2: Per Jiri review comment: add BR_DEFAULT_AGEING_TIME to defines

 include/linux/if_bridge.h |6 ++
 net/bridge/br_device.c|2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index dad8b00..a338a68 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -46,6 +46,12 @@ struct br_ip_list {
 #define BR_LEARNING_SYNC   BIT(9)
 #define BR_PROXYARP_WIFI   BIT(10)
 
+/* values as per ieee8021QBridgeFdbAgingTime */
+#define BR_MIN_AGEING_TIME (10 * HZ)
+#define BR_MAX_AGEING_TIME (1000000 * HZ)
+
+#define BR_DEFAULT_AGEING_TIME (300 * HZ)
+
 extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void 
__user *));
 
 typedef int br_should_route_hook_t(struct sk_buff *skb);
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 6ed2feb..2f81624 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -391,7 +391,7 @@ void br_dev_setup(struct net_device *dev)
br->bridge_max_age = br->max_age = 20 * HZ;
br->bridge_hello_time = br->hello_time = 2 * HZ;
br->bridge_forward_delay = br->forward_delay = 15 * HZ;
-   br->ageing_time = 300 * HZ;
+   br->ageing_time = BR_DEFAULT_AGEING_TIME;
 
br_netfilter_rtable_init(br);
br_stp_timer_init(br);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v2 7/7] switchdev: update documentation on FDB ageing_time

2015-09-20 Thread sfeldma
From: Scott Feldman 

Signed-off-by: Scott Feldman 
Reviewed-by: Vivien Didelot 
Acked-by: Jiri Pirko 
---
 Documentation/networking/switchdev.txt |   24 
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/Documentation/networking/switchdev.txt 
b/Documentation/networking/switchdev.txt
index 476df04..67e43ee 100644
--- a/Documentation/networking/switchdev.txt
+++ b/Documentation/networking/switchdev.txt
@@ -239,20 +239,20 @@ The driver should initialize the attributes to the 
hardware defaults.
 FDB Ageing
 ^^
 
-There are two FDB ageing models supported: 1) ageing by the device, and 2)
-ageing by the kernel.  Ageing by the device is preferred if many FDB entries
-are supported.  The driver calls call_switchdev_notifiers(SWITCHDEV_FDB_DEL,
-...) to age out the FDB entry.  In this model, ageing by the kernel should be
-turned off.  XXX: how to turn off ageing in kernel on a per-port basis or
-otherwise prevent the kernel from ageing out the FDB entry?
-
-In the kernel ageing model, the standard bridge ageing mechanism is used to age
-out stale FDB entries.  To keep an FDB entry "alive", the driver should refresh
-the FDB entry by calling call_switchdev_notifiers(SWITCHDEV_FDB_ADD, ...).  The
+The bridge will skip ageing FDB entries marked with NTF_EXT_LEARNED and it is
+the responsibility of the port driver/device to age out these entries.  If the
+port device supports ageing, when the FDB entry expires, it will notify the
+driver which in turn will notify the bridge with SWITCHDEV_FDB_DEL.  If the
+device does not support ageing, the driver can simulate ageing using a
+garbage collection timer to monitor FDB entries.  Expired entries will be
+notified to the bridge using SWITCHDEV_FDB_DEL.  See rocker driver for
+example of driver running ageing timer.
+
+To keep an NTF_EXT_LEARNED entry "alive", the driver should refresh the FDB
+entry by calling call_switchdev_notifiers(SWITCHDEV_FDB_ADD, ...).  The
 notification will reset the FDB entry's last-used time to now.  The driver
 should rate limit refresh notifications, for example, no more than once a
-second.  If the FDB entry expires, fdb_delete is called to remove entry from
-the device.
+second.  (The last-used time is visible using the bridge -s fdb option).
 
 STP State Change on Port
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v2 0/7] bridge: don't age out externally added FDB entries

2015-09-20 Thread sfeldma
From: Scott Feldman 

v2: Per Jiri review comment: add BR_DEFAULT_AGEING_TIME to defines

Siva originally proposed skipping externally added FDB entries in the bridge's
FDB garbage collection func, and moving the ageing of externally added entries
to the port driver/device.  This broke rocker, since rocker didn't have a
hardware (or software) mechanism for ageing out its learned FDB entries.

This patchset reintroduces Siva's bridge driver patch to skip externally added
entries and adds support in rocker so rocker can age out its own entries.
Rocker does this using a software timer similar to the bridge's FDB garbage
collection timer.  Other switchdev devices/drivers can use this software timer
method or program the device to notify the driver of aged-out entries.

Updated switchdev.txt documentation to reflect current state-of-the-art.  This
removes one more XXX todo comment in switchdev.txt.

Scott Feldman (6):
  rocker: track when FDB entry is touched.
  rocker: store rocker_port in fdb key rather than pport
  bridge: define some min/max/default ageing time constants
  rocker: adding port ageing_time for ageing out FDB entries
  rocker: add FDB cleanup timer
  switchdev: update documentation on FDB ageing_time

Siva Mannem (1):
  bridge: don't age externally added FDB entries

 Documentation/networking/switchdev.txt |   24 +--
 drivers/net/ethernet/rocker/rocker.c   |   69 +++-
 include/linux/if_bridge.h  |6 +++
 net/bridge/br_device.c |2 +-
 net/bridge/br_fdb.c|2 +
 5 files changed, 80 insertions(+), 23 deletions(-)

-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v2 5/7] rocker: add FDB cleanup timer

2015-09-20 Thread sfeldma
From: Scott Feldman 

Add a timer to each rocker switch to do FDB entry cleanup by ageing out
expired entries.  The timer scheduling algo is copied from the bridge
driver, for the most part, to keep the firing of the timer to a minimum.

Signed-off-by: Scott Feldman 
Acked-by: Jiri Pirko 
---
 drivers/net/ethernet/rocker/rocker.c |   41 ++
 1 file changed, 41 insertions(+)

diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index be8bb04..e4e0278 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -248,6 +248,7 @@ struct rocker {
u64 flow_tbl_next_cookie;
DECLARE_HASHTABLE(group_tbl, 16);
spinlock_t group_tbl_lock;  /* for group tbl accesses */
+   struct timer_list fdb_cleanup_timer;
DECLARE_HASHTABLE(fdb_tbl, 16);
spinlock_t fdb_tbl_lock;/* for fdb tbl accesses */
unsigned long internal_vlan_bitmap[ROCKER_INTERNAL_VLAN_BITMAP_LEN];
@@ -3706,6 +3707,41 @@ err_out:
return err;
 }
 
+static void rocker_fdb_cleanup(unsigned long data)
+{
+   struct rocker *rocker = (struct rocker *)data;
+   struct rocker_port *rocker_port;
+   struct rocker_fdb_tbl_entry *entry;
+   struct hlist_node *tmp;
+   unsigned long next_timer = jiffies + BR_MIN_AGEING_TIME;
+   unsigned long expires;
+   unsigned long lock_flags;
+   int flags = ROCKER_OP_FLAG_NOWAIT | ROCKER_OP_FLAG_REMOVE |
+   ROCKER_OP_FLAG_LEARNED;
+   int bkt;
+
+   spin_lock_irqsave(&rocker->fdb_tbl_lock, lock_flags);
+
+   hash_for_each_safe(rocker->fdb_tbl, bkt, tmp, entry, entry) {
+   if (!entry->learned)
+   continue;
+   rocker_port = entry->key.rocker_port;
+   expires = entry->touched + rocker_port->ageing_time;
+   if (time_before_eq(expires, jiffies)) {
+   rocker_port_fdb_learn(rocker_port, SWITCHDEV_TRANS_NONE,
+ flags, entry->key.addr,
+ entry->key.vlan_id);
+   hash_del(&entry->entry);
+   } else if (time_before(expires, next_timer)) {
+   next_timer = expires;
+   }
+   }
+
+   spin_unlock_irqrestore(&rocker->fdb_tbl_lock, lock_flags);
+
+   mod_timer(&rocker->fdb_cleanup_timer, round_jiffies_up(next_timer));
+}
+
 static int rocker_port_router_mac(struct rocker_port *rocker_port,
  enum switchdev_trans trans, int flags,
  __be16 vlan_id)
@@ -5191,6 +5227,10 @@ static int rocker_probe(struct pci_dev *pdev, const 
struct pci_device_id *id)
goto err_init_tbls;
}
 
+   setup_timer(&rocker->fdb_cleanup_timer, rocker_fdb_cleanup,
+   (unsigned long) rocker);
+   mod_timer(&rocker->fdb_cleanup_timer, jiffies);
+
err = rocker_probe_ports(rocker);
if (err) {
dev_err(&pdev->dev, "failed to probe ports\n");
@@ -5203,6 +5243,7 @@ static int rocker_probe(struct pci_dev *pdev, const 
struct pci_device_id *id)
return 0;
 
 err_probe_ports:
+   del_timer(&rocker->fdb_cleanup_timer);
rocker_free_tbls(rocker);
 err_init_tbls:
free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_EVENT), rocker);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v2 1/7] rocker: track when FDB entry is touched.

2015-09-20 Thread sfeldma
From: Scott Feldman 

The entry is touched once when created, and touched again for each update.
The touched time is used to calculate FDB entry age.

Signed-off-by: Scott Feldman 
Acked-by: Jiri Pirko 
---
 drivers/net/ethernet/rocker/rocker.c |   18 --
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index 34ac41a..e517e9c 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -152,6 +152,7 @@ struct rocker_fdb_tbl_entry {
struct hlist_node entry;
u32 key_crc32; /* key */
bool learned;
+   unsigned long touched;
struct rocker_fdb_tbl_key {
u32 pport;
u8 addr[ETH_ALEN];
@@ -3629,6 +3630,7 @@ static int rocker_port_fdb(struct rocker_port 
*rocker_port,
return -ENOMEM;
 
fdb->learned = (flags & ROCKER_OP_FLAG_LEARNED);
+   fdb->touched = jiffies;
fdb->key.pport = rocker_port->pport;
ether_addr_copy(fdb->key.addr, addr);
fdb->key.vlan_id = vlan_id;
@@ -3638,13 +3640,17 @@ static int rocker_port_fdb(struct rocker_port 
*rocker_port,
 
found = rocker_fdb_tbl_find(rocker, fdb);
 
-   if (removing && found) {
-   rocker_port_kfree(trans, fdb);
-   if (trans != SWITCHDEV_TRANS_PREPARE)
-   hash_del(&found->entry);
-   } else if (!removing && !found) {
+   if (found) {
+   found->touched = jiffies;
+   if (removing) {
+   rocker_port_kfree(trans, fdb);
+   if (trans != SWITCHDEV_TRANS_PREPARE)
+   hash_del(&found->entry);
+   }
+   } else if (!removing) {
if (trans != SWITCHDEV_TRANS_PREPARE)
-   hash_add(rocker->fdb_tbl, &fdb->entry, fdb->key_crc32);
+   hash_add(rocker->fdb_tbl, &fdb->entry,
+fdb->key_crc32);
}
 
spin_unlock_irqrestore(&rocker->fdb_tbl_lock, lock_flags);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v2 2/7] rocker: store rocker_port in fdb key rather than pport

2015-09-20 Thread sfeldma
From: Scott Feldman 

We'll need more info from rocker_port than just pport when we age out fdb
entries, so store rocker_port rather than pport in each fdb entry.

Signed-off-by: Scott Feldman 
Acked-by: Jiri Pirko 
---
 drivers/net/ethernet/rocker/rocker.c |8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index e517e9c..f55ed2c 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -154,7 +154,7 @@ struct rocker_fdb_tbl_entry {
bool learned;
unsigned long touched;
struct rocker_fdb_tbl_key {
-   u32 pport;
+   struct rocker_port *rocker_port;
u8 addr[ETH_ALEN];
__be16 vlan_id;
} key;
@@ -3631,7 +3631,7 @@ static int rocker_port_fdb(struct rocker_port 
*rocker_port,
 
fdb->learned = (flags & ROCKER_OP_FLAG_LEARNED);
fdb->touched = jiffies;
-   fdb->key.pport = rocker_port->pport;
+   fdb->key.rocker_port = rocker_port;
ether_addr_copy(fdb->key.addr, addr);
fdb->key.vlan_id = vlan_id;
fdb->key_crc32 = crc32(~0, &fdb->key, sizeof(fdb->key));
@@ -3686,7 +3686,7 @@ static int rocker_port_fdb_flush(struct rocker_port 
*rocker_port,
spin_lock_irqsave(&rocker->fdb_tbl_lock, lock_flags);
 
hash_for_each_safe(rocker->fdb_tbl, bkt, tmp, found, entry) {
-   if (found->key.pport != rocker_port->pport)
+   if (found->key.rocker_port != rocker_port)
continue;
if (!found->learned)
continue;
@@ -4553,7 +4553,7 @@ static int rocker_port_fdb_dump(const struct rocker_port 
*rocker_port,
 
spin_lock_irqsave(&rocker->fdb_tbl_lock, lock_flags);
hash_for_each_safe(rocker->fdb_tbl, bkt, tmp, found, entry) {
-   if (found->key.pport != rocker_port->pport)
+   if (found->key.rocker_port != rocker_port)
continue;
fdb->addr = found->key.addr;
fdb->ndm_state = NUD_REACHABLE;
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next 7/7] switchdev: update documentation on FDB ageing_time

2015-09-18 Thread sfeldma
From: Scott Feldman 

Signed-off-by: Scott Feldman 
---
 Documentation/networking/switchdev.txt |   24 
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/Documentation/networking/switchdev.txt 
b/Documentation/networking/switchdev.txt
index 476df04..67e43ee 100644
--- a/Documentation/networking/switchdev.txt
+++ b/Documentation/networking/switchdev.txt
@@ -239,20 +239,20 @@ The driver should initialize the attributes to the 
hardware defaults.
 FDB Ageing
 ^^
 
-There are two FDB ageing models supported: 1) ageing by the device, and 2)
-ageing by the kernel.  Ageing by the device is preferred if many FDB entries
-are supported.  The driver calls call_switchdev_notifiers(SWITCHDEV_FDB_DEL,
-...) to age out the FDB entry.  In this model, ageing by the kernel should be
-turned off.  XXX: how to turn off ageing in kernel on a per-port basis or
-otherwise prevent the kernel from ageing out the FDB entry?
-
-In the kernel ageing model, the standard bridge ageing mechanism is used to age
-out stale FDB entries.  To keep an FDB entry "alive", the driver should refresh
-the FDB entry by calling call_switchdev_notifiers(SWITCHDEV_FDB_ADD, ...).  The
+The bridge will skip ageing FDB entries marked with NTF_EXT_LEARNED and it is
+the responsibility of the port driver/device to age out these entries.  If the
+port device supports ageing, when the FDB entry expires, it will notify the
+driver which in turn will notify the bridge with SWITCHDEV_FDB_DEL.  If the
+device does not support ageing, the driver can simulate ageing using a
+garbage collection timer to monitor FDB entries.  Expired entries will be
+notified to the bridge using SWITCHDEV_FDB_DEL.  See rocker driver for
+example of driver running ageing timer.
+
+To keep an NTF_EXT_LEARNED entry "alive", the driver should refresh the FDB
+entry by calling call_switchdev_notifiers(SWITCHDEV_FDB_ADD, ...).  The
 notification will reset the FDB entry's last-used time to now.  The driver
 should rate limit refresh notifications, for example, no more than once a
-second.  If the FDB entry expires, fdb_delete is called to remove entry from
-the device.
+second.  (The last-used time is visible using the bridge -s fdb option).
 
 STP State Change on Port
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next 3/7] rocker: adding port ageing_time for ageing out FDB entries

2015-09-18 Thread sfeldma
From: Scott Feldman 

Follow-up patchset will allow user to change ageing_time, but for now
just hard-code it to a fixed value (the same value used as the default
for the bridge driver).

Signed-off-by: Scott Feldman 
---
 drivers/net/ethernet/rocker/rocker.c |2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index f55ed2c..eba22f5 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -221,6 +221,7 @@ struct rocker_port {
__be16 internal_vlan_id;
int stp_state;
u32 brport_flags;
+   unsigned long ageing_time;
bool ctrls[ROCKER_CTRL_MAX];
unsigned long vlan_bitmap[ROCKER_VLAN_BITMAP_LEN];
struct napi_struct napi_tx;
@@ -4975,6 +4976,7 @@ static int rocker_probe_port(struct rocker *rocker, 
unsigned int port_number)
rocker_port->port_number = port_number;
rocker_port->pport = port_number + 1;
rocker_port->brport_flags = BR_LEARNING | BR_LEARNING_SYNC;
+   rocker_port->ageing_time = 300 * HZ;
INIT_LIST_HEAD(&rocker_port->trans_mem);
 
rocker_port_dev_addr_init(rocker_port);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next 5/7] rocker: add FDB cleanup timer

2015-09-18 Thread sfeldma
From: Scott Feldman 

Add a timer to each rocker switch to do FDB entry cleanup by ageing out
expired entries.  The timer scheduling algo is copied from the bridge
driver, for the most part, to keep the firing of the timer to a minimum.

Signed-off-by: Scott Feldman 
---
 drivers/net/ethernet/rocker/rocker.c |   41 ++
 1 file changed, 41 insertions(+)

diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index eba22f5..232df69 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -248,6 +248,7 @@ struct rocker {
u64 flow_tbl_next_cookie;
DECLARE_HASHTABLE(group_tbl, 16);
spinlock_t group_tbl_lock;  /* for group tbl accesses */
+   struct timer_list fdb_cleanup_timer;
DECLARE_HASHTABLE(fdb_tbl, 16);
spinlock_t fdb_tbl_lock;/* for fdb tbl accesses */
unsigned long internal_vlan_bitmap[ROCKER_INTERNAL_VLAN_BITMAP_LEN];
@@ -3706,6 +3707,41 @@ err_out:
return err;
 }
 
+static void rocker_fdb_cleanup(unsigned long data)
+{
+   struct rocker *rocker = (struct rocker *)data;
+   struct rocker_port *rocker_port;
+   struct rocker_fdb_tbl_entry *entry;
+   struct hlist_node *tmp;
+   unsigned long next_timer = jiffies + BR_MIN_AGEING_TIME;
+   unsigned long expires;
+   unsigned long lock_flags;
+   int flags = ROCKER_OP_FLAG_NOWAIT | ROCKER_OP_FLAG_REMOVE |
+   ROCKER_OP_FLAG_LEARNED;
+   int bkt;
+
+   spin_lock_irqsave(&rocker->fdb_tbl_lock, lock_flags);
+
+   hash_for_each_safe(rocker->fdb_tbl, bkt, tmp, entry, entry) {
+   if (!entry->learned)
+   continue;
+   rocker_port = entry->key.rocker_port;
+   expires = entry->touched + rocker_port->ageing_time;
+   if (time_before_eq(expires, jiffies)) {
+   rocker_port_fdb_learn(rocker_port, SWITCHDEV_TRANS_NONE,
+ flags, entry->key.addr,
+ entry->key.vlan_id);
+   hash_del(&entry->entry);
+   } else if (time_before(expires, next_timer)) {
+   next_timer = expires;
+   }
+   }
+
+   spin_unlock_irqrestore(&rocker->fdb_tbl_lock, lock_flags);
+
+   mod_timer(&rocker->fdb_cleanup_timer, round_jiffies_up(next_timer));
+}
+
 static int rocker_port_router_mac(struct rocker_port *rocker_port,
  enum switchdev_trans trans, int flags,
  __be16 vlan_id)
@@ -5191,6 +5227,10 @@ static int rocker_probe(struct pci_dev *pdev, const 
struct pci_device_id *id)
goto err_init_tbls;
}
 
+   setup_timer(&rocker->fdb_cleanup_timer, rocker_fdb_cleanup,
+   (unsigned long) rocker);
+   mod_timer(&rocker->fdb_cleanup_timer, jiffies);
+
err = rocker_probe_ports(rocker);
if (err) {
dev_err(&pdev->dev, "failed to probe ports\n");
@@ -5203,6 +5243,7 @@ static int rocker_probe(struct pci_dev *pdev, const 
struct pci_device_id *id)
return 0;
 
 err_probe_ports:
+   del_timer(&rocker->fdb_cleanup_timer);
rocker_free_tbls(rocker);
 err_init_tbls:
free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_EVENT), rocker);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next 0/7] bridge: don't age out externally added FDB entries

2015-09-18 Thread sfeldma
From: Scott Feldman 

Siva originally proposed skipping externally added FDB entries in the bridge's
FDB garbage collection func, and moving the ageing of externally added entries
to the port driver/device.  This broke rocker, since rocker didn't have a
hardware (or software) mechanism for ageing out its learned FDB entries.

This patchset reintroduces Siva's bridge driver patch to skip externally added
entries and adds support in rocker so rocker can age out its own entries.
Rocker does this using a software timer similar to the bridge's FDB garbage
collection timer.  Other switchdev devices/drivers can use this software timer
method or program the device to notify the driver of aged-out entries.

Updated switchdev.txt documentation to reflect current state-of-the-art.  This
removes one more XXX todo comment in switchdev.txt.

Scott Feldman (6):
  rocker: track when FDB entry is touched.
  rocker: store rocker_port in fdb key rather than pport
  rocker: adding port ageing_time for ageing out FDB entries
  bridge: define some min/max ageing time constants we'll use next
  rocker: add FDB cleanup timer
  switchdev: update documentation on FDB ageing_time

Siva Mannem (1):
  bridge: don't age externally added FDB entries

 Documentation/networking/switchdev.txt |   24 +--
 drivers/net/ethernet/rocker/rocker.c   |   69 +++-
 include/linux/if_bridge.h  |4 ++
 net/bridge/br_fdb.c|2 +
 4 files changed, 77 insertions(+), 22 deletions(-)

-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next 6/7] bridge: don't age externally added FDB entries

2015-09-18 Thread sfeldma
From: Siva Mannem 

Signed-off-by: Siva Mannem 
Signed-off-by: Scott Feldman 
---
 net/bridge/br_fdb.c |2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 9e9875d..6663cc0 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -299,6 +299,8 @@ void br_fdb_cleanup(unsigned long _data)
unsigned long this_timer;
if (f->is_static)
continue;
+   if (f->added_by_external_learn)
+   continue;
this_timer = f->updated + delay;
if (time_before_eq(this_timer, jiffies))
fdb_delete(br, f);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next 1/7] rocker: track when FDB entry is touched.

2015-09-18 Thread sfeldma
From: Scott Feldman 

The entry is touched once when created, and touched again for each update.
The touched time is used to calculate FDB entry age.

Signed-off-by: Scott Feldman 
---
 drivers/net/ethernet/rocker/rocker.c |   18 --
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index 34ac41a..e517e9c 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -152,6 +152,7 @@ struct rocker_fdb_tbl_entry {
struct hlist_node entry;
u32 key_crc32; /* key */
bool learned;
+   unsigned long touched;
struct rocker_fdb_tbl_key {
u32 pport;
u8 addr[ETH_ALEN];
@@ -3629,6 +3630,7 @@ static int rocker_port_fdb(struct rocker_port 
*rocker_port,
return -ENOMEM;
 
fdb->learned = (flags & ROCKER_OP_FLAG_LEARNED);
+   fdb->touched = jiffies;
fdb->key.pport = rocker_port->pport;
ether_addr_copy(fdb->key.addr, addr);
fdb->key.vlan_id = vlan_id;
@@ -3638,13 +3640,17 @@ static int rocker_port_fdb(struct rocker_port 
*rocker_port,
 
found = rocker_fdb_tbl_find(rocker, fdb);
 
-   if (removing && found) {
-   rocker_port_kfree(trans, fdb);
-   if (trans != SWITCHDEV_TRANS_PREPARE)
-   hash_del(&found->entry);
-   } else if (!removing && !found) {
+   if (found) {
+   found->touched = jiffies;
+   if (removing) {
+   rocker_port_kfree(trans, fdb);
+   if (trans != SWITCHDEV_TRANS_PREPARE)
+   hash_del(&found->entry);
+   }
+   } else if (!removing) {
if (trans != SWITCHDEV_TRANS_PREPARE)
-   hash_add(rocker->fdb_tbl, &fdb->entry, fdb->key_crc32);
+   hash_add(rocker->fdb_tbl, &fdb->entry,
+fdb->key_crc32);
}
 
spin_unlock_irqrestore(&rocker->fdb_tbl_lock, lock_flags);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next 2/7] rocker: store rocker_port in fdb key rather than pport

2015-09-18 Thread sfeldma
From: Scott Feldman 

We'll need more info from rocker_port than just pport when we age out fdb
entries, so store rocker_port rather than pport in each fdb entry.

Signed-off-by: Scott Feldman 
---
 drivers/net/ethernet/rocker/rocker.c |8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index e517e9c..f55ed2c 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -154,7 +154,7 @@ struct rocker_fdb_tbl_entry {
bool learned;
unsigned long touched;
struct rocker_fdb_tbl_key {
-   u32 pport;
+   struct rocker_port *rocker_port;
u8 addr[ETH_ALEN];
__be16 vlan_id;
} key;
@@ -3631,7 +3631,7 @@ static int rocker_port_fdb(struct rocker_port 
*rocker_port,
 
fdb->learned = (flags & ROCKER_OP_FLAG_LEARNED);
fdb->touched = jiffies;
-   fdb->key.pport = rocker_port->pport;
+   fdb->key.rocker_port = rocker_port;
ether_addr_copy(fdb->key.addr, addr);
fdb->key.vlan_id = vlan_id;
fdb->key_crc32 = crc32(~0, &fdb->key, sizeof(fdb->key));
@@ -3686,7 +3686,7 @@ static int rocker_port_fdb_flush(struct rocker_port 
*rocker_port,
spin_lock_irqsave(&rocker->fdb_tbl_lock, lock_flags);
 
hash_for_each_safe(rocker->fdb_tbl, bkt, tmp, found, entry) {
-   if (found->key.pport != rocker_port->pport)
+   if (found->key.rocker_port != rocker_port)
continue;
if (!found->learned)
continue;
@@ -4553,7 +4553,7 @@ static int rocker_port_fdb_dump(const struct rocker_port 
*rocker_port,
 
spin_lock_irqsave(&rocker->fdb_tbl_lock, lock_flags);
hash_for_each_safe(rocker->fdb_tbl, bkt, tmp, found, entry) {
-   if (found->key.pport != rocker_port->pport)
+   if (found->key.rocker_port != rocker_port)
continue;
fdb->addr = found->key.addr;
fdb->ndm_state = NUD_REACHABLE;
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next 4/7] bridge: define some min/max ageing time constants we'll use next

2015-09-18 Thread sfeldma
From: Scott Feldman 

Signed-off-by: Scott Feldman 
---
 include/linux/if_bridge.h |4 
 1 file changed, 4 insertions(+)

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index dad8b00..6cc6dbc 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -46,6 +46,10 @@ struct br_ip_list {
 #define BR_LEARNING_SYNC   BIT(9)
 #define BR_PROXYARP_WIFI   BIT(10)
 
+/* values as per ieee8021QBridgeFdbAgingTime */
+#define BR_MIN_AGEING_TIME (10 * HZ)
+#define BR_MAX_AGEING_TIME (100 * HZ)
+
 extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void 
__user *));
 
 typedef int br_should_route_hook_t(struct sk_buff *skb);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH net-next 2/2] rocker: register each switch as a switchdev

2015-08-27 Thread sfeldma
From: Scott Feldman 

Signed-off-by: Scott Feldman 
---
 drivers/net/ethernet/rocker/rocker.c |   23 +++
 1 file changed, 23 insertions(+)

diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index a7cb74a..9555ae4 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -233,6 +233,7 @@ struct rocker {
struct pci_dev *pdev;
u8 __iomem *hw_addr;
struct msix_entry *msix_entries;
+   struct switchdev switchdev;
unsigned int port_count;
struct rocker_port **ports;
struct {
@@ -5090,6 +5091,19 @@ static void rocker_msix_fini(const struct rocker *rocker)
kfree(rocker->msix_entries);
 }
 
+static int rocker_probe_register_switchdev(struct rocker *rocker)
+{
+   char name[sizeof(rocker->hw.id) * 2 + 1];
+
+   sprintf(name, "%*phN", (int)sizeof(rocker->hw.id), &rocker->hw.id);
+   return register_switchdev(&rocker->switchdev, name);
+}
+
+static void rocker_probe_unregister_switchdev(struct rocker *rocker)
+{
+   unregister_switchdev(&rocker->switchdev);
+}
+
 static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
struct rocker *rocker;
@@ -5194,11 +5208,19 @@ static int rocker_probe(struct pci_dev *pdev, const 
struct pci_device_id *id)
goto err_probe_ports;
}
 
+   err = rocker_probe_register_switchdev(rocker);
+   if (err) {
+   dev_err(&pdev->dev, "cannot register switchdev\n");
+   goto err_register_switchdev;
+   }
+
dev_info(&pdev->dev, "Rocker switch with id %*phN\n",
 (int)sizeof(rocker->hw.id), &rocker->hw.id);
 
return 0;
 
+err_register_switchdev:
+   rocker_remove_ports(rocker);
 err_probe_ports:
rocker_free_tbls(rocker);
 err_init_tbls:
@@ -5227,6 +5249,7 @@ static void rocker_remove(struct pci_dev *pdev)
 {
struct rocker *rocker = pci_get_drvdata(pdev);
 
+   rocker_probe_unregister_switchdev(rocker);
rocker_free_tbls(rocker);
rocker_write32(rocker, CONTROL, ROCKER_CONTROL_RESET);
rocker_remove_ports(rocker);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH net-next 0/2] Add new switchdev device class

2015-08-27 Thread sfeldma
From: Scott Feldman 

In the switchdev model, we use netdevs to represent switchdev ports, but we
have no representation for the switch itself.  So, introduce a new switchdev
device class so we can define semantics and programming interfaces for the
switch itself.  Switchdev device class isn't tied to any particular bus.

This patch set is just the skeleton to get us started.  It adds the sysfs
object registration for the new class and defines a class-level attr "foo".
With the new class, we could hook PM functions, for example, to handle power
transitions at the switch level.  I registered rocker and get:

   $ ls /sys/class/switchdev/525400123501/
   foo  power  subsystem  uevent

So what next?  I'd rather not build APIs around sysfs, so we need a netlink API
we can build on top of this.  It's not really rtnl.  Maybe genl would work?
Whatever it is, we'd need to teach iproute2 about a new 'switch' command.

Netlink API would allow us to represent switch-wide objects such as registers,
tables, stats, firmware, and maybe even control.  I think with netlink
TLVs, we can create a framework for these objects but still allow the switch
driver to provide switch-specific info.  For example, a table object:

[TABLES]
[TABLE]
[FIELDS]
[FIELD]
[ID, TYPE]
[DATA]
[ID, VALUE]

Maybe iproute2 has pretty-printers for specific switches like ethtool has for
reg dumps.

I don't know about how this overlaps with DSA platform_class.  Florian?

Comments?


Scott Feldman (2):
  switchdev: create new switchdev device class
  rocker: register each switch as a switchdev

 drivers/net/ethernet/rocker/rocker.c |   23 ++
 include/net/switchdev.h  |   16 +++
 net/switchdev/switchdev.c|   76 ++
 3 files changed, 115 insertions(+)

-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH net-next 1/2] switchdev: create new switchdev device class

2015-08-27 Thread sfeldma
From: Scott Feldman 

Signed-off-by: Scott Feldman 
---
 include/net/switchdev.h   |   16 ++
 net/switchdev/switchdev.c |   76 +
 2 files changed, 92 insertions(+)

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 319baab..d61e73c 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -16,6 +16,11 @@
 
 #define SWITCHDEV_F_NO_RECURSE BIT(0)
 
+struct switchdev {
+   struct device dev;
+   atomic_t foo;
+};
+
 enum switchdev_trans {
SWITCHDEV_TRANS_NONE,
SWITCHDEV_TRANS_PREPARE,
@@ -126,6 +131,8 @@ switchdev_notifier_info_to_dev(const struct 
switchdev_notifier_info *info)
 
 #ifdef CONFIG_NET_SWITCHDEV
 
+int register_switchdev(struct switchdev *sdev, const char *name);
+void unregister_switchdev(struct switchdev *sdev);
 int switchdev_port_attr_get(struct net_device *dev,
struct switchdev_attr *attr);
 int switchdev_port_attr_set(struct net_device *dev,
@@ -164,6 +171,15 @@ void switchdev_port_fwd_mark_set(struct net_device *dev,
 
 #else
 
+static inline int register_switchdev(struct switchdev *sdev, const char *name)
+{
+   return -EOPNOTSUPP;
+}
+
+static inline void unregister_switchdev(struct switchdev *sdev)
+{
+}
+
 static inline int switchdev_port_attr_get(struct net_device *dev,
  struct switchdev_attr *attr)
 {
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 16c1c43..f705202 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -10,6 +10,7 @@
  */
 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -19,6 +20,63 @@
 #include 
 #include 
 
+#define to_switchdev(d) container_of(d, struct switchdev, dev)
+
+static void switchdev_release(struct device *dev)
+{
+}
+
+static ssize_t foo_show(struct device *dev, struct device_attribute *attr,
+   char *buf)
+{
+   struct switchdev *switchdev = to_switchdev(dev);
+
+   return sprintf(buf, "%d\n", atomic_read(&switchdev->foo));
+}
+
+static DEVICE_ATTR_RO(foo);
+
+static struct attribute *switchdev_attrs[] = {
+   &dev_attr_foo.attr
+};
+
+ATTRIBUTE_GROUPS(switchdev);
+
+static int switchdev_uevent(struct device *d, struct kobj_uevent_env *env)
+{
+   return 0;
+}
+
+static struct class switchdev_class = {
+   .name = "switchdev",
+   .dev_release = switchdev_release,
+   .dev_groups = switchdev_groups,
+   .dev_uevent = switchdev_uevent,
+};
+
+int register_switchdev(struct switchdev *sdev, const char *name)
+{
+   struct device *dev = &sdev->dev;
+   int err;
+
+   device_initialize(dev);
+
+   dev->class = &switchdev_class;
+
+   err = dev_set_name(dev, "%s", name);
+   if (err)
+   return err;
+
+   return device_add(dev);
+}
+EXPORT_SYMBOL_GPL(register_switchdev);
+
+void unregister_switchdev(struct switchdev *sdev)
+{
+   put_device(&sdev->dev);
+}
+EXPORT_SYMBOL_GPL(unregister_switchdev);
+
 /**
  * switchdev_port_attr_get - Get port attribute
  *
@@ -1142,3 +1200,21 @@ void switchdev_port_fwd_mark_set(struct net_device *dev,
dev->offload_fwd_mark = mark;
 }
 EXPORT_SYMBOL_GPL(switchdev_port_fwd_mark_set);
+
+static int __init switchdev_module_init(void)
+{
+   return class_register(&switchdev_class);
+}
+
+static void __exit switchdev_module_exit(void)
+{
+   class_unregister(&switchdev_class);
+}
+
+module_init(switchdev_module_init);
+module_exit(switchdev_module_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Jiri Pirko ");
+MODULE_AUTHOR("Scott Feldman ");
+MODULE_DESCRIPTION("Ethernet switch device model");
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next] bridge: fix netlink max attr size

2015-08-19 Thread sfeldma
From: Scott Feldman 

.maxtype should match .policy.  Probably just been getting lucky here
because IFLA_BRPORT_MAX > IFLA_BR_MAX.

Fixes: 13323516 ("bridge: implement rtnl_link_ops->changelink")
Signed-off-by: Scott Feldman 
---
 net/bridge/br_netlink.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 01401ea..d2c4d66 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -849,7 +849,7 @@ struct rtnl_link_ops br_link_ops __read_mostly = {
.kind   = "bridge",
.priv_size  = sizeof(struct net_bridge),
.setup  = br_dev_setup,
-   .maxtype= IFLA_BRPORT_MAX,
+   .maxtype= IFLA_BR_MAX,
.policy = br_policy,
.validate   = br_validate,
.newlink= br_dev_newlink,
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v3] rocker: add debugfs support to dump internal tables

2015-08-17 Thread sfeldma
From: Scott Feldman 

> tree /sys/kernel/debug/rocker
/sys/kernel/debug/rocker
└── 525400123501
├── fdb_tbl
├── internal_vlan_tbl
├── neigh_tbl
├── of_dpa_flow_tbl
└── of_dpa_group_tbl

1 directory, 5 files

> cat /sys/kernel/debug/rocker/525400123501/internal_vlan_tbl
ifindex 5 ref_count 1 vlan 3843
ifindex 7 ref_count 2 vlan 3840
ifindex 4 ref_count 1 vlan 3842

> cat /sys/kernel/debug/rocker/525400123501/fdb_tbl
learned 1 pport 1 addr 00:02:00:00:02:00 vlan 3840
learned 1 pport 2 addr 00:02:00:00:03:00 vlan 3840

> cat /sys/kernel/debug/rocker/525400123501/neigh_tbl
11.0.0.9 dev sw1p2 ref_count 3 index 1 dst 00:02:00:00:01:00 ttl_check 1
11.0.0.1 dev sw1p1 ref_count 3 index 0 dst 00:02:00:00:00:00 ttl_check 1

> cat /sys/kernel/debug/rocker/525400123501/of_dpa_flow_tbl
cmd 3 cookie 15 priority 3  tbl aclin_pport 2 
01:80:c2:00:00:00/ff:ff:ff:ff:ff:f0 eth_type 0x vlan_id 3841 ip proto 0/0 
ip tos 0/0 group_id 0x0f01
cmd 3 cookie 2  priority 0  tbl term_mac   in_pport 1 eth_type 0x0800 
52:54:00:12:35:01 vlan_id 3840 goto_tbl ucast_routing copy_to_cpu 0
cmd 3 cookie 1f priority 3  tbl bridge 00:02:00:00:00:00 vlan_id 3840 
tunnel_id 0 goto_tbl acl group_id 0x copy_to_cpu 0
cmd 3 cookie 4  priority 1  tbl vlan   in_pport 2 vlan_id 0 goto_tbl 
term_mac untagged 1 new_vlan_id 3841
cmd 3 cookie 20 priority 0  tbl ucast_routing  eth_type 0x0800 11.0.0.1 
goto_tbl acl group_id 0x2000
cmd 3 cookie 21 priority 3  tbl bridge 00:02:00:00:01:00 vlan_id 3841 
tunnel_id 0 goto_tbl acl group_id 0x copy_to_cpu 0
cmd 3 cookie 16 priority 2  tbl aclin_pport 2 eth_type 0x0806 
vlan_id 3841 ip proto 0/0 ip tos 0/0 group_id 0x0f01
cmd 3 cookie 12 priority 0  tbl ucast_routing  eth_type 0x0800 11.0.0.0 
goto_tbl acl group_id 0x0f00
cmd 3 cookie 9  priority 3  tbl aclin_pport 1 
01:80:c2:00:00:00/ff:ff:ff:ff:ff:f0 eth_type 0x vlan_id 3840 ip proto 0/0 
ip tos 0/0 group_id 0x0f00
cmd 3 cookie 6  priority 0  tbl term_mac   in_pport 2 eth_type 0x86dd 
52:54:00:12:35:02 vlan_id 3841 goto_tbl ucast_routing copy_to_cpu 0
cmd 4 cookie 0  priority 1  tbl ig_portin_pport 0/0x goto_tbl 
vlan
cmd 4 cookie e  priority 0  tbl ucast_routing  eth_type 0x0800 11.0.0.3 
goto_tbl acl group_id 0x0f00
cmd 3 cookie 1  priority 1  tbl vlan   in_pport 1 vlan_id 0 goto_tbl 
term_mac untagged 1 new_vlan_id 3840
cmd 3 cookie 24 priority 20 tbl ucast_routing  eth_type 0x0800 
11.0.0.4/255.255.255.252 goto_tbl acl group_id 0x2000
cmd 4 cookie 14 priority 0  tbl ucast_routing  eth_type 0x0800 11.0.0.10 
goto_tbl acl group_id 0x0f01
cmd 3 cookie 2c priority 20 tbl ucast_routing  eth_type 0x0800 12.0.0.4 
goto_tbl acl group_id 0x2001
cmd 3 cookie 17 priority 1  tbl term_mac   in_pport 2 eth_type 0x0800 
01:00:5e:00:00:00/ff:ff:ff:80:00:00 vlan_id 3841 goto_tbl mcast_routing 
copy_to_cpu 1
cmd 3 cookie 26 priority 20 tbl ucast_routing  eth_type 0x0800 12.0.0.3 
goto_tbl acl group_id 0x2000
cmd 3 cookie 2e priority 30 tbl ucast_routing  eth_type 0x0800 12.0.0.2 
goto_tbl acl group_id 0x0f01
cmd 3 cookie 22 priority 0  tbl ucast_routing  eth_type 0x0800 11.0.0.9 
goto_tbl acl group_id 0x2001
cmd 3 cookie 1c priority 0  tbl ucast_routing  eth_type 0x0800 
11.0.0.8/255.255.255.252 goto_tbl acl group_id 0x0f01
cmd 3 cookie 18 priority 1  tbl term_mac   in_pport 2 eth_type 0x86dd 
33:33:00:00:00:00/ff:ff:00:00:00:00 vlan_id 3841 goto_tbl mcast_routing 
copy_to_cpu 1
cmd 3 cookie 5  priority 0  tbl term_mac   in_pport 2 eth_type 0x0800 
52:54:00:12:35:02 vlan_id 3841 goto_tbl ucast_routing copy_to_cpu 0
cmd 3 cookie a  priority 2  tbl aclin_pport 1 eth_type 0x0806 
vlan_id 3840 ip proto 0/0 ip tos 0/0 group_id 0x0f00
cmd 4 cookie 1a priority 0  tbl ucast_routing  eth_type 0x0800 11.0.0.11 
goto_tbl acl group_id 0x0f01
cmd 3 cookie 1e priority 0  tbl ucast_routing  eth_type 0x0800 11.0.0.8 
goto_tbl acl group_id 0x0f01
cmd 3 cookie 3  priority 0  tbl term_mac   in_pport 1 eth_type 0x86dd 
52:54:00:12:35:01 vlan_id 3840 goto_tbl ucast_routing copy_to_cpu 0
cmd 3 cookie b  priority 1  tbl term_mac   in_pport 1 eth_type 0x0800 
01:00:5e:00:00:00/ff:ff:ff:80:00:00 vlan_id 3840 goto_tbl mcast_routing 
copy_to_cpu 1
cmd 4 cookie 8  priority 0  tbl ucast_routing  eth_type 0x0800 11.0.0.2 
goto_tbl acl group_id 0x0f00
cmd 3 cookie 10 priority 0  tbl ucast_routing  eth_type 0x0800 
11.0.0.0/255.255.255.252 goto_tbl acl group_id 0x0f00
cmd 3 cookie 28 priority 20 tbl ucast_routing  eth_type 0x0800 
11.0.0.12/255.255.255.252 goto_tbl acl group_id 0x2001
cmd 3 cookie c  priority 1  tbl term_mac   in_pport 1 eth_type 0x86dd 
33:33:00:00:00:00/ff:ff:00:00:00:00 vlan_id 3840 goto_tbl mcast_routing 
copy_to_cpu 1

> cat /sys/kernel/debug/rocker/525400123501/of_dpa_group_tbl
cmd 7 group_id 0x0f00 (L2 interface v

[PATCH net-next v2] rocker: add debugfs support to dump internal tables

2015-08-16 Thread sfeldma
From: Scott Feldman 

> tree /sys/kernel/debug/rocker
/sys/kernel/debug/rocker
└── 525400123501
├── fdb_tbl
├── internal_vlan_tbl
├── neigh_tbl
├── of_dpa_flow_tbl
└── of_dpa_group_tbl

1 directory, 5 files

> cat /sys/kernel/debug/rocker/525400123501/internal_vlan_tbl
ifindex 5 ref_count 1 vlan 3843
ifindex 7 ref_count 2 vlan 3840
ifindex 4 ref_count 1 vlan 3842

> cat /sys/kernel/debug/rocker/525400123501/fdb_tbl
learned 1 pport 1 addr 00:02:00:00:02:00 vlan 3840
learned 1 pport 2 addr 00:02:00:00:03:00 vlan 3840

> cat /sys/kernel/debug/rocker/525400123501/neigh_tbl
11.0.0.9 dev sw1p2 ref_count 3 index 1 dst 00:02:00:00:01:00 ttl_check 1
11.0.0.1 dev sw1p1 ref_count 3 index 0 dst 00:02:00:00:00:00 ttl_check 1

> cat /sys/kernel/debug/rocker/525400123501/of_dpa_flow_tbl
cmd 3 cookie 15 priority 3  tbl aclin_pport 2 
01:80:c2:00:00:00/ff:ff:ff:ff:ff:f0 eth_type 0x vlan_id 3841 ip proto 0/0 
ip tos 0/0 group_id 0x0f01
cmd 3 cookie 2  priority 0  tbl term_mac   in_pport 1 eth_type 0x0800 
52:54:00:12:35:01 vlan_id 3840 goto_tbl ucast_routing copy_to_cpu 0
cmd 3 cookie 1f priority 3  tbl bridge 00:02:00:00:00:00 vlan_id 3840 
tunnel_id 0 goto_tbl acl group_id 0x copy_to_cpu 0
cmd 3 cookie 4  priority 1  tbl vlan   in_pport 2 vlan_id 0 goto_tbl 
term_mac untagged 1 new_vlan_id 3841
cmd 3 cookie 20 priority 0  tbl ucast_routing  eth_type 0x0800 11.0.0.1 
goto_tbl acl group_id 0x2000
cmd 3 cookie 21 priority 3  tbl bridge 00:02:00:00:01:00 vlan_id 3841 
tunnel_id 0 goto_tbl acl group_id 0x copy_to_cpu 0
cmd 3 cookie 16 priority 2  tbl aclin_pport 2 eth_type 0x0806 
vlan_id 3841 ip proto 0/0 ip tos 0/0 group_id 0x0f01
cmd 3 cookie 12 priority 0  tbl ucast_routing  eth_type 0x0800 11.0.0.0 
goto_tbl acl group_id 0x0f00
cmd 3 cookie 9  priority 3  tbl aclin_pport 1 
01:80:c2:00:00:00/ff:ff:ff:ff:ff:f0 eth_type 0x vlan_id 3840 ip proto 0/0 
ip tos 0/0 group_id 0x0f00
cmd 3 cookie 6  priority 0  tbl term_mac   in_pport 2 eth_type 0x86dd 
52:54:00:12:35:02 vlan_id 3841 goto_tbl ucast_routing copy_to_cpu 0
cmd 4 cookie 0  priority 1  tbl ig_portin_pport 0/0x goto_tbl 
vlan
cmd 4 cookie e  priority 0  tbl ucast_routing  eth_type 0x0800 11.0.0.3 
goto_tbl acl group_id 0x0f00
cmd 3 cookie 1  priority 1  tbl vlan   in_pport 1 vlan_id 0 goto_tbl 
term_mac untagged 1 new_vlan_id 3840
cmd 3 cookie 24 priority 20 tbl ucast_routing  eth_type 0x0800 
11.0.0.4/255.255.255.252 goto_tbl acl group_id 0x2000
cmd 4 cookie 14 priority 0  tbl ucast_routing  eth_type 0x0800 11.0.0.10 
goto_tbl acl group_id 0x0f01
cmd 3 cookie 2c priority 20 tbl ucast_routing  eth_type 0x0800 12.0.0.4 
goto_tbl acl group_id 0x2001
cmd 3 cookie 17 priority 1  tbl term_mac   in_pport 2 eth_type 0x0800 
01:00:5e:00:00:00/ff:ff:ff:80:00:00 vlan_id 3841 goto_tbl mcast_routing 
copy_to_cpu 1
cmd 3 cookie 26 priority 20 tbl ucast_routing  eth_type 0x0800 12.0.0.3 
goto_tbl acl group_id 0x2000
cmd 3 cookie 2e priority 30 tbl ucast_routing  eth_type 0x0800 12.0.0.2 
goto_tbl acl group_id 0x0f01
cmd 3 cookie 22 priority 0  tbl ucast_routing  eth_type 0x0800 11.0.0.9 
goto_tbl acl group_id 0x2001
cmd 3 cookie 1c priority 0  tbl ucast_routing  eth_type 0x0800 
11.0.0.8/255.255.255.252 goto_tbl acl group_id 0x0f01
cmd 3 cookie 18 priority 1  tbl term_mac   in_pport 2 eth_type 0x86dd 
33:33:00:00:00:00/ff:ff:00:00:00:00 vlan_id 3841 goto_tbl mcast_routing 
copy_to_cpu 1
cmd 3 cookie 5  priority 0  tbl term_mac   in_pport 2 eth_type 0x0800 
52:54:00:12:35:02 vlan_id 3841 goto_tbl ucast_routing copy_to_cpu 0
cmd 3 cookie a  priority 2  tbl aclin_pport 1 eth_type 0x0806 
vlan_id 3840 ip proto 0/0 ip tos 0/0 group_id 0x0f00
cmd 4 cookie 1a priority 0  tbl ucast_routing  eth_type 0x0800 11.0.0.11 
goto_tbl acl group_id 0x0f01
cmd 3 cookie 1e priority 0  tbl ucast_routing  eth_type 0x0800 11.0.0.8 
goto_tbl acl group_id 0x0f01
cmd 3 cookie 3  priority 0  tbl term_mac   in_pport 1 eth_type 0x86dd 
52:54:00:12:35:01 vlan_id 3840 goto_tbl ucast_routing copy_to_cpu 0
cmd 3 cookie b  priority 1  tbl term_mac   in_pport 1 eth_type 0x0800 
01:00:5e:00:00:00/ff:ff:ff:80:00:00 vlan_id 3840 goto_tbl mcast_routing 
copy_to_cpu 1
cmd 4 cookie 8  priority 0  tbl ucast_routing  eth_type 0x0800 11.0.0.2 
goto_tbl acl group_id 0x0f00
cmd 3 cookie 10 priority 0  tbl ucast_routing  eth_type 0x0800 
11.0.0.0/255.255.255.252 goto_tbl acl group_id 0x0f00
cmd 3 cookie 28 priority 20 tbl ucast_routing  eth_type 0x0800 
11.0.0.12/255.255.255.252 goto_tbl acl group_id 0x2001
cmd 3 cookie c  priority 1  tbl term_mac   in_pport 1 eth_type 0x86dd 
33:33:00:00:00:00/ff:ff:00:00:00:00 vlan_id 3840 goto_tbl mcast_routing 
copy_to_cpu 1

> cat /sys/kernel/debug/rocker/525400123501/of_dpa_group_tbl
cmd 7 group_id 0x0f00 (L2 interface v

[PATCH net-next] rocker: add debugfs support to dump internal tables

2015-08-13 Thread sfeldma
From: Scott Feldman 

> tree /sys/kernel/debug/rocker
/sys/kernel/debug/rocker
└── 525400123501
├── fdb_tbl
├── internal_vlan_tbl
├── neigh_tbl
├── of_dpa_flow_tbl
└── of_dpa_group_tbl

1 directory, 5 files

> cat /sys/kernel/debug/rocker/525400123501/internal_vlan_tbl
ifindex 5 ref_count 1 vlan 3843
ifindex 7 ref_count 2 vlan 3840
ifindex 4 ref_count 1 vlan 3842

> cat /sys/kernel/debug/rocker/525400123501/fdb_tbl
learned 1 pport 1 addr 00:02:00:00:02:00 vlan 3840
learned 1 pport 2 addr 00:02:00:00:03:00 vlan 3840

> cat /sys/kernel/debug/rocker/525400123501/neigh_tbl
11.0.0.9 dev sw1p2 ref_count 3 index 1 dst 00:02:00:00:01:00 ttl_check 1
11.0.0.1 dev sw1p1 ref_count 3 index 0 dst 00:02:00:00:00:00 ttl_check 1

> cat /sys/kernel/debug/rocker/525400123501/of_dpa_flow_tbl
cmd 3 cookie 15 priority 3  tbl aclin_pport 2 
01:80:c2:00:00:00/ff:ff:ff:ff:ff:f0 eth_type 0x vlan_id 3841 ip proto 0/0 
ip tos 0/0 group_id 0x0f01
cmd 3 cookie 2  priority 0  tbl term_mac   in_pport 1 eth_type 0x0800 
52:54:00:12:35:01 vlan_id 3840 goto_tbl ucast_routing copy_to_cpu 0
cmd 3 cookie 1f priority 3  tbl bridge 00:02:00:00:00:00 vlan_id 3840 
tunnel_id 0 goto_tbl acl group_id 0x copy_to_cpu 0
cmd 3 cookie 4  priority 1  tbl vlan   in_pport 2 vlan_id 0 goto_tbl 
term_mac untagged 1 new_vlan_id 3841
cmd 3 cookie 20 priority 0  tbl ucast_routing  eth_type 0x0800 11.0.0.1 
goto_tbl acl group_id 0x2000
cmd 3 cookie 21 priority 3  tbl bridge 00:02:00:00:01:00 vlan_id 3841 
tunnel_id 0 goto_tbl acl group_id 0x copy_to_cpu 0
cmd 3 cookie 16 priority 2  tbl aclin_pport 2 eth_type 0x0806 
vlan_id 3841 ip proto 0/0 ip tos 0/0 group_id 0x0f01
cmd 3 cookie 12 priority 0  tbl ucast_routing  eth_type 0x0800 11.0.0.0 
goto_tbl acl group_id 0x0f00
cmd 3 cookie 9  priority 3  tbl aclin_pport 1 
01:80:c2:00:00:00/ff:ff:ff:ff:ff:f0 eth_type 0x vlan_id 3840 ip proto 0/0 
ip tos 0/0 group_id 0x0f00
cmd 3 cookie 6  priority 0  tbl term_mac   in_pport 2 eth_type 0x86dd 
52:54:00:12:35:02 vlan_id 3841 goto_tbl ucast_routing copy_to_cpu 0
cmd 4 cookie 0  priority 1  tbl ig_portin_pport 0/0x goto_tbl 
vlan
cmd 4 cookie e  priority 0  tbl ucast_routing  eth_type 0x0800 11.0.0.3 
goto_tbl acl group_id 0x0f00
cmd 3 cookie 1  priority 1  tbl vlan   in_pport 1 vlan_id 0 goto_tbl 
term_mac untagged 1 new_vlan_id 3840
cmd 3 cookie 24 priority 20 tbl ucast_routing  eth_type 0x0800 
11.0.0.4/255.255.255.252 goto_tbl acl group_id 0x2000
cmd 4 cookie 14 priority 0  tbl ucast_routing  eth_type 0x0800 11.0.0.10 
goto_tbl acl group_id 0x0f01
cmd 3 cookie 2c priority 20 tbl ucast_routing  eth_type 0x0800 12.0.0.4 
goto_tbl acl group_id 0x2001
cmd 3 cookie 17 priority 1  tbl term_mac   in_pport 2 eth_type 0x0800 
01:00:5e:00:00:00/ff:ff:ff:80:00:00 vlan_id 3841 goto_tbl mcast_routing 
copy_to_cpu 1
cmd 3 cookie 26 priority 20 tbl ucast_routing  eth_type 0x0800 12.0.0.3 
goto_tbl acl group_id 0x2000
cmd 3 cookie 2e priority 30 tbl ucast_routing  eth_type 0x0800 12.0.0.2 
goto_tbl acl group_id 0x0f01
cmd 3 cookie 22 priority 0  tbl ucast_routing  eth_type 0x0800 11.0.0.9 
goto_tbl acl group_id 0x2001
cmd 3 cookie 1c priority 0  tbl ucast_routing  eth_type 0x0800 
11.0.0.8/255.255.255.252 goto_tbl acl group_id 0x0f01
cmd 3 cookie 18 priority 1  tbl term_mac   in_pport 2 eth_type 0x86dd 
33:33:00:00:00:00/ff:ff:00:00:00:00 vlan_id 3841 goto_tbl mcast_routing 
copy_to_cpu 1
cmd 3 cookie 5  priority 0  tbl term_mac   in_pport 2 eth_type 0x0800 
52:54:00:12:35:02 vlan_id 3841 goto_tbl ucast_routing copy_to_cpu 0
cmd 3 cookie a  priority 2  tbl aclin_pport 1 eth_type 0x0806 
vlan_id 3840 ip proto 0/0 ip tos 0/0 group_id 0x0f00
cmd 4 cookie 1a priority 0  tbl ucast_routing  eth_type 0x0800 11.0.0.11 
goto_tbl acl group_id 0x0f01
cmd 3 cookie 1e priority 0  tbl ucast_routing  eth_type 0x0800 11.0.0.8 
goto_tbl acl group_id 0x0f01
cmd 3 cookie 3  priority 0  tbl term_mac   in_pport 1 eth_type 0x86dd 
52:54:00:12:35:01 vlan_id 3840 goto_tbl ucast_routing copy_to_cpu 0
cmd 3 cookie b  priority 1  tbl term_mac   in_pport 1 eth_type 0x0800 
01:00:5e:00:00:00/ff:ff:ff:80:00:00 vlan_id 3840 goto_tbl mcast_routing 
copy_to_cpu 1
cmd 4 cookie 8  priority 0  tbl ucast_routing  eth_type 0x0800 11.0.0.2 
goto_tbl acl group_id 0x0f00
cmd 3 cookie 10 priority 0  tbl ucast_routing  eth_type 0x0800 
11.0.0.0/255.255.255.252 goto_tbl acl group_id 0x0f00
cmd 3 cookie 28 priority 20 tbl ucast_routing  eth_type 0x0800 
11.0.0.12/255.255.255.252 goto_tbl acl group_id 0x2001
cmd 3 cookie c  priority 1  tbl term_mac   in_pport 1 eth_type 0x86dd 
33:33:00:00:00:00/ff:ff:00:00:00:00 vlan_id 3840 goto_tbl mcast_routing 
copy_to_cpu 1

> cat /sys/kernel/debug/rocker/525400123501/of_dpa_group_tbl
cmd 7 group_id 0x0f00 (L2 interface v

[PATCH net-next] rocker: hook ndo_neigh_destroy to cleanup neigh refs in driver

2015-08-12 Thread sfeldma
From: Scott Feldman 

Rocker driver tracks arp_tbl neighs to resolve IPv4 route nexthops.  The
driver uses NETEVENT_NEIGH_UPDATE for neigh adds and updates, but there is
no event when the neigh is removed from the device (such as when the device
goes admin down).  This patch hooks ndo_neigh_destroy so the driver can
know when a neigh is removed from the device.  In response, the driver will
purge the neigh entry from its internal tbl.

I didn't find any in-tree users of ndo_neigh_destroy, so I'm not sure if
this ndo is vestigial or if there are out-of-tree users.  In any case, it
does what I need here.  An alternative design would be to generate
NETEVENT_NEIGH_UPDATE event when neigh is being destroyed, setting state to
NUD_NONE so driver knows neigh entry is dead.

Signed-off-by: Scott Feldman 
---
 Documentation/networking/switchdev.txt |3 ++-
 drivers/net/ethernet/rocker/rocker.c   |   11 +++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/Documentation/networking/switchdev.txt 
b/Documentation/networking/switchdev.txt
index 9825f32..476df04 100644
--- a/Documentation/networking/switchdev.txt
+++ b/Documentation/networking/switchdev.txt
@@ -367,4 +367,5 @@ driver's rocker_port_ipv4_resolve() for an example.
 
 The driver can monitor for updates to arp_tbl using the netevent notifier
 NETEVENT_NEIGH_UPDATE.  The device can be programmed with resolved nexthops
-for the routes as arp_tbl updates.
+for the routes as arp_tbl updates.  The driver implements ndo_neigh_destroy
+to know when arp_tbl neighbor entries are purged from the port.
diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index af05075..619b2e2 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -4264,6 +4264,16 @@ static int rocker_port_change_proto_down(struct 
net_device *dev,
return 0;
 }
 
+static void rocker_port_neigh_destroy(struct neighbour *n)
+{
+   struct rocker_port *rocker_port = netdev_priv(n->dev);
+   int flags = ROCKER_OP_FLAG_REMOVE | ROCKER_OP_FLAG_NOWAIT;
+   __be32 ip_addr = *(__be32 *)n->primary_key;
+
+   rocker_port_ipv4_neigh(rocker_port, SWITCHDEV_TRANS_NONE,
+  flags, ip_addr, n->ha);
+}
+
 static const struct net_device_ops rocker_port_netdev_ops = {
.ndo_open   = rocker_port_open,
.ndo_stop   = rocker_port_stop,
@@ -4278,6 +4288,7 @@ static const struct net_device_ops rocker_port_netdev_ops 
= {
.ndo_fdb_dump   = switchdev_port_fdb_dump,
.ndo_get_phys_port_name = rocker_port_get_phys_port_name,
.ndo_change_proto_down  = rocker_port_change_proto_down,
+   .ndo_neigh_destroy  = rocker_port_neigh_destroy,
 };
 
 /
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next] rocker: print switch ID consistent with phys_switch_id sysfs node

2015-08-12 Thread sfeldma
From: Scott Feldman 

On successful probe, driver prints the switch ID.  This patch changes the
format of the printed ID to match what's used in sysfs phys_switch_id node.

Signed-off-by: Scott Feldman 
---
 drivers/net/ethernet/rocker/rocker.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index 619b2e2..d963cdc 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -5193,7 +5193,8 @@ static int rocker_probe(struct pci_dev *pdev, const 
struct pci_device_id *id)
goto err_probe_ports;
}
 
-   dev_info(&pdev->dev, "Rocker switch with id %016llx\n", rocker->hw.id);
+   dev_info(&pdev->dev, "Rocker switch with id %*phN\n",
+(int)sizeof(rocker->hw.id), &rocker->hw.id);
 
return 0;
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next 2/2] rocker: use netdev_err after register_netdev

2015-08-03 Thread sfeldma
From: Scott Feldman 

After successful register_netdev, we can use netdev_err rather than the more
generic dev_err.

Signed-off-by: Scott Feldman 
---
 drivers/net/ethernet/rocker/rocker.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index 0ab3a3b..4e8cad0 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -4985,7 +4985,7 @@ static int rocker_probe_port(struct rocker *rocker, 
unsigned int port_number)
 
err = rocker_port_ig_tbl(rocker_port, SWITCHDEV_TRANS_NONE, 0);
if (err) {
-   dev_err(&pdev->dev, "install ig port table failed\n");
+   netdev_err(rocker_port->dev, "install ig port table failed\n");
goto err_port_ig_tbl;
}
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next 1/2] rocker: NULL port if port probe fails

2015-08-03 Thread sfeldma
From: Scott Feldman 

Set port to NULL if port probe fails so we don't try to remove partially
initialized port on port probe err cleanup path.

Signed-off-by: Scott Feldman 
---
 drivers/net/ethernet/rocker/rocker.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index 7b4c347..0ab3a3b 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -5005,6 +5005,7 @@ err_untagged_vlan:
rocker_port_ig_tbl(rocker_port, SWITCHDEV_TRANS_NONE,
   ROCKER_OP_FLAG_REMOVE);
 err_port_ig_tbl:
+   rocker->ports[port_number] = NULL;
unregister_netdev(dev);
 err_register_netdev:
free_netdev(dev);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v3 4/5] rocker: add offload_fwd_mark support

2015-07-18 Thread sfeldma
From: Scott Feldman 

If device flags ingress packet as "fwd offload", mark the
skb->offload_fwd_mark using the ingress port's dev->offload_fwd_mark.  This
will be the hint to the kernel that this packet has already been forwarded
by device to egress ports matching skb->offload_fwd_mark.

For rocker, derive port dev->offload_fwd_mark based on device switch ID and
port ifindex.  If port is bridged, use the bridge ifindex rather than the
port ifindex.

Signed-off-by: Scott Feldman 
Acked-by: Jiri Pirko 
---
 drivers/net/ethernet/rocker/rocker.c |   11 +++
 drivers/net/ethernet/rocker/rocker.h |1 +
 2 files changed, 12 insertions(+)

diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index 9324283..0fdfa47 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -4800,6 +4800,7 @@ static int rocker_port_rx_proc(const struct rocker 
*rocker,
const struct rocker_tlv *attrs[ROCKER_TLV_RX_MAX + 1];
struct sk_buff *skb = rocker_desc_cookie_ptr_get(desc_info);
size_t rx_len;
+   u16 rx_flags = 0;
 
if (!skb)
return -ENOENT;
@@ -4807,6 +4808,8 @@ static int rocker_port_rx_proc(const struct rocker 
*rocker,
rocker_tlv_parse_desc(attrs, ROCKER_TLV_RX_MAX, desc_info);
if (!attrs[ROCKER_TLV_RX_FRAG_LEN])
return -EINVAL;
+   if (attrs[ROCKER_TLV_RX_FLAGS])
+   rx_flags = rocker_tlv_get_u16(attrs[ROCKER_TLV_RX_FLAGS]);
 
rocker_dma_rx_ring_skb_unmap(rocker, attrs);
 
@@ -4814,6 +4817,9 @@ static int rocker_port_rx_proc(const struct rocker 
*rocker,
skb_put(skb, rx_len);
skb->protocol = eth_type_trans(skb, rocker_port->dev);
 
+   if (rx_flags & ROCKER_RX_FLAGS_FWD_OFFLOAD)
+   skb->offload_fwd_mark = rocker_port->dev->offload_fwd_mark;
+
rocker_port->dev->stats.rx_packets++;
rocker_port->dev->stats.rx_bytes += skb->len;
 
@@ -4951,6 +4957,8 @@ static int rocker_probe_port(struct rocker *rocker, 
unsigned int port_number)
}
rocker->ports[port_number] = rocker_port;
 
+   switchdev_port_fwd_mark_set(rocker_port->dev, NULL, false);
+
rocker_port_set_learning(rocker_port, SWITCHDEV_TRANS_NONE);
 
err = rocker_port_ig_tbl(rocker_port, SWITCHDEV_TRANS_NONE, 0);
@@ -5230,6 +5238,7 @@ static int rocker_port_bridge_join(struct rocker_port 
*rocker_port,
rocker_port_internal_vlan_id_get(rocker_port, bridge->ifindex);
 
rocker_port->bridge_dev = bridge;
+   switchdev_port_fwd_mark_set(rocker_port->dev, bridge, true);
 
return rocker_port_vlan_add(rocker_port, SWITCHDEV_TRANS_NONE,
untagged_vid, 0);
@@ -5250,6 +5259,8 @@ static int rocker_port_bridge_leave(struct rocker_port 
*rocker_port)
rocker_port_internal_vlan_id_get(rocker_port,
 rocker_port->dev->ifindex);
 
+   switchdev_port_fwd_mark_set(rocker_port->dev, rocker_port->bridge_dev,
+   false);
rocker_port->bridge_dev = NULL;
 
err = rocker_port_vlan_add(rocker_port, SWITCHDEV_TRANS_NONE,
diff --git a/drivers/net/ethernet/rocker/rocker.h 
b/drivers/net/ethernet/rocker/rocker.h
index 08b2c3d..12490b2 100644
--- a/drivers/net/ethernet/rocker/rocker.h
+++ b/drivers/net/ethernet/rocker/rocker.h
@@ -246,6 +246,7 @@ enum {
 #define ROCKER_RX_FLAGS_TCPBIT(5)
 #define ROCKER_RX_FLAGS_UDPBIT(6)
 #define ROCKER_RX_FLAGS_TCP_UDP_CSUM_GOOD  BIT(7)
+#define ROCKER_RX_FLAGS_FWD_OFFLOADBIT(8)
 
 enum {
ROCKER_TLV_TX_UNSPEC,
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v3 5/5] switchdev: update documentation for offload_fwd_mark

2015-07-18 Thread sfeldma
From: Scott Feldman 

Signed-off-by: Scott Feldman 
Acked-by: Jiri Pirko 
---
 Documentation/networking/switchdev.txt |   14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/Documentation/networking/switchdev.txt 
b/Documentation/networking/switchdev.txt
index c5d7ade..9825f32 100644
--- a/Documentation/networking/switchdev.txt
+++ b/Documentation/networking/switchdev.txt
@@ -279,8 +279,18 @@ and unknown unicast packets to all ports in domain, if 
allowed by port's
 current STP state.  The switch driver, knowing which ports are within which
 vlan L2 domain, can program the switch device for flooding.  The packet should
 also be sent to the port netdev for processing by the bridge driver.  The
-bridge should not reflood the packet to the same ports the device flooded.
-XXX: the mechanism to avoid duplicate flood packets is being discuseed.
+bridge should not reflood the packet to the same ports the device flooded,
+otherwise there will be duplicate packets on the wire.
+
+To avoid duplicate packets, the device/driver should mark a packet as already
+forwarded using skb->offload_fwd_mark.  The same mark is set on the device
+ports in the domain using dev->offload_fwd_mark.  If the skb->offload_fwd_mark
+is non-zero and matches the forwarding egress port's dev->skb_mark, the kernel
+will drop the skb right before transmit on the egress port, with the
+understanding that the device already forwarded the packet on same egress port.
+The driver can use switchdev_port_fwd_mark_set() to set a globally unique mark
+for port's dev->offload_fwd_mark, based on the port's parent ID (switch ID) and
+a group ifindex.
 
 It is possible for the switch device to not handle flooding and push the
 packets up to the bridge driver for flooding.  This is not ideal as the number
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v3 2/5] net: add phys ID compare helper to test if two IDs are the same

2015-07-18 Thread sfeldma
From: Scott Feldman 

Signed-off-by: Scott Feldman 
Acked-by: Jiri Pirko 
---
 include/linux/netdevice.h |7 +++
 net/switchdev/switchdev.c |8 ++--
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 8364f29..607b5f4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -766,6 +766,13 @@ struct netdev_phys_item_id {
unsigned char id_len;
 };
 
+static inline bool netdev_phys_item_id_same(struct netdev_phys_item_id *a,
+   struct netdev_phys_item_id *b)
+{
+   return a->id_len == b->id_len &&
+  memcmp(a->id, b->id, a->id_len) == 0;
+}
+
 typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
   struct sk_buff *skb);
 
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 9f2add3..4e5bba5 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -910,13 +910,9 @@ static struct net_device *switchdev_get_dev_by_nhs(struct 
fib_info *fi)
if (switchdev_port_attr_get(dev, &attr))
return NULL;
 
-   if (nhsel > 0) {
-   if (prev_attr.u.ppid.id_len != attr.u.ppid.id_len)
+   if (nhsel > 0 &&
+   !netdev_phys_item_id_same(&prev_attr.u.ppid, &attr.u.ppid))
return NULL;
-   if (memcmp(prev_attr.u.ppid.id, attr.u.ppid.id,
-  attr.u.ppid.id_len))
-   return NULL;
-   }
 
prev_attr = attr;
}
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v3 0/5] switchdev: avoid duplicate packet forwarding

2015-07-18 Thread sfeldma
From: Scott Feldman 

v3:

 - Per Nicolas Dichtel review: remove errant empty union.

v2:

 - Per davem review: in sk_buff, union fwd_mark with secmark to save space
   since features appear to be mutually exclusive.
 - Per Simon Horman review:
   - fix grammar in switchdev.txt wrt fwd_mark
   - remove some unrelated changes that snuck in

v1:

This patchset was previously submitted as RFC.  No changes from the last
version (v2) sent under RFC.  Including RFC version history here for reference.

RFC v2:

 - s/fwd_mark/offload_fwd_mark
 - use consume_skb rather than kfree_skb when dropping pkt on egress.
 - Use Jiri's suggestion to use ifindex of one of the ports in a group
   as the mark for all the ports in the group.  This can be done with
   no additional storage (no hashtable from v1).  To pull it off, we
   need some simple recursive routines to walk the netdev tree ensuring
   all leaves in the tree (ports) in the same group (e.g. bridge)
   belonging to the same switch device will have the same offload fwd mark.
   Maybe someone sees a better design for the recursive routines?  They're
   not too bad, and should cover the stacked driver cases.

RFC v1:

With switchdev support for offloading L2/L3 forwarding data path to a
switch device, we have a general problem where both the device and the
kernel may forward the packet, resulting in duplicate packets on the wire.
Anytime a packet is forwarded by the device and a copy is sent to the CPU,
there is potential for duplicate forwarding, as the kernel may also do a
forwarding lookup and send the packet on the wire.

The specific problem this patch series is interested in solving is avoiding
duplicate packets on bridged ports.  There was a previous RFC from Roopa
(http://marc.info/?l=linux-netdev&m=142687073314252&w=2) to address this
problem, but didn't solve the problem of mixed ports in the bridge from
different devices; there was no way to exclude some ports from forwarding
and include others.  This RFC solves that problem by tagging the ingressing
packet with a unique mark, and then comparing the packet mark with the
egress port mark, and skip forwarding when there is a match.  For the mixed
ports bridge case, only those ports with matching marks are skipped.

The switchdev port driver must do two things:

1) Generate a fwd_mark for each switch port, using some unique key of the
   switch device (and optionally port).  This is done when the port netdev
   is registered or if the port's group membership changes (joins/leaves
   a bridge, for example).

2) On packet ingress from port, mark the skb with the ingress port's
   fwd_mark.  If the device supports it, it's useful to only mark skbs
   which were already forwarded by the device.  If the device does not
   support such indication, all skbs can be marked, even if they're
   local dst.

Two new 32-bit fields are added to struct sk_buff and struct netdevice to
hold the fwd_mark.  I've wrapped these with CONFIG_NET_SWITCHDEV for now. I
tried using skb->mark for this purpose, but ebtables can overwrite the
skb->mark before the bridge gets it, so that will not work.

In general, this fwd_mark can be used for any case where a packet is
forwarded by the device and a copy is sent to the CPU, to avoid the kernel
re-forwarding the packet.  sFlow is another use-case that comes to mind,
but I haven't explored the details.
Scott Feldman (5):
  net: don't reforward packets already forwarded by offload device
  net: add phys ID compare helper to test if two IDs are the same
  switchdev: add offload_fwd_mark generator helper
  rocker: add offload_fwd_mark support
  switchdev: update documentation for offload_fwd_mark

 Documentation/networking/switchdev.txt |   14 +++-
 drivers/net/ethernet/rocker/rocker.c   |   11 
 drivers/net/ethernet/rocker/rocker.h   |1 +
 include/linux/netdevice.h  |   13 
 include/linux/skbuff.h |9 ++-
 include/net/switchdev.h|9 +++
 net/core/dev.c |   10 +++
 net/switchdev/switchdev.c  |  111 ++--
 8 files changed, 169 insertions(+), 9 deletions(-)

-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v3 1/5] net: don't reforward packets already forwarded by offload device

2015-07-18 Thread sfeldma
From: Scott Feldman 

Just before queuing skb for xmit on port, check if skb has been marked by
switchdev port driver as already forwarded by device.  If so, drop skb.  A
non-zero skb->offload_fwd_mark field is set by the switchdev port
driver/device on ingress to indicate the skb has already been forwarded by
the device to egress ports with matching dev->offload_fwd_mark.  The switchdev
port driver would assign a non-zero dev->offload_fwd_mark for each device port
netdev during registration, for example.

Signed-off-by: Scott Feldman 
Acked-by: Jiri Pirko 
Acked-by: Roopa Prabhu 
---
 include/linux/netdevice.h |6 ++
 include/linux/skbuff.h|9 -
 net/core/dev.c|   10 ++
 3 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 45cfd79..8364f29 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1456,6 +1456,8 @@ enum netdev_priv_flags {
  *
  * @xps_maps:  XXX: need comments on this one
  *
+ * @offload_fwd_mark:  Offload device fwding mark
+ *
  * @trans_start:   Time (in jiffies) of last Tx
  * @watchdog_timeo:Represents the timeout that is used by
  * the watchdog ( see dev_watchdog() )
@@ -1697,6 +1699,10 @@ struct net_device {
struct xps_dev_maps __rcu *xps_maps;
 #endif
 
+#ifdef CONFIG_NET_SWITCHDEV
+   u32 offload_fwd_mark;
+#endif
+
/* These may be needed for future network-power-down code. */
 
/*
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d6cdd6e..af7a096 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -506,6 +506,7 @@ static inline u32 skb_mstamp_us_delta(const struct 
skb_mstamp *t1,
  * @no_fcs:  Request NIC to treat last 4 bytes as Ethernet FCS
   *@napi_id: id of the NAPI struct this skb came from
  * @secmark: security marking
+ * @offload_fwd_mark: fwding offload mark
  * @mark: Generic packet mark
  * @vlan_proto: vlan encapsulation protocol
  * @vlan_tci: vlan tag control information
@@ -650,9 +651,15 @@ struct sk_buff {
unsigned intsender_cpu;
};
 #endif
+   union {
 #ifdef CONFIG_NETWORK_SECMARK
-   __u32   secmark;
+   __u32   secmark;
+#endif
+#ifdef CONFIG_NET_SWITCHDEV
+   __u32   offload_fwd_mark;
 #endif
+   };
+
union {
__u32   mark;
__u32   reserved_tailroom;
diff --git a/net/core/dev.c b/net/core/dev.c
index 8810b6b..2ee15af 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3061,6 +3061,16 @@ static int __dev_queue_xmit(struct sk_buff *skb, void 
*accel_priv)
else
skb_dst_force(skb);
 
+#ifdef CONFIG_NET_SWITCHDEV
+   /* Don't forward if offload device already forwarded */
+   if (skb->offload_fwd_mark &&
+   skb->offload_fwd_mark == dev->offload_fwd_mark) {
+   consume_skb(skb);
+   rc = NET_XMIT_SUCCESS;
+   goto out;
+   }
+#endif
+
txq = netdev_pick_tx(dev, skb, accel_priv);
q = rcu_dereference_bh(txq->qdisc);
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v3 3/5] switchdev: add offload_fwd_mark generator helper

2015-07-18 Thread sfeldma
From: Scott Feldman 

skb->offload_fwd_mark and dev->offload_fwd_mark are 32-bit and should be
unique for device and may even be unique for a sub-set of ports within
device, so add switchdev helper function to generate unique marks based on
port's switch ID and group_ifindex.  group_ifindex would typically be the
container dev's ifindex, such as the bridge's ifindex.

The generator uses a global hash table to store offload_fwd_marks hashed by
{switch ID, group_ifindex} key.

Signed-off-by: Scott Feldman 
Acked-by: Jiri Pirko 
---
 include/net/switchdev.h   |9 
 net/switchdev/switchdev.c |  103 +
 2 files changed, 112 insertions(+)

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index d5671f1..89da893 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -157,6 +157,9 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr 
*tb[],
 int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
struct net_device *dev,
struct net_device *filter_dev, int idx);
+void switchdev_port_fwd_mark_set(struct net_device *dev,
+struct net_device *group_dev,
+bool joining);
 
 #else
 
@@ -271,6 +274,12 @@ static inline int switchdev_port_fdb_dump(struct sk_buff 
*skb,
return -EOPNOTSUPP;
 }
 
+static inline void switchdev_port_fwd_mark_set(struct net_device *dev,
+  struct net_device *group_dev,
+  bool joining)
+{
+}
+
 #endif
 
 #endif /* _LINUX_SWITCHDEV_H_ */
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 4e5bba5..33bafa2 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -1039,3 +1039,106 @@ void switchdev_fib_ipv4_abort(struct fib_info *fi)
fi->fib_net->ipv4.fib_offload_disabled = true;
 }
 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort);
+
+static bool switchdev_port_same_parent_id(struct net_device *a,
+ struct net_device *b)
+{
+   struct switchdev_attr a_attr = {
+   .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
+   .flags = SWITCHDEV_F_NO_RECURSE,
+   };
+   struct switchdev_attr b_attr = {
+   .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
+   .flags = SWITCHDEV_F_NO_RECURSE,
+   };
+
+   if (switchdev_port_attr_get(a, &a_attr) ||
+   switchdev_port_attr_get(b, &b_attr))
+   return false;
+
+   return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid);
+}
+
+static u32 switchdev_port_fwd_mark_get(struct net_device *dev,
+  struct net_device *group_dev)
+{
+   struct net_device *lower_dev;
+   struct list_head *iter;
+
+   netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
+   if (lower_dev == dev)
+   continue;
+   if (switchdev_port_same_parent_id(dev, lower_dev))
+   return lower_dev->offload_fwd_mark;
+   return switchdev_port_fwd_mark_get(dev, lower_dev);
+   }
+
+   return dev->ifindex;
+}
+
+static void switchdev_port_fwd_mark_reset(struct net_device *group_dev,
+ u32 old_mark, u32 *reset_mark)
+{
+   struct net_device *lower_dev;
+   struct list_head *iter;
+
+   netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
+   if (lower_dev->offload_fwd_mark == old_mark) {
+   if (!*reset_mark)
+   *reset_mark = lower_dev->ifindex;
+   lower_dev->offload_fwd_mark = *reset_mark;
+   }
+   switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark);
+   }
+}
+
+/**
+ * switchdev_port_fwd_mark_set - Set port offload forwarding mark
+ *
+ * @dev: port device
+ * @group_dev: containing device
+ * @joining: true if dev is joining group; false if leaving group
+ *
+ * An ungrouped port's offload mark is just its ifindex.  A grouped
+ * port's (member of a bridge, for example) offload mark is the ifindex
+ * of one of the ports in the group with the same parent (switch) ID.
+ * Ports on the same device in the same group will have the same mark.
+ *
+ * Example:
+ *
+ * br0 ifindex=9
+ *   sw1p1 ifindex=2   mark=2
+ *   sw1p2 ifindex=3   mark=2
+ *   sw2p1 ifindex=4   mark=5
+ *   sw2p2 ifindex=5   mark=5
+ *
+ * If sw2p2 leaves the bridge, we'll have:
+ *
+ * br0 ifindex=9
+ *   sw1p1 ifindex=2   mark=2
+ *   sw1p2 ifindex=3   mark=2
+ *   sw2p1 ifindex=4   mark=4
+ * sw2p2

[PATCH net-next v2 5/5] switchdev: update documentation for offload_fwd_mark

2015-07-16 Thread sfeldma
From: Scott Feldman 

Signed-off-by: Scott Feldman 
---
 Documentation/networking/switchdev.txt |   14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/Documentation/networking/switchdev.txt 
b/Documentation/networking/switchdev.txt
index c5d7ade..9825f32 100644
--- a/Documentation/networking/switchdev.txt
+++ b/Documentation/networking/switchdev.txt
@@ -279,8 +279,18 @@ and unknown unicast packets to all ports in domain, if 
allowed by port's
 current STP state.  The switch driver, knowing which ports are within which
 vlan L2 domain, can program the switch device for flooding.  The packet should
 also be sent to the port netdev for processing by the bridge driver.  The
-bridge should not reflood the packet to the same ports the device flooded.
-XXX: the mechanism to avoid duplicate flood packets is being discuseed.
+bridge should not reflood the packet to the same ports the device flooded,
+otherwise there will be duplicate packets on the wire.
+
+To avoid duplicate packets, the device/driver should mark a packet as already
+forwarded using skb->offload_fwd_mark.  The same mark is set on the device
+ports in the domain using dev->offload_fwd_mark.  If the skb->offload_fwd_mark
+is non-zero and matches the forwarding egress port's dev->skb_mark, the kernel
+will drop the skb right before transmit on the egress port, with the
+understanding that the device already forwarded the packet on same egress port.
+The driver can use switchdev_port_fwd_mark_set() to set a globally unique mark
+for port's dev->offload_fwd_mark, based on the port's parent ID (switch ID) and
+a group ifindex.
 
 It is possible for the switch device to not handle flooding and push the
 packets up to the bridge driver for flooding.  This is not ideal as the number
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v2 1/5] net: don't reforward packets already forwarded by offload device

2015-07-16 Thread sfeldma
From: Scott Feldman 

Just before queuing skb for xmit on port, check if skb has been marked by
switchdev port driver as already forwarded by device.  If so, drop skb.  A
non-zero skb->offload_fwd_mark field is set by the switchdev port
driver/device on ingress to indicate the skb has already been forwarded by
the device to egress ports with matching dev->offload_fwd_mark.  The switchdev
port driver would assign a non-zero dev->offload_fwd_mark for each device port
netdev during registration, for example.

Signed-off-by: Scott Feldman 
---
 include/linux/netdevice.h |6 ++
 include/linux/skbuff.h|   11 ++-
 net/core/dev.c|   10 ++
 3 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 45cfd79..8364f29 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1456,6 +1456,8 @@ enum netdev_priv_flags {
  *
  * @xps_maps:  XXX: need comments on this one
  *
+ * @offload_fwd_mark:  Offload device fwding mark
+ *
  * @trans_start:   Time (in jiffies) of last Tx
  * @watchdog_timeo:Represents the timeout that is used by
  * the watchdog ( see dev_watchdog() )
@@ -1697,6 +1699,10 @@ struct net_device {
struct xps_dev_maps __rcu *xps_maps;
 #endif
 
+#ifdef CONFIG_NET_SWITCHDEV
+   u32 offload_fwd_mark;
+#endif
+
/* These may be needed for future network-power-down code. */
 
/*
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d6cdd6e..2edcf50 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -506,6 +506,7 @@ static inline u32 skb_mstamp_us_delta(const struct 
skb_mstamp *t1,
  * @no_fcs:  Request NIC to treat last 4 bytes as Ethernet FCS
   *@napi_id: id of the NAPI struct this skb came from
  * @secmark: security marking
+ * @offload_fwd_mark: fwding offload mark
  * @mark: Generic packet mark
  * @vlan_proto: vlan encapsulation protocol
  * @vlan_tci: vlan tag control information
@@ -650,9 +651,17 @@ struct sk_buff {
unsigned intsender_cpu;
};
 #endif
+   union {
 #ifdef CONFIG_NETWORK_SECMARK
-   __u32   secmark;
+   __u32   secmark;
+#endif
+#ifdef CONFIG_NET_SWITCHDEV
+   __u32   offload_fwd_mark;
 #endif
+   };
+
+   union {};
+
union {
__u32   mark;
__u32   reserved_tailroom;
diff --git a/net/core/dev.c b/net/core/dev.c
index 8810b6b..2ee15af 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3061,6 +3061,16 @@ static int __dev_queue_xmit(struct sk_buff *skb, void 
*accel_priv)
else
skb_dst_force(skb);
 
+#ifdef CONFIG_NET_SWITCHDEV
+   /* Don't forward if offload device already forwarded */
+   if (skb->offload_fwd_mark &&
+   skb->offload_fwd_mark == dev->offload_fwd_mark) {
+   consume_skb(skb);
+   rc = NET_XMIT_SUCCESS;
+   goto out;
+   }
+#endif
+
txq = netdev_pick_tx(dev, skb, accel_priv);
q = rcu_dereference_bh(txq->qdisc);
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v2 3/5] switchdev: add offload_fwd_mark generator helper

2015-07-16 Thread sfeldma
From: Scott Feldman 

skb->offload_fwd_mark and dev->offload_fwd_mark are 32-bit and should be
unique for device and may even be unique for a sub-set of ports within
device, so add switchdev helper function to generate unique marks based on
port's switch ID and group_ifindex.  group_ifindex would typically be the
container dev's ifindex, such as the bridge's ifindex.

The generator uses a global hash table to store offload_fwd_marks hashed by
{switch ID, group_ifindex} key.

Signed-off-by: Scott Feldman 
---
 include/net/switchdev.h   |9 
 net/switchdev/switchdev.c |  103 +
 2 files changed, 112 insertions(+)

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index d5671f1..89da893 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -157,6 +157,9 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr 
*tb[],
 int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
struct net_device *dev,
struct net_device *filter_dev, int idx);
+void switchdev_port_fwd_mark_set(struct net_device *dev,
+struct net_device *group_dev,
+bool joining);
 
 #else
 
@@ -271,6 +274,12 @@ static inline int switchdev_port_fdb_dump(struct sk_buff 
*skb,
return -EOPNOTSUPP;
 }
 
+static inline void switchdev_port_fwd_mark_set(struct net_device *dev,
+  struct net_device *group_dev,
+  bool joining)
+{
+}
+
 #endif
 
 #endif /* _LINUX_SWITCHDEV_H_ */
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 4e5bba5..33bafa2 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -1039,3 +1039,106 @@ void switchdev_fib_ipv4_abort(struct fib_info *fi)
fi->fib_net->ipv4.fib_offload_disabled = true;
 }
 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort);
+
+static bool switchdev_port_same_parent_id(struct net_device *a,
+ struct net_device *b)
+{
+   struct switchdev_attr a_attr = {
+   .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
+   .flags = SWITCHDEV_F_NO_RECURSE,
+   };
+   struct switchdev_attr b_attr = {
+   .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
+   .flags = SWITCHDEV_F_NO_RECURSE,
+   };
+
+   if (switchdev_port_attr_get(a, &a_attr) ||
+   switchdev_port_attr_get(b, &b_attr))
+   return false;
+
+   return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid);
+}
+
+static u32 switchdev_port_fwd_mark_get(struct net_device *dev,
+  struct net_device *group_dev)
+{
+   struct net_device *lower_dev;
+   struct list_head *iter;
+
+   netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
+   if (lower_dev == dev)
+   continue;
+   if (switchdev_port_same_parent_id(dev, lower_dev))
+   return lower_dev->offload_fwd_mark;
+   return switchdev_port_fwd_mark_get(dev, lower_dev);
+   }
+
+   return dev->ifindex;
+}
+
+static void switchdev_port_fwd_mark_reset(struct net_device *group_dev,
+ u32 old_mark, u32 *reset_mark)
+{
+   struct net_device *lower_dev;
+   struct list_head *iter;
+
+   netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
+   if (lower_dev->offload_fwd_mark == old_mark) {
+   if (!*reset_mark)
+   *reset_mark = lower_dev->ifindex;
+   lower_dev->offload_fwd_mark = *reset_mark;
+   }
+   switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark);
+   }
+}
+
+/**
+ * switchdev_port_fwd_mark_set - Set port offload forwarding mark
+ *
+ * @dev: port device
+ * @group_dev: containing device
+ * @joining: true if dev is joining group; false if leaving group
+ *
+ * An ungrouped port's offload mark is just its ifindex.  A grouped
+ * port's (member of a bridge, for example) offload mark is the ifindex
+ * of one of the ports in the group with the same parent (switch) ID.
+ * Ports on the same device in the same group will have the same mark.
+ *
+ * Example:
+ *
+ * br0 ifindex=9
+ *   sw1p1 ifindex=2   mark=2
+ *   sw1p2 ifindex=3   mark=2
+ *   sw2p1 ifindex=4   mark=5
+ *   sw2p2 ifindex=5   mark=5
+ *
+ * If sw2p2 leaves the bridge, we'll have:
+ *
+ * br0 ifindex=9
+ *   sw1p1 ifindex=2   mark=2
+ *   sw1p2 ifindex=3   mark=2
+ *   sw2p1 ifindex=4   mark=4
+ * sw2p2   ifindex=5  

[PATCH net-next v2 2/5] net: add phys ID compare helper to test if two IDs are the same

2015-07-16 Thread sfeldma
From: Scott Feldman 

Signed-off-by: Scott Feldman 
---
 include/linux/netdevice.h |7 +++
 net/switchdev/switchdev.c |8 ++--
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 8364f29..607b5f4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -766,6 +766,13 @@ struct netdev_phys_item_id {
unsigned char id_len;
 };
 
+static inline bool netdev_phys_item_id_same(struct netdev_phys_item_id *a,
+   struct netdev_phys_item_id *b)
+{
+   return a->id_len == b->id_len &&
+  memcmp(a->id, b->id, a->id_len) == 0;
+}
+
 typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
   struct sk_buff *skb);
 
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 9f2add3..4e5bba5 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -910,13 +910,9 @@ static struct net_device *switchdev_get_dev_by_nhs(struct 
fib_info *fi)
if (switchdev_port_attr_get(dev, &attr))
return NULL;
 
-   if (nhsel > 0) {
-   if (prev_attr.u.ppid.id_len != attr.u.ppid.id_len)
+   if (nhsel > 0 &&
+   !netdev_phys_item_id_same(&prev_attr.u.ppid, &attr.u.ppid))
return NULL;
-   if (memcmp(prev_attr.u.ppid.id, attr.u.ppid.id,
-  attr.u.ppid.id_len))
-   return NULL;
-   }
 
prev_attr = attr;
}
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v2 0/5] switchdev: avoid duplicate packet forwarding

2015-07-16 Thread sfeldma
From: Scott Feldman 

v2:

 - Per davem review: in sk_buff, union fwd_mark with secmark to save space
   since features appear to be mutually exclusive.
 - Per Simon Horman review:
   - fix grammar in switchdev.txt wrt fwd_mark
   - remove some unrelated changes that snuck in

v1:

This patchset was previously submitted as RFC.  No changes from the last
version (v2) sent under RFC.  Including RFC version history here for reference.

RFC v2:

 - s/fwd_mark/offload_fwd_mark
 - use consume_skb rather than kfree_skb when dropping pkt on egress.
 - Use Jiri's suggestion to use ifindex of one of the ports in a group
   as the mark for all the ports in the group.  This can be done with
   no additional storage (no hashtable from v1).  To pull it off, we
   need some simple recursive routines to walk the netdev tree ensuring
   all leaves in the tree (ports) in the same group (e.g. bridge)
   belonging to the same switch device will have the same offload fwd mark.
   Maybe someone sees a better design for the recursive routines?  They're
   not too bad, and should cover the stacked driver cases.

RFC v1:

With switchdev support for offloading L2/L3 forwarding data path to a
switch device, we have a general problem where both the device and the
kernel may forward the packet, resulting in duplicate packets on the wire.
Anytime a packet is forwarded by the device and a copy is sent to the CPU,
there is potential for duplicate forwarding, as the kernel may also do a
forwarding lookup and send the packet on the wire.

The specific problem this patch series is interested in solving is avoiding
duplicate packets on bridged ports.  There was a previous RFC from Roopa
(http://marc.info/?l=linux-netdev&m=142687073314252&w=2) to address this
problem, but didn't solve the problem of mixed ports in the bridge from
different devices; there was no way to exclude some ports from forwarding
and include others.  This RFC solves that problem by tagging the ingressing
packet with a unique mark, and then comparing the packet mark with the
egress port mark, and skip forwarding when there is a match.  For the mixed
ports bridge case, only those ports with matching marks are skipped.

The switchdev port driver must do two things:

1) Generate a fwd_mark for each switch port, using some unique key of the
   switch device (and optionally port).  This is done when the port netdev
   is registered or if the port's group membership changes (joins/leaves
   a bridge, for example).

2) On packet ingress from port, mark the skb with the ingress port's
   fwd_mark.  If the device supports it, it's useful to only mark skbs
   which were already forwarded by the device.  If the device does not
   support such indication, all skbs can be marked, even if they're
   local dst.

Two new 32-bit fields are added to struct sk_buff and struct netdevice to
hold the fwd_mark.  I've wrapped these with CONFIG_NET_SWITCHDEV for now. I
tried using skb->mark for this purpose, but ebtables can overwrite the
skb->mark before the bridge gets it, so that will not work.

In general, this fwd_mark can be used for any case where a packet is
forwarded by the device and a copy is sent to the CPU, to avoid the kernel
re-forwarding the packet.  sFlow is another use-case that comes to mind,
but I haven't explored the details.


Scott Feldman (5):
  net: don't reforward packets already forwarded by offload device
  net: add phys ID compare helper to test if two IDs are the same
  switchdev: add offload_fwd_mark generator helper
  rocker: add offload_fwd_mark support
  switchdev: update documentation for offload_fwd_mark

 Documentation/networking/switchdev.txt |   14 +++-
 drivers/net/ethernet/rocker/rocker.c   |   11 
 drivers/net/ethernet/rocker/rocker.h   |1 +
 include/linux/netdevice.h  |   13 
 include/linux/skbuff.h |   11 +++-
 include/net/switchdev.h|9 +++
 net/core/dev.c |   10 +++
 net/switchdev/switchdev.c  |  111 ++--
 8 files changed, 171 insertions(+), 9 deletions(-)

-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v2 4/5] rocker: add offload_fwd_mark support

2015-07-16 Thread sfeldma
From: Scott Feldman 

If device flags ingress packet as "fwd offload", mark the
skb->offload_fwd_mark using the ingress port's dev->offload_fwd_mark.  This
will be the hint to the kernel that this packet has already been forwarded
by device to egress ports matching skb->offload_fwd_mark.

For rocker, derive port dev->offload_fwd_mark based on device switch ID and
port ifindex.  If port is bridged, use the bridge ifindex rather than the
port ifindex.

Signed-off-by: Scott Feldman 
---
 drivers/net/ethernet/rocker/rocker.c |   11 +++
 drivers/net/ethernet/rocker/rocker.h |1 +
 2 files changed, 12 insertions(+)

diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index 9324283..0fdfa47 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -4800,6 +4800,7 @@ static int rocker_port_rx_proc(const struct rocker 
*rocker,
const struct rocker_tlv *attrs[ROCKER_TLV_RX_MAX + 1];
struct sk_buff *skb = rocker_desc_cookie_ptr_get(desc_info);
size_t rx_len;
+   u16 rx_flags = 0;
 
if (!skb)
return -ENOENT;
@@ -4807,6 +4808,8 @@ static int rocker_port_rx_proc(const struct rocker 
*rocker,
rocker_tlv_parse_desc(attrs, ROCKER_TLV_RX_MAX, desc_info);
if (!attrs[ROCKER_TLV_RX_FRAG_LEN])
return -EINVAL;
+   if (attrs[ROCKER_TLV_RX_FLAGS])
+   rx_flags = rocker_tlv_get_u16(attrs[ROCKER_TLV_RX_FLAGS]);
 
rocker_dma_rx_ring_skb_unmap(rocker, attrs);
 
@@ -4814,6 +4817,9 @@ static int rocker_port_rx_proc(const struct rocker 
*rocker,
skb_put(skb, rx_len);
skb->protocol = eth_type_trans(skb, rocker_port->dev);
 
+   if (rx_flags & ROCKER_RX_FLAGS_FWD_OFFLOAD)
+   skb->offload_fwd_mark = rocker_port->dev->offload_fwd_mark;
+
rocker_port->dev->stats.rx_packets++;
rocker_port->dev->stats.rx_bytes += skb->len;
 
@@ -4951,6 +4957,8 @@ static int rocker_probe_port(struct rocker *rocker, 
unsigned int port_number)
}
rocker->ports[port_number] = rocker_port;
 
+   switchdev_port_fwd_mark_set(rocker_port->dev, NULL, false);
+
rocker_port_set_learning(rocker_port, SWITCHDEV_TRANS_NONE);
 
err = rocker_port_ig_tbl(rocker_port, SWITCHDEV_TRANS_NONE, 0);
@@ -5230,6 +5238,7 @@ static int rocker_port_bridge_join(struct rocker_port 
*rocker_port,
rocker_port_internal_vlan_id_get(rocker_port, bridge->ifindex);
 
rocker_port->bridge_dev = bridge;
+   switchdev_port_fwd_mark_set(rocker_port->dev, bridge, true);
 
return rocker_port_vlan_add(rocker_port, SWITCHDEV_TRANS_NONE,
untagged_vid, 0);
@@ -5250,6 +5259,8 @@ static int rocker_port_bridge_leave(struct rocker_port 
*rocker_port)
rocker_port_internal_vlan_id_get(rocker_port,
 rocker_port->dev->ifindex);
 
+   switchdev_port_fwd_mark_set(rocker_port->dev, rocker_port->bridge_dev,
+   false);
rocker_port->bridge_dev = NULL;
 
err = rocker_port_vlan_add(rocker_port, SWITCHDEV_TRANS_NONE,
diff --git a/drivers/net/ethernet/rocker/rocker.h 
b/drivers/net/ethernet/rocker/rocker.h
index 08b2c3d..12490b2 100644
--- a/drivers/net/ethernet/rocker/rocker.h
+++ b/drivers/net/ethernet/rocker/rocker.h
@@ -246,6 +246,7 @@ enum {
 #define ROCKER_RX_FLAGS_TCPBIT(5)
 #define ROCKER_RX_FLAGS_UDPBIT(6)
 #define ROCKER_RX_FLAGS_TCP_UDP_CSUM_GOOD  BIT(7)
+#define ROCKER_RX_FLAGS_FWD_OFFLOADBIT(8)
 
 enum {
ROCKER_TLV_TX_UNSPEC,
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next 3/5] switchdev: add offload_fwd_mark generator helper

2015-07-08 Thread sfeldma
From: Scott Feldman 

skb->offload_fwd_mark and dev->offload_fwd_mark are 32-bit and should be
unique for device and may even be unique for a sub-set of ports within
device, so add switchdev helper function to generate unique marks based on
port's switch ID and group_ifindex.  group_ifindex would typically be the
container dev's ifindex, such as the bridge's ifindex.

The generator needs no additional storage: an ungrouped port's mark is its
own ifindex, and a grouped port's mark is the ifindex of one of the ports in
the group with the same switch ID.

Signed-off-by: Scott Feldman 
---
 include/net/switchdev.h   |9 
 net/switchdev/switchdev.c |  103 +
 2 files changed, 112 insertions(+)

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index d5671f1..89da893 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -157,6 +157,9 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr 
*tb[],
 int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
struct net_device *dev,
struct net_device *filter_dev, int idx);
+void switchdev_port_fwd_mark_set(struct net_device *dev,
+struct net_device *group_dev,
+bool joining);
 
 #else
 
@@ -271,6 +274,12 @@ static inline int switchdev_port_fdb_dump(struct sk_buff 
*skb,
return -EOPNOTSUPP;
 }
 
+static inline void switchdev_port_fwd_mark_set(struct net_device *dev,
+  struct net_device *group_dev,
+  bool joining)
+{
+}
+
 #endif
 
 #endif /* _LINUX_SWITCHDEV_H_ */
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index e16586f..6de147d 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -1035,3 +1035,106 @@ void switchdev_fib_ipv4_abort(struct fib_info *fi)
fi->fib_net->ipv4.fib_offload_disabled = true;
 }
 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort);
+
+static bool switchdev_port_same_parent_id(struct net_device *a,
+ struct net_device *b)
+{
+   struct switchdev_attr a_attr = {
+   .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
+   .flags = SWITCHDEV_F_NO_RECURSE,
+   };
+   struct switchdev_attr b_attr = {
+   .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
+   .flags = SWITCHDEV_F_NO_RECURSE,
+   };
+
+   if (switchdev_port_attr_get(a, &a_attr) ||
+   switchdev_port_attr_get(b, &b_attr))
+   return false;
+
+   return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid);
+}
+
+static u32 switchdev_port_fwd_mark_get(struct net_device *dev,
+  struct net_device *group_dev)
+{
+   struct net_device *lower_dev;
+   struct list_head *iter;
+
+   netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
+   if (lower_dev == dev)
+   continue;
+   if (switchdev_port_same_parent_id(dev, lower_dev))
+   return lower_dev->offload_fwd_mark;
+   return switchdev_port_fwd_mark_get(dev, lower_dev);
+   }
+
+   return dev->ifindex;
+}
+
+static void switchdev_port_fwd_mark_reset(struct net_device *group_dev,
+ u32 old_mark, u32 *reset_mark)
+{
+   struct net_device *lower_dev;
+   struct list_head *iter;
+
+   netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
+   if (lower_dev->offload_fwd_mark == old_mark) {
+   if (!*reset_mark)
+   *reset_mark = lower_dev->ifindex;
+   lower_dev->offload_fwd_mark = *reset_mark;
+   }
+   switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark);
+   }
+}
+
+/**
+ * switchdev_port_fwd_mark_set - Set port offload forwarding mark
+ *
+ * @dev: port device
+ * @group_dev: containing device
+ * @joining: true if dev is joining group; false if leaving group
+ *
+ * An ungrouped port's offload mark is just its ifindex.  A grouped
+ * port's (member of a bridge, for example) offload mark is the ifindex
+ * of one of the ports in the group with the same parent (switch) ID.
+ * Ports on the same device in the same group will have the same mark.
+ *
+ * Example:
+ *
+ * br0 ifindex=9
+ *   sw1p1 ifindex=2   mark=2
+ *   sw1p2 ifindex=3   mark=2
+ *   sw2p1 ifindex=4   mark=5
+ *   sw2p2 ifindex=5   mark=5
+ *
+ * If sw2p2 leaves the bridge, we'll have:
+ *
+ * br0 ifindex=9
+ *   sw1p1 ifindex=2   mark=2
+ *   sw1p2 ifindex=3   mark=2
+ *   sw2p1 ifindex=4   mark=4
+ * sw2p2   ifindex=5  

[PATCH net-next 5/5] switchdev: update documentation for offload_fwd_mark

2015-07-08 Thread sfeldma
From: Scott Feldman 

Signed-off-by: Scott Feldman 
---
 Documentation/networking/switchdev.txt |   14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/Documentation/networking/switchdev.txt 
b/Documentation/networking/switchdev.txt
index c5d7ade..b864e47 100644
--- a/Documentation/networking/switchdev.txt
+++ b/Documentation/networking/switchdev.txt
@@ -279,8 +279,18 @@ and unknown unicast packets to all ports in domain, if 
allowed by port's
 current STP state.  The switch driver, knowing which ports are within which
 vlan L2 domain, can program the switch device for flooding.  The packet should
 also be sent to the port netdev for processing by the bridge driver.  The
-bridge should not reflood the packet to the same ports the device flooded.
-XXX: the mechanism to avoid duplicate flood packets is being discuseed.
+bridge should not reflood the packet to the same ports the device flooded,
+otherwise there will be duplicate packets on the wire.
+
+To avoid duplicate packets, the device/driver can mark a packet as already
+forwarded using skb->offload_fwd_mark.  The same mark is set on the device
+ports in the domain using dev->offload_fwd_mark.  If the skb->offload_fwd_mark
+is non-zero and matches the forwarding egress port's dev->offload_fwd_mark, the kernel
+will drop the skb right before transmit on the egress port, with the
+understanding that the device already forwarded the packet on same egress port.
+The driver can use switchdev_port_fwd_mark_set() to set a globally unique mark
+for port's dev->offload_fwd_mark, based on the port's parent ID (switch ID) and
+a group ifindex.
 
 It is possible for the switch device to not handle flooding and push the
 packets up to the bridge driver for flooding.  This is not ideal as the number
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next 0/5] switchdev: avoid duplicate packet forwarding

2015-07-08 Thread sfeldma
From: Scott Feldman 

This patchset was previously submitted as RFC.  No changes from the last
version (v2) sent under RFC.  Including RFC version history here for reference.

RFC v2:

 - s/fwd_mark/offload_fwd_mark
 - use consume_skb rather than kfree_skb when dropping pkt on egress.
 - Use Jiri's suggestion to use ifindex of one of the ports in a group
   as the mark for all the ports in the group.  This can be done with
   no additional storage (no hashtable from v1).  To pull it off, we
   need some simple recursive routines to walk the netdev tree ensuring
   all leaves in the tree (ports) in the same group (e.g. bridge)
   belonging to the same switch device will have the same offload fwd mark.
   Maybe someone sees a better design for the recursive routines?  They're
   not too bad, and should cover the stacked driver cases.

RFC v1:

With switchdev support for offloading L2/L3 forwarding data path to a
switch device, we have a general problem where both the device and the
kernel may forward the packet, resulting in duplicate packets on the wire.
Anytime a packet is forwarded by the device and a copy is sent to the CPU,
there is potential for duplicate forwarding, as the kernel may also do a
forwarding lookup and send the packet on the wire.

The specific problem this patch series is interested in solving is avoiding
duplicate packets on bridged ports.  There was a previous RFC from Roopa
(http://marc.info/?l=linux-netdev&m=142687073314252&w=2) to address this
problem, but didn't solve the problem of mixed ports in the bridge from
different devices; there was no way to exclude some ports from forwarding
and include others.  This RFC solves that problem by tagging the ingressing
packet with a unique mark, and then comparing the packet mark with the
egress port mark, and skip forwarding when there is a match.  For the mixed
ports bridge case, only those ports with matching marks are skipped.

The switchdev port driver must do two things:

1) Generate a fwd_mark for each switch port, using some unique key of the
   switch device (and optionally port).  This is done when the port netdev
   is registered or if the port's group membership changes (joins/leaves
   a bridge, for example).

2) On packet ingress from port, mark the skb with the ingress port's
   fwd_mark.  If the device supports it, it's useful to only mark skbs
   which were already forwarded by the device.  If the device does not
   support such indication, all skbs can be marked, even if they're
   local dst.

Two new 32-bit fields are added to struct sk_buff and struct netdevice to
hold the fwd_mark.  I've wrapped these with CONFIG_NET_SWITCHDEV for now. I
tried using skb->mark for this purpose, but ebtables can overwrite the
skb->mark before the bridge gets it, so that will not work.

In general, this fwd_mark can be used for any case where a packet is
forwarded by the device and a copy is sent to the CPU, to avoid the kernel
re-forwarding the packet.  sFlow is another use-case that comes to mind,
but I haven't explored the details.

Scott Feldman (5):
  net: don't reforward packets already forwarded by offload device
  net: add phys ID compare helper to test if two IDs are the same
  switchdev: add offload_fwd_mark generator helper
  rocker: add offload_fwd_mark support
  switchdev: update documentation for offload_fwd_mark

 Documentation/networking/switchdev.txt |   14 +++-
 drivers/net/ethernet/rocker/rocker.c   |   14 +++-
 drivers/net/ethernet/rocker/rocker.h   |1 +
 include/linux/netdevice.h  |   13 
 include/linux/skbuff.h |4 ++
 include/net/switchdev.h|9 +++
 net/core/dev.c |   10 +++
 net/switchdev/switchdev.c  |  111 ++--
 8 files changed, 167 insertions(+), 9 deletions(-)

-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next 2/5] net: add phys ID compare helper to test if two IDs are the same

2015-07-08 Thread sfeldma
From: Scott Feldman 

Signed-off-by: Scott Feldman 
---
 include/linux/netdevice.h |7 +++
 net/switchdev/switchdev.c |8 ++--
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7be616e1..89db412 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -766,6 +766,13 @@ struct netdev_phys_item_id {
unsigned char id_len;
 };
 
+static inline bool netdev_phys_item_id_same(struct netdev_phys_item_id *a,
+   struct netdev_phys_item_id *b)
+{
+   return a->id_len == b->id_len &&
+  memcmp(a->id, b->id, a->id_len) == 0;
+}
+
 typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
   struct sk_buff *skb);
 
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 84f77a0..e16586f 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -906,13 +906,9 @@ static struct net_device *switchdev_get_dev_by_nhs(struct 
fib_info *fi)
if (switchdev_port_attr_get(dev, &attr))
return NULL;
 
-   if (nhsel > 0) {
-   if (prev_attr.u.ppid.id_len != attr.u.ppid.id_len)
+   if (nhsel > 0 &&
+   !netdev_phys_item_id_same(&prev_attr.u.ppid, &attr.u.ppid))
return NULL;
-   if (memcmp(prev_attr.u.ppid.id, attr.u.ppid.id,
-  attr.u.ppid.id_len))
-   return NULL;
-   }
 
prev_attr = attr;
}
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next 1/5] net: don't reforward packets already forwarded by offload device

2015-07-08 Thread sfeldma
From: Scott Feldman 

Just before queuing skb for xmit on port, check if skb has been marked by
switchdev port driver as already forwarded by device.  If so, drop skb.  A
non-zero skb->offload_fwd_mark field is set by the switchdev port
driver/device on ingress to indicate the skb has already been forwarded by
the device to egress ports with matching dev->offload_fwd_mark.  The
switchdev port driver would assign a non-zero dev->offload_fwd_mark for each
device port netdev during registration, for example.

Signed-off-by: Scott Feldman 
---
 include/linux/netdevice.h |6 ++
 include/linux/skbuff.h|4 
 net/core/dev.c|   10 ++
 3 files changed, 20 insertions(+)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e20979d..7be616e1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1448,6 +1448,8 @@ enum netdev_priv_flags {
  *
  * @xps_maps:  XXX: need comments on this one
  *
+ * @offload_fwd_mark:  Offload device fwding mark
+ *
  * @trans_start:   Time (in jiffies) of last Tx
  * @watchdog_timeo:Represents the timeout that is used by
  * the watchdog ( see dev_watchdog() )
@@ -1685,6 +1687,10 @@ struct net_device {
struct xps_dev_maps __rcu *xps_maps;
 #endif
 
+#ifdef CONFIG_NET_SWITCHDEV
+   u32 offload_fwd_mark;
+#endif
+
/* These may be needed for future network-power-down code. */
 
/*
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d6cdd6e..1533c4f 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -506,6 +506,7 @@ static inline u32 skb_mstamp_us_delta(const struct 
skb_mstamp *t1,
  * @no_fcs:  Request NIC to treat last 4 bytes as Ethernet FCS
   *@napi_id: id of the NAPI struct this skb came from
  * @secmark: security marking
+ * @offload_fwd_mark: fwding offload mark
  * @mark: Generic packet mark
  * @vlan_proto: vlan encapsulation protocol
  * @vlan_tci: vlan tag control information
@@ -653,6 +654,9 @@ struct sk_buff {
 #ifdef CONFIG_NETWORK_SECMARK
__u32   secmark;
 #endif
+#ifdef CONFIG_NET_SWITCHDEV
+   __u32   offload_fwd_mark;
+#endif
union {
__u32   mark;
__u32   reserved_tailroom;
diff --git a/net/core/dev.c b/net/core/dev.c
index e0d2701..71919cc 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3065,6 +3065,16 @@ static int __dev_queue_xmit(struct sk_buff *skb, void 
*accel_priv)
else
skb_dst_force(skb);
 
+#ifdef CONFIG_NET_SWITCHDEV
+   /* Don't forward if offload device already forwarded */
+   if (skb->offload_fwd_mark &&
+   skb->offload_fwd_mark == dev->offload_fwd_mark) {
+   consume_skb(skb);
+   rc = NET_XMIT_SUCCESS;
+   goto out;
+   }
+#endif
+
txq = netdev_pick_tx(dev, skb, accel_priv);
q = rcu_dereference_bh(txq->qdisc);
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next 4/5] rocker: add offload_fwd_mark support

2015-07-08 Thread sfeldma
From: Scott Feldman 

If device flags ingress packet as "fwd offload", mark the
skb->offload_fwd_mark using the ingress port's dev->offload_fwd_mark.  This
will be the hint to the kernel that this packet has already been forwarded
by device to egress ports matching skb->offload_fwd_mark.

For rocker, derive port dev->offload_fwd_mark based on device switch ID and
port ifindex.  If port is bridged, use the bridge ifindex rather than the
port ifindex.

Signed-off-by: Scott Feldman 
---
 drivers/net/ethernet/rocker/rocker.c |   14 +-
 drivers/net/ethernet/rocker/rocker.h |1 +
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index c005167..a4ced91 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -4787,6 +4787,7 @@ static int rocker_port_rx_proc(const struct rocker 
*rocker,
const struct rocker_tlv *attrs[ROCKER_TLV_RX_MAX + 1];
struct sk_buff *skb = rocker_desc_cookie_ptr_get(desc_info);
size_t rx_len;
+   u16 rx_flags = 0;
 
if (!skb)
return -ENOENT;
@@ -4794,6 +4795,8 @@ static int rocker_port_rx_proc(const struct rocker 
*rocker,
rocker_tlv_parse_desc(attrs, ROCKER_TLV_RX_MAX, desc_info);
if (!attrs[ROCKER_TLV_RX_FRAG_LEN])
return -EINVAL;
+   if (attrs[ROCKER_TLV_RX_FLAGS])
+   rx_flags = rocker_tlv_get_u16(attrs[ROCKER_TLV_RX_FLAGS]);
 
rocker_dma_rx_ring_skb_unmap(rocker, attrs);
 
@@ -4801,6 +4804,9 @@ static int rocker_port_rx_proc(const struct rocker 
*rocker,
skb_put(skb, rx_len);
skb->protocol = eth_type_trans(skb, rocker_port->dev);
 
+   if (rx_flags & ROCKER_RX_FLAGS_FWD_OFFLOAD)
+   skb->offload_fwd_mark = rocker_port->dev->offload_fwd_mark;
+
rocker_port->dev->stats.rx_packets++;
rocker_port->dev->stats.rx_bytes += skb->len;
 
@@ -4938,11 +4944,13 @@ static int rocker_probe_port(struct rocker *rocker, 
unsigned int port_number)
}
rocker->ports[port_number] = rocker_port;
 
+   switchdev_port_fwd_mark_set(rocker_port->dev, NULL, false);
+
rocker_port_set_learning(rocker_port, SWITCHDEV_TRANS_NONE);
 
err = rocker_port_ig_tbl(rocker_port, SWITCHDEV_TRANS_NONE, 0);
if (err) {
-   dev_err(&pdev->dev, "install ig port table failed\n");
+   netdev_err(rocker_port->dev, "install ig port table failed\n");
goto err_port_ig_tbl;
}
 
@@ -4962,6 +4970,7 @@ err_untagged_vlan:
rocker_port_ig_tbl(rocker_port, SWITCHDEV_TRANS_NONE,
   ROCKER_OP_FLAG_REMOVE);
 err_port_ig_tbl:
+   rocker->ports[port_number] = NULL;
unregister_netdev(dev);
 err_register_netdev:
free_netdev(dev);
@@ -5217,6 +5226,7 @@ static int rocker_port_bridge_join(struct rocker_port 
*rocker_port,
rocker_port_internal_vlan_id_get(rocker_port, bridge->ifindex);
 
rocker_port->bridge_dev = bridge;
+   switchdev_port_fwd_mark_set(rocker_port->dev, bridge, true);
 
return rocker_port_vlan_add(rocker_port, SWITCHDEV_TRANS_NONE,
untagged_vid, 0);
@@ -5237,6 +5247,8 @@ static int rocker_port_bridge_leave(struct rocker_port 
*rocker_port)
rocker_port_internal_vlan_id_get(rocker_port,
 rocker_port->dev->ifindex);
 
+   switchdev_port_fwd_mark_set(rocker_port->dev, rocker_port->bridge_dev,
+   false);
rocker_port->bridge_dev = NULL;
 
err = rocker_port_vlan_add(rocker_port, SWITCHDEV_TRANS_NONE,
diff --git a/drivers/net/ethernet/rocker/rocker.h 
b/drivers/net/ethernet/rocker/rocker.h
index 08b2c3d..12490b2 100644
--- a/drivers/net/ethernet/rocker/rocker.h
+++ b/drivers/net/ethernet/rocker/rocker.h
@@ -246,6 +246,7 @@ enum {
 #define ROCKER_RX_FLAGS_TCPBIT(5)
 #define ROCKER_RX_FLAGS_UDPBIT(6)
 #define ROCKER_RX_FLAGS_TCP_UDP_CSUM_GOOD  BIT(7)
+#define ROCKER_RX_FLAGS_FWD_OFFLOADBIT(8)
 
 enum {
ROCKER_TLV_TX_UNSPEC,
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next] rocker: add change MTU support

2015-07-08 Thread sfeldma
From: Scott Feldman 

Implement ndo_change_mtu: on MTU change, reallocate Rx ring bufs and signal
HW of new port MTU value.

Signed-off-by: Scott Feldman 
---
 drivers/net/ethernet/rocker/rocker.c |   61 ++
 drivers/net/ethernet/rocker/rocker.h |1 +
 2 files changed, 62 insertions(+)

diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index 2d8578cade..c005167 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -1818,6 +1818,30 @@ rocker_cmd_set_port_settings_macaddr_prep(const struct 
rocker_port *rocker_port,
 }
 
 static int
+rocker_cmd_set_port_settings_mtu_prep(const struct rocker_port *rocker_port,
+ struct rocker_desc_info *desc_info,
+ void *priv)
+{
+   int mtu = *(int *)priv;
+   struct rocker_tlv *cmd_info;
+
+   if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE,
+  ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS))
+   return -EMSGSIZE;
+   cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO);
+   if (!cmd_info)
+   return -EMSGSIZE;
+   if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT,
+  rocker_port->pport))
+   return -EMSGSIZE;
+   if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_MTU,
+  mtu))
+   return -EMSGSIZE;
+   rocker_tlv_nest_end(desc_info, cmd_info);
+   return 0;
+}
+
+static int
 rocker_cmd_set_port_learning_prep(const struct rocker_port *rocker_port,
  struct rocker_desc_info *desc_info,
  void *priv)
@@ -1874,6 +1898,14 @@ static int rocker_cmd_set_port_settings_macaddr(struct 
rocker_port *rocker_port,
   macaddr, NULL, NULL);
 }
 
+static int rocker_cmd_set_port_settings_mtu(struct rocker_port *rocker_port,
+   int mtu)
+{
+   return rocker_cmd_exec(rocker_port, SWITCHDEV_TRANS_NONE, 0,
+  rocker_cmd_set_port_settings_mtu_prep,
+  &mtu, NULL, NULL);
+}
+
 static int rocker_port_set_learning(struct rocker_port *rocker_port,
enum switchdev_trans trans)
 {
@@ -4152,6 +4184,34 @@ static int rocker_port_set_mac_address(struct net_device 
*dev, void *p)
return 0;
 }
 
+static int rocker_port_change_mtu(struct net_device *dev, int new_mtu)
+{
+   struct rocker_port *rocker_port = netdev_priv(dev);
+   int running = netif_running(dev);
+   int err;
+
+#define ROCKER_PORT_MIN_MTU68
+#define ROCKER_PORT_MAX_MTU9000
+
+   if (new_mtu < ROCKER_PORT_MIN_MTU || new_mtu > ROCKER_PORT_MAX_MTU)
+   return -EINVAL;
+
+   if (running)
+   rocker_port_stop(dev);
+
+   netdev_info(dev, "MTU change from %d to %d\n", dev->mtu, new_mtu);
+   dev->mtu = new_mtu;
+
+   err = rocker_cmd_set_port_settings_mtu(rocker_port, new_mtu);
+   if (err)
+   return err;
+
+   if (running)
+   err = rocker_port_open(dev);
+
+   return err;
+}
+
 static int rocker_port_get_phys_port_name(struct net_device *dev,
  char *buf, size_t len)
 {
@@ -4172,6 +4232,7 @@ static const struct net_device_ops rocker_port_netdev_ops 
= {
.ndo_stop   = rocker_port_stop,
.ndo_start_xmit = rocker_port_xmit,
.ndo_set_mac_address= rocker_port_set_mac_address,
+   .ndo_change_mtu = rocker_port_change_mtu,
.ndo_bridge_getlink = switchdev_port_bridge_getlink,
.ndo_bridge_setlink = switchdev_port_bridge_setlink,
.ndo_bridge_dellink = switchdev_port_bridge_dellink,
diff --git a/drivers/net/ethernet/rocker/rocker.h 
b/drivers/net/ethernet/rocker/rocker.h
index c61fbf9..08b2c3d 100644
--- a/drivers/net/ethernet/rocker/rocker.h
+++ b/drivers/net/ethernet/rocker/rocker.h
@@ -159,6 +159,7 @@ enum {
ROCKER_TLV_CMD_PORT_SETTINGS_MODE,  /* u8 */
ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING,  /* u8 */
ROCKER_TLV_CMD_PORT_SETTINGS_PHYS_NAME, /* binary */
+   ROCKER_TLV_CMD_PORT_SETTINGS_MTU,   /* u16 */
 
__ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
ROCKER_TLV_CMD_PORT_SETTINGS_MAX =
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next 1/2] switchdev: rename vlan vid_start to vid_begin

2015-06-22 Thread sfeldma
From: Scott Feldman 

Use vid_begin/end to be consistent with BRIDGE_VLAN_INFO_RANGE_BEGIN/END.

Signed-off-by: Scott Feldman 
---
 drivers/net/ethernet/rocker/rocker.c |4 ++--
 include/net/switchdev.h  |2 +-
 net/bridge/br_vlan.c |4 ++--
 net/switchdev/switchdev.c|   12 ++--
 4 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index b72674c..76c3086 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -4295,7 +4295,7 @@ static int rocker_port_vlans_add(struct rocker_port 
*rocker_port,
u16 vid;
int err;
 
-   for (vid = vlan->vid_start; vid <= vlan->vid_end; vid++) {
+   for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) {
err = rocker_port_vlan_add(rocker_port, trans,
   vid, vlan->flags);
if (err)
@@ -4378,7 +4378,7 @@ static int rocker_port_vlans_del(struct rocker_port 
*rocker_port,
u16 vid;
int err;
 
-   for (vid = vlan->vid_start; vid <= vlan->vid_end; vid++) {
+   for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) {
err = rocker_port_vlan_del(rocker_port, vid, vlan->flags);
if (err)
return err;
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index d882902..89da893 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -57,7 +57,7 @@ struct switchdev_obj {
union {
struct switchdev_obj_vlan { /* PORT_VLAN */
u16 flags;
-   u16 vid_start;
+   u16 vid_begin;
u16 vid_end;
} vlan;
struct switchdev_obj_ipv4_fib { /* IPV4_FIB */
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 17fc358..574feea 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -54,7 +54,7 @@ static int __vlan_vid_add(struct net_device *dev, struct 
net_bridge *br,
.id = SWITCHDEV_OBJ_PORT_VLAN,
.u.vlan = {
.flags = flags,
-   .vid_start = vid,
+   .vid_begin = vid,
.vid_end = vid,
},
};
@@ -132,7 +132,7 @@ static void __vlan_vid_del(struct net_device *dev, struct 
net_bridge *br,
struct switchdev_obj vlan_obj = {
.id = SWITCHDEV_OBJ_PORT_VLAN,
.u.vlan = {
-   .vid_start = vid,
+   .vid_begin = vid,
.vid_end = vid,
},
};
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index afce9cc..70764c5 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -511,23 +511,23 @@ static int switchdev_port_br_afspec(struct net_device 
*dev,
vinfo = nla_data(attr);
vlan->flags = vinfo->flags;
if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) {
-   if (vlan->vid_start)
+   if (vlan->vid_begin)
return -EINVAL;
-   vlan->vid_start = vinfo->vid;
+   vlan->vid_begin = vinfo->vid;
} else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) {
-   if (!vlan->vid_start)
+   if (!vlan->vid_begin)
return -EINVAL;
vlan->vid_end = vinfo->vid;
-   if (vlan->vid_end <= vlan->vid_start)
+   if (vlan->vid_end <= vlan->vid_begin)
return -EINVAL;
err = f(dev, &obj);
if (err)
return err;
memset(vlan, 0, sizeof(*vlan));
} else {
-   if (vlan->vid_start)
+   if (vlan->vid_begin)
return -EINVAL;
-   vlan->vid_start = vinfo->vid;
+   vlan->vid_begin = vinfo->vid;
vlan->vid_end = vinfo->vid;
err = f(dev, &obj);
if (err)
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in


[PATCH net-next 2/2] switchdev; add VLAN support for port's bridge_getlink

2015-06-22 Thread sfeldma
From: Scott Feldman 

One more missing piece of the puzzle.  Add vlan dump support to switchdev
port's bridge_getlink.  iproute2 "bridge vlan show" cmd already knows how
to show the vlans installed on the bridge and the device, but (until now)
no one implemented the port vlan part of the netlink PF_BRIDGE:RTM_GETLINK
msg.  Before this patch, "bridge vlan show":

$ bridge -c vlan show
portvlan ids
sw1p130-34  << bridge side vlans
 57

sw1p1   << device side vlans (missing)

sw1p257

sw1p2

sw1p3

sw1p4

br0 None

(When the port is bridged, the output repeats the vlan list for the vlans
on the bridge side of the port and the vlans on the device side of the
port.  The listing above shows no vlans for the device side even though they
are installed).

After this patch:

$ bridge -c vlan show
portvlan ids
sw1p130-34  << bridge side vlan
 57

sw1p130-34  << device side vlans
 57
 3840 PVID

sw1p257

sw1p257
 3840 PVID

sw1p33842 PVID

sw1p43843 PVID

br0 None

I re-used ndo_dflt_bridge_getlink to add vlan fill call-back func.
switchdev support adds an obj dump for VLAN objects, using the same
call-back scheme as FDB dump.  Support included for both compressed and
un-compressed vlan dumps.

Signed-off-by: Scott Feldman 
---
 drivers/net/ethernet/emulex/benet/be_main.c   |2 +-
 drivers/net/ethernet/intel/i40e/i40e_main.c   |4 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |3 +-
 drivers/net/ethernet/rocker/rocker.c  |   25 +
 include/linux/rtnetlink.h |6 +-
 net/core/rtnetlink.c  |   18 +++-
 net/switchdev/switchdev.c |  123 -
 7 files changed, 172 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_main.c 
b/drivers/net/ethernet/emulex/benet/be_main.c
index c0f3484..6f64242 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -5096,7 +5096,7 @@ static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 
pid, u32 seq,
return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
   hsw_mode == PORT_FWD_TYPE_VEPA ?
   BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
-  0, 0, nlflags);
+  0, 0, nlflags, filter_mask, NULL);
 }
 
 #ifdef CONFIG_BE2NET_VXLAN
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c 
b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 52d7d8b..48a52b3 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -8069,7 +8069,7 @@ static int i40e_ndo_bridge_setlink(struct net_device *dev,
 #ifdef HAVE_BRIDGE_FILTER
 static int i40e_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
   struct net_device *dev,
-  u32 __always_unused filter_mask, int nlflags)
+  u32 filter_mask, int nlflags)
 #else
 static int i40e_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
   struct net_device *dev, int nlflags)
@@ -8095,7 +8095,7 @@ static int i40e_ndo_bridge_getlink(struct sk_buff *skb, 
u32 pid, u32 seq,
return 0;
 
return ndo_dflt_bridge_getlink(skb, pid, seq, dev, veb->bridge_mode,
-  nlflags);
+  nlflags, 0, 0, filter_mask, NULL);
 }
 #endif /* HAVE_BRIDGE_ATTRIBS */
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 3bf2f3c..9aa6104 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -8095,7 +8095,8 @@ static int ixgbe_ndo_bridge_getlink(struct sk_buff *skb, 
u32 pid, u32 seq,
return 0;
 
return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
-  adapter->bridge_mode, 0, 0, nlflags);
+  adapter->bridge_mode, 0, 0, nlflags,
+  filter_mask, NULL);
 }
 
 static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev)
diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index 76c3086..e6ae0a1 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -4456,6 +4456,28 @@ static int rocker_port_fdb_dump(const struct rocker_port 
*rocker_port,
return err;
 }
 
+static int rocker_port_vlan_dump(const struct rocker_

[PATCH net-next 0/2] switchdev; add VLAN support for port's bridge_getlink

2015-06-22 Thread sfeldma
From: Scott Feldman 

One more missing piece of the puzzle.  Add vlan dump support to switchdev
port's bridge_getlink.  iproute2 "bridge vlan show" cmd already knows how
to show the vlans installed on the bridge and the device, but (until now)
no one implemented the port vlan part of the netlink PF_BRIDGE:RTM_GETLINK
msg.  Before this patch, "bridge vlan show":

$ bridge -c vlan show
portvlan ids
sw1p130-34  << bridge side vlans
 57

sw1p1   << device side vlans (missing)

sw1p257

sw1p2

sw1p3

sw1p4

br0 None

(When the port is bridged, the output repeats the vlan list for the vlans
on the bridge side of the port and the vlans on the device side of the
port.  The listing above shows no vlans for the device side even though they
are installed).

After this patch:

$ bridge -c vlan show
portvlan ids
sw1p130-34  << bridge side vlan
 57

sw1p130-34  << device side vlans
 57
 3840 PVID

sw1p257

sw1p257
 3840 PVID

sw1p33842 PVID

sw1p43843 PVID

br0 None


Scott Feldman (2):
  switchdev: rename vlan vid_start to vid_begin
  switchdev; add VLAN support for port's bridge_getlink

 drivers/net/ethernet/emulex/benet/be_main.c   |2 +-
 drivers/net/ethernet/intel/i40e/i40e_main.c   |4 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |3 +-
 drivers/net/ethernet/rocker/rocker.c  |   29 +-
 include/linux/rtnetlink.h |6 +-
 include/net/switchdev.h   |2 +-
 net/bridge/br_vlan.c  |4 +-
 net/core/rtnetlink.c  |   18 +++-
 net/switchdev/switchdev.c |  135 +++--
 9 files changed, 183 insertions(+), 20 deletions(-)

-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in


[PATCH net-next] switchdev: fdb filter_dev is always NULL for self (device), so remove check

2015-06-17 Thread sfeldma
From: Scott Feldman 

Remove the filter_dev check when dumping fdb entries, otherwise dump
returns empty list.  filter_dev is always passed as NULL when dumping fdbs
on SELF.  We want the fdbs installed on the device to be listed in the
dump.

Signed-off-by: Scott Feldman 
Fixes: 45d4122c ("switchdev: add support for fdb add/del/dump via 
switchdev_port_obj ops")
---
 net/switchdev/switchdev.c |6 --
 1 file changed, 6 deletions(-)

diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index a5d0f8e..7dda437 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -657,7 +657,6 @@ struct switchdev_fdb_dump {
struct switchdev_obj obj;
struct sk_buff *skb;
struct netlink_callback *cb;
-   struct net_device *filter_dev;
int idx;
 };
 
@@ -670,14 +669,10 @@ static int switchdev_port_fdb_dump_cb(struct net_device 
*dev,
u32 seq = dump->cb->nlh->nlmsg_seq;
struct nlmsghdr *nlh;
struct ndmsg *ndm;
-   struct net_device *master = netdev_master_upper_dev_get(dev);
 
if (dump->idx < dump->cb->args[0])
goto skip;
 
-   if (master && dump->filter_dev != master)
-   goto skip;
-
nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
sizeof(*ndm), NLM_F_MULTI);
if (!nlh)
@@ -731,7 +726,6 @@ int switchdev_port_fdb_dump(struct sk_buff *skb, struct 
netlink_callback *cb,
},
.skb = skb,
.cb = cb,
-   .filter_dev = filter_dev,
.idx = idx,
};
int err;
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH net-next v2 1/5] net: don't reforward packets already forwarded by offload device

2015-06-17 Thread sfeldma
From: Scott Feldman 

Just before queuing skb for xmit on port, check if skb has been marked by
switchdev port driver as already forwarded by device.  If so, drop skb.  A
non-zero skb->offload_fwd_mark field is set by the switchdev port
driver/device on ingress to indicate the skb has already been forwarded by
the device to egress ports with matching dev->offload_fwd_mark.  The
switchdev port driver would assign a non-zero dev->offload_fwd_mark for each
device port netdev during registration, for example.

Signed-off-by: Scott Feldman 
---
 include/linux/netdevice.h |6 ++
 include/linux/skbuff.h|4 
 net/core/dev.c|   10 ++
 3 files changed, 20 insertions(+)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e20979d..7be616e1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1448,6 +1448,8 @@ enum netdev_priv_flags {
  *
  * @xps_maps:  XXX: need comments on this one
  *
+ * @offload_fwd_mark:  Offload device fwding mark
+ *
  * @trans_start:   Time (in jiffies) of last Tx
  * @watchdog_timeo:Represents the timeout that is used by
  * the watchdog ( see dev_watchdog() )
@@ -1685,6 +1687,10 @@ struct net_device {
struct xps_dev_maps __rcu *xps_maps;
 #endif
 
+#ifdef CONFIG_NET_SWITCHDEV
+   u32 offload_fwd_mark;
+#endif
+
/* These may be needed for future network-power-down code. */
 
/*
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d6cdd6e..1533c4f 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -506,6 +506,7 @@ static inline u32 skb_mstamp_us_delta(const struct 
skb_mstamp *t1,
  * @no_fcs:  Request NIC to treat last 4 bytes as Ethernet FCS
   *@napi_id: id of the NAPI struct this skb came from
  * @secmark: security marking
+ * @offload_fwd_mark: fwding offload mark
  * @mark: Generic packet mark
  * @vlan_proto: vlan encapsulation protocol
  * @vlan_tci: vlan tag control information
@@ -653,6 +654,9 @@ struct sk_buff {
 #ifdef CONFIG_NETWORK_SECMARK
__u32   secmark;
 #endif
+#ifdef CONFIG_NET_SWITCHDEV
+   __u32   offload_fwd_mark;
+#endif
union {
__u32   mark;
__u32   reserved_tailroom;
diff --git a/net/core/dev.c b/net/core/dev.c
index 6778a99..9eb517e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3065,6 +3065,16 @@ static int __dev_queue_xmit(struct sk_buff *skb, void 
*accel_priv)
else
skb_dst_force(skb);
 
+#ifdef CONFIG_NET_SWITCHDEV
+   /* Don't forward if offload device already forwarded */
+   if (skb->offload_fwd_mark &&
+   skb->offload_fwd_mark == dev->offload_fwd_mark) {
+   consume_skb(skb);
+   rc = NET_XMIT_SUCCESS;
+   goto out;
+   }
+#endif
+
txq = netdev_pick_tx(dev, skb, accel_priv);
q = rcu_dereference_bh(txq->qdisc);
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH net-next v2 3/5] switchdev: add offload_fwd_mark generator helper

2015-06-17 Thread sfeldma
From: Scott Feldman 

skb->offload_fwd_mark and dev->offload_fwd_mark are 32-bit and should be
unique for device and may even be unique for a sub-set of ports within
device, so add switchdev helper function to generate unique marks based on
port's switch ID and group_ifindex.  group_ifindex would typically be the
container dev's ifindex, such as the bridge's ifindex.

The generator uses a global hash table to store offload_fwd_marks hashed by
{switch ID, group_ifindex} key.

Signed-off-by: Scott Feldman 
---
 include/net/switchdev.h   |9 
 net/switchdev/switchdev.c |  103 +
 2 files changed, 112 insertions(+)

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 437f8fe..d882902 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -157,6 +157,9 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr 
*tb[],
 int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
struct net_device *dev,
struct net_device *filter_dev, int idx);
+void switchdev_port_fwd_mark_set(struct net_device *dev,
+struct net_device *group_dev,
+bool joining);
 
 #else
 
@@ -271,6 +274,12 @@ static inline int switchdev_port_fdb_dump(struct sk_buff 
*skb,
return -EOPNOTSUPP;
 }
 
+static inline void switchdev_port_fwd_mark_set(struct net_device *dev,
+  struct net_device *group_dev,
+  bool joining)
+{
+}
+
 #endif
 
 #endif /* _LINUX_SWITCHDEV_H_ */
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 00c67a5..6cb30bf 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -920,3 +920,106 @@ void switchdev_fib_ipv4_abort(struct fib_info *fi)
fi->fib_net->ipv4.fib_offload_disabled = true;
 }
 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort);
+
+static bool switchdev_port_same_parent_id(struct net_device *a,
+ struct net_device *b)
+{
+   struct switchdev_attr a_attr = {
+   .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
+   .flags = SWITCHDEV_F_NO_RECURSE,
+   };
+   struct switchdev_attr b_attr = {
+   .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
+   .flags = SWITCHDEV_F_NO_RECURSE,
+   };
+
+   if (switchdev_port_attr_get(a, &a_attr) ||
+   switchdev_port_attr_get(b, &b_attr))
+   return false;
+
+   return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid);
+}
+
+static u32 switchdev_port_fwd_mark_get(struct net_device *dev,
+  struct net_device *group_dev)
+{
+   struct net_device *lower_dev;
+   struct list_head *iter;
+
+   netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
+   if (lower_dev == dev)
+   continue;
+   if (switchdev_port_same_parent_id(dev, lower_dev))
+   return lower_dev->offload_fwd_mark;
+   return switchdev_port_fwd_mark_get(dev, lower_dev);
+   }
+
+   return dev->ifindex;
+}
+
+static void switchdev_port_fwd_mark_reset(struct net_device *group_dev,
+ u32 old_mark, u32 *reset_mark)
+{
+   struct net_device *lower_dev;
+   struct list_head *iter;
+
+   netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
+   if (lower_dev->offload_fwd_mark == old_mark) {
+   if (!*reset_mark)
+   *reset_mark = lower_dev->ifindex;
+   lower_dev->offload_fwd_mark = *reset_mark;
+   }
+   switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark);
+   }
+}
+
+/**
+ * switchdev_port_fwd_mark_set - Set port offload forwarding mark
+ *
+ * @dev: port device
+ * @group_dev: containing device
+ * @joining: true if dev is joining group; false if leaving group
+ *
+ * An ungrouped port's offload mark is just its ifindex.  A grouped
+ * port's (member of a bridge, for example) offload mark is the ifindex
+ * of one of the ports in the group with the same parent (switch) ID.
+ * Ports on the same device in the same group will have the same mark.
+ *
+ * Example:
+ *
+ * br0 ifindex=9
+ *   sw1p1 ifindex=2   mark=2
+ *   sw1p2 ifindex=3   mark=2
+ *   sw2p1 ifindex=4   mark=5
+ *   sw2p2 ifindex=5   mark=5
+ *
+ * If sw2p2 leaves the bridge, we'll have:
+ *
+ * br0 ifindex=9
+ *   sw1p1 ifindex=2   mark=2
+ *   sw1p2 ifindex=3   mark=2
+ *   sw2p1 ifindex=4   mark=4
+ * sw2p2   ifindex=5

[RFC PATCH net-next v2 2/5] net: add phys ID compare helper to test if two IDs are the same

2015-06-17 Thread sfeldma
From: Scott Feldman 

Signed-off-by: Scott Feldman 
---
 include/linux/netdevice.h |7 +++
 net/switchdev/switchdev.c |8 ++--
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7be616e1..63090ce 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -766,6 +766,13 @@ struct netdev_phys_item_id {
unsigned char id_len;
 };
 
+static inline bool netdev_phys_item_id_same(struct netdev_phys_item_id *a,
+   struct netdev_phys_item_id *b)
+{
+   return ((a->id_len == b->id_len) &&
+   (memcmp(a->id, b->id, a->id_len) == 0));
+}
+
 typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
   struct sk_buff *skb);
 
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index a5d0f8e..00c67a5 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -791,13 +791,9 @@ static struct net_device *switchdev_get_dev_by_nhs(struct 
fib_info *fi)
if (switchdev_port_attr_get(dev, &attr))
return NULL;
 
-   if (nhsel > 0) {
-   if (prev_attr.u.ppid.id_len != attr.u.ppid.id_len)
+   if (nhsel > 0 &&
+   !netdev_phys_item_id_same(&prev_attr.u.ppid, &attr.u.ppid))
return NULL;
-   if (memcmp(prev_attr.u.ppid.id, attr.u.ppid.id,
-  attr.u.ppid.id_len))
-   return NULL;
-   }
 
prev_attr = attr;
}
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH net-next v2 5/5] switchdev: update documentation for offload_fwd_mark

2015-06-17 Thread sfeldma
From: Scott Feldman 

Signed-off-by: Scott Feldman 
---
 Documentation/networking/switchdev.txt |   14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/Documentation/networking/switchdev.txt 
b/Documentation/networking/switchdev.txt
index c5d7ade..b864e47 100644
--- a/Documentation/networking/switchdev.txt
+++ b/Documentation/networking/switchdev.txt
@@ -279,8 +279,18 @@ and unknown unicast packets to all ports in domain, if 
allowed by port's
 current STP state.  The switch driver, knowing which ports are within which
 vlan L2 domain, can program the switch device for flooding.  The packet should
 also be sent to the port netdev for processing by the bridge driver.  The
-bridge should not reflood the packet to the same ports the device flooded.
-XXX: the mechanism to avoid duplicate flood packets is being discuseed.
+bridge should not reflood the packet to the same ports the device flooded,
+otherwise there will be duplicate packets on the wire.
+
+To avoid duplicate packets, the device/driver can mark a packet as already
+forwarded using skb->offload_fwd_mark.  The same mark is set on the device
+ports in the domain using dev->offload_fwd_mark.  If the skb->offload_fwd_mark
+is non-zero and matches the forwarding egress port's dev->skb_mark, the kernel
+will drop the skb right before transmit on the egress port, with the
+understanding that the device already forwarded the packet on same egress port.
+The driver can use switchdev_port_fwd_mark_set() to set a globally unique mark
+for port's dev->offload_fwd_mark, based on the port's parent ID (switch ID) and
+a group ifindex.
 
 It is possible for the switch device to not handle flooding and push the
 packets up to the bridge driver for flooding.  This is not ideal as the number
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH net-next v2 4/5] rocker: add offload_fwd_mark support

2015-06-17 Thread sfeldma
From: Scott Feldman 

If device flags ingress packet as "fwd offload", mark the
skb->offload_fwd_mark using the ingress port's dev->offload_fwd_mark.  This
will be the hint to the kernel that this packet has already been forwarded
by device to egress ports matching skb->offload_fwd_mark.

For rocker, derive port dev->offload_fwd_mark based on device switch ID and
port ifindex.  If port is bridged, use the bridge ifindex rather than the
port ifindex.

Signed-off-by: Scott Feldman 
---
 drivers/net/ethernet/rocker/rocker.c |   14 +-
 drivers/net/ethernet/rocker/rocker.h |1 +
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index d4ec660..b72674c 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -4701,6 +4701,7 @@ static int rocker_port_rx_proc(const struct rocker 
*rocker,
const struct rocker_tlv *attrs[ROCKER_TLV_RX_MAX + 1];
struct sk_buff *skb = rocker_desc_cookie_ptr_get(desc_info);
size_t rx_len;
+   u16 rx_flags = 0;
 
if (!skb)
return -ENOENT;
@@ -4708,6 +4709,8 @@ static int rocker_port_rx_proc(const struct rocker 
*rocker,
rocker_tlv_parse_desc(attrs, ROCKER_TLV_RX_MAX, desc_info);
if (!attrs[ROCKER_TLV_RX_FRAG_LEN])
return -EINVAL;
+   if (attrs[ROCKER_TLV_RX_FLAGS])
+   rx_flags = rocker_tlv_get_u16(attrs[ROCKER_TLV_RX_FLAGS]);
 
rocker_dma_rx_ring_skb_unmap(rocker, attrs);
 
@@ -4715,6 +4718,9 @@ static int rocker_port_rx_proc(const struct rocker 
*rocker,
skb_put(skb, rx_len);
skb->protocol = eth_type_trans(skb, rocker_port->dev);
 
+   if (rx_flags & ROCKER_RX_FLAGS_FWD_OFFLOAD)
+   skb->offload_fwd_mark = rocker_port->dev->offload_fwd_mark;
+
rocker_port->dev->stats.rx_packets++;
rocker_port->dev->stats.rx_bytes += skb->len;
 
@@ -4852,11 +4858,13 @@ static int rocker_probe_port(struct rocker *rocker, 
unsigned int port_number)
}
rocker->ports[port_number] = rocker_port;
 
+   switchdev_port_fwd_mark_set(rocker_port->dev, NULL, false);
+
rocker_port_set_learning(rocker_port, SWITCHDEV_TRANS_NONE);
 
err = rocker_port_ig_tbl(rocker_port, SWITCHDEV_TRANS_NONE, 0);
if (err) {
-   dev_err(&pdev->dev, "install ig port table failed\n");
+   netdev_err(rocker_port->dev, "install ig port table failed\n");
goto err_port_ig_tbl;
}
 
@@ -4876,6 +4884,7 @@ err_untagged_vlan:
rocker_port_ig_tbl(rocker_port, SWITCHDEV_TRANS_NONE,
   ROCKER_OP_FLAG_REMOVE);
 err_port_ig_tbl:
+   rocker->ports[port_number] = NULL;
unregister_netdev(dev);
 err_register_netdev:
free_netdev(dev);
@@ -5131,6 +5140,7 @@ static int rocker_port_bridge_join(struct rocker_port 
*rocker_port,
rocker_port_internal_vlan_id_get(rocker_port, bridge->ifindex);
 
rocker_port->bridge_dev = bridge;
+   switchdev_port_fwd_mark_set(rocker_port->dev, bridge, true);
 
return rocker_port_vlan_add(rocker_port, SWITCHDEV_TRANS_NONE,
untagged_vid, 0);
@@ -5151,6 +5161,8 @@ static int rocker_port_bridge_leave(struct rocker_port 
*rocker_port)
rocker_port_internal_vlan_id_get(rocker_port,
 rocker_port->dev->ifindex);
 
+   switchdev_port_fwd_mark_set(rocker_port->dev, rocker_port->bridge_dev,
+   false);
rocker_port->bridge_dev = NULL;
 
err = rocker_port_vlan_add(rocker_port, SWITCHDEV_TRANS_NONE,
diff --git a/drivers/net/ethernet/rocker/rocker.h 
b/drivers/net/ethernet/rocker/rocker.h
index c61fbf9..f846c0d 100644
--- a/drivers/net/ethernet/rocker/rocker.h
+++ b/drivers/net/ethernet/rocker/rocker.h
@@ -245,6 +245,7 @@ enum {
 #define ROCKER_RX_FLAGS_TCPBIT(5)
 #define ROCKER_RX_FLAGS_UDPBIT(6)
 #define ROCKER_RX_FLAGS_TCP_UDP_CSUM_GOOD  BIT(7)
+#define ROCKER_RX_FLAGS_FWD_OFFLOADBIT(8)
 
 enum {
ROCKER_TLV_TX_UNSPEC,
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH net-next v2 0/5] switchdev: avoid duplicate packet forwarding

2015-06-17 Thread sfeldma
From: Scott Feldman 

(RFC because we're at rc7+ now)

v2:

 - s/fwd_mark/offload_fwd_mark
 - use consume_skb rather than kfree_skb when dropping pkt on egress.
 - Use Jiri's suggestion to use ifindex of one of the ports in a group
   as the mark for all the ports in the group.  This can be done with
   no additional storage (no hashtable from v1).  To pull it off, we
   need some simple recursive routines to walk the netdev tree ensuring
   all leaves in the tree (ports) in the same group (e.g. bridge)
   belonging to the same switch device will have the same offload fwd mark.
   Maybe someone sees a better design for the recursive routines?  They're
   not too bad, and should cover the stacked driver cases.

v1:

With switchdev support for offloading L2/L3 forwarding data path to a
switch device, we have a general problem where both the device and the
kernel may forward the packet, resulting in duplicate packets on the wire.
Anytime a packet is forwarded by the device and a copy is sent to the CPU,
there is potential for duplicate forwarding, as the kernel may also do a
forwarding lookup and send the packet on the wire.

The specific problem this patch series is interested in solving is avoiding
duplicate packets on bridged ports.  There was a previous RFC from Roopa
(http://marc.info/?l=linux-netdev&m=142687073314252&w=2) to address this
problem, but didn't solve the problem of mixed ports in the bridge from
different devices; there was no way to exclude some ports from forwarding
and include others.  This RFC solves that problem by tagging the ingressing
packet with a unique mark, and then comparing the packet mark with the
egress port mark, and skip forwarding when there is a match.  For the mixed
ports bridge case, only those ports with matching marks are skipped.

The switchdev port driver must do two things:

1) Generate a fwd_mark for each switch port, using some unique key of the
   switch device (and optionally port).  This is done when the port netdev
   is registered or if the port's group membership changes (joins/leaves
   a bridge, for example).

2) On packet ingress from port, mark the skb with the ingress port's
   fwd_mark.  If the device supports it, it's useful to only mark skbs
   which were already forwarded by the device.  If the device does not
   support such indication, all skbs can be marked, even if they're
   local dst.

Two new 32-bit fields are added to struct sk_buff and struct netdevice to
hold the fwd_mark.  I've wrapped these with CONFIG_NET_SWITCHDEV for now. I
tried using skb->mark for this purpose, but ebtables can overwrite the
skb->mark before the bridge gets it, so that will not work.

In general, this fwd_mark can be used for any case where a packet is
forwarded by the device and a copy is sent to the CPU, to avoid the kernel
re-forwarding the packet.  sFlow is another use-case that comes to mind,
but I haven't explored the details.



Scott Feldman (5):
  net: don't reforward packets already forwarded by offload device
  net: add phys ID compare helper to test if two IDs are the same
  switchdev: add offload_fwd_mark generator helper
  rocker: add offload_fwd_mark support
  switchdev: update documentation for offload_fwd_mark

 Documentation/networking/switchdev.txt |   14 +++-
 drivers/net/ethernet/rocker/rocker.c   |   14 +++-
 drivers/net/ethernet/rocker/rocker.h   |1 +
 include/linux/netdevice.h  |   13 
 include/linux/skbuff.h |4 ++
 include/net/switchdev.h|9 +++
 net/core/dev.c |   10 +++
 net/switchdev/switchdev.c  |  111 ++--
 8 files changed, 167 insertions(+), 9 deletions(-)

-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next] bridge: del external_learned fdbs from device on flush or ageout

2015-06-14 Thread sfeldma
From: Scott Feldman 

We need to delete externally learned fdbs from the offload device when any
one of these events happens:

1) Bridge ages out fdb.  (When bridge is doing ageing vs. device doing
ageing.  If device is doing ageing, it would send SWITCHDEV_FDB_DEL
directly).

2) STP state change flushes fdbs on port.

3) User uses sysfs interface to flush fdbs from bridge or bridge port:

echo 1 >/sys/class/net/BR_DEV/bridge/flush
echo 1 >/sys/class/net/BR_PORT/brport/flush

4) Offload driver sends event SWITCHDEV_FDB_DEL to delete fdb entry.

For rocker, we can now get called to delete fdb entry in wait and nowait
contexts, so set NOWAIT flag when deleting fdb entry.

Signed-off-by: Scott Feldman 
---
 Documentation/networking/switchdev.txt |   11 ++-
 drivers/net/ethernet/rocker/rocker.c   |2 +-
 net/bridge/br_fdb.c|   17 +
 3 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/Documentation/networking/switchdev.txt 
b/Documentation/networking/switchdev.txt
index 4a94ebc..e460007 100644
--- a/Documentation/networking/switchdev.txt
+++ b/Documentation/networking/switchdev.txt
@@ -251,15 +251,8 @@ out stale FDB entries.  To keep an FDB entry "alive", the 
driver should refresh
 the FDB entry by calling call_switchdev_notifiers(SWITCHDEV_FDB_ADD, ...).  The
 notification will reset the FDB entry's last-used time to now.  The driver
 should rate limit refresh notifications, for example, no more than once a
-second.  If the FDB entry expires, ndo_fdb_del is called to remove entry from
-the device.  XXX: this last part isn't currently correct: ndo_fdb_del isn't
-called, so the stale entry remains in device...this need to get fixed.
-
-FDB Flush
-^
-
-XXX: Unimplemented.  Need to support FDB flush by bridge driver for port and
-remove both static and learned FDB entries.
+second.  If the FDB entry expires, fdb_delete is called to remove entry from
+the device.
 
 STP State Change on Port
 
diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index 3aa6caf..22d68c1 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -4392,7 +4392,7 @@ static int rocker_port_fdb_del(struct rocker_port 
*rocker_port,
   const struct switchdev_obj_fdb *fdb)
 {
__be16 vlan_id = rocker_port_vid_to_vlan(rocker_port, fdb->vid, NULL);
-   int flags = ROCKER_OP_FLAG_REMOVE;
+   int flags = ROCKER_OP_FLAG_NOWAIT | ROCKER_OP_FLAG_REMOVE;
 
if (!rocker_port_is_bridged(rocker_port))
return -EINVAL;
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 13949a7..be84b7e 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -24,6 +24,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "br_private.h"
 
 static struct kmem_cache *br_fdb_cache __read_mostly;
@@ -130,11 +131,27 @@ static void fdb_del_hw_addr(struct net_bridge *br, const 
unsigned char *addr)
}
 }
 
+static void fdb_del_external_learn(struct net_bridge_fdb_entry *f)
+{
+   struct switchdev_obj obj = {
+   .id = SWITCHDEV_OBJ_PORT_FDB,
+   .u.fdb = {
+   .addr = f->addr.addr,
+   .vid = f->vlan_id,
+   },
+   };
+
+   switchdev_port_obj_del(f->dst->dev, &obj);
+}
+
 static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f)
 {
if (f->is_static)
fdb_del_hw_addr(br, f->addr.addr);
 
+   if (f->added_by_external_learn)
+   fdb_del_external_learn(f);
+
hlist_del_rcu(&f->hlist);
fdb_notify(br, f, RTM_DELNEIGH);
call_rcu(&f->rcu, fdb_rcu_free);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH net-next 2/4] switchdev: add fwd_mark generator helper

2015-06-13 Thread sfeldma
From: Scott Feldman 

skb->fwd_mark and dev->fwd_mark are 32-bit and should be unique for device
and maybe even unique for a sub-set of ports within device, so add
switchdev helper function to generate unique marks based on driver-supplied
key.  Typically, the driver would use device switch ID for key, and maybe
additional fields in key for grouped ports such as bridge ifindex.  The key
can be of arbitrary length.

The generator uses a global hash table to store fwd_marks hashed by key.

Signed-off-by: Scott Feldman 
---
 include/net/switchdev.h   |6 
 net/switchdev/switchdev.c |   72 +
 2 files changed, 78 insertions(+)

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 437f8fe..6eaceee 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -157,6 +157,7 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr 
*tb[],
 int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
struct net_device *dev,
struct net_device *filter_dev, int idx);
+u32 switchdev_mark_get(void *key, size_t key_len);
 
 #else
 
@@ -271,6 +272,11 @@ static inline int switchdev_port_fdb_dump(struct sk_buff 
*skb,
return -EOPNOTSUPP;
 }
 
+static inline u32 switchdev_mark_get(void *key, size_t key_len)
+{
+   return 0;
+}
+
 #endif
 
 #endif /* _LINUX_SWITCHDEV_H_ */
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index a5d0f8e..9ca37b3 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -16,6 +16,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 #include 
 
@@ -924,3 +926,73 @@ void switchdev_fib_ipv4_abort(struct fib_info *fi)
fi->fib_net->ipv4.fib_offload_disabled = true;
 }
 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort);
+
+#define SWITCHDEV_MARK_HT_BITS 5
+static DEFINE_HASHTABLE(switchdev_mark_ht, SWITCHDEV_MARK_HT_BITS);
+static DEFINE_SPINLOCK(switchdev_mark_lock);
+static u32 switchdev_mark_next = 1;
+
+/**
+ * switchdev_mark_get - Generate a unique mark for key
+ *
+ * @key: key used to generate mark
+ * @key_len: length of key in bytes
+ *
+ * Returns unique 32-bit mark for given key, or 0 if error.
+ * A small global hash table stores the marks for each key.
+ * The length of the key and key contents are arbitrary.
+ * The marks can be used, for example, to skb->fwd_mark a pkt
+ * to associate the skb with a key.
+ */
+u32 switchdev_mark_get(void *key, size_t key_len)
+{
+   struct switchdev_mark_ht_entry {
+   struct hlist_node entry;
+   void *key;
+   size_t key_len;
+   u32 key_crc32;
+   u32 mark;
+   } *entry;
+   u32 key_crc32 = crc32(~0, key, key_len);
+   u32 mark = 0;
+   unsigned long flags;
+
+   spin_lock_irqsave(&switchdev_mark_lock, flags);
+   hash_for_each_possible(switchdev_mark_ht, entry,
+  entry, key_crc32) {
+   if (entry->key_len != key_len)
+   continue;
+   if (memcmp(entry->key, key, key_len) == 0) {
+   mark = entry->mark;
+   break;
+   }
+   }
+   spin_unlock_irqrestore(&switchdev_mark_lock, flags);
+
+   if (mark)
+   goto out;
+
+   entry = kmalloc(GFP_KERNEL, sizeof(*entry));
+   if (!entry)
+   goto out;
+
+   entry->key = kmalloc(GFP_KERNEL, key_len);
+   if (!entry->key) {
+   kfree(entry);
+   goto out;
+   }
+
+   memcpy(entry->key, key, key_len);
+   entry->key_len = key_len;
+   entry->key_crc32 = key_crc32;
+
+   spin_lock_irqsave(&switchdev_mark_lock, flags);
+   mark = switchdev_mark_next++;
+   entry->mark = mark;
+   hash_add(switchdev_mark_ht, &entry->entry, key_crc32);
+   spin_unlock_irqrestore(&switchdev_mark_lock, flags);
+
+out:
+   return mark;
+}
+EXPORT_SYMBOL_GPL(switchdev_mark_get);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH net-next 3/4] rocker: add fwd_mark support

2015-06-13 Thread sfeldma
From: Scott Feldman 

If device flags ingress packet as "fwd offload", mark the skb->fwd_mark
using the ingress port's dev->fwd_mark.  This will be the hint to the
kernel that this packet has already been forwarded by device to egress
ports matching skb->fwd_mark.

For rocker, derive port dev->fwd_mark based on device switch ID.  If port
is bridged, include the bridge's ifindex in the key for deriving
dev->fwd_mark.

Signed-off-by: Scott Feldman 
---
 drivers/net/ethernet/rocker/rocker.c |   24 
 drivers/net/ethernet/rocker/rocker.h |1 +
 2 files changed, 25 insertions(+)

diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index a06b93d..81407d8 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -4701,6 +4701,7 @@ static int rocker_port_rx_proc(const struct rocker 
*rocker,
const struct rocker_tlv *attrs[ROCKER_TLV_RX_MAX + 1];
struct sk_buff *skb = rocker_desc_cookie_ptr_get(desc_info);
size_t rx_len;
+   u16 rx_flags = 0;
 
if (!skb)
return -ENOENT;
@@ -4708,6 +4709,8 @@ static int rocker_port_rx_proc(const struct rocker 
*rocker,
rocker_tlv_parse_desc(attrs, ROCKER_TLV_RX_MAX, desc_info);
if (!attrs[ROCKER_TLV_RX_FRAG_LEN])
return -EINVAL;
+   if (attrs[ROCKER_TLV_RX_FLAGS])
+   rx_flags = rocker_tlv_get_u16(attrs[ROCKER_TLV_RX_FLAGS]);
 
rocker_dma_rx_ring_skb_unmap(rocker, attrs);
 
@@ -4715,6 +4718,9 @@ static int rocker_port_rx_proc(const struct rocker 
*rocker,
skb_put(skb, rx_len);
skb->protocol = eth_type_trans(skb, rocker_port->dev);
 
+   if (rx_flags & ROCKER_RX_FLAGS_FWD_OFFLOAD)
+   skb->fwd_mark = rocker_port->dev->fwd_mark;
+
rocker_port->dev->stats.rx_packets++;
rocker_port->dev->stats.rx_bytes += skb->len;
 
@@ -4814,6 +4820,21 @@ static void rocker_port_dev_addr_init(struct rocker_port 
*rocker_port)
}
 }
 
+static void rocker_port_fwd_mark_set(struct rocker_port *rocker_port)
+{
+   struct rocker *rocker = rocker_port->rocker;
+   struct {
+   u64 hw_id;
+   int ifindex;
+   } key = {
+   .hw_id = rocker->hw.id,
+   .ifindex = rocker_port_is_bridged(rocker_port) ?
+  rocker_port->bridge_dev->ifindex : 0,
+   };
+
+   rocker_port->dev->fwd_mark = switchdev_mark_get(&key, sizeof(key));
+}
+
 static int rocker_probe_port(struct rocker *rocker, unsigned int port_number)
 {
const struct pci_dev *pdev = rocker->pdev;
@@ -4832,6 +4853,7 @@ static int rocker_probe_port(struct rocker *rocker, 
unsigned int port_number)
rocker_port->pport = port_number + 1;
rocker_port->brport_flags = BR_LEARNING | BR_LEARNING_SYNC;
INIT_LIST_HEAD(&rocker_port->trans_mem);
+   rocker_port_fwd_mark_set(rocker_port);
 
rocker_port_dev_addr_init(rocker_port);
dev->netdev_ops = &rocker_port_netdev_ops;
@@ -5131,6 +5153,7 @@ static int rocker_port_bridge_join(struct rocker_port 
*rocker_port,
rocker_port_internal_vlan_id_get(rocker_port, bridge->ifindex);
 
rocker_port->bridge_dev = bridge;
+   rocker_port_fwd_mark_set(rocker_port);
 
return rocker_port_vlan_add(rocker_port, SWITCHDEV_TRANS_NONE,
untagged_vid, 0);
@@ -5152,6 +5175,7 @@ static int rocker_port_bridge_leave(struct rocker_port 
*rocker_port)
 rocker_port->dev->ifindex);
 
rocker_port->bridge_dev = NULL;
+   rocker_port_fwd_mark_set(rocker_port);
 
err = rocker_port_vlan_add(rocker_port, SWITCHDEV_TRANS_NONE,
   untagged_vid, 0);
diff --git a/drivers/net/ethernet/rocker/rocker.h 
b/drivers/net/ethernet/rocker/rocker.h
index c61fbf9..f846c0d 100644
--- a/drivers/net/ethernet/rocker/rocker.h
+++ b/drivers/net/ethernet/rocker/rocker.h
@@ -245,6 +245,7 @@ enum {
 #define ROCKER_RX_FLAGS_TCPBIT(5)
 #define ROCKER_RX_FLAGS_UDPBIT(6)
 #define ROCKER_RX_FLAGS_TCP_UDP_CSUM_GOOD  BIT(7)
+#define ROCKER_RX_FLAGS_FWD_OFFLOADBIT(8)
 
 enum {
ROCKER_TLV_TX_UNSPEC,
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH net-next 4/4] switchdev: update documentation for fwd_mark

2015-06-13 Thread sfeldma
From: Scott Feldman 

Signed-off-by: Scott Feldman 
---
 Documentation/networking/switchdev.txt |   13 +++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/Documentation/networking/switchdev.txt 
b/Documentation/networking/switchdev.txt
index da82cd7..d6a8695 100644
--- a/Documentation/networking/switchdev.txt
+++ b/Documentation/networking/switchdev.txt
@@ -286,8 +286,17 @@ and unknown unicast packets to all ports in domain, if 
allowed by port's
 current STP state.  The switch driver, knowing which ports are within which
 vlan L2 domain, can program the switch device for flooding.  The packet should
 also be sent to the port netdev for processing by the bridge driver.  The
-bridge should not reflood the packet to the same ports the device flooded.
-XXX: the mechanism to avoid duplicate flood packets is being discuseed.
+bridge should not reflood the packet to the same ports the device flooded,
+otherwise there will be duplicate packets on the wire.
+
+To avoid duplicate packets, the device/driver can mark a packet as already
+forwarded using skb->fwd_mark.  The same mark is set on the device ports in the
+domain using dev->fwd_mark.  If the skb->fwd_mark is non-zero and matches the
+forwarding egress port's dev->fwd_mark, the kernel will drop the skb right
+before transmit on the egress port, with the understanding that the device
+already forwarded the packet on same egress port.  The driver can use
+switchdev_mark_get() to get a globally unique mark for egress port(s)'
+dev->fwd_mark, based on a driver/device-supplied key.
 
 It is possible for the switch device to not handle flooding and push the
 packets up to the bridge driver for flooding.  This is not ideal as the number
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH net-next 0/4] switchdev: avoid duplicate packet forwarding

2015-06-13 Thread sfeldma
From: Scott Feldman 

(RFC because we're at rc7+ now)

With switchdev support for offloading L2/L3 forwarding data path to a
switch device, we have a general problem where both the device and the
kernel may forward the packet, resulting in duplicate packets on the wire.
Anytime a packet is forwarded by the device and a copy is sent to the CPU,
there is potential for duplicate forwarding, as the kernel may also do a
forwarding lookup and send the packet on the wire.

The specific problem this patch series is interested in solving is avoiding
duplicate packets on bridged ports.  There was a previous RFC from Roopa
(http://marc.info/?l=linux-netdev&m=142687073314252&w=2) to address this
problem, but didn't solve the problem of mixed ports in the bridge from
different devices; there was no way to exclude some ports from forwarding
and include others.  This RFC solves that problem by tagging the ingressing
packet with a unique mark, and then comparing the packet mark with the
egress port mark, and skipping forwarding when there is a match.  For the mixed
ports bridge case, only those ports with matching marks are skipped.

The switchdev port driver must do two things:

1) Generate a fwd_mark for each switch port, using some unique key of the
   switch device (and optionally port).  This is a one-time operation done
   when port's netdev is setup.

2) On packet ingress from port, mark the skb with the ingress port's
   fwd_mark.  If the device supports it, it's useful to only mark skbs
   which were already forwarded by the device.  If the device does not
   support such indication, all skbs can be marked, even if they're
   local dst.

Two new 32-bit fields are added to struct sk_buff and struct net_device to
hold the fwd_mark.  I've wrapped these with CONFIG_NET_SWITCHDEV for now. I
tried using skb->mark for this purpose, but ebtables can overwrite the
skb->mark before the bridge gets it, so that will not work.

In general, this fwd_mark can be used for any case where a packet is
forwarded by the device and a copy is sent to the CPU, to avoid the kernel
re-forwarding the packet.  sFlow is another use-case that comes to mind,
but I haven't explored the details.

Scott Feldman (4):
  net: don't reforward packets already forwarded by offload device
  switchdev: add fwd_mark generator helper
  rocker: add fwd_mark support
  switchdev: update documentation for fwd_mark

 Documentation/networking/switchdev.txt |   13 +-
 drivers/net/ethernet/rocker/rocker.c   |   24 +++
 drivers/net/ethernet/rocker/rocker.h   |1 +
 include/linux/netdevice.h  |6 +++
 include/linux/skbuff.h |4 ++
 include/net/switchdev.h|6 +++
 net/core/dev.c |9 
 net/switchdev/switchdev.c  |   72 
 8 files changed, 133 insertions(+), 2 deletions(-)

-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH net-next 1/4] net: don't reforward packets already forwarded by offload device

2015-06-13 Thread sfeldma
From: Scott Feldman 

Just before queuing skb for xmit on port, check if skb has been marked by
switchdev port driver as already forwarded by device.  If so, drop skb.  A
non-zero skb->fwd_mark field is set by the switchdev port driver/device on
ingress to indicate the skb has already been forwarded by the device to
egress ports with matching dev->fwd_mark.  The switchdev port driver would
assign a non-zero dev->fwd_mark for each device port netdev during
registration, for example.

Signed-off-by: Scott Feldman 
---
 include/linux/netdevice.h |6 ++
 include/linux/skbuff.h|4 
 net/core/dev.c|9 +
 3 files changed, 19 insertions(+)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 6f5f71f..181b08f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1444,6 +1444,8 @@ enum netdev_priv_flags {
  *
  * @xps_maps:  XXX: need comments on this one
  *
+ * @fwd_mark:  Offload device fwding mark
+ *
  * @trans_start:   Time (in jiffies) of last Tx
  * @watchdog_timeo:Represents the timeout that is used by
  * the watchdog ( see dev_watchdog() )
@@ -1681,6 +1683,10 @@ struct net_device {
struct xps_dev_maps __rcu *xps_maps;
 #endif
 
+#ifdef CONFIG_NET_SWITCHDEV
+   u32 fwd_mark;
+#endif
+
/* These may be needed for future network-power-down code. */
 
/*
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index cc612fc..ba98c05 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -501,6 +501,7 @@ static inline u32 skb_mstamp_us_delta(const struct 
skb_mstamp *t1,
  * @no_fcs:  Request NIC to treat last 4 bytes as Ethernet FCS
   *@napi_id: id of the NAPI struct this skb came from
  * @secmark: security marking
+ * @fwd_mark: fwding offload mark
  * @mark: Generic packet mark
  * @vlan_proto: vlan encapsulation protocol
  * @vlan_tci: vlan tag control information
@@ -648,6 +649,9 @@ struct sk_buff {
 #ifdef CONFIG_NETWORK_SECMARK
__u32   secmark;
 #endif
+#ifdef CONFIG_NET_SWITCHDEV
+   __u32   fwd_mark;
+#endif
union {
__u32   mark;
__u32   reserved_tailroom;
diff --git a/net/core/dev.c b/net/core/dev.c
index 6778a99..558bf33 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3065,6 +3065,15 @@ static int __dev_queue_xmit(struct sk_buff *skb, void 
*accel_priv)
else
skb_dst_force(skb);
 
+#ifdef CONFIG_NET_SWITCHDEV
+   /* Don't forward if offload device already forwarded */
+   if (skb->fwd_mark && skb->fwd_mark == dev->fwd_mark) {
+   kfree_skb(skb);
+   rc = NET_XMIT_SUCCESS;
+   goto out;
+   }
+#endif
+
txq = netdev_pick_tx(dev, skb, accel_priv);
q = rcu_dereference_bh(txq->qdisc);
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next 3/5] rocker: mark STP update as 'no wait' processing

2015-06-12 Thread sfeldma
From: Scott Feldman 

We can get STP updates from the bridge driver in atomic and non-atomic
contexts.  Since we can't test what context we're getting called in,
do the STP processing as 'no wait', which will cover all cases.

Signed-off-by: Scott Feldman 
---
 drivers/net/ethernet/rocker/rocker.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index 1995b59..6c15c2e 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -4286,7 +4286,8 @@ static int rocker_port_attr_set(struct net_device *dev,
 
switch (attr->id) {
case SWITCHDEV_ATTR_PORT_STP_STATE:
-   err = rocker_port_stp_update(rocker_port, attr->trans, 0,
+   err = rocker_port_stp_update(rocker_port, attr->trans,
+ROCKER_OP_FLAG_NOWAIT,
 attr->u.stp_state);
break;
case SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS:
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next 1/5] rocker: revert back to support for nowait processes

2015-06-12 Thread sfeldma
From: Scott Feldman 

One of the items removed from the rocker driver in the Spring Cleanup patch
series was the ability to mark processing in the driver as "no wait" for
those contexts where we cannot sleep.  Turns out, we have "no wait"
contexts where we want to program the device.  So re-add the
ROCKER_OP_FLAG_NOWAIT flag to mark such processes, and propagate flags to
mem allocator and to the device cmd executor.  With NOWAIT, mem allocs are
GFP_ATOMIC and device cmds are queued to the device, but the driver will
not wait (sleep) for the response back from the device.

My bad for removing NOWAIT support in the first place; I thought we could
swing non-sleep contexts to process context using a work queue, for
example, but there is push-back to keep processing in original context.

Signed-off-by: Scott Feldman 
---
 drivers/net/ethernet/rocker/rocker.c |  202 +++---
 1 file changed, 112 insertions(+), 90 deletions(-)

diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index a9d1559..c1910c1 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -326,10 +326,18 @@ static bool rocker_port_is_bridged(const struct 
rocker_port *rocker_port)
return !!rocker_port->bridge_dev;
 }
 
+#define ROCKER_OP_FLAG_REMOVE  BIT(0)
+#define ROCKER_OP_FLAG_NOWAIT  BIT(1)
+#define ROCKER_OP_FLAG_LEARNED BIT(2)
+#define ROCKER_OP_FLAG_REFRESH BIT(3)
+
 static void *__rocker_port_mem_alloc(struct rocker_port *rocker_port,
-enum switchdev_trans trans, size_t size)
+enum switchdev_trans trans, int flags,
+size_t size)
 {
struct list_head *elem = NULL;
+   gfp_t gfp_flags = (flags & ROCKER_OP_FLAG_NOWAIT) ?
+ GFP_ATOMIC : GFP_KERNEL;
 
/* If in transaction prepare phase, allocate the memory
 * and enqueue it on a per-port list.  If in transaction
@@ -342,7 +350,7 @@ static void *__rocker_port_mem_alloc(struct rocker_port 
*rocker_port,
 
switch (trans) {
case SWITCHDEV_TRANS_PREPARE:
-   elem = kzalloc(size + sizeof(*elem), GFP_KERNEL);
+   elem = kzalloc(size + sizeof(*elem), gfp_flags);
if (!elem)
return NULL;
list_add_tail(elem, &rocker_port->trans_mem);
@@ -353,7 +361,7 @@ static void *__rocker_port_mem_alloc(struct rocker_port 
*rocker_port,
list_del_init(elem);
break;
case SWITCHDEV_TRANS_NONE:
-   elem = kzalloc(size + sizeof(*elem), GFP_KERNEL);
+   elem = kzalloc(size + sizeof(*elem), gfp_flags);
if (elem)
INIT_LIST_HEAD(elem);
break;
@@ -365,16 +373,17 @@ static void *__rocker_port_mem_alloc(struct rocker_port 
*rocker_port,
 }
 
 static void *rocker_port_kzalloc(struct rocker_port *rocker_port,
-enum switchdev_trans trans, size_t size)
+enum switchdev_trans trans, int flags,
+size_t size)
 {
-   return __rocker_port_mem_alloc(rocker_port, trans, size);
+   return __rocker_port_mem_alloc(rocker_port, trans, flags, size);
 }
 
 static void *rocker_port_kcalloc(struct rocker_port *rocker_port,
-enum switchdev_trans trans, size_t n,
-size_t size)
+enum switchdev_trans trans, int flags,
+size_t n, size_t size)
 {
-   return __rocker_port_mem_alloc(rocker_port, trans, n * size);
+   return __rocker_port_mem_alloc(rocker_port, trans, flags, n * size);
 }
 
 static void rocker_port_kfree(enum switchdev_trans trans, const void *mem)
@@ -397,11 +406,13 @@ static void rocker_port_kfree(enum switchdev_trans trans, 
const void *mem)
 struct rocker_wait {
wait_queue_head_t wait;
bool done;
+   bool nowait;
 };
 
 static void rocker_wait_reset(struct rocker_wait *wait)
 {
wait->done = false;
+   wait->nowait = false;
 }
 
 static void rocker_wait_init(struct rocker_wait *wait)
@@ -411,11 +422,12 @@ static void rocker_wait_init(struct rocker_wait *wait)
 }
 
 static struct rocker_wait *rocker_wait_create(struct rocker_port *rocker_port,
- enum switchdev_trans trans)
+ enum switchdev_trans trans,
+ int flags)
 {
struct rocker_wait *wait;
 
-   wait = rocker_port_kzalloc(rocker_port, trans, sizeof(*wait));
+   wait = rocker_port_kzalloc(rocker_port, trans, flags, sizeof(*wait));
if (!wait)
return NULL;
rocker_wait_init(wait);
@@ -1386,7 +1398,12 @@ static irqreturn_t rocker_cmd_ir

[PATCH net-next 4/5] rocker: move MAC learn event back to 'no wait' processing

2015-06-12 Thread sfeldma
From: Scott Feldman 

Signed-off-by: Scott Feldman 
---
 drivers/net/ethernet/rocker/rocker.c |   40 +++---
 1 file changed, 3 insertions(+), 37 deletions(-)

diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index 6c15c2e..8430cb3 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -1459,36 +1459,14 @@ static int rocker_port_fdb(struct rocker_port 
*rocker_port,
   const unsigned char *addr,
   __be16 vlan_id, int flags);
 
-struct rocker_mac_vlan_seen_work {
-   struct work_struct work;
-   struct rocker_port *rocker_port;
-   int flags;
-   unsigned char addr[ETH_ALEN];
-   __be16 vlan_id;
-};
-
-static void rocker_event_mac_vlan_seen_work(struct work_struct *work)
-{
-   const struct rocker_mac_vlan_seen_work *sw =
-   container_of(work, struct rocker_mac_vlan_seen_work, work);
-
-   rtnl_lock();
-   rocker_port_fdb(sw->rocker_port, SWITCHDEV_TRANS_NONE,
-   sw->addr, sw->vlan_id, sw->flags);
-   rtnl_unlock();
-
-   kfree(work);
-}
-
 static int rocker_event_mac_vlan_seen(const struct rocker *rocker,
  const struct rocker_tlv *info)
 {
-   struct rocker_mac_vlan_seen_work *sw;
const struct rocker_tlv *attrs[ROCKER_TLV_EVENT_MAC_VLAN_MAX + 1];
unsigned int port_number;
struct rocker_port *rocker_port;
const unsigned char *addr;
-   int flags = ROCKER_OP_FLAG_LEARNED;
+   int flags = ROCKER_OP_FLAG_NOWAIT | ROCKER_OP_FLAG_LEARNED;
__be16 vlan_id;
 
rocker_tlv_parse_nested(attrs, ROCKER_TLV_EVENT_MAC_VLAN_MAX, info);
@@ -1510,20 +1488,8 @@ static int rocker_event_mac_vlan_seen(const struct 
rocker *rocker,
rocker_port->stp_state != BR_STATE_FORWARDING)
return 0;
 
-   sw = kmalloc(sizeof(*sw), GFP_ATOMIC);
-   if (!sw)
-   return -ENOMEM;
-
-   INIT_WORK(&sw->work, rocker_event_mac_vlan_seen_work);
-
-   sw->rocker_port = rocker_port;
-   sw->flags = flags;
-   ether_addr_copy(sw->addr, addr);
-   sw->vlan_id = vlan_id;
-
-   schedule_work(&sw->work);
-
-   return 0;
+   return rocker_port_fdb(rocker_port, SWITCHDEV_TRANS_NONE,
+  addr, vlan_id, flags);
 }
 
 static int rocker_event_process(const struct rocker *rocker,
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next 5/5] rocker: move port stop to 'no wait' processing

2015-06-12 Thread sfeldma
From: Scott Feldman 

rocker_port_stop can be called from atomic and non-atomic contexts.  Since
we can't test what context we're getting called in, do the processing as
'no wait', which will cover all cases.

Signed-off-by: Scott Feldman 
---
 drivers/net/ethernet/rocker/rocker.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index 8430cb3..a06b93d 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -4004,7 +4004,8 @@ static int rocker_port_stop(struct net_device *dev)
rocker_port_set_enable(rocker_port, false);
napi_disable(&rocker_port->napi_rx);
napi_disable(&rocker_port->napi_tx);
-   rocker_port_fwd_disable(rocker_port, SWITCHDEV_TRANS_NONE, 0);
+   rocker_port_fwd_disable(rocker_port, SWITCHDEV_TRANS_NONE,
+   ROCKER_OP_FLAG_NOWAIT);
free_irq(rocker_msix_rx_vector(rocker_port), rocker_port);
free_irq(rocker_msix_tx_vector(rocker_port), rocker_port);
rocker_port_dma_rings_fini(rocker_port);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


  1   2   >