[PATCH net-next 4/6] net: qualcomm: rmnet: Refactor the new rmnet dev creation

2017-09-02 Thread Subash Abhinov Kasiviswanathan
Data format can be directly set from rmnet_newlink() since the
rmnet real dev info is already available.

Since __rmnet_get_real_dev_info() is no longer used in rmnet_config.c
after removal of those functions, move content to
rmnet_get_real_dev_info().

__rmnet_set_endpoint_config() is collapsed into
rmnet_set_endpoint_config() since only mux_id was being set additionally
within it. Remove an unnecessary mux_id check.

Set the mux_id for the rmnet_dev within rmnet_vnd_newlink() itself.

Signed-off-by: Subash Abhinov Kasiviswanathan 
Cc: Dan Williams 
---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 96 +-
 drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c| 13 ++-
 drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h|  1 -
 3 files changed, 26 insertions(+), 84 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c 
b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
index 3ca98ec..c0aed7c 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
@@ -57,16 +57,6 @@ static int rmnet_is_real_dev_registered(const struct 
net_device *real_dev)
return (rx_handler == rmnet_rx_handler);
 }
 
-/* Needs either rcu_read_lock() or rtnl lock */
-static struct rmnet_real_dev_info*
-__rmnet_get_real_dev_info(const struct net_device *real_dev)
-{
-   if (rmnet_is_real_dev_registered(real_dev))
-   return rcu_dereference_rtnl(real_dev->rx_handler_data);
-   else
-   return NULL;
-}
-
 /* Needs rtnl lock */
 static struct rmnet_real_dev_info*
 rmnet_get_real_dev_info_rtnl(const struct net_device *real_dev)
@@ -83,10 +73,7 @@ static int rmnet_is_real_dev_registered(const struct 
net_device *real_dev)
if (!rmnet_is_real_dev_registered(dev)) {
ep = rmnet_vnd_get_endpoint(dev);
} else {
-   r = __rmnet_get_real_dev_info(dev);
-
-   if (!r)
-   return NULL;
+   r = rmnet_get_real_dev_info_rtnl(dev);
 
ep = >muxed_ep[config_id];
}
@@ -139,70 +126,23 @@ static int rmnet_register_real_device(struct net_device 
*real_dev)
return 0;
 }
 
-static int rmnet_set_ingress_data_format(struct net_device *dev, u32 idf)
-{
-   struct rmnet_real_dev_info *r;
-
-   netdev_dbg(dev, "Ingress format 0x%08X\n", idf);
-
-   r = __rmnet_get_real_dev_info(dev);
-
-   r->ingress_data_format = idf;
-
-   return 0;
-}
-
-static int rmnet_set_egress_data_format(struct net_device *dev, u32 edf,
-   u16 agg_size, u16 agg_count)
+static void rmnet_set_endpoint_config(struct net_device *dev,
+ u8 mux_id, u8 rmnet_mode,
+ struct net_device *egress_dev)
 {
-   struct rmnet_real_dev_info *r;
-
-   netdev_dbg(dev, "Egress format 0x%08X agg size %d cnt %d\n",
-  edf, agg_size, agg_count);
-
-   r = __rmnet_get_real_dev_info(dev);
-
-   r->egress_data_format = edf;
-
-   return 0;
-}
-
-static int __rmnet_set_endpoint_config(struct net_device *dev, int config_id,
-  struct rmnet_endpoint *ep)
-{
-   struct rmnet_endpoint *dev_ep;
-
-   dev_ep = rmnet_get_endpoint(dev, config_id);
-
-   if (!dev_ep)
-   return -EINVAL;
-
-   memcpy(dev_ep, ep, sizeof(struct rmnet_endpoint));
-   dev_ep->mux_id = config_id;
-
-   return 0;
-}
-
-static int rmnet_set_endpoint_config(struct net_device *dev,
-int config_id, u8 rmnet_mode,
-struct net_device *egress_dev)
-{
-   struct rmnet_endpoint ep;
+   struct rmnet_endpoint *ep;
 
netdev_dbg(dev, "id %d mode %d dev %s\n",
-  config_id, rmnet_mode, egress_dev->name);
-
-   if (config_id >= RMNET_MAX_LOGICAL_EP)
-   return -EINVAL;
+  mux_id, rmnet_mode, egress_dev->name);
 
+   ep = rmnet_get_endpoint(dev, mux_id);
/* This config is cleared on every set, so its ok to not
 * clear it on a device delete.
 */
-   memset(, 0, sizeof(struct rmnet_endpoint));
-   ep.rmnet_mode = rmnet_mode;
-   ep.egress_dev = egress_dev;
-
-   return __rmnet_set_endpoint_config(dev, config_id, );
+   memset(ep, 0, sizeof(struct rmnet_endpoint));
+   ep->rmnet_mode = rmnet_mode;
+   ep->egress_dev = egress_dev;
+   ep->mux_id = mux_id;
 }
 
 static int rmnet_newlink(struct net *src_net, struct net_device *dev,
@@ -242,9 +182,11 @@ static int rmnet_newlink(struct net *src_net, struct 
net_device *dev,
if (err)
goto err2;
 
-   rmnet_vnd_set_mux(dev, mux_id);
-   rmnet_set_egress_data_format(real_dev, egress_format, 0, 0);
-   rmnet_set_ingress_data_format(real_dev, ingress_format);
+   

[PATCH net-next 3/6] net: qualcomm: rmnet: Move the device creation log

2017-09-02 Thread Subash Abhinov Kasiviswanathan
The current log is not very useful as it does not log the device
name since it is prior to registration -

(unnamed net_device) (uninitialized): Setting up device

Modify to log after the device registration -

rmnet1: rmnet dev created

Signed-off-by: Subash Abhinov Kasiviswanathan 
---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c 
b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
index 0e0001b..7c73052 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
@@ -73,8 +73,6 @@ static int rmnet_vnd_change_mtu(struct net_device *rmnet_dev, 
int new_mtu)
  */
 void rmnet_vnd_setup(struct net_device *rmnet_dev)
 {
-   netdev_dbg(rmnet_dev, "Setting up device %s\n", rmnet_dev->name);
-
rmnet_dev->netdev_ops = _vnd_ops;
rmnet_dev->mtu = RMNET_DFLT_PACKET_SIZE;
rmnet_dev->needed_headroom = RMNET_NEEDED_HEADROOM;
@@ -105,6 +103,7 @@ int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev,
r->rmnet_devices[id] = rmnet_dev;
r->nr_rmnet_devs++;
rmnet_dev->rtnl_link_ops = _link_ops;
+   netdev_dbg(rmnet_dev, "rmnet dev created\n");
}
 
return rc;
-- 
1.9.1



[PATCH net-next 6/6] net: qualcomm: rmnet: Rename real_dev_info to port

2017-09-02 Thread Subash Abhinov Kasiviswanathan
Make it similar to drivers like ipvlan / macvlan so it is easier to read.

Signed-off-by: Subash Abhinov Kasiviswanathan 
Cc: Dan Williams 
---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 65 +++---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h |  5 +-
 .../net/ethernet/qualcomm/rmnet/rmnet_handlers.c   | 56 +--
 drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h|  6 +-
 .../ethernet/qualcomm/rmnet/rmnet_map_command.c| 13 ++---
 .../net/ethernet/qualcomm/rmnet/rmnet_map_data.c   |  3 +-
 drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c| 17 +++---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h|  4 +-
 8 files changed, 82 insertions(+), 87 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c 
b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
index 6f21cd0..98f2255 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
@@ -46,7 +46,7 @@
 struct rmnet_walk_data {
struct net_device *real_dev;
struct list_head *head;
-   struct rmnet_real_dev_info *real_dev_info;
+   struct rmnet_port *port;
 };
 
 static int rmnet_is_real_dev_registered(const struct net_device *real_dev)
@@ -58,8 +58,8 @@ static int rmnet_is_real_dev_registered(const struct 
net_device *real_dev)
 }
 
 /* Needs rtnl lock */
-static struct rmnet_real_dev_info*
-rmnet_get_real_dev_info_rtnl(const struct net_device *real_dev)
+static struct rmnet_port*
+rmnet_get_port_rtnl(const struct net_device *real_dev)
 {
return rtnl_dereference(real_dev->rx_handler_data);
 }
@@ -67,27 +67,27 @@ static int rmnet_is_real_dev_registered(const struct 
net_device *real_dev)
 static struct rmnet_endpoint*
 rmnet_get_endpoint(struct net_device *dev, int config_id)
 {
-   struct rmnet_real_dev_info *r;
struct rmnet_endpoint *ep;
+   struct rmnet_port *port;
 
if (!rmnet_is_real_dev_registered(dev)) {
ep = rmnet_vnd_get_endpoint(dev);
} else {
-   r = rmnet_get_real_dev_info_rtnl(dev);
+   port = rmnet_get_port_rtnl(dev);
 
-   ep = &r->muxed_ep[config_id];
+   ep = &port->muxed_ep[config_id];
}
 
return ep;
 }
 
 static int rmnet_unregister_real_device(struct net_device *real_dev,
-   struct rmnet_real_dev_info *r)
+   struct rmnet_port *port)
 {
-   if (r->nr_rmnet_devs)
+   if (port->nr_rmnet_devs)
return -EINVAL;
 
-   kfree(r);
+   kfree(port);
 
netdev_rx_handler_unregister(real_dev);
 
@@ -100,7 +100,7 @@ static int rmnet_unregister_real_device(struct net_device 
*real_dev,
 
 static int rmnet_register_real_device(struct net_device *real_dev)
 {
-   struct rmnet_real_dev_info *r;
+   struct rmnet_port *port;
int rc;
 
ASSERT_RTNL();
@@ -108,14 +108,14 @@ static int rmnet_register_real_device(struct net_device 
*real_dev)
if (rmnet_is_real_dev_registered(real_dev))
return 0;
 
-   r = kzalloc(sizeof(*r), GFP_ATOMIC);
-   if (!r)
+   port = kzalloc(sizeof(*port), GFP_ATOMIC);
+   if (!port)
return -ENOMEM;
 
-   r->dev = real_dev;
-   rc = netdev_rx_handler_register(real_dev, rmnet_rx_handler, r);
+   port->dev = real_dev;
+   rc = netdev_rx_handler_register(real_dev, rmnet_rx_handler, port);
if (rc) {
-   kfree(r);
+   kfree(port);
return -EBUSY;
}
 
@@ -154,9 +154,9 @@ static int rmnet_newlink(struct net *src_net, struct 
net_device *dev,
 RMNET_INGRESS_FORMAT_MAP;
int egress_format = RMNET_EGRESS_FORMAT_MUXING |
RMNET_EGRESS_FORMAT_MAP;
-   struct rmnet_real_dev_info *r;
struct net_device *real_dev;
int mode = RMNET_EPMODE_VND;
+   struct rmnet_port *port;
int err = 0;
u16 mux_id;
 
@@ -173,8 +173,8 @@ static int rmnet_newlink(struct net *src_net, struct 
net_device *dev,
if (err)
goto err0;
 
-   r = rmnet_get_real_dev_info_rtnl(real_dev);
-   err = rmnet_vnd_newlink(mux_id, dev, r, real_dev);
+   port = rmnet_get_port_rtnl(real_dev);
+   err = rmnet_vnd_newlink(mux_id, dev, port, real_dev);
if (err)
goto err1;
 
@@ -184,25 +184,25 @@ static int rmnet_newlink(struct net *src_net, struct 
net_device *dev,
 
netdev_dbg(dev, "data format [ingress 0x%08X] [egress 0x%08X]\n",
   ingress_format, egress_format);
-   r->egress_data_format = egress_format;
-   r->ingress_data_format = ingress_format;
+   port->egress_data_format = egress_format;
+   port->ingress_data_format = ingress_format;
 
rmnet_set_endpoint_config(real_dev, mux_id, mode, dev);

[PATCH net-next 5/6] net: qualcomm: rmnet: Implement ndo_get_iflink

2017-09-02 Thread Subash Abhinov Kasiviswanathan
This makes it easier to find out the parent dev.

Signed-off-by: Subash Abhinov Kasiviswanathan 
Cc: Dan Williams 
---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c |  2 +-
 drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h |  1 +
 drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c| 12 +++-
 drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h|  3 ++-
 4 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c 
b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
index c0aed7c..6f21cd0 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
@@ -174,7 +174,7 @@ static int rmnet_newlink(struct net *src_net, struct 
net_device *dev,
goto err0;
 
r = rmnet_get_real_dev_info_rtnl(real_dev);
-   err = rmnet_vnd_newlink(mux_id, dev, r);
+   err = rmnet_vnd_newlink(mux_id, dev, r, real_dev);
if (err)
goto err1;
 
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h 
b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
index 40b6b32..3212b25 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
@@ -47,6 +47,7 @@ struct rmnet_real_dev_info {
 struct rmnet_priv {
struct rmnet_endpoint local_ep;
u8 mux_id;
+   struct net_device *real_dev;
 };
 
 struct rmnet_real_dev_info*
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c 
b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
index a6de041..f5c63b7 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
@@ -63,9 +63,17 @@ static int rmnet_vnd_change_mtu(struct net_device 
*rmnet_dev, int new_mtu)
return 0;
 }
 
+static int rmnet_vnd_get_iflink(const struct net_device *dev)
+{
+   struct rmnet_priv *priv = netdev_priv(dev);
+
+   return priv->real_dev->ifindex;
+}
+
 static const struct net_device_ops rmnet_vnd_ops = {
.ndo_start_xmit = rmnet_vnd_start_xmit,
.ndo_change_mtu = rmnet_vnd_change_mtu,
+   .ndo_get_iflink = rmnet_vnd_get_iflink,
 };
 
 /* Called by kernel whenever a new rmnet device is created. Sets MTU,
@@ -91,7 +99,8 @@ void rmnet_vnd_setup(struct net_device *rmnet_dev)
 /* Exposed API */
 
 int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev,
- struct rmnet_real_dev_info *r)
+ struct rmnet_real_dev_info *r,
+ struct net_device *real_dev)
 {
struct rmnet_priv *priv;
int rc;
@@ -107,6 +116,7 @@ int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev,
 
priv = netdev_priv(rmnet_dev);
priv->mux_id = id;
+   priv->real_dev = real_dev;
 
netdev_dbg(rmnet_dev, "rmnet dev created\n");
}
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h 
b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h
index 4823f38..9084e44 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h
@@ -19,7 +19,8 @@
 int rmnet_vnd_do_flow_control(struct net_device *dev, int enable);
 struct rmnet_endpoint *rmnet_vnd_get_endpoint(struct net_device *dev);
 int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev,
- struct rmnet_real_dev_info *r);
+ struct rmnet_real_dev_info *r,
+ struct net_device *real_dev);
 int rmnet_vnd_dellink(u8 id, struct rmnet_real_dev_info *r);
 void rmnet_vnd_rx_fixup(struct sk_buff *skb, struct net_device *dev);
 void rmnet_vnd_tx_fixup(struct sk_buff *skb, struct net_device *dev);
-- 
1.9.1



[PATCH net-next 1/6] net: qualcomm: rmnet: Fix memory corruption if mux_id is greater than 32

2017-09-02 Thread Subash Abhinov Kasiviswanathan
rmnet_rtnl_validate() was checking for up to mux_id 254, however the
rmnet_devices array could hold up to 32 entries only. Fix this by
increasing the size of the rmnet_devices array.

Fixes: ceed73a2cf4a ("drivers: net: ethernet: qualcomm: rmnet: Initial 
implementation")
Signed-off-by: Subash Abhinov Kasiviswanathan 
Cc: Dan Williams 
---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h  | 3 +--
 drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h | 1 -
 drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c | 2 +-
 3 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h 
b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
index 985d372..40b6b32 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
@@ -19,7 +19,6 @@
 #define _RMNET_CONFIG_H_
 
 #define RMNET_MAX_LOGICAL_EP 255
-#define RMNET_MAX_VND32
 
 /* Information about the next device to deliver the packet to.
  * Exact usage of this parameter depends on the rmnet_mode.
@@ -39,7 +38,7 @@ struct rmnet_real_dev_info {
struct rmnet_endpoint muxed_ep[RMNET_MAX_LOGICAL_EP];
u32 ingress_data_format;
u32 egress_data_format;
-   struct net_device *rmnet_devices[RMNET_MAX_VND];
+   struct net_device *rmnet_devices[RMNET_MAX_LOGICAL_EP];
u8 nr_rmnet_devs;
 };
 
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h 
b/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
index ed820b5..7967198 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
@@ -13,7 +13,6 @@
 #ifndef _RMNET_PRIVATE_H_
 #define _RMNET_PRIVATE_H_
 
-#define RMNET_MAX_VND  32
 #define RMNET_MAX_PACKET_SIZE  16384
 #define RMNET_DFLT_PACKET_SIZE 1500
 #define RMNET_NEEDED_HEADROOM  16
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c 
b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
index bf7455f..0e0001b 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
@@ -112,7 +112,7 @@ int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev,
 
 int rmnet_vnd_dellink(u8 id, struct rmnet_real_dev_info *r)
 {
-   if (id >= RMNET_MAX_VND || !r->rmnet_devices[id])
+   if (id >= RMNET_MAX_LOGICAL_EP || !r->rmnet_devices[id])
return -EINVAL;
 
r->rmnet_devices[id] = NULL;
-- 
1.9.1



[PATCH net-next 0/6] net: qualcomm: rmnet: Fix comments on initial patchset

2017-09-02 Thread Subash Abhinov Kasiviswanathan
This series fixes the comments from Dan on the first patch series.

Fixes a memory corruption which could occur if mux_id was higher than 32.
Remove the RMNET_LOCAL_LOGICAL_ENDPOINT which is no longer used.
Make a log message more useful.
Combine __rmnet_set_endpoint_config() with rmnet_set_endpoint_config().
Set the mux_id in rmnet_vnd_newlink().
Set the ingress and egress data format directly in newlink.
Implement ndo_get_iflink to find the real_dev.
Rename the real_dev_info to port to make it similar to other drivers.

The conversion of rmnet_devices to a list and hash lookup will be sent
as part of a separate patch.

Subash Abhinov Kasiviswanathan (6):
  net: qualcomm: rmnet: Fix memory corruption if mux_id is greater than
32
  net: qualcomm: rmnet: Remove the unused endpoint -1
  net: qualcomm: rmnet: Move the device creation log
  net: qualcomm: rmnet: Refactor the new rmnet dev creation
  net: qualcomm: rmnet: Implement ndo_get_iflink
  net: qualcomm: rmnet: Rename real_dev_info to port

 drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 163 +++--
 drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h |   9 +-
 .../net/ethernet/qualcomm/rmnet/rmnet_handlers.c   |  56 +++
 drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h|   6 +-
 .../ethernet/qualcomm/rmnet/rmnet_map_command.c|  13 +-
 .../net/ethernet/qualcomm/rmnet/rmnet_map_data.c   |   3 +-
 .../net/ethernet/qualcomm/rmnet/rmnet_private.h|   1 -
 drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c|  43 +++---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h|   6 +-
 9 files changed, 119 insertions(+), 181 deletions(-)

-- 
1.9.1



[PATCH net-next 2/6] net: qualcomm: rmnet: Remove the unused endpoint -1

2017-09-02 Thread Subash Abhinov Kasiviswanathan
This was used only in the original patch series where the IOCTLs were
present and is no longer in use.

Fixes: ceed73a2cf4a ("drivers: net: ethernet: qualcomm: rmnet: Initial 
implementation")
Signed-off-by: Subash Abhinov Kasiviswanathan 
Cc: Dan Williams 
---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 14 +++---
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c 
b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
index e836d26..3ca98ec 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
@@ -42,7 +42,6 @@
  */
 
 /* Local Definitions and Declarations */
-#define RMNET_LOCAL_LOGICAL_ENDPOINT -1
 
 struct rmnet_walk_data {
struct net_device *real_dev;
@@ -89,10 +88,7 @@ static int rmnet_is_real_dev_registered(const struct 
net_device *real_dev)
if (!r)
return NULL;
 
-   if (config_id == RMNET_LOCAL_LOGICAL_ENDPOINT)
-   ep = >local_ep;
-   else
-   ep = >muxed_ep[config_id];
+   ep = >muxed_ep[config_id];
}
 
return ep;
@@ -182,10 +178,7 @@ static int __rmnet_set_endpoint_config(struct net_device 
*dev, int config_id,
return -EINVAL;
 
memcpy(dev_ep, ep, sizeof(struct rmnet_endpoint));
-   if (config_id == RMNET_LOCAL_LOGICAL_ENDPOINT)
-   dev_ep->mux_id = 0;
-   else
-   dev_ep->mux_id = config_id;
+   dev_ep->mux_id = config_id;
 
return 0;
 }
@@ -199,8 +192,7 @@ static int rmnet_set_endpoint_config(struct net_device *dev,
netdev_dbg(dev, "id %d mode %d dev %s\n",
   config_id, rmnet_mode, egress_dev->name);
 
-   if (config_id < RMNET_LOCAL_LOGICAL_ENDPOINT ||
-   config_id >= RMNET_MAX_LOGICAL_EP)
+   if (config_id >= RMNET_MAX_LOGICAL_EP)
return -EINVAL;
 
/* This config is cleared on every set, so its ok to not
-- 
1.9.1



[net-next 06/17] net/mlx5e: NAPI busy-poll when UMR post is in progress

2017-09-02 Thread Saeed Mahameed
From: Tariq Toukan 

If a UMR post is in progress, it means that there's a missing
WQE in RQ, and that a completion will be shortly available in
ICO SQ completion queue. Prefer busy-poll to handle it as soon
as possible.

Signed-off-by: Tariq Toukan 
Signed-off-by: Saeed Mahameed 
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index fb3b83609aea..8af6577b7501 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -456,17 +456,16 @@ void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
mlx5e_free_rx_mpwqe(rq, wi);
 }
 
-#define RQ_CANNOT_POST(rq) \
-   (!test_bit(MLX5E_RQ_STATE_ENABLED, >state) || \
-test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, >state))
-
 bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
 {
struct mlx5_wq_ll *wq = >wq;
 
-   if (unlikely(RQ_CANNOT_POST(rq)))
+   if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, >state)))
return false;
 
+   if (test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, >state))
+   return true;
+
while (!mlx5_wq_ll_is_full(wq)) {
struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head);
int err;
-- 
2.13.0



[net-next 07/17] net/mlx5e: Early-return on empty completion queues

2017-09-02 Thread Saeed Mahameed
From: Tariq Toukan 

NAPI context handles different kinds of completion queues
(RX, TX, and others). Hence, upon a poll trial, some of them
might be empty.
Here we early-return upon empty completion queues, as well as
full rx buffer, and save unnecessary logic and memory barriers.

Signed-off-by: Tariq Toukan 
Signed-off-by: Saeed Mahameed 
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 40 ++---
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 16 +-
 2 files changed, 32 insertions(+), 24 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 8af6577b7501..ab1213a3615e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -459,16 +459,19 @@ void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
 {
struct mlx5_wq_ll *wq = >wq;
+   int err;
 
if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, >state)))
return false;
 
+   if (mlx5_wq_ll_is_full(wq))
+   return false;
+
if (test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, >state))
return true;
 
-   while (!mlx5_wq_ll_is_full(wq)) {
+   do {
struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head);
-   int err;
 
err = rq->alloc_wqe(rq, wqe, wq->head);
if (err == -EBUSY)
@@ -479,14 +482,14 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
}
 
mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index));
-   }
+   } while (!mlx5_wq_ll_is_full(wq));
 
/* ensure wqes are visible to device before updating doorbell record */
dma_wmb();
 
mlx5_wq_ll_update_db_record(wq);
 
-   return !mlx5_wq_ll_is_full(wq);
+   return !!err;
 }
 
 static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
@@ -981,7 +984,8 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct 
mlx5_cqe64 *cqe)
 int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
 {
struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
-   struct mlx5e_xdpsq *xdpsq = >xdpsq;
+   struct mlx5e_xdpsq *xdpsq;
+   struct mlx5_cqe64 *cqe;
int work_done = 0;
 
if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, >state)))
@@ -990,12 +994,13 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
if (cq->decmprs_left)
work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget);
 
-   for (; work_done < budget; work_done++) {
-   struct mlx5_cqe64 *cqe = mlx5_cqwq_get_cqe(>wq);
+   cqe = mlx5_cqwq_get_cqe(>wq);
+   if (!cqe)
+   return 0;
 
-   if (!cqe)
-   break;
+   xdpsq = >xdpsq;
 
+   do {
if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) {
work_done +=
mlx5e_decompress_cqes_start(rq, cq,
@@ -1006,7 +1011,7 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
mlx5_cqwq_pop(>wq);
 
rq->handle_rx_cqe(rq, cqe);
-   }
+   } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(>wq)));
 
if (xdpsq->db.doorbell) {
mlx5e_xmit_xdp_doorbell(xdpsq);
@@ -1024,6 +1029,7 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
 bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
 {
struct mlx5e_xdpsq *sq;
+   struct mlx5_cqe64 *cqe;
struct mlx5e_rq *rq;
u16 sqcc;
int i;
@@ -1033,6 +1039,10 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, >state)))
return false;
 
+   cqe = mlx5_cqwq_get_cqe(>wq);
+   if (!cqe)
+   return false;
+
rq = container_of(sq, struct mlx5e_rq, xdpsq);
 
/* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
@@ -1040,15 +1050,11 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
 */
sqcc = sq->cc;
 
-   for (i = 0; i < MLX5E_TX_CQ_POLL_BUDGET; i++) {
-   struct mlx5_cqe64 *cqe;
+   i = 0;
+   do {
u16 wqe_counter;
bool last_wqe;
 
-   cqe = mlx5_cqwq_get_cqe(>wq);
-   if (!cqe)
-   break;
-
mlx5_cqwq_pop(>wq);
 
wqe_counter = be16_to_cpu(cqe->wqe_counter);
@@ -1066,7 +1072,7 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
/* Recycle RX page */
mlx5e_page_release(rq, di, true);
} while (!last_wqe);
-   }
+   } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = 
mlx5_cqwq_get_cqe(>wq)));
 
mlx5_cqwq_update_db_record(>wq);
 
diff --git 

[net-next 12/17] net/mlx5e: Remove unnecessary fields in ICO SQ

2017-09-02 Thread Saeed Mahameed
From: Tariq Toukan 

As of current design, in each NAPI, only a single UMR WQE
completion could be available in the completion queue of the
the internal control operations (ICO) send queue, in addition
to nop operations that require no actions upon completion.
This renders the consume index obsolete, as the wqe_counter
field in CQE is sufficient.

This helps removing a memory barrier, and obsoletes the need
for tracking the num_wqebbs to update the consumer counter.

In addition, remove other unused fields in icosq struct:
pdev, dma_fifo_pc, and prev_cc.

Signed-off-by: Tariq Toukan 
Signed-off-by: Saeed Mahameed 
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  7 ---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c |  2 --
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c   | 19 ++-
 3 files changed, 2 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 8d29a6eb9406..e55d9439bc12 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -343,7 +343,6 @@ enum {
 
 struct mlx5e_sq_wqe_info {
u8  opcode;
-   u8  num_wqebbs;
 };
 
 struct mlx5e_txqsq {
@@ -419,13 +418,8 @@ struct mlx5e_xdpsq {
 struct mlx5e_icosq {
/* data path */
 
-   /* dirtied @completion */
-   u16cc;
-
/* dirtied @xmit */
u16pc cacheline_aligned_in_smp;
-   u32dma_fifo_pc;
-   u16prev_cc;
 
struct mlx5e_cqcq;
 
@@ -439,7 +433,6 @@ struct mlx5e_icosq {
void __iomem  *uar_map;
u32sqn;
u16edge;
-   struct device *pdev;
__be32 mkey_be;
unsigned long  state;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 162ba6ab749a..a4c9a0a2c408 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -950,7 +950,6 @@ static void mlx5e_activate_rq(struct mlx5e_rq *rq)
 
set_bit(MLX5E_RQ_STATE_ENABLED, >state);
sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP;
-   sq->db.ico_wqe[pi].num_wqebbs = 1;
nopwqe = mlx5e_post_nop(>wq, sq->sqn, >pc);
mlx5e_notify_hw(>wq, sq->pc, sq->uar_map, >ctrl);
 }
@@ -1052,7 +1051,6 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
struct mlx5_core_dev *mdev = c->mdev;
int err;
 
-   sq->pdev  = c->pdev;
sq->mkey_be   = c->mkey_be;
sq->channel   = c;
sq->uar_map   = mdev->mlx5e_res.bfreg.map;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index b236dfd71c18..88a8749c67d6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -357,7 +357,6 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, 
u16 ix)
/* fill sq edge with nops to avoid wqe wrap around */
while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) {
sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP;
-   sq->db.ico_wqe[pi].num_wqebbs = 1;
mlx5e_post_nop(wq, sq->sqn, >pc);
}
 
@@ -368,7 +367,6 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, 
u16 ix)
MLX5_OPCODE_UMR);
 
sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR;
-   sq->db.ico_wqe[pi].num_wqebbs = num_wqebbs;
sq->pc += num_wqebbs;
mlx5e_notify_hw(>wq, sq->pc, sq->uar_map, >ctrl);
 }
@@ -487,15 +485,13 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
 static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq,
 struct mlx5e_icosq *sq,
 struct mlx5e_rq *rq,
-struct mlx5_cqe64 *cqe,
-u16 *sqcc)
+struct mlx5_cqe64 *cqe)
 {
struct mlx5_wq_cyc *wq = >wq;
u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1;
struct mlx5e_sq_wqe_info *icowi = >db.ico_wqe[ci];
 
mlx5_cqwq_pop(>wq);
-   *sqcc += icowi->num_wqebbs;
 
if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) {
WARN_ONCE(true, "mlx5e: Bad OP in ICOSQ CQE: 0x%x\n",
@@ -518,7 +514,6 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq, struct 
mlx5e_rq *rq)
 {
struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq);
struct mlx5_cqe64 *cqe;
-   u16 sqcc;
 
if (unlikely(!MLX5E_TEST_BIT(sq->state, MLX5E_SQ_STATE_ENABLED)))
 

[net-next 02/17] net/mlx5e: Replace multiplication by stride size with a shift

2017-09-02 Thread Saeed Mahameed
From: Tariq Toukan 

In RX data-path, use shift operations instead of a regular multiplication
by stride size, as it is a power of two.

Signed-off-by: Tariq Toukan 
Signed-off-by: Saeed Mahameed 
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c   | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index d964db286c95..44bd8df905ca 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -535,8 +535,8 @@ struct mlx5e_rq {
struct {
struct mlx5e_mpw_info *info;
void  *mtt_no_align;
-   u16stride_sz;
u16num_strides;
+   u8 log_stride_sz;
} mpwqe;
};
struct {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 94761d0e1b33..7a25d952c922 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -615,10 +615,10 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
goto err_rq_wq_destroy;
}
 
-   rq->mpwqe.stride_sz = BIT(params->mpwqe_log_stride_sz);
+   rq->mpwqe.log_stride_sz = params->mpwqe_log_stride_sz;
rq->mpwqe.num_strides = BIT(params->mpwqe_log_num_strides);
 
-   rq->buff.wqe_sz = rq->mpwqe.stride_sz * rq->mpwqe.num_strides;
+   rq->buff.wqe_sz = rq->mpwqe.num_strides << 
rq->mpwqe.log_stride_sz;
byte_count = rq->buff.wqe_sz;
 
err = mlx5e_create_rq_umr_mkey(mdev, rq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 1b50f1e7e48a..aa5cc1590859 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -304,7 +304,7 @@ static inline void mlx5e_add_skb_frag_mpwqe(struct mlx5e_rq 
*rq,
u32 page_idx, u32 frag_offset,
u32 len)
 {
-   unsigned int truesize = ALIGN(len, rq->mpwqe.stride_sz);
+   unsigned int truesize = ALIGN(len, BIT(rq->mpwqe.log_stride_sz));
 
dma_sync_single_for_cpu(rq->pdev,
wi->umr.dma_info[page_idx].addr + frag_offset,
@@ -910,7 +910,7 @@ static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq 
*rq,
   struct sk_buff *skb)
 {
u16 stride_ix  = mpwrq_get_cqe_stride_index(cqe);
-   u32 wqe_offset = stride_ix * rq->mpwqe.stride_sz;
+   u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz;
u32 head_offset= wqe_offset & (PAGE_SIZE - 1);
u32 page_idx   = wqe_offset >> PAGE_SHIFT;
u32 head_page_idx  = page_idx;
-- 
2.13.0



[net-next 11/17] net/mlx5e: Type-specific optimizations for RX post WQEs function

2017-09-02 Thread Saeed Mahameed
From: Tariq Toukan 

Separate the RX post WQEs function of the different RQ types.
This enables RQ type-specific optimizations in data-path.

Poll the ICOSQ completion queue only for Striding RQ,
and only when a UMR post completion could be possibly available.

Signed-off-by: Tariq Toukan 
Signed-off-by: Saeed Mahameed 
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  10 +--
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c |   4 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c   | 101 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c |  64 +-
 4 files changed, 92 insertions(+), 87 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index bce2080eb86a..8d29a6eb9406 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -517,7 +517,7 @@ struct mlx5e_page_cache {
 
 struct mlx5e_rq;
 typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq*, struct mlx5_cqe64*);
-typedef int (*mlx5e_fp_alloc_wqe)(struct mlx5e_rq*, struct mlx5e_rx_wqe*, u16);
+typedef bool (*mlx5e_fp_post_rx_wqes)(struct mlx5e_rq *rq);
 typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq*, u16);
 
 struct mlx5e_rq {
@@ -547,6 +547,7 @@ struct mlx5e_rq {
u8 map_dir;   /* dma map direction */
} buff;
 
+   struct mlx5e_channel  *channel;
struct device *pdev;
struct net_device *netdev;
struct mlx5e_tstamp   *tstamp;
@@ -555,7 +556,7 @@ struct mlx5e_rq {
struct mlx5e_page_cache page_cache;
 
mlx5e_fp_handle_rx_cqe handle_rx_cqe;
-   mlx5e_fp_alloc_wqe alloc_wqe;
+   mlx5e_fp_post_rx_wqes  post_wqes;
mlx5e_fp_dealloc_wqe   dealloc_wqe;
 
unsigned long  state;
@@ -572,7 +573,6 @@ struct mlx5e_rq {
__be32 mkey_be;
u8 wq_type;
u32rqn;
-   struct mlx5e_channel  *channel;
struct mlx5_core_dev  *mdev;
struct mlx5_core_mkey  umr_mkey;
 } cacheline_aligned_in_smp;
@@ -853,11 +853,9 @@ void mlx5e_page_release(struct mlx5e_rq *rq, struct 
mlx5e_dma_info *dma_info,
 void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq);
-int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix);
-int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe,
u16 ix);
+bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq);
 void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix);
 void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix);
-void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq);
 void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi);
 
 void mlx5e_rx_am(struct mlx5e_rq *rq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 2767a3ee81bc..162ba6ab749a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -598,7 +598,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
switch (rq->wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
 
-   rq->alloc_wqe = mlx5e_alloc_rx_mpwqe;
+   rq->post_wqes = mlx5e_post_rx_mpwqes;
rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
 
rq->handle_rx_cqe = 
c->priv->profile->rx_handlers.handle_rx_cqe_mpwqe;
@@ -637,7 +637,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
err = -ENOMEM;
goto err_rq_wq_destroy;
}
-   rq->alloc_wqe = mlx5e_alloc_rx_wqe;
+   rq->post_wqes = mlx5e_post_rx_wqes;
rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
 
 #ifdef CONFIG_MLX5_EN_IPSEC
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 11dba9940029..b236dfd71c18 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -252,7 +252,7 @@ static inline bool mlx5e_page_reuse(struct mlx5e_rq *rq,
!mlx5e_page_is_reserved(wi->di.page);
 }
 
-int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix)
+static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, 
u16 ix)
 {
struct mlx5e_wqe_frag_info *wi = >wqe.frag_info[ix];
 
@@ -417,18 +417,13 @@ void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct 
mlx5e_mpw_info *wi)
}
 }
 
-void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq)
+static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq)
 {
struct mlx5_wq_ll *wq = >wq;
struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, 

[net-next 08/17] net/mlx5e: Refactor data-path lro header function

2017-09-02 Thread Saeed Mahameed
From: Tariq Toukan 

Refactor function mlx5e_lro_update_hdr() to reduce number of
branches.

Signed-off-by: Tariq Toukan 
Signed-off-by: Saeed Mahameed 
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 45 +++--
 include/linux/mlx5/device.h |  2 +-
 2 files changed, 21 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index ab1213a3615e..9d9c13ae6b83 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -496,56 +496,51 @@ static void mlx5e_lro_update_hdr(struct sk_buff *skb, 
struct mlx5_cqe64 *cqe,
 u32 cqe_bcnt)
 {
struct ethhdr   *eth = (struct ethhdr *)(skb->data);
-   struct iphdr*ipv4;
-   struct ipv6hdr  *ipv6;
struct tcphdr   *tcp;
int network_depth = 0;
__be16 proto;
u16 tot_len;
+   void *ip_p;
 
u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe);
-   int tcp_ack = ((l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA) ||
-  (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA));
+   u8 tcp_ack = (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA) ||
+   (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA);
 
skb->mac_len = ETH_HLEN;
proto = __vlan_get_protocol(skb, eth->h_proto, _depth);
 
-   ipv4 = (struct iphdr *)(skb->data + network_depth);
-   ipv6 = (struct ipv6hdr *)(skb->data + network_depth);
tot_len = cqe_bcnt - network_depth;
+   ip_p = skb->data + network_depth;
 
if (proto == htons(ETH_P_IP)) {
-   tcp = (struct tcphdr *)(skb->data + network_depth +
-   sizeof(struct iphdr));
-   ipv6 = NULL;
-   skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
-   } else {
-   tcp = (struct tcphdr *)(skb->data + network_depth +
-   sizeof(struct ipv6hdr));
-   ipv4 = NULL;
-   skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
-   }
-
-   if (get_cqe_lro_tcppsh(cqe))
-   tcp->psh= 1;
+   struct iphdr *ipv4 = ip_p;
 
-   if (tcp_ack) {
-   tcp->ack= 1;
-   tcp->ack_seq= cqe->lro_ack_seq_num;
-   tcp->window = cqe->lro_tcp_win;
-   }
+   tcp = ip_p + sizeof(struct iphdr);
+   skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
 
-   if (ipv4) {
ipv4->ttl   = cqe->lro_min_ttl;
ipv4->tot_len   = cpu_to_be16(tot_len);
ipv4->check = 0;
ipv4->check = ip_fast_csum((unsigned char *)ipv4,
   ipv4->ihl);
} else {
+   struct ipv6hdr *ipv6 = ip_p;
+
+   tcp = ip_p + sizeof(struct ipv6hdr);
+   skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
+
ipv6->hop_limit = cqe->lro_min_ttl;
ipv6->payload_len   = cpu_to_be16(tot_len -
  sizeof(struct ipv6hdr));
}
+
+   tcp->psh = get_cqe_lro_tcppsh(cqe);
+
+   if (tcp_ack) {
+   tcp->ack= 1;
+   tcp->ack_seq= cqe->lro_ack_seq_num;
+   tcp->window = cqe->lro_tcp_win;
+   }
 }
 
 static inline void mlx5e_skb_set_hash(struct mlx5_cqe64 *cqe,
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 3c7442b56460..7031d655ec32 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -709,7 +709,7 @@ static inline int mlx5_get_cqe_format(struct mlx5_cqe64 
*cqe)
return (cqe->op_own >> 2) & 0x3;
 }
 
-static inline int get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe)
+static inline u8 get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe)
 {
return (cqe->lro_tcppsh_abort_dupack >> 6) & 1;
 }
-- 
2.13.0



[net-next 05/17] net/mlx5e: Small enhancements for RX MPWQE allocation and free

2017-09-02 Thread Saeed Mahameed
From: Tariq Toukan 

The DMA offset of an MPWQE (Multi-Packet WQE) in the memory region
is fixed for all rounds. Calculate it once at creation time,
instead of at runtime. This also obsoletes the wqe argument of
the function.

In addition, optimize dma_info iterator calculation.

Signed-off-by: Tariq Toukan 
Signed-off-by: Saeed Mahameed 
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c |  6 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c   | 18 ++
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 591e0dca9671..5aa4681f7c3c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -674,6 +674,12 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
for (i = 0; i < wq_sz; i++) {
struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(>wq, i);
 
+   if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
+   u64 dma_offset = (u64)mlx5e_get_wqe_mtt_offset(rq, i) 
<< PAGE_SHIFT;
+
+   wqe->data.addr = cpu_to_be64(dma_offset);
+   }
+
wqe->data.byte_count = cpu_to_be32(byte_count);
wqe->data.lkey = rq->mkey_be;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index db372dcecbe0..fb3b83609aea 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -374,18 +374,15 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq 
*rq, u16 ix)
 }
 
 static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq,
-   struct mlx5e_rx_wqe *wqe,
u16 ix)
 {
struct mlx5e_mpw_info *wi = >mpwqe.info[ix];
-   u64 dma_offset = (u64)mlx5e_get_wqe_mtt_offset(rq, ix) << PAGE_SHIFT;
int pg_strides = mlx5e_mpwqe_strides_per_page(rq);
+   struct mlx5e_dma_info *dma_info = >umr.dma_info[0];
int err;
int i;
 
-   for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
-   struct mlx5e_dma_info *dma_info = >umr.dma_info[i];
-
+   for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) {
err = mlx5e_page_alloc_mapped(rq, dma_info);
if (unlikely(err))
goto err_unmap;
@@ -395,14 +392,12 @@ static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq,
 
memset(wi->skbs_frags, 0, sizeof(*wi->skbs_frags) * 
MLX5_MPWRQ_PAGES_PER_WQE);
wi->consumed_strides = 0;
-   wqe->data.addr = cpu_to_be64(dma_offset);
 
return 0;
 
 err_unmap:
while (--i >= 0) {
-   struct mlx5e_dma_info *dma_info = >umr.dma_info[i];
-
+   dma_info--;
page_ref_sub(dma_info->page, pg_strides);
mlx5e_page_release(rq, dma_info, true);
}
@@ -413,11 +408,10 @@ static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq,
 void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi)
 {
int pg_strides = mlx5e_mpwqe_strides_per_page(rq);
+   struct mlx5e_dma_info *dma_info = >umr.dma_info[0];
int i;
 
-   for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
-   struct mlx5e_dma_info *dma_info = >umr.dma_info[i];
-
+   for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) {
page_ref_sub(dma_info->page, pg_strides - wi->skbs_frags[i]);
mlx5e_page_release(rq, dma_info, true);
}
@@ -447,7 +441,7 @@ int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct 
mlx5e_rx_wqe *wqe, u16 ix)
 {
int err;
 
-   err = mlx5e_alloc_rx_umr_mpwqe(rq, wqe, ix);
+   err = mlx5e_alloc_rx_umr_mpwqe(rq, ix);
if (unlikely(err))
return err;
set_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, >state);
-- 
2.13.0



[net-next 04/17] net/mlx5e: Use memset to init skbs_frags array to zeros

2017-09-02 Thread Saeed Mahameed
From: Tariq Toukan 

In RX data-path, use memset() instead of loop assignment
to init the whole skbs_frags array.

Signed-off-by: Tariq Toukan 
Signed-off-by: Saeed Mahameed 
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index aa5cc1590859..db372dcecbe0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -391,9 +391,9 @@ static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq,
goto err_unmap;
wi->umr.mtt[i] = cpu_to_be64(dma_info->addr | MLX5_EN_WR);
page_ref_add(dma_info->page, pg_strides);
-   wi->skbs_frags[i] = 0;
}
 
+   memset(wi->skbs_frags, 0, sizeof(*wi->skbs_frags) * 
MLX5_MPWRQ_PAGES_PER_WQE);
wi->consumed_strides = 0;
wqe->data.addr = cpu_to_be64(dma_offset);
 
-- 
2.13.0



[pull request][net-next 00/17] Mellanox, mlx5 updates 2017-09-03

2017-09-02 Thread Saeed Mahameed
Hi Dave,

This series from Tariq includes micro data path optimization for mlx5e
netdevice driver.

Sorry about the late submission but most of the patches are really
small and trivial.

For more details please see tag log message below.
Please pull and let me know if there's any problem.

Thanks,
Saeed.

---

The following changes since commit 32d9b70a053a835b4dfb33158fc03795ea103e44:

  Merge branch 'hv_netvsc-channel-settings-cleanups-and-fixes' (2017-09-01 
20:39:12 -0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux.git 
tags/mlx5-updates-2017-09-03

for you to fetch changes up to d4b6c48800dda97f5a0824305d7c8175a127d414:

  net/mlx5e: Distribute RSS table among all RX rings (2017-09-03 06:34:09 +0300)


mlx5-updates-2017-09-03

This series from Tariq includes micro data path optimization for mlx5e
netdevice driver.

Mainly Tariq introduces the following changes to NAPI and RX handling
path of the driver:
 - RX ring structure reorganizing
 - Trivial code refactoring and optimization
 - NAPI busy-poll for when fast UMR is in progress
 - Non-atomic state operations in NAPI context
 - Remove unnecessary fields from fast path structures
 - page-cache micro optimization
 - Rely on NAPI to avoid missing an IRQ for RX/TX shared NAPI contexts
 - Stop NAPI when irq changes affinity
 - Distribute RSS table among all RX rings

Thanks,
Saeed.


Tariq Toukan (17):
  net/mlx5e: Reorganize struct mlx5e_rq
  net/mlx5e: Replace multiplication by stride size with a shift
  net/mlx5e: Remove unnecessary wqe_sz field from RQ buffer
  net/mlx5e: Use memset to init skbs_frags array to zeros
  net/mlx5e: Small enhancements for RX MPWQE allocation and free
  net/mlx5e: NAPI busy-poll when UMR post is in progress
  net/mlx5e: Early-return on empty completion queues
  net/mlx5e: Refactor data-path lro header function
  net/mlx5e: Non-atomic indicator for ring enabled state
  net/mlx5e: Non-atomic RQ state indicator for UMR WQE in progress
  net/mlx5e: Type-specific optimizations for RX post WQEs function
  net/mlx5e: Remove unnecessary fields in ICO SQ
  net/mlx5e: Don't recycle page if moved to far NUMA
  net/mlx5e: Slightly increase RX page-cache size
  net/mlx5e: Use kernel's mechanism to avoid missing NAPIs
  net/mlx5e: Stop NAPI when irq balancer changes affinity
  net/mlx5e: Distribute RSS table among all RX rings

 drivers/net/ethernet/mellanox/mlx5/core/en.h   |  49 ++---
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   |   3 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  55 +++--
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c| 225 +
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.h |   4 +
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c|  18 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c  |  88 ++--
 include/linux/mlx5/device.h|   2 +-
 8 files changed, 216 insertions(+), 228 deletions(-)


[net-next 10/17] net/mlx5e: Non-atomic RQ state indicator for UMR WQE in progress

2017-09-02 Thread Saeed Mahameed
From: Tariq Toukan 

The indication for a UMR WQE in progress is needed only within
the NAPI context, hence no races are possible and there is no need
for atomic operations.
The only place the flag is read outside of NAPI context is
in the closure flow, after the RQ is disabled and the flag is no
longer accessed in NAPI.
Use a boolean instead of a bit in ring state, so that its
non-atomic set operations do not race with the atomic sets of
the other bits.

Signed-off-by: Tariq Toukan 
Signed-off-by: Saeed Mahameed 
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c |  3 ++-
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c   | 10 +-
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 0c4f1f30085a..bce2080eb86a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -291,7 +291,6 @@ struct mlx5e_tstamp {
 
 enum {
MLX5E_RQ_STATE_ENABLED,
-   MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS,
MLX5E_RQ_STATE_AM,
 };
 
@@ -539,6 +538,7 @@ struct mlx5e_rq {
void  *mtt_no_align;
u16num_strides;
u8 log_stride_sz;
+   bool   umr_in_progress;
} mpwqe;
};
struct {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 411fb68794bc..2767a3ee81bc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -886,7 +886,8 @@ static void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
u16 wqe_ix;
 
/* UMR WQE (if in progress) is always at wq->head */
-   if (test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, >state))
+   if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ &&
+   rq->mpwqe.umr_in_progress)
mlx5e_free_rx_mpwqe(rq, >mpwqe.info[wq->head]);
 
while (!mlx5_wq_ll_is_empty(wq)) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index a5522c3992a2..11dba9940029 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -422,7 +422,7 @@ void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq)
struct mlx5_wq_ll *wq = >wq;
struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head);
 
-   clear_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, >state);
+   rq->mpwqe.umr_in_progress = false;
 
if (unlikely(!MLX5E_TEST_BIT(rq->state, MLX5E_RQ_STATE_ENABLED))) {
mlx5e_free_rx_mpwqe(rq, >mpwqe.info[wq->head]);
@@ -441,10 +441,13 @@ int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct 
mlx5e_rx_wqe *wqe, u16 ix)
 {
int err;
 
+   if (rq->mpwqe.umr_in_progress)
+   return -EBUSY;
+
err = mlx5e_alloc_rx_umr_mpwqe(rq, ix);
if (unlikely(err))
return err;
-   set_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, >state);
+   rq->mpwqe.umr_in_progress = true;
mlx5e_post_umr_wqe(rq, ix);
return -EBUSY;
 }
@@ -467,9 +470,6 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
if (mlx5_wq_ll_is_full(wq))
return false;
 
-   if (test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, >state))
-   return true;
-
do {
struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head);
 
-- 
2.13.0



[net-next 14/17] net/mlx5e: Slightly increase RX page-cache size

2017-09-02 Thread Saeed Mahameed
From: Tariq Toukan 

In XDP_TX flow, we now get back quicker to each page in page-cache,
and on some occasions refcount does not get back to 1 in time, causing
some costly page allocations.
Slightly increase the size of RX page-cache to significantly decrease
the chances for this to happen.

Signed-off-by: Tariq Toukan 
Signed-off-by: Saeed Mahameed 
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index e55d9439bc12..9c0c07ffe557 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -501,7 +501,7 @@ struct mlx5e_rx_am { /* Adaptive Moderation */
  */
 #define MLX5E_CACHE_UNIT   (MLX5_MPWRQ_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? \
 MLX5_MPWRQ_PAGES_PER_WQE : NAPI_POLL_WEIGHT)
-#define MLX5E_CACHE_SIZE   (2 * roundup_pow_of_two(MLX5E_CACHE_UNIT))
+#define MLX5E_CACHE_SIZE   (4 * roundup_pow_of_two(MLX5E_CACHE_UNIT))
 struct mlx5e_page_cache {
u32 head;
u32 tail;
-- 
2.13.0



[net-next 15/17] net/mlx5e: Use kernel's mechanism to avoid missing NAPIs

2017-09-02 Thread Saeed Mahameed
From: Tariq Toukan 

We used a channel state bit MLX5E_CHANNEL_NAPI_SCHED to make
sure no NAPI is missed when a channel's napi_schedule() is called
for completion events of the different channel's resources/rings
while NAPI is currently running.
Now, as a similar mechanism is implemented in the kernel,
("39e6c8208d7b net: solve a NAPI race"),
we obsolete our own implementation and rely on the return value
of napi_complete_done().

This patch removes a redundant overhead of atomic bit operations.

Signed-off-by: Tariq Toukan 
Cc: Eric Dumazet 
Signed-off-by: Saeed Mahameed 
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  5 -
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c |  1 -
 drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 10 +-
 3 files changed, 1 insertion(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 9c0c07ffe557..7c046ae8b18e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -570,10 +570,6 @@ struct mlx5e_rq {
struct mlx5_core_mkey  umr_mkey;
 } cacheline_aligned_in_smp;
 
-enum channel_flags {
-   MLX5E_CHANNEL_NAPI_SCHED = 1,
-};
-
 struct mlx5e_channel {
/* data path */
struct mlx5e_rqrq;
@@ -585,7 +581,6 @@ struct mlx5e_channel {
struct net_device *netdev;
__be32 mkey_be;
u8 num_tc;
-   unsigned long  flags;
 
/* control */
struct mlx5e_priv *priv;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 2da2ea222aaa..b2f689ec0d72 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3697,7 +3697,6 @@ static int mlx5e_xdp_set(struct net_device *netdev, 
struct bpf_prog *prog)
 
set_bit(MLX5E_RQ_STATE_ENABLED, >rq.state);
/* napi_schedule in case we have missed anything */
-   set_bit(MLX5E_CHANNEL_NAPI_SCHED, >flags);
napi_schedule(>napi);
 
if (old_prog)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index 439ba1f2ffbc..92d9aa1cddd6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -40,8 +40,6 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
int work_done;
int i;
 
-   clear_bit(MLX5E_CHANNEL_NAPI_SCHED, >flags);
-
for (i = 0; i < c->num_tc; i++)
busy |= mlx5e_poll_tx_cq(>sq[i].cq, budget);
 
@@ -56,13 +54,8 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
if (busy)
return budget;
 
-   napi_complete_done(napi, work_done);
-
-   /* avoid losing completion event during/after polling cqs */
-   if (test_bit(MLX5E_CHANNEL_NAPI_SCHED, >flags)) {
-   napi_schedule(napi);
+   if (unlikely(!napi_complete_done(napi, work_done)))
return work_done;
-   }
 
for (i = 0; i < c->num_tc; i++)
mlx5e_cq_arm(>sq[i].cq);
@@ -81,7 +74,6 @@ void mlx5e_completion_event(struct mlx5_core_cq *mcq)
struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq);
 
cq->event_ctr++;
-   set_bit(MLX5E_CHANNEL_NAPI_SCHED, >channel->flags);
napi_schedule(cq->napi);
 }
 
-- 
2.13.0



[net-next 16/17] net/mlx5e: Stop NAPI when irq balancer changes affinity

2017-09-02 Thread Saeed Mahameed
From: Tariq Toukan 

NAPI context keeps rescheduling on same CPU as long as it's busy.
This doesn't give the opportunity for changes in irq affinities
to take effect.
Fix that by calling napi_complete_done() upon a change in affinity.
This would stop the NAPI and reschedule it on the new CPU.

Signed-off-by: Tariq Toukan 
Signed-off-by: Saeed Mahameed 
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  3 +++
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c |  5 +
 drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 20 ++--
 3 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 7c046ae8b18e..1388a1e2f835 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -582,6 +582,9 @@ struct mlx5e_channel {
__be32 mkey_be;
u8 num_tc;
 
+   /* data path - accessed per napi poll */
+   struct irq_desc *irq_desc;
+
/* control */
struct mlx5e_priv *priv;
struct mlx5_core_dev  *mdev;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index b2f689ec0d72..20f34131d4e5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1761,7 +1761,9 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, 
int ix,
struct net_device *netdev = priv->netdev;
int cpu = mlx5e_get_cpu(priv, ix);
struct mlx5e_channel *c;
+   unsigned int irq;
int err;
+   int eqn;
 
c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
if (!c)
@@ -1778,6 +1780,9 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, 
int ix,
c->num_tc   = params->num_tc;
c->xdp  = !!params->xdp_prog;
 
+   mlx5_vector2eqn(priv->mdev, ix, , );
+   c->irq_desc = irq_to_desc(irq);
+
netif_napi_add(netdev, >napi, mlx5e_napi_poll, 64);
 
err = mlx5e_open_cq(c, icocq_moder, >icosq_cq, >icosq.cq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index 92d9aa1cddd6..e906b754415c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -30,8 +30,20 @@
  * SOFTWARE.
  */
 
+#include 
 #include "en.h"
 
+static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c)
+{
+   int current_cpu = smp_processor_id();
+   const struct cpumask *aff;
+   struct irq_data *idata;
+
+   idata = irq_desc_get_irq_data(c->irq_desc);
+   aff = irq_data_get_affinity_mask(idata);
+   return cpumask_test_cpu(current_cpu, aff);
+}
+
 int mlx5e_napi_poll(struct napi_struct *napi, int budget)
 {
struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel,
@@ -51,8 +63,12 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
 
busy |= c->rq.post_wqes(>rq);
 
-   if (busy)
-   return budget;
+   if (busy) {
+   if (likely(mlx5e_channel_no_affinity_change(c)))
+   return budget;
+   if (work_done == budget)
+   work_done--;
+   }
 
if (unlikely(!napi_complete_done(napi, work_done)))
return work_done;
-- 
2.13.0



[net-next 01/17] net/mlx5e: Reorganize struct mlx5e_rq

2017-09-02 Thread Saeed Mahameed
From: Tariq Toukan 

Bring fast-path fields together, and combine RX WQE mutually
exclusive fields into a union.

Page-reuse and XDP are mutually exclusive and cannot be used at
the same time.
Use a union to combine their footprints.

Signed-off-by: Tariq Toukan 
Signed-off-by: Saeed Mahameed 
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  | 16 +---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 10 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c   | 13 ++---
 3 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 6c2abeccfa5a..d964db286c95 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -527,20 +527,24 @@ struct mlx5e_rq {
struct {
struct mlx5e_wqe_frag_info *frag_info;
u32 frag_sz;/* max possible skb frag_sz */
-   bool page_reuse;
-   bool xdp_xmit;
+   union {
+   bool page_reuse;
+   bool xdp_xmit;
+   };
} wqe;
struct {
struct mlx5e_mpw_info *info;
void  *mtt_no_align;
+   u16stride_sz;
+   u16num_strides;
} mpwqe;
};
struct {
-   u8 page_order;
u32wqe_sz;/* wqe data buffer size */
+   u16headroom;
+   u8 page_order;
u8 map_dir;   /* dma map direction */
} buff;
-   __be32 mkey_be;
 
struct device *pdev;
struct net_device *netdev;
@@ -555,7 +559,6 @@ struct mlx5e_rq {
 
unsigned long  state;
intix;
-   u16rx_headroom;
 
struct mlx5e_rx_am am; /* Adaptive Moderation */
 
@@ -565,9 +568,8 @@ struct mlx5e_rq {
 
/* control */
struct mlx5_wq_ctrlwq_ctrl;
+   __be32 mkey_be;
u8 wq_type;
-   u32mpwqe_stride_sz;
-   u32mpwqe_num_strides;
u32rqn;
struct mlx5e_channel  *channel;
struct mlx5_core_dev  *mdev;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 85841e24c65b..94761d0e1b33 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -593,7 +593,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
}
 
rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
-   rq->rx_headroom = params->rq_headroom;
+   rq->buff.headroom = params->rq_headroom;
 
switch (rq->wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
@@ -615,10 +615,10 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
goto err_rq_wq_destroy;
}
 
-   rq->mpwqe_stride_sz = BIT(params->mpwqe_log_stride_sz);
-   rq->mpwqe_num_strides = BIT(params->mpwqe_log_num_strides);
+   rq->mpwqe.stride_sz = BIT(params->mpwqe_log_stride_sz);
+   rq->mpwqe.num_strides = BIT(params->mpwqe_log_num_strides);
 
-   rq->buff.wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides;
+   rq->buff.wqe_sz = rq->mpwqe.stride_sz * rq->mpwqe.num_strides;
byte_count = rq->buff.wqe_sz;
 
err = mlx5e_create_rq_umr_mkey(mdev, rq);
@@ -665,7 +665,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
byte_count = rq->buff.wqe_sz;
 
/* calc the required page order */
-   rq->wqe.frag_sz = MLX5_SKB_FRAG_SZ(rq->rx_headroom + 
byte_count);
+   rq->wqe.frag_sz = MLX5_SKB_FRAG_SZ(rq->buff.headroom + 
byte_count);
npages = DIV_ROUND_UP(rq->wqe.frag_sz, PAGE_SIZE);
rq->buff.page_order = order_base_2(npages);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index be8197a75a63..1b50f1e7e48a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -263,8 +263,7 @@ int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct 
mlx5e_rx_wqe *wqe, u16 ix)
wi->offset = 0;
}
 
-   wqe->data.addr = cpu_to_be64(wi->di.addr + wi->offset +
-rq->rx_headroom);
+   wqe->data.addr = 

[net-next 17/17] net/mlx5e: Distribute RSS table among all RX rings

2017-09-02 Thread Saeed Mahameed
From: Tariq Toukan 

By default, uniformly distribute the RSS indirection table entries
among all RX rings, rather than restricting this only to the rings
on the close NUMA node. irqbalancer would anyway dynamically override
the default affinities set to the RX rings.
This gives better multi-stream performance and CPU utilization.

Signed-off-by: Tariq Toukan 
Signed-off-by: Saeed Mahameed 
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h |  3 +--
 drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c |  3 +--
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c| 15 ++-
 3 files changed, 4 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 1388a1e2f835..8b7d83bcc11a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -925,8 +925,7 @@ void mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
 void mlx5e_activate_priv_channels(struct mlx5e_priv *priv);
 void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv);
 
-void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev,
-  u32 *indirection_rqt, int len,
+void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len,
   int num_channels);
 int mlx5e_get_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 6127e0d2f310..d12e9fc0d76b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -663,8 +663,7 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
new_channels.params = priv->channels.params;
new_channels.params.num_channels = count;
if (!netif_is_rxfh_configured(priv->netdev))
-   mlx5e_build_default_indir_rqt(priv->mdev,
- 
new_channels.params.indirection_rqt,
+   
mlx5e_build_default_indir_rqt(new_channels.params.indirection_rqt,
  MLX5E_INDIR_RQT_SIZE, count);
 
if (!test_bit(MLX5E_STATE_OPENED, >state)) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 20f34131d4e5..77068609a153 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3833,22 +3833,11 @@ u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev)
   2 /*sizeof(mlx5e_tx_wqe.inline_hdr_start)*/;
 }
 
-void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev,
-  u32 *indirection_rqt, int len,
+void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len,
   int num_channels)
 {
-   int node = mdev->priv.numa_node;
-   int node_num_of_cores;
int i;
 
-   if (node == -1)
-   node = first_online_node;
-
-   node_num_of_cores = cpumask_weight(cpumask_of_node(node));
-
-   if (node_num_of_cores)
-   num_channels = min_t(int, num_channels, node_num_of_cores);
-
for (i = 0; i < len; i++)
indirection_rqt[i] = i % num_channels;
 }
@@ -3987,7 +3976,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
/* RSS */
params->rss_hfunc = ETH_RSS_HASH_XOR;
netdev_rss_key_fill(params->toeplitz_hash_key, 
sizeof(params->toeplitz_hash_key));
-   mlx5e_build_default_indir_rqt(mdev, params->indirection_rqt,
+   mlx5e_build_default_indir_rqt(params->indirection_rqt,
  MLX5E_INDIR_RQT_SIZE, max_channels);
 }
 
-- 
2.13.0



[net-next 09/17] net/mlx5e: Non-atomic indicator for ring enabled state

2017-09-02 Thread Saeed Mahameed
From: Tariq Toukan 

The rings' enabled state changes in the control path only, and the change
is always followed by a napi_synchronize(), so that subsequent NAPI polls
read the new value. This read does not need to be atomic.

The RQ auto-moderation bit is not set/cleared in the data path.
No atomic read is needed; a regular read operation is sufficient.
At RQ creation time as well, there are no multiple threads trying
to access it yet, hence a regular read can be used.

Signed-off-by: Tariq Toukan 
Signed-off-by: Saeed Mahameed 
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  | 2 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c   | 8 
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c   | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 4 ++--
 5 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index ce8b4f648757..0c4f1f30085a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -295,6 +295,8 @@ enum {
MLX5E_RQ_STATE_AM,
 };
 
+#define MLX5E_TEST_BIT(state, nr) (state & BIT(nr))
+
 struct mlx5e_cq {
/* data path - accessed per cqe */
struct mlx5_cqwq   wq;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 5aa4681f7c3c..411fb68794bc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -929,7 +929,7 @@ static int mlx5e_open_rq(struct mlx5e_channel *c,
goto err_destroy_rq;
 
if (params->rx_am_enabled)
-   set_bit(MLX5E_RQ_STATE_AM, &c->rq.state);
+   c->rq.state |= BIT(MLX5E_RQ_STATE_AM);
 
return 0;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 9d9c13ae6b83..a5522c3992a2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -424,7 +424,7 @@ void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq)
 
clear_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, >state);
 
-   if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) {
+   if (unlikely(!MLX5E_TEST_BIT(rq->state, MLX5E_RQ_STATE_ENABLED))) {
mlx5e_free_rx_mpwqe(rq, >mpwqe.info[wq->head]);
return;
}
@@ -461,7 +461,7 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
struct mlx5_wq_ll *wq = >wq;
int err;
 
-   if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
+   if (unlikely(!MLX5E_TEST_BIT(rq->state, MLX5E_RQ_STATE_ENABLED)))
return false;
 
if (mlx5_wq_ll_is_full(wq))
@@ -983,7 +983,7 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
struct mlx5_cqe64 *cqe;
int work_done = 0;
 
-   if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
+   if (unlikely(!MLX5E_TEST_BIT(rq->state, MLX5E_RQ_STATE_ENABLED)))
return 0;
 
if (cq->decmprs_left)
@@ -1031,7 +1031,7 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
 
sq = container_of(cq, struct mlx5e_xdpsq, cq);
 
-   if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
+   if (unlikely(!MLX5E_TEST_BIT(sq->state, MLX5E_SQ_STATE_ENABLED)))
return false;
 
cqe = mlx5_cqwq_get_cqe(>wq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 80d2121643ee..fee43e40fa16 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -403,7 +403,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 
sq = container_of(cq, struct mlx5e_txqsq, cq);
 
-   if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
+   if (unlikely(!MLX5E_TEST_BIT(sq->state, MLX5E_SQ_STATE_ENABLED)))
return false;
 
cqe = mlx5_cqwq_get_cqe(>wq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index 92db28a9ed43..7311b937e434 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -69,7 +69,7 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
struct mlx5_cqe64 *cqe;
u16 sqcc;
 
-   if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
+   if (unlikely(!MLX5E_TEST_BIT(sq->state, MLX5E_SQ_STATE_ENABLED)))
return;
 
cqe = mlx5_cqwq_get_cqe(>wq);
@@ -129,7 +129,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
for (i = 0; i < c->num_tc; i++)
mlx5e_cq_arm(>sq[i].cq);
 
-   if (test_bit(MLX5E_RQ_STATE_AM, >rq.state))
+   if 

[net-next 13/17] net/mlx5e: Don't recycle page if moved to far NUMA

2017-09-02 Thread Saeed Mahameed
From: Tariq Toukan 

Avoid recycling an RX page if it moved to another NUMA node.
Add an ethtool counter to count such events.

Signed-off-by: Tariq Toukan 
Signed-off-by: Saeed Mahameed 
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c| 6 --
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 4 
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index a4c9a0a2c408..2da2ea222aaa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -208,6 +208,7 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv 
*priv)
s->rx_cache_full  += rq_stats->cache_full;
s->rx_cache_empty += rq_stats->cache_empty;
s->rx_cache_busy  += rq_stats->cache_busy;
+   s->rx_cache_waive += rq_stats->cache_waive;
 
for (j = 0; j < priv->channels.params.num_tc; j++) {
sq_stats = >sq[j].stats;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 88a8749c67d6..f1dd638384d3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -163,7 +163,7 @@ static inline u32 mlx5e_decompress_cqes_start(struct 
mlx5e_rq *rq,
 
 static inline bool mlx5e_page_is_reserved(struct page *page)
 {
-   return page_is_pfmemalloc(page) || page_to_nid(page) != numa_node_id();
+   return page_is_pfmemalloc(page) || page_to_nid(page) != numa_mem_id();
 }
 
 static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq,
@@ -177,8 +177,10 @@ static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq,
return false;
}
 
-   if (unlikely(page_is_pfmemalloc(dma_info->page)))
+   if (unlikely(mlx5e_page_is_reserved(dma_info->page))) {
+   rq->stats.cache_waive++;
return false;
+   }
 
cache->page_cache[cache->tail] = *dma_info;
cache->tail = tail_next;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 6761796e803c..6d199ffb1c0b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -84,6 +84,7 @@ struct mlx5e_sw_stats {
u64 rx_cache_full;
u64 rx_cache_empty;
u64 rx_cache_busy;
+   u64 rx_cache_waive;
 
/* Special handling counters */
u64 link_down_events_phy;
@@ -123,6 +124,7 @@ static const struct counter_desc sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_full) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) },
+   { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_waive) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events_phy) },
 };
 
@@ -354,6 +356,7 @@ struct mlx5e_rq_stats {
u64 cache_full;
u64 cache_empty;
u64 cache_busy;
+   u64 cache_waive;
 };
 
 static const struct counter_desc rq_stats_desc[] = {
@@ -377,6 +380,7 @@ static const struct counter_desc rq_stats_desc[] = {
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_full) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_empty) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_busy) },
+   { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_waive) },
 };
 
 struct mlx5e_sq_stats {
-- 
2.13.0



[net-next 03/17] net/mlx5e: Remove unnecessary wqe_sz field from RQ buffer

2017-09-02 Thread Saeed Mahameed
From: Tariq Toukan 

Field is used only locally within the RQ create function.
The use of a local variable is sufficient.

Signed-off-by: Tariq Toukan 
Signed-off-by: Saeed Mahameed 
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  | 1 -
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 8 +++-
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 44bd8df905ca..ce8b4f648757 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -540,7 +540,6 @@ struct mlx5e_rq {
} mpwqe;
};
struct {
-   u32wqe_sz;/* wqe data buffer size */
u16headroom;
u8 page_order;
u8 map_dir;   /* dma map direction */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 7a25d952c922..591e0dca9671 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -618,8 +618,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
rq->mpwqe.log_stride_sz = params->mpwqe_log_stride_sz;
rq->mpwqe.num_strides = BIT(params->mpwqe_log_num_strides);
 
-   rq->buff.wqe_sz = rq->mpwqe.num_strides << 
rq->mpwqe.log_stride_sz;
-   byte_count = rq->buff.wqe_sz;
+   byte_count = rq->mpwqe.num_strides << rq->mpwqe.log_stride_sz;
 
err = mlx5e_create_rq_umr_mkey(mdev, rq);
if (err)
@@ -654,15 +653,14 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
goto err_rq_wq_destroy;
}
 
-   rq->buff.wqe_sz = params->lro_en  ?
+   byte_count = params->lro_en  ?
params->lro_wqe_sz :
MLX5E_SW2HW_MTU(c->priv, c->netdev->mtu);
 #ifdef CONFIG_MLX5_EN_IPSEC
if (MLX5_IPSEC_DEV(mdev))
-   rq->buff.wqe_sz += MLX5E_METADATA_ETHER_LEN;
+   byte_count += MLX5E_METADATA_ETHER_LEN;
 #endif
rq->wqe.page_reuse = !params->xdp_prog && !params->lro_en;
-   byte_count = rq->buff.wqe_sz;
 
/* calc the required page order */
rq->wqe.frag_sz = MLX5_SKB_FRAG_SZ(rq->buff.headroom + 
byte_count);
-- 
2.13.0



Re: [pull request][net-next 0/3] Mellanox, mlx5 GRE tunnel offloads

2017-09-02 Thread Saeed Mahameed
On Sat, Sep 2, 2017 at 6:37 PM, Tom Herbert  wrote:
> On Sat, Sep 2, 2017 at 6:32 PM, Hannes Frederic Sowa
>  wrote:
>> Hi Saeed,
>>
>> On Sun, Sep 3, 2017, at 01:01, Saeed Mahameed wrote:
>>> On Thu, Aug 31, 2017 at 6:51 AM, Hannes Frederic Sowa
>>>  wrote:
>>> > Saeed Mahameed  writes:
>>> >
>>> >> The first patch from Gal and Ariel provides the mlx5 driver support for
>>> >> ConnectX capability to perform IP version identification and matching in
>>> >> order to distinguish between IPv4 and IPv6 without the need to specify 
>>> >> the
>>> >> encapsulation type, thus perform RSS in MPLS automatically without
>>> >> specifying MPLS ethertyoe. This patch will also serve for inner GRE 
>>> >> IPv4/6
>>> >> classification for inner GRE RSS.
>>> >
>>> > I don't think this is legal at all or did I misunderstood something?
>>> >
>>> > 
>>>
>>> It seems you misunderstood the cover letter.  The HW will still
>>> identify MPLS (IPv4/IPv6) packets using a new bit we specify in the HW
>>> steering rules rather than adding new specific rules with  {MPLS
>>> ethertype} X {IPv4,IPv6} to classify MPLS IPv{4,6} traffic, Same
>>> functionality a better and general way to approach it.
>>> Bottom line the hardware is capable of processing MPLS headers and
>>> perform RSS on the inner packet (IPv4/6) without the need of the
>>> driver to provide precise steering MPLS rules.
>>
>> Sorry, I think I am still confused.
>>
>> I just want to make sure that you don't use the first nibble after the
>> mpls bottom of stack label in any way as an indicator if that is an IPv4
>> or IPv6 packet by default. It can be anything. The forward equivalence
>> class tells the stack which protocol you see.
>>
>> If you match on the first nibble behind the MPLS bottom of stack label
>> the '4' or '6' respectively could be part of a MAC address with its
>> first nibble being 4 or 6, because the particular pseudowire is EoMPLS
>> and uses no control world.
>>
>> I wanted to mention it, because with addition of e.g. VPLS this could
>> cause problems down the road and should at least be controllable? It is
>> probably better to use Entropy Labels in future.
>>
> Or just use IPv6 with flow label for RSS (or MPLS/UDP, GRE/UDP if you
> prefer) then all this protocol specific DPI for RSS just goes away ;-)

Hi Tom,

How does MPLS/UDP or GRE/UDP RSS work without protocol-specific DPI?
Unlike VXLAN, those protocols are not over UDP, and you can't just play
with the outer header UDP source port, or can you?

Can you elaborate?

Thanks,
Saeed.

>
> Tom
>
>> Thanks,
>> Hannes


Re: [pull request][net-next 0/3] Mellanox, mlx5 GRE tunnel offloads

2017-09-02 Thread Saeed Mahameed
On Sat, Sep 2, 2017 at 6:32 PM, Hannes Frederic Sowa
 wrote:
> Hi Saeed,
>
> On Sun, Sep 3, 2017, at 01:01, Saeed Mahameed wrote:
>> On Thu, Aug 31, 2017 at 6:51 AM, Hannes Frederic Sowa
>>  wrote:
>> > Saeed Mahameed  writes:
>> >
>> >> The first patch from Gal and Ariel provides the mlx5 driver support for
>> >> ConnectX capability to perform IP version identification and matching in
>> >> order to distinguish between IPv4 and IPv6 without the need to specify the
>> >> encapsulation type, thus perform RSS in MPLS automatically without
>> >> specifying MPLS ethertyoe. This patch will also serve for inner GRE IPv4/6
>> >> classification for inner GRE RSS.
>> >
>> > I don't think this is legal at all or did I misunderstood something?
>> >
>> > 
>>
>> It seems you misunderstood the cover letter.  The HW will still
>> identify MPLS (IPv4/IPv6) packets using a new bit we specify in the HW
>> steering rules rather than adding new specific rules with  {MPLS
>> ethertype} X {IPv4,IPv6} to classify MPLS IPv{4,6} traffic, Same
>> functionality a better and general way to approach it.
>> Bottom line the hardware is capable of processing MPLS headers and
>> perform RSS on the inner packet (IPv4/6) without the need of the
>> driver to provide precise steering MPLS rules.
>
> Sorry, I think I am still confused.
>
> I just want to make sure that you don't use the first nibble after the
> mpls bottom of stack label in any way as an indicator if that is an IPv4
> or IPv6 packet by default. It can be anything. The forward equivalence
> class tells the stack which protocol you see.
>
> If you match on the first nibble behind the MPLS bottom of stack label
> the '4' or '6' respectively could be part of a MAC address with its
> first nibble being 4 or 6, because the particular pseudowire is EoMPLS
> and uses no control world.
>
> I wanted to mention it, because with addition of e.g. VPLS this could
> cause problems down the road and should at least be controllable? It is
> probably better to use Entropy Labels in future.
>

Hi Hannes,

I see your concern now, but it still has nothing to do with the
driver. The whole change only simplifies the driver code so that it does
not push full-blown matching steering rules into the HW, and simply
replaces them with a one-bit command.

Regarding your concern, I will need to check with the HW guys and
review the processing algorithm that identifies IP packets over MPLS,
and will get back to you.

If there is really a problem, then yes, we might need to make it
controllable.

> Thanks,
> Hannes


Re: [PATCH net-next 2/2] net: convert (struct ubuf_info)->refcnt to refcount_t

2017-09-02 Thread Willem de Bruijn
On Sat, Sep 2, 2017 at 5:58 PM, kbuild test robot  wrote:
> Hi Eric,
>
> [auto build test WARNING on net-next/master]
>
> url:
> https://github.com/0day-ci/linux/commits/Eric-Dumazet/net-ubuf_info-refcnt-conversion/20170903-043506
> config: i386-randconfig-i1-201736 (attached as .config)
> compiler: gcc-4.8 (Debian 4.8.4-1) 4.8.4
> reproduce:
> # save the attached .config to linux build tree
> make ARCH=i386
>
> All warnings (new ones prefixed by >>):
>
>drivers//vhost/net.c: In function 'handle_tx':
>>> drivers//vhost/net.c:536:4: warning: passing argument 1 of 'atomic_set' 
>>> from incompatible pointer type [enabled by default]
>atomic_set(>refcnt, 1);
>^
>In file included from include/linux/atomic.h:4:0,
> from arch/x86/include/asm/thread_info.h:53,
> from include/linux/thread_info.h:37,
> from arch/x86/include/asm/preempt.h:6,
> from include/linux/preempt.h:80,
> from include/linux/spinlock.h:50,
> from include/linux/wait.h:8,
> from include/linux/eventfd.h:12,
> from drivers//vhost/net.c:10:
>arch/x86/include/asm/atomic.h:36:29: note: expected 'struct atomic_t *' 
> but argument is of type 'struct refcount_t *'
> static __always_inline void atomic_set(atomic_t *v, int i)
> ^

This is a false positive. This patch

  [net-next,2/2] net: convert (struct ubuf_info)->refcnt to refcount_t
  http://patchwork.ozlabs.org/patch/808402/

was superseded by

  [v2,net-next,2/2] net: convert (struct ubuf_info)->refcnt to refcount_t
  http://patchwork.ozlabs.org/patch/808477/

which has been merged into net-next as commit c1d1b437816f

That patch has the necessary change:

-  atomic_set(>refcnt, 1);
+   refcount_set(>refcnt, 1);


[net-next:master 428/478] drivers/net//ethernet/ti/netcp_core.c:1349:21: error: 'DMA_MEM_TO_DEV' undeclared

2017-09-02 Thread kbuild test robot
tree:   https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git 
master
head:   32d9b70a053a835b4dfb33158fc03795ea103e44
commit: 0dd5759dbb1c9a862e7d90c09d6cf398c45f1100 [428/478] net: remove 
dmaengine.h inclusion from netdevice.h
config: arm-allmodconfig (attached as .config)
compiler: arm-linux-gnueabi-gcc (Debian 6.1.1-9) 6.1.1 20160705
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
git checkout 0dd5759dbb1c9a862e7d90c09d6cf398c45f1100
# save the attached .config to linux build tree
make.cross ARCH=arm 

All errors (new ones prefixed by >>):

   In file included from drivers/net//ethernet/ti/netcp_core.c:30:0:
   include/linux/soc/ti/knav_dma.h:129:30: error: field 'direction' has 
incomplete type
 enum dma_transfer_direction direction;
 ^
   drivers/net//ethernet/ti/netcp_core.c: In function 'netcp_txpipe_open':
>> drivers/net//ethernet/ti/netcp_core.c:1349:21: error: 'DMA_MEM_TO_DEV' 
>> undeclared (first use in this function)
 config.direction = DMA_MEM_TO_DEV;
^~
   drivers/net//ethernet/ti/netcp_core.c:1349:21: note: each undeclared 
identifier is reported only once for each function it appears in
   drivers/net//ethernet/ti/netcp_core.c: In function 
'netcp_setup_navigator_resources':
   drivers/net//ethernet/ti/netcp_core.c:1659:22: error: 'DMA_DEV_TO_MEM' 
undeclared (first use in this function)
 config.direction  = DMA_DEV_TO_MEM;
 ^~

vim +/DMA_MEM_TO_DEV +1349 drivers/net//ethernet/ti/netcp_core.c

84640e27f Karicheri, Muralidharan 2015-01-15  1340  
84640e27f Karicheri, Muralidharan 2015-01-15  1341  int 
netcp_txpipe_open(struct netcp_tx_pipe *tx_pipe)
84640e27f Karicheri, Muralidharan 2015-01-15  1342  {
84640e27f Karicheri, Muralidharan 2015-01-15  1343  struct device *dev = 
tx_pipe->netcp_device->device;
84640e27f Karicheri, Muralidharan 2015-01-15  1344  struct knav_dma_cfg 
config;
84640e27f Karicheri, Muralidharan 2015-01-15  1345  int ret = 0;
84640e27f Karicheri, Muralidharan 2015-01-15  1346  u8 name[16];
84640e27f Karicheri, Muralidharan 2015-01-15  1347  
84640e27f Karicheri, Muralidharan 2015-01-15  1348  memset(, 0, 
sizeof(config));
84640e27f Karicheri, Muralidharan 2015-01-15 @1349  config.direction = 
DMA_MEM_TO_DEV;
84640e27f Karicheri, Muralidharan 2015-01-15  1350  config.u.tx.filt_einfo 
= false;
84640e27f Karicheri, Muralidharan 2015-01-15  1351  
config.u.tx.filt_pswords = false;
84640e27f Karicheri, Muralidharan 2015-01-15  1352  config.u.tx.priority = 
DMA_PRIO_MED_L;
84640e27f Karicheri, Muralidharan 2015-01-15  1353  
84640e27f Karicheri, Muralidharan 2015-01-15  1354  tx_pipe->dma_channel = 
knav_dma_open_channel(dev,
84640e27f Karicheri, Muralidharan 2015-01-15  1355  
tx_pipe->dma_chan_name, );
5b6cb43b4 Ivan Khoronzhuk 2017-05-10  1356  if 
(IS_ERR(tx_pipe->dma_channel)) {
84640e27f Karicheri, Muralidharan 2015-01-15  1357  dev_err(dev, 
"failed opening tx chan(%s)\n",
84640e27f Karicheri, Muralidharan 2015-01-15  1358  
tx_pipe->dma_chan_name);
5b6cb43b4 Ivan Khoronzhuk 2017-05-10  1359  ret = 
PTR_ERR(tx_pipe->dma_channel);
84640e27f Karicheri, Muralidharan 2015-01-15  1360  goto err;
84640e27f Karicheri, Muralidharan 2015-01-15  1361  }
84640e27f Karicheri, Muralidharan 2015-01-15  1362  
84640e27f Karicheri, Muralidharan 2015-01-15  1363  snprintf(name, 
sizeof(name), "tx-pipe-%s", dev_name(dev));
84640e27f Karicheri, Muralidharan 2015-01-15  1364  tx_pipe->dma_queue = 
knav_queue_open(name, tx_pipe->dma_queue_id,
84640e27f Karicheri, Muralidharan 2015-01-15  1365  
 KNAV_QUEUE_SHARED);
84640e27f Karicheri, Muralidharan 2015-01-15  1366  if 
(IS_ERR(tx_pipe->dma_queue)) {
84640e27f Karicheri, Muralidharan 2015-01-15  1367  dev_err(dev, 
"Could not open DMA queue for channel \"%s\": %d\n",
84640e27f Karicheri, Muralidharan 2015-01-15  1368  name, 
ret);
84640e27f Karicheri, Muralidharan 2015-01-15  1369  ret = 
PTR_ERR(tx_pipe->dma_queue);
84640e27f Karicheri, Muralidharan 2015-01-15  1370  goto err;
84640e27f Karicheri, Muralidharan 2015-01-15  1371  }
84640e27f Karicheri, Muralidharan 2015-01-15  1372  
84640e27f Karicheri, Muralidharan 2015-01-15  1373  dev_dbg(dev, "opened tx 
pipe %s\n", name);
84640e27f Karicheri, Muralidharan 2015-01-15  1374  return 0;
84640e27f Karicheri, Muralidharan 2015-01-15  1375  
84640e27f Karicheri, Muralidharan 2015-01-15  1376  err:
84640e27f Karicheri, Muralidharan 2015-01-15  1377  if 
(!IS_ERR_OR_NULL(tx_pipe->dma_channel))
84640e27f Karicheri, Muralidharan 2015-01-15  1378  

Re: [pull request][net-next 0/3] Mellanox, mlx5 GRE tunnel offloads

2017-09-02 Thread Tom Herbert
On Sat, Sep 2, 2017 at 6:32 PM, Hannes Frederic Sowa
 wrote:
> Hi Saeed,
>
> On Sun, Sep 3, 2017, at 01:01, Saeed Mahameed wrote:
>> On Thu, Aug 31, 2017 at 6:51 AM, Hannes Frederic Sowa
>>  wrote:
>> > Saeed Mahameed  writes:
>> >
>> >> The first patch from Gal and Ariel provides the mlx5 driver support for
>> >> ConnectX capability to perform IP version identification and matching in
>> >> order to distinguish between IPv4 and IPv6 without the need to specify the
>> >> encapsulation type, thus perform RSS in MPLS automatically without
>> >> specifying MPLS ethertyoe. This patch will also serve for inner GRE IPv4/6
>> >> classification for inner GRE RSS.
>> >
>> > I don't think this is legal at all or did I misunderstood something?
>> >
>> > 
>>
>> It seems you misunderstood the cover letter.  The HW will still
>> identify MPLS (IPv4/IPv6) packets using a new bit we specify in the HW
>> steering rules rather than adding new specific rules with  {MPLS
>> ethertype} X {IPv4,IPv6} to classify MPLS IPv{4,6} traffic, Same
>> functionality a better and general way to approach it.
>> Bottom line the hardware is capable of processing MPLS headers and
>> perform RSS on the inner packet (IPv4/6) without the need of the
>> driver to provide precise steering MPLS rules.
>
> Sorry, I think I am still confused.
>
> I just want to make sure that you don't use the first nibble after the
> mpls bottom of stack label in any way as an indicator if that is an IPv4
> or IPv6 packet by default. It can be anything. The forward equivalence
> class tells the stack which protocol you see.
>
> If you match on the first nibble behind the MPLS bottom of stack label
> the '4' or '6' respectively could be part of a MAC address with its
> first nibble being 4 or 6, because the particular pseudowire is EoMPLS
> and uses no control world.
>
> I wanted to mention it, because with addition of e.g. VPLS this could
> cause problems down the road and should at least be controllable? It is
> probably better to use Entropy Labels in future.
>
Or just use IPv6 with flow label for RSS (or MPLS/UDP, GRE/UDP if you
prefer) then all this protocol specific DPI for RSS just goes away ;-)

Tom

> Thanks,
> Hannes


Re: [pull request][net-next 0/3] Mellanox, mlx5 GRE tunnel offloads

2017-09-02 Thread Hannes Frederic Sowa
Hi Saeed,

On Sun, Sep 3, 2017, at 01:01, Saeed Mahameed wrote:
> On Thu, Aug 31, 2017 at 6:51 AM, Hannes Frederic Sowa
>  wrote:
> > Saeed Mahameed  writes:
> >
> >> The first patch from Gal and Ariel provides the mlx5 driver support for
> >> ConnectX capability to perform IP version identification and matching in
> >> order to distinguish between IPv4 and IPv6 without the need to specify the
> >> encapsulation type, thus perform RSS in MPLS automatically without
> >> specifying MPLS ethertyoe. This patch will also serve for inner GRE IPv4/6
> >> classification for inner GRE RSS.
> >
> > I don't think this is legal at all or did I misunderstood something?
> >
> > 
> 
> It seems you misunderstood the cover letter.  The HW will still
> identify MPLS (IPv4/IPv6) packets using a new bit we specify in the HW
> steering rules rather than adding new specific rules with  {MPLS
> ethertype} X {IPv4,IPv6} to classify MPLS IPv{4,6} traffic, Same
> functionality a better and general way to approach it.
> Bottom line the hardware is capable of processing MPLS headers and
> perform RSS on the inner packet (IPv4/6) without the need of the
> driver to provide precise steering MPLS rules.

Sorry, I think I am still confused.

I just want to make sure that you don't use the first nibble after the
MPLS bottom-of-stack label in any way as an indicator of whether that is
an IPv4 or IPv6 packet by default. It can be anything. The forwarding
equivalence class tells the stack which protocol you see.

If you match on the first nibble behind the MPLS bottom-of-stack label,
the '4' or '6' respectively could be part of a MAC address with its
first nibble being 4 or 6, because the particular pseudowire is EoMPLS
and uses no control word.

I wanted to mention it, because with addition of e.g. VPLS this could
cause problems down the road and should at least be controllable? It is
probably better to use Entropy Labels in future.

Thanks,
Hannes


[PATCH net-next 2/6] nfp: flower: base lifetime of representors on existence of lower vNIC

2017-09-02 Thread Jakub Kicinski
Create representors after lower vNIC is registered and destroy
them before it is destroyed.  Move the code out of start/stop
callbacks directly into vnic_init/clean callbacks.  Make sure
SR-IOV callbacks don't try to create representors when lower
device does not exist.

Signed-off-by: Jakub Kicinski 
Reviewed-by: Simon Horman 
---
 drivers/net/ethernet/netronome/nfp/flower/main.c | 66 +++-
 1 file changed, 43 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c 
b/drivers/net/ethernet/netronome/nfp/flower/main.c
index db59858c0f19..91fe03617106 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.c
@@ -127,6 +127,11 @@ nfp_flower_repr_netdev_stop(struct nfp_app *app, struct 
nfp_repr *repr)
 
 static void nfp_flower_sriov_disable(struct nfp_app *app)
 {
+   struct nfp_flower_priv *priv = app->priv;
+
+   if (!priv->nn)
+   return;
+
nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_VF);
 }
 
@@ -203,18 +208,16 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app,
 
 static int nfp_flower_sriov_enable(struct nfp_app *app, int num_vfs)
 {
+   struct nfp_flower_priv *priv = app->priv;
+
+   if (!priv->nn)
+   return 0;
+
return nfp_flower_spawn_vnic_reprs(app,
   NFP_FLOWER_CMSG_PORT_VNIC_TYPE_VF,
   NFP_REPR_TYPE_VF, num_vfs);
 }
 
-static void nfp_flower_stop(struct nfp_app *app)
-{
-   nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PF);
-   nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PHYS_PORT);
-
-}
-
 static int
 nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv)
 {
@@ -300,19 +303,6 @@ nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct 
nfp_flower_priv *priv)
return err;
 }
 
-static int nfp_flower_start(struct nfp_app *app)
-{
-   int err;
-
-   err = nfp_flower_spawn_phy_reprs(app, app->priv);
-   if (err)
-   return err;
-
-   return nfp_flower_spawn_vnic_reprs(app,
-  NFP_FLOWER_CMSG_PORT_VNIC_TYPE_PF,
-  NFP_REPR_TYPE_PF, 1);
-}
-
 static int nfp_flower_vnic_alloc(struct nfp_app *app, struct nfp_net *nn,
 unsigned int id)
 {
@@ -335,16 +325,49 @@ static void nfp_flower_vnic_clean(struct nfp_app *app, 
struct nfp_net *nn)
 {
struct nfp_flower_priv *priv = app->priv;
 
+   if (app->pf->num_vfs)
+   nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_VF);
+   nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PF);
+   nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PHYS_PORT);
+
priv->nn = NULL;
 }
 
 static int nfp_flower_vnic_init(struct nfp_app *app, struct nfp_net *nn)
 {
struct nfp_flower_priv *priv = app->priv;
+   int err;
 
priv->nn = nn;
 
+   err = nfp_flower_spawn_phy_reprs(app, app->priv);
+   if (err)
+   goto err_clear_nn;
+
+   err = nfp_flower_spawn_vnic_reprs(app,
+ NFP_FLOWER_CMSG_PORT_VNIC_TYPE_PF,
+ NFP_REPR_TYPE_PF, 1);
+   if (err)
+   goto err_destroy_reprs_phy;
+
+   if (app->pf->num_vfs) {
+   err = nfp_flower_spawn_vnic_reprs(app,
+ 
NFP_FLOWER_CMSG_PORT_VNIC_TYPE_VF,
+ NFP_REPR_TYPE_VF,
+ app->pf->num_vfs);
+   if (err)
+   goto err_destroy_reprs_pf;
+   }
+
return 0;
+
+err_destroy_reprs_pf:
+   nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PF);
+err_destroy_reprs_phy:
+   nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PHYS_PORT);
+err_clear_nn:
+   priv->nn = NULL;
+   return err;
 }
 
 static int nfp_flower_init(struct nfp_app *app)
@@ -430,9 +453,6 @@ const struct nfp_app_type app_flower = {
.repr_open  = nfp_flower_repr_netdev_open,
.repr_stop  = nfp_flower_repr_netdev_stop,
 
-   .start  = nfp_flower_start,
-   .stop   = nfp_flower_stop,
-
.ctrl_msg_rx= nfp_flower_cmsg_rx,
 
.sriov_enable   = nfp_flower_sriov_enable,
-- 
2.14.1



[PATCH net-next 4/6] nfp: be drop monitor friendly

2017-09-02 Thread Jakub Kicinski
Use dev_consume_skb_any() in place of dev_kfree_skb_any()
when a control frame has been successfully processed in flower
and on the driver's main TX completion path.

Signed-off-by: Jakub Kicinski 
Reviewed-by: Simon Horman 
---
 drivers/net/ethernet/netronome/nfp/flower/cmsg.c| 3 +++
 drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c 
b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
index d82d9888d676..e014d862b9b6 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
@@ -189,8 +189,11 @@ nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct 
sk_buff *skb)
default:
nfp_flower_cmsg_warn(app, "Cannot handle invalid repr control 
type %u\n",
 type);
+   goto out;
}
 
+   dev_consume_skb_any(skb);
+   return;
 out:
dev_kfree_skb_any(skb);
 }
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c 
b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 2920889fa6d6..1c0187f0af51 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -991,7 +991,7 @@ static void nfp_net_tx_complete(struct nfp_net_tx_ring 
*tx_ring)
 
/* check for last gather fragment */
if (fidx == nr_frags - 1)
-   dev_kfree_skb_any(skb);
+   dev_consume_skb_any(skb);
 
tx_ring->txbufs[idx].dma_addr = 0;
tx_ring->txbufs[idx].skb = NULL;
-- 
2.14.1



[PATCH net-next 6/6] nfp: flower: restore RTNL locking around representor updates

2017-09-02 Thread Jakub Kicinski
When we moved to updating representors from a workqueue, grabbing
the RTNL somehow got lost in the process.  Restore it, and make
sure the RCU lock is not held while we are grabbing the RTNL.  RCU
protects the representor table, so since we will be under RTNL
we can drop the RCU lock as soon as we find the netdev pointer.
RTNL is needed for the dev_set_mtu() call.

Fixes: 2dff19622421 ("nfp: process MTU updates from firmware flower app")
Signed-off-by: Jakub Kicinski 
Reviewed-by: Simon Horman 
---
 drivers/net/ethernet/netronome/nfp/flower/cmsg.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c 
b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
index e014d862b9b6..c3ca05d10fe1 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
@@ -141,12 +141,14 @@ nfp_flower_cmsg_portmod_rx(struct nfp_app *app, struct 
sk_buff *skb)
msg = nfp_flower_cmsg_get_data(skb);
link = msg->info & NFP_FLOWER_CMSG_PORTMOD_INFO_LINK;
 
+   rtnl_lock();
rcu_read_lock();
netdev = nfp_app_repr_get(app, be32_to_cpu(msg->portnum));
+   rcu_read_unlock();
if (!netdev) {
nfp_flower_cmsg_warn(app, "ctrl msg for unknown port 0x%08x\n",
 be32_to_cpu(msg->portnum));
-   rcu_read_unlock();
+   rtnl_unlock();
return;
}
 
@@ -161,7 +163,7 @@ nfp_flower_cmsg_portmod_rx(struct nfp_app *app, struct 
sk_buff *skb)
} else {
netif_carrier_off(netdev);
}
-   rcu_read_unlock();
+   rtnl_unlock();
 }
 
 static void
-- 
2.14.1



[PATCH net-next 5/6] nfp: build the flower offload by default

2017-09-02 Thread Jakub Kicinski
It's reasonable to assume that if the user chooses to build the NFP
driver, all offload capabilities will be enabled by default.
Change CONFIG_NFP_APP_FLOWER to default to enabled.

Signed-off-by: Jakub Kicinski 
Reviewed-by: Simon Horman 
---
 drivers/net/ethernet/netronome/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/netronome/Kconfig 
b/drivers/net/ethernet/netronome/Kconfig
index 0e331e2f685a..ae0c46ba7546 100644
--- a/drivers/net/ethernet/netronome/Kconfig
+++ b/drivers/net/ethernet/netronome/Kconfig
@@ -29,6 +29,7 @@ config NFP_APP_FLOWER
bool "NFP4000/NFP6000 TC Flower offload support"
depends on NFP
depends on NET_SWITCHDEV
+   default y
---help---
  Enable driver support for TC Flower offload on NFP4000 and NFP6000.
  Say Y, if you are planning to make use of TC Flower offload
-- 
2.14.1



[PATCH net-next 0/6] nfp: refactor app init, and minor flower fixes

2017-09-02 Thread Jakub Kicinski
Hi!

This series is part 2 of what went into net as a simpler fix.
In net we simply moved the point at which existing callbacks are
invoked, to ensure the flower app does not still use representors
when the lower netdev has already been destroyed.  In this series we
add a callback to notify apps when vNIC netdevs are fully initialized
and when they are about to be destroyed.  This allows flower to spawn
representors at the right time, while keeping the start/stop
callbacks for what they are intended to be used for - FW
initialization over the control channel.

Patch 4 improves drop monitor interaction and patch 5 changes 
the default Kconfig selection of flower offload.  Patch 6 fixes
locking around representor updates which got lost in net-next.

Jakub Kicinski (6):
  nfp: separate app vNIC init/clean from alloc/free
  nfp: flower: base lifetime of representors on existence of lower vNIC
  nfp: move the start/stop app callbacks back
  nfp: be drop monitor friendly
  nfp: build the flower offload by default
  nfp: flower: restore RTNL locking around representor updates

 drivers/net/ethernet/netronome/Kconfig |  1 +
 drivers/net/ethernet/netronome/nfp/bpf/main.c  | 10 +--
 drivers/net/ethernet/netronome/nfp/flower/cmsg.c   | 11 ++-
 drivers/net/ethernet/netronome/nfp/flower/main.c   | 93 +++---
 drivers/net/ethernet/netronome/nfp/flower/main.h   |  2 +
 drivers/net/ethernet/netronome/nfp/nfp_app.c   |  2 +-
 drivers/net/ethernet/netronome/nfp/nfp_app.h   | 35 +---
 drivers/net/ethernet/netronome/nfp/nfp_app_nic.c   |  4 +-
 .../net/ethernet/netronome/nfp/nfp_net_common.c|  2 +-
 drivers/net/ethernet/netronome/nfp/nfp_net_main.c  | 42 ++
 drivers/net/ethernet/netronome/nfp/nic/main.c  |  2 +-
 11 files changed, 136 insertions(+), 68 deletions(-)

-- 
2.14.1



[PATCH net-next 1/6] nfp: separate app vNIC init/clean from alloc/free

2017-09-02 Thread Jakub Kicinski
We currently only have one pair of app callbacks for vNIC creation
and destruction.  This is insufficient, because some actions
have to be taken before the netdev is registered, after it's
registered and after it's unregistered.  The old callbacks
really corresponded to alloc/free actions.  Rename
them and add proper init/clean.  Apps using representors
will be able to use the new callbacks to manage the lifetime of
upper devices.

Signed-off-by: Jakub Kicinski 
Reviewed-by: Simon Horman 
---
 drivers/net/ethernet/netronome/nfp/bpf/main.c | 10 +++
 drivers/net/ethernet/netronome/nfp/flower/cmsg.c  |  2 +-
 drivers/net/ethernet/netronome/nfp/flower/main.c  | 27 +
 drivers/net/ethernet/netronome/nfp/flower/main.h  |  2 ++
 drivers/net/ethernet/netronome/nfp/nfp_app.c  |  2 +-
 drivers/net/ethernet/netronome/nfp/nfp_app.h  | 35 +--
 drivers/net/ethernet/netronome/nfp/nfp_app_nic.c  |  4 +--
 drivers/net/ethernet/netronome/nfp/nfp_net_main.c | 16 +--
 drivers/net/ethernet/netronome/nfp/nic/main.c |  2 +-
 9 files changed, 73 insertions(+), 27 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c 
b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index f4de3a7377b0..be2cf10a2cd7 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -84,7 +84,7 @@ static const char *nfp_bpf_extra_cap(struct nfp_app *app, 
struct nfp_net *nn)
 }
 
 static int
-nfp_bpf_vnic_init(struct nfp_app *app, struct nfp_net *nn, unsigned int id)
+nfp_bpf_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id)
 {
struct nfp_net_bpf_priv *priv;
int ret;
@@ -106,14 +106,14 @@ nfp_bpf_vnic_init(struct nfp_app *app, struct nfp_net 
*nn, unsigned int id)
setup_timer(>rx_filter_stats_timer,
nfp_net_filter_stats_timer, (unsigned long)nn);
 
-   ret = nfp_app_nic_vnic_init(app, nn, id);
+   ret = nfp_app_nic_vnic_alloc(app, nn, id);
if (ret)
kfree(priv);
 
return ret;
 }
 
-static void nfp_bpf_vnic_clean(struct nfp_app *app, struct nfp_net *nn)
+static void nfp_bpf_vnic_free(struct nfp_app *app, struct nfp_net *nn)
 {
if (nn->dp.bpf_offload_xdp)
nfp_bpf_xdp_offload(app, nn, NULL);
@@ -149,8 +149,8 @@ const struct nfp_app_type app_bpf = {
 
.extra_cap  = nfp_bpf_extra_cap,
 
-   .vnic_init  = nfp_bpf_vnic_init,
-   .vnic_clean = nfp_bpf_vnic_clean,
+   .vnic_alloc = nfp_bpf_vnic_alloc,
+   .vnic_free  = nfp_bpf_vnic_free,
 
.setup_tc   = nfp_bpf_setup_tc,
.tc_busy= nfp_bpf_tc_busy,
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c 
b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
index 806924b82adc..d82d9888d676 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
@@ -203,7 +203,7 @@ void nfp_flower_cmsg_process_rx(struct work_struct *work)
priv = container_of(work, struct nfp_flower_priv, cmsg_work);
 
while ((skb = skb_dequeue(>cmsg_skbs)))
-   nfp_flower_cmsg_process_one_rx(priv->nn->app, skb);
+   nfp_flower_cmsg_process_one_rx(priv->app, skb);
 }
 
 void nfp_flower_cmsg_rx(struct nfp_app *app, struct sk_buff *skb)
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c 
b/drivers/net/ethernet/netronome/nfp/flower/main.c
index 126a6b5233bf..db59858c0f19 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.c
@@ -313,18 +313,14 @@ static int nfp_flower_start(struct nfp_app *app)
   NFP_REPR_TYPE_PF, 1);
 }
 
-static int nfp_flower_vnic_init(struct nfp_app *app, struct nfp_net *nn,
-   unsigned int id)
+static int nfp_flower_vnic_alloc(struct nfp_app *app, struct nfp_net *nn,
+unsigned int id)
 {
-   struct nfp_flower_priv *priv = app->priv;
-
if (id > 0) {
nfp_warn(app->cpp, "FlowerNIC doesn't support more than one 
data vNIC\n");
goto err_invalid_port;
}
 
-   priv->nn = nn;
-
eth_hw_addr_random(nn->dp.netdev);
netif_keep_dst(nn->dp.netdev);
 
@@ -335,6 +331,22 @@ static int nfp_flower_vnic_init(struct nfp_app *app, 
struct nfp_net *nn,
return PTR_ERR_OR_ZERO(nn->port);
 }
 
+static void nfp_flower_vnic_clean(struct nfp_app *app, struct nfp_net *nn)
+{
+   struct nfp_flower_priv *priv = app->priv;
+
+   priv->nn = NULL;
+}
+
+static int nfp_flower_vnic_init(struct nfp_app *app, struct nfp_net *nn)
+{
+   struct nfp_flower_priv *priv = app->priv;
+
+   priv->nn = nn;
+
+   return 0;
+}
+
 static int nfp_flower_init(struct nfp_app *app)
 {
const struct nfp_pf *pf = app->pf;
@@ -374,6 +386,7 @@ 

[PATCH net-next 3/6] nfp: move the start/stop app callbacks back

2017-09-02 Thread Jakub Kicinski
Since representors are now created with a separate callback,
the start/stop app callbacks can be moved back to their original
location.  They are intended for app-specific init/clean up
over the control channel.

Signed-off-by: Jakub Kicinski 
Reviewed-by: Simon Horman 
---
 drivers/net/ethernet/netronome/nfp/nfp_net_main.c | 26 ++-
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c 
b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
index f2a1a4e2ce8b..5abb9ba31e7d 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
@@ -469,10 +469,14 @@ static int nfp_net_pf_app_start(struct nfp_pf *pf)
 {
int err;
 
-   err = nfp_app_start(pf->app, pf->ctrl_vnic);
+   err = nfp_net_pf_app_start_ctrl(pf);
if (err)
return err;
 
+   err = nfp_app_start(pf->app, pf->ctrl_vnic);
+   if (err)
+   goto err_ctrl_stop;
+
if (pf->num_vfs) {
err = nfp_app_sriov_enable(pf->app, pf->num_vfs);
if (err)
@@ -483,6 +487,8 @@ static int nfp_net_pf_app_start(struct nfp_pf *pf)
 
 err_app_stop:
nfp_app_stop(pf->app);
+err_ctrl_stop:
+   nfp_net_pf_app_stop_ctrl(pf);
return err;
 }
 
@@ -491,6 +497,7 @@ static void nfp_net_pf_app_stop(struct nfp_pf *pf)
if (pf->num_vfs)
nfp_app_sriov_disable(pf->app);
nfp_app_stop(pf->app);
+   nfp_net_pf_app_stop_ctrl(pf);
 }
 
 static void nfp_net_pci_unmap_mem(struct nfp_pf *pf)
@@ -582,7 +589,7 @@ static int nfp_net_pci_map_mem(struct nfp_pf *pf)
 
 static void nfp_net_pci_remove_finish(struct nfp_pf *pf)
 {
-   nfp_net_pf_app_stop_ctrl(pf);
+   nfp_net_pf_app_stop(pf);
/* stop app first, to avoid double free of ctrl vNIC's ddir */
nfp_net_debugfs_dir_clean(>ddir);
 
@@ -713,7 +720,6 @@ int nfp_net_pci_probe(struct nfp_pf *pf)
 {
struct nfp_net_fw_version fw_ver;
u8 __iomem *ctrl_bar, *qc_bar;
-   struct nfp_net *nn;
int stride;
int err;
 
@@ -790,7 +796,7 @@ int nfp_net_pci_probe(struct nfp_pf *pf)
if (err)
goto err_free_vnics;
 
-   err = nfp_net_pf_app_start_ctrl(pf);
+   err = nfp_net_pf_app_start(pf);
if (err)
goto err_free_irqs;
 
@@ -798,20 +804,12 @@ int nfp_net_pci_probe(struct nfp_pf *pf)
if (err)
goto err_stop_app;
 
-   err = nfp_net_pf_app_start(pf);
-   if (err)
-   goto err_clean_vnics;
-
mutex_unlock(>lock);
 
return 0;
 
-err_clean_vnics:
-   list_for_each_entry(nn, >vnics, vnic_list)
-   if (nfp_net_is_data_vnic(nn))
-   nfp_net_pf_clean_vnic(pf, nn);
 err_stop_app:
-   nfp_net_pf_app_stop_ctrl(pf);
+   nfp_net_pf_app_stop(pf);
 err_free_irqs:
nfp_net_pf_free_irqs(pf);
 err_free_vnics:
@@ -835,8 +833,6 @@ void nfp_net_pci_remove(struct nfp_pf *pf)
if (list_empty(>vnics))
goto out;
 
-   nfp_net_pf_app_stop(pf);
-
list_for_each_entry(nn, >vnics, vnic_list)
if (nfp_net_is_data_vnic(nn))
nfp_net_pf_clean_vnic(pf, nn);
-- 
2.14.1



[net-next:master 428/478] include/linux/soc/ti/knav_dma.h:129:30: error: field 'direction' has incomplete type

2017-09-02 Thread kbuild test robot
tree:   https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git 
master
head:   32d9b70a053a835b4dfb33158fc03795ea103e44
commit: 0dd5759dbb1c9a862e7d90c09d6cf398c45f1100 [428/478] net: remove 
dmaengine.h inclusion from netdevice.h
config: arm-keystone_defconfig (attached as .config)
compiler: arm-linux-gnueabi-gcc (Debian 6.1.1-9) 6.1.1 20160705
reproduce:
wget 
https://raw.githubusercontent.com/01org/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
git checkout 0dd5759dbb1c9a862e7d90c09d6cf398c45f1100
# save the attached .config to linux build tree
make.cross ARCH=arm 

All errors (new ones prefixed by >>):

   In file included from drivers/net/ethernet/ti/netcp_core.c:30:0:
>> include/linux/soc/ti/knav_dma.h:129:30: error: field 'direction' has 
>> incomplete type
 enum dma_transfer_direction direction;
 ^
   drivers/net/ethernet/ti/netcp_core.c: In function 'netcp_txpipe_open':
   drivers/net/ethernet/ti/netcp_core.c:1349:21: error: 'DMA_MEM_TO_DEV' 
undeclared (first use in this function)
 config.direction = DMA_MEM_TO_DEV;
^~
   drivers/net/ethernet/ti/netcp_core.c:1349:21: note: each undeclared 
identifier is reported only once for each function it appears in
   drivers/net/ethernet/ti/netcp_core.c: In function 
'netcp_setup_navigator_resources':
>> drivers/net/ethernet/ti/netcp_core.c:1659:22: error: 'DMA_DEV_TO_MEM' 
>> undeclared (first use in this function)
 config.direction  = DMA_DEV_TO_MEM;
 ^~
--
   In file included from drivers/net/ethernet/ti/netcp.h:25:0,
from drivers/net/ethernet/ti/netcp_ethss.c:31:
>> include/linux/soc/ti/knav_dma.h:129:30: error: field 'direction' has 
>> incomplete type
 enum dma_transfer_direction direction;
 ^
--
   In file included from drivers/net//ethernet/ti/netcp_core.c:30:0:
>> include/linux/soc/ti/knav_dma.h:129:30: error: field 'direction' has 
>> incomplete type
 enum dma_transfer_direction direction;
 ^
   drivers/net//ethernet/ti/netcp_core.c: In function 'netcp_txpipe_open':
   drivers/net//ethernet/ti/netcp_core.c:1349:21: error: 'DMA_MEM_TO_DEV' 
undeclared (first use in this function)
 config.direction = DMA_MEM_TO_DEV;
^~
   drivers/net//ethernet/ti/netcp_core.c:1349:21: note: each undeclared 
identifier is reported only once for each function it appears in
   drivers/net//ethernet/ti/netcp_core.c: In function 
'netcp_setup_navigator_resources':
   drivers/net//ethernet/ti/netcp_core.c:1659:22: error: 'DMA_DEV_TO_MEM' 
undeclared (first use in this function)
 config.direction  = DMA_DEV_TO_MEM;
 ^~

vim +/direction +129 include/linux/soc/ti/knav_dma.h

88139ed0 Santosh Shilimkar 2014-03-30  121  
88139ed0 Santosh Shilimkar 2014-03-30  122  /**
88139ed0 Santosh Shilimkar 2014-03-30  123   * struct knav_dma_cfg: Pktdma 
channel configuration
88139ed0 Santosh Shilimkar 2014-03-30  124   * @sl_cfg: Slave 
configuration
88139ed0 Santosh Shilimkar 2014-03-30  125   * @tx: 
Tx channel configuration
88139ed0 Santosh Shilimkar 2014-03-30  126   * @rx: 
Rx flow configuration
88139ed0 Santosh Shilimkar 2014-03-30  127   */
88139ed0 Santosh Shilimkar 2014-03-30  128  struct knav_dma_cfg {
88139ed0 Santosh Shilimkar 2014-03-30 @129  enum dma_transfer_direction 
direction;
88139ed0 Santosh Shilimkar 2014-03-30  130  union {
88139ed0 Santosh Shilimkar 2014-03-30  131  struct knav_dma_tx_cfg  
tx;
88139ed0 Santosh Shilimkar 2014-03-30  132  struct knav_dma_rx_cfg  
rx;
88139ed0 Santosh Shilimkar 2014-03-30  133  } u;
88139ed0 Santosh Shilimkar 2014-03-30  134  };
88139ed0 Santosh Shilimkar 2014-03-30  135  

:: The code at line 129 was first introduced by commit
:: 88139ed030583557751e279968e13e892ae10825 soc: ti: add Keystone Navigator 
DMA support

:: TO: Santosh Shilimkar 
:: CC: Santosh Shilimkar 

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


Re: [PATCH] staging: r8822be: Fix typo for CONFIG_RTLWIFI_DEBUG

2017-09-02 Thread kbuild test robot
Hi Andreas,

[auto build test ERROR on staging/staging-testing]
[cannot apply to v4.13-rc7 next-20170901]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Andreas-Ziegler/staging-r8822be-Fix-typo-for-CONFIG_RTLWIFI_DEBUG/20170830-144151
config: sparc64-allmodconfig (attached as .config)
compiler: sparc64-linux-gnu-gcc (Debian 6.1.1-9) 6.1.1 20160705
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=sparc64 

All errors (new ones prefixed by >>):

   drivers/staging//rtlwifi/halmac/rtl_halmac.c: In function 'deinit_priv':
>> drivers/staging//rtlwifi/halmac/rtl_halmac.c:395:22: error: 'struct 
>> rtl_halmac_indicator' has no member named 'sctx'
if (!indicator[i].sctx)
 ^
   In file included from drivers/staging//rtlwifi/halmac/../wifi.h:38:0,
from drivers/staging//rtlwifi/halmac/halmac_2_platform.h:28,
from drivers/staging//rtlwifi/halmac/halmac_api.h:38,
from drivers/staging//rtlwifi/halmac/rtl_halmac.c:26:
   drivers/staging//rtlwifi/halmac/rtl_halmac.c:399:6: error: 'rtlpriv' 
undeclared (first use in this function)
 rtlpriv, COMP_HALMAC, DBG_LOUD,
 ^
   drivers/staging//rtlwifi/halmac/../debug.h:185:17: note: in definition of 
macro 'RT_TRACE'
 _rtl_dbg_trace(rtlpriv, comp, level,\
^~~
   drivers/staging//rtlwifi/halmac/rtl_halmac.c:399:6: note: each undeclared 
identifier is reported only once for each function it appears in
 rtlpriv, COMP_HALMAC, DBG_LOUD,
 ^
   drivers/staging//rtlwifi/halmac/../debug.h:185:17: note: in definition of 
macro 'RT_TRACE'
 _rtl_dbg_trace(rtlpriv, comp, level,\
^~~
   drivers/staging//rtlwifi/halmac/rtl_halmac.c:403:24: error: 'struct 
rtl_halmac_indicator' has no member named 'sctx'
sctx = indicator[i].sctx;
   ^
   drivers/staging//rtlwifi/halmac/rtl_halmac.c:404:17: error: 'struct 
rtl_halmac_indicator' has no member named 'sctx'
indicator[i].sctx = NULL;
^
   drivers/staging//rtlwifi/halmac/rtl_halmac.c:405:5: error: implicit 
declaration of function 'rtl_mfree' [-Werror=implicit-function-declaration]
rtl_mfree((u8 *)sctx, sizeof(*sctx));
^
>> drivers/staging//rtlwifi/halmac/rtl_halmac.c:405:34: error: dereferencing 
>> pointer to incomplete type 'struct submit_ctx'
rtl_mfree((u8 *)sctx, sizeof(*sctx));
 ^
   cc1: some warnings being treated as errors

vim +405 drivers/staging//rtlwifi/halmac/rtl_halmac.c

938a0447 Ping-Ke Shih2017-08-17  378  
938a0447 Ping-Ke Shih2017-08-17  379  static void deinit_priv(struct 
rtl_halmac *halmac)
938a0447 Ping-Ke Shih2017-08-17  380  {
938a0447 Ping-Ke Shih2017-08-17  381struct rtl_halmac_indicator 
*indicator;
938a0447 Ping-Ke Shih2017-08-17  382  
938a0447 Ping-Ke Shih2017-08-17  383indicator = halmac->indicator;
938a0447 Ping-Ke Shih2017-08-17  384halmac->indicator = NULL;
938a0447 Ping-Ke Shih2017-08-17  385if (indicator) {
938a0447 Ping-Ke Shih2017-08-17  386u32 count, size;
938a0447 Ping-Ke Shih2017-08-17  387  
938a0447 Ping-Ke Shih2017-08-17  388count = 
HALMAC_FEATURE_ALL + 1;
bb304b2b Andreas Ziegler 2017-08-29  389  #ifdef CONFIG_RTLWIFI_DEBUG
938a0447 Ping-Ke Shih2017-08-17  390{
938a0447 Ping-Ke Shih2017-08-17  391struct 
submit_ctx *sctx;
938a0447 Ping-Ke Shih2017-08-17  392u32 i;
938a0447 Ping-Ke Shih2017-08-17  393  
938a0447 Ping-Ke Shih2017-08-17  394for (i = 0; i < 
count; i++) {
938a0447 Ping-Ke Shih2017-08-17 @395if 
(!indicator[i].sctx)
938a0447 Ping-Ke Shih2017-08-17  396
continue;
938a0447 Ping-Ke Shih2017-08-17  397  
938a0447 Ping-Ke Shih2017-08-17  398
RT_TRACE(
938a0447 Ping-Ke Shih2017-08-17  399
rtlpriv, COMP_HALMAC, DBG_LOUD,
938a0447 Ping-Ke Shih2017-08-17  400
"%s:  %s id(%d) sctx still exist!!\n",
938a0447 Ping-Ke Shih2017-08-17  401
__func__, RTL_HALMAC_FEATURE_NAME[i],
938a0447 Ping-Ke Shih2017-08-17  402
i);
938a0447 Ping-Ke Shih2017-08-17 @403sctx = 
indicator[i].sctx;
938a0447 Ping-Ke Shih2017-08-17  404
indicator[i].sctx = 

[no subject]

2017-09-02 Thread netgalley


16.doc
Description: MS-Word document


Re: [pull request][net-next 0/3] Mellanox, mlx5 GRE tunnel offloads

2017-09-02 Thread Saeed Mahameed
On Thu, Aug 31, 2017 at 6:51 AM, Hannes Frederic Sowa
 wrote:
> Saeed Mahameed  writes:
>
>> The first patch from Gal and Ariel provides the mlx5 driver support for
>> ConnectX capability to perform IP version identification and matching in
>> order to distinguish between IPv4 and IPv6 without the need to specify the
>> encapsulation type, thus perform RSS in MPLS automatically without
>> specifying MPLS ethertype. This patch will also serve for inner GRE IPv4/6
>> classification for inner GRE RSS.
>
> I don't think this is legal at all or did I misunderstand something?
>
> 

It seems you misunderstood the cover letter.  The HW will still
identify MPLS (IPv4/IPv6) packets, using a new bit we specify in the HW
steering rules rather than adding new specific rules with {MPLS
ethertype} X {IPv4,IPv6} to classify MPLS IPv{4,6} traffic.  It is the
same functionality, approached in a better and more general way.
Bottom line: the hardware is capable of processing MPLS headers and
performing RSS on the inner packet (IPv4/6) without the need for the
driver to provide precise MPLS steering rules.

>
> Thanks,
> Hannes


Re: [PATCH net-next 2/2] net: convert (struct ubuf_info)->refcnt to refcount_t

2017-09-02 Thread kbuild test robot
Hi Eric,

[auto build test WARNING on net-next/master]

url:
https://github.com/0day-ci/linux/commits/Eric-Dumazet/net-ubuf_info-refcnt-conversion/20170903-043506
config: i386-randconfig-i1-201736 (attached as .config)
compiler: gcc-4.8 (Debian 4.8.4-1) 4.8.4
reproduce:
# save the attached .config to linux build tree
make ARCH=i386 

All warnings (new ones prefixed by >>):

   drivers//vhost/net.c: In function 'handle_tx':
>> drivers//vhost/net.c:536:4: warning: passing argument 1 of 'atomic_set' from 
>> incompatible pointer type [enabled by default]
   atomic_set(>refcnt, 1);
   ^
   In file included from include/linux/atomic.h:4:0,
from arch/x86/include/asm/thread_info.h:53,
from include/linux/thread_info.h:37,
from arch/x86/include/asm/preempt.h:6,
from include/linux/preempt.h:80,
from include/linux/spinlock.h:50,
from include/linux/wait.h:8,
from include/linux/eventfd.h:12,
from drivers//vhost/net.c:10:
   arch/x86/include/asm/atomic.h:36:29: note: expected 'struct atomic_t *' but 
argument is of type 'struct refcount_t *'
static __always_inline void atomic_set(atomic_t *v, int i)
^

vim +/atomic_set +536 drivers//vhost/net.c

0ed005ce0 Jason Wang 2017-01-18  442  
3a4d5c94e Michael S. Tsirkin 2010-01-14  443  /* Expects to be always run from 
workqueue - which acts as
3a4d5c94e Michael S. Tsirkin 2010-01-14  444   * read-size critical section for 
our kind of RCU. */
3a4d5c94e Michael S. Tsirkin 2010-01-14  445  static void handle_tx(struct 
vhost_net *net)
3a4d5c94e Michael S. Tsirkin 2010-01-14  446  {
2839400f8 Asias He   2013-04-27  447struct vhost_net_virtqueue *nvq 
= >vqs[VHOST_NET_VQ_TX];
81f95a558 Michael S. Tsirkin 2013-04-28  448struct vhost_virtqueue *vq = 
>vq;
98a527aac Al Viro2014-12-10  449unsigned out, in;
d5675bd20 Michael S. Tsirkin 2010-06-24  450int head;
3a4d5c94e Michael S. Tsirkin 2010-01-14  451struct msghdr msg = {
3a4d5c94e Michael S. Tsirkin 2010-01-14  452.msg_name = NULL,
3a4d5c94e Michael S. Tsirkin 2010-01-14  453.msg_namelen = 0,
3a4d5c94e Michael S. Tsirkin 2010-01-14  454.msg_control = NULL,
3a4d5c94e Michael S. Tsirkin 2010-01-14  455.msg_controllen = 0,
3a4d5c94e Michael S. Tsirkin 2010-01-14  456.msg_flags = 
MSG_DONTWAIT,
3a4d5c94e Michael S. Tsirkin 2010-01-14  457};
3a4d5c94e Michael S. Tsirkin 2010-01-14  458size_t len, total_len = 0;
70181d512 Jason Wang 2013-04-10  459int err;
3a4d5c94e Michael S. Tsirkin 2010-01-14  460size_t hdr_size;
28457ee69 Arnd Bergmann  2010-03-09  461struct socket *sock;
fe729a57c Asias He   2013-05-06  462struct vhost_net_ubuf_ref 
*uninitialized_var(ubufs);
cedb9bdce Michael S. Tsirkin 2012-12-06  463bool zcopy, zcopy_used;
28457ee69 Arnd Bergmann  2010-03-09  464  
2e26af79b Asias He   2013-05-07  465mutex_lock(>mutex);
2e26af79b Asias He   2013-05-07  466sock = vq->private_data;
3a4d5c94e Michael S. Tsirkin 2010-01-14  467if (!sock)
2e26af79b Asias He   2013-05-07  468goto out;
3a4d5c94e Michael S. Tsirkin 2010-01-14  469  
6b1e6cc78 Jason Wang 2016-06-23  470if (!vq_iotlb_prefetch(vq))
6b1e6cc78 Jason Wang 2016-06-23  471goto out;
6b1e6cc78 Jason Wang 2016-06-23  472  
8ea8cf89e Michael S. Tsirkin 2011-05-20  473vhost_disable_notify(>dev, 
vq);
3a4d5c94e Michael S. Tsirkin 2010-01-14  474  
81f95a558 Michael S. Tsirkin 2013-04-28  475hdr_size = nvq->vhost_hlen;
2839400f8 Asias He   2013-04-27  476zcopy = nvq->ubufs;
3a4d5c94e Michael S. Tsirkin 2010-01-14  477  
3a4d5c94e Michael S. Tsirkin 2010-01-14  478for (;;) {
bab632d69 Michael S. Tsirkin 2011-07-18  479/* Release DMAs done 
buffers first */
bab632d69 Michael S. Tsirkin 2011-07-18  480if (zcopy)
eaae8132e Michael S. Tsirkin 2012-11-01  481
vhost_zerocopy_signal_used(net, vq);
bab632d69 Michael S. Tsirkin 2011-07-18  482  
f7c6be404 Jason Wang 2013-09-02  483/* If more outstanding 
DMAs, queue the work.
f7c6be404 Jason Wang 2013-09-02  484 * Handle upend_idx 
wrap around
f7c6be404 Jason Wang 2013-09-02  485 */
0ed005ce0 Jason Wang 2017-01-18  486if 
(unlikely(vhost_exceeds_maxpend(net)))
f7c6be404 Jason Wang 2013-09-02  487break;
f7c6be404 Jason Wang 2013-09-02  488  
030881372 Jason Wang 2016-03-04  489head = 
vhost_net_tx_get_vq_desc(net, vq, vq->iov,
3a4d5c94e Michael S. Tsirkin 2010-01-14  490
ARRAY_SIZE(vq->iov),
030881372 Jason Wang 2016-03-04  491

Re: [PATCH net-next 2/2] net: convert (struct ubuf_info)->refcnt to refcount_t

2017-09-02 Thread kbuild test robot
Hi Eric,

[auto build test ERROR on net-next/master]

url:
https://github.com/0day-ci/linux/commits/Eric-Dumazet/net-ubuf_info-refcnt-conversion/20170903-043506
config: x86_64-acpi-redef (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64 

All errors (new ones prefixed by >>):

   drivers//vhost/net.c: In function 'handle_tx':
>> drivers//vhost/net.c:536:15: error: passing argument 1 of 'atomic_set' from 
>> incompatible pointer type [-Werror=incompatible-pointer-types]
   atomic_set(>refcnt, 1);
  ^
   In file included from include/linux/atomic.h:4:0,
from include/linux/jump_label.h:183,
from arch/x86/include/asm/string_64.h:5,
from arch/x86/include/asm/string.h:4,
from include/linux/string.h:18,
from include/linux/bitmap.h:8,
from include/linux/cpumask.h:11,
from arch/x86/include/asm/cpumask.h:4,
from arch/x86/include/asm/msr.h:10,
from arch/x86/include/asm/processor.h:20,
from arch/x86/include/asm/cpufeature.h:4,
from arch/x86/include/asm/thread_info.h:52,
from include/linux/thread_info.h:37,
from arch/x86/include/asm/preempt.h:6,
from include/linux/preempt.h:80,
from include/linux/spinlock.h:50,
from include/linux/seqlock.h:35,
from include/linux/time.h:5,
from include/linux/stat.h:18,
from include/linux/compat.h:12,
from drivers//vhost/net.c:9:
   arch/x86/include/asm/atomic.h:36:29: note: expected 'atomic_t * {aka struct 
<anonymous> *}' but argument is of type 'refcount_t * {aka struct 
refcount_struct *}'
static __always_inline void atomic_set(atomic_t *v, int i)
^~
   cc1: some warnings being treated as errors

vim +/atomic_set +536 drivers//vhost/net.c

0ed005ce0 Jason Wang 2017-01-18  442  
3a4d5c94e Michael S. Tsirkin 2010-01-14  443  /* Expects to be always run from 
workqueue - which acts as
3a4d5c94e Michael S. Tsirkin 2010-01-14  444   * read-size critical section for 
our kind of RCU. */
3a4d5c94e Michael S. Tsirkin 2010-01-14  445  static void handle_tx(struct 
vhost_net *net)
3a4d5c94e Michael S. Tsirkin 2010-01-14  446  {
2839400f8 Asias He   2013-04-27  447struct vhost_net_virtqueue *nvq 
= &net->vqs[VHOST_NET_VQ_TX];
81f95a558 Michael S. Tsirkin 2013-04-28  448struct vhost_virtqueue *vq = 
&nvq->vq;
98a527aac Al Viro2014-12-10  449unsigned out, in;
d5675bd20 Michael S. Tsirkin 2010-06-24  450int head;
3a4d5c94e Michael S. Tsirkin 2010-01-14  451struct msghdr msg = {
3a4d5c94e Michael S. Tsirkin 2010-01-14  452.msg_name = NULL,
3a4d5c94e Michael S. Tsirkin 2010-01-14  453.msg_namelen = 0,
3a4d5c94e Michael S. Tsirkin 2010-01-14  454.msg_control = NULL,
3a4d5c94e Michael S. Tsirkin 2010-01-14  455.msg_controllen = 0,
3a4d5c94e Michael S. Tsirkin 2010-01-14  456.msg_flags = 
MSG_DONTWAIT,
3a4d5c94e Michael S. Tsirkin 2010-01-14  457};
3a4d5c94e Michael S. Tsirkin 2010-01-14  458size_t len, total_len = 0;
70181d512 Jason Wang 2013-04-10  459int err;
3a4d5c94e Michael S. Tsirkin 2010-01-14  460size_t hdr_size;
28457ee69 Arnd Bergmann  2010-03-09  461struct socket *sock;
fe729a57c Asias He   2013-05-06  462struct vhost_net_ubuf_ref 
*uninitialized_var(ubufs);
cedb9bdce Michael S. Tsirkin 2012-12-06  463bool zcopy, zcopy_used;
28457ee69 Arnd Bergmann  2010-03-09  464  
2e26af79b Asias He   2013-05-07  465mutex_lock(&vq->mutex);
2e26af79b Asias He   2013-05-07  466sock = vq->private_data;
3a4d5c94e Michael S. Tsirkin 2010-01-14  467if (!sock)
2e26af79b Asias He   2013-05-07  468goto out;
3a4d5c94e Michael S. Tsirkin 2010-01-14  469  
6b1e6cc78 Jason Wang 2016-06-23  470if (!vq_iotlb_prefetch(vq))
6b1e6cc78 Jason Wang 2016-06-23  471goto out;
6b1e6cc78 Jason Wang 2016-06-23  472  
8ea8cf89e Michael S. Tsirkin 2011-05-20  473vhost_disable_notify(&net->dev, 
vq);
3a4d5c94e Michael S. Tsirkin 2010-01-14  474  
81f95a558 Michael S. Tsirkin 2013-04-28  475hdr_size = nvq->vhost_hlen;
2839400f8 Asias He   2013-04-27  476zcopy = nvq->ubufs;
3a4d5c94e Michael S. Tsirkin 2010-01-14  477  
3a4d5c94e Michael S. Tsirkin 2010-01-14  478for (;;) {
bab632d69 Michael S. Tsirkin 2011-07-18  479/* Release DMAs done 
buffers first */
bab632d69 Michael S. Tsirkin 2011-07-18  480if (zcopy)
eaae8132e Michael S. Tsirkin 2012-11-01  481
vhost_zerocopy_signal_used(net, 

[patch net-next v2 02/21] mlxsw: reg: Update RATR to support IP-in-IP tunnels

2017-09-02 Thread Jiri Pirko
From: Petr Machata 

So far, adjacencies have always been of type Ethernet (with value of 0),
and thus there was no need to explicitly support RATR type. However to
support IP-in-IP adjacencies, this type and a suite of IP-in-IP-specific
attributes need to be added.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 66 +++
 1 file changed, 66 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h 
b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 8736f8492..6a7757f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -4362,6 +4362,38 @@ MLXSW_ITEM32(reg, ratr, v, 0x00, 24, 1);
  */
 MLXSW_ITEM32(reg, ratr, a, 0x00, 16, 1);
 
+enum mlxsw_reg_ratr_type {
+   /* Ethernet */
+   MLXSW_REG_RATR_TYPE_ETHERNET,
+   /* IPoIB Unicast without GRH.
+* Reserved for Spectrum.
+*/
+   MLXSW_REG_RATR_TYPE_IPOIB_UC,
+   /* IPoIB Unicast with GRH. Supported only in table 0 (Ethernet unicast
+* adjacency).
+* Reserved for Spectrum.
+*/
+   MLXSW_REG_RATR_TYPE_IPOIB_UC_W_GRH,
+   /* IPoIB Multicast.
+* Reserved for Spectrum.
+*/
+   MLXSW_REG_RATR_TYPE_IPOIB_MC,
+   /* MPLS.
+* Reserved for SwitchX/-2.
+*/
+   MLXSW_REG_RATR_TYPE_MPLS,
+   /* IPinIP Encap.
+* Reserved for SwitchX/-2.
+*/
+   MLXSW_REG_RATR_TYPE_IPIP,
+};
+
+/* reg_ratr_type
+ * Adjacency entry type.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ratr, type, 0x04, 28, 4);
+
 /* reg_ratr_adjacency_index_low
  * Bits 15:0 of index into the adjacency table.
  * For SwitchX and SwitchX-2, the adjacency table is linear and
@@ -4416,6 +4448,34 @@ MLXSW_ITEM32(reg, ratr, trap_id, 0x0C, 0, 8);
  */
 MLXSW_ITEM_BUF(reg, ratr, eth_destination_mac, 0x12, 6);
 
+enum mlxsw_reg_ratr_ipip_type {
+   /* IPv4, address set by mlxsw_reg_ratr_ipip_ipv4_udip. */
+   MLXSW_REG_RATR_IPIP_TYPE_IPV4,
+   /* IPv6, address set by mlxsw_reg_ratr_ipip_ipv6_ptr. */
+   MLXSW_REG_RATR_IPIP_TYPE_IPV6,
+};
+
+/* reg_ratr_ipip_type
+ * Underlay destination ip type.
+ * Note: the type field must match the protocol of the router interface.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ratr, ipip_type, 0x10, 16, 4);
+
+/* reg_ratr_ipip_ipv4_udip
+ * Underlay ipv4 dip.
+ * Reserved when ipip_type is IPv6.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ratr, ipip_ipv4_udip, 0x18, 0, 32);
+
+/* reg_ratr_ipip_ipv6_ptr
+ * Pointer to IPv6 underlay destination ip address.
+ * For Spectrum: Pointer to KVD linear space.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ratr, ipip_ipv6_ptr, 0x1C, 0, 24);
+
 static inline void
 mlxsw_reg_ratr_pack(char *payload,
enum mlxsw_reg_ratr_op op, bool valid,
@@ -4435,6 +4495,12 @@ static inline void mlxsw_reg_ratr_eth_entry_pack(char 
*payload,
mlxsw_reg_ratr_eth_destination_mac_memcpy_to(payload, dest_mac);
 }
 
+static inline void mlxsw_reg_ratr_ipip4_entry_pack(char *payload, u32 
ipv4_udip)
+{
+   mlxsw_reg_ratr_ipip_type_set(payload, MLXSW_REG_RATR_IPIP_TYPE_IPV4);
+   mlxsw_reg_ratr_ipip_ipv4_udip_set(payload, ipv4_udip);
+}
+
 /* RICNT - Router Interface Counter Register
  * -
  * The RICNT register retrieves per port performance counters
-- 
2.9.3



[patch net-next v2 05/21] mlxsw: reg: Add Routing Tunnel Decap Properties Register

2017-09-02 Thread Jiri Pirko
From: Petr Machata 

The RTDP register is used for configuring the tunnel decap properties of
NVE and IPinIP.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 129 ++
 1 file changed, 129 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h 
b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 24296cf..a6eb96f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -5,6 +5,7 @@
  * Copyright (c) 2015 Elad Raz 
  * Copyright (c) 2015-2017 Jiri Pirko 
  * Copyright (c) 2016 Yotam Gigi 
+ * Copyright (c) 2017 Petr Machata 
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -5463,6 +5464,133 @@ static inline void 
mlxsw_reg_rauhtd_ent_ipv6_unpack(char *payload,
mlxsw_reg_rauhtd_ipv6_ent_dip_memcpy_from(payload, rec_index, p_dip);
 }
 
+/* RTDP - Routing Tunnel Decap Properties Register
+ * ---
+ * The RTDP register is used for configuring the tunnel decap properties of NVE
+ * and IPinIP.
+ */
+#define MLXSW_REG_RTDP_ID 0x8020
+#define MLXSW_REG_RTDP_LEN 0x44
+
+MLXSW_REG_DEFINE(rtdp, MLXSW_REG_RTDP_ID, MLXSW_REG_RTDP_LEN);
+
+enum mlxsw_reg_rtdp_type {
+   MLXSW_REG_RTDP_TYPE_NVE,
+   MLXSW_REG_RTDP_TYPE_IPIP,
+};
+
+/* reg_rtdp_type
+ * Type of the RTDP entry as per enum mlxsw_reg_rtdp_type.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rtdp, type, 0x00, 28, 4);
+
+/* reg_rtdp_tunnel_index
+ * Index to the Decap entry.
+ * For Spectrum, Index to KVD Linear.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rtdp, tunnel_index, 0x00, 0, 24);
+
+/* IPinIP */
+
+/* reg_rtdp_ipip_irif
+ * Ingress Router Interface for the overlay router
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rtdp, ipip_irif, 0x04, 16, 16);
+
+enum mlxsw_reg_rtdp_ipip_sip_check {
+   /* No sip checks. */
+   MLXSW_REG_RTDP_IPIP_SIP_CHECK_NO,
+   /* Filter packet if underlay is not IPv4 or if underlay SIP does not
+* equal ipv4_usip.
+*/
+   MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV4,
+   /* Filter packet if underlay is not IPv6 or if underlay SIP does not
+* equal ipv6_usip.
+*/
+   MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV6 = 3,
+};
+
+/* reg_rtdp_ipip_sip_check
+ * SIP check to perform. If decapsulation failed due to these configurations
+ * then trap_id is IPIP_DECAP_ERROR.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rtdp, ipip_sip_check, 0x04, 0, 3);
+
+/* If set, allow decapsulation of IPinIP (without GRE). */
+#define MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_IPIP  BIT(0)
+/* If set, allow decapsulation of IPinGREinIP without a key. */
+#define MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE   BIT(1)
+/* If set, allow decapsulation of IPinGREinIP with a key. */
+#define MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE_KEY   BIT(2)
+
+/* reg_rtdp_ipip_type_check
+ * Flags as per MLXSW_REG_RTDP_IPIP_TYPE_CHECK_*. If decapsulation failed due 
to
+ * these configurations then trap_id is IPIP_DECAP_ERROR.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rtdp, ipip_type_check, 0x08, 24, 3);
+
+/* reg_rtdp_ipip_gre_key_check
+ * Whether GRE key should be checked. When check is enabled:
+ * - A packet received as IPinIP (without GRE) will always pass.
+ * - A packet received as IPinGREinIP without a key will not pass the check.
+ * - A packet received as IPinGREinIP with a key will pass the check only if 
the
+ *   key in the packet is equal to expected_gre_key.
+ * If decapsulation failed due to GRE key then trap_id is IPIP_DECAP_ERROR.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rtdp, ipip_gre_key_check, 0x08, 23, 1);
+
+/* reg_rtdp_ipip_ipv4_usip
+ * Underlay IPv4 address for ipv4 source address check.
+ * Reserved when sip_check is not '1'.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rtdp, ipip_ipv4_usip, 0x0C, 0, 32);
+
+/* reg_rtdp_ipip_ipv6_usip_ptr
+ * This field is valid when sip_check is "sipv6 check explicitly". This is a
+ * pointer to the IPv6 DIP which is configured by RIPS. For Spectrum, the index
+ * is to the KVD linear.
+ * Reserved when sip_check is not MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV6.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rtdp, ipip_ipv6_usip_ptr, 0x10, 0, 24);
+
+/* reg_rtdp_ipip_expected_gre_key
+ * GRE key for checking.
+ * Reserved when gre_key_check is '0'.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rtdp, ipip_expected_gre_key, 0x14, 0, 32);
+
+static inline void mlxsw_reg_rtdp_pack(char *payload,
+  enum mlxsw_reg_rtdp_type type,
+  u32 tunnel_index)
+{
+   MLXSW_REG_ZERO(rtdp, payload);
+   mlxsw_reg_rtdp_type_set(payload, type);
+   

[patch net-next v2 03/21] mlxsw: reg: Move enum mlxsw_reg_ratr_trap_id

2017-09-02 Thread Jiri Pirko
From: Petr Machata 

This enum is used with reg_ratr_trap_id, so move it next to the register
definition.

While at it, drop the enumerator initializers.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h 
b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 6a7757f..bf936b6 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -4423,17 +4423,17 @@ enum mlxsw_reg_ratr_trap_action {
  */
 MLXSW_ITEM32(reg, ratr, trap_action, 0x0C, 28, 4);
 
-enum mlxsw_reg_ratr_trap_id {
-   MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS0 = 0,
-   MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS1 = 1,
-};
-
 /* reg_ratr_adjacency_index_high
  * Bits 23:16 of the adjacency_index.
  * Access: Index
  */
 MLXSW_ITEM32(reg, ratr, adjacency_index_high, 0x0C, 16, 8);
 
+enum mlxsw_reg_ratr_trap_id {
+   MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS0,
+   MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS1,
+};
+
 /* reg_ratr_trap_id
  * Trap ID to be reported to CPU.
  * Trap-ID is RTR_EGRESS0 or RTR_EGRESS1.
-- 
2.9.3



[patch net-next v2 06/21] mlxsw: reg: Extract mlxsw_reg_ritr_mac_pack()

2017-09-02 Thread Jiri Pirko
From: Petr Machata 

Unlike other interface types, loopback RIFs do not have MAC address. So
drop the corresponding argument from mlxsw_reg_ritr_pack() and move it
to a new function. Call that from callers of mlxsw_reg_ritr_pack.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 7 +--
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 7 ---
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h 
b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index a6eb96f..d80cf9b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -4263,8 +4263,7 @@ static inline void mlxsw_reg_ritr_sp_if_pack(char 
*payload, bool lag,
 
 static inline void mlxsw_reg_ritr_pack(char *payload, bool enable,
   enum mlxsw_reg_ritr_if_type type,
-  u16 rif, u16 vr_id, u16 mtu,
-  const char *mac)
+  u16 rif, u16 vr_id, u16 mtu)
 {
bool op = enable ? MLXSW_REG_RITR_RIF_CREATE : MLXSW_REG_RITR_RIF_DEL;
 
@@ -4280,6 +4279,10 @@ static inline void mlxsw_reg_ritr_pack(char *payload, 
bool enable,
mlxsw_reg_ritr_lb_en_set(payload, 1);
mlxsw_reg_ritr_virtual_router_set(payload, vr_id);
mlxsw_reg_ritr_mtu_set(payload, mtu);
+}
+
+static inline void mlxsw_reg_ritr_mac_pack(char *payload, const char *mac)
+{
mlxsw_reg_ritr_if_mac_memcpy_to(payload, mac);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index de15eac..3ddfbe3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -4955,8 +4955,8 @@ static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif 
*rif, bool enable)
 
rif_subport = mlxsw_sp_rif_subport_rif(rif);
mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
-   rif->rif_index, rif->vr_id, rif->dev->mtu,
-   rif->dev->dev_addr);
+   rif->rif_index, rif->vr_id, rif->dev->mtu);
+   mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
  rif_subport->lag ? rif_subport->lag_id :
 rif_subport->system_port,
@@ -4998,7 +4998,8 @@ static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif 
*rif,
char ritr_pl[MLXSW_REG_RITR_LEN];
 
mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
-   rif->dev->mtu, rif->dev->dev_addr);
+   rif->dev->mtu);
+   mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);
 
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
-- 
2.9.3




[patch net-next v2 14/21] mlxsw: spectrum_router: Make nexthops typed

2017-09-02 Thread Jiri Pirko
From: Petr Machata 

In the router, some next hops may reference an encapsulating netdevice,
such as GRE or IPIP. To properly offload these next hops, mlxsw needs to
keep track of whether a given next hop is a regular Ethernet entry, or
an IP-in-IP tunneling entry.

To facilitate this book-keeping, add a type field to struct
mlxsw_sp_nexthop. There is, as of this patch, only one next hop type:
MLXSW_SP_NEXTHOP_TYPE_ETH. Follow-up patches will introduce the IP-in-IP
variant.

There are several places where next hops are initialized in the IPv4
path. Instead of replicating the logic at every one of them, factor it
out to a function mlxsw_sp_nexthop4_type_init(). The corresponding fini
is actually protocol-neutral, so put it to mlxsw_sp_nexthop_type_fini(),
but create a corresponding protocoled _fini function that dispatches to
the protocol-neutral one.

The IPv6 path is simpler, but for symmetry with IPv4, create the same
suite of functions with corresponding logic.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 138 ++---
 1 file changed, 95 insertions(+), 43 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 563e803..53bdd0f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -1652,6 +1652,10 @@ static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp 
*mlxsw_sp,
}
 }
 
+enum mlxsw_sp_nexthop_type {
+   MLXSW_SP_NEXTHOP_TYPE_ETH,
+};
+
 struct mlxsw_sp_nexthop_key {
struct fib_nh *fib_nh;
 };
@@ -1676,7 +1680,10 @@ struct mlxsw_sp_nexthop {
   update:1; /* set indicates that MAC of this neigh should be
  * updated in HW
  */
-   struct mlxsw_sp_neigh_entry *neigh_entry;
+   enum mlxsw_sp_nexthop_type type;
+   union {
+   struct mlxsw_sp_neigh_entry *neigh_entry;
+   };
 };
 
 struct mlxsw_sp_nexthop_group {
@@ -1964,9 +1971,9 @@ static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp 
*mlxsw_sp, u32 adj_index,
 }
 
 static int
-mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_nexthop_group *nh_grp,
- bool reallocate)
+mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop_group *nh_grp,
+ bool reallocate)
 {
u32 adj_index = nh_grp->adj_index; /* base */
struct mlxsw_sp_nexthop *nh;
@@ -1982,8 +1989,12 @@ mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp 
*mlxsw_sp,
}
 
if (nh->update || reallocate) {
-   err = mlxsw_sp_nexthop_mac_update(mlxsw_sp,
- adj_index, nh);
+   switch (nh->type) {
+   case MLXSW_SP_NEXTHOP_TYPE_ETH:
+   err = mlxsw_sp_nexthop_mac_update
+   (mlxsw_sp, adj_index, nh);
+   break;
+   }
if (err)
return err;
nh->update = 0;
@@ -2071,8 +2082,7 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
/* Nothing was added or removed, so no need to reallocate. Just
 * update MAC on existing adjacency indexes.
 */
-   err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp,
-   false);
+   err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false);
if (err) {
dev_warn(mlxsw_sp->bus_info->dev, "Failed to update 
neigh MAC in adjacency table.\n");
goto set_trap;
@@ -2099,7 +2109,7 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
nh_grp->adj_index_valid = 1;
nh_grp->adj_index = adj_index;
nh_grp->ecmp_size = ecmp_size;
-   err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp, true);
+   err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
if (err) {
dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC 
in adjacency table.\n");
goto set_trap;
@@ -2287,6 +2297,48 @@ static bool mlxsw_sp_netdev_ipip_type(const struct 
mlxsw_sp *mlxsw_sp,
return false;
 }
 
+static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
+  struct mlxsw_sp_nexthop *nh)
+{
+   switch (nh->type) {
+   case MLXSW_SP_NEXTHOP_TYPE_ETH:
+   mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
+ 

[patch net-next v2 11/21] mlxsw: spectrum_router: Introduce loopback RIFs

2017-09-02 Thread Jiri Pirko
From: Petr Machata 

When offloading L3 tunnels, an adjacency entry is created that loops the
packet back into the underlay router. Loopback interfaces then hold the
corresponding information and are created for IP-in-IP netdevices.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h |   1 +
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 152 -
 .../net/ethernet/mellanox/mlxsw/spectrum_router.h  |  13 ++
 3 files changed, 164 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index f8c7f7e..84ce83a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -77,6 +77,7 @@ enum mlxsw_sp_rif_type {
MLXSW_SP_RIF_TYPE_SUBPORT,
MLXSW_SP_RIF_TYPE_VLAN,
MLXSW_SP_RIF_TYPE_FID,
+   MLXSW_SP_RIF_TYPE_IPIP_LB, /* IP-in-IP loopback. */
MLXSW_SP_RIF_TYPE_MAX,
 };
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 38477c5..72e386b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -1,9 +1,10 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
- * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016-2017 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2016 Jiri Pirko 
  * Copyright (c) 2016 Ido Schimmel 
  * Copyright (c) 2016 Yotam Gigi 
+ * Copyright (c) 2017 Petr Machata 
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -51,6 +52,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -131,6 +133,17 @@ struct mlxsw_sp_rif_subport {
bool lag;
 };
 
+struct mlxsw_sp_rif_ipip_lb {
+   struct mlxsw_sp_rif common;
+   struct mlxsw_sp_rif_ipip_lb_config lb_config;
+   u16 ul_vr_id; /* Reserved for Spectrum-2. */
+};
+
+struct mlxsw_sp_rif_params_ipip_lb {
+   struct mlxsw_sp_rif_params common;
+   struct mlxsw_sp_rif_ipip_lb_config lb_config;
+};
+
 struct mlxsw_sp_rif_ops {
enum mlxsw_sp_rif_type type;
size_t rif_size;
@@ -883,6 +896,25 @@ static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
kfree(mlxsw_sp->router->vrs);
 }
 
+static struct net_device *
+__mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
+{
+   struct ip_tunnel *tun = netdev_priv(ol_dev);
+   struct net *net = dev_net(ol_dev);
+
+   return __dev_get_by_index(net, tun->parms.link);
+}
+
+static u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
+{
+   struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
+
+   if (d)
+   return l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
+   else
+   return l3mdev_fib_table(ol_dev) ? : RT_TABLE_MAIN;
+}
+
 struct mlxsw_sp_neigh_key {
struct neighbour *n;
 };
@@ -2236,6 +2268,25 @@ static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp 
*mlxsw_sp,
neigh_release(n);
 }
 
+static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *dev,
+ enum mlxsw_sp_ipip_type *p_type)
+{
+   struct mlxsw_sp_router *router = mlxsw_sp->router;
+   const struct mlxsw_sp_ipip_ops *ipip_ops;
+   enum mlxsw_sp_ipip_type ipipt;
+
+   for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
+   ipip_ops = router->ipip_ops_arr[ipipt];
+   if (dev->type == ipip_ops->dev_type) {
+   if (p_type)
+   *p_type = ipipt;
+   return true;
+   }
+   }
+   return false;
+}
+
 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
  struct mlxsw_sp_nexthop_group *nh_grp,
  struct mlxsw_sp_nexthop *nh,
@@ -4374,7 +4425,10 @@ mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
 {
enum mlxsw_sp_fid_type type;
 
-   /* RIF type is derived from the type of the underlying FID */
+   if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
+   return MLXSW_SP_RIF_TYPE_IPIP_LB;
+
+   /* Otherwise RIF type is derived from the type of the underlying FID. */
if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
type = MLXSW_SP_FID_TYPE_8021Q;
else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
@@ -5164,10 +5218,104 @@ static 

[patch net-next v2 08/21] mlxsw: spectrum_router: Publish mlxsw_sp_l3proto

2017-09-02 Thread Jiri Pirko
From: Petr Machata 

The spectrum_ipip module that will be introduced in the follow-up
patches needs to know the data type.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 5 -
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h | 5 +
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 4e47d45..f866ac5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -405,11 +405,6 @@ struct mlxsw_sp_rt6 {
struct rt6_info *rt;
 };
 
-enum mlxsw_sp_l3proto {
-   MLXSW_SP_L3_PROTO_IPV4,
-   MLXSW_SP_L3_PROTO_IPV6,
-};
-
 struct mlxsw_sp_lpm_tree {
u8 id; /* tree ID */
unsigned int ref_count;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
index 87a04af..67c6c1d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
@@ -37,6 +37,11 @@
 
 #include "spectrum.h"
 
+enum mlxsw_sp_l3proto {
+   MLXSW_SP_L3_PROTO_IPV4,
+   MLXSW_SP_L3_PROTO_IPV6,
+};
+
 enum mlxsw_sp_rif_counter_dir {
MLXSW_SP_RIF_COUNTER_INGRESS,
MLXSW_SP_RIF_COUNTER_EGRESS,
-- 
2.9.3



[patch net-next v2 13/21] mlxsw: spectrum_router: Extract mlxsw_sp_rt6_is_gateway()

2017-09-02 Thread Jiri Pirko
From: Petr Machata 

IPv6 counterpart of the previous patch: introduce a function to
determine whether a given route is a gateway route.

The new function takes a mlxsw_sp argument which follow-up patches will
use. Thus mlxsw_sp_fib6_entry_type_set() got that argument as well.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 04e1887..563e803 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -3526,6 +3526,12 @@ static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp 
*mlxsw_sp,
mlxsw_sp_nexthop_rif_fini(nh);
 }
 
+static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
+   const struct rt6_info *rt)
+{
+   return rt->rt6i_flags & RTF_GATEWAY;
+}
+
 static struct mlxsw_sp_nexthop_group *
 mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
   struct mlxsw_sp_fib6_entry *fib6_entry)
@@ -3548,7 +3554,7 @@ mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
 #endif
mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
struct mlxsw_sp_rt6, list);
-   nh_grp->gateway = !!(mlxsw_sp_rt6->rt->rt6i_flags & RTF_GATEWAY);
+   nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
nh_grp->count = fib6_entry->nrt6;
for (i = 0; i < nh_grp->count; i++) {
struct rt6_info *rt = mlxsw_sp_rt6->rt;
@@ -3705,7 +3711,8 @@ mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
 }
 
-static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp_fib_entry *fib_entry,
+static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
+struct mlxsw_sp_fib_entry *fib_entry,
 const struct rt6_info *rt)
 {
/* Packets hitting RTF_REJECT routes need to be discarded by the
@@ -3718,7 +3725,7 @@ static void mlxsw_sp_fib6_entry_type_set(struct 
mlxsw_sp_fib_entry *fib_entry,
fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
else if (rt->rt6i_flags & RTF_REJECT)
fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
-   else if (rt->rt6i_flags & RTF_GATEWAY)
+   else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt))
fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
else
fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
@@ -3758,7 +3765,7 @@ mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
goto err_rt6_create;
}
 
-   mlxsw_sp_fib6_entry_type_set(fib_entry, mlxsw_sp_rt6->rt);
+   mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt);
 
INIT_LIST_HEAD(&fib6_entry->rt6_list);
list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
-- 
2.9.3



[patch net-next v2 16/21] mlxsw: spectrum_router: Support IPv6 overlay encap

2017-09-02 Thread Jiri Pirko
From: Petr Machata 

Add the missing bits to recognize IPv6 next hops as IPIP ones to enable
offloading of IPv6 overlay encapsulation.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c| 20 +++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 231b597..05afd53 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -3757,15 +3757,32 @@ mlxsw_sp_fib6_entry_rt_find(const struct 
mlxsw_sp_fib6_entry *fib6_entry,
return NULL;
 }
 
+static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
+   const struct rt6_info *rt,
+   enum mlxsw_sp_ipip_type *ret)
+{
+   return rt->dst.dev &&
+  mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->dst.dev, ret);
+}
+
 static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
   struct mlxsw_sp_nexthop_group *nh_grp,
   struct mlxsw_sp_nexthop *nh,
   const struct rt6_info *rt)
 {
+   struct mlxsw_sp_router *router = mlxsw_sp->router;
struct net_device *dev = rt->dst.dev;
+   enum mlxsw_sp_ipip_type ipipt;
struct mlxsw_sp_rif *rif;
int err;
 
+   if (mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, &ipipt) &&
+   router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev,
+MLXSW_SP_L3_PROTO_IPV6)) {
+   nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
+   return mlxsw_sp_nexthop_ipip_init(mlxsw_sp, ipipt, nh, dev);
+   }
+
nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
if (!rif)
@@ -3815,7 +3832,8 @@ static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp 
*mlxsw_sp,
 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
const struct rt6_info *rt)
 {
-   return rt->rt6i_flags & RTF_GATEWAY;
+   return rt->rt6i_flags & RTF_GATEWAY ||
+  mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
 }
 
 static struct mlxsw_sp_nexthop_group *
-- 
2.9.3



[patch net-next v2 12/21] mlxsw: spectrum_router: Extract mlxsw_sp_fi_is_gateway()

2017-09-02 Thread Jiri Pirko
From: Petr Machata 

For IPv4 IP-in-IP offload, routes that direct traffic to IP-in-IP
devices need to be considered gateway routes as well. That involves a
bit more logic, so extract the current test to a separate function,
where the logic can be later added.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 14 ++
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 72e386b..04e1887 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -2382,6 +2382,12 @@ static void mlxsw_sp_nexthop_rif_gone_sync(struct 
mlxsw_sp *mlxsw_sp,
}
 }
 
+static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
+  const struct fib_info *fi)
+{
+   return fi->fib_nh->nh_scope == RT_SCOPE_LINK;
+}
+
 static struct mlxsw_sp_nexthop_group *
 mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
 {
@@ -2401,7 +2407,7 @@ mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, 
struct fib_info *fi)
INIT_LIST_HEAD(&nh_grp->fib_list);
nh_grp->neigh_tbl = &arp_tbl;
 
-   nh_grp->gateway = fi->fib_nh->nh_scope == RT_SCOPE_LINK;
+   nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
nh_grp->count = fi->fib_nhs;
fib_info_hold(fi);
for (i = 0; i < nh_grp->count; i++) {
@@ -2801,10 +2807,10 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
return 0;
case RTN_UNICAST:
-   if (fi->fib_nh->nh_scope != RT_SCOPE_LINK)
-   fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
-   else
+   if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi))
fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
+   else
+   fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
return 0;
default:
return -EINVAL;
-- 
2.9.3



[patch net-next v2 09/21] mlxsw: spectrum_router: Add mlxsw_sp_ipip_ops

2017-09-02 Thread Jiri Pirko
From: Petr Machata 

Details of individual tunnel types are kept in an array of
mlxsw_sp_ipip_ops objects. Follow-up patches will use the list to
determine whether a constructed RIF should be a loopback, and to decide
whether a next hop references a tunnel.

The list is currently empty, follow-up patches will add support for GRE.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/Makefile   |  4 +-
 .../net/ethernet/mellanox/mlxsw/spectrum_ipip.c| 38 
 .../net/ethernet/mellanox/mlxsw/spectrum_ipip.h| 51 ++
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 19 
 4 files changed, 110 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
 create mode 100644 drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h

diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile 
b/drivers/net/ethernet/mellanox/mlxsw/Makefile
index f9956f3..891ff41 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Makefile
+++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile
@@ -16,8 +16,8 @@ mlxsw_spectrum-objs   := spectrum.o 
spectrum_buffers.o \
   spectrum_switchdev.o spectrum_router.o \
   spectrum_kvdl.o spectrum_acl_tcam.o \
   spectrum_acl.o spectrum_flower.o \
-  spectrum_cnt.o \
-  spectrum_fid.o
+  spectrum_cnt.o spectrum_fid.o \
+  spectrum_ipip.o
 mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB)+= spectrum_dcb.o
 mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o
 obj-$(CONFIG_MLXSW_MINIMAL)+= mlxsw_minimal.o
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
new file mode 100644
index 000..20b0b96
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
@@ -0,0 +1,38 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Petr Machata 
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *notice, this list of conditions and the following disclaimer in the
+ *documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *contributors may be used to endorse or promote products derived from
+ *this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spectrum_ipip.h"
+
+const struct mlxsw_sp_ipip_ops *mlxsw_sp_ipip_ops_arr[] = {
+};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
new file mode 100644
index 000..65eb493
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
@@ -0,0 +1,51 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Petr Machata 
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *notice, this list 

[patch net-next v2 17/21] mlxsw: spectrum_router: Support IPv4 underlay decap

2017-09-02 Thread Jiri Pirko
From: Petr Machata 

Unlike encapsulation, which is represented by a next hop forwarding to
an IPIP tunnel, decapsulation is a type of local route. It is created
for local routes whose prefix corresponds to the local address of one of
the offloaded IPIP tunnels. When the tunnel is removed (i.e. all the encap
next hops are removed), the decap offload is migrated back to a trap for
resolution in the slow path.

This patch assumes that decap route is already present when encap route
is added. A follow-up patch will fix this issue.

Note that this patch only supports IPv4 underlay. Support for IPv6
underlay will be subject to follow-up work apart from this patchset.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 .../net/ethernet/mellanox/mlxsw/spectrum_ipip.h|   7 +
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 141 -
 .../net/ethernet/mellanox/mlxsw/spectrum_router.h  |   3 +
 3 files changed, 146 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
index 7b40aa2..cd986bd 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
@@ -36,6 +36,7 @@
 #define _MLXSW_IPIP_H_
 
 #include "spectrum_router.h"
+#include 
 
 enum mlxsw_sp_ipip_type {
MLXSW_SP_IPIP_TYPE_MAX,
@@ -46,6 +47,7 @@ struct mlxsw_sp_ipip_entry {
struct net_device *ol_dev; /* Overlay. */
struct mlxsw_sp_rif_ipip_lb *ol_lb;
unsigned int ref_count; /* Number of next hops using the tunnel. */
+   struct mlxsw_sp_fib_entry *decap_fib_entry;
struct list_head ipip_list_node;
 };
 
@@ -64,6 +66,11 @@ struct mlxsw_sp_ipip_ops {
struct mlxsw_sp_rif_ipip_lb_config
(*ol_loopback_config)(struct mlxsw_sp *mlxsw_sp,
  const struct net_device *ol_dev);
+
+   int (*fib_entry_op)(struct mlxsw_sp *mlxsw_sp,
+   struct mlxsw_sp_ipip_entry *ipip_entry,
+   enum mlxsw_reg_ralue_op op,
+   u32 tunnel_index);
 };
 
 extern const struct mlxsw_sp_ipip_ops *mlxsw_sp_ipip_ops_arr[];
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 05afd53..0d09f81 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -381,6 +381,14 @@ enum mlxsw_sp_fib_entry_type {
MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
+
+   /* This is a special case of local delivery, where a packet should be
+* decapsulated on reception. Note that there is no corresponding ENCAP,
+* because that's a type of next hop, not of FIB entry. (There can be
+* several next hops in a REMOTE entry, and some of them may be
+* encapsulating entries.)
+*/
+   MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
 };
 
 struct mlxsw_sp_nexthop_group;
@@ -394,12 +402,18 @@ struct mlxsw_sp_fib_node {
struct mlxsw_sp_fib_key key;
 };
 
+struct mlxsw_sp_fib_entry_decap {
+   struct mlxsw_sp_ipip_entry *ipip_entry;
+   u32 tunnel_index;
+};
+
 struct mlxsw_sp_fib_entry {
struct list_head list;
struct mlxsw_sp_fib_node *fib_node;
enum mlxsw_sp_fib_entry_type type;
struct list_head nexthop_group_node;
struct mlxsw_sp_nexthop_group *nh_group;
+   struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
 };
 
 struct mlxsw_sp_fib4_entry {
@@ -1031,6 +1045,48 @@ mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp 
*mlxsw_sp,
   mlxsw_sp_l3addr_eq(_saddr, );
 }
 
+static int
+mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry,
+ struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+   u32 tunnel_index;
+   int err;
+
+   err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &tunnel_index);
+   if (err)
+   return err;
+
+   ipip_entry->decap_fib_entry = fib_entry;
+   fib_entry->decap.ipip_entry = ipip_entry;
+   fib_entry->decap.tunnel_index = tunnel_index;
+   return 0;
+}
+
+static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry)
+{
+   /* Unlink this node from the IPIP entry that it's the decap entry of. */
+   fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
+   fib_entry->decap.ipip_entry = NULL;
+   mlxsw_sp_kvdl_free(mlxsw_sp, fib_entry->decap.tunnel_index);
+}
+
+static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
+struct mlxsw_sp_fib_entry *fib_entry);
+

[patch net-next v2 04/21] mlxsw: reg: Add mlxsw_reg_ralue_act_ip2me_tun_pack()

2017-09-02 Thread Jiri Pirko
From: Petr Machata 

To implement IP-in-IP decapsulation, Spectrum uses LPM entries of type
IP2ME with tunnel validity bit and tunnel pointer set. The necessary
register fields are already available, so add a function to pack the
RALUE as appropriate.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h 
b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index bf936b6..24296cf 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -5054,6 +5054,15 @@ mlxsw_reg_ralue_act_ip2me_pack(char *payload)
MLXSW_REG_RALUE_ACTION_TYPE_IP2ME);
 }
 
+static inline void
+mlxsw_reg_ralue_act_ip2me_tun_pack(char *payload, u32 tunnel_ptr)
+{
+   mlxsw_reg_ralue_action_type_set(payload,
+   MLXSW_REG_RALUE_ACTION_TYPE_IP2ME);
+   mlxsw_reg_ralue_ip2me_v_set(payload, 1);
+   mlxsw_reg_ralue_ip2me_tunnel_ptr_set(payload, tunnel_ptr);
+}
+
 /* RAUHT - Router Algorithmic LPM Unicast Host Table Register
  * --
  * The RAUHT register is used to configure and query the Unicast Host table in
-- 
2.9.3



[patch net-next v2 19/21] mlxsw: spectrum: Register for IPIP_DECAP_ERROR trap

2017-09-02 Thread Jiri Pirko
From: Petr Machata 

These traps are generated for packets that fail checks for source IP,
encapsulation type, or GRE key. Trap these packets to CPU for follow-up
handling by the kernel, which will send ICMP destination unreachable
responses.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 1 +
 drivers/net/ethernet/mellanox/mlxsw/trap.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 992cbfa..ed7cd6c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -3400,6 +3400,7 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
MLXSW_SP_RXL_MARK(HOST_MISS_IPV6, TRAP_TO_CPU, HOST_MISS, false),
MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV4, TRAP_TO_CPU, ROUTER_EXP, false),
MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV6, TRAP_TO_CPU, ROUTER_EXP, false),
+   MLXSW_SP_RXL_MARK(IPIP_DECAP_ERROR, TRAP_TO_CPU, ROUTER_EXP, false),
/* PKT Sample trap */
MLXSW_RXL(mlxsw_sp_rx_listener_sample_func, PKT_SAMPLE, MIRROR_TO_CPU,
  false, SP_IP2ME, DISCARD),
diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h 
b/drivers/net/ethernet/mellanox/mlxsw/trap.h
index 6165239..f396a1f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/trap.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h
@@ -85,6 +85,7 @@ enum {
MLXSW_TRAP_ID_HOST_MISS_IPV4 = 0x90,
MLXSW_TRAP_ID_IPV6_MC_LINK_LOCAL_DEST = 0x91,
MLXSW_TRAP_ID_HOST_MISS_IPV6 = 0x92,
+   MLXSW_TRAP_ID_IPIP_DECAP_ERROR = 0xB1,
MLXSW_TRAP_ID_ROUTER_ALERT_IPV4 = 0xD6,
MLXSW_TRAP_ID_ROUTER_ALERT_IPV6 = 0xD7,
MLXSW_TRAP_ID_ACL0 = 0x1C0,
-- 
2.9.3



[patch net-next v2 01/21] mlxsw: reg: Update RITR to support loopback device

2017-09-02 Thread Jiri Pirko
From: Petr Machata 

Update the register so that loopback RIFs can be created and loopback
properties specified.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 88 +++
 1 file changed, 88 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h 
b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 11e290c..8736f8492 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -3998,6 +3998,8 @@ enum mlxsw_reg_ritr_if_type {
MLXSW_REG_RITR_FID_IF,
/* Sub-port interface. */
MLXSW_REG_RITR_SP_IF,
+   /* Loopback Interface. */
+   MLXSW_REG_RITR_LOOPBACK_IF,
 };
 
 /* reg_ritr_type
@@ -4129,6 +4131,67 @@ MLXSW_ITEM32(reg, ritr, sp_if_system_port, 0x08, 0, 16);
  */
 MLXSW_ITEM32(reg, ritr, sp_if_vid, 0x18, 0, 12);
 
+/* Loopback Interface */
+
+enum mlxsw_reg_ritr_loopback_protocol {
+   /* IPinIP IPv4 underlay Unicast */
+   MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV4,
+   /* IPinIP IPv6 underlay Unicast */
+   MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV6,
+};
+
+/* reg_ritr_loopback_protocol
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, loopback_protocol, 0x08, 28, 4);
+
+enum mlxsw_reg_ritr_loopback_ipip_type {
+   /* Tunnel is IPinIP. */
+   MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_IP,
+   /* Tunnel is GRE, no key. */
+   MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_IN_IP,
+   /* Tunnel is GRE, with a key. */
+   MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_KEY_IN_IP,
+};
+
+/* reg_ritr_loopback_ipip_type
+ * Encapsulation type.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, loopback_ipip_type, 0x10, 24, 4);
+
+enum mlxsw_reg_ritr_loopback_ipip_options {
+   /* The key is defined by gre_key. */
+   MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
+};
+
+/* reg_ritr_loopback_ipip_options
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, loopback_ipip_options, 0x10, 20, 4);
+
+/* reg_ritr_loopback_ipip_uvr
+ * Underlay Virtual Router ID.
+ * Range is 0..cap_max_virtual_routers-1.
+ * Reserved for Spectrum-2.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, loopback_ipip_uvr, 0x10, 0, 16);
+
+/* reg_ritr_loopback_ipip_usip*
+ * Encapsulation Underlay source IP.
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, ritr, loopback_ipip_usip6, 0x18, 16);
+MLXSW_ITEM32(reg, ritr, loopback_ipip_usip4, 0x24, 0, 32);
+
+/* reg_ritr_loopback_ipip_gre_key
+ * GRE Key.
+ * Reserved when ipip_type is not IP_IN_GRE_KEY_IN_IP.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, loopback_ipip_gre_key, 0x28, 0, 32);
+
 /* Shared between ingress/egress */
 enum mlxsw_reg_ritr_counter_set_type {
/* No Count. */
@@ -4219,6 +4282,31 @@ static inline void mlxsw_reg_ritr_pack(char *payload, 
bool enable,
mlxsw_reg_ritr_if_mac_memcpy_to(payload, mac);
 }
 
+static inline void
+mlxsw_reg_ritr_loopback_ipip_common_pack(char *payload,
+   enum mlxsw_reg_ritr_loopback_ipip_type ipip_type,
+   enum mlxsw_reg_ritr_loopback_ipip_options options,
+   u16 uvr_id, u32 gre_key)
+{
+   mlxsw_reg_ritr_loopback_ipip_type_set(payload, ipip_type);
+   mlxsw_reg_ritr_loopback_ipip_options_set(payload, options);
+   mlxsw_reg_ritr_loopback_ipip_uvr_set(payload, uvr_id);
+   mlxsw_reg_ritr_loopback_ipip_gre_key_set(payload, gre_key);
+}
+
+static inline void
+mlxsw_reg_ritr_loopback_ipip4_pack(char *payload,
+   enum mlxsw_reg_ritr_loopback_ipip_type ipip_type,
+   enum mlxsw_reg_ritr_loopback_ipip_options options,
+   u16 uvr_id, u32 usip, u32 gre_key)
+{
+   mlxsw_reg_ritr_loopback_protocol_set(payload,
+   MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV4);
+   mlxsw_reg_ritr_loopback_ipip_common_pack(payload, ipip_type, options,
+uvr_id, gre_key);
+   mlxsw_reg_ritr_loopback_ipip_usip4_set(payload, usip);
+}
+
 /* RATR - Router Adjacency Table Register
  * --
  * The RATR register is used to configure the Router Adjacency (next-hop)
-- 
2.9.3



[patch net-next v2 18/21] mlxsw: spectrum_router: Use existing decap route

2017-09-02 Thread Jiri Pirko
From: Petr Machata 

The local route that points at IPIP's underlay device (decap route) can
be present long before the GRE device. Thus when an encap route is
added, it's necessary to check the underlay FIB to see whether the decap
route is already present. If so, the current trap offload needs to be
withdrawn and replaced with a decap offload.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 80 --
 1 file changed, 76 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 0d09f81..2189cf0 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -1072,6 +1072,9 @@ static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp 
*mlxsw_sp,
mlxsw_sp_kvdl_free(mlxsw_sp, fib_entry->decap.tunnel_index);
 }
 
+static struct mlxsw_sp_fib_node *
+mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
+size_t addr_len, unsigned char prefix_len);
 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
 struct mlxsw_sp_fib_entry *fib_entry);
 
@@ -1087,6 +1090,73 @@ mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp 
*mlxsw_sp,
mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
 }
 
+static void
+mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_ipip_entry *ipip_entry,
+ struct mlxsw_sp_fib_entry *decap_fib_entry)
+{
+   if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
+ ipip_entry))
+   return;
+   decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
+
+   if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
+   mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
+}
+
+/* Given an IPIP entry, find the corresponding decap route. */
+static struct mlxsw_sp_fib_entry *
+mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
+  struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+   static struct mlxsw_sp_fib_node *fib_node;
+   const struct mlxsw_sp_ipip_ops *ipip_ops;
+   struct mlxsw_sp_fib_entry *fib_entry;
+   unsigned char saddr_prefix_len;
+   union mlxsw_sp_l3addr saddr;
+   struct mlxsw_sp_fib *ul_fib;
+   struct mlxsw_sp_vr *ul_vr;
+   const void *saddrp;
+   size_t saddr_len;
+   u32 ul_tb_id;
+   u32 saddr4;
+
+   ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
+
+   ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
+   ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
+   if (!ul_vr)
+   return NULL;
+
+   ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
+   saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
+  ipip_entry->ol_dev);
+
+   switch (ipip_ops->ul_proto) {
+   case MLXSW_SP_L3_PROTO_IPV4:
+   saddr4 = be32_to_cpu(saddr.addr4);
+   saddrp = &saddr4;
+   saddr_len = 4;
+   saddr_prefix_len = 32;
+   break;
+   case MLXSW_SP_L3_PROTO_IPV6:
+   WARN_ON(1);
+   return NULL;
+   }
+
+   fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
+   saddr_prefix_len);
+   if (!fib_node || list_empty(&fib_node->entry_list))
+   return NULL;
+
+   fib_entry = list_first_entry(&fib_node->entry_list,
+struct mlxsw_sp_fib_entry, list);
+   if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
+   return NULL;
+
+   return fib_entry;
+}
+
 static struct mlxsw_sp_ipip_entry *
 mlxsw_sp_ipip_entry_get(struct mlxsw_sp *mlxsw_sp,
enum mlxsw_sp_ipip_type ipipt,
@@ -1094,6 +1164,7 @@ mlxsw_sp_ipip_entry_get(struct mlxsw_sp *mlxsw_sp,
 {
u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
struct mlxsw_sp_router *router = mlxsw_sp->router;
+   struct mlxsw_sp_fib_entry *decap_fib_entry;
struct mlxsw_sp_ipip_entry *ipip_entry;
enum mlxsw_sp_l3proto ul_proto;
union mlxsw_sp_l3addr saddr;
@@ -1118,6 +1189,11 @@ mlxsw_sp_ipip_entry_get(struct mlxsw_sp *mlxsw_sp,
if (IS_ERR(ipip_entry))
return ipip_entry;
 
+   decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
+   if (decap_fib_entry)
+   mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
+ decap_fib_entry);
+
list_add_tail(&ipip_entry->ipip_list_node,
  

[patch net-next v2 00/21] mlxsw: Offloading GRE tunnels

2017-09-02 Thread Jiri Pirko
From: Jiri Pirko 

Petr says:

This patch series introduces to mlxsw driver support for offloading
IP-in-IP tunnels in general, and for (subset of) GRE in particular.

This patchset supports two ways of configuring GRE:

- So called "hierarchical configuration", where the GRE device has a bound
  dummy device, which is in a different VRF. The VRF with host traffic is
  called "overlay", the one with encapsulated traffic is called "underlay".

- So called "flat configuration", where the GRE device doesn't have a bound
  device, and overlay and underlay are both in the same VRF (possibly the
  default one).

Two routes are then interesting: a route that directs traffic to a GRE
device (which would typically be in overlay VRF, but could be in another
one), and a local route for the tunnel's local address (in underlay).
Handling of these two route types is then introduced as patches to support,
respectively, IPv4 and IPv6 encapsulation and IPv4 decapsulation.

The encap and decap routes then reference a loopback device, a new type of
RIF introduced by this patchset for the specific use of offloading tunnels.

The encap and decap code is abstract with respect to the particulars of
individual L3 tunnel types. This patchset introduces support for GRE
tunnels in particular.

Limitations:

- Each tunnel needs to have a different local address (within a given VRF).
  When two tunnels are used that are in conflict, FIB abort is triggered
  and the driver ceases offloading FIBs. Full handling of such
  configurations needs special setup in the hardware, such that the tunnels
  that share an address are dispatched correctly according to their key (or
  lack thereof). That's currently not implemented, and to keep things
  deterministic, the driver triggers FIB abort.

- A next hop that uses an incompletely-specified tunnel (e.g. such as those
  used for LWT) is not offloaded, but doesn't trigger FIB abort like the
  above. If such routes end up being in a de facto conflict with other
  tunnels, then if there already is an offload for that address, the
  traffic for the conflicting tunnel will end up mismatching the
  configuration of the offloaded tunnel, and thus gets to slow path through
  an error trap.

- GRE checksumming and sequence numbers are not supported and TTL and TOS
  need to be set to inherit. Tunnels with a different configuration are not
  offloaded and their traffic is trapped to the slow path.

  Note in particular that TOS of inherit is not the default configuration
  and needs to be explicitly specified when the tunnel is created.

- The only feature that is not gracefully handled is that if a change is
  made to the tunnel, e.g. through "ip tunnel change", such changes are not
  reflected in the driver. There is currently no notification mechanism for
  these changes. Introduction of this mechanism and its leverage in the
  driver will be subject of follow-up work. For now this limitation can be
  worked around by removing and re-adding the encap route.

---
v1->v2:
-fix order of patch 5

Petr Machata (21):
  mlxsw: reg: Update RITR to support loopback device
  mlxsw: reg: Update RATR to support IP-in-IP tunnels
  mlxsw: reg: Move enum mlxsw_reg_ratr_trap_id
  mlxsw: reg: Add mlxsw_reg_ralue_act_ip2me_tun_pack()
  mlxsw: reg: Add Routing Tunnel Decap Properties Register
  mlxsw: reg: Extract mlxsw_reg_ritr_mac_pack()
  mlxsw: reg: Give mlxsw_reg_ratr_pack a type parameter
  mlxsw: spectrum_router: Publish mlxsw_sp_l3proto
  mlxsw: spectrum_router: Add mlxsw_sp_ipip_ops
  mlxsw: spectrum_router: Support FID-less RIFs
  mlxsw: spectrum_router: Introduce loopback RIFs
  mlxsw: spectrum_router: Extract mlxsw_sp_fi_is_gateway()
  mlxsw: spectrum_router: Extract mlxsw_sp_rt6_is_gateway()
  mlxsw: spectrum_router: Make nexthops typed
  mlxsw: spectrum_router: Support IPv4 overlay encap
  mlxsw: spectrum_router: Support IPv6 overlay encap
  mlxsw: spectrum_router: Support IPv4 underlay decap
  mlxsw: spectrum_router: Use existing decap route
  mlxsw: spectrum: Register for IPIP_DECAP_ERROR trap
  mlxsw: spectrum_router: Add loopback accessors
  mlxsw: spectrum_router: Support GRE tunnels

 drivers/net/ethernet/mellanox/mlxsw/Makefile   |   4 +-
 drivers/net/ethernet/mellanox/mlxsw/reg.h  | 311 ++-
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c |   1 +
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h |   1 +
 .../net/ethernet/mellanox/mlxsw/spectrum_ipip.c| 214 +
 .../net/ethernet/mellanox/mlxsw/spectrum_ipip.h|  79 ++
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 947 +++--
 .../net/ethernet/mellanox/mlxsw/spectrum_router.h  |  28 +
 drivers/net/ethernet/mellanox/mlxsw/trap.h |   1 +
 9 files changed, 1485 insertions(+), 101 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
 create mode 100644 drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h

-- 
2.9.3



[patch net-next v2 15/21] mlxsw: spectrum_router: Support IPv4 overlay encap

2017-09-02 Thread Jiri Pirko
From: Petr Machata 

This introduces some common code for tracking of offloaded IP-in-IP
tunnels, and support for offloading IPv4 overlay encapsulating routes in
particular. A follow-up patch will introduce IPv6 overlay as well.

Offloaded tunnels are kept in a linked list of mlxsw_sp_ipip_entry
objects hooked up in mlxsw_sp_router. A network device that represents
the tunnel is used as a key to look up the corresponding IPIP entry.
Note that in the future, a more general keying mechanism will be needed,
because parts of the tunnel information can be provided by the route.

IPIP entries are reference counted, because several next hops may end up
using the same tunnel, and we only want to offload it once.

Encapsulation path hooks into next hop handling. Routes that forward to
a tunnel are now considered gateway routes, thus giving them the same
treatment that other remote routes get. An IPIP next hop type is
introduced.

Details of individual tunnel types are kept in an array of
mlxsw_sp_ipip_ops objects. If a tunnel type doesn't match any of the
known tunnel types, the next-hop is not considered an IPIP next hop.

The list of IPIP tunnel types is currently empty, follow-up patches will
add support for GRE. Traffic to IPIP tunnel types that are not
explicitly recognized by the driver traps and is handled in slow path.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 .../net/ethernet/mellanox/mlxsw/spectrum_ipip.h|  20 ++
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 238 -
 2 files changed, 257 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
index 65eb493..7b40aa2 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
@@ -41,9 +41,29 @@ enum mlxsw_sp_ipip_type {
MLXSW_SP_IPIP_TYPE_MAX,
 };
 
+struct mlxsw_sp_ipip_entry {
+   enum mlxsw_sp_ipip_type ipipt;
+   struct net_device *ol_dev; /* Overlay. */
+   struct mlxsw_sp_rif_ipip_lb *ol_lb;
+   unsigned int ref_count; /* Number of next hops using the tunnel. */
+   struct list_head ipip_list_node;
+};
+
 struct mlxsw_sp_ipip_ops {
int dev_type;
enum mlxsw_sp_l3proto ul_proto; /* Underlay. */
+
+   int (*nexthop_update)(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
+ struct mlxsw_sp_ipip_entry *ipip_entry);
+
+   bool (*can_offload)(const struct mlxsw_sp *mlxsw_sp,
+   const struct net_device *ol_dev,
+   enum mlxsw_sp_l3proto ol_proto);
+
+   /* Return a configuration for creating an overlay loopback RIF. */
+   struct mlxsw_sp_rif_ipip_lb_config
+   (*ol_loopback_config)(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *ol_dev);
 };
 
 extern const struct mlxsw_sp_ipip_ops *mlxsw_sp_ipip_ops_arr[];
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 53bdd0f..231b597 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -89,6 +89,7 @@ struct mlxsw_sp_router {
struct delayed_work nexthop_probe_dw;
 #define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
struct list_head nexthop_neighs_list;
+   struct list_head ipip_list;
bool aborted;
struct notifier_block fib_nb;
const struct mlxsw_sp_rif_ops **rif_ops_arr;
@@ -915,6 +916,170 @@ static u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct 
net_device *ol_dev)
return l3mdev_fib_table(ol_dev) ? : RT_TABLE_MAIN;
 }
 
+static struct mlxsw_sp_rif *
+mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
+   const struct mlxsw_sp_rif_params *params);
+
+static struct mlxsw_sp_rif_ipip_lb *
+mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
+   enum mlxsw_sp_ipip_type ipipt,
+   struct net_device *ol_dev)
+{
+   struct mlxsw_sp_rif_params_ipip_lb lb_params;
+   const struct mlxsw_sp_ipip_ops *ipip_ops;
+   struct mlxsw_sp_rif *rif;
+
+   ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
+   lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
+   .common.dev = ol_dev,
+   .common.lag = false,
+   .lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
+   };
+
+   rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common);
+   if (IS_ERR(rif))
+   return ERR_CAST(rif);
+   return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
+}
+
+static struct mlxsw_sp_ipip_entry *
+mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_sp_ipip_type 

[patch net-next v2 07/21] mlxsw: reg: Give mlxsw_reg_ratr_pack a type parameter

2017-09-02 Thread Jiri Pirko
From: Petr Machata 

To support IPIP, the driver needs to be able to construct an IPIP
adjacency. Change mlxsw_reg_ratr_pack to take an adjacency type as an
argument. Adjust the one existing caller.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 2 ++
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h 
b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index d80cf9b..cc27c5d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -4483,11 +4483,13 @@ MLXSW_ITEM32(reg, ratr, ipip_ipv6_ptr, 0x1C, 0, 24);
 static inline void
 mlxsw_reg_ratr_pack(char *payload,
enum mlxsw_reg_ratr_op op, bool valid,
+   enum mlxsw_reg_ratr_type type,
u32 adjacency_index, u16 egress_rif)
 {
MLXSW_REG_ZERO(ratr, payload);
mlxsw_reg_ratr_op_set(payload, op);
mlxsw_reg_ratr_v_set(payload, valid);
+   mlxsw_reg_ratr_type_set(payload, type);
mlxsw_reg_ratr_adjacency_index_low_set(payload, adjacency_index);
mlxsw_reg_ratr_adjacency_index_high_set(payload, adjacency_index >> 16);
mlxsw_reg_ratr_egress_router_interface_set(payload, egress_rif);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 3ddfbe3..4e47d45 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -1928,7 +1928,8 @@ static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp 
*mlxsw_sp, u32 adj_index,
char ratr_pl[MLXSW_REG_RATR_LEN];
 
mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
-   true, adj_index, neigh_entry->rif);
+   true, MLXSW_REG_RATR_TYPE_ETHERNET,
+   adj_index, neigh_entry->rif);
mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
 }
-- 
2.9.3



[patch net-next v2 10/21] mlxsw: spectrum_router: Support FID-less RIFs

2017-09-02 Thread Jiri Pirko
From: Petr Machata 

Loopback RIFs, which will be introduced in a follow-up patch, differ
from other RIFs in that they do not have a FID associated with them.

To support this, demote FID allocation from mlxsw_sp_rif_create to
configure op of the existing RIF types, and likewise the FID release
from mlxsw_sp_rif_destroy to deconfigure op.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 85 --
 1 file changed, 63 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index f85d249..38477c5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -,9 +,9 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
 {
u32 tb_id = l3mdev_fib_table(params->dev);
const struct mlxsw_sp_rif_ops *ops;
+   struct mlxsw_sp_fid *fid = NULL;
enum mlxsw_sp_rif_type type;
struct mlxsw_sp_rif *rif;
-   struct mlxsw_sp_fid *fid;
struct mlxsw_sp_vr *vr;
u16 rif_index;
int err;
@@ -4470,12 +4470,14 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
rif->mlxsw_sp = mlxsw_sp;
rif->ops = ops;
 
-   fid = ops->fid_get(rif);
-   if (IS_ERR(fid)) {
-   err = PTR_ERR(fid);
-   goto err_fid_get;
+   if (ops->fid_get) {
+   fid = ops->fid_get(rif);
+   if (IS_ERR(fid)) {
+   err = PTR_ERR(fid);
+   goto err_fid_get;
+   }
+   rif->fid = fid;
}
-   rif->fid = fid;
 
if (ops->setup)
ops->setup(rif, params);
@@ -4484,22 +4486,15 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
if (err)
goto err_configure;
 
-   err = mlxsw_sp_rif_fdb_op(mlxsw_sp, params->dev->dev_addr,
- mlxsw_sp_fid_index(fid), true);
-   if (err)
-   goto err_rif_fdb_op;
-
mlxsw_sp_rif_counters_alloc(rif);
-   mlxsw_sp_fid_rif_set(fid, rif);
mlxsw_sp->router->rifs[rif_index] = rif;
vr->rif_count++;
 
return rif;
 
-err_rif_fdb_op:
-   ops->deconfigure(rif);
 err_configure:
-   mlxsw_sp_fid_put(fid);
+   if (fid)
+   mlxsw_sp_fid_put(fid);
 err_fid_get:
kfree(rif);
 err_rif_alloc:
@@ -4520,12 +4515,11 @@ void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
 
vr->rif_count--;
mlxsw_sp->router->rifs[rif->rif_index] = NULL;
-   mlxsw_sp_fid_rif_set(fid, NULL);
mlxsw_sp_rif_counters_free(rif);
-   mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->dev->dev_addr,
-   mlxsw_sp_fid_index(fid), false);
ops->deconfigure(rif);
-   mlxsw_sp_fid_put(fid);
+   if (fid)
+   /* Loopback RIFs are not associated with a FID. */
+   mlxsw_sp_fid_put(fid);
kfree(rif);
mlxsw_sp_vr_put(vr);
 }
@@ -4965,11 +4959,32 @@ static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif 
*rif, bool enable)
 
 static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
 {
-   return mlxsw_sp_rif_subport_op(rif, true);
+   int err;
+
+   err = mlxsw_sp_rif_subport_op(rif, true);
+   if (err)
+   return err;
+
+   err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
+ mlxsw_sp_fid_index(rif->fid), true);
+   if (err)
+   goto err_rif_fdb_op;
+
+   mlxsw_sp_fid_rif_set(rif->fid, rif);
+   return 0;
+
+err_rif_fdb_op:
+   mlxsw_sp_rif_subport_op(rif, false);
+   return err;
 }
 
 static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
 {
+   struct mlxsw_sp_fid *fid = rif->fid;
+
+   mlxsw_sp_fid_rif_set(fid, NULL);
+   mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
+   mlxsw_sp_fid_index(fid), false);
mlxsw_sp_rif_subport_op(rif, false);
 }
 
@@ -5028,8 +5043,17 @@ static int mlxsw_sp_rif_vlan_configure(struct 
mlxsw_sp_rif *rif)
if (err)
goto err_fid_bc_flood_set;
 
+   err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
+ mlxsw_sp_fid_index(rif->fid), true);
+   if (err)
+   goto err_rif_fdb_op;
+
+   mlxsw_sp_fid_rif_set(rif->fid, rif);
return 0;
 
+err_rif_fdb_op:
+   mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
+  mlxsw_sp_router_port(mlxsw_sp), false);
 err_fid_bc_flood_set:
mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
   mlxsw_sp_router_port(mlxsw_sp), false);
@@ -5040,9 +5064,13 @@ static int 

[patch net-next v2 20/21] mlxsw: spectrum_router: Add loopback accessors

2017-09-02 Thread Jiri Pirko
From: Petr Machata 

struct mlxsw_sp_rif is a router-private structure, and therefore
everything related to it is as well: parameters, and derived RIF types
including loopbacks. IPIP module needs access to some details of
loopback interfaces, but exporting all the RIF shebang would create too
large an interface.

So instead export just the bare minimum necessary: accessors for RIF
index and underlay VRF ID.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 10 ++
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h |  3 +++
 2 files changed, 13 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 2189cf0..6068eea 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -5007,6 +5007,16 @@ u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
return rif->rif_index;
 }
 
+u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
+{
+   return lb_rif->common.rif_index;
+}
+
+u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
+{
+   return lb_rif->ul_vr_id;
+}
+
 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
 {
return rif->dev->ifindex;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
index 4276f5f..9632476 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
@@ -48,6 +48,7 @@ union mlxsw_sp_l3addr {
struct in6_addr addr6;
 };
 
+struct mlxsw_sp_rif_ipip_lb;
 struct mlxsw_sp_rif_ipip_lb_config {
enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt;
u32 okey;
@@ -65,6 +66,8 @@ struct mlxsw_sp_neigh_entry;
 struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
   u16 rif_index);
 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif);
+u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *rif);
+u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *rif);
 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif);
 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
   struct mlxsw_sp_rif *rif,
-- 
2.9.3



[patch net-next v2 21/21] mlxsw: spectrum_router: Support GRE tunnels

2017-09-02 Thread Jiri Pirko
From: Petr Machata 

This patch introduces callbacks and tunnel type to offload GRE tunnels.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 .../net/ethernet/mellanox/mlxsw/spectrum_ipip.c| 176 +
 .../net/ethernet/mellanox/mlxsw/spectrum_ipip.h|   1 +
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  |  26 +++
 .../net/ethernet/mellanox/mlxsw/spectrum_router.h  |   4 +
 4 files changed, 207 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
index 20b0b96..702fe94 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
@@ -32,7 +32,183 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include 
+
 #include "spectrum_ipip.h"
 
+static bool
+mlxsw_sp_ipip_netdev_has_ikey(const struct net_device *ol_dev)
+{
+   struct ip_tunnel *tun = netdev_priv(ol_dev);
+
+   return !!(tun->parms.i_flags & TUNNEL_KEY);
+}
+
+static bool
+mlxsw_sp_ipip_netdev_has_okey(const struct net_device *ol_dev)
+{
+   struct ip_tunnel *tun = netdev_priv(ol_dev);
+
+   return !!(tun->parms.o_flags & TUNNEL_KEY);
+}
+
+static u32 mlxsw_sp_ipip_netdev_ikey(const struct net_device *ol_dev)
+{
+   struct ip_tunnel *tun = netdev_priv(ol_dev);
+
+   return mlxsw_sp_ipip_netdev_has_ikey(ol_dev) ?
+   be32_to_cpu(tun->parms.i_key) : 0;
+}
+
+static u32 mlxsw_sp_ipip_netdev_okey(const struct net_device *ol_dev)
+{
+   struct ip_tunnel *tun = netdev_priv(ol_dev);
+
+   return mlxsw_sp_ipip_netdev_has_okey(ol_dev) ?
+   be32_to_cpu(tun->parms.o_key) : 0;
+}
+
+static int
+mlxsw_sp_ipip_nexthop_update_gre4(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
+ struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+   u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb);
+   __be32 daddr4 = mlxsw_sp_ipip_netdev_daddr4(ipip_entry->ol_dev);
+   char ratr_pl[MLXSW_REG_RATR_LEN];
+
+   mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
+   true, MLXSW_REG_RATR_TYPE_IPIP,
+   adj_index, rif_index);
+   mlxsw_reg_ratr_ipip4_entry_pack(ratr_pl, be32_to_cpu(daddr4));
+
+   return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
+}
+
+static int
+mlxsw_sp_ipip_fib_entry_op_gre4_rtdp(struct mlxsw_sp *mlxsw_sp,
+u32 tunnel_index,
+struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+   bool has_ikey = mlxsw_sp_ipip_netdev_has_ikey(ipip_entry->ol_dev);
+   u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb);
+   u32 ikey = mlxsw_sp_ipip_netdev_ikey(ipip_entry->ol_dev);
+   char rtdp_pl[MLXSW_REG_RTDP_LEN];
+   unsigned int type_check;
+   u32 daddr4;
+
+   mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_IPIP, tunnel_index);
+
+   type_check = has_ikey ?
+   MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE_KEY :
+   MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE;
+
+   /* Linux demuxes tunnels based on packet SIP (which must match tunnel
+* remote IP). Thus configure decap so that it filters out packets that
+* are not IPv4 or have the wrong SIP. IPIP_DECAP_ERROR trap is
+* generated for packets that fail this criterion. Linux then handles
+* such packets in slow path and generates ICMP destination unreachable.
+*/
+   daddr4 = be32_to_cpu(mlxsw_sp_ipip_netdev_daddr4(ipip_entry->ol_dev));
+   mlxsw_reg_rtdp_ipip4_pack(rtdp_pl, rif_index,
+ MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV4,
+ type_check, has_ikey, daddr4, ikey);
+
+   return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtdp), rtdp_pl);
+}
+
+static int
+mlxsw_sp_ipip_fib_entry_op_gre4_ralue(struct mlxsw_sp *mlxsw_sp,
+ u32 dip, u8 prefix_len, u16 ul_vr_id,
+ enum mlxsw_reg_ralue_op op,
+ u32 tunnel_index)
+{
+   char ralue_pl[MLXSW_REG_RALUE_LEN];
+
+   mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_REG_RALXX_PROTOCOL_IPV4, op,
+ ul_vr_id, prefix_len, dip);
+   mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl, tunnel_index);
+   return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
+}
+
+static int mlxsw_sp_ipip_fib_entry_op_gre4(struct mlxsw_sp *mlxsw_sp,
+   struct mlxsw_sp_ipip_entry *ipip_entry,
+   enum mlxsw_reg_ralue_op op,
+   u32 tunnel_index)
+{
+   u16 ul_vr_id = mlxsw_sp_ipip_lb_ul_vr_id(ipip_entry->ol_lb);
+   __be32 dip;
+   int err;
+
+ 

Re: [RFC PATCH v1 0/5] TCP Wave

2017-09-02 Thread Natale Patriciello
Hello all,
first of all, we would like to thank everyone who commented on our
patches; we are working to include all the suggestions (we are at a good
point).

However, we have two questions:

1) How to retrieve information about each connection? Right now we used
debug messages, but we understand it isn't the best option. TCP Wave
users have other values to track rather than congestion window and slow
start threshold. It seems we have two alternatives: (a) use get_info,
which returns strings to be read with ss, or (b) open a file under /proc/net
and write data to it, in the same way as tcp_probe does. Option (a)
requires polling from userspace (for instance, using watch), which
is subject to delays and may not be suitable for fast connections
(watch minimum interval is 100 ms). Option (b), toggled with a module
parameter, seems the more viable. Is that correct?

The second one is inline:

On 28/07/17 at 10:33pm, Eric Dumazet wrote:
> On Fri, 2017-07-28 at 21:59 +0200, Natale Patriciello wrote:
> > Hi,
[cut]
> > TCP Wave (TCPW) replaces the window-based transmission paradigm of the 
> > standard
> > TCP with a burst-based transmission, the ACK-clock scheduling with a
> > self-managed timer and the RTT-based congestion control loop with an 
> > Ack-based
> > Capacity and Congestion Estimation (ACCE) module. In non-technical words, it
> > sends data down the stack when its internal timer expires, and the timing of
> > the received ACKs contribute to updating this timer regularly.
>
> This patch series seems to have missed recent efforts in TCP stack,
> namely TCP pacing.
>
> commit 218af599fa635b107cfe10acf3249c4dfe5e4123 ("tcp: internal
> implementation for pacing") added a timer already to get fine grained
> packet xmits.

Thank you, Eric, for this suggestion; in fact, we had problems with our
implementation of the timer, and we would like to switch to the new
pacing timer entirely. However, a pacing approach is exactly the
opposite of what we would like to achieve: we want to send a burst of data
(let's say, ten segments) and then wait some amount of time. Do you
think that adding a new congestion control callback that returns the
number of segments to send when the timer expires (default to 1) and another
callback for retrieving the pacing time can be a sound strategy?

Thank you again, have a nice day.

Natale


[PATCH net-next v2 2/4] net: dsa: tag_brcm: Set output queue from skb queue mapping

2017-09-02 Thread Florian Fainelli
We originally used skb->priority but that was not quite correct as this
bitfield needs to contain the egress switch queue we intend to send this
SKB to.

Signed-off-by: Florian Fainelli 
---
 net/dsa/tag_brcm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index de74c3f77818..dbb016434ace 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -62,6 +62,7 @@
 static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device 
*dev)
 {
struct dsa_slave_priv *p = netdev_priv(dev);
+   u16 queue = skb_get_queue_mapping(skb);
u8 *brcm_tag;
 
if (skb_cow_head(skb, BRCM_TAG_LEN) < 0)
@@ -78,7 +79,7 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, 
struct net_device *dev
 * deprecated
 */
brcm_tag[0] = (1 << BRCM_OPCODE_SHIFT) |
-   ((skb->priority << BRCM_IG_TC_SHIFT) & BRCM_IG_TC_MASK);
+  ((queue & BRCM_IG_TC_MASK) << BRCM_IG_TC_SHIFT);
brcm_tag[1] = 0;
brcm_tag[2] = 0;
if (p->dp->index == 8)
-- 
2.11.0



[PATCH net-next v2 3/4] net: dsa: bcm_sf2: Advertise number of egress queues

2017-09-02 Thread Florian Fainelli
The switch supports 8 egress queues per port, so indicate that such that
net/dsa/slave.c::dsa_slave_create can allocate the right number of TX queues.
While at it use SF2_NUM_EGRESS_QUEUE as a define for the number of queues we
support.

Signed-off-by: Florian Fainelli 
---
 drivers/net/dsa/bcm_sf2.c  | 5 -
 drivers/net/dsa/bcm_sf2_regs.h | 3 +++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 554fe2df9365..6b184bafa235 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -244,7 +244,7 @@ static int bcm_sf2_port_setup(struct dsa_switch *ds, int 
port,
 * to a different queue number
 */
reg = core_readl(priv, CORE_PORT_TC2_QOS_MAP_PORT(port));
-   for (i = 0; i < 8; i++)
+   for (i = 0; i < SF2_NUM_EGRESS_QUEUES; i++)
reg |= i << (PRT_TO_QID_SHIFT * i);
core_writel(priv, reg, CORE_PORT_TC2_QOS_MAP_PORT(port));
 
@@ -1151,6 +1151,9 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev)
ds = dev->ds;
ds->ops = &bcm_sf2_ops;
 
+   /* Advertise the 8 egress queues */
+   ds->num_tx_queues = SF2_NUM_EGRESS_QUEUES;
+
dev_set_drvdata(&pdev->dev, priv);
 
spin_lock_init(&priv->indir_lock);
diff --git a/drivers/net/dsa/bcm_sf2_regs.h b/drivers/net/dsa/bcm_sf2_regs.h
index 26052450091e..49695fcc2ea8 100644
--- a/drivers/net/dsa/bcm_sf2_regs.h
+++ b/drivers/net/dsa/bcm_sf2_regs.h
@@ -401,4 +401,7 @@ enum bcm_sf2_reg_offs {
 
 #define CFP_NUM_RULES  256
 
+/* Number of egress queues per port */
+#define SF2_NUM_EGRESS_QUEUES  8
+
 #endif /* __BCM_SF2_REGS_H */
-- 
2.11.0



[PATCH net-next v2 4/4] net: dsa: bcm_sf2: Configure IMP port TC2QOS mapping

2017-09-02 Thread Florian Fainelli
Even though TC2QOS mapping is for switch egress queues, we need to
configure it correctly in order for the Broadcom tag ingress (CPU ->
switch) queue selection to work correctly since there is a 1:1 mapping
between switch egress queues and ingress queues.

Signed-off-by: Florian Fainelli 
---
 drivers/net/dsa/bcm_sf2.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 6b184bafa235..d7b53d53c116 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -103,6 +103,7 @@ static void bcm_sf2_brcm_hdr_setup(struct bcm_sf2_priv 
*priv, int port)
 static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port)
 {
struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
+   unsigned int i;
u32 reg, offset;
 
if (priv->type == BCM7445_DEVICE_ID)
@@ -129,6 +130,14 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int 
port)
reg |= MII_DUMB_FWDG_EN;
core_writel(priv, reg, CORE_SWITCH_CTRL);
 
+   /* Configure Traffic Class to QoS mapping, allow each priority to map
+* to a different queue number
+*/
+   reg = core_readl(priv, CORE_PORT_TC2_QOS_MAP_PORT(port));
+   for (i = 0; i < SF2_NUM_EGRESS_QUEUES; i++)
+   reg |= i << (PRT_TO_QID_SHIFT * i);
+   core_writel(priv, reg, CORE_PORT_TC2_QOS_MAP_PORT(port));
+
bcm_sf2_brcm_hdr_setup(priv, port);
 
/* Force link status for IMP port */
-- 
2.11.0



[PATCH net-next v2 0/4] net: dsa: Allow switch drivers to indicate number of TX queues

2017-09-02 Thread Florian Fainelli
Hi all,

This patch series extracts the parts of the patch set that are likely not to be
controversial and actually bring multi-queue support to DSA-created network
devices.

With these patches, we can now use sch_multiq as documented under
Documentation/networking/multiqueue.txt and let applications decide the switch
port output queue they want to use. Currently only Broadcom tags utilize that
information.

Changes in v2:
- use a proper define for the number of TX queues in bcm_sf2.c (Andrew)

Changes from RFC:

- dropped the ability to configure RX queues since we don't do anything with
  those just yet
- dropped the patches that dealt with binding the DSA slave network devices
  queues with their master network devices queues this will be worked on
  separately.

Florian Fainelli (4):
  net: dsa: Allow switch drivers to indicate number of TX queues
  net: dsa: tag_brcm: Set output queue from skb queue mapping
  net: dsa: bcm_sf2: Advertise number of egress queues
  net: dsa: bcm_sf2: Configure IMP port TC2QOS mapping

 drivers/net/dsa/bcm_sf2.c  | 14 +-
 drivers/net/dsa/bcm_sf2_regs.h |  3 +++
 include/net/dsa.h  |  3 +++
 net/dsa/slave.c|  8 ++--
 net/dsa/tag_brcm.c |  3 ++-
 5 files changed, 27 insertions(+), 4 deletions(-)

-- 
2.11.0



[PATCH net-next v2 1/4] net: dsa: Allow switch drivers to indicate number of TX queues

2017-09-02 Thread Florian Fainelli
Let switch drivers indicate how many TX queues they support. Some
switches, such as Broadcom Starfighter 2 are designed with 8 egress
queues. Future changes will allow us to leverage the queue mapping and
direct the transmission towards a particular queue.

Signed-off-by: Florian Fainelli 
---
 include/net/dsa.h | 3 +++
 net/dsa/slave.c   | 8 ++--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 398ca8d70ccd..dd44d6ce1097 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -243,6 +243,9 @@ struct dsa_switch {
/* devlink used to represent this switch device */
struct devlink  *devlink;
 
+   /* Number of switch port queues */
+   unsigned intnum_tx_queues;
+
/* Dynamically allocated ports, keep last */
size_t num_ports;
struct dsa_port ports[];
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 78e78a6e6833..2afa99506f8b 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1259,8 +1259,12 @@ int dsa_slave_create(struct dsa_port *port, const char 
*name)
cpu_dp = ds->dst->cpu_dp;
master = cpu_dp->netdev;
 
-   slave_dev = alloc_netdev(sizeof(struct dsa_slave_priv), name,
-NET_NAME_UNKNOWN, ether_setup);
+   if (!ds->num_tx_queues)
+   ds->num_tx_queues = 1;
+
+   slave_dev = alloc_netdev_mqs(sizeof(struct dsa_slave_priv), name,
+NET_NAME_UNKNOWN, ether_setup,
+ds->num_tx_queues, 1);
if (slave_dev == NULL)
return -ENOMEM;
 
-- 
2.11.0



Re: [PATCH net-next 4/4] net: dsa: bcm_sf2: Configure IMP port TC2QOS mapping

2017-09-02 Thread Florian Fainelli


On 09/02/2017 10:47 AM, Andrew Lunn wrote:
> On Sat, Sep 02, 2017 at 09:32:08AM -0700, Florian Fainelli wrote:
>>
>>
>> On 09/02/2017 09:30 AM, Andrew Lunn wrote:
 @@ -129,6 +130,14 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, 
 int port)
reg |= MII_DUMB_FWDG_EN;
core_writel(priv, reg, CORE_SWITCH_CTRL);
  
 +  /* Configure Traffic Class to QoS mapping, allow each priority to map
 +   * to a different queue number
 +   */
 +  reg = core_readl(priv, CORE_PORT_TC2_QOS_MAP_PORT(port));
 +  for (i = 0; i < 8; i++)
 +  reg |= i << (PRT_TO_QID_SHIFT * i);
>>>
>>> Hi Florian
>>>
>>> Rather than 8, would ds->num_tx_queues be more descriptive?
>>
>> I actually have a patch on top of this which defines a constant for the
>> number of queues instead of open coding 8 here. Thanks!
> 
> Hi Florian
> 
> It sounds like it should be part of 3/4?

Right, let me re-submit with that change included. Thanks
-- 
Florian


Re: [PATCH net-next 4/4] net: dsa: bcm_sf2: Configure IMP port TC2QOS mapping

2017-09-02 Thread Andrew Lunn
On Sat, Sep 02, 2017 at 09:32:08AM -0700, Florian Fainelli wrote:
> 
> 
> On 09/02/2017 09:30 AM, Andrew Lunn wrote:
> >> @@ -129,6 +130,14 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, 
> >> int port)
> >>reg |= MII_DUMB_FWDG_EN;
> >>core_writel(priv, reg, CORE_SWITCH_CTRL);
> >>  
> >> +  /* Configure Traffic Class to QoS mapping, allow each priority to map
> >> +   * to a different queue number
> >> +   */
> >> +  reg = core_readl(priv, CORE_PORT_TC2_QOS_MAP_PORT(port));
> >> +  for (i = 0; i < 8; i++)
> >> +  reg |= i << (PRT_TO_QID_SHIFT * i);
> > 
> > Hi Florian
> > 
> > Rather than 8, would ds->num_tx_queues be more descriptive?
> 
> I actually have a patch on top of this which defines a constant for the
> number of queues instead of open coding 8 here. Thanks!

Hi Florian

It sounds like it should be part of 3/4?

   Andrew


Re: [PATCH] Fix build on fedora-14 (and other older systems)

2017-09-02 Thread Michal Kubecek
On Sat, Sep 02, 2017 at 07:15:02AM -0700, gree...@candelatech.com wrote:
> From: Ben Greear 
> 
> Seems Fedora-20 and below fail, hopefully this fixes
> them.
> 
> Signed-off-by: Ben Greear 
> ---
>  include/linux/sysinfo.h | 8 
>  ip/ipxfrm.c | 1 +
>  ip/xfrm_policy.c| 1 +
>  ip/xfrm_state.c | 1 +
>  4 files changed, 11 insertions(+)

Works for me from openSUSE Tumbleweed to SLE11 SP4 (kernel 3.0). The
build on SLE11 SP2 (also 3.0 but older headers in /usr/include)
fails with unknown MS_PRIVATE and MS_REC in lib/bpf.c
Newer systems have these defined in <sys/mount.h>.

Including <linux/fs.h> directly in lib/bpf.c fixes that but if we do
that, a copy of the file would have to be included.

> diff --git a/include/linux/sysinfo.h b/include/linux/sysinfo.h
> index 934335a..3596b02 100644
> --- a/include/linux/sysinfo.h
> +++ b/include/linux/sysinfo.h
> @@ -3,6 +3,14 @@
>  
>  #include 
>  
> +/* So we can compile on older OSs, hopefully this is correct. --Ben */
> +#ifndef __kernel_long_t
> +typedef long __kernel_long_t;
> +#endif
> +#ifndef __kernel_ulong_t
> +typedef unsigned long __kernel_ulong_t;
> +#endif
> +
>  #define SI_LOAD_SHIFT16
>  struct sysinfo {
>   __kernel_long_t uptime; /* Seconds since boot */

I'm not sure if it is acceptable to modify a file which is supposed to
be a direct copy of kernel uapi header.

Michal Kubecek



Re: [PATCH net-next 0/4] net: dsa: add master interface

2017-09-02 Thread Florian Fainelli
Hi Vivien,

On 08/31/2017 11:37 AM, Vivien Didelot wrote:
> Currently the SoC network interface (called master) to which a switch
> fabric hangs, has its dsa_ptr pointing to a dsa_switch_tree instance.
> 
> This is not quite correct, because this interface is physically wired to
> one of the switch ports (called CPU port), and because in a switch
> fabric with multiple CPU ports, several master interfaces will point to
> several CPU ports of the same dsa_switch_tree.
> 
> This patchset adds a new dsa_master structure to represent the pipe
> between the SoC master interface and its switch CPU port. This structure
> will store specific data such as the master ethtool_ops copy and the
> tagging protocol used to pass frames with the associated slave ports.
> The dsa_ptr is changed to a dsa_master instance, and each DSA slave now
> has a pointer to a master port.
> 
> This is a step towards better control over the CPU conduit and support
> for multiple CPU ports.

Looked briefly over this series and this looks good to me, Andrew has a
valid point about reducing the number of dereferences in hot paths
though. Thanks!

> 
> Vivien Didelot (4):
>   net: dsa: introduce dsa_master
>   net: dsa: move master ethtool ops in dsa_master
>   net: dsa: change dsa_ptr for a dsa_master
>   net: dsa: assign a master to slave ports
> 
>  drivers/net/dsa/b53/b53_common.c |   4 +-
>  drivers/net/dsa/bcm_sf2.c|   8 +--
>  drivers/net/dsa/mt7530.c |   4 +-
>  drivers/net/dsa/mv88e6060.c  |   2 +-
>  drivers/net/dsa/qca8k.c  |   2 +-
>  include/linux/netdevice.h|   4 +-
>  include/net/dsa.h|  42 +--
>  net/dsa/Makefile |   2 +-
>  net/dsa/dsa.c|  34 +
>  net/dsa/dsa2.c   |  38 +-
>  net/dsa/dsa_priv.h   |  24 +++
>  net/dsa/legacy.c |  34 +
>  net/dsa/master.c | 149 
> +++
>  net/dsa/slave.c  | 117 +-
>  net/dsa/tag_brcm.c   |   5 +-
>  net/dsa/tag_dsa.c|   3 +-
>  net/dsa/tag_edsa.c   |   3 +-
>  net/dsa/tag_ksz.c|   5 +-
>  net/dsa/tag_lan9303.c|   6 +-
>  net/dsa/tag_mtk.c|  12 +---
>  net/dsa/tag_qca.c|  12 +---
>  net/dsa/tag_trailer.c|   5 +-
>  22 files changed, 265 insertions(+), 250 deletions(-)
>  create mode 100644 net/dsa/master.c
> 

-- 
Florian


Re: [PATCH net-next 4/4] net: dsa: bcm_sf2: Configure IMP port TC2QOS mapping

2017-09-02 Thread Florian Fainelli


On 09/02/2017 09:30 AM, Andrew Lunn wrote:
>> @@ -129,6 +130,14 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, 
>> int port)
>>  reg |= MII_DUMB_FWDG_EN;
>>  core_writel(priv, reg, CORE_SWITCH_CTRL);
>>  
>> +/* Configure Traffic Class to QoS mapping, allow each priority to map
>> + * to a different queue number
>> + */
>> +reg = core_readl(priv, CORE_PORT_TC2_QOS_MAP_PORT(port));
>> +for (i = 0; i < 8; i++)
>> +reg |= i << (PRT_TO_QID_SHIFT * i);
> 
> Hi Florian
> 
> Rather than 8, would ds->num_tx_queues be more descriptive?

I actually have a patch on top of this which defines a constant for the
number of queues instead of open coding 8 here. Thanks!
-- 
Florian


Re: [PATCH net-next 4/4] net: dsa: bcm_sf2: Configure IMP port TC2QOS mapping

2017-09-02 Thread Andrew Lunn
> @@ -129,6 +130,14 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int 
> port)
>   reg |= MII_DUMB_FWDG_EN;
>   core_writel(priv, reg, CORE_SWITCH_CTRL);
>  
> + /* Configure Traffic Class to QoS mapping, allow each priority to map
> +  * to a different queue number
> +  */
> + reg = core_readl(priv, CORE_PORT_TC2_QOS_MAP_PORT(port));
> + for (i = 0; i < 8; i++)
> + reg |= i << (PRT_TO_QID_SHIFT * i);

Hi Florian

Rather than 8, would ds->num_tx_queues be more descriptive?

   Andrew


Re: [PATCH net-next 4/4] net: dsa: assign a master to slave ports

2017-09-02 Thread Florian Fainelli


On 09/02/2017 08:34 AM, Andrew Lunn wrote:
> On Thu, Aug 31, 2017 at 02:37:46PM -0400, Vivien Didelot wrote:
>> Because each DSA slave port may use a different DSA master port, add a
>> pointer to a master in the slave structure. This is a preparatory patch
>> for multiple CPU ports.
>>
>> Signed-off-by: Vivien Didelot 
>> ---
>>  net/dsa/dsa_priv.h |  7 ++-
>>  net/dsa/slave.c| 33 ++---
>>  2 files changed, 20 insertions(+), 20 deletions(-)
>>
>> diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
>> index 59f155cbbe87..a8cd6cbe4061 100644
>> --- a/net/dsa/dsa_priv.h
>> +++ b/net/dsa/dsa_priv.h
>> @@ -66,6 +66,8 @@ struct dsa_notifier_vlan_info {
>>  };
>>  
>>  struct dsa_slave_priv {
>> +struct dsa_master *master;
>> +
>>  /* Copy of the master xmit tagging op for faster access in hot path */
>>  struct sk_buff *(*xmit)(struct sk_buff *skb,
>>  struct net_device *dev);
>> @@ -179,9 +181,4 @@ extern const struct dsa_device_ops qca_netdev_ops;
>>  /* tag_trailer.c */
>>  extern const struct dsa_device_ops trailer_netdev_ops;
>>  
>> -static inline struct net_device *dsa_master_netdev(struct dsa_slave_priv *p)
>> -{
>> -return p->dp->ds->dst->master->netdev;
>> -}
>> -
> 
> Hi Vivien
> 
> Why remove this helper, when it could become p->master->netdev ?

Agreed, I would keep the helper at this point to minimize the delta, it
should really be identical in terms of code generated.
-- 
Florian


Re: [PATCH net] net: dsa: loop: Do not unregister invalid fixed PHY

2017-09-02 Thread Andrew Lunn
On Sat, Sep 02, 2017 at 08:56:45AM -0700, Florian Fainelli wrote:
> During error injection it was possible to crash in dsa_loop_exit() because of
> an attempt to unregister an invalid PHY. We actually want to the driver 
> probing
> in dsa_loop_init() even though fixed_phy_register() may return an error to
> exercise how DSA deals with such cases, but we should not be crashing during
> driver removal.
> 
> Fixes: 98cd1552ea27 ("net: dsa: Mock-up driver")
> Signed-off-by: Florian Fainelli 

Reviewed-by: Andrew Lunn 

Andrew


[PATCH net] net: dsa: loop: Do not unregister invalid fixed PHY

2017-09-02 Thread Florian Fainelli
During error injection it was possible to crash in dsa_loop_exit() because of
an attempt to unregister an invalid PHY. We actually want the driver probing
in dsa_loop_init() to proceed even though fixed_phy_register() may return an
error, to exercise how DSA deals with such cases, but we should not be crashing during
driver removal.

Fixes: 98cd1552ea27 ("net: dsa: Mock-up driver")
Signed-off-by: Florian Fainelli 
---
 drivers/net/dsa/dsa_loop.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c
index fdd8f3872102..8150e3a3c8dd 100644
--- a/drivers/net/dsa/dsa_loop.c
+++ b/drivers/net/dsa/dsa_loop.c
@@ -390,7 +390,7 @@ static void __exit dsa_loop_exit(void)
 
mdio_driver_unregister(&dsa_loop_drv);
for (i = 0; i < NUM_FIXED_PHYS; i++)
-   if (phydevs[i])
+   if (!IS_ERR(phydevs[i]))
fixed_phy_unregister(phydevs[i]);
 }
 module_exit(dsa_loop_exit);
-- 
2.11.0



Re: [PATCH] DSA support for Micrel KSZ8895

2017-09-02 Thread Pavel Machek
Hi!

>  Section 4.8 of the datasheet says:
> 
>   All the registers defined in this section can be also accessed
>   via the SPI interface.
> 
>  Meaning all PHY registers can be accessed via the SPI interface. So you
>  should be able to make a standard Linux MDIO bus driver which performs
>  SPI reads.
> >>>
> >>> As far as I can tell (and their driver confirms) -- yes, all those 
> >>> registers can be
> >>> accessed over the SPI, they are just shuffled around... hence MDIO
> >>> emulation code. I copied it from their code (see the copyrights) so no, I 
> >>> don't
> >>> believe there's nicer solution.
> >>>
> >>> Best regards,
> >>
> >> Can you hold on your developing work on KSZ8895 driver?  I am afraid your 
> >> effort may be in vain.  We at Microchip are planning to release DSA 
> >> drivers for all KSZ switches, starting at KSZ8795, then KSZ8895, and 
> >> KSZ8863.
> >>
> > 
> > Well, thanks for heads up... but its too late to stop now. I already
> > have working code, without the advanced features.
> 
> No driver has landed yet nor has any driver been posted in a proper form
> or shape, so at this point neither of you are able to make any claims as
> to which one should be chosen.

I certainly do not want to make any claims. Tristram's driver is
likely to support all (most?) features of the chip, which is not my
goal.

> > I don't know how far away you are with the development. You may want
> > to start from my driver (but its probably too late now).
> 
> I would tend to favor Tristram's submission when we see it because he
> claims support for more devices and it is likely to be backed and
> maintained by Microchip in the future.

Well, I guess we decide when we see the code, that's how it works, right?

> I am sure there will be opportunity for you to contribute a lot to this
> driver. Of course, this all depends on the code quality and timing, but
> having two people work on the same things in parallel is just a complete
> waste of each other's time so we might as well wait for Tristram to post
> the said driver and define a plan of action from there?

Well, it would be good to see the code, so we can judge the
quality. Normally, code is posted before testing, so this kind of
problem does not arise.

Best regards,

Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) 
http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html


signature.asc
Description: Digital signature


Re: [PATCH net-next 4/4] net: dsa: assign a master to slave ports

2017-09-02 Thread Andrew Lunn
On Thu, Aug 31, 2017 at 02:37:46PM -0400, Vivien Didelot wrote:
> Because each DSA slave port may use a different DSA master port, add a
> pointer to a master in the slave structure. This is a preparatory patch
> for multiple CPU ports.
> 
> Signed-off-by: Vivien Didelot 
> ---
>  net/dsa/dsa_priv.h |  7 ++-
>  net/dsa/slave.c| 33 ++---
>  2 files changed, 20 insertions(+), 20 deletions(-)
> 
> diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
> index 59f155cbbe87..a8cd6cbe4061 100644
> --- a/net/dsa/dsa_priv.h
> +++ b/net/dsa/dsa_priv.h
> @@ -66,6 +66,8 @@ struct dsa_notifier_vlan_info {
>  };
>  
>  struct dsa_slave_priv {
> + struct dsa_master *master;
> +
>   /* Copy of the master xmit tagging op for faster access in hot path */
>   struct sk_buff *(*xmit)(struct sk_buff *skb,
>   struct net_device *dev);
> @@ -179,9 +181,4 @@ extern const struct dsa_device_ops qca_netdev_ops;
>  /* tag_trailer.c */
>  extern const struct dsa_device_ops trailer_netdev_ops;
>  
> -static inline struct net_device *dsa_master_netdev(struct dsa_slave_priv *p)
> -{
> - return p->dp->ds->dst->master->netdev;
> -}
> -

Hi Vivien

Why remove this helper, when it could become p->master->netdev ?

Andrew


Re: [PATCH net-next 3/4] net: dsa: change dsa_ptr for a dsa_master

2017-09-02 Thread Andrew Lunn
Hi Vivien


> diff --git a/include/net/dsa.h b/include/net/dsa.h
> index f4a5afc4255b..d5b24cd10f79 100644
> --- a/include/net/dsa.h
> +++ b/include/net/dsa.h
> @@ -130,11 +130,6 @@ struct dsa_switch_tree {
>*/
>   struct dsa_platform_data*pd;
>  
> - /* Copy of tag_ops->rcv for faster access in hot path */
> - struct sk_buff *(*rcv)(struct sk_buff *skb,
> -struct net_device *dev,
> -struct packet_type *pt);
> -
>   /*
>* The switch port to which the CPU is attached.
>*/


> diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
> index fbf9ca954773..7897bbd1a110 100644
> --- a/net/dsa/tag_dsa.c
> +++ b/net/dsa/tag_dsa.c
> @@ -67,7 +67,8 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct 
> net_device *dev)
>  static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
>  struct packet_type *pt)
>  {
> - struct dsa_switch_tree *dst = dev->dsa_ptr;
> + struct dsa_master *master = dev->dsa_ptr;
> + struct dsa_switch_tree *dst = master->port->ds->dst;

This is on the hot path. Every frame received comes through here. We
have gone from one dereference to four dereferences. That is going to
impact performance. 

How about keeping a copy of dst and ds in master?

Andrew


[patch net-next 04/21] mlxsw: reg: Add mlxsw_reg_ralue_act_ip2me_tun_pack()

2017-09-02 Thread Jiri Pirko
From: Petr Machata 

To implement IP-in-IP decapsulation, Spectrum uses LPM entries of type
IP2ME with tunnel validity bit and tunnel pointer set. The necessary
register fields are already available, so add a function to pack the
RALUE as appropriate.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h 
b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index bf936b6..24296cf 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -5054,6 +5054,15 @@ mlxsw_reg_ralue_act_ip2me_pack(char *payload)
MLXSW_REG_RALUE_ACTION_TYPE_IP2ME);
 }
 
+static inline void
+mlxsw_reg_ralue_act_ip2me_tun_pack(char *payload, u32 tunnel_ptr)
+{
+   mlxsw_reg_ralue_action_type_set(payload,
+   MLXSW_REG_RALUE_ACTION_TYPE_IP2ME);
+   mlxsw_reg_ralue_ip2me_v_set(payload, 1);
+   mlxsw_reg_ralue_ip2me_tunnel_ptr_set(payload, tunnel_ptr);
+}
+
 /* RAUHT - Router Algorithmic LPM Unicast Host Table Register
  * --
  * The RAUHT register is used to configure and query the Unicast Host table in
-- 
2.9.3



[patch net-next 06/21] mlxsw: reg: Give mlxsw_reg_ratr_pack a type parameter

2017-09-02 Thread Jiri Pirko
From: Petr Machata 

To support IPIP, the driver needs to be able to construct an IPIP
adjacency. Change mlxsw_reg_ratr_pack to take an adjacency type as an
argument. Adjust the one existing caller.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 2 ++
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h 
b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 27a378d..680bc7e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -4482,11 +4482,13 @@ MLXSW_ITEM32(reg, ratr, ipip_ipv6_ptr, 0x1C, 0, 24);
 static inline void
 mlxsw_reg_ratr_pack(char *payload,
enum mlxsw_reg_ratr_op op, bool valid,
+   enum mlxsw_reg_ratr_type type,
u32 adjacency_index, u16 egress_rif)
 {
MLXSW_REG_ZERO(ratr, payload);
mlxsw_reg_ratr_op_set(payload, op);
mlxsw_reg_ratr_v_set(payload, valid);
+   mlxsw_reg_ratr_type_set(payload, type);
mlxsw_reg_ratr_adjacency_index_low_set(payload, adjacency_index);
mlxsw_reg_ratr_adjacency_index_high_set(payload, adjacency_index >> 16);
mlxsw_reg_ratr_egress_router_interface_set(payload, egress_rif);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 3ddfbe3..4e47d45 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -1928,7 +1928,8 @@ static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp 
*mlxsw_sp, u32 adj_index,
char ratr_pl[MLXSW_REG_RATR_LEN];
 
mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
-   true, adj_index, neigh_entry->rif);
+   true, MLXSW_REG_RATR_TYPE_ETHERNET,
+   adj_index, neigh_entry->rif);
mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
 }
-- 
2.9.3



[patch net-next 03/21] mlxsw: reg: Move enum mlxsw_reg_ratr_trap_id

2017-09-02 Thread Jiri Pirko
From: Petr Machata 

This enum is used with reg_ratr_trap_id, so move it next to the register
definition.

While at it, drop the enumerator initializers.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h 
b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 6a7757f..bf936b6 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -4423,17 +4423,17 @@ enum mlxsw_reg_ratr_trap_action {
  */
 MLXSW_ITEM32(reg, ratr, trap_action, 0x0C, 28, 4);
 
-enum mlxsw_reg_ratr_trap_id {
-   MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS0 = 0,
-   MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS1 = 1,
-};
-
 /* reg_ratr_adjacency_index_high
  * Bits 23:16 of the adjacency_index.
  * Access: Index
  */
 MLXSW_ITEM32(reg, ratr, adjacency_index_high, 0x0C, 16, 8);
 
+enum mlxsw_reg_ratr_trap_id {
+   MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS0,
+   MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS1,
+};
+
 /* reg_ratr_trap_id
  * Trap ID to be reported to CPU.
  * Trap-ID is RTR_EGRESS0 or RTR_EGRESS1.
-- 
2.9.3



[patch net-next 15/21] mlxsw: spectrum_router: Support IPv6 overlay encap

2017-09-02 Thread Jiri Pirko
From: Petr Machata 

Add the missing bits to recognize IPv6 next hops as IPIP ones to enable
offloading of IPv6 overlay encapsulation.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c| 20 +++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 231b597..05afd53 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -3757,15 +3757,32 @@ mlxsw_sp_fib6_entry_rt_find(const struct 
mlxsw_sp_fib6_entry *fib6_entry,
return NULL;
 }
 
+static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
+   const struct rt6_info *rt,
+   enum mlxsw_sp_ipip_type *ret)
+{
+   return rt->dst.dev &&
+  mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->dst.dev, ret);
+}
+
 static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
   struct mlxsw_sp_nexthop_group *nh_grp,
   struct mlxsw_sp_nexthop *nh,
   const struct rt6_info *rt)
 {
+   struct mlxsw_sp_router *router = mlxsw_sp->router;
struct net_device *dev = rt->dst.dev;
+   enum mlxsw_sp_ipip_type ipipt;
struct mlxsw_sp_rif *rif;
int err;
 
+   if (mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, &ipipt) &&
+   router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev,
+MLXSW_SP_L3_PROTO_IPV6)) {
+   nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
+   return mlxsw_sp_nexthop_ipip_init(mlxsw_sp, ipipt, nh, dev);
+   }
+
nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
if (!rif)
@@ -3815,7 +3832,8 @@ static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp 
*mlxsw_sp,
 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
const struct rt6_info *rt)
 {
-   return rt->rt6i_flags & RTF_GATEWAY;
+   return rt->rt6i_flags & RTF_GATEWAY ||
+  mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
 }
 
 static struct mlxsw_sp_nexthop_group *
-- 
2.9.3



[patch net-next 17/21] mlxsw: spectrum_router: Use existing decap route

2017-09-02 Thread Jiri Pirko
From: Petr Machata 

The local route that points at IPIP's underlay device (decap route) can
be present long before the GRE device. Thus when an encap route is
added, it's necessary to look inside the underlay FIB if the decap route
is already present. If so, the current trap offload needs to be
withdrawn and replaced with a decap offload.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 80 --
 1 file changed, 76 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 0d09f81..2189cf0 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -1072,6 +1072,9 @@ static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp 
*mlxsw_sp,
mlxsw_sp_kvdl_free(mlxsw_sp, fib_entry->decap.tunnel_index);
 }
 
+static struct mlxsw_sp_fib_node *
+mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
+size_t addr_len, unsigned char prefix_len);
 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
 struct mlxsw_sp_fib_entry *fib_entry);
 
@@ -1087,6 +1090,73 @@ mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp 
*mlxsw_sp,
mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
 }
 
+static void
+mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_ipip_entry *ipip_entry,
+ struct mlxsw_sp_fib_entry *decap_fib_entry)
+{
+   if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
+ ipip_entry))
+   return;
+   decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
+
+   if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
+   mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
+}
+
+/* Given an IPIP entry, find the corresponding decap route. */
+static struct mlxsw_sp_fib_entry *
+mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
+  struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+   static struct mlxsw_sp_fib_node *fib_node;
+   const struct mlxsw_sp_ipip_ops *ipip_ops;
+   struct mlxsw_sp_fib_entry *fib_entry;
+   unsigned char saddr_prefix_len;
+   union mlxsw_sp_l3addr saddr;
+   struct mlxsw_sp_fib *ul_fib;
+   struct mlxsw_sp_vr *ul_vr;
+   const void *saddrp;
+   size_t saddr_len;
+   u32 ul_tb_id;
+   u32 saddr4;
+
+   ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
+
+   ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
+   ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
+   if (!ul_vr)
+   return NULL;
+
+   ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
+   saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
+  ipip_entry->ol_dev);
+
+   switch (ipip_ops->ul_proto) {
+   case MLXSW_SP_L3_PROTO_IPV4:
+   saddr4 = be32_to_cpu(saddr.addr4);
+   saddrp = &saddr4;
+   saddr_len = 4;
+   saddr_prefix_len = 32;
+   break;
+   case MLXSW_SP_L3_PROTO_IPV6:
+   WARN_ON(1);
+   return NULL;
+   }
+
+   fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
+   saddr_prefix_len);
+   if (!fib_node || list_empty(&fib_node->entry_list))
+   return NULL;
+
+   fib_entry = list_first_entry(&fib_node->entry_list,
+struct mlxsw_sp_fib_entry, list);
+   if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
+   return NULL;
+
+   return fib_entry;
+}
+
 static struct mlxsw_sp_ipip_entry *
 mlxsw_sp_ipip_entry_get(struct mlxsw_sp *mlxsw_sp,
enum mlxsw_sp_ipip_type ipipt,
@@ -1094,6 +1164,7 @@ mlxsw_sp_ipip_entry_get(struct mlxsw_sp *mlxsw_sp,
 {
u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
struct mlxsw_sp_router *router = mlxsw_sp->router;
+   struct mlxsw_sp_fib_entry *decap_fib_entry;
struct mlxsw_sp_ipip_entry *ipip_entry;
enum mlxsw_sp_l3proto ul_proto;
union mlxsw_sp_l3addr saddr;
@@ -1118,6 +1189,11 @@ mlxsw_sp_ipip_entry_get(struct mlxsw_sp *mlxsw_sp,
if (IS_ERR(ipip_entry))
return ipip_entry;
 
+   decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
+   if (decap_fib_entry)
+   mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
+ decap_fib_entry);
+
list_add_tail(&ipip_entry->ipip_list_node,
  

[patch net-next 10/21] mlxsw: spectrum_router: Introduce loopback RIFs

2017-09-02 Thread Jiri Pirko
From: Petr Machata 

When offloading L3 tunnels, an adjacency entry is created that loops the
packet back into the underlay router. Loopback interfaces then hold the
corresponding information and are created for IP-in-IP netdevices.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h |   1 +
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 152 -
 .../net/ethernet/mellanox/mlxsw/spectrum_router.h  |  13 ++
 3 files changed, 164 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index f8c7f7e..84ce83a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -77,6 +77,7 @@ enum mlxsw_sp_rif_type {
MLXSW_SP_RIF_TYPE_SUBPORT,
MLXSW_SP_RIF_TYPE_VLAN,
MLXSW_SP_RIF_TYPE_FID,
+   MLXSW_SP_RIF_TYPE_IPIP_LB, /* IP-in-IP loopback. */
MLXSW_SP_RIF_TYPE_MAX,
 };
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 38477c5..72e386b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -1,9 +1,10 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
- * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016-2017 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2016 Jiri Pirko 
  * Copyright (c) 2016 Ido Schimmel 
  * Copyright (c) 2016 Yotam Gigi 
+ * Copyright (c) 2017 Petr Machata 
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -51,6 +52,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -131,6 +133,17 @@ struct mlxsw_sp_rif_subport {
bool lag;
 };
 
+struct mlxsw_sp_rif_ipip_lb {
+   struct mlxsw_sp_rif common;
+   struct mlxsw_sp_rif_ipip_lb_config lb_config;
+   u16 ul_vr_id; /* Reserved for Spectrum-2. */
+};
+
+struct mlxsw_sp_rif_params_ipip_lb {
+   struct mlxsw_sp_rif_params common;
+   struct mlxsw_sp_rif_ipip_lb_config lb_config;
+};
+
 struct mlxsw_sp_rif_ops {
enum mlxsw_sp_rif_type type;
size_t rif_size;
@@ -883,6 +896,25 @@ static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
kfree(mlxsw_sp->router->vrs);
 }
 
+static struct net_device *
+__mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
+{
+   struct ip_tunnel *tun = netdev_priv(ol_dev);
+   struct net *net = dev_net(ol_dev);
+
+   return __dev_get_by_index(net, tun->parms.link);
+}
+
+static u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
+{
+   struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
+
+   if (d)
+   return l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
+   else
+   return l3mdev_fib_table(ol_dev) ? : RT_TABLE_MAIN;
+}
+
 struct mlxsw_sp_neigh_key {
struct neighbour *n;
 };
@@ -2236,6 +2268,25 @@ static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp 
*mlxsw_sp,
neigh_release(n);
 }
 
+static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *dev,
+ enum mlxsw_sp_ipip_type *p_type)
+{
+   struct mlxsw_sp_router *router = mlxsw_sp->router;
+   const struct mlxsw_sp_ipip_ops *ipip_ops;
+   enum mlxsw_sp_ipip_type ipipt;
+
+   for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
+   ipip_ops = router->ipip_ops_arr[ipipt];
+   if (dev->type == ipip_ops->dev_type) {
+   if (p_type)
+   *p_type = ipipt;
+   return true;
+   }
+   }
+   return false;
+}
+
 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
  struct mlxsw_sp_nexthop_group *nh_grp,
  struct mlxsw_sp_nexthop *nh,
@@ -4374,7 +4425,10 @@ mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
 {
enum mlxsw_sp_fid_type type;
 
-   /* RIF type is derived from the type of the underlying FID */
+   if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
+   return MLXSW_SP_RIF_TYPE_IPIP_LB;
+
+   /* Otherwise RIF type is derived from the type of the underlying FID. */
if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
type = MLXSW_SP_FID_TYPE_8021Q;
else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
@@ -5164,10 +5218,104 @@ static 

[patch net-next 18/21] mlxsw: spectrum: Register for IPIP_DECAP_ERROR trap

2017-09-02 Thread Jiri Pirko
From: Petr Machata 

These traps are generated for packets that fail checks for source IP,
encapsulation type, or GRE key. Trap these packets to CPU for follow-up
handling by the kernel, which will send ICMP destination unreachable
responses.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 1 +
 drivers/net/ethernet/mellanox/mlxsw/trap.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 992cbfa..ed7cd6c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -3400,6 +3400,7 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
MLXSW_SP_RXL_MARK(HOST_MISS_IPV6, TRAP_TO_CPU, HOST_MISS, false),
MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV4, TRAP_TO_CPU, ROUTER_EXP, false),
MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV6, TRAP_TO_CPU, ROUTER_EXP, false),
+   MLXSW_SP_RXL_MARK(IPIP_DECAP_ERROR, TRAP_TO_CPU, ROUTER_EXP, false),
/* PKT Sample trap */
MLXSW_RXL(mlxsw_sp_rx_listener_sample_func, PKT_SAMPLE, MIRROR_TO_CPU,
  false, SP_IP2ME, DISCARD),
diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h 
b/drivers/net/ethernet/mellanox/mlxsw/trap.h
index 6165239..f396a1f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/trap.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h
@@ -85,6 +85,7 @@ enum {
MLXSW_TRAP_ID_HOST_MISS_IPV4 = 0x90,
MLXSW_TRAP_ID_IPV6_MC_LINK_LOCAL_DEST = 0x91,
MLXSW_TRAP_ID_HOST_MISS_IPV6 = 0x92,
+   MLXSW_TRAP_ID_IPIP_DECAP_ERROR = 0xB1,
MLXSW_TRAP_ID_ROUTER_ALERT_IPV4 = 0xD6,
MLXSW_TRAP_ID_ROUTER_ALERT_IPV6 = 0xD7,
MLXSW_TRAP_ID_ACL0 = 0x1C0,
-- 
2.9.3



[patch net-next 21/21] mlxsw: reg: Add Routing Tunnel Decap Properties Register

2017-09-02 Thread Jiri Pirko
From: Petr Machata 

The RTDP register is used for configuring the tunnel decap properties of
NVE and IPinIP.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 129 ++
 1 file changed, 129 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h 
b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 680bc7e..cc27c5d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -5,6 +5,7 @@
  * Copyright (c) 2015 Elad Raz 
  * Copyright (c) 2015-2017 Jiri Pirko 
  * Copyright (c) 2016 Yotam Gigi 
+ * Copyright (c) 2017 Petr Machata 
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -5468,6 +5469,133 @@ static inline void 
mlxsw_reg_rauhtd_ent_ipv6_unpack(char *payload,
mlxsw_reg_rauhtd_ipv6_ent_dip_memcpy_from(payload, rec_index, p_dip);
 }
 
+/* RTDP - Routing Tunnel Decap Properties Register
+ * ---
+ * The RTDP register is used for configuring the tunnel decap properties of NVE
+ * and IPinIP.
+ */
+#define MLXSW_REG_RTDP_ID 0x8020
+#define MLXSW_REG_RTDP_LEN 0x44
+
+MLXSW_REG_DEFINE(rtdp, MLXSW_REG_RTDP_ID, MLXSW_REG_RTDP_LEN);
+
+enum mlxsw_reg_rtdp_type {
+   MLXSW_REG_RTDP_TYPE_NVE,
+   MLXSW_REG_RTDP_TYPE_IPIP,
+};
+
+/* reg_rtdp_type
+ * Type of the RTDP entry as per enum mlxsw_reg_rtdp_type.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rtdp, type, 0x00, 28, 4);
+
+/* reg_rtdp_tunnel_index
+ * Index to the Decap entry.
+ * For Spectrum, Index to KVD Linear.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rtdp, tunnel_index, 0x00, 0, 24);
+
+/* IPinIP */
+
+/* reg_rtdp_ipip_irif
+ * Ingress Router Interface for the overlay router
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rtdp, ipip_irif, 0x04, 16, 16);
+
+enum mlxsw_reg_rtdp_ipip_sip_check {
+   /* No sip checks. */
+   MLXSW_REG_RTDP_IPIP_SIP_CHECK_NO,
+   /* Filter packet if underlay is not IPv4 or if underlay SIP does not
+* equal ipv4_usip.
+*/
+   MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV4,
+   /* Filter packet if underlay is not IPv6 or if underlay SIP does not
+* equal ipv6_usip.
+*/
+   MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV6 = 3,
+};
+
+/* reg_rtdp_ipip_sip_check
+ * SIP check to perform. If decapsulation failed due to these configurations
+ * then trap_id is IPIP_DECAP_ERROR.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rtdp, ipip_sip_check, 0x04, 0, 3);
+
+/* If set, allow decapsulation of IPinIP (without GRE). */
+#define MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_IPIP  BIT(0)
+/* If set, allow decapsulation of IPinGREinIP without a key. */
+#define MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE   BIT(1)
+/* If set, allow decapsulation of IPinGREinIP with a key. */
+#define MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE_KEY   BIT(2)
+
+/* reg_rtdp_ipip_type_check
+ * Flags as per MLXSW_REG_RTDP_IPIP_TYPE_CHECK_*. If decapsulation failed due to
+ * these configurations then trap_id is IPIP_DECAP_ERROR.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rtdp, ipip_type_check, 0x08, 24, 3);
+
+/* reg_rtdp_ipip_gre_key_check
+ * Whether GRE key should be checked. When check is enabled:
+ * - A packet received as IPinIP (without GRE) will always pass.
+ * - A packet received as IPinGREinIP without a key will not pass the check.
+ * - A packet received as IPinGREinIP with a key will pass the check only if the
+ *   key in the packet is equal to expected_gre_key.
+ * If decapsulation failed due to GRE key then trap_id is IPIP_DECAP_ERROR.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rtdp, ipip_gre_key_check, 0x08, 23, 1);
+
+/* reg_rtdp_ipip_ipv4_usip
+ * Underlay IPv4 address for ipv4 source address check.
+ * Reserved when sip_check is not '1'.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rtdp, ipip_ipv4_usip, 0x0C, 0, 32);
+
+/* reg_rtdp_ipip_ipv6_usip_ptr
+ * This field is valid when sip_check is "sipv6 check explicitly". This is a
+ * pointer to the IPv6 DIP which is configured by RIPS. For Spectrum, the index
+ * is to the KVD linear.
+ * Reserved when sip_check is not MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV6.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rtdp, ipip_ipv6_usip_ptr, 0x10, 0, 24);
+
+/* reg_rtdp_ipip_expected_gre_key
+ * GRE key for checking.
+ * Reserved when gre_key_check is '0'.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rtdp, ipip_expected_gre_key, 0x14, 0, 32);
+
+static inline void mlxsw_reg_rtdp_pack(char *payload,
+  enum mlxsw_reg_rtdp_type type,
+  u32 tunnel_index)
+{
+   MLXSW_REG_ZERO(rtdp, payload);
+   mlxsw_reg_rtdp_type_set(payload, type);
+   

[patch net-next 16/21] mlxsw: spectrum_router: Support IPv4 underlay decap

2017-09-02 Thread Jiri Pirko
From: Petr Machata 

Unlike encapsulation, which is represented by a next hop forwarding to
an IPIP tunnel, decapsulation is a type of local route. It is created
for local routes whose prefix corresponds to the local address of one of
offloaded IPIP tunnels. When the tunnel is removed (i.e. all the encap
next hops are removed), the decap offload is migrated back to a trap for
resolution in slow path.

This patch assumes that decap route is already present when encap route
is added. A follow-up patch will fix this issue.

Note that this patch only supports IPv4 underlay. Support for IPv6
underlay will be subject to follow-up work apart from this patchset.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 .../net/ethernet/mellanox/mlxsw/spectrum_ipip.h|   7 +
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 141 -
 .../net/ethernet/mellanox/mlxsw/spectrum_router.h  |   3 +
 3 files changed, 146 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
index 7b40aa2..cd986bd 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
@@ -36,6 +36,7 @@
 #define _MLXSW_IPIP_H_
 
 #include "spectrum_router.h"
+#include 
 
 enum mlxsw_sp_ipip_type {
MLXSW_SP_IPIP_TYPE_MAX,
@@ -46,6 +47,7 @@ struct mlxsw_sp_ipip_entry {
struct net_device *ol_dev; /* Overlay. */
struct mlxsw_sp_rif_ipip_lb *ol_lb;
unsigned int ref_count; /* Number of next hops using the tunnel. */
+   struct mlxsw_sp_fib_entry *decap_fib_entry;
struct list_head ipip_list_node;
 };
 
@@ -64,6 +66,11 @@ struct mlxsw_sp_ipip_ops {
struct mlxsw_sp_rif_ipip_lb_config
(*ol_loopback_config)(struct mlxsw_sp *mlxsw_sp,
  const struct net_device *ol_dev);
+
+   int (*fib_entry_op)(struct mlxsw_sp *mlxsw_sp,
+   struct mlxsw_sp_ipip_entry *ipip_entry,
+   enum mlxsw_reg_ralue_op op,
+   u32 tunnel_index);
 };
 
 extern const struct mlxsw_sp_ipip_ops *mlxsw_sp_ipip_ops_arr[];
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 05afd53..0d09f81 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -381,6 +381,14 @@ enum mlxsw_sp_fib_entry_type {
MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
+
+   /* This is a special case of local delivery, where a packet should be
+* decapsulated on reception. Note that there is no corresponding ENCAP,
+* because that's a type of next hop, not of FIB entry. (There can be
+* several next hops in a REMOTE entry, and some of them may be
+* encapsulating entries.)
+*/
+   MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
 };
 
 struct mlxsw_sp_nexthop_group;
@@ -394,12 +402,18 @@ struct mlxsw_sp_fib_node {
struct mlxsw_sp_fib_key key;
 };
 
+struct mlxsw_sp_fib_entry_decap {
+   struct mlxsw_sp_ipip_entry *ipip_entry;
+   u32 tunnel_index;
+};
+
 struct mlxsw_sp_fib_entry {
struct list_head list;
struct mlxsw_sp_fib_node *fib_node;
enum mlxsw_sp_fib_entry_type type;
struct list_head nexthop_group_node;
struct mlxsw_sp_nexthop_group *nh_group;
+   struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
 };
 
 struct mlxsw_sp_fib4_entry {
@@ -1031,6 +1045,48 @@ mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp 
*mlxsw_sp,
   mlxsw_sp_l3addr_eq(_saddr, );
 }
 
+static int
+mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry,
+ struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+   u32 tunnel_index;
+   int err;
+
+   err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &tunnel_index);
+   if (err)
+   return err;
+
+   ipip_entry->decap_fib_entry = fib_entry;
+   fib_entry->decap.ipip_entry = ipip_entry;
+   fib_entry->decap.tunnel_index = tunnel_index;
+   return 0;
+}
+
+static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry)
+{
+   /* Unlink this node from the IPIP entry that it's the decap entry of. */
+   fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
+   fib_entry->decap.ipip_entry = NULL;
+   mlxsw_sp_kvdl_free(mlxsw_sp, fib_entry->decap.tunnel_index);
+}
+
+static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
+struct mlxsw_sp_fib_entry *fib_entry);
+

[patch net-next 19/21] mlxsw: spectrum_router: Add loopback accessors

2017-09-02 Thread Jiri Pirko
From: Petr Machata 

struct mlxsw_sp_rif is a router-private structure, and therefore
everything related to it is as well: parameters, and derived RIF types
including loopbacks. The IPIP module needs access to some details of
loopback interfaces, but exporting the whole RIF shebang would create too
large an interface.

So instead export just the bare minimum necessary: accessors for RIF
index and underlay VRF ID.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 10 ++
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h |  3 +++
 2 files changed, 13 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 2189cf0..6068eea 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -5007,6 +5007,16 @@ u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
return rif->rif_index;
 }
 
+u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
+{
+   return lb_rif->common.rif_index;
+}
+
+u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
+{
+   return lb_rif->ul_vr_id;
+}
+
 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
 {
return rif->dev->ifindex;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
index 4276f5f..9632476 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
@@ -48,6 +48,7 @@ union mlxsw_sp_l3addr {
struct in6_addr addr6;
 };
 
+struct mlxsw_sp_rif_ipip_lb;
 struct mlxsw_sp_rif_ipip_lb_config {
enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt;
u32 okey;
@@ -65,6 +66,8 @@ struct mlxsw_sp_neigh_entry;
 struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
   u16 rif_index);
 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif);
+u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *rif);
+u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *rif);
 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif);
 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
   struct mlxsw_sp_rif *rif,
-- 
2.9.3



[patch net-next 11/21] mlxsw: spectrum_router: Extract mlxsw_sp_fi_is_gateway()

2017-09-02 Thread Jiri Pirko
From: Petr Machata 

For IPv4 IP-in-IP offload, routes that direct traffic to IP-in-IP
devices need to be considered gateway routes as well. That involves a
bit more logic, so extract the current test into a separate function,
where that logic can be added later.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 14 ++
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 72e386b..04e1887 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -2382,6 +2382,12 @@ static void mlxsw_sp_nexthop_rif_gone_sync(struct 
mlxsw_sp *mlxsw_sp,
}
 }
 
+static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
+  const struct fib_info *fi)
+{
+   return fi->fib_nh->nh_scope == RT_SCOPE_LINK;
+}
+
 static struct mlxsw_sp_nexthop_group *
 mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
 {
@@ -2401,7 +2407,7 @@ mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, 
struct fib_info *fi)
INIT_LIST_HEAD(_grp->fib_list);
nh_grp->neigh_tbl = _tbl;
 
-   nh_grp->gateway = fi->fib_nh->nh_scope == RT_SCOPE_LINK;
+   nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
nh_grp->count = fi->fib_nhs;
fib_info_hold(fi);
for (i = 0; i < nh_grp->count; i++) {
@@ -2801,10 +2807,10 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
return 0;
case RTN_UNICAST:
-   if (fi->fib_nh->nh_scope != RT_SCOPE_LINK)
-   fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
-   else
+   if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi))
fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
+   else
+   fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
return 0;
default:
return -EINVAL;
-- 
2.9.3



[patch net-next 20/21] mlxsw: spectrum_router: Support GRE tunnels

2017-09-02 Thread Jiri Pirko
From: Petr Machata 

This patch introduces callbacks and tunnel type to offload GRE tunnels.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 .../net/ethernet/mellanox/mlxsw/spectrum_ipip.c| 176 +
 .../net/ethernet/mellanox/mlxsw/spectrum_ipip.h|   1 +
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  |  26 +++
 .../net/ethernet/mellanox/mlxsw/spectrum_router.h  |   4 +
 4 files changed, 207 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
index 20b0b96..702fe94 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
@@ -32,7 +32,183 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include 
+
 #include "spectrum_ipip.h"
 
+static bool
+mlxsw_sp_ipip_netdev_has_ikey(const struct net_device *ol_dev)
+{
+   struct ip_tunnel *tun = netdev_priv(ol_dev);
+
+   return !!(tun->parms.i_flags & TUNNEL_KEY);
+}
+
+static bool
+mlxsw_sp_ipip_netdev_has_okey(const struct net_device *ol_dev)
+{
+   struct ip_tunnel *tun = netdev_priv(ol_dev);
+
+   return !!(tun->parms.o_flags & TUNNEL_KEY);
+}
+
+static u32 mlxsw_sp_ipip_netdev_ikey(const struct net_device *ol_dev)
+{
+   struct ip_tunnel *tun = netdev_priv(ol_dev);
+
+   return mlxsw_sp_ipip_netdev_has_ikey(ol_dev) ?
+   be32_to_cpu(tun->parms.i_key) : 0;
+}
+
+static u32 mlxsw_sp_ipip_netdev_okey(const struct net_device *ol_dev)
+{
+   struct ip_tunnel *tun = netdev_priv(ol_dev);
+
+   return mlxsw_sp_ipip_netdev_has_okey(ol_dev) ?
+   be32_to_cpu(tun->parms.o_key) : 0;
+}
+
+static int
+mlxsw_sp_ipip_nexthop_update_gre4(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
+ struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+   u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb);
+   __be32 daddr4 = mlxsw_sp_ipip_netdev_daddr4(ipip_entry->ol_dev);
+   char ratr_pl[MLXSW_REG_RATR_LEN];
+
+   mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
+   true, MLXSW_REG_RATR_TYPE_IPIP,
+   adj_index, rif_index);
+   mlxsw_reg_ratr_ipip4_entry_pack(ratr_pl, be32_to_cpu(daddr4));
+
+   return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
+}
+
+static int
+mlxsw_sp_ipip_fib_entry_op_gre4_rtdp(struct mlxsw_sp *mlxsw_sp,
+u32 tunnel_index,
+struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+   bool has_ikey = mlxsw_sp_ipip_netdev_has_ikey(ipip_entry->ol_dev);
+   u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb);
+   u32 ikey = mlxsw_sp_ipip_netdev_ikey(ipip_entry->ol_dev);
+   char rtdp_pl[MLXSW_REG_RTDP_LEN];
+   unsigned int type_check;
+   u32 daddr4;
+
+   mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_IPIP, tunnel_index);
+
+   type_check = has_ikey ?
+   MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE_KEY :
+   MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE;
+
+   /* Linux demuxes tunnels based on packet SIP (which must match tunnel
+* remote IP). Thus configure decap so that it filters out packets that
+* are not IPv4 or have the wrong SIP. IPIP_DECAP_ERROR trap is
+* generated for packets that fail this criterion. Linux then handles
+* such packets in slow path and generates ICMP destination unreachable.
+*/
+   daddr4 = be32_to_cpu(mlxsw_sp_ipip_netdev_daddr4(ipip_entry->ol_dev));
+   mlxsw_reg_rtdp_ipip4_pack(rtdp_pl, rif_index,
+ MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV4,
+ type_check, has_ikey, daddr4, ikey);
+
+   return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtdp), rtdp_pl);
+}
+
+static int
+mlxsw_sp_ipip_fib_entry_op_gre4_ralue(struct mlxsw_sp *mlxsw_sp,
+ u32 dip, u8 prefix_len, u16 ul_vr_id,
+ enum mlxsw_reg_ralue_op op,
+ u32 tunnel_index)
+{
+   char ralue_pl[MLXSW_REG_RALUE_LEN];
+
+   mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_REG_RALXX_PROTOCOL_IPV4, op,
+ ul_vr_id, prefix_len, dip);
+   mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl, tunnel_index);
+   return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
+}
+
+static int mlxsw_sp_ipip_fib_entry_op_gre4(struct mlxsw_sp *mlxsw_sp,
+   struct mlxsw_sp_ipip_entry *ipip_entry,
+   enum mlxsw_reg_ralue_op op,
+   u32 tunnel_index)
+{
+   u16 ul_vr_id = mlxsw_sp_ipip_lb_ul_vr_id(ipip_entry->ol_lb);
+   __be32 dip;
+   int err;
+
+ 

[patch net-next 13/21] mlxsw: spectrum_router: Make nexthops typed

2017-09-02 Thread Jiri Pirko
From: Petr Machata 

In the router, some next hops may reference an encapsulating netdevice,
such as GRE or IPIP. To properly offload these next hops, mlxsw needs to
keep track of whether a given next hop is a regular Ethernet entry, or
an IP-in-IP tunneling entry.

To facilitate this book-keeping, add a type field to struct
mlxsw_sp_nexthop. There is, as of this patch, only one next hop type:
MLXSW_SP_NEXTHOP_TYPE_ETH. Follow-up patches will introduce the IP-in-IP
variant.

There are several places where next hops are initialized in the IPv4
path. Instead of replicating the logic at every one of them, factor it
out to a function mlxsw_sp_nexthop4_type_init(). The corresponding fini
is actually protocol-neutral, so put it to mlxsw_sp_nexthop_type_fini(),
but create a corresponding protocoled _fini function that dispatches to
the protocol-neutral one.

The IPv6 path is simpler, but for symmetry with IPv4, create the same
suite of functions with corresponding logic.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 138 ++---
 1 file changed, 95 insertions(+), 43 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 563e803..53bdd0f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -1652,6 +1652,10 @@ static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp 
*mlxsw_sp,
}
 }
 
+enum mlxsw_sp_nexthop_type {
+   MLXSW_SP_NEXTHOP_TYPE_ETH,
+};
+
 struct mlxsw_sp_nexthop_key {
struct fib_nh *fib_nh;
 };
@@ -1676,7 +1680,10 @@ struct mlxsw_sp_nexthop {
   update:1; /* set indicates that MAC of this neigh should be
  * updated in HW
  */
-   struct mlxsw_sp_neigh_entry *neigh_entry;
+   enum mlxsw_sp_nexthop_type type;
+   union {
+   struct mlxsw_sp_neigh_entry *neigh_entry;
+   };
 };
 
 struct mlxsw_sp_nexthop_group {
@@ -1964,9 +1971,9 @@ static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp 
*mlxsw_sp, u32 adj_index,
 }
 
 static int
-mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_nexthop_group *nh_grp,
- bool reallocate)
+mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop_group *nh_grp,
+ bool reallocate)
 {
u32 adj_index = nh_grp->adj_index; /* base */
struct mlxsw_sp_nexthop *nh;
@@ -1982,8 +1989,12 @@ mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp 
*mlxsw_sp,
}
 
if (nh->update || reallocate) {
-   err = mlxsw_sp_nexthop_mac_update(mlxsw_sp,
- adj_index, nh);
+   switch (nh->type) {
+   case MLXSW_SP_NEXTHOP_TYPE_ETH:
+   err = mlxsw_sp_nexthop_mac_update
+   (mlxsw_sp, adj_index, nh);
+   break;
+   }
if (err)
return err;
nh->update = 0;
@@ -2071,8 +2082,7 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
/* Nothing was added or removed, so no need to reallocate. Just
 * update MAC on existing adjacency indexes.
 */
-   err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp,
-   false);
+   err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false);
if (err) {
dev_warn(mlxsw_sp->bus_info->dev, "Failed to update 
neigh MAC in adjacency table.\n");
goto set_trap;
@@ -2099,7 +2109,7 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
nh_grp->adj_index_valid = 1;
nh_grp->adj_index = adj_index;
nh_grp->ecmp_size = ecmp_size;
-   err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp, true);
+   err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
if (err) {
dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC 
in adjacency table.\n");
goto set_trap;
@@ -2287,6 +2297,48 @@ static bool mlxsw_sp_netdev_ipip_type(const struct 
mlxsw_sp *mlxsw_sp,
return false;
 }
 
+static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
+  struct mlxsw_sp_nexthop *nh)
+{
+   switch (nh->type) {
+   case MLXSW_SP_NEXTHOP_TYPE_ETH:
+   mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
+ 

[patch net-next 08/21] mlxsw: spectrum_router: Add mlxsw_sp_ipip_ops

2017-09-02 Thread Jiri Pirko
From: Petr Machata 

Details of individual tunnel types are kept in an array of
mlxsw_sp_ipip_ops objects. Follow-up patches will use the list to
determine whether a constructed RIF should be a loopback, and to decide
whether a next hop references a tunnel.

The list is currently empty; follow-up patches will add support for GRE.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/Makefile   |  4 +-
 .../net/ethernet/mellanox/mlxsw/spectrum_ipip.c| 38 
 .../net/ethernet/mellanox/mlxsw/spectrum_ipip.h| 51 ++
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 19 
 4 files changed, 110 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
 create mode 100644 drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h

diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile 
b/drivers/net/ethernet/mellanox/mlxsw/Makefile
index f9956f3..891ff41 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Makefile
+++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile
@@ -16,8 +16,8 @@ mlxsw_spectrum-objs   := spectrum.o 
spectrum_buffers.o \
   spectrum_switchdev.o spectrum_router.o \
   spectrum_kvdl.o spectrum_acl_tcam.o \
   spectrum_acl.o spectrum_flower.o \
-  spectrum_cnt.o \
-  spectrum_fid.o
+  spectrum_cnt.o spectrum_fid.o \
+  spectrum_ipip.o
 mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB)+= spectrum_dcb.o
 mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o
 obj-$(CONFIG_MLXSW_MINIMAL)+= mlxsw_minimal.o
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
new file mode 100644
index 000..20b0b96
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
@@ -0,0 +1,38 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Petr Machata 
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *notice, this list of conditions and the following disclaimer in the
+ *documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *contributors may be used to endorse or promote products derived from
+ *this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spectrum_ipip.h"
+
+const struct mlxsw_sp_ipip_ops *mlxsw_sp_ipip_ops_arr[] = {
+};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
new file mode 100644
index 000..65eb493
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
@@ -0,0 +1,51 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Petr Machata 
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *notice, this list 

[patch net-next 14/21] mlxsw: spectrum_router: Support IPv4 overlay encap

2017-09-02 Thread Jiri Pirko
From: Petr Machata 

This introduces some common code for tracking of offloaded IP-in-IP
tunnels, and support for offloading IPv4 overlay encapsulating routes in
particular. A follow-up patch will introduce IPv6 overlay as well.

Offloaded tunnels are kept in a linked list of mlxsw_sp_ipip_entry
objects hooked up in mlxsw_sp_router. A network device that represents
the tunnel is used as a key to look up the corresponding IPIP entry.
Note that in the future, a more general keying mechanism will be needed,
because parts of the tunnel information can be provided by the route.

IPIP entries are reference counted, because several next hops may end up
using the same tunnel, and we only want to offload it once.

Encapsulation path hooks into next hop handling. Routes that forward to
a tunnel are now considered gateway routes, thus giving them the same
treatment that other remote routes get. An IPIP next hop type is
introduced.

Details of individual tunnel types are kept in an array of
mlxsw_sp_ipip_ops objects. If a tunnel type doesn't match any of the
known tunnel types, the next-hop is not considered an IPIP next hop.

The list of IPIP tunnel types is currently empty; follow-up patches will
add support for GRE. Traffic to IPIP tunnel types that are not
explicitly recognized by the driver is trapped and handled in the slow path.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 .../net/ethernet/mellanox/mlxsw/spectrum_ipip.h|  20 ++
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 238 -
 2 files changed, 257 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
index 65eb493..7b40aa2 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
@@ -41,9 +41,29 @@ enum mlxsw_sp_ipip_type {
MLXSW_SP_IPIP_TYPE_MAX,
 };
 
+struct mlxsw_sp_ipip_entry {
+   enum mlxsw_sp_ipip_type ipipt;
+   struct net_device *ol_dev; /* Overlay. */
+   struct mlxsw_sp_rif_ipip_lb *ol_lb;
+   unsigned int ref_count; /* Number of next hops using the tunnel. */
+   struct list_head ipip_list_node;
+};
+
 struct mlxsw_sp_ipip_ops {
int dev_type;
enum mlxsw_sp_l3proto ul_proto; /* Underlay. */
+
+   int (*nexthop_update)(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
+ struct mlxsw_sp_ipip_entry *ipip_entry);
+
+   bool (*can_offload)(const struct mlxsw_sp *mlxsw_sp,
+   const struct net_device *ol_dev,
+   enum mlxsw_sp_l3proto ol_proto);
+
+   /* Return a configuration for creating an overlay loopback RIF. */
+   struct mlxsw_sp_rif_ipip_lb_config
+   (*ol_loopback_config)(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *ol_dev);
 };
 
 extern const struct mlxsw_sp_ipip_ops *mlxsw_sp_ipip_ops_arr[];
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 53bdd0f..231b597 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -89,6 +89,7 @@ struct mlxsw_sp_router {
struct delayed_work nexthop_probe_dw;
 #define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
struct list_head nexthop_neighs_list;
+   struct list_head ipip_list;
bool aborted;
struct notifier_block fib_nb;
const struct mlxsw_sp_rif_ops **rif_ops_arr;
@@ -915,6 +916,170 @@ static u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct 
net_device *ol_dev)
return l3mdev_fib_table(ol_dev) ? : RT_TABLE_MAIN;
 }
 
+static struct mlxsw_sp_rif *
+mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
+   const struct mlxsw_sp_rif_params *params);
+
+static struct mlxsw_sp_rif_ipip_lb *
+mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
+   enum mlxsw_sp_ipip_type ipipt,
+   struct net_device *ol_dev)
+{
+   struct mlxsw_sp_rif_params_ipip_lb lb_params;
+   const struct mlxsw_sp_ipip_ops *ipip_ops;
+   struct mlxsw_sp_rif *rif;
+
+   ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
+   lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
+   .common.dev = ol_dev,
+   .common.lag = false,
+   .lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
+   };
+
+   rif = mlxsw_sp_rif_create(mlxsw_sp, _params.common);
+   if (IS_ERR(rif))
+   return ERR_CAST(rif);
+   return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
+}
+
+static struct mlxsw_sp_ipip_entry *
+mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_sp_ipip_type 

[patch net-next 07/21] mlxsw: spectrum_router: Publish mlxsw_sp_l3proto

2017-09-02 Thread Jiri Pirko
From: Petr Machata 

The spectrum_ipip module that will be introduced in the follow-up
patches needs to know the data type.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 5 -
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h | 5 +
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 4e47d45..f866ac5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -405,11 +405,6 @@ struct mlxsw_sp_rt6 {
struct rt6_info *rt;
 };
 
-enum mlxsw_sp_l3proto {
-   MLXSW_SP_L3_PROTO_IPV4,
-   MLXSW_SP_L3_PROTO_IPV6,
-};
-
 struct mlxsw_sp_lpm_tree {
u8 id; /* tree ID */
unsigned int ref_count;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
index 87a04af..67c6c1d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
@@ -37,6 +37,11 @@
 
 #include "spectrum.h"
 
+enum mlxsw_sp_l3proto {
+   MLXSW_SP_L3_PROTO_IPV4,
+   MLXSW_SP_L3_PROTO_IPV6,
+};
+
 enum mlxsw_sp_rif_counter_dir {
MLXSW_SP_RIF_COUNTER_INGRESS,
MLXSW_SP_RIF_COUNTER_EGRESS,
-- 
2.9.3



  1   2   >